{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 24.1211, "active_queue_size": 16384.0, "cl_loss": 137.8735, "doc_norm": 8.4213, "encoder_q-embeddings": 35708.3516, "encoder_q-layer.0": 33159.1094, "encoder_q-layer.1": 32716.375, "encoder_q-layer.10": 118617.3047, "encoder_q-layer.11": 76948.2812, "encoder_q-layer.2": 38265.832, "encoder_q-layer.3": 45591.2891, "encoder_q-layer.4": 54894.6055, "encoder_q-layer.5": 65659.4844, "encoder_q-layer.6": 90634.5859, "encoder_q-layer.7": 104004.0781, "encoder_q-layer.8": 131431.1719, "encoder_q-layer.9": 102914.9297, "epoch": 0.0, "inbatch_neg_score": 41.2921, "inbatch_pos_score": 51.0625, "learning_rate": 5.000000000000001e-07, "loss": 137.8735, "norm_diff": 0.5414, "norm_loss": 0.0, "num_token_doc": 66.7429, "num_token_overlap": 18.0019, "num_token_query": 52.1878, "num_token_union": 73.3313, "num_word_context": 202.3315, "num_word_doc": 49.801, "num_word_query": 39.7703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 107582.0615, "preclip_grad_norm_avg": 0.001, "q@queue_neg_score": 41.375, "query_norm": 7.8799, "queue_k_norm": 8.422, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1878, "sent_len_1": 66.7429, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5112, "stdk": 0.1806, "stdq": 0.1712, "stdqueue_k": 0.1808, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 21.3867, "active_queue_size": 16384.0, "cl_loss": 86.2618, "doc_norm": 8.3343, "encoder_q-embeddings": 10866.2529, "encoder_q-layer.0": 9397.5469, "encoder_q-layer.1": 11060.1475, "encoder_q-layer.10": 28587.9355, "encoder_q-layer.11": 32304.6484, "encoder_q-layer.2": 12267.665, "encoder_q-layer.3": 13205.3037, "encoder_q-layer.4": 14779.3535, "encoder_q-layer.5": 17109.5586, "encoder_q-layer.6": 21105.668, "encoder_q-layer.7": 22735.25, "encoder_q-layer.8": 27350.0332, "encoder_q-layer.9": 21164.8164, "epoch": 0.0, "inbatch_neg_score": 36.2832, "inbatch_pos_score": 41.75, "learning_rate": 1.0000000000000002e-06, "loss": 86.2618, "norm_diff": 1.3497, "norm_loss": 0.0, "num_token_doc": 66.7447, "num_token_overlap": 17.9873, "num_token_query": 52.1166, "num_token_union": 73.3367, "num_word_context": 202.0541, "num_word_doc": 49.7598, "num_word_query": 39.7066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27952.0104, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 36.3125, "query_norm": 6.9846, "queue_k_norm": 8.3362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1166, "sent_len_1": 66.7447, "sent_len_max_0": 128.0, "sent_len_max_1": 191.6413, "stdk": 0.1778, "stdq": 0.1221, "stdqueue_k": 0.1779, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 23.0469, "active_queue_size": 16384.0, "cl_loss": 55.5798, "doc_norm": 8.1915, "encoder_q-embeddings": 7585.394, "encoder_q-layer.0": 6479.7412, "encoder_q-layer.1": 8262.1318, "encoder_q-layer.10": 14694.3301, "encoder_q-layer.11": 25136.082, "encoder_q-layer.2": 9855.7256, "encoder_q-layer.3": 9258.8213, "encoder_q-layer.4": 9068.6738, "encoder_q-layer.5": 9453.3574, "encoder_q-layer.6": 10508.248, "encoder_q-layer.7": 11185.2295, "encoder_q-layer.8": 12467.3564, "encoder_q-layer.9": 10656.7959, "epoch": 0.0, "inbatch_neg_score": 32.8094, "inbatch_pos_score": 36.75, "learning_rate": 1.5e-06, "loss": 55.5798, "norm_diff": 1.5366, "norm_loss": 0.0, "num_token_doc": 66.8561, "num_token_overlap": 18.0235, "num_token_query": 52.1846, "num_token_union": 73.4438, "num_word_context": 202.2068, "num_word_doc": 49.8918, "num_word_query": 39.7832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18290.0202, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 32.875, "query_norm": 6.6549, "queue_k_norm": 8.1885, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1846, "sent_len_1": 66.8561, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4363, "stdk": 0.1737, "stdq": 0.1065, "stdqueue_k": 0.1727, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 27.4414, "active_queue_size": 16384.0, "cl_loss": 39.1227, "doc_norm": 8.0146, "encoder_q-embeddings": 6344.7954, "encoder_q-layer.0": 5638.3818, "encoder_q-layer.1": 6215.3813, "encoder_q-layer.10": 10852.791, "encoder_q-layer.11": 17578.2891, "encoder_q-layer.2": 7110.1143, "encoder_q-layer.3": 7509.4697, "encoder_q-layer.4": 7795.248, "encoder_q-layer.5": 8403.1025, "encoder_q-layer.6": 8412.9131, "encoder_q-layer.7": 8266.7695, "encoder_q-layer.8": 9314.9717, "encoder_q-layer.9": 7746.9248, "epoch": 0.0, "inbatch_neg_score": 28.6262, "inbatch_pos_score": 31.5625, "learning_rate": 2.0000000000000003e-06, "loss": 39.1227, "norm_diff": 1.5999, "norm_loss": 0.0, "num_token_doc": 66.7642, "num_token_overlap": 17.9514, "num_token_query": 52.0112, "num_token_union": 73.3446, "num_word_context": 202.1151, "num_word_doc": 49.8164, "num_word_query": 39.6522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13240.5906, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 28.6406, "query_norm": 6.4147, "queue_k_norm": 8.0204, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0112, "sent_len_1": 66.7642, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0213, "stdk": 0.167, "stdq": 0.0983, "stdqueue_k": 0.1675, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 25.6836, "active_queue_size": 16384.0, "cl_loss": 29.1619, "doc_norm": 7.8181, "encoder_q-embeddings": 4877.5122, "encoder_q-layer.0": 4118.1206, "encoder_q-layer.1": 4768.417, "encoder_q-layer.10": 9700.877, "encoder_q-layer.11": 13632.0312, "encoder_q-layer.2": 5194.0146, "encoder_q-layer.3": 5681.4727, "encoder_q-layer.4": 6349.4038, "encoder_q-layer.5": 6427.7715, "encoder_q-layer.6": 6576.2974, "encoder_q-layer.7": 6761.2119, "encoder_q-layer.8": 7701.5532, "encoder_q-layer.9": 6643.8105, "epoch": 0.0, "inbatch_neg_score": 24.9435, "inbatch_pos_score": 27.2188, "learning_rate": 2.5e-06, "loss": 29.1619, "norm_diff": 1.6041, "norm_loss": 0.0, "num_token_doc": 66.7386, "num_token_overlap": 17.9915, "num_token_query": 52.1254, "num_token_union": 73.3854, "num_word_context": 201.8788, "num_word_doc": 49.8316, "num_word_query": 39.7221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10285.4214, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 24.9531, "query_norm": 6.214, "queue_k_norm": 7.8359, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1254, "sent_len_1": 66.7386, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.5163, "stdk": 0.1611, "stdq": 0.0945, "stdqueue_k": 0.1618, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 26.7578, "active_queue_size": 16384.0, "cl_loss": 23.7514, "doc_norm": 7.6472, "encoder_q-embeddings": 5177.4556, "encoder_q-layer.0": 4110.9111, "encoder_q-layer.1": 4939.5728, "encoder_q-layer.10": 9406.1064, "encoder_q-layer.11": 13519.7266, "encoder_q-layer.2": 5638.4634, "encoder_q-layer.3": 6276.4438, "encoder_q-layer.4": 6724.7871, "encoder_q-layer.5": 6900.8579, "encoder_q-layer.6": 6944.5205, "encoder_q-layer.7": 6616.915, "encoder_q-layer.8": 7643.1494, "encoder_q-layer.9": 6360.7866, "epoch": 0.01, "inbatch_neg_score": 21.5233, "inbatch_pos_score": 23.3438, "learning_rate": 3e-06, "loss": 23.7514, "norm_diff": 1.9313, "norm_loss": 0.0, "num_token_doc": 66.8968, "num_token_overlap": 18.1082, "num_token_query": 52.4744, "num_token_union": 73.5785, "num_word_context": 202.5753, "num_word_doc": 49.938, "num_word_query": 40.0183, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10448.5622, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 21.5, "query_norm": 5.7158, "queue_k_norm": 7.6476, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4744, "sent_len_1": 66.8968, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.9025, "stdk": 0.1564, "stdq": 0.091, "stdqueue_k": 0.1557, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 26.4648, "active_queue_size": 16384.0, "cl_loss": 20.8031, "doc_norm": 7.4432, "encoder_q-embeddings": 6418.7456, "encoder_q-layer.0": 5435.7559, "encoder_q-layer.1": 6636.0151, "encoder_q-layer.10": 11222.1035, "encoder_q-layer.11": 14598.6533, "encoder_q-layer.2": 7284.0674, "encoder_q-layer.3": 8212.6924, "encoder_q-layer.4": 8886.3242, "encoder_q-layer.5": 10017.6357, "encoder_q-layer.6": 10211.0234, "encoder_q-layer.7": 9277.8506, "encoder_q-layer.8": 9715.0898, "encoder_q-layer.9": 7051.3271, "epoch": 0.01, "inbatch_neg_score": 18.1354, "inbatch_pos_score": 19.75, "learning_rate": 3.5000000000000004e-06, "loss": 20.8031, "norm_diff": 2.2341, "norm_loss": 0.0, "num_token_doc": 66.7398, "num_token_overlap": 17.9829, "num_token_query": 52.1753, "num_token_union": 73.3906, "num_word_context": 202.1521, "num_word_doc": 49.7928, "num_word_query": 39.7693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13030.4445, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 18.1562, "query_norm": 5.2091, "queue_k_norm": 7.4537, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1753, "sent_len_1": 66.7398, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.2038, "stdk": 0.1509, "stdq": 0.0883, "stdqueue_k": 0.1501, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 26.3672, "active_queue_size": 16384.0, "cl_loss": 18.0466, "doc_norm": 7.254, "encoder_q-embeddings": 8043.0596, "encoder_q-layer.0": 6531.6299, "encoder_q-layer.1": 8163.4741, "encoder_q-layer.10": 14251.0459, "encoder_q-layer.11": 14778.918, "encoder_q-layer.2": 9003.9238, "encoder_q-layer.3": 9712.0479, "encoder_q-layer.4": 10613.1943, "encoder_q-layer.5": 11362.1104, "encoder_q-layer.6": 10665.2236, "encoder_q-layer.7": 9646.377, "encoder_q-layer.8": 10501.9258, "encoder_q-layer.9": 7707.5469, "epoch": 0.01, "inbatch_neg_score": 14.088, "inbatch_pos_score": 15.4688, "learning_rate": 4.000000000000001e-06, "loss": 18.0466, "norm_diff": 2.7187, "norm_loss": 0.0, "num_token_doc": 66.782, "num_token_overlap": 18.0153, "num_token_query": 52.24, "num_token_union": 73.4492, "num_word_context": 202.2671, "num_word_doc": 49.8479, "num_word_query": 39.8055, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14772.9329, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 14.0703, "query_norm": 4.5352, "queue_k_norm": 7.2642, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.24, "sent_len_1": 66.782, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5312, "stdk": 0.1439, "stdq": 0.0823, "stdqueue_k": 0.1444, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 28.8086, "active_queue_size": 16384.0, "cl_loss": 15.674, "doc_norm": 7.0699, "encoder_q-embeddings": 8240.9561, "encoder_q-layer.0": 6909.8999, "encoder_q-layer.1": 8682.7686, "encoder_q-layer.10": 9989.0645, "encoder_q-layer.11": 12943.5771, "encoder_q-layer.2": 9674.5537, "encoder_q-layer.3": 10198.7344, "encoder_q-layer.4": 10773.3174, "encoder_q-layer.5": 11014.8926, "encoder_q-layer.6": 9408.5352, "encoder_q-layer.7": 8275.29, "encoder_q-layer.8": 7741.8823, "encoder_q-layer.9": 5143.5552, "epoch": 0.01, "inbatch_neg_score": 10.6808, "inbatch_pos_score": 11.8828, "learning_rate": 4.5e-06, "loss": 15.674, "norm_diff": 3.2433, "norm_loss": 0.0, "num_token_doc": 66.8368, "num_token_overlap": 18.0438, "num_token_query": 52.199, "num_token_union": 73.4248, "num_word_context": 202.5508, "num_word_doc": 49.8993, "num_word_query": 39.7969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13561.1387, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 10.6484, "query_norm": 3.8266, "queue_k_norm": 7.0867, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.199, "sent_len_1": 66.8368, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.5938, "stdk": 0.1378, "stdq": 0.0771, "stdqueue_k": 0.1389, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 27.832, "active_queue_size": 16384.0, "cl_loss": 13.3108, "doc_norm": 6.9129, "encoder_q-embeddings": 11940.666, "encoder_q-layer.0": 10014.123, "encoder_q-layer.1": 12603.0312, "encoder_q-layer.10": 11636.6523, "encoder_q-layer.11": 13596.7822, "encoder_q-layer.2": 14689.0088, "encoder_q-layer.3": 16557.4844, "encoder_q-layer.4": 19617.7715, "encoder_q-layer.5": 21982.7129, "encoder_q-layer.6": 19203.4629, "encoder_q-layer.7": 15849.208, "encoder_q-layer.8": 16388.6719, "encoder_q-layer.9": 5830.978, "epoch": 0.01, "inbatch_neg_score": 6.9308, "inbatch_pos_score": 7.9414, "learning_rate": 5e-06, "loss": 13.3108, "norm_diff": 3.8888, "norm_loss": 0.0, "num_token_doc": 66.6765, "num_token_overlap": 18.0025, "num_token_query": 52.3335, "num_token_union": 73.4449, "num_word_context": 202.0959, "num_word_doc": 49.7356, "num_word_query": 39.888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21604.5024, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 6.9297, "query_norm": 3.0241, "queue_k_norm": 6.9202, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3335, "sent_len_1": 66.6765, "sent_len_max_0": 127.995, "sent_len_max_1": 191.0662, "stdk": 0.1337, "stdq": 0.0697, "stdqueue_k": 0.1337, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 28.125, "active_queue_size": 16384.0, "cl_loss": 11.5072, "doc_norm": 6.7496, "encoder_q-embeddings": 7050.3301, "encoder_q-layer.0": 5758.0854, "encoder_q-layer.1": 7383.5674, "encoder_q-layer.10": 7478.1279, "encoder_q-layer.11": 11205.1211, "encoder_q-layer.2": 8732.8975, "encoder_q-layer.3": 9888.834, "encoder_q-layer.4": 11655.5566, "encoder_q-layer.5": 13102.6338, "encoder_q-layer.6": 11336.6113, "encoder_q-layer.7": 7771.6943, "encoder_q-layer.8": 8659.3115, "encoder_q-layer.9": 3786.6577, "epoch": 0.01, "inbatch_neg_score": 5.1179, "inbatch_pos_score": 6.0234, "learning_rate": 5.500000000000001e-06, "loss": 11.5072, "norm_diff": 4.2546, "norm_loss": 0.0, "num_token_doc": 66.8665, "num_token_overlap": 18.0567, "num_token_query": 52.2201, "num_token_union": 73.464, "num_word_context": 202.2569, "num_word_doc": 49.8914, "num_word_query": 39.7831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13104.3124, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 5.125, "query_norm": 2.4951, "queue_k_norm": 6.7639, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2201, "sent_len_1": 66.8665, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.3862, "stdk": 0.1287, "stdq": 0.0627, "stdqueue_k": 0.1286, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 28.0273, "active_queue_size": 16384.0, "cl_loss": 10.1925, "doc_norm": 6.5999, "encoder_q-embeddings": 7936.585, "encoder_q-layer.0": 6600.0112, "encoder_q-layer.1": 8242.9756, "encoder_q-layer.10": 7964.5996, "encoder_q-layer.11": 10852.9756, "encoder_q-layer.2": 9562.9863, "encoder_q-layer.3": 10430.46, "encoder_q-layer.4": 11313.0391, "encoder_q-layer.5": 11984.7812, "encoder_q-layer.6": 9860.9971, "encoder_q-layer.7": 7178.1792, "encoder_q-layer.8": 6860.0576, "encoder_q-layer.9": 3642.8818, "epoch": 0.01, "inbatch_neg_score": 3.3401, "inbatch_pos_score": 4.1055, "learning_rate": 6e-06, "loss": 10.1925, "norm_diff": 4.4824, "norm_loss": 0.0, "num_token_doc": 66.7135, "num_token_overlap": 18.0428, "num_token_query": 52.2911, "num_token_union": 73.3811, "num_word_context": 201.7482, "num_word_doc": 49.7605, "num_word_query": 39.8719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12892.1463, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.3379, "query_norm": 2.1175, "queue_k_norm": 6.6197, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2911, "sent_len_1": 66.7135, "sent_len_max_0": 127.9963, "sent_len_max_1": 192.5437, "stdk": 0.1227, "stdq": 0.0571, "stdqueue_k": 0.1236, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 28.6133, "active_queue_size": 16384.0, "cl_loss": 9.3654, "doc_norm": 6.4656, "encoder_q-embeddings": 8198.9971, "encoder_q-layer.0": 6698.5518, "encoder_q-layer.1": 8268.9023, "encoder_q-layer.10": 6357.5674, "encoder_q-layer.11": 11435.7764, "encoder_q-layer.2": 9989.5459, "encoder_q-layer.3": 10847.8145, "encoder_q-layer.4": 11819.4062, "encoder_q-layer.5": 12847.291, "encoder_q-layer.6": 11325.1836, "encoder_q-layer.7": 10388.9717, "encoder_q-layer.8": 11957.7627, "encoder_q-layer.9": 4093.7754, "epoch": 0.01, "inbatch_neg_score": 1.6505, "inbatch_pos_score": 2.3496, "learning_rate": 6.5000000000000004e-06, "loss": 9.3654, "norm_diff": 4.5308, "norm_loss": 0.0, "num_token_doc": 67.0712, "num_token_overlap": 18.1052, "num_token_query": 52.3962, "num_token_union": 73.6682, "num_word_context": 202.6411, "num_word_doc": 50.0664, "num_word_query": 39.942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14322.4406, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.6494, "query_norm": 1.9348, "queue_k_norm": 6.4822, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3962, "sent_len_1": 67.0712, "sent_len_max_0": 128.0, "sent_len_max_1": 188.835, "stdk": 0.118, "stdq": 0.0551, "stdqueue_k": 0.1187, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 27.0508, "active_queue_size": 16384.0, "cl_loss": 8.9236, "doc_norm": 6.3487, "encoder_q-embeddings": 7851.5229, "encoder_q-layer.0": 6906.335, "encoder_q-layer.1": 8087.0786, "encoder_q-layer.10": 9966.3555, "encoder_q-layer.11": 13212.0449, "encoder_q-layer.2": 8839.125, "encoder_q-layer.3": 10248.5303, "encoder_q-layer.4": 11312.2588, "encoder_q-layer.5": 12143.9551, "encoder_q-layer.6": 9727.2295, "encoder_q-layer.7": 8415.0332, "encoder_q-layer.8": 8667.7588, "encoder_q-layer.9": 4753.5166, "epoch": 0.01, "inbatch_neg_score": 2.8907, "inbatch_pos_score": 3.543, "learning_rate": 7.000000000000001e-06, "loss": 8.9236, "norm_diff": 4.4308, "norm_loss": 0.0, "num_token_doc": 66.68, "num_token_overlap": 17.9622, "num_token_query": 52.1123, "num_token_union": 73.3322, "num_word_context": 202.15, "num_word_doc": 49.7734, "num_word_query": 39.7242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13561.132, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.8828, "query_norm": 1.9179, "queue_k_norm": 6.3586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1123, "sent_len_1": 66.68, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2312, "stdk": 0.1133, "stdq": 0.0524, "stdqueue_k": 0.1146, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 28.9062, "active_queue_size": 16384.0, "cl_loss": 8.3889, "doc_norm": 6.2511, "encoder_q-embeddings": 6922.5879, "encoder_q-layer.0": 5389.7427, "encoder_q-layer.1": 6805.9194, "encoder_q-layer.10": 7009.9341, "encoder_q-layer.11": 11171.6172, "encoder_q-layer.2": 7792.8872, "encoder_q-layer.3": 8599.8506, "encoder_q-layer.4": 9783.0215, "encoder_q-layer.5": 10273.5127, "encoder_q-layer.6": 10368.8271, "encoder_q-layer.7": 8552.8564, "encoder_q-layer.8": 7803.2451, "encoder_q-layer.9": 3783.5608, "epoch": 0.01, "inbatch_neg_score": 1.3199, "inbatch_pos_score": 1.9551, "learning_rate": 7.5e-06, "loss": 8.3889, "norm_diff": 4.4311, "norm_loss": 0.0, "num_token_doc": 66.699, "num_token_overlap": 17.9919, "num_token_query": 52.2337, "num_token_union": 73.4478, "num_word_context": 202.2152, "num_word_doc": 49.7787, "num_word_query": 39.8314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11893.9858, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3115, "query_norm": 1.82, "queue_k_norm": 6.2487, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2337, "sent_len_1": 66.699, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.8725, "stdk": 0.1093, "stdq": 0.0512, "stdqueue_k": 0.1103, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 29.2969, "active_queue_size": 16384.0, "cl_loss": 8.0484, "doc_norm": 6.1406, "encoder_q-embeddings": 7444.48, "encoder_q-layer.0": 6365.835, "encoder_q-layer.1": 7825.0527, "encoder_q-layer.10": 5492.8857, "encoder_q-layer.11": 9453.957, "encoder_q-layer.2": 9493.2949, "encoder_q-layer.3": 10471.4844, "encoder_q-layer.4": 11141.1855, "encoder_q-layer.5": 11300.0186, "encoder_q-layer.6": 8715.8926, "encoder_q-layer.7": 6593.9038, "encoder_q-layer.8": 5451.124, "encoder_q-layer.9": 3217.6533, "epoch": 0.02, "inbatch_neg_score": 1.5797, "inbatch_pos_score": 2.2148, "learning_rate": 8.000000000000001e-06, "loss": 8.0484, "norm_diff": 4.3435, "norm_loss": 0.0, "num_token_doc": 66.4482, "num_token_overlap": 17.9352, "num_token_query": 52.1301, "num_token_union": 73.3391, "num_word_context": 201.8937, "num_word_doc": 49.6266, "num_word_query": 39.7435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12070.8576, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.5752, "query_norm": 1.7971, "queue_k_norm": 6.1398, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1301, "sent_len_1": 66.4482, "sent_len_max_0": 128.0, "sent_len_max_1": 190.265, "stdk": 0.1061, "stdq": 0.0491, "stdqueue_k": 0.1064, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 30.3711, "active_queue_size": 16384.0, "cl_loss": 7.7912, "doc_norm": 6.0121, "encoder_q-embeddings": 7333.8887, "encoder_q-layer.0": 6231.2959, "encoder_q-layer.1": 7170.8374, "encoder_q-layer.10": 4628.7695, "encoder_q-layer.11": 8069.9961, "encoder_q-layer.2": 8245.8613, "encoder_q-layer.3": 9025.0771, "encoder_q-layer.4": 10493.6113, "encoder_q-layer.5": 10520.835, "encoder_q-layer.6": 8302.2402, "encoder_q-layer.7": 6358.3623, "encoder_q-layer.8": 6168.835, "encoder_q-layer.9": 2805.2639, "epoch": 0.02, "inbatch_neg_score": 1.0905, "inbatch_pos_score": 1.7256, "learning_rate": 8.500000000000002e-06, "loss": 7.7912, "norm_diff": 4.2686, "norm_loss": 0.0, "num_token_doc": 66.6014, "num_token_overlap": 17.9787, "num_token_query": 51.9967, "num_token_union": 73.1974, "num_word_context": 201.7533, "num_word_doc": 49.6848, "num_word_query": 39.6346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11045.1476, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.0869, "query_norm": 1.7435, "queue_k_norm": 6.0348, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 51.9967, "sent_len_1": 66.6014, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.915, "stdk": 0.1016, "stdq": 0.0487, "stdqueue_k": 0.1027, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 29.4922, "active_queue_size": 16384.0, "cl_loss": 7.5932, "doc_norm": 5.91, "encoder_q-embeddings": 14970.7881, "encoder_q-layer.0": 12451.54, "encoder_q-layer.1": 15685.501, "encoder_q-layer.10": 6482.7271, "encoder_q-layer.11": 9925.4238, "encoder_q-layer.2": 18388.8789, "encoder_q-layer.3": 18443.1582, "encoder_q-layer.4": 18446.2305, "encoder_q-layer.5": 18623.6309, "encoder_q-layer.6": 14331.6094, "encoder_q-layer.7": 10445.6279, "encoder_q-layer.8": 8574.5918, "encoder_q-layer.9": 3861.5015, "epoch": 0.02, "inbatch_neg_score": 0.9581, "inbatch_pos_score": 1.5752, "learning_rate": 9e-06, "loss": 7.5932, "norm_diff": 4.1685, "norm_loss": 0.0, "num_token_doc": 66.8404, "num_token_overlap": 18.0302, "num_token_query": 52.3673, "num_token_union": 73.6009, "num_word_context": 202.5795, "num_word_doc": 49.9233, "num_word_query": 39.9181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20550.8637, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.9561, "query_norm": 1.7415, "queue_k_norm": 5.9288, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3673, "sent_len_1": 66.8404, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9187, "stdk": 0.0982, "stdq": 0.048, "stdqueue_k": 0.0988, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 29.6875, "active_queue_size": 16384.0, "cl_loss": 7.3968, "doc_norm": 5.7812, "encoder_q-embeddings": 7887.0991, "encoder_q-layer.0": 6786.2129, "encoder_q-layer.1": 7641.7407, "encoder_q-layer.10": 16051.8613, "encoder_q-layer.11": 18559.0605, "encoder_q-layer.2": 8904.585, "encoder_q-layer.3": 9704.7949, "encoder_q-layer.4": 11749.7324, "encoder_q-layer.5": 12726.708, "encoder_q-layer.6": 12819.1865, "encoder_q-layer.7": 12374.6289, "encoder_q-layer.8": 14345.4629, "encoder_q-layer.9": 10906.8174, "epoch": 0.02, "inbatch_neg_score": 1.0612, "inbatch_pos_score": 1.6797, "learning_rate": 9.5e-06, "loss": 7.3968, "norm_diff": 4.013, "norm_loss": 0.0, "num_token_doc": 66.7562, "num_token_overlap": 17.9589, "num_token_query": 52.1892, "num_token_union": 73.4737, "num_word_context": 202.3052, "num_word_doc": 49.7914, "num_word_query": 39.776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16377.07, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.0605, "query_norm": 1.7682, "queue_k_norm": 5.804, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1892, "sent_len_1": 66.7562, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6188, "stdk": 0.095, "stdq": 0.0493, "stdqueue_k": 0.095, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 29.6875, "active_queue_size": 16384.0, "cl_loss": 7.3159, "doc_norm": 5.6536, "encoder_q-embeddings": 3397.4329, "encoder_q-layer.0": 2943.1646, "encoder_q-layer.1": 3486.0154, "encoder_q-layer.10": 2641.6072, "encoder_q-layer.11": 4039.1326, "encoder_q-layer.2": 3987.3479, "encoder_q-layer.3": 4229.3384, "encoder_q-layer.4": 4841.6851, "encoder_q-layer.5": 5323.1987, "encoder_q-layer.6": 4832.2842, "encoder_q-layer.7": 4996.3774, "encoder_q-layer.8": 4770.3774, "encoder_q-layer.9": 2638.4045, "epoch": 0.02, "inbatch_neg_score": 0.4371, "inbatch_pos_score": 1.0195, "learning_rate": 1e-05, "loss": 7.3159, "norm_diff": 3.8873, "norm_loss": 0.0, "num_token_doc": 66.9893, "num_token_overlap": 17.9567, "num_token_query": 52.0604, "num_token_union": 73.5325, "num_word_context": 202.119, "num_word_doc": 49.9683, "num_word_query": 39.6714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5859.3692, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4312, "query_norm": 1.7663, "queue_k_norm": 5.6714, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0604, "sent_len_1": 66.9893, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3137, "stdk": 0.0907, "stdq": 0.0487, "stdqueue_k": 0.0917, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 27.3438, "active_queue_size": 16384.0, "cl_loss": 7.1648, "doc_norm": 5.4984, "encoder_q-embeddings": 2722.4319, "encoder_q-layer.0": 2192.2051, "encoder_q-layer.1": 2763.3945, "encoder_q-layer.10": 14336.3633, "encoder_q-layer.11": 14746.4951, "encoder_q-layer.2": 3341.2427, "encoder_q-layer.3": 3881.4497, "encoder_q-layer.4": 5036.9912, "encoder_q-layer.5": 6025.1143, "encoder_q-layer.6": 6754.9072, "encoder_q-layer.7": 8461.416, "encoder_q-layer.8": 8872.7012, "encoder_q-layer.9": 9052.8584, "epoch": 0.02, "inbatch_neg_score": 0.8158, "inbatch_pos_score": 1.3916, "learning_rate": 1.05e-05, "loss": 7.1648, "norm_diff": 3.693, "norm_loss": 0.0, "num_token_doc": 66.959, "num_token_overlap": 18.045, "num_token_query": 52.2427, "num_token_union": 73.5437, "num_word_context": 202.367, "num_word_doc": 50.0145, "num_word_query": 39.8457, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10388.8694, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8037, "query_norm": 1.8054, "queue_k_norm": 5.529, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2427, "sent_len_1": 66.959, "sent_len_max_0": 128.0, "sent_len_max_1": 188.925, "stdk": 0.0878, "stdq": 0.0499, "stdqueue_k": 0.0887, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 34.1797, "active_queue_size": 16384.0, "cl_loss": 6.9186, "doc_norm": 5.3583, "encoder_q-embeddings": 1665.2715, "encoder_q-layer.0": 1375.2128, "encoder_q-layer.1": 1755.8385, "encoder_q-layer.10": 3822.2891, "encoder_q-layer.11": 4585.4585, "encoder_q-layer.2": 2193.5852, "encoder_q-layer.3": 2475.2385, "encoder_q-layer.4": 2997.105, "encoder_q-layer.5": 3517.2422, "encoder_q-layer.6": 3281.9409, "encoder_q-layer.7": 2902.5156, "encoder_q-layer.8": 3017.4634, "encoder_q-layer.9": 2140.4934, "epoch": 0.02, "inbatch_neg_score": 1.0118, "inbatch_pos_score": 1.5967, "learning_rate": 1.1000000000000001e-05, "loss": 6.9186, "norm_diff": 3.565, "norm_loss": 0.0, "num_token_doc": 66.7158, "num_token_overlap": 18.0131, "num_token_query": 52.2778, "num_token_union": 73.4698, "num_word_context": 201.8809, "num_word_doc": 49.8103, "num_word_query": 39.8793, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3958.6313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0039, "query_norm": 1.7933, "queue_k_norm": 5.3586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2778, "sent_len_1": 66.7158, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2713, "stdk": 0.0855, "stdq": 0.0498, "stdqueue_k": 0.0857, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 31.543, "active_queue_size": 16384.0, "cl_loss": 6.7631, "doc_norm": 5.1505, "encoder_q-embeddings": 1827.7753, "encoder_q-layer.0": 1578.0436, "encoder_q-layer.1": 1948.2139, "encoder_q-layer.10": 8896.2646, "encoder_q-layer.11": 8710.8955, "encoder_q-layer.2": 2245.0989, "encoder_q-layer.3": 2497.0286, "encoder_q-layer.4": 2961.7925, "encoder_q-layer.5": 3780.1807, "encoder_q-layer.6": 3933.2129, "encoder_q-layer.7": 4302.4868, "encoder_q-layer.8": 5064.5269, "encoder_q-layer.9": 5601.7998, "epoch": 0.02, "inbatch_neg_score": 0.476, "inbatch_pos_score": 1.0811, "learning_rate": 1.1500000000000002e-05, "loss": 6.7631, "norm_diff": 3.3569, "norm_loss": 0.0, "num_token_doc": 66.7641, "num_token_overlap": 18.0254, "num_token_query": 52.2044, "num_token_union": 73.457, "num_word_context": 202.026, "num_word_doc": 49.8223, "num_word_query": 39.8265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6171.9319, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4712, "query_norm": 1.7936, "queue_k_norm": 5.1725, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2044, "sent_len_1": 66.7641, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1, "stdk": 0.0821, "stdq": 0.0515, "stdqueue_k": 0.0828, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 30.957, "active_queue_size": 16384.0, "cl_loss": 6.4573, "doc_norm": 4.9446, "encoder_q-embeddings": 1409.1438, "encoder_q-layer.0": 1162.3989, "encoder_q-layer.1": 1340.369, "encoder_q-layer.10": 3139.6958, "encoder_q-layer.11": 3633.1179, "encoder_q-layer.2": 1494.0057, "encoder_q-layer.3": 1655.448, "encoder_q-layer.4": 1671.1798, "encoder_q-layer.5": 1741.4576, "encoder_q-layer.6": 1666.6177, "encoder_q-layer.7": 1786.449, "encoder_q-layer.8": 2439.9951, "encoder_q-layer.9": 2467.457, "epoch": 0.02, "inbatch_neg_score": 0.5454, "inbatch_pos_score": 1.1113, "learning_rate": 1.2e-05, "loss": 6.4573, "norm_diff": 3.1655, "norm_loss": 0.0, "num_token_doc": 66.8891, "num_token_overlap": 18.0824, "num_token_query": 52.3041, "num_token_union": 73.4736, "num_word_context": 202.3044, "num_word_doc": 49.9328, "num_word_query": 39.8794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2850.6119, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.541, "query_norm": 1.7791, "queue_k_norm": 4.9754, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3041, "sent_len_1": 66.8891, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7612, "stdk": 0.0795, "stdq": 0.0497, "stdqueue_k": 0.0802, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 31.0547, "active_queue_size": 16384.0, "cl_loss": 6.4104, "doc_norm": 4.7438, "encoder_q-embeddings": 1228.3041, "encoder_q-layer.0": 1003.0634, "encoder_q-layer.1": 1442.1827, "encoder_q-layer.10": 7499.6509, "encoder_q-layer.11": 7006.8149, "encoder_q-layer.2": 1835.1804, "encoder_q-layer.3": 2303.7888, "encoder_q-layer.4": 3329.4648, "encoder_q-layer.5": 4553.3081, "encoder_q-layer.6": 5976.2231, "encoder_q-layer.7": 7367.3521, "encoder_q-layer.8": 8523.3857, "encoder_q-layer.9": 7372.3223, "epoch": 0.02, "inbatch_neg_score": 1.2361, "inbatch_pos_score": 1.8213, "learning_rate": 1.25e-05, "loss": 6.4104, "norm_diff": 2.8948, "norm_loss": 0.0, "num_token_doc": 66.9673, "num_token_overlap": 18.0914, "num_token_query": 52.4715, "num_token_union": 73.6254, "num_word_context": 202.3481, "num_word_doc": 49.9199, "num_word_query": 40.0011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7076.2777, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2266, "query_norm": 1.849, "queue_k_norm": 4.7744, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.4715, "sent_len_1": 66.9673, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9013, "stdk": 0.0767, "stdq": 0.0516, "stdqueue_k": 0.0775, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 36.6211, "active_queue_size": 16384.0, "cl_loss": 6.261, "doc_norm": 4.5262, "encoder_q-embeddings": 3069.6294, "encoder_q-layer.0": 2304.7158, "encoder_q-layer.1": 2612.4475, "encoder_q-layer.10": 7302.3975, "encoder_q-layer.11": 6216.4458, "encoder_q-layer.2": 2715.8494, "encoder_q-layer.3": 2841.5242, "encoder_q-layer.4": 3224.9717, "encoder_q-layer.5": 3317.7512, "encoder_q-layer.6": 3966.436, "encoder_q-layer.7": 4829.4346, "encoder_q-layer.8": 5311.9976, "encoder_q-layer.9": 5728.8311, "epoch": 0.03, "inbatch_neg_score": 0.3889, "inbatch_pos_score": 0.9824, "learning_rate": 1.3000000000000001e-05, "loss": 6.261, "norm_diff": 2.7386, "norm_loss": 0.0, "num_token_doc": 66.8063, "num_token_overlap": 17.9595, "num_token_query": 52.0307, "num_token_union": 73.3968, "num_word_context": 202.3524, "num_word_doc": 49.8819, "num_word_query": 39.6373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5923.3513, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3823, "query_norm": 1.7876, "queue_k_norm": 4.5741, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0307, "sent_len_1": 66.8063, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.13, "stdk": 0.0749, "stdq": 0.0512, "stdqueue_k": 0.0754, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 33.0078, "active_queue_size": 16384.0, "cl_loss": 6.0784, "doc_norm": 4.3438, "encoder_q-embeddings": 961.2827, "encoder_q-layer.0": 753.139, "encoder_q-layer.1": 939.8614, "encoder_q-layer.10": 5236.5659, "encoder_q-layer.11": 4779.6021, "encoder_q-layer.2": 1152.1238, "encoder_q-layer.3": 1265.2135, "encoder_q-layer.4": 1655.407, "encoder_q-layer.5": 2104.1787, "encoder_q-layer.6": 2592.8101, "encoder_q-layer.7": 3438.3079, "encoder_q-layer.8": 3999.1631, "encoder_q-layer.9": 4104.7578, "epoch": 0.03, "inbatch_neg_score": 0.7254, "inbatch_pos_score": 1.2725, "learning_rate": 1.3500000000000001e-05, "loss": 6.0784, "norm_diff": 2.5469, "norm_loss": 0.0, "num_token_doc": 66.6456, "num_token_overlap": 17.8992, "num_token_query": 52.1976, "num_token_union": 73.4525, "num_word_context": 202.5186, "num_word_doc": 49.7342, "num_word_query": 39.7862, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3845.7452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7144, "query_norm": 1.7969, "queue_k_norm": 4.3594, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1976, "sent_len_1": 66.6456, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7988, "stdk": 0.0726, "stdq": 0.0517, "stdqueue_k": 0.0736, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 29.8828, "active_queue_size": 16384.0, "cl_loss": 6.0316, "doc_norm": 4.1389, "encoder_q-embeddings": 1860.8976, "encoder_q-layer.0": 1587.0052, "encoder_q-layer.1": 2124.5542, "encoder_q-layer.10": 21765.2617, "encoder_q-layer.11": 16281.9834, "encoder_q-layer.2": 2823.9678, "encoder_q-layer.3": 3571.5801, "encoder_q-layer.4": 5120.584, "encoder_q-layer.5": 6981.1704, "encoder_q-layer.6": 9179.3447, "encoder_q-layer.7": 12789.3916, "encoder_q-layer.8": 15725.4512, "encoder_q-layer.9": 17491.9023, "epoch": 0.03, "inbatch_neg_score": 0.3398, "inbatch_pos_score": 0.9053, "learning_rate": 1.4000000000000001e-05, "loss": 6.0316, "norm_diff": 2.2759, "norm_loss": 0.0, "num_token_doc": 66.7342, "num_token_overlap": 17.9249, "num_token_query": 52.11, "num_token_union": 73.4111, "num_word_context": 202.0651, "num_word_doc": 49.7793, "num_word_query": 39.715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14571.5175, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3318, "query_norm": 1.863, "queue_k_norm": 4.1679, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.11, "sent_len_1": 66.7342, "sent_len_max_0": 128.0, "sent_len_max_1": 190.67, "stdk": 0.0713, "stdq": 0.0529, "stdqueue_k": 0.0717, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 35.5469, "active_queue_size": 16384.0, "cl_loss": 6.0309, "doc_norm": 3.9646, "encoder_q-embeddings": 1535.7506, "encoder_q-layer.0": 1250.3176, "encoder_q-layer.1": 1640.8939, "encoder_q-layer.10": 9847.6533, "encoder_q-layer.11": 7402.2637, "encoder_q-layer.2": 2091.98, "encoder_q-layer.3": 2548.9465, "encoder_q-layer.4": 3392.0554, "encoder_q-layer.5": 4481.6206, "encoder_q-layer.6": 5448.0942, "encoder_q-layer.7": 7184.23, "encoder_q-layer.8": 8653.4004, "encoder_q-layer.9": 9094.1748, "epoch": 0.03, "inbatch_neg_score": 0.3273, "inbatch_pos_score": 0.917, "learning_rate": 1.45e-05, "loss": 6.0309, "norm_diff": 2.1785, "norm_loss": 0.0, "num_token_doc": 66.742, "num_token_overlap": 18.0011, "num_token_query": 52.2046, "num_token_union": 73.4035, "num_word_context": 201.9603, "num_word_doc": 49.817, "num_word_query": 39.787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7694.7606, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3225, "query_norm": 1.786, "queue_k_norm": 3.9774, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2046, "sent_len_1": 66.742, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1387, "stdk": 0.0697, "stdq": 0.0511, "stdqueue_k": 0.0701, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 35.4492, "active_queue_size": 16384.0, "cl_loss": 5.7734, "doc_norm": 3.7769, "encoder_q-embeddings": 939.0315, "encoder_q-layer.0": 791.2817, "encoder_q-layer.1": 1041.7266, "encoder_q-layer.10": 8932.8457, "encoder_q-layer.11": 6228.9829, "encoder_q-layer.2": 1345.8134, "encoder_q-layer.3": 1546.2311, "encoder_q-layer.4": 2074.5291, "encoder_q-layer.5": 2880.7888, "encoder_q-layer.6": 3538.8323, "encoder_q-layer.7": 4681.5825, "encoder_q-layer.8": 5859.3057, "encoder_q-layer.9": 6869.6719, "epoch": 0.03, "inbatch_neg_score": 0.51, "inbatch_pos_score": 1.0762, "learning_rate": 1.5e-05, "loss": 5.7734, "norm_diff": 1.9514, "norm_loss": 0.0, "num_token_doc": 66.6644, "num_token_overlap": 18.0009, "num_token_query": 52.3341, "num_token_union": 73.4736, "num_word_context": 202.4115, "num_word_doc": 49.7942, "num_word_query": 39.9135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5718.0125, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5083, "query_norm": 1.8255, "queue_k_norm": 3.8029, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3341, "sent_len_1": 66.6644, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.1662, "stdk": 0.0688, "stdq": 0.0523, "stdqueue_k": 0.0683, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 34.4727, "active_queue_size": 16384.0, "cl_loss": 5.7978, "doc_norm": 3.6248, "encoder_q-embeddings": 1018.1485, "encoder_q-layer.0": 843.779, "encoder_q-layer.1": 976.2901, "encoder_q-layer.10": 5792.7129, "encoder_q-layer.11": 3997.3694, "encoder_q-layer.2": 1244.0455, "encoder_q-layer.3": 1411.3146, "encoder_q-layer.4": 1822.7322, "encoder_q-layer.5": 2326.1829, "encoder_q-layer.6": 3035.0518, "encoder_q-layer.7": 4145.9199, "encoder_q-layer.8": 5052.2681, "encoder_q-layer.9": 5185.7109, "epoch": 0.03, "inbatch_neg_score": 0.6035, "inbatch_pos_score": 1.1602, "learning_rate": 1.55e-05, "loss": 5.7978, "norm_diff": 1.7665, "norm_loss": 0.0, "num_token_doc": 66.8391, "num_token_overlap": 18.0517, "num_token_query": 52.2993, "num_token_union": 73.5167, "num_word_context": 202.4798, "num_word_doc": 49.9328, "num_word_query": 39.872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4371.896, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5967, "query_norm": 1.8583, "queue_k_norm": 3.6387, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2993, "sent_len_1": 66.8391, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5775, "stdk": 0.0663, "stdq": 0.0529, "stdqueue_k": 0.067, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 37.8906, "active_queue_size": 16384.0, "cl_loss": 5.9762, "doc_norm": 3.457, "encoder_q-embeddings": 1347.6783, "encoder_q-layer.0": 1125.793, "encoder_q-layer.1": 1466.0334, "encoder_q-layer.10": 6342.3018, "encoder_q-layer.11": 4361.332, "encoder_q-layer.2": 1589.656, "encoder_q-layer.3": 1589.2555, "encoder_q-layer.4": 1992.0355, "encoder_q-layer.5": 2520.7549, "encoder_q-layer.6": 3145.282, "encoder_q-layer.7": 4305.3779, "encoder_q-layer.8": 5213.7686, "encoder_q-layer.9": 5636.4238, "epoch": 0.03, "inbatch_neg_score": 0.4616, "inbatch_pos_score": 1.0293, "learning_rate": 1.6000000000000003e-05, "loss": 5.9762, "norm_diff": 1.6271, "norm_loss": 0.0, "num_token_doc": 66.5558, "num_token_overlap": 17.9557, "num_token_query": 52.0857, "num_token_union": 73.2734, "num_word_context": 202.2039, "num_word_doc": 49.701, "num_word_query": 39.7279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4711.4966, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4561, "query_norm": 1.8298, "queue_k_norm": 3.4835, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0857, "sent_len_1": 66.5558, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.5687, "stdk": 0.0658, "stdq": 0.0517, "stdqueue_k": 0.066, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 35.7422, "active_queue_size": 16384.0, "cl_loss": 5.7341, "doc_norm": 3.3579, "encoder_q-embeddings": 1193.9534, "encoder_q-layer.0": 1043.8744, "encoder_q-layer.1": 1535.4781, "encoder_q-layer.10": 20272.4531, "encoder_q-layer.11": 12756.3086, "encoder_q-layer.2": 2176.4358, "encoder_q-layer.3": 2728.8909, "encoder_q-layer.4": 3807.4646, "encoder_q-layer.5": 5291.7451, "encoder_q-layer.6": 6912.4277, "encoder_q-layer.7": 9918.792, "encoder_q-layer.8": 12675.8037, "encoder_q-layer.9": 15965.1035, "epoch": 0.03, "inbatch_neg_score": 0.8932, "inbatch_pos_score": 1.4785, "learning_rate": 1.65e-05, "loss": 5.7341, "norm_diff": 1.491, "norm_loss": 0.0, "num_token_doc": 66.8474, "num_token_overlap": 18.0156, "num_token_query": 52.2779, "num_token_union": 73.494, "num_word_context": 202.6073, "num_word_doc": 49.871, "num_word_query": 39.8744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12274.9006, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8931, "query_norm": 1.8669, "queue_k_norm": 3.3516, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2779, "sent_len_1": 66.8474, "sent_len_max_0": 128.0, "sent_len_max_1": 190.38, "stdk": 0.0657, "stdq": 0.0535, "stdqueue_k": 0.0649, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 37.5, "active_queue_size": 16384.0, "cl_loss": 5.6616, "doc_norm": 3.1838, "encoder_q-embeddings": 1027.1647, "encoder_q-layer.0": 905.506, "encoder_q-layer.1": 1165.1459, "encoder_q-layer.10": 7391.4146, "encoder_q-layer.11": 4727.2515, "encoder_q-layer.2": 1402.7567, "encoder_q-layer.3": 1659.5513, "encoder_q-layer.4": 2241.5437, "encoder_q-layer.5": 3043.0103, "encoder_q-layer.6": 3568.8708, "encoder_q-layer.7": 4448.0596, "encoder_q-layer.8": 5398.9331, "encoder_q-layer.9": 6011.6445, "epoch": 0.03, "inbatch_neg_score": 0.5554, "inbatch_pos_score": 1.1143, "learning_rate": 1.7000000000000003e-05, "loss": 5.6616, "norm_diff": 1.3194, "norm_loss": 0.0, "num_token_doc": 66.8695, "num_token_overlap": 17.9962, "num_token_query": 52.2753, "num_token_union": 73.5485, "num_word_context": 202.6341, "num_word_doc": 49.8609, "num_word_query": 39.8546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5073.6602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5474, "query_norm": 1.8645, "queue_k_norm": 3.2131, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2753, "sent_len_1": 66.8695, "sent_len_max_0": 128.0, "sent_len_max_1": 192.0, "stdk": 0.0632, "stdq": 0.0528, "stdqueue_k": 0.0642, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 35.5469, "active_queue_size": 16384.0, "cl_loss": 5.5859, "doc_norm": 3.0826, "encoder_q-embeddings": 818.4747, "encoder_q-layer.0": 726.8892, "encoder_q-layer.1": 943.3179, "encoder_q-layer.10": 8309.334, "encoder_q-layer.11": 5460.9414, "encoder_q-layer.2": 1097.3339, "encoder_q-layer.3": 1324.9836, "encoder_q-layer.4": 1642.0099, "encoder_q-layer.5": 2221.7148, "encoder_q-layer.6": 2841.1851, "encoder_q-layer.7": 4098.5723, "encoder_q-layer.8": 5411.8442, "encoder_q-layer.9": 6838.8574, "epoch": 0.03, "inbatch_neg_score": 0.673, "inbatch_pos_score": 1.2246, "learning_rate": 1.75e-05, "loss": 5.5859, "norm_diff": 1.2563, "norm_loss": 0.0, "num_token_doc": 66.825, "num_token_overlap": 17.928, "num_token_query": 51.9961, "num_token_union": 73.4152, "num_word_context": 202.1206, "num_word_doc": 49.8707, "num_word_query": 39.602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5215.9424, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6675, "query_norm": 1.8263, "queue_k_norm": 3.0892, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 51.9961, "sent_len_1": 66.825, "sent_len_max_0": 127.995, "sent_len_max_1": 188.2212, "stdk": 0.0634, "stdq": 0.052, "stdqueue_k": 0.0634, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 5.3554, "doc_norm": 2.9559, "encoder_q-embeddings": 1295.7972, "encoder_q-layer.0": 1096.0, "encoder_q-layer.1": 1292.5148, "encoder_q-layer.10": 11171.9473, "encoder_q-layer.11": 7398.6089, "encoder_q-layer.2": 1585.9028, "encoder_q-layer.3": 1605.834, "encoder_q-layer.4": 2005.3605, "encoder_q-layer.5": 2562.5317, "encoder_q-layer.6": 3228.7754, "encoder_q-layer.7": 4686.4517, "encoder_q-layer.8": 6344.9858, "encoder_q-layer.9": 8152.4297, "epoch": 0.04, "inbatch_neg_score": 0.5011, "inbatch_pos_score": 1.084, "learning_rate": 1.8e-05, "loss": 5.3554, "norm_diff": 1.1575, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 17.9908, "num_token_query": 52.2093, "num_token_union": 73.4806, "num_word_context": 202.5946, "num_word_doc": 49.8785, "num_word_query": 39.8333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6570.7405, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4968, "query_norm": 1.7984, "queue_k_norm": 2.98, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2093, "sent_len_1": 66.8275, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.1, "stdk": 0.062, "stdq": 0.0514, "stdqueue_k": 0.0625, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 37.8906, "active_queue_size": 16384.0, "cl_loss": 5.2684, "doc_norm": 2.8625, "encoder_q-embeddings": 1040.6603, "encoder_q-layer.0": 876.5811, "encoder_q-layer.1": 1123.2797, "encoder_q-layer.10": 9848.5771, "encoder_q-layer.11": 6506.0933, "encoder_q-layer.2": 1418.0811, "encoder_q-layer.3": 1535.2889, "encoder_q-layer.4": 2043.96, "encoder_q-layer.5": 2869.3335, "encoder_q-layer.6": 3467.6594, "encoder_q-layer.7": 4630.0493, "encoder_q-layer.8": 5922.3022, "encoder_q-layer.9": 7155.7358, "epoch": 0.04, "inbatch_neg_score": 0.7093, "inbatch_pos_score": 1.3096, "learning_rate": 1.85e-05, "loss": 5.2684, "norm_diff": 1.0267, "norm_loss": 0.0, "num_token_doc": 66.7956, "num_token_overlap": 17.9812, "num_token_query": 52.2175, "num_token_union": 73.5088, "num_word_context": 202.33, "num_word_doc": 49.8143, "num_word_query": 39.8248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5949.2087, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7036, "query_norm": 1.8358, "queue_k_norm": 2.8764, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2175, "sent_len_1": 66.7956, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.2038, "stdk": 0.0611, "stdq": 0.0515, "stdqueue_k": 0.062, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 36.4258, "active_queue_size": 16384.0, "cl_loss": 5.0834, "doc_norm": 2.7874, "encoder_q-embeddings": 1856.2479, "encoder_q-layer.0": 1734.6997, "encoder_q-layer.1": 1621.5416, "encoder_q-layer.10": 14013.1992, "encoder_q-layer.11": 9263.2188, "encoder_q-layer.2": 1757.6921, "encoder_q-layer.3": 1724.5704, "encoder_q-layer.4": 2066.2007, "encoder_q-layer.5": 2622.635, "encoder_q-layer.6": 3817.311, "encoder_q-layer.7": 6097.2056, "encoder_q-layer.8": 8670.1592, "encoder_q-layer.9": 10577.9854, "epoch": 0.04, "inbatch_neg_score": 0.7171, "inbatch_pos_score": 1.3027, "learning_rate": 1.9e-05, "loss": 5.0834, "norm_diff": 0.9538, "norm_loss": 0.0, "num_token_doc": 66.967, "num_token_overlap": 18.087, "num_token_query": 52.3971, "num_token_union": 73.5807, "num_word_context": 202.181, "num_word_doc": 49.9092, "num_word_query": 39.9103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8342.2263, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.708, "query_norm": 1.8337, "queue_k_norm": 2.7752, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3971, "sent_len_1": 66.967, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.9212, "stdk": 0.0614, "stdq": 0.0514, "stdqueue_k": 0.0614, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.9657, "doc_norm": 2.6801, "encoder_q-embeddings": 589.9197, "encoder_q-layer.0": 500.4023, "encoder_q-layer.1": 544.0502, "encoder_q-layer.10": 896.5798, "encoder_q-layer.11": 1514.4922, "encoder_q-layer.2": 573.79, "encoder_q-layer.3": 558.6412, "encoder_q-layer.4": 613.6658, "encoder_q-layer.5": 585.743, "encoder_q-layer.6": 528.5283, "encoder_q-layer.7": 514.2408, "encoder_q-layer.8": 657.8213, "encoder_q-layer.9": 700.8271, "epoch": 0.04, "inbatch_neg_score": 0.6106, "inbatch_pos_score": 1.1934, "learning_rate": 1.9500000000000003e-05, "loss": 4.9657, "norm_diff": 0.9234, "norm_loss": 0.0, "num_token_doc": 66.8536, "num_token_overlap": 18.0404, "num_token_query": 52.3317, "num_token_union": 73.6054, "num_word_context": 202.4845, "num_word_doc": 49.909, "num_word_query": 39.878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1031.5656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.606, "query_norm": 1.7567, "queue_k_norm": 2.6829, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3317, "sent_len_1": 66.8536, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4425, "stdk": 0.0605, "stdq": 0.0486, "stdqueue_k": 0.0607, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.8337, "doc_norm": 2.5953, "encoder_q-embeddings": 1285.5925, "encoder_q-layer.0": 972.2379, "encoder_q-layer.1": 1110.8724, "encoder_q-layer.10": 7364.0586, "encoder_q-layer.11": 5860.7896, "encoder_q-layer.2": 1246.2627, "encoder_q-layer.3": 1321.4623, "encoder_q-layer.4": 1451.8955, "encoder_q-layer.5": 1627.2131, "encoder_q-layer.6": 1941.4235, "encoder_q-layer.7": 2865.7632, "encoder_q-layer.8": 4412.6069, "encoder_q-layer.9": 5489.7451, "epoch": 0.04, "inbatch_neg_score": 0.5681, "inbatch_pos_score": 1.1387, "learning_rate": 2e-05, "loss": 4.8337, "norm_diff": 0.8629, "norm_loss": 0.0, "num_token_doc": 66.856, "num_token_overlap": 18.0205, "num_token_query": 52.3173, "num_token_union": 73.5719, "num_word_context": 202.8391, "num_word_doc": 49.8716, "num_word_query": 39.8988, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4591.3961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5654, "query_norm": 1.7324, "queue_k_norm": 2.5904, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3173, "sent_len_1": 66.856, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3487, "stdk": 0.0601, "stdq": 0.0493, "stdqueue_k": 0.06, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 38.6719, "active_queue_size": 16384.0, "cl_loss": 4.7633, "doc_norm": 2.5005, "encoder_q-embeddings": 1411.9913, "encoder_q-layer.0": 1204.0906, "encoder_q-layer.1": 1371.54, "encoder_q-layer.10": 6297.3252, "encoder_q-layer.11": 7283.0771, "encoder_q-layer.2": 1469.6299, "encoder_q-layer.3": 1412.6084, "encoder_q-layer.4": 1525.9324, "encoder_q-layer.5": 1637.228, "encoder_q-layer.6": 1948.6201, "encoder_q-layer.7": 2640.3396, "encoder_q-layer.8": 3851.4016, "encoder_q-layer.9": 4644.083, "epoch": 0.04, "inbatch_neg_score": 0.6042, "inbatch_pos_score": 1.1738, "learning_rate": 2.05e-05, "loss": 4.7633, "norm_diff": 0.7917, "norm_loss": 0.0, "num_token_doc": 66.7433, "num_token_overlap": 17.9716, "num_token_query": 52.2008, "num_token_union": 73.467, "num_word_context": 202.1415, "num_word_doc": 49.7863, "num_word_query": 39.761, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4654.9939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6016, "query_norm": 1.7088, "queue_k_norm": 2.5128, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2008, "sent_len_1": 66.7433, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3862, "stdk": 0.0591, "stdq": 0.0489, "stdqueue_k": 0.0596, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.7235, "doc_norm": 2.415, "encoder_q-embeddings": 3184.1003, "encoder_q-layer.0": 2505.7205, "encoder_q-layer.1": 2649.7585, "encoder_q-layer.10": 13332.8877, "encoder_q-layer.11": 10437.5078, "encoder_q-layer.2": 2779.3767, "encoder_q-layer.3": 2539.177, "encoder_q-layer.4": 2699.4414, "encoder_q-layer.5": 3006.7854, "encoder_q-layer.6": 3852.6348, "encoder_q-layer.7": 5546.6772, "encoder_q-layer.8": 7076.1851, "encoder_q-layer.9": 9024.5508, "epoch": 0.04, "inbatch_neg_score": 0.5647, "inbatch_pos_score": 1.1465, "learning_rate": 2.1e-05, "loss": 4.7235, "norm_diff": 0.6895, "norm_loss": 0.0, "num_token_doc": 66.6823, "num_token_overlap": 17.927, "num_token_query": 52.0303, "num_token_union": 73.3402, "num_word_context": 202.1415, "num_word_doc": 49.7202, "num_word_query": 39.6467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8298.8304, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5605, "query_norm": 1.7255, "queue_k_norm": 2.431, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0303, "sent_len_1": 66.6823, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.225, "stdk": 0.0582, "stdq": 0.0494, "stdqueue_k": 0.0589, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.5708, "doc_norm": 2.3573, "encoder_q-embeddings": 1540.8727, "encoder_q-layer.0": 1195.2605, "encoder_q-layer.1": 1386.1835, "encoder_q-layer.10": 2154.1421, "encoder_q-layer.11": 3461.0586, "encoder_q-layer.2": 1561.8086, "encoder_q-layer.3": 1486.5944, "encoder_q-layer.4": 1389.1383, "encoder_q-layer.5": 1274.3734, "encoder_q-layer.6": 1079.1085, "encoder_q-layer.7": 1062.6227, "encoder_q-layer.8": 1298.2061, "encoder_q-layer.9": 1288.9952, "epoch": 0.04, "inbatch_neg_score": 0.4611, "inbatch_pos_score": 1.0391, "learning_rate": 2.15e-05, "loss": 4.5708, "norm_diff": 0.6662, "norm_loss": 0.0, "num_token_doc": 66.9107, "num_token_overlap": 18.0451, "num_token_query": 52.2892, "num_token_union": 73.546, "num_word_context": 202.1841, "num_word_doc": 49.9606, "num_word_query": 39.886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2387.3492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4583, "query_norm": 1.6911, "queue_k_norm": 2.3623, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2892, "sent_len_1": 66.9107, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1362, "stdk": 0.0586, "stdq": 0.0476, "stdqueue_k": 0.0584, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.4632, "doc_norm": 2.2798, "encoder_q-embeddings": 1483.688, "encoder_q-layer.0": 1240.2452, "encoder_q-layer.1": 1407.0083, "encoder_q-layer.10": 5229.0283, "encoder_q-layer.11": 4570.4858, "encoder_q-layer.2": 1602.5381, "encoder_q-layer.3": 1606.8077, "encoder_q-layer.4": 1687.3057, "encoder_q-layer.5": 2201.1382, "encoder_q-layer.6": 2503.7063, "encoder_q-layer.7": 3208.2388, "encoder_q-layer.8": 4368.1465, "encoder_q-layer.9": 4484.1543, "epoch": 0.04, "inbatch_neg_score": 0.5815, "inbatch_pos_score": 1.168, "learning_rate": 2.2000000000000003e-05, "loss": 4.4632, "norm_diff": 0.5738, "norm_loss": 0.0, "num_token_doc": 66.5912, "num_token_overlap": 17.9457, "num_token_query": 52.1427, "num_token_union": 73.3831, "num_word_context": 202.1368, "num_word_doc": 49.7181, "num_word_query": 39.7686, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4096.1476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5771, "query_norm": 1.706, "queue_k_norm": 2.2915, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1427, "sent_len_1": 66.5912, "sent_len_max_0": 127.9838, "sent_len_max_1": 187.8313, "stdk": 0.0577, "stdq": 0.0474, "stdqueue_k": 0.0579, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.3778, "doc_norm": 2.2078, "encoder_q-embeddings": 744.959, "encoder_q-layer.0": 592.4818, "encoder_q-layer.1": 674.187, "encoder_q-layer.10": 1399.9341, "encoder_q-layer.11": 2443.9329, "encoder_q-layer.2": 773.5784, "encoder_q-layer.3": 793.9663, "encoder_q-layer.4": 850.0956, "encoder_q-layer.5": 927.044, "encoder_q-layer.6": 876.9264, "encoder_q-layer.7": 776.5609, "encoder_q-layer.8": 972.3915, "encoder_q-layer.9": 971.7573, "epoch": 0.04, "inbatch_neg_score": 0.5543, "inbatch_pos_score": 1.1602, "learning_rate": 2.25e-05, "loss": 4.3778, "norm_diff": 0.5532, "norm_loss": 0.0, "num_token_doc": 66.8162, "num_token_overlap": 17.9513, "num_token_query": 52.1511, "num_token_union": 73.4919, "num_word_context": 202.4528, "num_word_doc": 49.8585, "num_word_query": 39.7429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1549.1427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5522, "query_norm": 1.6546, "queue_k_norm": 2.2186, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1511, "sent_len_1": 66.8162, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3587, "stdk": 0.0567, "stdq": 0.0464, "stdqueue_k": 0.0571, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.3109, "doc_norm": 2.1453, "encoder_q-embeddings": 3091.9998, "encoder_q-layer.0": 2317.698, "encoder_q-layer.1": 2710.408, "encoder_q-layer.10": 3814.189, "encoder_q-layer.11": 3972.3022, "encoder_q-layer.2": 2520.7441, "encoder_q-layer.3": 2292.0972, "encoder_q-layer.4": 1767.6027, "encoder_q-layer.5": 1525.7667, "encoder_q-layer.6": 1365.7971, "encoder_q-layer.7": 1580.5341, "encoder_q-layer.8": 2058.4165, "encoder_q-layer.9": 2750.5454, "epoch": 0.04, "inbatch_neg_score": 0.5367, "inbatch_pos_score": 1.1299, "learning_rate": 2.3000000000000003e-05, "loss": 4.3109, "norm_diff": 0.4539, "norm_loss": 0.0, "num_token_doc": 66.7158, "num_token_overlap": 17.9559, "num_token_query": 52.1041, "num_token_union": 73.4134, "num_word_context": 202.367, "num_word_doc": 49.788, "num_word_query": 39.7088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3691.7706, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5371, "query_norm": 1.6914, "queue_k_norm": 2.1547, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1041, "sent_len_1": 66.7158, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3, "stdk": 0.0564, "stdq": 0.0468, "stdqueue_k": 0.0566, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2229, "doc_norm": 2.087, "encoder_q-embeddings": 2458.8745, "encoder_q-layer.0": 2101.2925, "encoder_q-layer.1": 2165.2539, "encoder_q-layer.10": 2833.3618, "encoder_q-layer.11": 2959.6367, "encoder_q-layer.2": 1931.6123, "encoder_q-layer.3": 1865.0897, "encoder_q-layer.4": 1809.9536, "encoder_q-layer.5": 1743.4486, "encoder_q-layer.6": 1736.7206, "encoder_q-layer.7": 1686.5924, "encoder_q-layer.8": 1760.5912, "encoder_q-layer.9": 2063.2234, "epoch": 0.05, "inbatch_neg_score": 0.5961, "inbatch_pos_score": 1.1621, "learning_rate": 2.35e-05, "loss": 4.2229, "norm_diff": 0.443, "norm_loss": 0.0, "num_token_doc": 66.7928, "num_token_overlap": 18.0427, "num_token_query": 52.2911, "num_token_union": 73.519, "num_word_context": 202.2811, "num_word_doc": 49.862, "num_word_query": 39.9007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3067.2597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5898, "query_norm": 1.6441, "queue_k_norm": 2.0959, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2911, "sent_len_1": 66.7928, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.2125, "stdk": 0.056, "stdq": 0.0461, "stdqueue_k": 0.0561, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1639, "doc_norm": 2.0372, "encoder_q-embeddings": 1728.0614, "encoder_q-layer.0": 1460.1021, "encoder_q-layer.1": 1404.4978, "encoder_q-layer.10": 1509.4651, "encoder_q-layer.11": 2507.5122, "encoder_q-layer.2": 1308.463, "encoder_q-layer.3": 1107.2489, "encoder_q-layer.4": 938.7265, "encoder_q-layer.5": 933.2557, "encoder_q-layer.6": 809.7473, "encoder_q-layer.7": 762.3388, "encoder_q-layer.8": 962.5991, "encoder_q-layer.9": 935.0524, "epoch": 0.05, "inbatch_neg_score": 0.5381, "inbatch_pos_score": 1.1006, "learning_rate": 2.4e-05, "loss": 4.1639, "norm_diff": 0.3879, "norm_loss": 0.0, "num_token_doc": 66.5727, "num_token_overlap": 17.9843, "num_token_query": 52.29, "num_token_union": 73.4143, "num_word_context": 202.0932, "num_word_doc": 49.6512, "num_word_query": 39.8496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2002.93, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5322, "query_norm": 1.6493, "queue_k_norm": 2.0388, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.29, "sent_len_1": 66.5727, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6325, "stdk": 0.0553, "stdq": 0.0454, "stdqueue_k": 0.0553, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0879, "doc_norm": 1.9886, "encoder_q-embeddings": 1449.506, "encoder_q-layer.0": 1194.4265, "encoder_q-layer.1": 1153.3744, "encoder_q-layer.10": 3308.3218, "encoder_q-layer.11": 3610.6165, "encoder_q-layer.2": 1188.96, "encoder_q-layer.3": 1050.5287, "encoder_q-layer.4": 918.83, "encoder_q-layer.5": 915.1978, "encoder_q-layer.6": 994.8553, "encoder_q-layer.7": 1473.163, "encoder_q-layer.8": 2480.3083, "encoder_q-layer.9": 2942.449, "epoch": 0.05, "inbatch_neg_score": 0.6111, "inbatch_pos_score": 1.1982, "learning_rate": 2.45e-05, "loss": 4.0879, "norm_diff": 0.3502, "norm_loss": 0.0, "num_token_doc": 67.0279, "num_token_overlap": 18.0058, "num_token_query": 52.2107, "num_token_union": 73.5823, "num_word_context": 202.4099, "num_word_doc": 50.0158, "num_word_query": 39.8062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2731.3581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6069, "query_norm": 1.6385, "queue_k_norm": 1.9885, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2107, "sent_len_1": 67.0279, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3013, "stdk": 0.0548, "stdq": 0.0456, "stdqueue_k": 0.0547, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 4.0486, "doc_norm": 1.9465, "encoder_q-embeddings": 796.1694, "encoder_q-layer.0": 592.9722, "encoder_q-layer.1": 674.3293, "encoder_q-layer.10": 1431.9382, "encoder_q-layer.11": 2292.3196, "encoder_q-layer.2": 756.8854, "encoder_q-layer.3": 759.8987, "encoder_q-layer.4": 767.549, "encoder_q-layer.5": 722.26, "encoder_q-layer.6": 727.6249, "encoder_q-layer.7": 755.8387, "encoder_q-layer.8": 942.3444, "encoder_q-layer.9": 1041.1147, "epoch": 0.05, "inbatch_neg_score": 0.6229, "inbatch_pos_score": 1.1973, "learning_rate": 2.5e-05, "loss": 4.0486, "norm_diff": 0.3161, "norm_loss": 0.0, "num_token_doc": 66.7039, "num_token_overlap": 17.9841, "num_token_query": 52.3046, "num_token_union": 73.4929, "num_word_context": 202.4573, "num_word_doc": 49.7829, "num_word_query": 39.8721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1487.8462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6182, "query_norm": 1.6304, "queue_k_norm": 1.9481, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3046, "sent_len_1": 66.7039, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3075, "stdk": 0.0543, "stdq": 0.0453, "stdqueue_k": 0.0543, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.9857, "doc_norm": 1.9069, "encoder_q-embeddings": 2619.5793, "encoder_q-layer.0": 2190.9944, "encoder_q-layer.1": 2665.5818, "encoder_q-layer.10": 2128.9734, "encoder_q-layer.11": 2544.0789, "encoder_q-layer.2": 2875.6541, "encoder_q-layer.3": 2741.082, "encoder_q-layer.4": 2724.7378, "encoder_q-layer.5": 2663.9563, "encoder_q-layer.6": 2041.2897, "encoder_q-layer.7": 2163.4863, "encoder_q-layer.8": 2738.241, "encoder_q-layer.9": 2350.0667, "epoch": 0.05, "inbatch_neg_score": 0.6021, "inbatch_pos_score": 1.2383, "learning_rate": 2.5500000000000003e-05, "loss": 3.9857, "norm_diff": 0.2428, "norm_loss": 0.0, "num_token_doc": 66.7246, "num_token_overlap": 17.9835, "num_token_query": 52.1551, "num_token_union": 73.3918, "num_word_context": 202.0514, "num_word_doc": 49.7888, "num_word_query": 39.733, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3700.3475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6001, "query_norm": 1.6641, "queue_k_norm": 1.9114, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1551, "sent_len_1": 66.7246, "sent_len_max_0": 128.0, "sent_len_max_1": 188.32, "stdk": 0.0535, "stdq": 0.0469, "stdqueue_k": 0.0538, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.9756, "doc_norm": 1.8758, "encoder_q-embeddings": 810.1799, "encoder_q-layer.0": 608.2292, "encoder_q-layer.1": 614.2324, "encoder_q-layer.10": 1101.8271, "encoder_q-layer.11": 1928.5728, "encoder_q-layer.2": 662.1501, "encoder_q-layer.3": 665.2524, "encoder_q-layer.4": 692.8862, "encoder_q-layer.5": 679.6406, "encoder_q-layer.6": 663.5143, "encoder_q-layer.7": 633.84, "encoder_q-layer.8": 832.8558, "encoder_q-layer.9": 861.1954, "epoch": 0.05, "inbatch_neg_score": 0.6476, "inbatch_pos_score": 1.2637, "learning_rate": 2.6000000000000002e-05, "loss": 3.9756, "norm_diff": 0.2255, "norm_loss": 0.0, "num_token_doc": 66.8538, "num_token_overlap": 17.9792, "num_token_query": 52.2879, "num_token_union": 73.5681, "num_word_context": 202.6512, "num_word_doc": 49.9054, "num_word_query": 39.86, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1292.8549, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.647, "query_norm": 1.6503, "queue_k_norm": 1.8773, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2879, "sent_len_1": 66.8538, "sent_len_max_0": 127.99, "sent_len_max_1": 189.2012, "stdk": 0.0531, "stdq": 0.0458, "stdqueue_k": 0.0533, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9461, "doc_norm": 1.8489, "encoder_q-embeddings": 960.6561, "encoder_q-layer.0": 788.0892, "encoder_q-layer.1": 786.2452, "encoder_q-layer.10": 2056.0803, "encoder_q-layer.11": 2719.4199, "encoder_q-layer.2": 738.1932, "encoder_q-layer.3": 731.3597, "encoder_q-layer.4": 731.741, "encoder_q-layer.5": 752.1526, "encoder_q-layer.6": 772.3828, "encoder_q-layer.7": 992.4248, "encoder_q-layer.8": 1358.073, "encoder_q-layer.9": 1532.2605, "epoch": 0.05, "inbatch_neg_score": 0.6473, "inbatch_pos_score": 1.2705, "learning_rate": 2.6500000000000004e-05, "loss": 3.9461, "norm_diff": 0.1677, "norm_loss": 0.0, "num_token_doc": 66.811, "num_token_overlap": 17.9927, "num_token_query": 52.1751, "num_token_union": 73.4626, "num_word_context": 202.1871, "num_word_doc": 49.8699, "num_word_query": 39.7576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1808.6404, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6465, "query_norm": 1.6812, "queue_k_norm": 1.8512, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1751, "sent_len_1": 66.811, "sent_len_max_0": 127.995, "sent_len_max_1": 190.325, "stdk": 0.0528, "stdq": 0.0466, "stdqueue_k": 0.0528, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8765, "doc_norm": 1.8215, "encoder_q-embeddings": 685.3734, "encoder_q-layer.0": 501.6205, "encoder_q-layer.1": 538.7556, "encoder_q-layer.10": 1297.9741, "encoder_q-layer.11": 1953.2251, "encoder_q-layer.2": 622.0534, "encoder_q-layer.3": 699.2536, "encoder_q-layer.4": 758.1068, "encoder_q-layer.5": 802.9736, "encoder_q-layer.6": 870.2025, "encoder_q-layer.7": 1089.6162, "encoder_q-layer.8": 1473.6344, "encoder_q-layer.9": 1248.7867, "epoch": 0.05, "inbatch_neg_score": 0.6633, "inbatch_pos_score": 1.2451, "learning_rate": 2.7000000000000002e-05, "loss": 3.8765, "norm_diff": 0.1827, "norm_loss": 0.0, "num_token_doc": 66.7015, "num_token_overlap": 17.9804, "num_token_query": 52.2369, "num_token_union": 73.432, "num_word_context": 202.0667, "num_word_doc": 49.7853, "num_word_query": 39.8133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1494.9533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6611, "query_norm": 1.6388, "queue_k_norm": 1.8289, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2369, "sent_len_1": 66.7015, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1413, "stdk": 0.0521, "stdq": 0.0451, "stdqueue_k": 0.0525, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8519, "doc_norm": 1.8081, "encoder_q-embeddings": 1333.2208, "encoder_q-layer.0": 1120.0907, "encoder_q-layer.1": 1180.6318, "encoder_q-layer.10": 1116.7815, "encoder_q-layer.11": 2016.4489, "encoder_q-layer.2": 1290.9724, "encoder_q-layer.3": 1242.7988, "encoder_q-layer.4": 1209.6239, "encoder_q-layer.5": 1105.5863, "encoder_q-layer.6": 1095.7859, "encoder_q-layer.7": 1247.4955, "encoder_q-layer.8": 1431.4608, "encoder_q-layer.9": 941.5483, "epoch": 0.05, "inbatch_neg_score": 0.7354, "inbatch_pos_score": 1.3203, "learning_rate": 2.7500000000000004e-05, "loss": 3.8519, "norm_diff": 0.1371, "norm_loss": 0.0, "num_token_doc": 66.7347, "num_token_overlap": 18.0333, "num_token_query": 52.2885, "num_token_union": 73.399, "num_word_context": 201.989, "num_word_doc": 49.7503, "num_word_query": 39.8514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1891.7261, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7334, "query_norm": 1.671, "queue_k_norm": 1.8092, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2885, "sent_len_1": 66.7347, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7237, "stdk": 0.0519, "stdq": 0.0454, "stdqueue_k": 0.052, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8197, "doc_norm": 1.7895, "encoder_q-embeddings": 717.5412, "encoder_q-layer.0": 545.7924, "encoder_q-layer.1": 588.8241, "encoder_q-layer.10": 1114.7803, "encoder_q-layer.11": 1928.4052, "encoder_q-layer.2": 654.6473, "encoder_q-layer.3": 666.8036, "encoder_q-layer.4": 679.9308, "encoder_q-layer.5": 655.9805, "encoder_q-layer.6": 685.9529, "encoder_q-layer.7": 813.5141, "encoder_q-layer.8": 1201.6143, "encoder_q-layer.9": 1084.2427, "epoch": 0.05, "inbatch_neg_score": 0.7316, "inbatch_pos_score": 1.3477, "learning_rate": 2.8000000000000003e-05, "loss": 3.8197, "norm_diff": 0.1017, "norm_loss": 0.0, "num_token_doc": 66.648, "num_token_overlap": 17.964, "num_token_query": 52.1663, "num_token_union": 73.4072, "num_word_context": 202.4451, "num_word_doc": 49.7497, "num_word_query": 39.7539, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1368.631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7295, "query_norm": 1.6878, "queue_k_norm": 1.7928, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1663, "sent_len_1": 66.648, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0075, "stdk": 0.0515, "stdq": 0.0461, "stdqueue_k": 0.0516, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.787, "doc_norm": 1.7788, "encoder_q-embeddings": 733.2531, "encoder_q-layer.0": 546.9761, "encoder_q-layer.1": 590.6739, "encoder_q-layer.10": 1548.4563, "encoder_q-layer.11": 2307.9976, "encoder_q-layer.2": 623.1828, "encoder_q-layer.3": 656.9905, "encoder_q-layer.4": 668.5426, "encoder_q-layer.5": 688.9365, "encoder_q-layer.6": 725.2158, "encoder_q-layer.7": 763.3569, "encoder_q-layer.8": 953.3597, "encoder_q-layer.9": 1102.6742, "epoch": 0.06, "inbatch_neg_score": 0.7093, "inbatch_pos_score": 1.3213, "learning_rate": 2.8499999999999998e-05, "loss": 3.787, "norm_diff": 0.0768, "norm_loss": 0.0, "num_token_doc": 67.011, "num_token_overlap": 18.0462, "num_token_query": 52.3533, "num_token_union": 73.6916, "num_word_context": 202.9347, "num_word_doc": 50.0273, "num_word_query": 39.9391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1438.156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.707, "query_norm": 1.702, "queue_k_norm": 1.7804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3533, "sent_len_1": 67.011, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.8375, "stdk": 0.0511, "stdq": 0.0468, "stdqueue_k": 0.0514, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.7662, "doc_norm": 1.7701, "encoder_q-embeddings": 979.0941, "encoder_q-layer.0": 802.65, "encoder_q-layer.1": 764.2827, "encoder_q-layer.10": 1490.7095, "encoder_q-layer.11": 2210.1226, "encoder_q-layer.2": 830.1889, "encoder_q-layer.3": 843.3086, "encoder_q-layer.4": 811.6019, "encoder_q-layer.5": 751.0664, "encoder_q-layer.6": 767.8003, "encoder_q-layer.7": 697.8733, "encoder_q-layer.8": 845.7935, "encoder_q-layer.9": 960.5245, "epoch": 0.06, "inbatch_neg_score": 0.7472, "inbatch_pos_score": 1.3545, "learning_rate": 2.9e-05, "loss": 3.7662, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.746, "num_token_overlap": 18.0366, "num_token_query": 52.2834, "num_token_union": 73.4461, "num_word_context": 202.412, "num_word_doc": 49.7975, "num_word_query": 39.8485, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1521.3952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7456, "query_norm": 1.7285, "queue_k_norm": 1.7698, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2834, "sent_len_1": 66.746, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.1425, "stdk": 0.051, "stdq": 0.0462, "stdqueue_k": 0.0511, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.7093, "doc_norm": 1.7591, "encoder_q-embeddings": 733.3249, "encoder_q-layer.0": 533.579, "encoder_q-layer.1": 580.8271, "encoder_q-layer.10": 1082.2415, "encoder_q-layer.11": 1961.697, "encoder_q-layer.2": 657.0529, "encoder_q-layer.3": 665.1107, "encoder_q-layer.4": 650.0818, "encoder_q-layer.5": 589.5693, "encoder_q-layer.6": 587.7493, "encoder_q-layer.7": 664.4557, "encoder_q-layer.8": 1092.1206, "encoder_q-layer.9": 1022.4682, "epoch": 0.06, "inbatch_neg_score": 0.7506, "inbatch_pos_score": 1.375, "learning_rate": 2.95e-05, "loss": 3.7093, "norm_diff": 0.0737, "norm_loss": 0.0, "num_token_doc": 66.7915, "num_token_overlap": 18.038, "num_token_query": 52.3647, "num_token_union": 73.4775, "num_word_context": 202.5776, "num_word_doc": 49.8609, "num_word_query": 39.9476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1308.3495, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7451, "query_norm": 1.6853, "queue_k_norm": 1.7615, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3647, "sent_len_1": 66.7915, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1463, "stdk": 0.0505, "stdq": 0.0456, "stdqueue_k": 0.0508, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6903, "doc_norm": 1.7517, "encoder_q-embeddings": 1515.6472, "encoder_q-layer.0": 1157.0629, "encoder_q-layer.1": 1183.5455, "encoder_q-layer.10": 2213.5945, "encoder_q-layer.11": 4179.5869, "encoder_q-layer.2": 1304.6915, "encoder_q-layer.3": 1361.296, "encoder_q-layer.4": 1373.8975, "encoder_q-layer.5": 1331.1426, "encoder_q-layer.6": 1455.8359, "encoder_q-layer.7": 1618.3252, "encoder_q-layer.8": 1941.1437, "encoder_q-layer.9": 1778.3922, "epoch": 0.06, "inbatch_neg_score": 0.7176, "inbatch_pos_score": 1.3135, "learning_rate": 3e-05, "loss": 3.6903, "norm_diff": 0.0582, "norm_loss": 0.0, "num_token_doc": 66.643, "num_token_overlap": 18.0231, "num_token_query": 52.217, "num_token_union": 73.3285, "num_word_context": 202.1583, "num_word_doc": 49.7294, "num_word_query": 39.7718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2727.3394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7148, "query_norm": 1.6935, "queue_k_norm": 1.7537, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.217, "sent_len_1": 66.643, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.565, "stdk": 0.0503, "stdq": 0.0449, "stdqueue_k": 0.0506, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.6804, "doc_norm": 1.743, "encoder_q-embeddings": 1145.8638, "encoder_q-layer.0": 830.7459, "encoder_q-layer.1": 940.4113, "encoder_q-layer.10": 2271.4805, "encoder_q-layer.11": 3645.7773, "encoder_q-layer.2": 1097.8892, "encoder_q-layer.3": 1142.5581, "encoder_q-layer.4": 1229.7904, "encoder_q-layer.5": 1359.7439, "encoder_q-layer.6": 1505.501, "encoder_q-layer.7": 1614.9495, "encoder_q-layer.8": 1992.8392, "encoder_q-layer.9": 1686.7599, "epoch": 0.06, "inbatch_neg_score": 0.7301, "inbatch_pos_score": 1.3242, "learning_rate": 3.05e-05, "loss": 3.6804, "norm_diff": 0.0348, "norm_loss": 0.0, "num_token_doc": 66.7808, "num_token_overlap": 17.9362, "num_token_query": 52.1356, "num_token_union": 73.5124, "num_word_context": 202.338, "num_word_doc": 49.8688, "num_word_query": 39.7367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2461.3652, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7295, "query_norm": 1.7082, "queue_k_norm": 1.7474, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1356, "sent_len_1": 66.7808, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2713, "stdk": 0.0501, "stdq": 0.0443, "stdqueue_k": 0.0505, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.6692, "doc_norm": 1.7454, "encoder_q-embeddings": 2955.1423, "encoder_q-layer.0": 2449.0413, "encoder_q-layer.1": 2511.7332, "encoder_q-layer.10": 2336.9102, "encoder_q-layer.11": 3239.7639, "encoder_q-layer.2": 2493.6252, "encoder_q-layer.3": 2695.6526, "encoder_q-layer.4": 2397.2954, "encoder_q-layer.5": 2032.2947, "encoder_q-layer.6": 1831.7852, "encoder_q-layer.7": 1981.1719, "encoder_q-layer.8": 2549.9961, "encoder_q-layer.9": 2293.3477, "epoch": 0.06, "inbatch_neg_score": 0.7162, "inbatch_pos_score": 1.332, "learning_rate": 3.1e-05, "loss": 3.6692, "norm_diff": 0.0348, "norm_loss": 0.0, "num_token_doc": 66.5641, "num_token_overlap": 17.9199, "num_token_query": 52.0238, "num_token_union": 73.2805, "num_word_context": 202.2851, "num_word_doc": 49.7016, "num_word_query": 39.6803, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3662.3609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7134, "query_norm": 1.7253, "queue_k_norm": 1.7389, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0238, "sent_len_1": 66.5641, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.63, "stdk": 0.0504, "stdq": 0.047, "stdqueue_k": 0.0501, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.6411, "doc_norm": 1.7287, "encoder_q-embeddings": 1019.8612, "encoder_q-layer.0": 701.0287, "encoder_q-layer.1": 760.0724, "encoder_q-layer.10": 2232.0015, "encoder_q-layer.11": 3680.0791, "encoder_q-layer.2": 875.3977, "encoder_q-layer.3": 917.1665, "encoder_q-layer.4": 962.2267, "encoder_q-layer.5": 965.5792, "encoder_q-layer.6": 1074.5494, "encoder_q-layer.7": 1226.276, "encoder_q-layer.8": 1559.657, "encoder_q-layer.9": 1754.901, "epoch": 0.06, "inbatch_neg_score": 0.7012, "inbatch_pos_score": 1.3213, "learning_rate": 3.15e-05, "loss": 3.6411, "norm_diff": 0.0126, "norm_loss": 0.0, "num_token_doc": 66.5803, "num_token_overlap": 17.9606, "num_token_query": 52.1101, "num_token_union": 73.2842, "num_word_context": 201.7388, "num_word_doc": 49.6573, "num_word_query": 39.706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2214.2636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6978, "query_norm": 1.7365, "queue_k_norm": 1.7337, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1101, "sent_len_1": 66.5803, "sent_len_max_0": 127.995, "sent_len_max_1": 190.475, "stdk": 0.0497, "stdq": 0.0457, "stdqueue_k": 0.05, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.6313, "doc_norm": 1.7236, "encoder_q-embeddings": 3595.3228, "encoder_q-layer.0": 3165.7598, "encoder_q-layer.1": 2924.2124, "encoder_q-layer.10": 2567.7251, "encoder_q-layer.11": 4125.7583, "encoder_q-layer.2": 2739.1382, "encoder_q-layer.3": 2908.9661, "encoder_q-layer.4": 2969.0959, "encoder_q-layer.5": 2667.0342, "encoder_q-layer.6": 2575.2649, "encoder_q-layer.7": 2496.0056, "encoder_q-layer.8": 3094.3931, "encoder_q-layer.9": 2545.2622, "epoch": 0.06, "inbatch_neg_score": 0.6718, "inbatch_pos_score": 1.2969, "learning_rate": 3.2000000000000005e-05, "loss": 3.6313, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.681, "num_token_overlap": 17.9937, "num_token_query": 52.3366, "num_token_union": 73.4453, "num_word_context": 202.0431, "num_word_doc": 49.7025, "num_word_query": 39.8664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4398.6265, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6689, "query_norm": 1.772, "queue_k_norm": 1.7249, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3366, "sent_len_1": 66.681, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6387, "stdk": 0.0496, "stdq": 0.0466, "stdqueue_k": 0.0497, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.5766, "doc_norm": 1.7253, "encoder_q-embeddings": 2180.4094, "encoder_q-layer.0": 1668.12, "encoder_q-layer.1": 1814.0507, "encoder_q-layer.10": 2519.9885, "encoder_q-layer.11": 4518.7114, "encoder_q-layer.2": 1780.2548, "encoder_q-layer.3": 1632.2062, "encoder_q-layer.4": 1651.4982, "encoder_q-layer.5": 1429.2615, "encoder_q-layer.6": 1498.9116, "encoder_q-layer.7": 1688.5492, "encoder_q-layer.8": 2542.4126, "encoder_q-layer.9": 2249.054, "epoch": 0.06, "inbatch_neg_score": 0.7533, "inbatch_pos_score": 1.3857, "learning_rate": 3.2500000000000004e-05, "loss": 3.5766, "norm_diff": 0.0898, "norm_loss": 0.0, "num_token_doc": 66.952, "num_token_overlap": 18.0923, "num_token_query": 52.3415, "num_token_union": 73.4885, "num_word_context": 202.1101, "num_word_doc": 49.9139, "num_word_query": 39.8858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3250.9608, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7471, "query_norm": 1.8151, "queue_k_norm": 1.7202, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3415, "sent_len_1": 66.952, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.5588, "stdk": 0.0498, "stdq": 0.0472, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.5867, "doc_norm": 1.718, "encoder_q-embeddings": 1349.2993, "encoder_q-layer.0": 1000.1704, "encoder_q-layer.1": 1061.4807, "encoder_q-layer.10": 1821.1442, "encoder_q-layer.11": 3076.9824, "encoder_q-layer.2": 1172.2655, "encoder_q-layer.3": 1165.3846, "encoder_q-layer.4": 1199.6989, "encoder_q-layer.5": 1189.5658, "encoder_q-layer.6": 1207.7919, "encoder_q-layer.7": 1129.6456, "encoder_q-layer.8": 1455.264, "encoder_q-layer.9": 1567.7983, "epoch": 0.06, "inbatch_neg_score": 0.7159, "inbatch_pos_score": 1.3193, "learning_rate": 3.3e-05, "loss": 3.5867, "norm_diff": 0.0876, "norm_loss": 0.0, "num_token_doc": 66.6716, "num_token_overlap": 17.9924, "num_token_query": 52.3318, "num_token_union": 73.4754, "num_word_context": 202.2937, "num_word_doc": 49.7193, "num_word_query": 39.9328, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2177.1313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.709, "query_norm": 1.8057, "queue_k_norm": 1.7125, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3318, "sent_len_1": 66.6716, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8575, "stdk": 0.0496, "stdq": 0.0458, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5676, "doc_norm": 1.7029, "encoder_q-embeddings": 1221.4403, "encoder_q-layer.0": 898.8995, "encoder_q-layer.1": 944.1845, "encoder_q-layer.10": 1762.5491, "encoder_q-layer.11": 3038.8445, "encoder_q-layer.2": 1002.9175, "encoder_q-layer.3": 1018.1263, "encoder_q-layer.4": 1080.6653, "encoder_q-layer.5": 1160.7869, "encoder_q-layer.6": 1291.1212, "encoder_q-layer.7": 1340.7944, "encoder_q-layer.8": 1568.3302, "encoder_q-layer.9": 1393.5925, "epoch": 0.07, "inbatch_neg_score": 0.7227, "inbatch_pos_score": 1.3662, "learning_rate": 3.35e-05, "loss": 3.5676, "norm_diff": 0.1833, "norm_loss": 0.0, "num_token_doc": 66.7598, "num_token_overlap": 18.0003, "num_token_query": 52.2951, "num_token_union": 73.5221, "num_word_context": 202.3733, "num_word_doc": 49.8366, "num_word_query": 39.8719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2102.6675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7173, "query_norm": 1.8862, "queue_k_norm": 1.709, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2951, "sent_len_1": 66.7598, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5362, "stdk": 0.0489, "stdq": 0.0477, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.5461, "doc_norm": 1.704, "encoder_q-embeddings": 1108.1396, "encoder_q-layer.0": 821.0812, "encoder_q-layer.1": 886.642, "encoder_q-layer.10": 1726.1584, "encoder_q-layer.11": 3501.6685, "encoder_q-layer.2": 985.9771, "encoder_q-layer.3": 1048.3898, "encoder_q-layer.4": 1084.8806, "encoder_q-layer.5": 1040.2319, "encoder_q-layer.6": 1035.8303, "encoder_q-layer.7": 989.465, "encoder_q-layer.8": 1202.2926, "encoder_q-layer.9": 1212.074, "epoch": 0.07, "inbatch_neg_score": 0.7435, "inbatch_pos_score": 1.3828, "learning_rate": 3.4000000000000007e-05, "loss": 3.5461, "norm_diff": 0.1777, "norm_loss": 0.0, "num_token_doc": 66.6719, "num_token_overlap": 17.9435, "num_token_query": 52.0311, "num_token_union": 73.3581, "num_word_context": 202.0251, "num_word_doc": 49.7605, "num_word_query": 39.6512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2114.7365, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7363, "query_norm": 1.8818, "queue_k_norm": 1.7029, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0311, "sent_len_1": 66.6719, "sent_len_max_0": 128.0, "sent_len_max_1": 188.82, "stdk": 0.0491, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.501, "doc_norm": 1.6936, "encoder_q-embeddings": 1779.4674, "encoder_q-layer.0": 1355.7361, "encoder_q-layer.1": 1423.6484, "encoder_q-layer.10": 2054.124, "encoder_q-layer.11": 3519.2944, "encoder_q-layer.2": 1506.2272, "encoder_q-layer.3": 1470.6071, "encoder_q-layer.4": 1412.5099, "encoder_q-layer.5": 1382.2089, "encoder_q-layer.6": 1295.7087, "encoder_q-layer.7": 1126.5786, "encoder_q-layer.8": 1315.6216, "encoder_q-layer.9": 1309.1093, "epoch": 0.07, "inbatch_neg_score": 0.7925, "inbatch_pos_score": 1.4082, "learning_rate": 3.45e-05, "loss": 3.501, "norm_diff": 0.2504, "norm_loss": 0.0, "num_token_doc": 66.979, "num_token_overlap": 18.0298, "num_token_query": 52.1982, "num_token_union": 73.58, "num_word_context": 202.5507, "num_word_doc": 49.965, "num_word_query": 39.7918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2517.7253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 1.944, "queue_k_norm": 1.6999, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1982, "sent_len_1": 66.979, "sent_len_max_0": 127.995, "sent_len_max_1": 189.3375, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.5175, "doc_norm": 1.7046, "encoder_q-embeddings": 1458.1414, "encoder_q-layer.0": 1151.2115, "encoder_q-layer.1": 1321.9331, "encoder_q-layer.10": 1585.2061, "encoder_q-layer.11": 3050.6489, "encoder_q-layer.2": 1372.056, "encoder_q-layer.3": 1389.6268, "encoder_q-layer.4": 1474.4409, "encoder_q-layer.5": 1471.959, "encoder_q-layer.6": 1602.1737, "encoder_q-layer.7": 1617.5922, "encoder_q-layer.8": 1805.0961, "encoder_q-layer.9": 1293.7157, "epoch": 0.07, "inbatch_neg_score": 0.812, "inbatch_pos_score": 1.4033, "learning_rate": 3.5e-05, "loss": 3.5175, "norm_diff": 0.2399, "norm_loss": 0.0, "num_token_doc": 67.1162, "num_token_overlap": 17.9469, "num_token_query": 52.067, "num_token_union": 73.6456, "num_word_context": 202.077, "num_word_doc": 50.0875, "num_word_query": 39.7191, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2394.2881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8022, "query_norm": 1.9445, "queue_k_norm": 1.7015, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.067, "sent_len_1": 67.1162, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3225, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4867, "doc_norm": 1.6969, "encoder_q-embeddings": 1102.8713, "encoder_q-layer.0": 803.2993, "encoder_q-layer.1": 894.0017, "encoder_q-layer.10": 1863.5406, "encoder_q-layer.11": 3329.2571, "encoder_q-layer.2": 977.9976, "encoder_q-layer.3": 1041.7112, "encoder_q-layer.4": 1235.4634, "encoder_q-layer.5": 1352.7734, "encoder_q-layer.6": 1761.209, "encoder_q-layer.7": 2058.7053, "encoder_q-layer.8": 2644.2231, "encoder_q-layer.9": 2020.4458, "epoch": 0.07, "inbatch_neg_score": 0.8109, "inbatch_pos_score": 1.457, "learning_rate": 3.55e-05, "loss": 3.4867, "norm_diff": 0.2674, "norm_loss": 0.0, "num_token_doc": 66.8337, "num_token_overlap": 18.0597, "num_token_query": 52.265, "num_token_union": 73.4683, "num_word_context": 202.5887, "num_word_doc": 49.8435, "num_word_query": 39.8481, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2479.6675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8042, "query_norm": 1.9644, "queue_k_norm": 1.6992, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.265, "sent_len_1": 66.8337, "sent_len_max_0": 127.985, "sent_len_max_1": 189.2275, "stdk": 0.0486, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5035, "doc_norm": 1.7031, "encoder_q-embeddings": 1031.1124, "encoder_q-layer.0": 750.7416, "encoder_q-layer.1": 834.2281, "encoder_q-layer.10": 1399.8671, "encoder_q-layer.11": 2801.0107, "encoder_q-layer.2": 916.0114, "encoder_q-layer.3": 1014.2895, "encoder_q-layer.4": 1100.7228, "encoder_q-layer.5": 1120.7778, "encoder_q-layer.6": 1226.4656, "encoder_q-layer.7": 1200.0206, "encoder_q-layer.8": 1390.8119, "encoder_q-layer.9": 1227.3362, "epoch": 0.07, "inbatch_neg_score": 0.8162, "inbatch_pos_score": 1.4678, "learning_rate": 3.6e-05, "loss": 3.5035, "norm_diff": 0.2317, "norm_loss": 0.0, "num_token_doc": 66.9483, "num_token_overlap": 18.0125, "num_token_query": 52.2698, "num_token_union": 73.586, "num_word_context": 202.6156, "num_word_doc": 49.9963, "num_word_query": 39.8554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8105, "query_norm": 1.9348, "queue_k_norm": 1.7005, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2698, "sent_len_1": 66.9483, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.4975, "stdk": 0.049, "stdq": 0.0469, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4983, "doc_norm": 1.7066, "encoder_q-embeddings": 968.9995, "encoder_q-layer.0": 677.6992, "encoder_q-layer.1": 727.0355, "encoder_q-layer.10": 1603.1718, "encoder_q-layer.11": 2703.9924, "encoder_q-layer.2": 831.7118, "encoder_q-layer.3": 897.2983, "encoder_q-layer.4": 940.5588, "encoder_q-layer.5": 958.7805, "encoder_q-layer.6": 1024.7289, "encoder_q-layer.7": 1019.7476, "encoder_q-layer.8": 1203.0677, "encoder_q-layer.9": 1261.8618, "epoch": 0.07, "inbatch_neg_score": 0.7899, "inbatch_pos_score": 1.415, "learning_rate": 3.65e-05, "loss": 3.4983, "norm_diff": 0.1881, "norm_loss": 0.0, "num_token_doc": 66.6275, "num_token_overlap": 17.9176, "num_token_query": 52.215, "num_token_union": 73.4383, "num_word_context": 202.597, "num_word_doc": 49.7684, "num_word_query": 39.861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1793.3194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7793, "query_norm": 1.8947, "queue_k_norm": 1.7015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.215, "sent_len_1": 66.6275, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.31, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.4648, "doc_norm": 1.6916, "encoder_q-embeddings": 3559.6189, "encoder_q-layer.0": 2977.2793, "encoder_q-layer.1": 3474.4097, "encoder_q-layer.10": 1730.3041, "encoder_q-layer.11": 3231.0151, "encoder_q-layer.2": 3294.4712, "encoder_q-layer.3": 3573.1218, "encoder_q-layer.4": 3777.365, "encoder_q-layer.5": 3411.8833, "encoder_q-layer.6": 3402.6104, "encoder_q-layer.7": 2446.7458, "encoder_q-layer.8": 2638.9915, "encoder_q-layer.9": 1934.8113, "epoch": 0.07, "inbatch_neg_score": 0.6891, "inbatch_pos_score": 1.2998, "learning_rate": 3.7e-05, "loss": 3.4648, "norm_diff": 0.1363, "norm_loss": 0.0, "num_token_doc": 66.8003, "num_token_overlap": 18.0549, "num_token_query": 52.3134, "num_token_union": 73.4822, "num_word_context": 202.2329, "num_word_doc": 49.8555, "num_word_query": 39.8923, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4666.0276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6802, "query_norm": 1.8279, "queue_k_norm": 1.6964, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3134, "sent_len_1": 66.8003, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3363, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.48, "doc_norm": 1.6973, "encoder_q-embeddings": 1096.2086, "encoder_q-layer.0": 834.7438, "encoder_q-layer.1": 897.7867, "encoder_q-layer.10": 1313.4913, "encoder_q-layer.11": 2651.0442, "encoder_q-layer.2": 941.6169, "encoder_q-layer.3": 967.218, "encoder_q-layer.4": 1007.1562, "encoder_q-layer.5": 1014.2838, "encoder_q-layer.6": 1073.1089, "encoder_q-layer.7": 1045.2676, "encoder_q-layer.8": 1249.7407, "encoder_q-layer.9": 1167.7211, "epoch": 0.07, "inbatch_neg_score": 0.6273, "inbatch_pos_score": 1.2705, "learning_rate": 3.7500000000000003e-05, "loss": 3.48, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.7358, "num_token_overlap": 18.0178, "num_token_query": 52.136, "num_token_union": 73.3443, "num_word_context": 202.1657, "num_word_doc": 49.7865, "num_word_query": 39.7278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1815.9739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6196, "query_norm": 1.8256, "queue_k_norm": 1.692, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.136, "sent_len_1": 66.7358, "sent_len_max_0": 128.0, "sent_len_max_1": 188.89, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.4445, "doc_norm": 1.6819, "encoder_q-embeddings": 1064.8201, "encoder_q-layer.0": 749.0745, "encoder_q-layer.1": 804.0394, "encoder_q-layer.10": 1474.9808, "encoder_q-layer.11": 2641.376, "encoder_q-layer.2": 890.2878, "encoder_q-layer.3": 926.9206, "encoder_q-layer.4": 952.274, "encoder_q-layer.5": 950.0032, "encoder_q-layer.6": 971.8436, "encoder_q-layer.7": 962.8011, "encoder_q-layer.8": 1123.6045, "encoder_q-layer.9": 1180.3469, "epoch": 0.07, "inbatch_neg_score": 0.5666, "inbatch_pos_score": 1.2178, "learning_rate": 3.8e-05, "loss": 3.4445, "norm_diff": 0.2223, "norm_loss": 0.0, "num_token_doc": 66.8956, "num_token_overlap": 17.9383, "num_token_query": 52.1154, "num_token_union": 73.5115, "num_word_context": 202.0786, "num_word_doc": 49.8931, "num_word_query": 39.7306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1783.0969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5605, "query_norm": 1.9042, "queue_k_norm": 1.6857, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1154, "sent_len_1": 66.8956, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3925, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.407, "doc_norm": 1.6788, "encoder_q-embeddings": 1468.994, "encoder_q-layer.0": 1094.5305, "encoder_q-layer.1": 998.0657, "encoder_q-layer.10": 1818.653, "encoder_q-layer.11": 3486.9382, "encoder_q-layer.2": 1015.7263, "encoder_q-layer.3": 984.5717, "encoder_q-layer.4": 1020.3369, "encoder_q-layer.5": 1028.5087, "encoder_q-layer.6": 1098.705, "encoder_q-layer.7": 1062.6292, "encoder_q-layer.8": 1252.6908, "encoder_q-layer.9": 1210.4552, "epoch": 0.08, "inbatch_neg_score": 0.5011, "inbatch_pos_score": 1.1416, "learning_rate": 3.85e-05, "loss": 3.407, "norm_diff": 0.3341, "norm_loss": 0.0, "num_token_doc": 66.5866, "num_token_overlap": 17.9461, "num_token_query": 52.1281, "num_token_union": 73.3275, "num_word_context": 202.0498, "num_word_doc": 49.6422, "num_word_query": 39.687, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2222.8305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4907, "query_norm": 2.0129, "queue_k_norm": 1.6755, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1281, "sent_len_1": 66.5866, "sent_len_max_0": 127.9862, "sent_len_max_1": 190.7212, "stdk": 0.0488, "stdq": 0.0458, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.4161, "doc_norm": 1.6639, "encoder_q-embeddings": 947.4559, "encoder_q-layer.0": 667.0931, "encoder_q-layer.1": 710.2111, "encoder_q-layer.10": 1529.7848, "encoder_q-layer.11": 2807.4233, "encoder_q-layer.2": 794.5478, "encoder_q-layer.3": 886.9217, "encoder_q-layer.4": 879.6448, "encoder_q-layer.5": 869.9218, "encoder_q-layer.6": 930.1577, "encoder_q-layer.7": 1021.2388, "encoder_q-layer.8": 1185.1754, "encoder_q-layer.9": 1143.9647, "epoch": 0.08, "inbatch_neg_score": 0.5816, "inbatch_pos_score": 1.2129, "learning_rate": 3.9000000000000006e-05, "loss": 3.4161, "norm_diff": 0.4307, "norm_loss": 0.0, "num_token_doc": 66.5995, "num_token_overlap": 17.951, "num_token_query": 52.1046, "num_token_union": 73.3139, "num_word_context": 202.2458, "num_word_doc": 49.7047, "num_word_query": 39.709, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1746.2945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5679, "query_norm": 2.0946, "queue_k_norm": 1.6681, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1046, "sent_len_1": 66.5995, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1488, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3995, "doc_norm": 1.6636, "encoder_q-embeddings": 3388.094, "encoder_q-layer.0": 2873.7112, "encoder_q-layer.1": 2759.176, "encoder_q-layer.10": 1494.4976, "encoder_q-layer.11": 3058.9448, "encoder_q-layer.2": 2730.5742, "encoder_q-layer.3": 2591.198, "encoder_q-layer.4": 2392.387, "encoder_q-layer.5": 2182.1809, "encoder_q-layer.6": 1743.5947, "encoder_q-layer.7": 1206.0558, "encoder_q-layer.8": 1229.2389, "encoder_q-layer.9": 1102.86, "epoch": 0.08, "inbatch_neg_score": 0.6344, "inbatch_pos_score": 1.2861, "learning_rate": 3.9500000000000005e-05, "loss": 3.3995, "norm_diff": 0.3851, "norm_loss": 0.0, "num_token_doc": 66.7256, "num_token_overlap": 17.9801, "num_token_query": 52.2392, "num_token_union": 73.4347, "num_word_context": 202.3354, "num_word_doc": 49.7736, "num_word_query": 39.8401, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3573.0449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.626, "query_norm": 2.0486, "queue_k_norm": 1.6633, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2392, "sent_len_1": 66.7256, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1712, "stdk": 0.0486, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.4294, "doc_norm": 1.6557, "encoder_q-embeddings": 5134.249, "encoder_q-layer.0": 3869.2097, "encoder_q-layer.1": 3840.3501, "encoder_q-layer.10": 3138.2112, "encoder_q-layer.11": 5459.0542, "encoder_q-layer.2": 3636.8542, "encoder_q-layer.3": 3490.7385, "encoder_q-layer.4": 3298.749, "encoder_q-layer.5": 2848.4211, "encoder_q-layer.6": 2679.0012, "encoder_q-layer.7": 2154.8245, "encoder_q-layer.8": 2247.7998, "encoder_q-layer.9": 2511.4771, "epoch": 0.08, "inbatch_neg_score": 0.6626, "inbatch_pos_score": 1.2871, "learning_rate": 4e-05, "loss": 3.4294, "norm_diff": 0.2658, "norm_loss": 0.0, "num_token_doc": 66.9483, "num_token_overlap": 17.9926, "num_token_query": 52.2079, "num_token_union": 73.5264, "num_word_context": 202.4062, "num_word_doc": 49.9485, "num_word_query": 39.8241, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5439.5667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6523, "query_norm": 1.9214, "queue_k_norm": 1.6575, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2079, "sent_len_1": 66.9483, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.8025, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4282, "doc_norm": 1.6582, "encoder_q-embeddings": 2146.2476, "encoder_q-layer.0": 1531.3354, "encoder_q-layer.1": 1695.8228, "encoder_q-layer.10": 2431.4417, "encoder_q-layer.11": 4928.4189, "encoder_q-layer.2": 1906.8103, "encoder_q-layer.3": 1926.9153, "encoder_q-layer.4": 2014.2659, "encoder_q-layer.5": 2033.3583, "encoder_q-layer.6": 1962.9441, "encoder_q-layer.7": 1925.1771, "encoder_q-layer.8": 2166.6587, "encoder_q-layer.9": 2061.9885, "epoch": 0.08, "inbatch_neg_score": 0.6507, "inbatch_pos_score": 1.2754, "learning_rate": 4.05e-05, "loss": 3.4282, "norm_diff": 0.1174, "norm_loss": 0.0, "num_token_doc": 66.7086, "num_token_overlap": 18.003, "num_token_query": 52.2591, "num_token_union": 73.4301, "num_word_context": 202.2033, "num_word_doc": 49.7879, "num_word_query": 39.8639, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3470.4963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.645, "query_norm": 1.7756, "queue_k_norm": 1.6526, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2591, "sent_len_1": 66.7086, "sent_len_max_0": 127.995, "sent_len_max_1": 189.27, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4212, "doc_norm": 1.6494, "encoder_q-embeddings": 1795.1154, "encoder_q-layer.0": 1195.8694, "encoder_q-layer.1": 1277.1719, "encoder_q-layer.10": 3194.7241, "encoder_q-layer.11": 5725.8281, "encoder_q-layer.2": 1440.8374, "encoder_q-layer.3": 1536.0166, "encoder_q-layer.4": 1616.6448, "encoder_q-layer.5": 1629.9657, "encoder_q-layer.6": 1841.605, "encoder_q-layer.7": 1991.444, "encoder_q-layer.8": 2419.5615, "encoder_q-layer.9": 2529.574, "epoch": 0.08, "inbatch_neg_score": 0.5995, "inbatch_pos_score": 1.2324, "learning_rate": 4.1e-05, "loss": 3.4212, "norm_diff": 0.1041, "norm_loss": 0.0, "num_token_doc": 66.5266, "num_token_overlap": 17.8939, "num_token_query": 52.0334, "num_token_union": 73.287, "num_word_context": 202.2599, "num_word_doc": 49.6718, "num_word_query": 39.68, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3525.6953, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5957, "query_norm": 1.7535, "queue_k_norm": 1.6513, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0334, "sent_len_1": 66.5266, "sent_len_max_0": 127.9862, "sent_len_max_1": 190.575, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3922, "doc_norm": 1.6454, "encoder_q-embeddings": 2818.5974, "encoder_q-layer.0": 2074.917, "encoder_q-layer.1": 2258.6416, "encoder_q-layer.10": 2509.584, "encoder_q-layer.11": 5144.6909, "encoder_q-layer.2": 2743.3953, "encoder_q-layer.3": 2744.7861, "encoder_q-layer.4": 3035.3235, "encoder_q-layer.5": 3197.5771, "encoder_q-layer.6": 3110.783, "encoder_q-layer.7": 2768.2607, "encoder_q-layer.8": 2460.1123, "encoder_q-layer.9": 1908.1541, "epoch": 0.08, "inbatch_neg_score": 0.5489, "inbatch_pos_score": 1.1895, "learning_rate": 4.15e-05, "loss": 3.3922, "norm_diff": 0.1715, "norm_loss": 0.0, "num_token_doc": 66.682, "num_token_overlap": 17.9988, "num_token_query": 52.2335, "num_token_union": 73.3794, "num_word_context": 202.7706, "num_word_doc": 49.731, "num_word_query": 39.8314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4360.5757, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5435, "query_norm": 1.8169, "queue_k_norm": 1.6445, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2335, "sent_len_1": 66.682, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.87, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.3619, "doc_norm": 1.644, "encoder_q-embeddings": 1875.3512, "encoder_q-layer.0": 1277.515, "encoder_q-layer.1": 1373.8418, "encoder_q-layer.10": 3570.3096, "encoder_q-layer.11": 6975.6489, "encoder_q-layer.2": 1557.0409, "encoder_q-layer.3": 1623.0338, "encoder_q-layer.4": 1682.9342, "encoder_q-layer.5": 1716.6128, "encoder_q-layer.6": 1825.6711, "encoder_q-layer.7": 1903.2662, "encoder_q-layer.8": 2300.8523, "encoder_q-layer.9": 2088.6057, "epoch": 0.08, "inbatch_neg_score": 0.5099, "inbatch_pos_score": 1.166, "learning_rate": 4.2e-05, "loss": 3.3619, "norm_diff": 0.3214, "norm_loss": 0.0, "num_token_doc": 66.5966, "num_token_overlap": 17.9167, "num_token_query": 52.0803, "num_token_union": 73.4242, "num_word_context": 201.8665, "num_word_doc": 49.7223, "num_word_query": 39.7279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4017.5269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4995, "query_norm": 1.9654, "queue_k_norm": 1.6392, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0803, "sent_len_1": 66.5966, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.7575, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.3208, "doc_norm": 1.6394, "encoder_q-embeddings": 2085.6416, "encoder_q-layer.0": 1346.337, "encoder_q-layer.1": 1486.1909, "encoder_q-layer.10": 2767.1802, "encoder_q-layer.11": 5399.5269, "encoder_q-layer.2": 1662.501, "encoder_q-layer.3": 1812.9625, "encoder_q-layer.4": 1928.8334, "encoder_q-layer.5": 1907.3323, "encoder_q-layer.6": 2024.9514, "encoder_q-layer.7": 2134.5115, "encoder_q-layer.8": 2516.8306, "encoder_q-layer.9": 2325.3396, "epoch": 0.08, "inbatch_neg_score": 0.6009, "inbatch_pos_score": 1.2627, "learning_rate": 4.25e-05, "loss": 3.3208, "norm_diff": 0.4802, "norm_loss": 0.0, "num_token_doc": 66.9194, "num_token_overlap": 17.9704, "num_token_query": 52.2305, "num_token_union": 73.5639, "num_word_context": 202.7325, "num_word_doc": 49.9194, "num_word_query": 39.8164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3582.4173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5894, "query_norm": 2.1196, "queue_k_norm": 1.6342, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2305, "sent_len_1": 66.9194, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.64, "stdk": 0.0485, "stdq": 0.0471, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3259, "doc_norm": 1.6315, "encoder_q-embeddings": 5629.1338, "encoder_q-layer.0": 4490.9897, "encoder_q-layer.1": 4086.1941, "encoder_q-layer.10": 2859.4641, "encoder_q-layer.11": 5213.585, "encoder_q-layer.2": 4177.4341, "encoder_q-layer.3": 4065.0562, "encoder_q-layer.4": 3415.8547, "encoder_q-layer.5": 3078.0032, "encoder_q-layer.6": 2884.0327, "encoder_q-layer.7": 2397.4783, "encoder_q-layer.8": 2320.26, "encoder_q-layer.9": 2389.2432, "epoch": 0.08, "inbatch_neg_score": 0.695, "inbatch_pos_score": 1.335, "learning_rate": 4.3e-05, "loss": 3.3259, "norm_diff": 0.4906, "norm_loss": 0.0, "num_token_doc": 66.8562, "num_token_overlap": 17.9523, "num_token_query": 52.0614, "num_token_union": 73.4446, "num_word_context": 202.1888, "num_word_doc": 49.8537, "num_word_query": 39.6884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5736.097, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6846, "query_norm": 2.1221, "queue_k_norm": 1.6329, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0614, "sent_len_1": 66.8562, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9575, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3137, "doc_norm": 1.6322, "encoder_q-embeddings": 5165.0576, "encoder_q-layer.0": 4269.5259, "encoder_q-layer.1": 4762.0239, "encoder_q-layer.10": 3139.8372, "encoder_q-layer.11": 5915.9526, "encoder_q-layer.2": 4740.0771, "encoder_q-layer.3": 4529.8584, "encoder_q-layer.4": 4130.48, "encoder_q-layer.5": 4130.918, "encoder_q-layer.6": 4251.3267, "encoder_q-layer.7": 3640.8774, "encoder_q-layer.8": 2842.2253, "encoder_q-layer.9": 2190.3823, "epoch": 0.08, "inbatch_neg_score": 0.7451, "inbatch_pos_score": 1.3896, "learning_rate": 4.35e-05, "loss": 3.3137, "norm_diff": 0.4182, "norm_loss": 0.0, "num_token_doc": 66.8173, "num_token_overlap": 17.9977, "num_token_query": 52.2401, "num_token_union": 73.4817, "num_word_context": 202.2413, "num_word_doc": 49.8594, "num_word_query": 39.7998, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6360.9085, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7363, "query_norm": 2.0504, "queue_k_norm": 1.6339, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2401, "sent_len_1": 66.8173, "sent_len_max_0": 127.995, "sent_len_max_1": 189.4175, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.3528, "doc_norm": 1.639, "encoder_q-embeddings": 2220.8254, "encoder_q-layer.0": 1723.677, "encoder_q-layer.1": 1741.1929, "encoder_q-layer.10": 2281.0103, "encoder_q-layer.11": 4569.3276, "encoder_q-layer.2": 1778.2167, "encoder_q-layer.3": 1841.5376, "encoder_q-layer.4": 1827.4204, "encoder_q-layer.5": 1875.4143, "encoder_q-layer.6": 2055.3948, "encoder_q-layer.7": 2098.4067, "encoder_q-layer.8": 2314.6572, "encoder_q-layer.9": 1907.868, "epoch": 0.09, "inbatch_neg_score": 0.7136, "inbatch_pos_score": 1.3672, "learning_rate": 4.4000000000000006e-05, "loss": 3.3528, "norm_diff": 0.2328, "norm_loss": 0.0, "num_token_doc": 66.7529, "num_token_overlap": 18.0074, "num_token_query": 52.2729, "num_token_union": 73.4749, "num_word_context": 202.0697, "num_word_doc": 49.775, "num_word_query": 39.8424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3370.8015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.707, "query_norm": 1.8718, "queue_k_norm": 1.6342, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2729, "sent_len_1": 66.7529, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.6675, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.3349, "doc_norm": 1.6354, "encoder_q-embeddings": 1925.1318, "encoder_q-layer.0": 1389.6425, "encoder_q-layer.1": 1475.2886, "encoder_q-layer.10": 2296.5173, "encoder_q-layer.11": 4989.8003, "encoder_q-layer.2": 1754.0922, "encoder_q-layer.3": 1855.3469, "encoder_q-layer.4": 2008.3126, "encoder_q-layer.5": 2112.7776, "encoder_q-layer.6": 2205.906, "encoder_q-layer.7": 2206.9434, "encoder_q-layer.8": 2433.6211, "encoder_q-layer.9": 1934.7937, "epoch": 0.09, "inbatch_neg_score": 0.6163, "inbatch_pos_score": 1.2549, "learning_rate": 4.4500000000000004e-05, "loss": 3.3349, "norm_diff": 0.1022, "norm_loss": 0.0, "num_token_doc": 66.7618, "num_token_overlap": 18.0241, "num_token_query": 52.2325, "num_token_union": 73.4926, "num_word_context": 201.9486, "num_word_doc": 49.8292, "num_word_query": 39.8175, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3438.1035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.7376, "queue_k_norm": 1.6345, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2325, "sent_len_1": 66.7618, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1687, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3503, "doc_norm": 1.6256, "encoder_q-embeddings": 1620.1302, "encoder_q-layer.0": 1075.6747, "encoder_q-layer.1": 1150.8718, "encoder_q-layer.10": 2461.1362, "encoder_q-layer.11": 4886.5581, "encoder_q-layer.2": 1339.1466, "encoder_q-layer.3": 1516.5441, "encoder_q-layer.4": 1731.9495, "encoder_q-layer.5": 1673.6841, "encoder_q-layer.6": 1805.8499, "encoder_q-layer.7": 1881.2013, "encoder_q-layer.8": 2079.5566, "encoder_q-layer.9": 1843.724, "epoch": 0.09, "inbatch_neg_score": 0.5339, "inbatch_pos_score": 1.1875, "learning_rate": 4.5e-05, "loss": 3.3503, "norm_diff": 0.123, "norm_loss": 0.0, "num_token_doc": 66.7788, "num_token_overlap": 17.9825, "num_token_query": 52.1946, "num_token_union": 73.4701, "num_word_context": 202.2299, "num_word_doc": 49.7779, "num_word_query": 39.7496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3134.3309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5298, "query_norm": 1.7486, "queue_k_norm": 1.6285, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1946, "sent_len_1": 66.7788, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3462, "stdk": 0.0483, "stdq": 0.0468, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.3092, "doc_norm": 1.6284, "encoder_q-embeddings": 4060.9331, "encoder_q-layer.0": 2833.2148, "encoder_q-layer.1": 3064.4966, "encoder_q-layer.10": 2281.814, "encoder_q-layer.11": 4546.0801, "encoder_q-layer.2": 3649.9272, "encoder_q-layer.3": 3872.5176, "encoder_q-layer.4": 4275.939, "encoder_q-layer.5": 3927.5312, "encoder_q-layer.6": 3905.6167, "encoder_q-layer.7": 2971.6133, "encoder_q-layer.8": 2564.7725, "encoder_q-layer.9": 1905.8658, "epoch": 0.09, "inbatch_neg_score": 0.4374, "inbatch_pos_score": 1.0723, "learning_rate": 4.55e-05, "loss": 3.3092, "norm_diff": 0.2088, "norm_loss": 0.0, "num_token_doc": 66.9829, "num_token_overlap": 17.965, "num_token_query": 52.235, "num_token_union": 73.6045, "num_word_context": 202.6877, "num_word_doc": 49.9661, "num_word_query": 39.8367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5162.4606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4314, "query_norm": 1.8372, "queue_k_norm": 1.6212, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.235, "sent_len_1": 66.9829, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.2738, "stdk": 0.0486, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.2873, "doc_norm": 1.618, "encoder_q-embeddings": 2399.96, "encoder_q-layer.0": 1580.7556, "encoder_q-layer.1": 1610.5769, "encoder_q-layer.10": 2510.0061, "encoder_q-layer.11": 5459.8989, "encoder_q-layer.2": 1860.1686, "encoder_q-layer.3": 1997.4952, "encoder_q-layer.4": 1960.0007, "encoder_q-layer.5": 1973.0509, "encoder_q-layer.6": 2033.9528, "encoder_q-layer.7": 2132.0916, "encoder_q-layer.8": 2585.7917, "encoder_q-layer.9": 2233.5344, "epoch": 0.09, "inbatch_neg_score": 0.4553, "inbatch_pos_score": 1.084, "learning_rate": 4.600000000000001e-05, "loss": 3.2873, "norm_diff": 0.2861, "norm_loss": 0.0, "num_token_doc": 66.6361, "num_token_overlap": 17.9748, "num_token_query": 52.1409, "num_token_union": 73.3614, "num_word_context": 202.4628, "num_word_doc": 49.7343, "num_word_query": 39.762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3703.3093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4468, "query_norm": 1.9041, "queue_k_norm": 1.6137, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1409, "sent_len_1": 66.6361, "sent_len_max_0": 128.0, "sent_len_max_1": 188.835, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2654, "doc_norm": 1.6084, "encoder_q-embeddings": 1672.7699, "encoder_q-layer.0": 1117.3693, "encoder_q-layer.1": 1222.887, "encoder_q-layer.10": 2568.6968, "encoder_q-layer.11": 4959.7354, "encoder_q-layer.2": 1353.1747, "encoder_q-layer.3": 1434.4545, "encoder_q-layer.4": 1534.5404, "encoder_q-layer.5": 1579.6201, "encoder_q-layer.6": 1707.991, "encoder_q-layer.7": 1793.7175, "encoder_q-layer.8": 1988.7699, "encoder_q-layer.9": 1888.9771, "epoch": 0.09, "inbatch_neg_score": 0.5189, "inbatch_pos_score": 1.1875, "learning_rate": 4.6500000000000005e-05, "loss": 3.2654, "norm_diff": 0.3621, "norm_loss": 0.0, "num_token_doc": 66.6952, "num_token_overlap": 17.9897, "num_token_query": 52.1351, "num_token_union": 73.3653, "num_word_context": 202.142, "num_word_doc": 49.7821, "num_word_query": 39.7226, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3095.065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5122, "query_norm": 1.9706, "queue_k_norm": 1.6064, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1351, "sent_len_1": 66.6952, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1637, "stdk": 0.0485, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2657, "doc_norm": 1.6011, "encoder_q-embeddings": 1790.611, "encoder_q-layer.0": 1260.5289, "encoder_q-layer.1": 1318.8552, "encoder_q-layer.10": 2159.0632, "encoder_q-layer.11": 4361.123, "encoder_q-layer.2": 1530.959, "encoder_q-layer.3": 1608.9623, "encoder_q-layer.4": 1800.2278, "encoder_q-layer.5": 1833.8333, "encoder_q-layer.6": 2134.3162, "encoder_q-layer.7": 2051.8835, "encoder_q-layer.8": 2040.8569, "encoder_q-layer.9": 1705.016, "epoch": 0.09, "inbatch_neg_score": 0.5694, "inbatch_pos_score": 1.1973, "learning_rate": 4.7e-05, "loss": 3.2657, "norm_diff": 0.2549, "norm_loss": 0.0, "num_token_doc": 66.9372, "num_token_overlap": 18.0404, "num_token_query": 52.3201, "num_token_union": 73.5979, "num_word_context": 202.6764, "num_word_doc": 49.9628, "num_word_query": 39.8946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3102.4615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5605, "query_norm": 1.856, "queue_k_norm": 1.6022, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3201, "sent_len_1": 66.9372, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.155, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.2892, "doc_norm": 1.6008, "encoder_q-embeddings": 7541.7441, "encoder_q-layer.0": 6018.9658, "encoder_q-layer.1": 6087.8906, "encoder_q-layer.10": 2276.5007, "encoder_q-layer.11": 4407.9854, "encoder_q-layer.2": 6263.1636, "encoder_q-layer.3": 5964.3037, "encoder_q-layer.4": 5655.5088, "encoder_q-layer.5": 5687.5532, "encoder_q-layer.6": 5693.2871, "encoder_q-layer.7": 4616.3989, "encoder_q-layer.8": 3269.2754, "encoder_q-layer.9": 1897.9932, "epoch": 0.09, "inbatch_neg_score": 0.5966, "inbatch_pos_score": 1.2393, "learning_rate": 4.75e-05, "loss": 3.2892, "norm_diff": 0.1973, "norm_loss": 0.0, "num_token_doc": 66.7117, "num_token_overlap": 17.9911, "num_token_query": 52.106, "num_token_union": 73.3523, "num_word_context": 202.1047, "num_word_doc": 49.7424, "num_word_query": 39.6956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8029.4948, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5903, "query_norm": 1.7981, "queue_k_norm": 1.6006, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.106, "sent_len_1": 66.7117, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.4638, "stdk": 0.0484, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.299, "doc_norm": 1.5962, "encoder_q-embeddings": 1953.452, "encoder_q-layer.0": 1272.5774, "encoder_q-layer.1": 1397.959, "encoder_q-layer.10": 2636.0889, "encoder_q-layer.11": 5610.8647, "encoder_q-layer.2": 1540.8638, "encoder_q-layer.3": 1736.7784, "encoder_q-layer.4": 1847.0725, "encoder_q-layer.5": 1875.5529, "encoder_q-layer.6": 2173.5381, "encoder_q-layer.7": 2158.9717, "encoder_q-layer.8": 2324.6704, "encoder_q-layer.9": 2215.1899, "epoch": 0.09, "inbatch_neg_score": 0.5878, "inbatch_pos_score": 1.2422, "learning_rate": 4.8e-05, "loss": 3.299, "norm_diff": 0.1372, "norm_loss": 0.0, "num_token_doc": 66.6234, "num_token_overlap": 17.9454, "num_token_query": 52.2266, "num_token_union": 73.3866, "num_word_context": 202.4151, "num_word_doc": 49.7012, "num_word_query": 39.8128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3542.76, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5845, "query_norm": 1.7335, "queue_k_norm": 1.5973, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2266, "sent_len_1": 66.6234, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8938, "stdk": 0.0481, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.2692, "doc_norm": 1.5908, "encoder_q-embeddings": 1780.6537, "encoder_q-layer.0": 1344.9443, "encoder_q-layer.1": 1448.6013, "encoder_q-layer.10": 2055.238, "encoder_q-layer.11": 4425.354, "encoder_q-layer.2": 1621.8519, "encoder_q-layer.3": 1654.3596, "encoder_q-layer.4": 1675.6072, "encoder_q-layer.5": 1737.1226, "encoder_q-layer.6": 1714.4183, "encoder_q-layer.7": 1642.5216, "encoder_q-layer.8": 1832.0576, "encoder_q-layer.9": 1721.7207, "epoch": 0.09, "inbatch_neg_score": 0.5878, "inbatch_pos_score": 1.2295, "learning_rate": 4.85e-05, "loss": 3.2692, "norm_diff": 0.1375, "norm_loss": 0.0, "num_token_doc": 66.7711, "num_token_overlap": 18.0167, "num_token_query": 52.1731, "num_token_union": 73.3522, "num_word_context": 202.4027, "num_word_doc": 49.8018, "num_word_query": 39.7837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2907.5487, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.584, "query_norm": 1.7282, "queue_k_norm": 1.5944, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1731, "sent_len_1": 66.7711, "sent_len_max_0": 127.9825, "sent_len_max_1": 189.95, "stdk": 0.0481, "stdq": 0.0437, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.2311, "doc_norm": 1.5974, "encoder_q-embeddings": 1695.6979, "encoder_q-layer.0": 1165.6443, "encoder_q-layer.1": 1264.0898, "encoder_q-layer.10": 2256.1973, "encoder_q-layer.11": 5075.062, "encoder_q-layer.2": 1409.9243, "encoder_q-layer.3": 1488.0914, "encoder_q-layer.4": 1582.2333, "encoder_q-layer.5": 1632.433, "encoder_q-layer.6": 1760.2632, "encoder_q-layer.7": 1910.248, "encoder_q-layer.8": 2166.3594, "encoder_q-layer.9": 1898.62, "epoch": 0.1, "inbatch_neg_score": 0.6038, "inbatch_pos_score": 1.2588, "learning_rate": 4.9e-05, "loss": 3.2311, "norm_diff": 0.2683, "norm_loss": 0.0, "num_token_doc": 66.8707, "num_token_overlap": 18.0616, "num_token_query": 52.2948, "num_token_union": 73.5063, "num_word_context": 202.2029, "num_word_doc": 49.9176, "num_word_query": 39.8778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3142.2927, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5991, "query_norm": 1.8657, "queue_k_norm": 1.5925, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2948, "sent_len_1": 66.8707, "sent_len_max_0": 128.0, "sent_len_max_1": 188.755, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2146, "doc_norm": 1.5881, "encoder_q-embeddings": 2379.7122, "encoder_q-layer.0": 1730.2783, "encoder_q-layer.1": 1802.2716, "encoder_q-layer.10": 2416.4958, "encoder_q-layer.11": 4838.8755, "encoder_q-layer.2": 2016.0486, "encoder_q-layer.3": 2116.4805, "encoder_q-layer.4": 2199.4924, "encoder_q-layer.5": 2158.8433, "encoder_q-layer.6": 2252.502, "encoder_q-layer.7": 2064.1423, "encoder_q-layer.8": 2148.0312, "encoder_q-layer.9": 1957.1523, "epoch": 0.1, "inbatch_neg_score": 0.6257, "inbatch_pos_score": 1.2871, "learning_rate": 4.9500000000000004e-05, "loss": 3.2146, "norm_diff": 0.2753, "norm_loss": 0.0, "num_token_doc": 67.0133, "num_token_overlap": 18.1686, "num_token_query": 52.4515, "num_token_union": 73.5648, "num_word_context": 202.5724, "num_word_doc": 49.9833, "num_word_query": 39.9921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3577.4589, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6211, "query_norm": 1.8634, "queue_k_norm": 1.5897, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4515, "sent_len_1": 67.0133, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.0825, "stdk": 0.0482, "stdq": 0.0455, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2121, "doc_norm": 1.5911, "encoder_q-embeddings": 1516.9601, "encoder_q-layer.0": 988.8663, "encoder_q-layer.1": 1039.886, "encoder_q-layer.10": 1995.2316, "encoder_q-layer.11": 4081.0881, "encoder_q-layer.2": 1258.5179, "encoder_q-layer.3": 1357.1111, "encoder_q-layer.4": 1461.3468, "encoder_q-layer.5": 1504.5132, "encoder_q-layer.6": 1644.8092, "encoder_q-layer.7": 1743.8508, "encoder_q-layer.8": 1993.1543, "encoder_q-layer.9": 1625.0698, "epoch": 0.1, "inbatch_neg_score": 0.6543, "inbatch_pos_score": 1.3057, "learning_rate": 5e-05, "loss": 3.2121, "norm_diff": 0.2304, "norm_loss": 0.0, "num_token_doc": 66.693, "num_token_overlap": 17.9859, "num_token_query": 52.2261, "num_token_union": 73.4076, "num_word_context": 202.0535, "num_word_doc": 49.74, "num_word_query": 39.7777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2776.3863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6484, "query_norm": 1.8215, "queue_k_norm": 1.5896, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2261, "sent_len_1": 66.693, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8575, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 27.8136, "dev_samples_per_second": 2.301, "dev_steps_per_second": 0.036, "epoch": 0.1, "step": 10000, "test_accuracy": 91.90673828125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4769611358642578, "test_doc_norm": 1.513817548751831, "test_inbatch_neg_score": 0.9453749656677246, "test_inbatch_pos_score": 1.6779147386550903, "test_loss": 0.4769611358642578, "test_loss_align": 1.9609415531158447, "test_loss_unif": 3.2328193187713623, "test_loss_unif_q@queue": 3.232819080352783, "test_norm_diff": 0.22106751799583435, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6250914335250854, "test_query_norm": 1.7348849773406982, "test_queue_k_norm": 1.5894405841827393, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0395868755877018, "test_stdq": 0.036831632256507874, "test_stdqueue_k": 0.04823771119117737, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.8136, "dev_samples_per_second": 2.301, "dev_steps_per_second": 0.036, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.28501, "eval_beir-arguana_recall@10": 0.50284, "eval_beir-arguana_recall@100": 0.82717, "eval_beir-arguana_recall@20": 0.63371, "eval_beir-avg_ndcg@10": 0.25563575000000005, "eval_beir-avg_recall@10": 0.31432525000000006, "eval_beir-avg_recall@100": 0.5020945, "eval_beir-avg_recall@20": 0.37090608333333325, "eval_beir-cqadupstack_ndcg@10": 0.1639775, "eval_beir-cqadupstack_recall@10": 0.2300325, "eval_beir-cqadupstack_recall@100": 0.43271499999999996, "eval_beir-cqadupstack_recall@20": 0.28418083333333327, "eval_beir-fiqa_ndcg@10": 0.13374, "eval_beir-fiqa_recall@10": 0.18245, "eval_beir-fiqa_recall@100": 0.3873, "eval_beir-fiqa_recall@20": 0.2276, "eval_beir-nfcorpus_ndcg@10": 0.2082, "eval_beir-nfcorpus_recall@10": 0.09238, "eval_beir-nfcorpus_recall@100": 0.21357, "eval_beir-nfcorpus_recall@20": 0.11902, "eval_beir-nq_ndcg@10": 0.15099, "eval_beir-nq_recall@10": 0.25804, "eval_beir-nq_recall@100": 0.57083, "eval_beir-nq_recall@20": 0.35132, "eval_beir-quora_ndcg@10": 0.45892, "eval_beir-quora_recall@10": 0.56261, "eval_beir-quora_recall@100": 0.73771, "eval_beir-quora_recall@20": 0.61862, "eval_beir-scidocs_ndcg@10": 0.10826, "eval_beir-scidocs_recall@10": 0.11573, "eval_beir-scidocs_recall@100": 0.29075, "eval_beir-scidocs_recall@20": 0.16143, "eval_beir-scifact_ndcg@10": 0.54303, "eval_beir-scifact_recall@10": 0.69006, "eval_beir-scifact_recall@100": 0.87356, "eval_beir-scifact_recall@20": 0.76344, "eval_beir-trec-covid_ndcg@10": 0.39274, "eval_beir-trec-covid_recall@10": 0.432, "eval_beir-trec-covid_recall@100": 0.3286, "eval_beir-trec-covid_recall@20": 0.407, "eval_beir-webis-touche2020_ndcg@10": 0.11149, "eval_beir-webis-touche2020_recall@10": 0.07711, "eval_beir-webis-touche2020_recall@100": 0.35874, "eval_beir-webis-touche2020_recall@20": 0.14274, "eval_senteval-avg_sts": 0.6835814462918133, "eval_senteval-sickr_spearman": 0.6363700668412343, "eval_senteval-stsb_spearman": 0.7307928257423921, "step": 10000, "test_accuracy": 91.90673828125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4769611358642578, "test_doc_norm": 1.513817548751831, "test_inbatch_neg_score": 0.9453749656677246, "test_inbatch_pos_score": 1.6779147386550903, "test_loss": 0.4769611358642578, "test_loss_align": 1.9609415531158447, "test_loss_unif": 3.2328193187713623, "test_loss_unif_q@queue": 3.232819080352783, "test_norm_diff": 0.22106751799583435, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6250914335250854, "test_query_norm": 1.7348849773406982, "test_queue_k_norm": 1.5894405841827393, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.0395868755877018, "test_stdq": 0.036831632256507874, "test_stdqueue_k": 0.04823771119117737, "test_stdqueue_q": 0.0 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.208, "doc_norm": 1.5927, "encoder_q-embeddings": 2636.3633, "encoder_q-layer.0": 1931.9362, "encoder_q-layer.1": 2011.5983, "encoder_q-layer.10": 2112.8267, "encoder_q-layer.11": 4457.2974, "encoder_q-layer.2": 2068.3145, "encoder_q-layer.3": 2054.179, "encoder_q-layer.4": 1987.1168, "encoder_q-layer.5": 1888.8936, "encoder_q-layer.6": 1877.084, "encoder_q-layer.7": 1751.728, "encoder_q-layer.8": 1895.4126, "encoder_q-layer.9": 1717.801, "epoch": 0.1, "inbatch_neg_score": 0.6445, "inbatch_pos_score": 1.3066, "learning_rate": 4.994444444444445e-05, "loss": 3.208, "norm_diff": 0.202, "norm_loss": 0.0, "num_token_doc": 66.7261, "num_token_overlap": 18.0153, "num_token_query": 52.1563, "num_token_union": 73.4117, "num_word_context": 202.0564, "num_word_doc": 49.796, "num_word_query": 39.7412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3438.0544, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6426, "query_norm": 1.7946, "queue_k_norm": 1.5911, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1563, "sent_len_1": 66.7261, "sent_len_max_0": 128.0, "sent_len_max_1": 188.83, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2199, "doc_norm": 1.5901, "encoder_q-embeddings": 2184.3809, "encoder_q-layer.0": 1629.7279, "encoder_q-layer.1": 1705.9047, "encoder_q-layer.10": 1895.7361, "encoder_q-layer.11": 4192.8716, "encoder_q-layer.2": 1964.4651, "encoder_q-layer.3": 2055.2529, "encoder_q-layer.4": 2099.1733, "encoder_q-layer.5": 1970.0327, "encoder_q-layer.6": 2241.7219, "encoder_q-layer.7": 2003.0798, "encoder_q-layer.8": 2074.5989, "encoder_q-layer.9": 1571.3584, "epoch": 0.1, "inbatch_neg_score": 0.6123, "inbatch_pos_score": 1.2559, "learning_rate": 4.9888888888888894e-05, "loss": 3.2199, "norm_diff": 0.1371, "norm_loss": 0.0, "num_token_doc": 66.9438, "num_token_overlap": 18.0166, "num_token_query": 52.3179, "num_token_union": 73.5883, "num_word_context": 202.6132, "num_word_doc": 49.9542, "num_word_query": 39.8844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3294.8246, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6089, "query_norm": 1.7272, "queue_k_norm": 1.5889, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3179, "sent_len_1": 66.9438, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9288, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2395, "doc_norm": 1.5902, "encoder_q-embeddings": 2282.9797, "encoder_q-layer.0": 1586.4863, "encoder_q-layer.1": 1755.1431, "encoder_q-layer.10": 1980.39, "encoder_q-layer.11": 4069.5593, "encoder_q-layer.2": 1980.1493, "encoder_q-layer.3": 2158.2087, "encoder_q-layer.4": 2347.9812, "encoder_q-layer.5": 2330.7732, "encoder_q-layer.6": 2567.5264, "encoder_q-layer.7": 2539.7668, "encoder_q-layer.8": 2312.4458, "encoder_q-layer.9": 1725.4861, "epoch": 0.1, "inbatch_neg_score": 0.5674, "inbatch_pos_score": 1.2324, "learning_rate": 4.9833333333333336e-05, "loss": 3.2395, "norm_diff": 0.0869, "norm_loss": 0.0, "num_token_doc": 66.8572, "num_token_overlap": 18.0078, "num_token_query": 52.2309, "num_token_union": 73.5187, "num_word_context": 202.6262, "num_word_doc": 49.8952, "num_word_query": 39.8162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3428.1172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5645, "query_norm": 1.6771, "queue_k_norm": 1.5859, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2309, "sent_len_1": 66.8572, "sent_len_max_0": 127.9788, "sent_len_max_1": 190.0637, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.2155, "doc_norm": 1.5815, "encoder_q-embeddings": 4505.4502, "encoder_q-layer.0": 3233.9746, "encoder_q-layer.1": 3773.9915, "encoder_q-layer.10": 1894.0906, "encoder_q-layer.11": 4437.9136, "encoder_q-layer.2": 4183.2271, "encoder_q-layer.3": 4597.6123, "encoder_q-layer.4": 4549.6543, "encoder_q-layer.5": 4292.6206, "encoder_q-layer.6": 4006.0173, "encoder_q-layer.7": 3933.3486, "encoder_q-layer.8": 3531.1265, "encoder_q-layer.9": 2017.2692, "epoch": 0.1, "inbatch_neg_score": 0.5478, "inbatch_pos_score": 1.1855, "learning_rate": 4.977777777777778e-05, "loss": 3.2155, "norm_diff": 0.1008, "norm_loss": 0.0, "num_token_doc": 66.6038, "num_token_overlap": 17.943, "num_token_query": 52.1938, "num_token_union": 73.3668, "num_word_context": 202.2373, "num_word_doc": 49.7174, "num_word_query": 39.8027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5722.2169, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5415, "query_norm": 1.6824, "queue_k_norm": 1.586, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1938, "sent_len_1": 66.6038, "sent_len_max_0": 128.0, "sent_len_max_1": 190.795, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.2124, "doc_norm": 1.5874, "encoder_q-embeddings": 1788.9238, "encoder_q-layer.0": 1169.9976, "encoder_q-layer.1": 1307.1184, "encoder_q-layer.10": 2109.6895, "encoder_q-layer.11": 4392.7456, "encoder_q-layer.2": 1512.2958, "encoder_q-layer.3": 1703.3398, "encoder_q-layer.4": 1771.2263, "encoder_q-layer.5": 1869.2905, "encoder_q-layer.6": 2005.2701, "encoder_q-layer.7": 1995.1696, "encoder_q-layer.8": 2179.9714, "encoder_q-layer.9": 1777.1401, "epoch": 0.1, "inbatch_neg_score": 0.5742, "inbatch_pos_score": 1.2373, "learning_rate": 4.972222222222223e-05, "loss": 3.2124, "norm_diff": 0.1464, "norm_loss": 0.0, "num_token_doc": 66.6559, "num_token_overlap": 17.9769, "num_token_query": 52.2406, "num_token_union": 73.4395, "num_word_context": 202.2355, "num_word_doc": 49.753, "num_word_query": 39.8126, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3070.3609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5703, "query_norm": 1.7339, "queue_k_norm": 1.5831, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2406, "sent_len_1": 66.6559, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5387, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.236, "doc_norm": 1.5882, "encoder_q-embeddings": 2179.1763, "encoder_q-layer.0": 1565.4924, "encoder_q-layer.1": 1646.2372, "encoder_q-layer.10": 1988.8125, "encoder_q-layer.11": 4325.8896, "encoder_q-layer.2": 1834.1306, "encoder_q-layer.3": 1903.9927, "encoder_q-layer.4": 2016.1934, "encoder_q-layer.5": 1992.0958, "encoder_q-layer.6": 2175.8723, "encoder_q-layer.7": 1937.5533, "encoder_q-layer.8": 2289.0972, "encoder_q-layer.9": 1928.6971, "epoch": 0.1, "inbatch_neg_score": 0.5704, "inbatch_pos_score": 1.2617, "learning_rate": 4.966666666666667e-05, "loss": 3.236, "norm_diff": 0.1181, "norm_loss": 0.0, "num_token_doc": 66.5594, "num_token_overlap": 17.9303, "num_token_query": 52.0752, "num_token_union": 73.2884, "num_word_context": 202.1889, "num_word_doc": 49.6721, "num_word_query": 39.7089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3299.6703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5679, "query_norm": 1.7063, "queue_k_norm": 1.5811, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0752, "sent_len_1": 66.5594, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3225, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2052, "doc_norm": 1.5817, "encoder_q-embeddings": 2170.1213, "encoder_q-layer.0": 1485.6752, "encoder_q-layer.1": 1571.9111, "encoder_q-layer.10": 2047.5323, "encoder_q-layer.11": 4137.1484, "encoder_q-layer.2": 1854.6017, "encoder_q-layer.3": 2084.3413, "encoder_q-layer.4": 2247.5276, "encoder_q-layer.5": 2188.094, "encoder_q-layer.6": 2177.4194, "encoder_q-layer.7": 2207.8477, "encoder_q-layer.8": 2298.915, "encoder_q-layer.9": 1836.3217, "epoch": 0.1, "inbatch_neg_score": 0.5366, "inbatch_pos_score": 1.1934, "learning_rate": 4.961111111111111e-05, "loss": 3.2052, "norm_diff": 0.0803, "norm_loss": 0.0, "num_token_doc": 66.7157, "num_token_overlap": 18.0399, "num_token_query": 52.3063, "num_token_union": 73.4405, "num_word_context": 202.2742, "num_word_doc": 49.7749, "num_word_query": 39.8834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3321.055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5327, "query_norm": 1.662, "queue_k_norm": 1.5787, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3063, "sent_len_1": 66.7157, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.175, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.211, "doc_norm": 1.5663, "encoder_q-embeddings": 4874.6885, "encoder_q-layer.0": 3432.5039, "encoder_q-layer.1": 3876.585, "encoder_q-layer.10": 2142.1128, "encoder_q-layer.11": 4570.5811, "encoder_q-layer.2": 4451.5142, "encoder_q-layer.3": 4658.4189, "encoder_q-layer.4": 4774.4155, "encoder_q-layer.5": 4523.4766, "encoder_q-layer.6": 4318.1167, "encoder_q-layer.7": 3681.9084, "encoder_q-layer.8": 3520.8711, "encoder_q-layer.9": 2315.6125, "epoch": 0.11, "inbatch_neg_score": 0.5461, "inbatch_pos_score": 1.2148, "learning_rate": 4.955555555555556e-05, "loss": 3.211, "norm_diff": 0.0633, "norm_loss": 0.0, "num_token_doc": 66.7303, "num_token_overlap": 17.9561, "num_token_query": 52.2122, "num_token_union": 73.4009, "num_word_context": 202.207, "num_word_doc": 49.7626, "num_word_query": 39.8319, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5939.0183, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5444, "query_norm": 1.6295, "queue_k_norm": 1.5756, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2122, "sent_len_1": 66.7303, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.585, "stdk": 0.0477, "stdq": 0.0456, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2077, "doc_norm": 1.5749, "encoder_q-embeddings": 11339.6406, "encoder_q-layer.0": 8348.7773, "encoder_q-layer.1": 9257.2197, "encoder_q-layer.10": 1864.6063, "encoder_q-layer.11": 3424.8335, "encoder_q-layer.2": 10369.9336, "encoder_q-layer.3": 9850.5859, "encoder_q-layer.4": 9147.3037, "encoder_q-layer.5": 9104.6328, "encoder_q-layer.6": 7554.2622, "encoder_q-layer.7": 5459.5864, "encoder_q-layer.8": 3741.2117, "encoder_q-layer.9": 2024.2808, "epoch": 0.11, "inbatch_neg_score": 0.5421, "inbatch_pos_score": 1.2129, "learning_rate": 4.9500000000000004e-05, "loss": 3.2077, "norm_diff": 0.0199, "norm_loss": 0.0, "num_token_doc": 66.7512, "num_token_overlap": 17.9962, "num_token_query": 52.1932, "num_token_union": 73.4804, "num_word_context": 202.5768, "num_word_doc": 49.8459, "num_word_query": 39.7957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11762.9892, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5415, "query_norm": 1.5948, "queue_k_norm": 1.5749, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1932, "sent_len_1": 66.7512, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.1425, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2172, "doc_norm": 1.5723, "encoder_q-embeddings": 4012.3494, "encoder_q-layer.0": 3315.7268, "encoder_q-layer.1": 3814.5237, "encoder_q-layer.10": 1908.8961, "encoder_q-layer.11": 4190.1597, "encoder_q-layer.2": 4506.4189, "encoder_q-layer.3": 4653.814, "encoder_q-layer.4": 5029.4146, "encoder_q-layer.5": 4564.2095, "encoder_q-layer.6": 5081.0762, "encoder_q-layer.7": 4996.019, "encoder_q-layer.8": 5989.1895, "encoder_q-layer.9": 3669.6804, "epoch": 0.11, "inbatch_neg_score": 0.5144, "inbatch_pos_score": 1.1738, "learning_rate": 4.9444444444444446e-05, "loss": 3.2172, "norm_diff": 0.0728, "norm_loss": 0.0, "num_token_doc": 66.898, "num_token_overlap": 18.0347, "num_token_query": 52.3566, "num_token_union": 73.572, "num_word_context": 202.4846, "num_word_doc": 49.8319, "num_word_query": 39.891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6518.2739, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5107, "query_norm": 1.6451, "queue_k_norm": 1.5714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3566, "sent_len_1": 66.898, "sent_len_max_0": 128.0, "sent_len_max_1": 192.4125, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2194, "doc_norm": 1.5696, "encoder_q-embeddings": 3029.0032, "encoder_q-layer.0": 2237.0984, "encoder_q-layer.1": 2519.6255, "encoder_q-layer.10": 1803.7441, "encoder_q-layer.11": 4122.1021, "encoder_q-layer.2": 2756.0278, "encoder_q-layer.3": 2958.5073, "encoder_q-layer.4": 3111.6609, "encoder_q-layer.5": 3279.3684, "encoder_q-layer.6": 3197.2271, "encoder_q-layer.7": 3010.394, "encoder_q-layer.8": 2503.9336, "encoder_q-layer.9": 1677.2012, "epoch": 0.11, "inbatch_neg_score": 0.5233, "inbatch_pos_score": 1.1826, "learning_rate": 4.938888888888889e-05, "loss": 3.2194, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 67.0062, "num_token_overlap": 18.0678, "num_token_query": 52.3361, "num_token_union": 73.6055, "num_word_context": 202.8841, "num_word_doc": 50.0067, "num_word_query": 39.8945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4122.9887, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5225, "query_norm": 1.5823, "queue_k_norm": 1.5699, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3361, "sent_len_1": 67.0062, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1213, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.1938, "doc_norm": 1.5723, "encoder_q-embeddings": 2468.2236, "encoder_q-layer.0": 1664.288, "encoder_q-layer.1": 1973.3911, "encoder_q-layer.10": 2038.2744, "encoder_q-layer.11": 4297.5713, "encoder_q-layer.2": 2326.8418, "encoder_q-layer.3": 2455.2146, "encoder_q-layer.4": 2592.7476, "encoder_q-layer.5": 2571.8059, "encoder_q-layer.6": 2635.3123, "encoder_q-layer.7": 2591.1562, "encoder_q-layer.8": 2683.739, "encoder_q-layer.9": 1934.1372, "epoch": 0.11, "inbatch_neg_score": 0.4994, "inbatch_pos_score": 1.124, "learning_rate": 4.933333333333334e-05, "loss": 3.1938, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.6908, "num_token_overlap": 17.9868, "num_token_query": 52.2334, "num_token_union": 73.4105, "num_word_context": 202.2068, "num_word_doc": 49.7437, "num_word_query": 39.7906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3681.0412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4976, "query_norm": 1.5348, "queue_k_norm": 1.5683, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2334, "sent_len_1": 66.6908, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2325, "stdk": 0.0483, "stdq": 0.0433, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.217, "doc_norm": 1.5641, "encoder_q-embeddings": 2776.675, "encoder_q-layer.0": 1797.9292, "encoder_q-layer.1": 1963.4213, "encoder_q-layer.10": 1920.1893, "encoder_q-layer.11": 4011.948, "encoder_q-layer.2": 2252.3643, "encoder_q-layer.3": 2354.636, "encoder_q-layer.4": 2435.207, "encoder_q-layer.5": 2436.3911, "encoder_q-layer.6": 2426.699, "encoder_q-layer.7": 2321.2769, "encoder_q-layer.8": 2198.5991, "encoder_q-layer.9": 1843.8403, "epoch": 0.11, "inbatch_neg_score": 0.5136, "inbatch_pos_score": 1.1846, "learning_rate": 4.927777777777778e-05, "loss": 3.217, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.5927, "num_token_overlap": 17.916, "num_token_query": 52.0783, "num_token_union": 73.3482, "num_word_context": 202.0574, "num_word_doc": 49.6903, "num_word_query": 39.7159, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3570.5617, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5103, "query_norm": 1.5846, "queue_k_norm": 1.5644, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0783, "sent_len_1": 66.5927, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.455, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1836, "doc_norm": 1.5717, "encoder_q-embeddings": 1073.8064, "encoder_q-layer.0": 775.2767, "encoder_q-layer.1": 871.2789, "encoder_q-layer.10": 834.5189, "encoder_q-layer.11": 1654.0721, "encoder_q-layer.2": 972.1516, "encoder_q-layer.3": 1021.9462, "encoder_q-layer.4": 1036.0293, "encoder_q-layer.5": 1046.4756, "encoder_q-layer.6": 1011.8817, "encoder_q-layer.7": 883.2935, "encoder_q-layer.8": 869.7574, "encoder_q-layer.9": 732.4301, "epoch": 0.11, "inbatch_neg_score": 0.5206, "inbatch_pos_score": 1.1914, "learning_rate": 4.922222222222222e-05, "loss": 3.1836, "norm_diff": 0.0092, "norm_loss": 0.0, "num_token_doc": 66.5028, "num_token_overlap": 17.9698, "num_token_query": 52.1079, "num_token_union": 73.2628, "num_word_context": 201.7922, "num_word_doc": 49.6567, "num_word_query": 39.7212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1474.7572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5186, "query_norm": 1.5761, "queue_k_norm": 1.5632, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1079, "sent_len_1": 66.5028, "sent_len_max_0": 128.0, "sent_len_max_1": 186.7125, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2066, "doc_norm": 1.5618, "encoder_q-embeddings": 5597.1528, "encoder_q-layer.0": 3980.1609, "encoder_q-layer.1": 4086.3601, "encoder_q-layer.10": 907.6394, "encoder_q-layer.11": 1914.2498, "encoder_q-layer.2": 5033.4048, "encoder_q-layer.3": 4714.8711, "encoder_q-layer.4": 4675.1035, "encoder_q-layer.5": 4006.0525, "encoder_q-layer.6": 3683.3765, "encoder_q-layer.7": 2154.5359, "encoder_q-layer.8": 1429.1718, "encoder_q-layer.9": 878.5964, "epoch": 0.11, "inbatch_neg_score": 0.5139, "inbatch_pos_score": 1.1748, "learning_rate": 4.9166666666666665e-05, "loss": 3.2066, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 66.6031, "num_token_overlap": 18.0022, "num_token_query": 52.3334, "num_token_union": 73.4194, "num_word_context": 202.3221, "num_word_doc": 49.7408, "num_word_query": 39.8748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5658.4319, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5127, "query_norm": 1.5808, "queue_k_norm": 1.5628, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3334, "sent_len_1": 66.6031, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5425, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2013, "doc_norm": 1.562, "encoder_q-embeddings": 2670.4053, "encoder_q-layer.0": 1976.4392, "encoder_q-layer.1": 1988.5818, "encoder_q-layer.10": 892.9203, "encoder_q-layer.11": 2018.0593, "encoder_q-layer.2": 2467.605, "encoder_q-layer.3": 2513.7041, "encoder_q-layer.4": 2820.918, "encoder_q-layer.5": 2878.7297, "encoder_q-layer.6": 2933.0669, "encoder_q-layer.7": 2390.4072, "encoder_q-layer.8": 2355.512, "encoder_q-layer.9": 1439.7876, "epoch": 0.11, "inbatch_neg_score": 0.4936, "inbatch_pos_score": 1.1406, "learning_rate": 4.9111111111111114e-05, "loss": 3.2013, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.7823, "num_token_overlap": 18.0219, "num_token_query": 52.3562, "num_token_union": 73.543, "num_word_context": 202.6839, "num_word_doc": 49.8516, "num_word_query": 39.918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3433.9992, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4934, "query_norm": 1.553, "queue_k_norm": 1.5601, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3562, "sent_len_1": 66.7823, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7388, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.204, "doc_norm": 1.5598, "encoder_q-embeddings": 2235.1455, "encoder_q-layer.0": 1648.2275, "encoder_q-layer.1": 1726.0038, "encoder_q-layer.10": 1127.1949, "encoder_q-layer.11": 2536.3247, "encoder_q-layer.2": 2044.7268, "encoder_q-layer.3": 2071.5535, "encoder_q-layer.4": 2147.6877, "encoder_q-layer.5": 2094.3186, "encoder_q-layer.6": 2327.0698, "encoder_q-layer.7": 2019.0518, "encoder_q-layer.8": 1864.7515, "encoder_q-layer.9": 1297.4755, "epoch": 0.11, "inbatch_neg_score": 0.5118, "inbatch_pos_score": 1.1777, "learning_rate": 4.905555555555556e-05, "loss": 3.204, "norm_diff": 0.0527, "norm_loss": 0.0, "num_token_doc": 66.8813, "num_token_overlap": 17.9762, "num_token_query": 52.2378, "num_token_union": 73.5739, "num_word_context": 202.6239, "num_word_doc": 49.9155, "num_word_query": 39.8087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2866.1022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5083, "query_norm": 1.6125, "queue_k_norm": 1.559, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2378, "sent_len_1": 66.8813, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.505, "stdk": 0.048, "stdq": 0.0467, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2255, "doc_norm": 1.5556, "encoder_q-embeddings": 8978.8047, "encoder_q-layer.0": 7193.7744, "encoder_q-layer.1": 7849.8403, "encoder_q-layer.10": 901.1407, "encoder_q-layer.11": 1905.7848, "encoder_q-layer.2": 7718.8799, "encoder_q-layer.3": 7143.6899, "encoder_q-layer.4": 6696.2637, "encoder_q-layer.5": 5324.2251, "encoder_q-layer.6": 5039.0005, "encoder_q-layer.7": 4662.625, "encoder_q-layer.8": 2665.0813, "encoder_q-layer.9": 1079.199, "epoch": 0.12, "inbatch_neg_score": 0.5457, "inbatch_pos_score": 1.2109, "learning_rate": 4.9e-05, "loss": 3.2255, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.7865, "num_token_overlap": 17.929, "num_token_query": 52.1603, "num_token_union": 73.4693, "num_word_context": 202.1649, "num_word_doc": 49.822, "num_word_query": 39.7726, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9243.4668, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5449, "query_norm": 1.6101, "queue_k_norm": 1.5579, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1603, "sent_len_1": 66.7865, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.3288, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.2066, "doc_norm": 1.559, "encoder_q-embeddings": 2809.8921, "encoder_q-layer.0": 2077.2087, "encoder_q-layer.1": 2316.4607, "encoder_q-layer.10": 967.2919, "encoder_q-layer.11": 2004.5764, "encoder_q-layer.2": 2648.4536, "encoder_q-layer.3": 2830.0244, "encoder_q-layer.4": 3010.1499, "encoder_q-layer.5": 3383.4563, "encoder_q-layer.6": 3333.7739, "encoder_q-layer.7": 2912.2117, "encoder_q-layer.8": 2208.6582, "encoder_q-layer.9": 1056.3473, "epoch": 0.12, "inbatch_neg_score": 0.4791, "inbatch_pos_score": 1.1367, "learning_rate": 4.894444444444445e-05, "loss": 3.2066, "norm_diff": 0.0114, "norm_loss": 0.0, "num_token_doc": 66.6854, "num_token_overlap": 17.9696, "num_token_query": 52.2675, "num_token_union": 73.4852, "num_word_context": 202.3304, "num_word_doc": 49.7851, "num_word_query": 39.8434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3728.3545, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4788, "query_norm": 1.5665, "queue_k_norm": 1.5565, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2675, "sent_len_1": 66.6854, "sent_len_max_0": 127.985, "sent_len_max_1": 189.695, "stdk": 0.048, "stdq": 0.0454, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.203, "doc_norm": 1.5588, "encoder_q-embeddings": 5829.0039, "encoder_q-layer.0": 4026.8733, "encoder_q-layer.1": 4821.123, "encoder_q-layer.10": 859.5768, "encoder_q-layer.11": 1764.3523, "encoder_q-layer.2": 5635.0244, "encoder_q-layer.3": 6304.6406, "encoder_q-layer.4": 6347.4502, "encoder_q-layer.5": 6712.0459, "encoder_q-layer.6": 5605.0996, "encoder_q-layer.7": 3645.7622, "encoder_q-layer.8": 2641.7583, "encoder_q-layer.9": 1285.6527, "epoch": 0.12, "inbatch_neg_score": 0.4935, "inbatch_pos_score": 1.1074, "learning_rate": 4.888888888888889e-05, "loss": 3.203, "norm_diff": 0.039, "norm_loss": 0.0, "num_token_doc": 66.6875, "num_token_overlap": 17.9627, "num_token_query": 52.2275, "num_token_union": 73.4454, "num_word_context": 202.4239, "num_word_doc": 49.7465, "num_word_query": 39.8029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7045.3989, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4912, "query_norm": 1.5197, "queue_k_norm": 1.5598, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2275, "sent_len_1": 66.6875, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8162, "stdk": 0.048, "stdq": 0.0432, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.1833, "doc_norm": 1.548, "encoder_q-embeddings": 1999.7594, "encoder_q-layer.0": 1396.2946, "encoder_q-layer.1": 1711.1229, "encoder_q-layer.10": 895.2283, "encoder_q-layer.11": 2159.853, "encoder_q-layer.2": 2004.8588, "encoder_q-layer.3": 2071.0745, "encoder_q-layer.4": 2241.2507, "encoder_q-layer.5": 2092.5339, "encoder_q-layer.6": 2202.9207, "encoder_q-layer.7": 2070.6682, "encoder_q-layer.8": 1862.1772, "encoder_q-layer.9": 1149.6418, "epoch": 0.12, "inbatch_neg_score": 0.4511, "inbatch_pos_score": 1.1006, "learning_rate": 4.883333333333334e-05, "loss": 3.1833, "norm_diff": 0.0214, "norm_loss": 0.0, "num_token_doc": 66.7948, "num_token_overlap": 18.0305, "num_token_query": 52.2653, "num_token_union": 73.4806, "num_word_context": 202.1058, "num_word_doc": 49.8803, "num_word_query": 39.8598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2698.247, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4495, "query_norm": 1.5266, "queue_k_norm": 1.5594, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2653, "sent_len_1": 66.7948, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1413, "stdk": 0.0476, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1806, "doc_norm": 1.5552, "encoder_q-embeddings": 2756.7192, "encoder_q-layer.0": 2038.0634, "encoder_q-layer.1": 2301.918, "encoder_q-layer.10": 849.7227, "encoder_q-layer.11": 1998.7072, "encoder_q-layer.2": 2910.8391, "encoder_q-layer.3": 2842.0371, "encoder_q-layer.4": 2930.28, "encoder_q-layer.5": 3138.7156, "encoder_q-layer.6": 3368.6472, "encoder_q-layer.7": 2971.0847, "encoder_q-layer.8": 1606.5289, "encoder_q-layer.9": 871.3564, "epoch": 0.12, "inbatch_neg_score": 0.4228, "inbatch_pos_score": 1.0771, "learning_rate": 4.8777777777777775e-05, "loss": 3.1806, "norm_diff": 0.0147, "norm_loss": 0.0, "num_token_doc": 66.6836, "num_token_overlap": 18.0018, "num_token_query": 52.25, "num_token_union": 73.4247, "num_word_context": 202.1172, "num_word_doc": 49.7168, "num_word_query": 39.7971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3679.705, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4226, "query_norm": 1.5414, "queue_k_norm": 1.5609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.25, "sent_len_1": 66.6836, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.6937, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1932, "doc_norm": 1.5633, "encoder_q-embeddings": 1644.7528, "encoder_q-layer.0": 1232.457, "encoder_q-layer.1": 1459.9727, "encoder_q-layer.10": 872.3119, "encoder_q-layer.11": 1686.4233, "encoder_q-layer.2": 1714.5127, "encoder_q-layer.3": 1928.767, "encoder_q-layer.4": 2046.0327, "encoder_q-layer.5": 2055.6587, "encoder_q-layer.6": 2237.678, "encoder_q-layer.7": 2105.9863, "encoder_q-layer.8": 2024.5946, "encoder_q-layer.9": 1167.3566, "epoch": 0.12, "inbatch_neg_score": 0.4423, "inbatch_pos_score": 1.085, "learning_rate": 4.8722222222222224e-05, "loss": 3.1932, "norm_diff": 0.0421, "norm_loss": 0.0, "num_token_doc": 66.9308, "num_token_overlap": 17.9671, "num_token_query": 52.0798, "num_token_union": 73.4922, "num_word_context": 202.1992, "num_word_doc": 49.9262, "num_word_query": 39.7002, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2495.9982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4417, "query_norm": 1.5212, "queue_k_norm": 1.5607, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0798, "sent_len_1": 66.9308, "sent_len_max_0": 128.0, "sent_len_max_1": 189.04, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.2034, "doc_norm": 1.5596, "encoder_q-embeddings": 1993.0458, "encoder_q-layer.0": 1503.3309, "encoder_q-layer.1": 1686.9329, "encoder_q-layer.10": 817.2655, "encoder_q-layer.11": 1509.3585, "encoder_q-layer.2": 1836.8876, "encoder_q-layer.3": 1899.9852, "encoder_q-layer.4": 1866.9402, "encoder_q-layer.5": 1715.3851, "encoder_q-layer.6": 1858.2253, "encoder_q-layer.7": 1528.7897, "encoder_q-layer.8": 1230.576, "encoder_q-layer.9": 914.0322, "epoch": 0.12, "inbatch_neg_score": 0.4415, "inbatch_pos_score": 1.127, "learning_rate": 4.866666666666667e-05, "loss": 3.2034, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 66.9306, "num_token_overlap": 18.0045, "num_token_query": 52.163, "num_token_union": 73.5402, "num_word_context": 202.4811, "num_word_doc": 49.9771, "num_word_query": 39.7783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2368.4507, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4409, "query_norm": 1.572, "queue_k_norm": 1.5592, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.163, "sent_len_1": 66.9306, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.2275, "stdk": 0.0481, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1795, "doc_norm": 1.5591, "encoder_q-embeddings": 2893.4963, "encoder_q-layer.0": 2105.6345, "encoder_q-layer.1": 2442.0171, "encoder_q-layer.10": 846.579, "encoder_q-layer.11": 1662.9907, "encoder_q-layer.2": 2869.7261, "encoder_q-layer.3": 2986.3674, "encoder_q-layer.4": 3132.905, "encoder_q-layer.5": 3459.834, "encoder_q-layer.6": 3904.5181, "encoder_q-layer.7": 4010.9087, "encoder_q-layer.8": 3336.967, "encoder_q-layer.9": 1488.23, "epoch": 0.12, "inbatch_neg_score": 0.4501, "inbatch_pos_score": 1.0938, "learning_rate": 4.8611111111111115e-05, "loss": 3.1795, "norm_diff": 0.0288, "norm_loss": 0.0, "num_token_doc": 66.7909, "num_token_overlap": 17.9659, "num_token_query": 52.0653, "num_token_union": 73.4315, "num_word_context": 202.4892, "num_word_doc": 49.834, "num_word_query": 39.7045, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4140.8743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4485, "query_norm": 1.5303, "queue_k_norm": 1.5595, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0653, "sent_len_1": 66.7909, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1612, "stdk": 0.0481, "stdq": 0.0434, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1998, "doc_norm": 1.5593, "encoder_q-embeddings": 1913.822, "encoder_q-layer.0": 1350.1594, "encoder_q-layer.1": 1631.8342, "encoder_q-layer.10": 889.7244, "encoder_q-layer.11": 1746.8018, "encoder_q-layer.2": 1940.3696, "encoder_q-layer.3": 1949.7266, "encoder_q-layer.4": 2141.6672, "encoder_q-layer.5": 1944.4196, "encoder_q-layer.6": 1862.3628, "encoder_q-layer.7": 1717.0675, "encoder_q-layer.8": 1493.9922, "encoder_q-layer.9": 1112.2878, "epoch": 0.12, "inbatch_neg_score": 0.3859, "inbatch_pos_score": 1.0625, "learning_rate": 4.855555555555556e-05, "loss": 3.1998, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.7318, "num_token_overlap": 17.9841, "num_token_query": 52.1667, "num_token_union": 73.4006, "num_word_context": 201.9479, "num_word_doc": 49.7683, "num_word_query": 39.7542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2467.9612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3826, "query_norm": 1.5885, "queue_k_norm": 1.5574, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1667, "sent_len_1": 66.7318, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6, "stdk": 0.0481, "stdq": 0.0453, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1925, "doc_norm": 1.5593, "encoder_q-embeddings": 983.4167, "encoder_q-layer.0": 664.5998, "encoder_q-layer.1": 792.8907, "encoder_q-layer.10": 872.0674, "encoder_q-layer.11": 1683.9304, "encoder_q-layer.2": 877.9673, "encoder_q-layer.3": 943.0999, "encoder_q-layer.4": 1039.5863, "encoder_q-layer.5": 973.7742, "encoder_q-layer.6": 1111.594, "encoder_q-layer.7": 1134.4448, "encoder_q-layer.8": 1231.9418, "encoder_q-layer.9": 904.5274, "epoch": 0.12, "inbatch_neg_score": 0.4065, "inbatch_pos_score": 1.0898, "learning_rate": 4.85e-05, "loss": 3.1925, "norm_diff": 0.0261, "norm_loss": 0.0, "num_token_doc": 66.7793, "num_token_overlap": 18.0275, "num_token_query": 52.3227, "num_token_union": 73.5597, "num_word_context": 202.4841, "num_word_doc": 49.8542, "num_word_query": 39.9311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1477.4635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4087, "query_norm": 1.5854, "queue_k_norm": 1.5585, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3227, "sent_len_1": 66.7793, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5825, "stdk": 0.0482, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1857, "doc_norm": 1.5559, "encoder_q-embeddings": 1018.5947, "encoder_q-layer.0": 685.5554, "encoder_q-layer.1": 806.8661, "encoder_q-layer.10": 763.8926, "encoder_q-layer.11": 1526.3981, "encoder_q-layer.2": 911.8181, "encoder_q-layer.3": 979.2913, "encoder_q-layer.4": 1084.9829, "encoder_q-layer.5": 1083.3165, "encoder_q-layer.6": 1166.6996, "encoder_q-layer.7": 1115.0209, "encoder_q-layer.8": 1117.011, "encoder_q-layer.9": 803.174, "epoch": 0.12, "inbatch_neg_score": 0.3982, "inbatch_pos_score": 1.0586, "learning_rate": 4.844444444444445e-05, "loss": 3.1857, "norm_diff": 0.0189, "norm_loss": 0.0, "num_token_doc": 66.8403, "num_token_overlap": 18.1081, "num_token_query": 52.4257, "num_token_union": 73.5226, "num_word_context": 202.3353, "num_word_doc": 49.916, "num_word_query": 39.9844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1472.3831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3943, "query_norm": 1.5749, "queue_k_norm": 1.5578, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4257, "sent_len_1": 66.8403, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.4137, "stdk": 0.048, "stdq": 0.0439, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.188, "doc_norm": 1.5539, "encoder_q-embeddings": 5472.4272, "encoder_q-layer.0": 3978.5186, "encoder_q-layer.1": 4123.9028, "encoder_q-layer.10": 818.7167, "encoder_q-layer.11": 1794.941, "encoder_q-layer.2": 5042.7021, "encoder_q-layer.3": 5966.0176, "encoder_q-layer.4": 5719.02, "encoder_q-layer.5": 6680.1592, "encoder_q-layer.6": 5628.1538, "encoder_q-layer.7": 4280.2397, "encoder_q-layer.8": 2267.157, "encoder_q-layer.9": 879.0442, "epoch": 0.13, "inbatch_neg_score": 0.368, "inbatch_pos_score": 1.0322, "learning_rate": 4.838888888888889e-05, "loss": 3.188, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.8242, "num_token_overlap": 17.9099, "num_token_query": 52.0751, "num_token_union": 73.4739, "num_word_context": 202.1872, "num_word_doc": 49.8187, "num_word_query": 39.6627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6729.9547, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3652, "query_norm": 1.6092, "queue_k_norm": 1.5593, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0751, "sent_len_1": 66.8242, "sent_len_max_0": 127.9862, "sent_len_max_1": 186.8262, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1869, "doc_norm": 1.557, "encoder_q-embeddings": 1611.2347, "encoder_q-layer.0": 1154.3665, "encoder_q-layer.1": 1292.8645, "encoder_q-layer.10": 819.7279, "encoder_q-layer.11": 1615.5574, "encoder_q-layer.2": 1459.1112, "encoder_q-layer.3": 1556.8229, "encoder_q-layer.4": 1542.2535, "encoder_q-layer.5": 1525.9596, "encoder_q-layer.6": 1725.9526, "encoder_q-layer.7": 1558.7388, "encoder_q-layer.8": 1147.8988, "encoder_q-layer.9": 770.0211, "epoch": 0.13, "inbatch_neg_score": 0.3936, "inbatch_pos_score": 1.043, "learning_rate": 4.8333333333333334e-05, "loss": 3.1869, "norm_diff": 0.0281, "norm_loss": 0.0, "num_token_doc": 66.6836, "num_token_overlap": 18.0125, "num_token_query": 52.1837, "num_token_union": 73.3206, "num_word_context": 202.0279, "num_word_doc": 49.6829, "num_word_query": 39.761, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2073.4971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3918, "query_norm": 1.5852, "queue_k_norm": 1.5557, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1837, "sent_len_1": 66.6836, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5475, "stdk": 0.0482, "stdq": 0.0437, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.1553, "doc_norm": 1.5526, "encoder_q-embeddings": 1194.7067, "encoder_q-layer.0": 1059.295, "encoder_q-layer.1": 1086.8154, "encoder_q-layer.10": 481.1067, "encoder_q-layer.11": 891.2973, "encoder_q-layer.2": 1424.8549, "encoder_q-layer.3": 1614.2291, "encoder_q-layer.4": 1758.1554, "encoder_q-layer.5": 1610.2861, "encoder_q-layer.6": 1739.541, "encoder_q-layer.7": 2308.7629, "encoder_q-layer.8": 1811.7329, "encoder_q-layer.9": 1197.2065, "epoch": 0.13, "inbatch_neg_score": 0.3806, "inbatch_pos_score": 1.0273, "learning_rate": 4.8277777777777776e-05, "loss": 3.1553, "norm_diff": 0.0851, "norm_loss": 0.0, "num_token_doc": 66.7068, "num_token_overlap": 17.9563, "num_token_query": 52.2454, "num_token_union": 73.4275, "num_word_context": 202.4363, "num_word_doc": 49.7735, "num_word_query": 39.8262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2113.9255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3772, "query_norm": 1.6377, "queue_k_norm": 1.5535, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2454, "sent_len_1": 66.7068, "sent_len_max_0": 128.0, "sent_len_max_1": 190.33, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 3.1606, "doc_norm": 1.5483, "encoder_q-embeddings": 765.2347, "encoder_q-layer.0": 528.9607, "encoder_q-layer.1": 581.5878, "encoder_q-layer.10": 427.796, "encoder_q-layer.11": 893.0763, "encoder_q-layer.2": 707.6597, "encoder_q-layer.3": 794.4668, "encoder_q-layer.4": 856.3962, "encoder_q-layer.5": 886.4897, "encoder_q-layer.6": 940.5045, "encoder_q-layer.7": 841.3505, "encoder_q-layer.8": 717.0945, "encoder_q-layer.9": 509.7523, "epoch": 0.13, "inbatch_neg_score": 0.3956, "inbatch_pos_score": 1.0684, "learning_rate": 4.8222222222222225e-05, "loss": 3.1606, "norm_diff": 0.065, "norm_loss": 0.0, "num_token_doc": 66.8069, "num_token_overlap": 18.0593, "num_token_query": 52.384, "num_token_union": 73.5456, "num_word_context": 202.3962, "num_word_doc": 49.8577, "num_word_query": 39.9392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1065.9013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3933, "query_norm": 1.6133, "queue_k_norm": 1.5496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.384, "sent_len_1": 66.8069, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.6262, "stdk": 0.0481, "stdq": 0.0442, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.159, "doc_norm": 1.5413, "encoder_q-embeddings": 1141.6108, "encoder_q-layer.0": 728.9138, "encoder_q-layer.1": 807.9304, "encoder_q-layer.10": 381.3116, "encoder_q-layer.11": 914.9812, "encoder_q-layer.2": 945.7043, "encoder_q-layer.3": 1043.0341, "encoder_q-layer.4": 1088.4717, "encoder_q-layer.5": 1130.6222, "encoder_q-layer.6": 1271.1177, "encoder_q-layer.7": 1278.3853, "encoder_q-layer.8": 983.8458, "encoder_q-layer.9": 550.0602, "epoch": 0.13, "inbatch_neg_score": 0.4071, "inbatch_pos_score": 1.0713, "learning_rate": 4.8166666666666674e-05, "loss": 3.159, "norm_diff": 0.0873, "norm_loss": 0.0, "num_token_doc": 66.8259, "num_token_overlap": 17.9891, "num_token_query": 52.2615, "num_token_union": 73.5194, "num_word_context": 202.273, "num_word_doc": 49.833, "num_word_query": 39.8348, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1435.1825, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.6286, "queue_k_norm": 1.5441, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2615, "sent_len_1": 66.8259, "sent_len_max_0": 128.0, "sent_len_max_1": 190.265, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1729, "doc_norm": 1.535, "encoder_q-embeddings": 889.7404, "encoder_q-layer.0": 630.5776, "encoder_q-layer.1": 704.3668, "encoder_q-layer.10": 377.4827, "encoder_q-layer.11": 951.0239, "encoder_q-layer.2": 827.2542, "encoder_q-layer.3": 949.2383, "encoder_q-layer.4": 1003.7139, "encoder_q-layer.5": 1026.948, "encoder_q-layer.6": 1178.1514, "encoder_q-layer.7": 1083.4564, "encoder_q-layer.8": 868.4515, "encoder_q-layer.9": 448.1153, "epoch": 0.13, "inbatch_neg_score": 0.4134, "inbatch_pos_score": 1.0879, "learning_rate": 4.811111111111111e-05, "loss": 3.1729, "norm_diff": 0.1192, "norm_loss": 0.0, "num_token_doc": 66.6974, "num_token_overlap": 18.0211, "num_token_query": 52.1987, "num_token_union": 73.3623, "num_word_context": 202.3065, "num_word_doc": 49.7892, "num_word_query": 39.7991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1256.5034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4094, "query_norm": 1.6541, "queue_k_norm": 1.5355, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1987, "sent_len_1": 66.6974, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.8225, "stdk": 0.0478, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.1547, "doc_norm": 1.5291, "encoder_q-embeddings": 528.2522, "encoder_q-layer.0": 356.4055, "encoder_q-layer.1": 391.5612, "encoder_q-layer.10": 424.2635, "encoder_q-layer.11": 862.5414, "encoder_q-layer.2": 451.5857, "encoder_q-layer.3": 521.6339, "encoder_q-layer.4": 566.8628, "encoder_q-layer.5": 578.8456, "encoder_q-layer.6": 652.7383, "encoder_q-layer.7": 598.3132, "encoder_q-layer.8": 503.3727, "encoder_q-layer.9": 377.5333, "epoch": 0.13, "inbatch_neg_score": 0.372, "inbatch_pos_score": 1.0098, "learning_rate": 4.805555555555556e-05, "loss": 3.1547, "norm_diff": 0.0753, "norm_loss": 0.0, "num_token_doc": 66.7054, "num_token_overlap": 17.9329, "num_token_query": 51.9851, "num_token_union": 73.3176, "num_word_context": 202.0151, "num_word_doc": 49.7418, "num_word_query": 39.6018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 759.6658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3677, "query_norm": 1.6044, "queue_k_norm": 1.529, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 51.9851, "sent_len_1": 66.7054, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9663, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1677, "doc_norm": 1.524, "encoder_q-embeddings": 2466.3083, "encoder_q-layer.0": 1719.5188, "encoder_q-layer.1": 1761.2452, "encoder_q-layer.10": 423.842, "encoder_q-layer.11": 1007.8157, "encoder_q-layer.2": 2087.5479, "encoder_q-layer.3": 2143.8076, "encoder_q-layer.4": 2241.7168, "encoder_q-layer.5": 2723.2727, "encoder_q-layer.6": 3077.0376, "encoder_q-layer.7": 3319.9023, "encoder_q-layer.8": 3397.905, "encoder_q-layer.9": 1493.8442, "epoch": 0.13, "inbatch_neg_score": 0.3815, "inbatch_pos_score": 1.0322, "learning_rate": 4.8e-05, "loss": 3.1677, "norm_diff": 0.1117, "norm_loss": 0.0, "num_token_doc": 66.9998, "num_token_overlap": 18.0363, "num_token_query": 52.3016, "num_token_union": 73.5939, "num_word_context": 202.6684, "num_word_doc": 49.979, "num_word_query": 39.8987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3425.4692, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3796, "query_norm": 1.6357, "queue_k_norm": 1.5203, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3016, "sent_len_1": 66.9998, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.05, "stdk": 0.0477, "stdq": 0.0456, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1766, "doc_norm": 1.5123, "encoder_q-embeddings": 1079.3617, "encoder_q-layer.0": 737.3729, "encoder_q-layer.1": 868.0186, "encoder_q-layer.10": 445.026, "encoder_q-layer.11": 822.5665, "encoder_q-layer.2": 954.9388, "encoder_q-layer.3": 1073.7828, "encoder_q-layer.4": 1134.6929, "encoder_q-layer.5": 1194.5027, "encoder_q-layer.6": 1225.4821, "encoder_q-layer.7": 1138.0856, "encoder_q-layer.8": 795.8735, "encoder_q-layer.9": 418.4513, "epoch": 0.13, "inbatch_neg_score": 0.3943, "inbatch_pos_score": 1.0635, "learning_rate": 4.794444444444445e-05, "loss": 3.1766, "norm_diff": 0.1397, "norm_loss": 0.0, "num_token_doc": 66.6564, "num_token_overlap": 17.9678, "num_token_query": 52.2726, "num_token_union": 73.475, "num_word_context": 202.2567, "num_word_doc": 49.722, "num_word_query": 39.8703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1392.0153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3926, "query_norm": 1.652, "queue_k_norm": 1.5122, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2726, "sent_len_1": 66.6564, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1575, "stdk": 0.0475, "stdq": 0.0453, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.1517, "doc_norm": 1.5067, "encoder_q-embeddings": 513.1296, "encoder_q-layer.0": 378.7745, "encoder_q-layer.1": 419.0106, "encoder_q-layer.10": 202.2691, "encoder_q-layer.11": 427.2418, "encoder_q-layer.2": 509.5508, "encoder_q-layer.3": 541.2225, "encoder_q-layer.4": 505.5501, "encoder_q-layer.5": 523.7206, "encoder_q-layer.6": 551.8929, "encoder_q-layer.7": 539.2858, "encoder_q-layer.8": 434.2297, "encoder_q-layer.9": 250.5431, "epoch": 0.13, "inbatch_neg_score": 0.3862, "inbatch_pos_score": 1.0117, "learning_rate": 4.7888888888888886e-05, "loss": 3.1517, "norm_diff": 0.1274, "norm_loss": 0.0, "num_token_doc": 66.8515, "num_token_overlap": 18.0662, "num_token_query": 52.2437, "num_token_union": 73.4893, "num_word_context": 202.2335, "num_word_doc": 49.9136, "num_word_query": 39.8091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 659.3974, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.384, "query_norm": 1.6341, "queue_k_norm": 1.5067, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2437, "sent_len_1": 66.8515, "sent_len_max_0": 128.0, "sent_len_max_1": 187.34, "stdk": 0.0475, "stdq": 0.0442, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1669, "doc_norm": 1.5044, "encoder_q-embeddings": 581.8597, "encoder_q-layer.0": 431.0562, "encoder_q-layer.1": 464.4413, "encoder_q-layer.10": 181.2105, "encoder_q-layer.11": 353.1376, "encoder_q-layer.2": 538.7703, "encoder_q-layer.3": 531.4263, "encoder_q-layer.4": 549.2116, "encoder_q-layer.5": 600.8958, "encoder_q-layer.6": 633.0604, "encoder_q-layer.7": 686.7607, "encoder_q-layer.8": 510.3875, "encoder_q-layer.9": 254.9484, "epoch": 0.14, "inbatch_neg_score": 0.3876, "inbatch_pos_score": 1.0508, "learning_rate": 4.7833333333333335e-05, "loss": 3.1669, "norm_diff": 0.1622, "norm_loss": 0.0, "num_token_doc": 66.7361, "num_token_overlap": 17.946, "num_token_query": 52.2169, "num_token_union": 73.4361, "num_word_context": 202.2343, "num_word_doc": 49.7515, "num_word_query": 39.8085, "postclip_grad_norm": 1.0, "preclip_grad_norm": 735.2307, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3821, "query_norm": 1.6666, "queue_k_norm": 1.4997, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2169, "sent_len_1": 66.7361, "sent_len_max_0": 128.0, "sent_len_max_1": 190.555, "stdk": 0.0475, "stdq": 0.0453, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1436, "doc_norm": 1.494, "encoder_q-embeddings": 819.8428, "encoder_q-layer.0": 619.8486, "encoder_q-layer.1": 635.3561, "encoder_q-layer.10": 207.3898, "encoder_q-layer.11": 405.997, "encoder_q-layer.2": 713.3317, "encoder_q-layer.3": 751.8216, "encoder_q-layer.4": 715.7466, "encoder_q-layer.5": 749.8264, "encoder_q-layer.6": 910.4193, "encoder_q-layer.7": 1120.7284, "encoder_q-layer.8": 869.832, "encoder_q-layer.9": 446.8288, "epoch": 0.14, "inbatch_neg_score": 0.3556, "inbatch_pos_score": 1.0166, "learning_rate": 4.7777777777777784e-05, "loss": 3.1436, "norm_diff": 0.1575, "norm_loss": 0.0, "num_token_doc": 66.8021, "num_token_overlap": 18.0514, "num_token_query": 52.251, "num_token_union": 73.478, "num_word_context": 202.3438, "num_word_doc": 49.8585, "num_word_query": 39.8465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1058.9295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3545, "query_norm": 1.6514, "queue_k_norm": 1.4953, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.251, "sent_len_1": 66.8021, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.6575, "stdk": 0.0473, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1626, "doc_norm": 1.4877, "encoder_q-embeddings": 898.7643, "encoder_q-layer.0": 586.0219, "encoder_q-layer.1": 665.9845, "encoder_q-layer.10": 177.2878, "encoder_q-layer.11": 372.9229, "encoder_q-layer.2": 767.9805, "encoder_q-layer.3": 787.4272, "encoder_q-layer.4": 858.4462, "encoder_q-layer.5": 824.0392, "encoder_q-layer.6": 938.7014, "encoder_q-layer.7": 919.6454, "encoder_q-layer.8": 753.7369, "encoder_q-layer.9": 312.2516, "epoch": 0.14, "inbatch_neg_score": 0.3695, "inbatch_pos_score": 1.0254, "learning_rate": 4.7722222222222226e-05, "loss": 3.1626, "norm_diff": 0.168, "norm_loss": 0.0, "num_token_doc": 66.618, "num_token_overlap": 18.025, "num_token_query": 52.4282, "num_token_union": 73.4539, "num_word_context": 201.9911, "num_word_doc": 49.7143, "num_word_query": 39.993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1081.9936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3657, "query_norm": 1.6557, "queue_k_norm": 1.4906, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.4282, "sent_len_1": 66.618, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9525, "stdk": 0.0472, "stdq": 0.045, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.17, "doc_norm": 1.4862, "encoder_q-embeddings": 838.689, "encoder_q-layer.0": 613.5048, "encoder_q-layer.1": 639.4073, "encoder_q-layer.10": 194.7853, "encoder_q-layer.11": 392.36, "encoder_q-layer.2": 727.6696, "encoder_q-layer.3": 728.4899, "encoder_q-layer.4": 843.3295, "encoder_q-layer.5": 952.2637, "encoder_q-layer.6": 1108.3174, "encoder_q-layer.7": 1208.8958, "encoder_q-layer.8": 970.0228, "encoder_q-layer.9": 265.7549, "epoch": 0.14, "inbatch_neg_score": 0.3795, "inbatch_pos_score": 1.0117, "learning_rate": 4.766666666666667e-05, "loss": 3.17, "norm_diff": 0.1382, "norm_loss": 0.0, "num_token_doc": 66.4996, "num_token_overlap": 17.8989, "num_token_query": 52.1208, "num_token_union": 73.3665, "num_word_context": 201.8858, "num_word_doc": 49.6115, "num_word_query": 39.7184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1163.7777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3765, "query_norm": 1.6245, "queue_k_norm": 1.4878, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1208, "sent_len_1": 66.4996, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1838, "stdk": 0.0472, "stdq": 0.0439, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1448, "doc_norm": 1.4875, "encoder_q-embeddings": 1856.3118, "encoder_q-layer.0": 1385.7516, "encoder_q-layer.1": 1395.8514, "encoder_q-layer.10": 193.1882, "encoder_q-layer.11": 408.6765, "encoder_q-layer.2": 1681.7411, "encoder_q-layer.3": 1792.7864, "encoder_q-layer.4": 1935.484, "encoder_q-layer.5": 2045.3429, "encoder_q-layer.6": 2482.2048, "encoder_q-layer.7": 3001.281, "encoder_q-layer.8": 1751.2202, "encoder_q-layer.9": 347.5439, "epoch": 0.14, "inbatch_neg_score": 0.4027, "inbatch_pos_score": 1.0801, "learning_rate": 4.761111111111111e-05, "loss": 3.1448, "norm_diff": 0.2032, "norm_loss": 0.0, "num_token_doc": 66.8518, "num_token_overlap": 18.0087, "num_token_query": 52.3015, "num_token_union": 73.5245, "num_word_context": 202.4956, "num_word_doc": 49.9279, "num_word_query": 39.9086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2587.3941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4009, "query_norm": 1.6907, "queue_k_norm": 1.4868, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3015, "sent_len_1": 66.8518, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.3275, "stdk": 0.0474, "stdq": 0.0459, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1881, "doc_norm": 1.4898, "encoder_q-embeddings": 636.7886, "encoder_q-layer.0": 446.8336, "encoder_q-layer.1": 527.5181, "encoder_q-layer.10": 200.5446, "encoder_q-layer.11": 392.7873, "encoder_q-layer.2": 578.4214, "encoder_q-layer.3": 585.3046, "encoder_q-layer.4": 573.6411, "encoder_q-layer.5": 572.5251, "encoder_q-layer.6": 624.0812, "encoder_q-layer.7": 567.5458, "encoder_q-layer.8": 396.9014, "encoder_q-layer.9": 202.5016, "epoch": 0.14, "inbatch_neg_score": 0.4364, "inbatch_pos_score": 1.1006, "learning_rate": 4.755555555555556e-05, "loss": 3.1881, "norm_diff": 0.1994, "norm_loss": 0.0, "num_token_doc": 66.8017, "num_token_overlap": 18.0414, "num_token_query": 52.2842, "num_token_union": 73.4741, "num_word_context": 202.5397, "num_word_doc": 49.848, "num_word_query": 39.8392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 749.4694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4338, "query_norm": 1.6892, "queue_k_norm": 1.486, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2842, "sent_len_1": 66.8017, "sent_len_max_0": 128.0, "sent_len_max_1": 188.085, "stdk": 0.0475, "stdq": 0.0462, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1664, "doc_norm": 1.4819, "encoder_q-embeddings": 1200.9846, "encoder_q-layer.0": 873.4056, "encoder_q-layer.1": 902.0007, "encoder_q-layer.10": 199.7634, "encoder_q-layer.11": 407.553, "encoder_q-layer.2": 901.9837, "encoder_q-layer.3": 905.9902, "encoder_q-layer.4": 994.9855, "encoder_q-layer.5": 943.9012, "encoder_q-layer.6": 822.2877, "encoder_q-layer.7": 782.9082, "encoder_q-layer.8": 589.5168, "encoder_q-layer.9": 261.4795, "epoch": 0.14, "inbatch_neg_score": 0.4562, "inbatch_pos_score": 1.1055, "learning_rate": 4.75e-05, "loss": 3.1664, "norm_diff": 0.1763, "norm_loss": 0.0, "num_token_doc": 66.8891, "num_token_overlap": 18.0613, "num_token_query": 52.4198, "num_token_union": 73.5561, "num_word_context": 202.4345, "num_word_doc": 49.8796, "num_word_query": 39.9543, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1229.8295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4529, "query_norm": 1.6583, "queue_k_norm": 1.4861, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.4198, "sent_len_1": 66.8891, "sent_len_max_0": 127.9838, "sent_len_max_1": 190.6662, "stdk": 0.0472, "stdq": 0.0445, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1688, "doc_norm": 1.4833, "encoder_q-embeddings": 1358.3126, "encoder_q-layer.0": 942.6642, "encoder_q-layer.1": 1053.5774, "encoder_q-layer.10": 197.4722, "encoder_q-layer.11": 409.2624, "encoder_q-layer.2": 1169.5083, "encoder_q-layer.3": 1246.7937, "encoder_q-layer.4": 1231.6588, "encoder_q-layer.5": 1250.4938, "encoder_q-layer.6": 1241.1669, "encoder_q-layer.7": 984.9331, "encoder_q-layer.8": 443.5918, "encoder_q-layer.9": 217.1644, "epoch": 0.14, "inbatch_neg_score": 0.4639, "inbatch_pos_score": 1.1504, "learning_rate": 4.7444444444444445e-05, "loss": 3.1688, "norm_diff": 0.1866, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 18.0027, "num_token_query": 52.1762, "num_token_union": 73.4131, "num_word_context": 202.4194, "num_word_doc": 49.8109, "num_word_query": 39.7815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1488.7713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4622, "query_norm": 1.67, "queue_k_norm": 1.4864, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1762, "sent_len_1": 66.7575, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2463, "stdk": 0.0473, "stdq": 0.0458, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.1548, "doc_norm": 1.4882, "encoder_q-embeddings": 2360.6133, "encoder_q-layer.0": 1830.0616, "encoder_q-layer.1": 1956.006, "encoder_q-layer.10": 203.0504, "encoder_q-layer.11": 420.9352, "encoder_q-layer.2": 2297.3828, "encoder_q-layer.3": 2590.5403, "encoder_q-layer.4": 2756.8179, "encoder_q-layer.5": 2872.228, "encoder_q-layer.6": 3098.8748, "encoder_q-layer.7": 2772.3577, "encoder_q-layer.8": 1435.6521, "encoder_q-layer.9": 332.0853, "epoch": 0.14, "inbatch_neg_score": 0.4737, "inbatch_pos_score": 1.1504, "learning_rate": 4.7388888888888894e-05, "loss": 3.1548, "norm_diff": 0.1893, "norm_loss": 0.0, "num_token_doc": 66.7853, "num_token_overlap": 17.9882, "num_token_query": 52.2596, "num_token_union": 73.5114, "num_word_context": 202.2604, "num_word_doc": 49.8191, "num_word_query": 39.8233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3211.5173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4727, "query_norm": 1.6776, "queue_k_norm": 1.4855, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2596, "sent_len_1": 66.7853, "sent_len_max_0": 128.0, "sent_len_max_1": 188.705, "stdk": 0.0475, "stdq": 0.0453, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1464, "doc_norm": 1.4822, "encoder_q-embeddings": 2848.928, "encoder_q-layer.0": 2249.1252, "encoder_q-layer.1": 2108.334, "encoder_q-layer.10": 183.8642, "encoder_q-layer.11": 410.6148, "encoder_q-layer.2": 1831.3986, "encoder_q-layer.3": 1659.4443, "encoder_q-layer.4": 1851.025, "encoder_q-layer.5": 1925.1069, "encoder_q-layer.6": 1522.8845, "encoder_q-layer.7": 1941.723, "encoder_q-layer.8": 1066.0458, "encoder_q-layer.9": 302.301, "epoch": 0.14, "inbatch_neg_score": 0.4527, "inbatch_pos_score": 1.0996, "learning_rate": 4.7333333333333336e-05, "loss": 3.1464, "norm_diff": 0.145, "norm_loss": 0.0, "num_token_doc": 66.7479, "num_token_overlap": 18.0594, "num_token_query": 52.228, "num_token_union": 73.3681, "num_word_context": 202.2033, "num_word_doc": 49.8199, "num_word_query": 39.8318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2660.9435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4502, "query_norm": 1.6272, "queue_k_norm": 1.4857, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.228, "sent_len_1": 66.7479, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.5012, "stdk": 0.0472, "stdq": 0.0443, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.191, "doc_norm": 1.4873, "encoder_q-embeddings": 1089.1213, "encoder_q-layer.0": 802.5178, "encoder_q-layer.1": 897.257, "encoder_q-layer.10": 183.5924, "encoder_q-layer.11": 408.8223, "encoder_q-layer.2": 1000.3668, "encoder_q-layer.3": 1058.3479, "encoder_q-layer.4": 952.5788, "encoder_q-layer.5": 895.4168, "encoder_q-layer.6": 880.9838, "encoder_q-layer.7": 852.028, "encoder_q-layer.8": 457.1341, "encoder_q-layer.9": 197.544, "epoch": 0.15, "inbatch_neg_score": 0.4728, "inbatch_pos_score": 1.1357, "learning_rate": 4.727777777777778e-05, "loss": 3.191, "norm_diff": 0.156, "norm_loss": 0.0, "num_token_doc": 66.5596, "num_token_overlap": 17.9755, "num_token_query": 52.127, "num_token_union": 73.2602, "num_word_context": 201.9584, "num_word_doc": 49.6491, "num_word_query": 39.7188, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.8303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4712, "query_norm": 1.6433, "queue_k_norm": 1.4848, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.127, "sent_len_1": 66.5596, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5538, "stdk": 0.0475, "stdq": 0.0448, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1594, "doc_norm": 1.493, "encoder_q-embeddings": 1461.4736, "encoder_q-layer.0": 1191.3589, "encoder_q-layer.1": 1207.1254, "encoder_q-layer.10": 229.5762, "encoder_q-layer.11": 456.5283, "encoder_q-layer.2": 1082.5729, "encoder_q-layer.3": 1161.084, "encoder_q-layer.4": 1082.1416, "encoder_q-layer.5": 1239.1405, "encoder_q-layer.6": 1168.0746, "encoder_q-layer.7": 1270.4513, "encoder_q-layer.8": 993.168, "encoder_q-layer.9": 551.4649, "epoch": 0.15, "inbatch_neg_score": 0.4869, "inbatch_pos_score": 1.127, "learning_rate": 4.722222222222222e-05, "loss": 3.1594, "norm_diff": 0.1487, "norm_loss": 0.0, "num_token_doc": 66.7129, "num_token_overlap": 18.0051, "num_token_query": 52.2096, "num_token_union": 73.4225, "num_word_context": 201.8475, "num_word_doc": 49.7814, "num_word_query": 39.8113, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1603.6676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4832, "query_norm": 1.6417, "queue_k_norm": 1.4859, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2096, "sent_len_1": 66.7129, "sent_len_max_0": 128.0, "sent_len_max_1": 187.86, "stdk": 0.0477, "stdq": 0.0439, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.1628, "doc_norm": 1.488, "encoder_q-embeddings": 324.2814, "encoder_q-layer.0": 217.5684, "encoder_q-layer.1": 236.5079, "encoder_q-layer.10": 193.0499, "encoder_q-layer.11": 394.4686, "encoder_q-layer.2": 253.8627, "encoder_q-layer.3": 276.1055, "encoder_q-layer.4": 260.2464, "encoder_q-layer.5": 261.9206, "encoder_q-layer.6": 307.0495, "encoder_q-layer.7": 296.1549, "encoder_q-layer.8": 261.6778, "encoder_q-layer.9": 190.8389, "epoch": 0.15, "inbatch_neg_score": 0.4802, "inbatch_pos_score": 1.1055, "learning_rate": 4.716666666666667e-05, "loss": 3.1628, "norm_diff": 0.1681, "norm_loss": 0.0, "num_token_doc": 67.0923, "num_token_overlap": 18.0963, "num_token_query": 52.4104, "num_token_union": 73.647, "num_word_context": 202.4596, "num_word_doc": 50.0393, "num_word_query": 39.949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 398.3898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4761, "query_norm": 1.6561, "queue_k_norm": 1.4884, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4104, "sent_len_1": 67.0923, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7812, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.1945, "doc_norm": 1.4868, "encoder_q-embeddings": 411.5883, "encoder_q-layer.0": 293.5033, "encoder_q-layer.1": 318.6926, "encoder_q-layer.10": 192.461, "encoder_q-layer.11": 354.4918, "encoder_q-layer.2": 331.1666, "encoder_q-layer.3": 330.3368, "encoder_q-layer.4": 315.718, "encoder_q-layer.5": 327.5298, "encoder_q-layer.6": 345.9778, "encoder_q-layer.7": 350.4173, "encoder_q-layer.8": 251.0382, "encoder_q-layer.9": 169.2238, "epoch": 0.15, "inbatch_neg_score": 0.5114, "inbatch_pos_score": 1.1846, "learning_rate": 4.711111111111111e-05, "loss": 3.1945, "norm_diff": 0.1769, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 17.9763, "num_token_query": 52.3095, "num_token_union": 73.4986, "num_word_context": 202.2352, "num_word_doc": 49.7946, "num_word_query": 39.8759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 467.8967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5098, "query_norm": 1.6637, "queue_k_norm": 1.4903, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3095, "sent_len_1": 66.778, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.5737, "stdk": 0.0474, "stdq": 0.0452, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1803, "doc_norm": 1.4889, "encoder_q-embeddings": 454.8306, "encoder_q-layer.0": 302.9397, "encoder_q-layer.1": 329.8116, "encoder_q-layer.10": 203.9082, "encoder_q-layer.11": 403.398, "encoder_q-layer.2": 374.126, "encoder_q-layer.3": 398.6897, "encoder_q-layer.4": 469.347, "encoder_q-layer.5": 447.425, "encoder_q-layer.6": 492.1278, "encoder_q-layer.7": 424.2572, "encoder_q-layer.8": 327.0544, "encoder_q-layer.9": 197.3598, "epoch": 0.15, "inbatch_neg_score": 0.5138, "inbatch_pos_score": 1.1699, "learning_rate": 4.7055555555555555e-05, "loss": 3.1803, "norm_diff": 0.149, "norm_loss": 0.0, "num_token_doc": 66.7568, "num_token_overlap": 18.0092, "num_token_query": 52.1995, "num_token_union": 73.4514, "num_word_context": 202.0497, "num_word_doc": 49.8145, "num_word_query": 39.8154, "postclip_grad_norm": 1.0, "preclip_grad_norm": 554.8184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5107, "query_norm": 1.6378, "queue_k_norm": 1.4883, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1995, "sent_len_1": 66.7568, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4087, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.1686, "doc_norm": 1.4873, "encoder_q-embeddings": 627.3865, "encoder_q-layer.0": 434.9202, "encoder_q-layer.1": 497.2548, "encoder_q-layer.10": 197.32, "encoder_q-layer.11": 373.5058, "encoder_q-layer.2": 561.8579, "encoder_q-layer.3": 620.302, "encoder_q-layer.4": 679.6077, "encoder_q-layer.5": 651.3152, "encoder_q-layer.6": 634.5035, "encoder_q-layer.7": 578.9163, "encoder_q-layer.8": 422.8904, "encoder_q-layer.9": 199.4211, "epoch": 0.15, "inbatch_neg_score": 0.4972, "inbatch_pos_score": 1.1377, "learning_rate": 4.7e-05, "loss": 3.1686, "norm_diff": 0.1616, "norm_loss": 0.0, "num_token_doc": 66.9225, "num_token_overlap": 17.9922, "num_token_query": 52.2006, "num_token_union": 73.4886, "num_word_context": 202.3793, "num_word_doc": 49.8708, "num_word_query": 39.7932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 769.5802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4946, "query_norm": 1.6489, "queue_k_norm": 1.4907, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2006, "sent_len_1": 66.9225, "sent_len_max_0": 127.9887, "sent_len_max_1": 191.46, "stdk": 0.0473, "stdq": 0.0453, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1753, "doc_norm": 1.4967, "encoder_q-embeddings": 800.7928, "encoder_q-layer.0": 601.2594, "encoder_q-layer.1": 687.6469, "encoder_q-layer.10": 190.8576, "encoder_q-layer.11": 386.3144, "encoder_q-layer.2": 750.1934, "encoder_q-layer.3": 787.1146, "encoder_q-layer.4": 857.0739, "encoder_q-layer.5": 918.7312, "encoder_q-layer.6": 986.8513, "encoder_q-layer.7": 915.0591, "encoder_q-layer.8": 543.8389, "encoder_q-layer.9": 199.9092, "epoch": 0.15, "inbatch_neg_score": 0.4895, "inbatch_pos_score": 1.168, "learning_rate": 4.6944444444444446e-05, "loss": 3.1753, "norm_diff": 0.1257, "norm_loss": 0.0, "num_token_doc": 66.7886, "num_token_overlap": 17.9906, "num_token_query": 52.1576, "num_token_union": 73.4242, "num_word_context": 202.0775, "num_word_doc": 49.8297, "num_word_query": 39.7535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1025.5244, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.489, "query_norm": 1.6224, "queue_k_norm": 1.4917, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1576, "sent_len_1": 66.7886, "sent_len_max_0": 127.995, "sent_len_max_1": 189.7275, "stdk": 0.0476, "stdq": 0.0446, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.1347, "doc_norm": 1.4902, "encoder_q-embeddings": 228.0112, "encoder_q-layer.0": 144.8016, "encoder_q-layer.1": 163.0807, "encoder_q-layer.10": 203.1929, "encoder_q-layer.11": 432.6729, "encoder_q-layer.2": 186.9744, "encoder_q-layer.3": 205.5753, "encoder_q-layer.4": 223.8926, "encoder_q-layer.5": 213.3151, "encoder_q-layer.6": 229.7467, "encoder_q-layer.7": 224.7921, "encoder_q-layer.8": 241.5598, "encoder_q-layer.9": 197.1141, "epoch": 0.15, "inbatch_neg_score": 0.476, "inbatch_pos_score": 1.127, "learning_rate": 4.6888888888888895e-05, "loss": 3.1347, "norm_diff": 0.1665, "norm_loss": 0.0, "num_token_doc": 66.6306, "num_token_overlap": 18.0905, "num_token_query": 52.2992, "num_token_union": 73.3913, "num_word_context": 202.1295, "num_word_doc": 49.8076, "num_word_query": 39.8734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 326.9499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4756, "query_norm": 1.6567, "queue_k_norm": 1.4935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2992, "sent_len_1": 66.6306, "sent_len_max_0": 127.9988, "sent_len_max_1": 185.2837, "stdk": 0.0474, "stdq": 0.0452, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1802, "doc_norm": 1.4927, "encoder_q-embeddings": 996.2069, "encoder_q-layer.0": 681.8662, "encoder_q-layer.1": 735.4337, "encoder_q-layer.10": 184.6417, "encoder_q-layer.11": 360.361, "encoder_q-layer.2": 852.7647, "encoder_q-layer.3": 897.1083, "encoder_q-layer.4": 863.8923, "encoder_q-layer.5": 879.3344, "encoder_q-layer.6": 1015.7146, "encoder_q-layer.7": 737.6561, "encoder_q-layer.8": 418.6609, "encoder_q-layer.9": 206.7449, "epoch": 0.15, "inbatch_neg_score": 0.475, "inbatch_pos_score": 1.1396, "learning_rate": 4.683333333333334e-05, "loss": 3.1802, "norm_diff": 0.1281, "norm_loss": 0.0, "num_token_doc": 67.0051, "num_token_overlap": 17.9967, "num_token_query": 52.1138, "num_token_union": 73.5221, "num_word_context": 202.4372, "num_word_doc": 49.9773, "num_word_query": 39.7288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1111.3968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4744, "query_norm": 1.6208, "queue_k_norm": 1.494, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1138, "sent_len_1": 67.0051, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.1838, "stdk": 0.0475, "stdq": 0.0447, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1391, "doc_norm": 1.4914, "encoder_q-embeddings": 3369.8674, "encoder_q-layer.0": 2409.3342, "encoder_q-layer.1": 2995.1616, "encoder_q-layer.10": 396.1707, "encoder_q-layer.11": 713.8658, "encoder_q-layer.2": 3452.8984, "encoder_q-layer.3": 3826.8533, "encoder_q-layer.4": 2916.5828, "encoder_q-layer.5": 2543.7483, "encoder_q-layer.6": 2551.7886, "encoder_q-layer.7": 2869.0635, "encoder_q-layer.8": 2288.7869, "encoder_q-layer.9": 740.7478, "epoch": 0.15, "inbatch_neg_score": 0.4861, "inbatch_pos_score": 1.1348, "learning_rate": 4.677777777777778e-05, "loss": 3.1391, "norm_diff": 0.1578, "norm_loss": 0.0, "num_token_doc": 66.7788, "num_token_overlap": 18.0782, "num_token_query": 52.3101, "num_token_union": 73.47, "num_word_context": 202.7516, "num_word_doc": 49.8418, "num_word_query": 39.8669, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4083.9686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4822, "query_norm": 1.6492, "queue_k_norm": 1.4911, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3101, "sent_len_1": 66.7788, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0563, "stdk": 0.0474, "stdq": 0.0453, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1761, "doc_norm": 1.4938, "encoder_q-embeddings": 1131.3438, "encoder_q-layer.0": 797.8721, "encoder_q-layer.1": 922.3792, "encoder_q-layer.10": 392.0608, "encoder_q-layer.11": 782.7526, "encoder_q-layer.2": 1033.4561, "encoder_q-layer.3": 1095.4242, "encoder_q-layer.4": 1213.5085, "encoder_q-layer.5": 1087.7996, "encoder_q-layer.6": 1003.4979, "encoder_q-layer.7": 888.018, "encoder_q-layer.8": 636.8473, "encoder_q-layer.9": 388.757, "epoch": 0.16, "inbatch_neg_score": 0.4853, "inbatch_pos_score": 1.1455, "learning_rate": 4.672222222222222e-05, "loss": 3.1761, "norm_diff": 0.1132, "norm_loss": 0.0, "num_token_doc": 66.799, "num_token_overlap": 18.0123, "num_token_query": 52.4163, "num_token_union": 73.5752, "num_word_context": 202.6325, "num_word_doc": 49.8003, "num_word_query": 39.9702, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1342.3574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4824, "query_norm": 1.6071, "queue_k_norm": 1.4917, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4163, "sent_len_1": 66.799, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.2825, "stdk": 0.0474, "stdq": 0.0449, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1637, "doc_norm": 1.4888, "encoder_q-embeddings": 468.9782, "encoder_q-layer.0": 307.2374, "encoder_q-layer.1": 374.3964, "encoder_q-layer.10": 179.4799, "encoder_q-layer.11": 371.5578, "encoder_q-layer.2": 397.9854, "encoder_q-layer.3": 382.4482, "encoder_q-layer.4": 364.2446, "encoder_q-layer.5": 339.6457, "encoder_q-layer.6": 324.7877, "encoder_q-layer.7": 371.8984, "encoder_q-layer.8": 268.0266, "encoder_q-layer.9": 175.3398, "epoch": 0.16, "inbatch_neg_score": 0.4915, "inbatch_pos_score": 1.1748, "learning_rate": 4.666666666666667e-05, "loss": 3.1637, "norm_diff": 0.1311, "norm_loss": 0.0, "num_token_doc": 66.6929, "num_token_overlap": 18.0302, "num_token_query": 52.2968, "num_token_union": 73.4155, "num_word_context": 202.2916, "num_word_doc": 49.7327, "num_word_query": 39.8561, "postclip_grad_norm": 1.0, "preclip_grad_norm": 511.6533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4902, "query_norm": 1.6199, "queue_k_norm": 1.4913, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2968, "sent_len_1": 66.6929, "sent_len_max_0": 127.9887, "sent_len_max_1": 192.3038, "stdk": 0.0473, "stdq": 0.0459, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1833, "doc_norm": 1.4874, "encoder_q-embeddings": 604.0562, "encoder_q-layer.0": 401.2681, "encoder_q-layer.1": 440.9644, "encoder_q-layer.10": 184.258, "encoder_q-layer.11": 375.1837, "encoder_q-layer.2": 506.1552, "encoder_q-layer.3": 569.6108, "encoder_q-layer.4": 615.3343, "encoder_q-layer.5": 565.0876, "encoder_q-layer.6": 641.6114, "encoder_q-layer.7": 721.4499, "encoder_q-layer.8": 708.3146, "encoder_q-layer.9": 346.4051, "epoch": 0.16, "inbatch_neg_score": 0.4512, "inbatch_pos_score": 1.1045, "learning_rate": 4.6611111111111114e-05, "loss": 3.1833, "norm_diff": 0.162, "norm_loss": 0.0, "num_token_doc": 66.8033, "num_token_overlap": 18.0419, "num_token_query": 52.2766, "num_token_union": 73.4799, "num_word_context": 202.3484, "num_word_doc": 49.8097, "num_word_query": 39.8524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 798.5072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4478, "query_norm": 1.6494, "queue_k_norm": 1.4895, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2766, "sent_len_1": 66.8033, "sent_len_max_0": 127.9862, "sent_len_max_1": 191.2512, "stdk": 0.0472, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1639, "doc_norm": 1.4837, "encoder_q-embeddings": 940.4116, "encoder_q-layer.0": 664.4426, "encoder_q-layer.1": 768.0139, "encoder_q-layer.10": 188.2399, "encoder_q-layer.11": 393.0168, "encoder_q-layer.2": 892.1436, "encoder_q-layer.3": 893.0804, "encoder_q-layer.4": 927.0851, "encoder_q-layer.5": 716.4246, "encoder_q-layer.6": 770.4171, "encoder_q-layer.7": 697.9905, "encoder_q-layer.8": 586.1249, "encoder_q-layer.9": 241.3259, "epoch": 0.16, "inbatch_neg_score": 0.4587, "inbatch_pos_score": 1.1113, "learning_rate": 4.6555555555555556e-05, "loss": 3.1639, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.4908, "num_token_overlap": 18.0121, "num_token_query": 52.4267, "num_token_union": 73.3813, "num_word_context": 201.9432, "num_word_doc": 49.6233, "num_word_query": 39.9713, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1080.115, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4551, "query_norm": 1.6058, "queue_k_norm": 1.4872, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4267, "sent_len_1": 66.4908, "sent_len_max_0": 128.0, "sent_len_max_1": 188.105, "stdk": 0.0471, "stdq": 0.0444, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1806, "doc_norm": 1.4861, "encoder_q-embeddings": 578.9531, "encoder_q-layer.0": 439.2297, "encoder_q-layer.1": 461.4962, "encoder_q-layer.10": 103.0707, "encoder_q-layer.11": 199.8445, "encoder_q-layer.2": 507.3398, "encoder_q-layer.3": 516.9869, "encoder_q-layer.4": 491.565, "encoder_q-layer.5": 459.8874, "encoder_q-layer.6": 518.5364, "encoder_q-layer.7": 456.3253, "encoder_q-layer.8": 346.1373, "encoder_q-layer.9": 176.5217, "epoch": 0.16, "inbatch_neg_score": 0.4656, "inbatch_pos_score": 1.1221, "learning_rate": 4.6500000000000005e-05, "loss": 3.1806, "norm_diff": 0.1161, "norm_loss": 0.0, "num_token_doc": 66.948, "num_token_overlap": 18.0365, "num_token_query": 52.207, "num_token_union": 73.5095, "num_word_context": 202.4995, "num_word_doc": 49.9434, "num_word_query": 39.8005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 645.1898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4651, "query_norm": 1.6021, "queue_k_norm": 1.4835, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.207, "sent_len_1": 66.948, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3225, "stdk": 0.0472, "stdq": 0.0446, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.1851, "doc_norm": 1.4789, "encoder_q-embeddings": 1110.6367, "encoder_q-layer.0": 860.0463, "encoder_q-layer.1": 1049.885, "encoder_q-layer.10": 93.2804, "encoder_q-layer.11": 173.2036, "encoder_q-layer.2": 1210.6168, "encoder_q-layer.3": 1199.5323, "encoder_q-layer.4": 1071.2134, "encoder_q-layer.5": 1160.953, "encoder_q-layer.6": 1145.0277, "encoder_q-layer.7": 1065.0481, "encoder_q-layer.8": 869.6957, "encoder_q-layer.9": 428.7406, "epoch": 0.16, "inbatch_neg_score": 0.4743, "inbatch_pos_score": 1.1211, "learning_rate": 4.644444444444445e-05, "loss": 3.1851, "norm_diff": 0.1655, "norm_loss": 0.0, "num_token_doc": 66.8092, "num_token_overlap": 17.972, "num_token_query": 52.2118, "num_token_union": 73.5027, "num_word_context": 202.3909, "num_word_doc": 49.8504, "num_word_query": 39.7707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1414.9342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4712, "query_norm": 1.6444, "queue_k_norm": 1.482, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2118, "sent_len_1": 66.8092, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.055, "stdk": 0.0469, "stdq": 0.0451, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.1632, "doc_norm": 1.4846, "encoder_q-embeddings": 346.3167, "encoder_q-layer.0": 255.1874, "encoder_q-layer.1": 294.6768, "encoder_q-layer.10": 92.9577, "encoder_q-layer.11": 183.8405, "encoder_q-layer.2": 329.0183, "encoder_q-layer.3": 353.3687, "encoder_q-layer.4": 384.8976, "encoder_q-layer.5": 372.151, "encoder_q-layer.6": 383.6288, "encoder_q-layer.7": 409.6462, "encoder_q-layer.8": 437.1949, "encoder_q-layer.9": 208.6648, "epoch": 0.16, "inbatch_neg_score": 0.4892, "inbatch_pos_score": 1.1104, "learning_rate": 4.638888888888889e-05, "loss": 3.1632, "norm_diff": 0.1517, "norm_loss": 0.0, "num_token_doc": 66.4861, "num_token_overlap": 17.9292, "num_token_query": 52.0578, "num_token_union": 73.3041, "num_word_context": 201.8447, "num_word_doc": 49.5662, "num_word_query": 39.6767, "postclip_grad_norm": 1.0, "preclip_grad_norm": 483.9476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4849, "query_norm": 1.6364, "queue_k_norm": 1.4808, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0578, "sent_len_1": 66.4861, "sent_len_max_0": 127.99, "sent_len_max_1": 188.53, "stdk": 0.0471, "stdq": 0.0444, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.145, "doc_norm": 1.4792, "encoder_q-embeddings": 602.1476, "encoder_q-layer.0": 407.5267, "encoder_q-layer.1": 426.5584, "encoder_q-layer.10": 99.5875, "encoder_q-layer.11": 205.1965, "encoder_q-layer.2": 456.4955, "encoder_q-layer.3": 478.887, "encoder_q-layer.4": 487.4972, "encoder_q-layer.5": 620.8314, "encoder_q-layer.6": 655.9911, "encoder_q-layer.7": 540.16, "encoder_q-layer.8": 229.3437, "encoder_q-layer.9": 102.606, "epoch": 0.16, "inbatch_neg_score": 0.4927, "inbatch_pos_score": 1.1045, "learning_rate": 4.633333333333333e-05, "loss": 3.145, "norm_diff": 0.1246, "norm_loss": 0.0, "num_token_doc": 66.8233, "num_token_overlap": 18.0146, "num_token_query": 52.2974, "num_token_union": 73.523, "num_word_context": 202.631, "num_word_doc": 49.9095, "num_word_query": 39.8543, "postclip_grad_norm": 1.0, "preclip_grad_norm": 673.5862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4878, "query_norm": 1.6038, "queue_k_norm": 1.4822, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2974, "sent_len_1": 66.8233, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.9475, "stdk": 0.0469, "stdq": 0.0434, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1732, "doc_norm": 1.4801, "encoder_q-embeddings": 2578.0586, "encoder_q-layer.0": 1852.6855, "encoder_q-layer.1": 2002.1561, "encoder_q-layer.10": 89.4938, "encoder_q-layer.11": 181.8059, "encoder_q-layer.2": 1730.1946, "encoder_q-layer.3": 1690.2386, "encoder_q-layer.4": 1825.0613, "encoder_q-layer.5": 1620.0083, "encoder_q-layer.6": 1364.2643, "encoder_q-layer.7": 942.2173, "encoder_q-layer.8": 416.6052, "encoder_q-layer.9": 163.3623, "epoch": 0.16, "inbatch_neg_score": 0.499, "inbatch_pos_score": 1.1553, "learning_rate": 4.627777777777778e-05, "loss": 3.1732, "norm_diff": 0.2019, "norm_loss": 0.0, "num_token_doc": 66.568, "num_token_overlap": 17.9842, "num_token_query": 52.2369, "num_token_union": 73.3787, "num_word_context": 201.9926, "num_word_doc": 49.7164, "num_word_query": 39.8273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2311.6918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4958, "query_norm": 1.682, "queue_k_norm": 1.4825, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2369, "sent_len_1": 66.568, "sent_len_max_0": 128.0, "sent_len_max_1": 186.8275, "stdk": 0.0469, "stdq": 0.0463, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1511, "doc_norm": 1.4844, "encoder_q-embeddings": 412.2953, "encoder_q-layer.0": 281.7355, "encoder_q-layer.1": 368.4987, "encoder_q-layer.10": 89.6621, "encoder_q-layer.11": 184.6748, "encoder_q-layer.2": 470.2164, "encoder_q-layer.3": 496.3122, "encoder_q-layer.4": 414.3399, "encoder_q-layer.5": 343.7527, "encoder_q-layer.6": 345.9865, "encoder_q-layer.7": 308.5457, "encoder_q-layer.8": 282.506, "encoder_q-layer.9": 167.4702, "epoch": 0.16, "inbatch_neg_score": 0.5233, "inbatch_pos_score": 1.1738, "learning_rate": 4.6222222222222224e-05, "loss": 3.1511, "norm_diff": 0.1959, "norm_loss": 0.0, "num_token_doc": 66.8192, "num_token_overlap": 18.0154, "num_token_query": 52.2867, "num_token_union": 73.4773, "num_word_context": 202.4291, "num_word_doc": 49.8652, "num_word_query": 39.8823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 512.524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5215, "query_norm": 1.6802, "queue_k_norm": 1.4811, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2867, "sent_len_1": 66.8192, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1062, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1466, "doc_norm": 1.4789, "encoder_q-embeddings": 244.5276, "encoder_q-layer.0": 192.8132, "encoder_q-layer.1": 217.4159, "encoder_q-layer.10": 93.0764, "encoder_q-layer.11": 170.1851, "encoder_q-layer.2": 248.2843, "encoder_q-layer.3": 259.4829, "encoder_q-layer.4": 247.9425, "encoder_q-layer.5": 234.7379, "encoder_q-layer.6": 280.117, "encoder_q-layer.7": 274.3586, "encoder_q-layer.8": 320.1861, "encoder_q-layer.9": 157.8869, "epoch": 0.16, "inbatch_neg_score": 0.5438, "inbatch_pos_score": 1.2207, "learning_rate": 4.6166666666666666e-05, "loss": 3.1466, "norm_diff": 0.2453, "norm_loss": 0.0, "num_token_doc": 66.7981, "num_token_overlap": 17.9941, "num_token_query": 52.1564, "num_token_union": 73.4417, "num_word_context": 202.6184, "num_word_doc": 49.8574, "num_word_query": 39.7629, "postclip_grad_norm": 1.0, "preclip_grad_norm": 348.7913, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5396, "query_norm": 1.7242, "queue_k_norm": 1.4823, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1564, "sent_len_1": 66.7981, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4638, "stdk": 0.0468, "stdq": 0.0457, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1435, "doc_norm": 1.4806, "encoder_q-embeddings": 161.5365, "encoder_q-layer.0": 110.7047, "encoder_q-layer.1": 127.1957, "encoder_q-layer.10": 119.2463, "encoder_q-layer.11": 228.2936, "encoder_q-layer.2": 151.4102, "encoder_q-layer.3": 150.1316, "encoder_q-layer.4": 153.7353, "encoder_q-layer.5": 155.0446, "encoder_q-layer.6": 150.1969, "encoder_q-layer.7": 141.2188, "encoder_q-layer.8": 150.3065, "encoder_q-layer.9": 126.5709, "epoch": 0.17, "inbatch_neg_score": 0.5411, "inbatch_pos_score": 1.1777, "learning_rate": 4.6111111111111115e-05, "loss": 3.1435, "norm_diff": 0.1811, "norm_loss": 0.0, "num_token_doc": 66.5858, "num_token_overlap": 17.9943, "num_token_query": 52.296, "num_token_union": 73.3961, "num_word_context": 202.0202, "num_word_doc": 49.7061, "num_word_query": 39.8742, "postclip_grad_norm": 1.0, "preclip_grad_norm": 217.0432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5386, "query_norm": 1.6617, "queue_k_norm": 1.4839, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.296, "sent_len_1": 66.5858, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.0075, "stdk": 0.0468, "stdq": 0.0435, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1589, "doc_norm": 1.482, "encoder_q-embeddings": 293.1833, "encoder_q-layer.0": 204.7639, "encoder_q-layer.1": 237.5372, "encoder_q-layer.10": 81.0282, "encoder_q-layer.11": 172.2901, "encoder_q-layer.2": 272.9219, "encoder_q-layer.3": 267.9765, "encoder_q-layer.4": 285.3362, "encoder_q-layer.5": 260.5939, "encoder_q-layer.6": 302.757, "encoder_q-layer.7": 263.2333, "encoder_q-layer.8": 203.8768, "encoder_q-layer.9": 98.8289, "epoch": 0.17, "inbatch_neg_score": 0.5225, "inbatch_pos_score": 1.1777, "learning_rate": 4.605555555555556e-05, "loss": 3.1589, "norm_diff": 0.2056, "norm_loss": 0.0, "num_token_doc": 66.6097, "num_token_overlap": 17.9791, "num_token_query": 52.259, "num_token_union": 73.3913, "num_word_context": 202.1024, "num_word_doc": 49.6778, "num_word_query": 39.8275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 355.0175, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5176, "query_norm": 1.6876, "queue_k_norm": 1.4812, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.259, "sent_len_1": 66.6097, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.2488, "stdk": 0.0468, "stdq": 0.0456, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1495, "doc_norm": 1.4929, "encoder_q-embeddings": 214.9044, "encoder_q-layer.0": 162.4449, "encoder_q-layer.1": 206.3704, "encoder_q-layer.10": 91.8881, "encoder_q-layer.11": 180.8506, "encoder_q-layer.2": 238.6958, "encoder_q-layer.3": 233.427, "encoder_q-layer.4": 212.4113, "encoder_q-layer.5": 181.6074, "encoder_q-layer.6": 162.5259, "encoder_q-layer.7": 121.7967, "encoder_q-layer.8": 103.4771, "encoder_q-layer.9": 84.9909, "epoch": 0.17, "inbatch_neg_score": 0.5381, "inbatch_pos_score": 1.2324, "learning_rate": 4.600000000000001e-05, "loss": 3.1495, "norm_diff": 0.2376, "norm_loss": 0.0, "num_token_doc": 66.8063, "num_token_overlap": 17.9832, "num_token_query": 52.1724, "num_token_union": 73.4363, "num_word_context": 202.3125, "num_word_doc": 49.826, "num_word_query": 39.7804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 265.4344, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5361, "query_norm": 1.7305, "queue_k_norm": 1.4852, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1724, "sent_len_1": 66.8063, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5575, "stdk": 0.0472, "stdq": 0.0466, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1598, "doc_norm": 1.4889, "encoder_q-embeddings": 555.1445, "encoder_q-layer.0": 400.5802, "encoder_q-layer.1": 419.359, "encoder_q-layer.10": 101.7208, "encoder_q-layer.11": 195.728, "encoder_q-layer.2": 513.9998, "encoder_q-layer.3": 545.887, "encoder_q-layer.4": 519.9146, "encoder_q-layer.5": 479.5741, "encoder_q-layer.6": 545.1704, "encoder_q-layer.7": 423.4618, "encoder_q-layer.8": 169.0914, "encoder_q-layer.9": 102.2103, "epoch": 0.17, "inbatch_neg_score": 0.5476, "inbatch_pos_score": 1.1914, "learning_rate": 4.594444444444444e-05, "loss": 3.1598, "norm_diff": 0.216, "norm_loss": 0.0, "num_token_doc": 66.7312, "num_token_overlap": 17.9554, "num_token_query": 52.1733, "num_token_union": 73.3855, "num_word_context": 202.3592, "num_word_doc": 49.8038, "num_word_query": 39.7566, "postclip_grad_norm": 1.0, "preclip_grad_norm": 629.842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5449, "query_norm": 1.7048, "queue_k_norm": 1.4876, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1733, "sent_len_1": 66.7312, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3063, "stdk": 0.047, "stdq": 0.0449, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.175, "doc_norm": 1.4836, "encoder_q-embeddings": 172.033, "encoder_q-layer.0": 129.3622, "encoder_q-layer.1": 138.0825, "encoder_q-layer.10": 92.9311, "encoder_q-layer.11": 209.4973, "encoder_q-layer.2": 133.259, "encoder_q-layer.3": 126.1843, "encoder_q-layer.4": 125.9786, "encoder_q-layer.5": 117.7931, "encoder_q-layer.6": 128.7937, "encoder_q-layer.7": 139.9057, "encoder_q-layer.8": 129.2315, "encoder_q-layer.9": 96.9137, "epoch": 0.17, "inbatch_neg_score": 0.5192, "inbatch_pos_score": 1.1602, "learning_rate": 4.588888888888889e-05, "loss": 3.175, "norm_diff": 0.1633, "norm_loss": 0.0, "num_token_doc": 66.744, "num_token_overlap": 18.0014, "num_token_query": 52.0509, "num_token_union": 73.3319, "num_word_context": 202.3774, "num_word_doc": 49.7847, "num_word_query": 39.7213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 202.6665, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5176, "query_norm": 1.6469, "queue_k_norm": 1.486, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0509, "sent_len_1": 66.744, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.3975, "stdk": 0.0468, "stdq": 0.045, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1751, "doc_norm": 1.4866, "encoder_q-embeddings": 314.3243, "encoder_q-layer.0": 232.0456, "encoder_q-layer.1": 250.6521, "encoder_q-layer.10": 102.6198, "encoder_q-layer.11": 212.7031, "encoder_q-layer.2": 274.2993, "encoder_q-layer.3": 293.8828, "encoder_q-layer.4": 229.2149, "encoder_q-layer.5": 201.2342, "encoder_q-layer.6": 211.1819, "encoder_q-layer.7": 193.3328, "encoder_q-layer.8": 157.9061, "encoder_q-layer.9": 102.7345, "epoch": 0.17, "inbatch_neg_score": 0.537, "inbatch_pos_score": 1.1934, "learning_rate": 4.5833333333333334e-05, "loss": 3.1751, "norm_diff": 0.1922, "norm_loss": 0.0, "num_token_doc": 66.9461, "num_token_overlap": 17.9559, "num_token_query": 52.283, "num_token_union": 73.658, "num_word_context": 202.4301, "num_word_doc": 49.9496, "num_word_query": 39.8655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 333.8369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5371, "query_norm": 1.6788, "queue_k_norm": 1.4887, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.283, "sent_len_1": 66.9461, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.7175, "stdk": 0.0469, "stdq": 0.0465, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1703, "doc_norm": 1.4864, "encoder_q-embeddings": 606.2583, "encoder_q-layer.0": 439.8822, "encoder_q-layer.1": 481.5235, "encoder_q-layer.10": 92.209, "encoder_q-layer.11": 182.5845, "encoder_q-layer.2": 548.198, "encoder_q-layer.3": 631.2048, "encoder_q-layer.4": 651.6368, "encoder_q-layer.5": 777.4136, "encoder_q-layer.6": 839.7968, "encoder_q-layer.7": 980.8576, "encoder_q-layer.8": 967.3708, "encoder_q-layer.9": 586.8347, "epoch": 0.17, "inbatch_neg_score": 0.499, "inbatch_pos_score": 1.168, "learning_rate": 4.577777777777778e-05, "loss": 3.1703, "norm_diff": 0.1478, "norm_loss": 0.0, "num_token_doc": 66.7289, "num_token_overlap": 17.9972, "num_token_query": 52.1207, "num_token_union": 73.3759, "num_word_context": 202.265, "num_word_doc": 49.8127, "num_word_query": 39.7322, "postclip_grad_norm": 1.0, "preclip_grad_norm": 979.2892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4958, "query_norm": 1.6342, "queue_k_norm": 1.4864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1207, "sent_len_1": 66.7289, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.6075, "stdk": 0.0469, "stdq": 0.0463, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1528, "doc_norm": 1.4886, "encoder_q-embeddings": 359.265, "encoder_q-layer.0": 283.2655, "encoder_q-layer.1": 323.9582, "encoder_q-layer.10": 91.3151, "encoder_q-layer.11": 213.4307, "encoder_q-layer.2": 268.6117, "encoder_q-layer.3": 227.9503, "encoder_q-layer.4": 222.6159, "encoder_q-layer.5": 232.8995, "encoder_q-layer.6": 258.635, "encoder_q-layer.7": 203.578, "encoder_q-layer.8": 172.6043, "encoder_q-layer.9": 105.6925, "epoch": 0.17, "inbatch_neg_score": 0.4821, "inbatch_pos_score": 1.1514, "learning_rate": 4.572222222222222e-05, "loss": 3.1528, "norm_diff": 0.1428, "norm_loss": 0.0, "num_token_doc": 67.0978, "num_token_overlap": 18.0948, "num_token_query": 52.2286, "num_token_union": 73.5879, "num_word_context": 202.573, "num_word_doc": 50.1054, "num_word_query": 39.8354, "postclip_grad_norm": 1.0, "preclip_grad_norm": 368.3193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4795, "query_norm": 1.6314, "queue_k_norm": 1.4867, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2286, "sent_len_1": 67.0978, "sent_len_max_0": 127.9838, "sent_len_max_1": 189.7312, "stdk": 0.0469, "stdq": 0.0469, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.1445, "doc_norm": 1.4917, "encoder_q-embeddings": 724.6269, "encoder_q-layer.0": 546.9397, "encoder_q-layer.1": 616.1453, "encoder_q-layer.10": 102.3729, "encoder_q-layer.11": 221.8597, "encoder_q-layer.2": 683.3764, "encoder_q-layer.3": 690.1502, "encoder_q-layer.4": 603.9371, "encoder_q-layer.5": 611.1408, "encoder_q-layer.6": 734.5422, "encoder_q-layer.7": 580.6418, "encoder_q-layer.8": 337.5548, "encoder_q-layer.9": 136.2388, "epoch": 0.17, "inbatch_neg_score": 0.4653, "inbatch_pos_score": 1.1045, "learning_rate": 4.566666666666667e-05, "loss": 3.1445, "norm_diff": 0.0799, "norm_loss": 0.0, "num_token_doc": 67.0545, "num_token_overlap": 18.0946, "num_token_query": 52.5597, "num_token_union": 73.7469, "num_word_context": 203.0972, "num_word_doc": 50.0333, "num_word_query": 40.069, "postclip_grad_norm": 1.0, "preclip_grad_norm": 831.7641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4648, "query_norm": 1.5715, "queue_k_norm": 1.4849, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.5597, "sent_len_1": 67.0545, "sent_len_max_0": 128.0, "sent_len_max_1": 187.31, "stdk": 0.0472, "stdq": 0.0451, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1723, "doc_norm": 1.4874, "encoder_q-embeddings": 573.3056, "encoder_q-layer.0": 394.1472, "encoder_q-layer.1": 438.2108, "encoder_q-layer.10": 85.0822, "encoder_q-layer.11": 179.7796, "encoder_q-layer.2": 487.9601, "encoder_q-layer.3": 474.6965, "encoder_q-layer.4": 540.8192, "encoder_q-layer.5": 500.1784, "encoder_q-layer.6": 423.8402, "encoder_q-layer.7": 304.6465, "encoder_q-layer.8": 137.2385, "encoder_q-layer.9": 88.9913, "epoch": 0.17, "inbatch_neg_score": 0.4742, "inbatch_pos_score": 1.165, "learning_rate": 4.561111111111112e-05, "loss": 3.1723, "norm_diff": 0.095, "norm_loss": 0.0, "num_token_doc": 66.6402, "num_token_overlap": 17.9599, "num_token_query": 52.0153, "num_token_union": 73.2765, "num_word_context": 202.0657, "num_word_doc": 49.67, "num_word_query": 39.634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 602.3489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4736, "query_norm": 1.5824, "queue_k_norm": 1.484, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0153, "sent_len_1": 66.6402, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9075, "stdk": 0.047, "stdq": 0.0461, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1262, "doc_norm": 1.4739, "encoder_q-embeddings": 664.7184, "encoder_q-layer.0": 483.3258, "encoder_q-layer.1": 554.9905, "encoder_q-layer.10": 89.2633, "encoder_q-layer.11": 199.0987, "encoder_q-layer.2": 615.7606, "encoder_q-layer.3": 606.9073, "encoder_q-layer.4": 697.3196, "encoder_q-layer.5": 581.475, "encoder_q-layer.6": 626.7043, "encoder_q-layer.7": 524.445, "encoder_q-layer.8": 279.3582, "encoder_q-layer.9": 159.6204, "epoch": 0.18, "inbatch_neg_score": 0.4805, "inbatch_pos_score": 1.1416, "learning_rate": 4.555555555555556e-05, "loss": 3.1262, "norm_diff": 0.1105, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 17.9964, "num_token_query": 52.1494, "num_token_union": 73.4043, "num_word_context": 202.0683, "num_word_doc": 49.8434, "num_word_query": 39.7618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 762.6426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.479, "query_norm": 1.5844, "queue_k_norm": 1.4829, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1494, "sent_len_1": 66.7773, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1, "stdk": 0.0465, "stdq": 0.0462, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.172, "doc_norm": 1.4803, "encoder_q-embeddings": 1066.0471, "encoder_q-layer.0": 798.3528, "encoder_q-layer.1": 945.9674, "encoder_q-layer.10": 87.2405, "encoder_q-layer.11": 170.0141, "encoder_q-layer.2": 600.8622, "encoder_q-layer.3": 531.0761, "encoder_q-layer.4": 524.1921, "encoder_q-layer.5": 482.2806, "encoder_q-layer.6": 501.7688, "encoder_q-layer.7": 385.6395, "encoder_q-layer.8": 194.7426, "encoder_q-layer.9": 104.1446, "epoch": 0.18, "inbatch_neg_score": 0.4359, "inbatch_pos_score": 1.1084, "learning_rate": 4.55e-05, "loss": 3.172, "norm_diff": 0.0629, "norm_loss": 0.0, "num_token_doc": 66.6727, "num_token_overlap": 17.9137, "num_token_query": 52.1019, "num_token_union": 73.4441, "num_word_context": 202.3063, "num_word_doc": 49.7977, "num_word_query": 39.7342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 926.9586, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4353, "query_norm": 1.5432, "queue_k_norm": 1.4784, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1019, "sent_len_1": 66.6727, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8438, "stdk": 0.0468, "stdq": 0.0455, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.1547, "doc_norm": 1.477, "encoder_q-embeddings": 222.2629, "encoder_q-layer.0": 145.7611, "encoder_q-layer.1": 168.2578, "encoder_q-layer.10": 85.4375, "encoder_q-layer.11": 181.9464, "encoder_q-layer.2": 178.6399, "encoder_q-layer.3": 195.6643, "encoder_q-layer.4": 197.8177, "encoder_q-layer.5": 197.2271, "encoder_q-layer.6": 199.3932, "encoder_q-layer.7": 161.282, "encoder_q-layer.8": 99.3973, "encoder_q-layer.9": 82.3048, "epoch": 0.18, "inbatch_neg_score": 0.4301, "inbatch_pos_score": 1.1113, "learning_rate": 4.5444444444444444e-05, "loss": 3.1547, "norm_diff": 0.0939, "norm_loss": 0.0, "num_token_doc": 66.8039, "num_token_overlap": 17.9944, "num_token_query": 52.1723, "num_token_union": 73.4679, "num_word_context": 202.2452, "num_word_doc": 49.8109, "num_word_query": 39.7519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 253.2893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4272, "query_norm": 1.5709, "queue_k_norm": 1.4784, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1723, "sent_len_1": 66.8039, "sent_len_max_0": 127.99, "sent_len_max_1": 192.05, "stdk": 0.0468, "stdq": 0.0463, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.1357, "doc_norm": 1.474, "encoder_q-embeddings": 697.0399, "encoder_q-layer.0": 518.0829, "encoder_q-layer.1": 594.3557, "encoder_q-layer.10": 179.0064, "encoder_q-layer.11": 390.8766, "encoder_q-layer.2": 614.3979, "encoder_q-layer.3": 679.8057, "encoder_q-layer.4": 776.5385, "encoder_q-layer.5": 633.9219, "encoder_q-layer.6": 796.1393, "encoder_q-layer.7": 563.9373, "encoder_q-layer.8": 295.5008, "encoder_q-layer.9": 208.0293, "epoch": 0.18, "inbatch_neg_score": 0.3937, "inbatch_pos_score": 1.0537, "learning_rate": 4.538888888888889e-05, "loss": 3.1357, "norm_diff": 0.1025, "norm_loss": 0.0, "num_token_doc": 66.8041, "num_token_overlap": 18.007, "num_token_query": 52.2019, "num_token_union": 73.4323, "num_word_context": 202.4185, "num_word_doc": 49.8333, "num_word_query": 39.7718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 877.3442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3926, "query_norm": 1.5765, "queue_k_norm": 1.4764, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2019, "sent_len_1": 66.8041, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.0425, "stdk": 0.0467, "stdq": 0.0466, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.168, "doc_norm": 1.4711, "encoder_q-embeddings": 634.6042, "encoder_q-layer.0": 443.7311, "encoder_q-layer.1": 478.8965, "encoder_q-layer.10": 189.0194, "encoder_q-layer.11": 446.3759, "encoder_q-layer.2": 301.2212, "encoder_q-layer.3": 271.9326, "encoder_q-layer.4": 252.592, "encoder_q-layer.5": 255.0263, "encoder_q-layer.6": 281.1376, "encoder_q-layer.7": 281.1297, "encoder_q-layer.8": 275.8557, "encoder_q-layer.9": 203.5233, "epoch": 0.18, "inbatch_neg_score": 0.354, "inbatch_pos_score": 0.9834, "learning_rate": 4.5333333333333335e-05, "loss": 3.168, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.7185, "num_token_overlap": 17.9786, "num_token_query": 52.1588, "num_token_union": 73.4575, "num_word_context": 202.5942, "num_word_doc": 49.8273, "num_word_query": 39.7751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 556.8523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3542, "query_norm": 1.5256, "queue_k_norm": 1.4733, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1588, "sent_len_1": 66.7185, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5188, "stdk": 0.0468, "stdq": 0.0451, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1179, "doc_norm": 1.4727, "encoder_q-embeddings": 1604.1278, "encoder_q-layer.0": 1137.2457, "encoder_q-layer.1": 1297.1797, "encoder_q-layer.10": 205.5375, "encoder_q-layer.11": 369.399, "encoder_q-layer.2": 1506.5852, "encoder_q-layer.3": 1595.5309, "encoder_q-layer.4": 1714.329, "encoder_q-layer.5": 1658.783, "encoder_q-layer.6": 1929.6398, "encoder_q-layer.7": 1583.9187, "encoder_q-layer.8": 672.5632, "encoder_q-layer.9": 216.4369, "epoch": 0.18, "inbatch_neg_score": 0.3529, "inbatch_pos_score": 1.043, "learning_rate": 4.527777777777778e-05, "loss": 3.1179, "norm_diff": 0.0993, "norm_loss": 0.0, "num_token_doc": 66.7925, "num_token_overlap": 18.0937, "num_token_query": 52.2975, "num_token_union": 73.4364, "num_word_context": 202.4089, "num_word_doc": 49.8426, "num_word_query": 39.878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.9182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3511, "query_norm": 1.572, "queue_k_norm": 1.4698, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2975, "sent_len_1": 66.7925, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8938, "stdk": 0.047, "stdq": 0.0467, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1191, "doc_norm": 1.4664, "encoder_q-embeddings": 1264.4506, "encoder_q-layer.0": 1080.8431, "encoder_q-layer.1": 1111.6531, "encoder_q-layer.10": 179.8081, "encoder_q-layer.11": 377.7275, "encoder_q-layer.2": 1198.7426, "encoder_q-layer.3": 1124.8492, "encoder_q-layer.4": 1035.2855, "encoder_q-layer.5": 888.0519, "encoder_q-layer.6": 1018.7893, "encoder_q-layer.7": 695.3057, "encoder_q-layer.8": 305.7231, "encoder_q-layer.9": 170.7475, "epoch": 0.18, "inbatch_neg_score": 0.3659, "inbatch_pos_score": 1.0029, "learning_rate": 4.522222222222223e-05, "loss": 3.1191, "norm_diff": 0.0843, "norm_loss": 0.0, "num_token_doc": 66.7046, "num_token_overlap": 18.0423, "num_token_query": 52.2649, "num_token_union": 73.458, "num_word_context": 202.3138, "num_word_doc": 49.7727, "num_word_query": 39.8357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1367.8198, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.364, "query_norm": 1.5507, "queue_k_norm": 1.4668, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2649, "sent_len_1": 66.7046, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9013, "stdk": 0.0468, "stdq": 0.0454, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1399, "doc_norm": 1.4579, "encoder_q-embeddings": 372.3221, "encoder_q-layer.0": 299.6274, "encoder_q-layer.1": 305.8694, "encoder_q-layer.10": 100.9401, "encoder_q-layer.11": 195.4015, "encoder_q-layer.2": 335.5874, "encoder_q-layer.3": 305.7729, "encoder_q-layer.4": 213.7329, "encoder_q-layer.5": 184.1693, "encoder_q-layer.6": 195.5016, "encoder_q-layer.7": 158.9711, "encoder_q-layer.8": 129.0784, "encoder_q-layer.9": 91.8367, "epoch": 0.18, "inbatch_neg_score": 0.3891, "inbatch_pos_score": 1.0264, "learning_rate": 4.516666666666667e-05, "loss": 3.1399, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.8361, "num_token_overlap": 18.0612, "num_token_query": 52.3008, "num_token_union": 73.4883, "num_word_context": 202.4404, "num_word_doc": 49.8973, "num_word_query": 39.8707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 368.4452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3865, "query_norm": 1.5566, "queue_k_norm": 1.4622, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3008, "sent_len_1": 66.8361, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.3275, "stdk": 0.0466, "stdq": 0.0452, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1521, "doc_norm": 1.4605, "encoder_q-embeddings": 165.8001, "encoder_q-layer.0": 114.7254, "encoder_q-layer.1": 136.5636, "encoder_q-layer.10": 100.268, "encoder_q-layer.11": 204.1388, "encoder_q-layer.2": 152.9724, "encoder_q-layer.3": 149.9632, "encoder_q-layer.4": 154.5854, "encoder_q-layer.5": 169.1615, "encoder_q-layer.6": 173.5003, "encoder_q-layer.7": 137.7637, "encoder_q-layer.8": 117.358, "encoder_q-layer.9": 88.5474, "epoch": 0.18, "inbatch_neg_score": 0.3624, "inbatch_pos_score": 1.0117, "learning_rate": 4.511111111111112e-05, "loss": 3.1521, "norm_diff": 0.0945, "norm_loss": 0.0, "num_token_doc": 66.6228, "num_token_overlap": 17.9534, "num_token_query": 52.1588, "num_token_union": 73.4438, "num_word_context": 202.3453, "num_word_doc": 49.7532, "num_word_query": 39.7955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 212.1647, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3601, "query_norm": 1.555, "queue_k_norm": 1.4614, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1588, "sent_len_1": 66.6228, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0662, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1382, "doc_norm": 1.4577, "encoder_q-embeddings": 206.4135, "encoder_q-layer.0": 136.729, "encoder_q-layer.1": 155.0316, "encoder_q-layer.10": 93.0666, "encoder_q-layer.11": 184.1402, "encoder_q-layer.2": 172.6104, "encoder_q-layer.3": 169.9704, "encoder_q-layer.4": 162.8911, "encoder_q-layer.5": 159.4238, "encoder_q-layer.6": 169.4335, "encoder_q-layer.7": 162.1765, "encoder_q-layer.8": 136.4237, "encoder_q-layer.9": 94.5404, "epoch": 0.18, "inbatch_neg_score": 0.3254, "inbatch_pos_score": 0.9839, "learning_rate": 4.5055555555555554e-05, "loss": 3.1382, "norm_diff": 0.1664, "norm_loss": 0.0, "num_token_doc": 67.0066, "num_token_overlap": 18.0954, "num_token_query": 52.2251, "num_token_union": 73.4849, "num_word_context": 202.3655, "num_word_doc": 49.9555, "num_word_query": 39.8107, "postclip_grad_norm": 1.0, "preclip_grad_norm": 234.7186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.323, "query_norm": 1.624, "queue_k_norm": 1.4564, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2251, "sent_len_1": 67.0066, "sent_len_max_0": 128.0, "sent_len_max_1": 191.375, "stdk": 0.0468, "stdq": 0.0469, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.1146, "doc_norm": 1.4536, "encoder_q-embeddings": 507.066, "encoder_q-layer.0": 388.1573, "encoder_q-layer.1": 492.2348, "encoder_q-layer.10": 86.7977, "encoder_q-layer.11": 192.9719, "encoder_q-layer.2": 568.4299, "encoder_q-layer.3": 691.3804, "encoder_q-layer.4": 537.1601, "encoder_q-layer.5": 287.9976, "encoder_q-layer.6": 235.3973, "encoder_q-layer.7": 216.0611, "encoder_q-layer.8": 160.1217, "encoder_q-layer.9": 84.6172, "epoch": 0.19, "inbatch_neg_score": 0.3531, "inbatch_pos_score": 1.0166, "learning_rate": 4.5e-05, "loss": 3.1146, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.9749, "num_token_overlap": 18.0077, "num_token_query": 52.1632, "num_token_union": 73.5412, "num_word_context": 202.3304, "num_word_doc": 49.9946, "num_word_query": 39.7811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 587.1988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3518, "query_norm": 1.5712, "queue_k_norm": 1.4566, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1632, "sent_len_1": 66.9749, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3038, "stdk": 0.0468, "stdq": 0.0451, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.1245, "doc_norm": 1.4571, "encoder_q-embeddings": 205.8818, "encoder_q-layer.0": 165.4312, "encoder_q-layer.1": 161.5643, "encoder_q-layer.10": 90.1377, "encoder_q-layer.11": 175.6567, "encoder_q-layer.2": 144.007, "encoder_q-layer.3": 140.8218, "encoder_q-layer.4": 143.8594, "encoder_q-layer.5": 135.1435, "encoder_q-layer.6": 136.8601, "encoder_q-layer.7": 117.826, "encoder_q-layer.8": 102.7127, "encoder_q-layer.9": 84.3533, "epoch": 0.19, "inbatch_neg_score": 0.3313, "inbatch_pos_score": 0.9902, "learning_rate": 4.4944444444444445e-05, "loss": 3.1245, "norm_diff": 0.1617, "norm_loss": 0.0, "num_token_doc": 66.9481, "num_token_overlap": 18.0206, "num_token_query": 52.2559, "num_token_union": 73.5873, "num_word_context": 202.6361, "num_word_doc": 50.0064, "num_word_query": 39.8388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 213.9112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3291, "query_norm": 1.6188, "queue_k_norm": 1.4553, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2559, "sent_len_1": 66.9481, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5325, "stdk": 0.047, "stdq": 0.0461, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.125, "doc_norm": 1.4554, "encoder_q-embeddings": 167.9182, "encoder_q-layer.0": 122.4542, "encoder_q-layer.1": 132.2968, "encoder_q-layer.10": 97.7552, "encoder_q-layer.11": 185.8178, "encoder_q-layer.2": 137.6997, "encoder_q-layer.3": 146.9144, "encoder_q-layer.4": 144.0526, "encoder_q-layer.5": 157.5999, "encoder_q-layer.6": 143.6628, "encoder_q-layer.7": 126.021, "encoder_q-layer.8": 111.3496, "encoder_q-layer.9": 87.5425, "epoch": 0.19, "inbatch_neg_score": 0.3229, "inbatch_pos_score": 0.9878, "learning_rate": 4.4888888888888894e-05, "loss": 3.125, "norm_diff": 0.1667, "norm_loss": 0.0, "num_token_doc": 66.591, "num_token_overlap": 17.9899, "num_token_query": 52.2742, "num_token_union": 73.398, "num_word_context": 202.1511, "num_word_doc": 49.6692, "num_word_query": 39.8621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 202.9188, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3215, "query_norm": 1.6221, "queue_k_norm": 1.4511, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2742, "sent_len_1": 66.591, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.68, "stdk": 0.047, "stdq": 0.046, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.137, "doc_norm": 1.4455, "encoder_q-embeddings": 710.6893, "encoder_q-layer.0": 499.262, "encoder_q-layer.1": 563.2078, "encoder_q-layer.10": 93.0263, "encoder_q-layer.11": 194.6568, "encoder_q-layer.2": 725.449, "encoder_q-layer.3": 758.7191, "encoder_q-layer.4": 804.4591, "encoder_q-layer.5": 672.33, "encoder_q-layer.6": 642.9095, "encoder_q-layer.7": 638.8542, "encoder_q-layer.8": 294.063, "encoder_q-layer.9": 98.9227, "epoch": 0.19, "inbatch_neg_score": 0.4047, "inbatch_pos_score": 1.0732, "learning_rate": 4.483333333333333e-05, "loss": 3.137, "norm_diff": 0.1538, "norm_loss": 0.0, "num_token_doc": 66.8807, "num_token_overlap": 17.998, "num_token_query": 52.1378, "num_token_union": 73.4802, "num_word_context": 202.4372, "num_word_doc": 49.9111, "num_word_query": 39.7244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 837.4681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4028, "query_norm": 1.5993, "queue_k_norm": 1.4508, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1378, "sent_len_1": 66.8807, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7038, "stdk": 0.0467, "stdq": 0.0464, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.1642, "doc_norm": 1.4496, "encoder_q-embeddings": 1372.7448, "encoder_q-layer.0": 978.5114, "encoder_q-layer.1": 1112.369, "encoder_q-layer.10": 96.8784, "encoder_q-layer.11": 196.266, "encoder_q-layer.2": 1131.4821, "encoder_q-layer.3": 1097.3708, "encoder_q-layer.4": 664.4725, "encoder_q-layer.5": 699.3038, "encoder_q-layer.6": 679.9239, "encoder_q-layer.7": 601.757, "encoder_q-layer.8": 310.9004, "encoder_q-layer.9": 94.0355, "epoch": 0.19, "inbatch_neg_score": 0.393, "inbatch_pos_score": 1.0527, "learning_rate": 4.477777777777778e-05, "loss": 3.1642, "norm_diff": 0.1536, "norm_loss": 0.0, "num_token_doc": 66.7621, "num_token_overlap": 18.0276, "num_token_query": 52.3116, "num_token_union": 73.4751, "num_word_context": 201.9471, "num_word_doc": 49.8479, "num_word_query": 39.8834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1256.9528, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3901, "query_norm": 1.6032, "queue_k_norm": 1.4464, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3116, "sent_len_1": 66.7621, "sent_len_max_0": 127.995, "sent_len_max_1": 189.145, "stdk": 0.0469, "stdq": 0.0469, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1561, "doc_norm": 1.4539, "encoder_q-embeddings": 509.3008, "encoder_q-layer.0": 391.8751, "encoder_q-layer.1": 409.6646, "encoder_q-layer.10": 91.1021, "encoder_q-layer.11": 176.0584, "encoder_q-layer.2": 383.049, "encoder_q-layer.3": 287.9186, "encoder_q-layer.4": 206.7353, "encoder_q-layer.5": 166.2486, "encoder_q-layer.6": 176.0972, "encoder_q-layer.7": 183.7398, "encoder_q-layer.8": 230.7566, "encoder_q-layer.9": 137.4566, "epoch": 0.19, "inbatch_neg_score": 0.3801, "inbatch_pos_score": 1.0518, "learning_rate": 4.472222222222223e-05, "loss": 3.1561, "norm_diff": 0.1533, "norm_loss": 0.0, "num_token_doc": 66.8433, "num_token_overlap": 18.0165, "num_token_query": 52.3905, "num_token_union": 73.5869, "num_word_context": 202.3327, "num_word_doc": 49.892, "num_word_query": 39.9416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 453.3844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3789, "query_norm": 1.6073, "queue_k_norm": 1.4497, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3905, "sent_len_1": 66.8433, "sent_len_max_0": 127.995, "sent_len_max_1": 189.5737, "stdk": 0.0472, "stdq": 0.0465, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1615, "doc_norm": 1.4487, "encoder_q-embeddings": 731.9053, "encoder_q-layer.0": 537.7955, "encoder_q-layer.1": 600.468, "encoder_q-layer.10": 109.9336, "encoder_q-layer.11": 213.594, "encoder_q-layer.2": 641.6548, "encoder_q-layer.3": 666.2639, "encoder_q-layer.4": 687.2531, "encoder_q-layer.5": 695.2563, "encoder_q-layer.6": 627.9583, "encoder_q-layer.7": 574.5009, "encoder_q-layer.8": 418.5944, "encoder_q-layer.9": 124.7646, "epoch": 0.19, "inbatch_neg_score": 0.4021, "inbatch_pos_score": 1.0752, "learning_rate": 4.466666666666667e-05, "loss": 3.1615, "norm_diff": 0.133, "norm_loss": 0.0, "num_token_doc": 66.7527, "num_token_overlap": 18.0328, "num_token_query": 52.3343, "num_token_union": 73.4919, "num_word_context": 202.3733, "num_word_doc": 49.8101, "num_word_query": 39.8932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 833.9089, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4019, "query_norm": 1.5818, "queue_k_norm": 1.4488, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3343, "sent_len_1": 66.7527, "sent_len_max_0": 128.0, "sent_len_max_1": 190.62, "stdk": 0.047, "stdq": 0.0456, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1463, "doc_norm": 1.448, "encoder_q-embeddings": 525.6693, "encoder_q-layer.0": 389.1228, "encoder_q-layer.1": 360.6746, "encoder_q-layer.10": 83.99, "encoder_q-layer.11": 210.7972, "encoder_q-layer.2": 347.0525, "encoder_q-layer.3": 339.7333, "encoder_q-layer.4": 347.9857, "encoder_q-layer.5": 297.2692, "encoder_q-layer.6": 307.5409, "encoder_q-layer.7": 290.6716, "encoder_q-layer.8": 199.6527, "encoder_q-layer.9": 98.5233, "epoch": 0.19, "inbatch_neg_score": 0.4072, "inbatch_pos_score": 1.0566, "learning_rate": 4.461111111111111e-05, "loss": 3.1463, "norm_diff": 0.1561, "norm_loss": 0.0, "num_token_doc": 66.7126, "num_token_overlap": 17.9748, "num_token_query": 52.2456, "num_token_union": 73.4295, "num_word_context": 202.3581, "num_word_doc": 49.7829, "num_word_query": 39.8435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 496.2181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4045, "query_norm": 1.6042, "queue_k_norm": 1.452, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2456, "sent_len_1": 66.7126, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6225, "stdk": 0.047, "stdq": 0.0464, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.1411, "doc_norm": 1.4462, "encoder_q-embeddings": 514.7349, "encoder_q-layer.0": 352.001, "encoder_q-layer.1": 404.7911, "encoder_q-layer.10": 101.3606, "encoder_q-layer.11": 218.0323, "encoder_q-layer.2": 457.7965, "encoder_q-layer.3": 461.6966, "encoder_q-layer.4": 504.9277, "encoder_q-layer.5": 523.4499, "encoder_q-layer.6": 506.1717, "encoder_q-layer.7": 408.0826, "encoder_q-layer.8": 242.3683, "encoder_q-layer.9": 116.5235, "epoch": 0.19, "inbatch_neg_score": 0.435, "inbatch_pos_score": 1.084, "learning_rate": 4.4555555555555555e-05, "loss": 3.1411, "norm_diff": 0.1637, "norm_loss": 0.0, "num_token_doc": 66.7074, "num_token_overlap": 17.9855, "num_token_query": 51.9951, "num_token_union": 73.3248, "num_word_context": 202.1485, "num_word_doc": 49.7757, "num_word_query": 39.6576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 595.8371, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4321, "query_norm": 1.6099, "queue_k_norm": 1.4563, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 51.9951, "sent_len_1": 66.7074, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.9888, "stdk": 0.0469, "stdq": 0.0466, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1322, "doc_norm": 1.4564, "encoder_q-embeddings": 266.811, "encoder_q-layer.0": 186.6248, "encoder_q-layer.1": 201.6046, "encoder_q-layer.10": 86.7087, "encoder_q-layer.11": 202.0943, "encoder_q-layer.2": 193.3201, "encoder_q-layer.3": 205.8599, "encoder_q-layer.4": 219.1906, "encoder_q-layer.5": 204.6537, "encoder_q-layer.6": 222.2067, "encoder_q-layer.7": 189.1288, "encoder_q-layer.8": 123.7404, "encoder_q-layer.9": 87.0742, "epoch": 0.19, "inbatch_neg_score": 0.4432, "inbatch_pos_score": 1.0977, "learning_rate": 4.4500000000000004e-05, "loss": 3.1322, "norm_diff": 0.1224, "norm_loss": 0.0, "num_token_doc": 66.948, "num_token_overlap": 18.0062, "num_token_query": 52.1707, "num_token_union": 73.5374, "num_word_context": 202.2541, "num_word_doc": 49.9534, "num_word_query": 39.7737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 288.388, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4446, "query_norm": 1.5788, "queue_k_norm": 1.4597, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1707, "sent_len_1": 66.948, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.0675, "stdk": 0.0473, "stdq": 0.0454, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.1447, "doc_norm": 1.4604, "encoder_q-embeddings": 229.1187, "encoder_q-layer.0": 159.5185, "encoder_q-layer.1": 182.7658, "encoder_q-layer.10": 86.687, "encoder_q-layer.11": 190.3152, "encoder_q-layer.2": 207.8798, "encoder_q-layer.3": 224.6266, "encoder_q-layer.4": 210.9055, "encoder_q-layer.5": 184.1123, "encoder_q-layer.6": 189.2845, "encoder_q-layer.7": 196.3621, "encoder_q-layer.8": 183.2317, "encoder_q-layer.9": 120.303, "epoch": 0.2, "inbatch_neg_score": 0.4572, "inbatch_pos_score": 1.1123, "learning_rate": 4.4444444444444447e-05, "loss": 3.1447, "norm_diff": 0.1181, "norm_loss": 0.0, "num_token_doc": 66.4942, "num_token_overlap": 17.9582, "num_token_query": 52.0574, "num_token_union": 73.2436, "num_word_context": 202.0282, "num_word_doc": 49.6263, "num_word_query": 39.6871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 275.6582, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.457, "query_norm": 1.5785, "queue_k_norm": 1.4617, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0574, "sent_len_1": 66.4942, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5625, "stdk": 0.0474, "stdq": 0.0449, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 28.3435, "dev_samples_per_second": 2.258, "dev_steps_per_second": 0.035, "epoch": 0.2, "step": 20000, "test_accuracy": 91.27197265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5021268725395203, "test_doc_norm": 1.3884601593017578, "test_inbatch_neg_score": 0.7691583633422852, "test_inbatch_pos_score": 1.5531013011932373, "test_loss": 0.5021268725395203, "test_loss_align": 1.3248651027679443, "test_loss_unif": 3.5940608978271484, "test_loss_unif_q@queue": 3.5940613746643066, "test_norm_diff": 0.1878102868795395, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.44821539521217346, "test_query_norm": 1.5762704610824585, "test_queue_k_norm": 1.4619295597076416, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03929115831851959, "test_stdq": 0.03973805531859398, "test_stdqueue_k": 0.04750288650393486, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.3435, "dev_samples_per_second": 2.258, "dev_steps_per_second": 0.035, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.27844, "eval_beir-arguana_recall@10": 0.49787, "eval_beir-arguana_recall@100": 0.83357, "eval_beir-arguana_recall@20": 0.6266, "eval_beir-avg_ndcg@10": 0.3126675, "eval_beir-avg_recall@10": 0.3765816666666667, "eval_beir-avg_recall@100": 0.55728625, "eval_beir-avg_recall@20": 0.4314595, "eval_beir-cqadupstack_ndcg@10": 0.18962500000000002, "eval_beir-cqadupstack_recall@10": 0.26808666666666664, "eval_beir-cqadupstack_recall@100": 0.48841249999999997, "eval_beir-cqadupstack_recall@20": 0.328405, "eval_beir-fiqa_ndcg@10": 0.16695, "eval_beir-fiqa_recall@10": 0.21348, "eval_beir-fiqa_recall@100": 0.45352, "eval_beir-fiqa_recall@20": 0.27252, "eval_beir-nfcorpus_ndcg@10": 0.22875, "eval_beir-nfcorpus_recall@10": 0.10804, "eval_beir-nfcorpus_recall@100": 0.22976, "eval_beir-nfcorpus_recall@20": 0.1384, "eval_beir-nq_ndcg@10": 0.20509, "eval_beir-nq_recall@10": 0.34171, "eval_beir-nq_recall@100": 0.66527, "eval_beir-nq_recall@20": 0.44749, "eval_beir-quora_ndcg@10": 0.69816, "eval_beir-quora_recall@10": 0.82413, "eval_beir-quora_recall@100": 0.95593, "eval_beir-quora_recall@20": 0.87787, "eval_beir-scidocs_ndcg@10": 0.12234, "eval_beir-scidocs_recall@10": 0.13033, "eval_beir-scidocs_recall@100": 0.30507, "eval_beir-scidocs_recall@20": 0.17723, "eval_beir-scifact_ndcg@10": 0.56271, "eval_beir-scifact_recall@10": 0.71933, "eval_beir-scifact_recall@100": 0.90178, "eval_beir-scifact_recall@20": 0.77583, "eval_beir-trec-covid_ndcg@10": 0.49921, "eval_beir-trec-covid_recall@10": 0.542, "eval_beir-trec-covid_recall@100": 0.3468, "eval_beir-trec-covid_recall@20": 0.492, "eval_beir-webis-touche2020_ndcg@10": 0.1754, "eval_beir-webis-touche2020_recall@10": 0.12084, "eval_beir-webis-touche2020_recall@100": 0.39275, "eval_beir-webis-touche2020_recall@20": 0.17825, "eval_senteval-avg_sts": 0.7320296292018303, "eval_senteval-sickr_spearman": 0.6955675828349838, "eval_senteval-stsb_spearman": 0.7684916755686769, "step": 20000, "test_accuracy": 91.27197265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5021268725395203, "test_doc_norm": 1.3884601593017578, "test_inbatch_neg_score": 0.7691583633422852, "test_inbatch_pos_score": 1.5531013011932373, "test_loss": 0.5021268725395203, "test_loss_align": 1.3248651027679443, "test_loss_unif": 3.5940608978271484, "test_loss_unif_q@queue": 3.5940613746643066, "test_norm_diff": 0.1878102868795395, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.44821539521217346, "test_query_norm": 1.5762704610824585, "test_queue_k_norm": 1.4619295597076416, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03929115831851959, "test_stdq": 0.03973805531859398, "test_stdqueue_k": 0.04750288650393486, "test_stdqueue_q": 0.0 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1397, "doc_norm": 1.4648, "encoder_q-embeddings": 235.8854, "encoder_q-layer.0": 157.0198, "encoder_q-layer.1": 182.2113, "encoder_q-layer.10": 85.8417, "encoder_q-layer.11": 179.5028, "encoder_q-layer.2": 217.6219, "encoder_q-layer.3": 219.1072, "encoder_q-layer.4": 201.8644, "encoder_q-layer.5": 199.3252, "encoder_q-layer.6": 207.6761, "encoder_q-layer.7": 201.1252, "encoder_q-layer.8": 153.3802, "encoder_q-layer.9": 91.7508, "epoch": 0.2, "inbatch_neg_score": 0.4547, "inbatch_pos_score": 1.125, "learning_rate": 4.438888888888889e-05, "loss": 3.1397, "norm_diff": 0.1109, "norm_loss": 0.0, "num_token_doc": 66.7503, "num_token_overlap": 17.9887, "num_token_query": 52.2944, "num_token_union": 73.5074, "num_word_context": 202.572, "num_word_doc": 49.8019, "num_word_query": 39.869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 272.049, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4561, "query_norm": 1.5757, "queue_k_norm": 1.4661, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2944, "sent_len_1": 66.7503, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5563, "stdk": 0.0475, "stdq": 0.0455, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1475, "doc_norm": 1.4667, "encoder_q-embeddings": 351.2755, "encoder_q-layer.0": 217.1747, "encoder_q-layer.1": 253.2086, "encoder_q-layer.10": 86.0444, "encoder_q-layer.11": 184.693, "encoder_q-layer.2": 271.9165, "encoder_q-layer.3": 303.0528, "encoder_q-layer.4": 278.6171, "encoder_q-layer.5": 260.1746, "encoder_q-layer.6": 323.0364, "encoder_q-layer.7": 326.8293, "encoder_q-layer.8": 260.8166, "encoder_q-layer.9": 106.793, "epoch": 0.2, "inbatch_neg_score": 0.4518, "inbatch_pos_score": 1.1162, "learning_rate": 4.433333333333334e-05, "loss": 3.1475, "norm_diff": 0.1044, "norm_loss": 0.0, "num_token_doc": 66.6817, "num_token_overlap": 17.915, "num_token_query": 52.0165, "num_token_union": 73.3504, "num_word_context": 202.0342, "num_word_doc": 49.7517, "num_word_query": 39.6572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 399.1249, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4514, "query_norm": 1.571, "queue_k_norm": 1.4669, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0165, "sent_len_1": 66.6817, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5412, "stdk": 0.0475, "stdq": 0.0455, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.142, "doc_norm": 1.4677, "encoder_q-embeddings": 852.905, "encoder_q-layer.0": 610.0173, "encoder_q-layer.1": 658.7803, "encoder_q-layer.10": 100.4674, "encoder_q-layer.11": 194.0494, "encoder_q-layer.2": 615.265, "encoder_q-layer.3": 576.3245, "encoder_q-layer.4": 539.5284, "encoder_q-layer.5": 569.428, "encoder_q-layer.6": 523.3577, "encoder_q-layer.7": 430.7634, "encoder_q-layer.8": 241.7076, "encoder_q-layer.9": 111.5586, "epoch": 0.2, "inbatch_neg_score": 0.447, "inbatch_pos_score": 1.1211, "learning_rate": 4.427777777777778e-05, "loss": 3.142, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.8467, "num_token_overlap": 17.9471, "num_token_query": 52.1196, "num_token_union": 73.4826, "num_word_context": 202.3514, "num_word_doc": 49.8672, "num_word_query": 39.7104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 803.544, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.447, "query_norm": 1.5877, "queue_k_norm": 1.4651, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1196, "sent_len_1": 66.8467, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.3438, "stdk": 0.0475, "stdq": 0.0468, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1103, "doc_norm": 1.4612, "encoder_q-embeddings": 191.6379, "encoder_q-layer.0": 134.6373, "encoder_q-layer.1": 152.7636, "encoder_q-layer.10": 90.8808, "encoder_q-layer.11": 197.5167, "encoder_q-layer.2": 158.2926, "encoder_q-layer.3": 161.2526, "encoder_q-layer.4": 157.0912, "encoder_q-layer.5": 156.4808, "encoder_q-layer.6": 156.802, "encoder_q-layer.7": 144.6399, "encoder_q-layer.8": 139.8651, "encoder_q-layer.9": 94.7679, "epoch": 0.2, "inbatch_neg_score": 0.4593, "inbatch_pos_score": 1.1055, "learning_rate": 4.422222222222222e-05, "loss": 3.1103, "norm_diff": 0.0836, "norm_loss": 0.0, "num_token_doc": 66.9813, "num_token_overlap": 18.1252, "num_token_query": 52.303, "num_token_union": 73.5391, "num_word_context": 202.3823, "num_word_doc": 50.0224, "num_word_query": 39.8533, "postclip_grad_norm": 1.0, "preclip_grad_norm": 227.231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.458, "query_norm": 1.5449, "queue_k_norm": 1.4636, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.303, "sent_len_1": 66.9813, "sent_len_max_0": 127.9838, "sent_len_max_1": 188.7688, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.1205, "doc_norm": 1.4635, "encoder_q-embeddings": 798.9477, "encoder_q-layer.0": 515.3992, "encoder_q-layer.1": 462.4632, "encoder_q-layer.10": 92.1472, "encoder_q-layer.11": 190.5984, "encoder_q-layer.2": 437.7318, "encoder_q-layer.3": 377.8721, "encoder_q-layer.4": 383.0715, "encoder_q-layer.5": 363.0291, "encoder_q-layer.6": 305.6538, "encoder_q-layer.7": 312.1665, "encoder_q-layer.8": 166.4165, "encoder_q-layer.9": 92.4198, "epoch": 0.2, "inbatch_neg_score": 0.434, "inbatch_pos_score": 1.125, "learning_rate": 4.4166666666666665e-05, "loss": 3.1205, "norm_diff": 0.1067, "norm_loss": 0.0, "num_token_doc": 66.7388, "num_token_overlap": 18.0164, "num_token_query": 52.3105, "num_token_union": 73.4556, "num_word_context": 202.2522, "num_word_doc": 49.7963, "num_word_query": 39.8672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 636.3953, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4331, "query_norm": 1.5702, "queue_k_norm": 1.461, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3105, "sent_len_1": 66.7388, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.035, "stdk": 0.0471, "stdq": 0.0466, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1313, "doc_norm": 1.448, "encoder_q-embeddings": 347.4723, "encoder_q-layer.0": 211.3736, "encoder_q-layer.1": 216.8837, "encoder_q-layer.10": 85.34, "encoder_q-layer.11": 190.7931, "encoder_q-layer.2": 231.1874, "encoder_q-layer.3": 230.0761, "encoder_q-layer.4": 238.1097, "encoder_q-layer.5": 248.0937, "encoder_q-layer.6": 292.7434, "encoder_q-layer.7": 338.941, "encoder_q-layer.8": 213.5677, "encoder_q-layer.9": 104.1405, "epoch": 0.2, "inbatch_neg_score": 0.4359, "inbatch_pos_score": 1.1084, "learning_rate": 4.4111111111111114e-05, "loss": 3.1313, "norm_diff": 0.1392, "norm_loss": 0.0, "num_token_doc": 66.5855, "num_token_overlap": 17.9817, "num_token_query": 52.1098, "num_token_union": 73.3592, "num_word_context": 202.1344, "num_word_doc": 49.723, "num_word_query": 39.7183, "postclip_grad_norm": 1.0, "preclip_grad_norm": 360.5581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4336, "query_norm": 1.5872, "queue_k_norm": 1.4572, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1098, "sent_len_1": 66.5855, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.9787, "stdk": 0.0464, "stdq": 0.047, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1476, "doc_norm": 1.454, "encoder_q-embeddings": 266.9663, "encoder_q-layer.0": 174.8849, "encoder_q-layer.1": 194.2576, "encoder_q-layer.10": 219.3237, "encoder_q-layer.11": 435.0588, "encoder_q-layer.2": 216.6012, "encoder_q-layer.3": 224.9896, "encoder_q-layer.4": 218.5854, "encoder_q-layer.5": 248.7277, "encoder_q-layer.6": 250.4663, "encoder_q-layer.7": 213.5007, "encoder_q-layer.8": 225.9319, "encoder_q-layer.9": 180.8385, "epoch": 0.2, "inbatch_neg_score": 0.4058, "inbatch_pos_score": 1.0732, "learning_rate": 4.4055555555555557e-05, "loss": 3.1476, "norm_diff": 0.1263, "norm_loss": 0.0, "num_token_doc": 66.7786, "num_token_overlap": 17.9309, "num_token_query": 52.0789, "num_token_union": 73.4041, "num_word_context": 202.1977, "num_word_doc": 49.7865, "num_word_query": 39.6895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 350.9485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4041, "query_norm": 1.5803, "queue_k_norm": 1.4555, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0789, "sent_len_1": 66.7786, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5662, "stdk": 0.0467, "stdq": 0.0467, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1068, "doc_norm": 1.4542, "encoder_q-embeddings": 290.4448, "encoder_q-layer.0": 198.4925, "encoder_q-layer.1": 223.5728, "encoder_q-layer.10": 191.1837, "encoder_q-layer.11": 381.3809, "encoder_q-layer.2": 256.4866, "encoder_q-layer.3": 278.351, "encoder_q-layer.4": 244.2067, "encoder_q-layer.5": 249.1929, "encoder_q-layer.6": 231.6158, "encoder_q-layer.7": 222.5173, "encoder_q-layer.8": 230.6368, "encoder_q-layer.9": 184.894, "epoch": 0.2, "inbatch_neg_score": 0.4049, "inbatch_pos_score": 1.085, "learning_rate": 4.4000000000000006e-05, "loss": 3.1068, "norm_diff": 0.1299, "norm_loss": 0.0, "num_token_doc": 66.6604, "num_token_overlap": 18.0158, "num_token_query": 52.211, "num_token_union": 73.3931, "num_word_context": 202.2124, "num_word_doc": 49.7383, "num_word_query": 39.7927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 363.9458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4055, "query_norm": 1.5842, "queue_k_norm": 1.4551, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.211, "sent_len_1": 66.6604, "sent_len_max_0": 128.0, "sent_len_max_1": 189.49, "stdk": 0.0467, "stdq": 0.0469, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.1371, "doc_norm": 1.4513, "encoder_q-embeddings": 512.9161, "encoder_q-layer.0": 376.2902, "encoder_q-layer.1": 416.3268, "encoder_q-layer.10": 179.9767, "encoder_q-layer.11": 380.4503, "encoder_q-layer.2": 461.0149, "encoder_q-layer.3": 412.1031, "encoder_q-layer.4": 361.3069, "encoder_q-layer.5": 336.7093, "encoder_q-layer.6": 312.9949, "encoder_q-layer.7": 276.6501, "encoder_q-layer.8": 303.9218, "encoder_q-layer.9": 215.1848, "epoch": 0.2, "inbatch_neg_score": 0.3836, "inbatch_pos_score": 1.0654, "learning_rate": 4.394444444444445e-05, "loss": 3.1371, "norm_diff": 0.1477, "norm_loss": 0.0, "num_token_doc": 66.9099, "num_token_overlap": 17.9679, "num_token_query": 52.1973, "num_token_union": 73.5258, "num_word_context": 202.7763, "num_word_doc": 49.9295, "num_word_query": 39.7755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 546.0424, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3813, "query_norm": 1.599, "queue_k_norm": 1.4541, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1973, "sent_len_1": 66.9099, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7488, "stdk": 0.0465, "stdq": 0.0472, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1328, "doc_norm": 1.4488, "encoder_q-embeddings": 748.6213, "encoder_q-layer.0": 509.0036, "encoder_q-layer.1": 577.3553, "encoder_q-layer.10": 195.4046, "encoder_q-layer.11": 406.1164, "encoder_q-layer.2": 661.7139, "encoder_q-layer.3": 657.6284, "encoder_q-layer.4": 596.6448, "encoder_q-layer.5": 624.2061, "encoder_q-layer.6": 586.0656, "encoder_q-layer.7": 487.0557, "encoder_q-layer.8": 432.941, "encoder_q-layer.9": 279.4542, "epoch": 0.21, "inbatch_neg_score": 0.3924, "inbatch_pos_score": 1.0625, "learning_rate": 4.388888888888889e-05, "loss": 3.1328, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.5646, "num_token_overlap": 17.9802, "num_token_query": 52.2283, "num_token_union": 73.3569, "num_word_context": 202.3613, "num_word_doc": 49.6735, "num_word_query": 39.8036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 820.2948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3911, "query_norm": 1.5667, "queue_k_norm": 1.4506, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2283, "sent_len_1": 66.5646, "sent_len_max_0": 127.995, "sent_len_max_1": 189.3063, "stdk": 0.0465, "stdq": 0.0459, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1444, "doc_norm": 1.4464, "encoder_q-embeddings": 1567.4186, "encoder_q-layer.0": 1146.3628, "encoder_q-layer.1": 1219.6681, "encoder_q-layer.10": 188.1503, "encoder_q-layer.11": 361.3767, "encoder_q-layer.2": 1426.5198, "encoder_q-layer.3": 1491.7397, "encoder_q-layer.4": 1450.7068, "encoder_q-layer.5": 1610.8652, "encoder_q-layer.6": 1555.8939, "encoder_q-layer.7": 1464.7559, "encoder_q-layer.8": 1574.7687, "encoder_q-layer.9": 748.5592, "epoch": 0.21, "inbatch_neg_score": 0.3757, "inbatch_pos_score": 1.0273, "learning_rate": 4.383333333333334e-05, "loss": 3.1444, "norm_diff": 0.1175, "norm_loss": 0.0, "num_token_doc": 66.5454, "num_token_overlap": 17.982, "num_token_query": 52.1218, "num_token_union": 73.2913, "num_word_context": 202.0233, "num_word_doc": 49.636, "num_word_query": 39.7423, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1934.0257, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.374, "query_norm": 1.5639, "queue_k_norm": 1.4509, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1218, "sent_len_1": 66.5454, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.7988, "stdk": 0.0464, "stdq": 0.0461, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.1251, "doc_norm": 1.4472, "encoder_q-embeddings": 473.6252, "encoder_q-layer.0": 312.1941, "encoder_q-layer.1": 355.2022, "encoder_q-layer.10": 191.8622, "encoder_q-layer.11": 390.2962, "encoder_q-layer.2": 434.632, "encoder_q-layer.3": 472.1452, "encoder_q-layer.4": 491.011, "encoder_q-layer.5": 527.3717, "encoder_q-layer.6": 556.5019, "encoder_q-layer.7": 545.0804, "encoder_q-layer.8": 480.6775, "encoder_q-layer.9": 316.2976, "epoch": 0.21, "inbatch_neg_score": 0.3383, "inbatch_pos_score": 1.0088, "learning_rate": 4.377777777777778e-05, "loss": 3.1251, "norm_diff": 0.1555, "norm_loss": 0.0, "num_token_doc": 66.7916, "num_token_overlap": 18.0162, "num_token_query": 52.1445, "num_token_union": 73.4236, "num_word_context": 202.4405, "num_word_doc": 49.903, "num_word_query": 39.7659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 635.7499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3376, "query_norm": 1.6027, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1445, "sent_len_1": 66.7916, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0725, "stdk": 0.0466, "stdq": 0.047, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0857, "doc_norm": 1.4525, "encoder_q-embeddings": 245.6148, "encoder_q-layer.0": 168.5192, "encoder_q-layer.1": 174.1167, "encoder_q-layer.10": 172.5001, "encoder_q-layer.11": 366.212, "encoder_q-layer.2": 199.8347, "encoder_q-layer.3": 191.4701, "encoder_q-layer.4": 189.3682, "encoder_q-layer.5": 172.2337, "encoder_q-layer.6": 187.2105, "encoder_q-layer.7": 191.7974, "encoder_q-layer.8": 221.7121, "encoder_q-layer.9": 174.3554, "epoch": 0.21, "inbatch_neg_score": 0.3465, "inbatch_pos_score": 1.0459, "learning_rate": 4.3722222222222224e-05, "loss": 3.0857, "norm_diff": 0.1572, "norm_loss": 0.0, "num_token_doc": 66.6961, "num_token_overlap": 18.0513, "num_token_query": 52.3575, "num_token_union": 73.4208, "num_word_context": 202.2394, "num_word_doc": 49.745, "num_word_query": 39.9218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 309.0212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3442, "query_norm": 1.6098, "queue_k_norm": 1.4492, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3575, "sent_len_1": 66.6961, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.87, "stdk": 0.0468, "stdq": 0.0472, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0901, "doc_norm": 1.4461, "encoder_q-embeddings": 1260.9559, "encoder_q-layer.0": 929.7353, "encoder_q-layer.1": 1050.5667, "encoder_q-layer.10": 181.399, "encoder_q-layer.11": 375.6807, "encoder_q-layer.2": 1402.7748, "encoder_q-layer.3": 1831.3511, "encoder_q-layer.4": 1299.9055, "encoder_q-layer.5": 557.4102, "encoder_q-layer.6": 495.3743, "encoder_q-layer.7": 460.5331, "encoder_q-layer.8": 420.9148, "encoder_q-layer.9": 280.1029, "epoch": 0.21, "inbatch_neg_score": 0.3474, "inbatch_pos_score": 1.0127, "learning_rate": 4.3666666666666666e-05, "loss": 3.0901, "norm_diff": 0.1369, "norm_loss": 0.0, "num_token_doc": 66.5754, "num_token_overlap": 17.984, "num_token_query": 52.1032, "num_token_union": 73.263, "num_word_context": 201.9326, "num_word_doc": 49.6845, "num_word_query": 39.7302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1438.3759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3459, "query_norm": 1.583, "queue_k_norm": 1.4497, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1032, "sent_len_1": 66.5754, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.6138, "stdk": 0.0467, "stdq": 0.0462, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1077, "doc_norm": 1.454, "encoder_q-embeddings": 1010.045, "encoder_q-layer.0": 648.4497, "encoder_q-layer.1": 794.6901, "encoder_q-layer.10": 200.6494, "encoder_q-layer.11": 386.6144, "encoder_q-layer.2": 908.887, "encoder_q-layer.3": 744.7993, "encoder_q-layer.4": 364.8025, "encoder_q-layer.5": 267.4758, "encoder_q-layer.6": 303.4516, "encoder_q-layer.7": 259.688, "encoder_q-layer.8": 290.7103, "encoder_q-layer.9": 188.342, "epoch": 0.21, "inbatch_neg_score": 0.34, "inbatch_pos_score": 0.9897, "learning_rate": 4.3611111111111116e-05, "loss": 3.1077, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.823, "num_token_overlap": 18.015, "num_token_query": 52.273, "num_token_union": 73.5066, "num_word_context": 202.2893, "num_word_doc": 49.8684, "num_word_query": 39.8708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 906.6841, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3401, "query_norm": 1.5566, "queue_k_norm": 1.4498, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.273, "sent_len_1": 66.823, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7562, "stdk": 0.0471, "stdq": 0.0453, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1658, "doc_norm": 1.4516, "encoder_q-embeddings": 347.4849, "encoder_q-layer.0": 231.9608, "encoder_q-layer.1": 260.5159, "encoder_q-layer.10": 164.5405, "encoder_q-layer.11": 369.3017, "encoder_q-layer.2": 290.7769, "encoder_q-layer.3": 262.3529, "encoder_q-layer.4": 175.6382, "encoder_q-layer.5": 127.1017, "encoder_q-layer.6": 149.0662, "encoder_q-layer.7": 171.0024, "encoder_q-layer.8": 203.9913, "encoder_q-layer.9": 171.507, "epoch": 0.21, "inbatch_neg_score": 0.3498, "inbatch_pos_score": 1.0186, "learning_rate": 4.355555555555556e-05, "loss": 3.1658, "norm_diff": 0.1393, "norm_loss": 0.0, "num_token_doc": 66.754, "num_token_overlap": 18.1048, "num_token_query": 52.394, "num_token_union": 73.4613, "num_word_context": 202.2254, "num_word_doc": 49.8153, "num_word_query": 39.9416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 372.8863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3494, "query_norm": 1.5908, "queue_k_norm": 1.4524, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.394, "sent_len_1": 66.754, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1262, "stdk": 0.0471, "stdq": 0.0466, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1006, "doc_norm": 1.4542, "encoder_q-embeddings": 132.5022, "encoder_q-layer.0": 86.7883, "encoder_q-layer.1": 94.4273, "encoder_q-layer.10": 88.73, "encoder_q-layer.11": 193.1656, "encoder_q-layer.2": 101.8714, "encoder_q-layer.3": 115.0799, "encoder_q-layer.4": 109.9255, "encoder_q-layer.5": 102.8667, "encoder_q-layer.6": 106.5985, "encoder_q-layer.7": 114.6251, "encoder_q-layer.8": 113.7155, "encoder_q-layer.9": 87.0241, "epoch": 0.21, "inbatch_neg_score": 0.3625, "inbatch_pos_score": 1.0342, "learning_rate": 4.35e-05, "loss": 3.1006, "norm_diff": 0.147, "norm_loss": 0.0, "num_token_doc": 66.6394, "num_token_overlap": 17.9705, "num_token_query": 52.2033, "num_token_union": 73.4048, "num_word_context": 202.3514, "num_word_doc": 49.7237, "num_word_query": 39.7938, "postclip_grad_norm": 1.0, "preclip_grad_norm": 166.5474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3616, "query_norm": 1.6012, "queue_k_norm": 1.4559, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2033, "sent_len_1": 66.6394, "sent_len_max_0": 127.99, "sent_len_max_1": 187.5238, "stdk": 0.0472, "stdq": 0.0456, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.0993, "doc_norm": 1.4552, "encoder_q-embeddings": 191.6582, "encoder_q-layer.0": 127.962, "encoder_q-layer.1": 144.2227, "encoder_q-layer.10": 84.4728, "encoder_q-layer.11": 189.4951, "encoder_q-layer.2": 175.9816, "encoder_q-layer.3": 166.1009, "encoder_q-layer.4": 158.3682, "encoder_q-layer.5": 167.546, "encoder_q-layer.6": 170.0943, "encoder_q-layer.7": 150.1682, "encoder_q-layer.8": 155.1889, "encoder_q-layer.9": 108.1029, "epoch": 0.21, "inbatch_neg_score": 0.376, "inbatch_pos_score": 1.0059, "learning_rate": 4.344444444444445e-05, "loss": 3.0993, "norm_diff": 0.1361, "norm_loss": 0.0, "num_token_doc": 66.7372, "num_token_overlap": 18.031, "num_token_query": 52.3384, "num_token_union": 73.4872, "num_word_context": 202.2496, "num_word_doc": 49.7618, "num_word_query": 39.872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 231.2095, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3752, "query_norm": 1.5913, "queue_k_norm": 1.4544, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3384, "sent_len_1": 66.7372, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2875, "stdk": 0.0473, "stdq": 0.045, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.071, "doc_norm": 1.4459, "encoder_q-embeddings": 462.9203, "encoder_q-layer.0": 363.4636, "encoder_q-layer.1": 424.7848, "encoder_q-layer.10": 86.8062, "encoder_q-layer.11": 187.7576, "encoder_q-layer.2": 451.8007, "encoder_q-layer.3": 354.1021, "encoder_q-layer.4": 281.299, "encoder_q-layer.5": 260.2722, "encoder_q-layer.6": 302.1689, "encoder_q-layer.7": 318.541, "encoder_q-layer.8": 262.6646, "encoder_q-layer.9": 130.1292, "epoch": 0.21, "inbatch_neg_score": 0.4024, "inbatch_pos_score": 1.0645, "learning_rate": 4.338888888888889e-05, "loss": 3.071, "norm_diff": 0.1743, "norm_loss": 0.0, "num_token_doc": 66.966, "num_token_overlap": 18.0528, "num_token_query": 52.36, "num_token_union": 73.6217, "num_word_context": 202.8188, "num_word_doc": 50.0316, "num_word_query": 39.9133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 487.3802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3997, "query_norm": 1.6203, "queue_k_norm": 1.4549, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.36, "sent_len_1": 66.966, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.8587, "stdk": 0.0469, "stdq": 0.0458, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0833, "doc_norm": 1.4561, "encoder_q-embeddings": 185.7026, "encoder_q-layer.0": 134.0476, "encoder_q-layer.1": 150.9563, "encoder_q-layer.10": 86.6373, "encoder_q-layer.11": 186.8917, "encoder_q-layer.2": 161.3052, "encoder_q-layer.3": 142.7791, "encoder_q-layer.4": 113.7503, "encoder_q-layer.5": 98.7404, "encoder_q-layer.6": 108.3753, "encoder_q-layer.7": 107.076, "encoder_q-layer.8": 114.4649, "encoder_q-layer.9": 88.1468, "epoch": 0.21, "inbatch_neg_score": 0.4386, "inbatch_pos_score": 1.1035, "learning_rate": 4.3333333333333334e-05, "loss": 3.0833, "norm_diff": 0.2036, "norm_loss": 0.0, "num_token_doc": 66.9003, "num_token_overlap": 18.0292, "num_token_query": 52.3412, "num_token_union": 73.6055, "num_word_context": 202.6697, "num_word_doc": 49.9348, "num_word_query": 39.9243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 201.1485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4358, "query_norm": 1.6597, "queue_k_norm": 1.4548, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3412, "sent_len_1": 66.9003, "sent_len_max_0": 127.9775, "sent_len_max_1": 192.4638, "stdk": 0.0473, "stdq": 0.0449, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0758, "doc_norm": 1.454, "encoder_q-embeddings": 254.1896, "encoder_q-layer.0": 175.3312, "encoder_q-layer.1": 193.6359, "encoder_q-layer.10": 80.5329, "encoder_q-layer.11": 181.1487, "encoder_q-layer.2": 222.1649, "encoder_q-layer.3": 222.0308, "encoder_q-layer.4": 264.9954, "encoder_q-layer.5": 286.9816, "encoder_q-layer.6": 272.9661, "encoder_q-layer.7": 240.7829, "encoder_q-layer.8": 182.5231, "encoder_q-layer.9": 92.1327, "epoch": 0.22, "inbatch_neg_score": 0.4671, "inbatch_pos_score": 1.126, "learning_rate": 4.3277777777777776e-05, "loss": 3.0758, "norm_diff": 0.2475, "norm_loss": 0.0, "num_token_doc": 66.5889, "num_token_overlap": 17.9423, "num_token_query": 52.0552, "num_token_union": 73.261, "num_word_context": 201.8354, "num_word_doc": 49.6481, "num_word_query": 39.6538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 318.0833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4634, "query_norm": 1.7015, "queue_k_norm": 1.4551, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0552, "sent_len_1": 66.5889, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2875, "stdk": 0.0471, "stdq": 0.0457, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0791, "doc_norm": 1.4516, "encoder_q-embeddings": 356.3576, "encoder_q-layer.0": 212.4558, "encoder_q-layer.1": 235.2517, "encoder_q-layer.10": 117.6451, "encoder_q-layer.11": 217.9946, "encoder_q-layer.2": 269.6352, "encoder_q-layer.3": 263.3637, "encoder_q-layer.4": 238.4504, "encoder_q-layer.5": 127.4598, "encoder_q-layer.6": 130.3145, "encoder_q-layer.7": 126.1535, "encoder_q-layer.8": 147.2353, "encoder_q-layer.9": 121.7699, "epoch": 0.22, "inbatch_neg_score": 0.4863, "inbatch_pos_score": 1.1641, "learning_rate": 4.3222222222222226e-05, "loss": 3.0791, "norm_diff": 0.2276, "norm_loss": 0.0, "num_token_doc": 66.772, "num_token_overlap": 17.9832, "num_token_query": 52.0941, "num_token_union": 73.363, "num_word_context": 202.4671, "num_word_doc": 49.8191, "num_word_query": 39.6946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 334.0883, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4829, "query_norm": 1.6792, "queue_k_norm": 1.4588, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0941, "sent_len_1": 66.772, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.8512, "stdk": 0.047, "stdq": 0.046, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0931, "doc_norm": 1.4652, "encoder_q-embeddings": 516.4061, "encoder_q-layer.0": 341.2379, "encoder_q-layer.1": 378.1267, "encoder_q-layer.10": 86.4569, "encoder_q-layer.11": 191.3543, "encoder_q-layer.2": 473.0213, "encoder_q-layer.3": 464.4865, "encoder_q-layer.4": 393.5446, "encoder_q-layer.5": 397.0185, "encoder_q-layer.6": 348.4847, "encoder_q-layer.7": 258.8308, "encoder_q-layer.8": 233.0759, "encoder_q-layer.9": 120.8392, "epoch": 0.22, "inbatch_neg_score": 0.4961, "inbatch_pos_score": 1.1768, "learning_rate": 4.316666666666667e-05, "loss": 3.0931, "norm_diff": 0.1806, "norm_loss": 0.0, "num_token_doc": 66.7775, "num_token_overlap": 17.9935, "num_token_query": 52.2506, "num_token_union": 73.493, "num_word_context": 202.4279, "num_word_doc": 49.8516, "num_word_query": 39.8618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 540.4194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4917, "query_norm": 1.6458, "queue_k_norm": 1.4612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2506, "sent_len_1": 66.7775, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3338, "stdk": 0.0473, "stdq": 0.0465, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0834, "doc_norm": 1.4681, "encoder_q-embeddings": 520.8903, "encoder_q-layer.0": 347.1936, "encoder_q-layer.1": 387.4384, "encoder_q-layer.10": 91.7923, "encoder_q-layer.11": 175.6589, "encoder_q-layer.2": 445.4182, "encoder_q-layer.3": 476.6628, "encoder_q-layer.4": 505.4926, "encoder_q-layer.5": 558.7138, "encoder_q-layer.6": 547.0431, "encoder_q-layer.7": 405.3178, "encoder_q-layer.8": 218.2642, "encoder_q-layer.9": 95.3502, "epoch": 0.22, "inbatch_neg_score": 0.5043, "inbatch_pos_score": 1.1924, "learning_rate": 4.311111111111111e-05, "loss": 3.0834, "norm_diff": 0.1965, "norm_loss": 0.0, "num_token_doc": 66.9466, "num_token_overlap": 18.0684, "num_token_query": 52.3564, "num_token_union": 73.6071, "num_word_context": 202.3775, "num_word_doc": 49.9627, "num_word_query": 39.8991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 598.2612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5015, "query_norm": 1.6646, "queue_k_norm": 1.467, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3564, "sent_len_1": 66.9466, "sent_len_max_0": 127.995, "sent_len_max_1": 189.4338, "stdk": 0.0473, "stdq": 0.0467, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.08, "doc_norm": 1.4709, "encoder_q-embeddings": 260.1588, "encoder_q-layer.0": 179.4804, "encoder_q-layer.1": 203.5159, "encoder_q-layer.10": 89.3258, "encoder_q-layer.11": 196.3633, "encoder_q-layer.2": 222.7563, "encoder_q-layer.3": 239.8382, "encoder_q-layer.4": 250.2228, "encoder_q-layer.5": 246.8149, "encoder_q-layer.6": 235.657, "encoder_q-layer.7": 205.8527, "encoder_q-layer.8": 153.0353, "encoder_q-layer.9": 87.6357, "epoch": 0.22, "inbatch_neg_score": 0.4822, "inbatch_pos_score": 1.1602, "learning_rate": 4.305555555555556e-05, "loss": 3.08, "norm_diff": 0.1271, "norm_loss": 0.0, "num_token_doc": 66.6887, "num_token_overlap": 17.9742, "num_token_query": 52.1818, "num_token_union": 73.3882, "num_word_context": 202.713, "num_word_doc": 49.7873, "num_word_query": 39.7676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 304.9594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4822, "query_norm": 1.598, "queue_k_norm": 1.4681, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1818, "sent_len_1": 66.6887, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3425, "stdk": 0.0474, "stdq": 0.0457, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0865, "doc_norm": 1.4755, "encoder_q-embeddings": 450.8288, "encoder_q-layer.0": 308.0247, "encoder_q-layer.1": 346.2513, "encoder_q-layer.10": 85.3085, "encoder_q-layer.11": 187.4096, "encoder_q-layer.2": 398.5369, "encoder_q-layer.3": 416.8327, "encoder_q-layer.4": 437.3276, "encoder_q-layer.5": 471.4048, "encoder_q-layer.6": 470.8431, "encoder_q-layer.7": 330.7924, "encoder_q-layer.8": 172.1276, "encoder_q-layer.9": 86.0631, "epoch": 0.22, "inbatch_neg_score": 0.4989, "inbatch_pos_score": 1.1699, "learning_rate": 4.3e-05, "loss": 3.0865, "norm_diff": 0.1426, "norm_loss": 0.0, "num_token_doc": 66.877, "num_token_overlap": 18.0, "num_token_query": 52.2038, "num_token_union": 73.5007, "num_word_context": 202.2441, "num_word_doc": 49.8639, "num_word_query": 39.7693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 520.1792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.498, "query_norm": 1.6181, "queue_k_norm": 1.4717, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2038, "sent_len_1": 66.877, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5588, "stdk": 0.0474, "stdq": 0.0462, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.0978, "doc_norm": 1.4805, "encoder_q-embeddings": 220.1197, "encoder_q-layer.0": 156.2827, "encoder_q-layer.1": 164.106, "encoder_q-layer.10": 82.9559, "encoder_q-layer.11": 185.7222, "encoder_q-layer.2": 184.5818, "encoder_q-layer.3": 204.6395, "encoder_q-layer.4": 181.9066, "encoder_q-layer.5": 178.6542, "encoder_q-layer.6": 174.2748, "encoder_q-layer.7": 147.7289, "encoder_q-layer.8": 108.6556, "encoder_q-layer.9": 80.6688, "epoch": 0.22, "inbatch_neg_score": 0.4966, "inbatch_pos_score": 1.1631, "learning_rate": 4.294444444444445e-05, "loss": 3.0978, "norm_diff": 0.1358, "norm_loss": 0.0, "num_token_doc": 66.7803, "num_token_overlap": 17.9841, "num_token_query": 52.1907, "num_token_union": 73.4463, "num_word_context": 202.3776, "num_word_doc": 49.8604, "num_word_query": 39.8194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 248.6589, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4954, "query_norm": 1.6163, "queue_k_norm": 1.4761, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1907, "sent_len_1": 66.7803, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9875, "stdk": 0.0475, "stdq": 0.0463, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1129, "doc_norm": 1.4796, "encoder_q-embeddings": 189.6752, "encoder_q-layer.0": 136.186, "encoder_q-layer.1": 159.8662, "encoder_q-layer.10": 93.8432, "encoder_q-layer.11": 195.2675, "encoder_q-layer.2": 182.7782, "encoder_q-layer.3": 200.2626, "encoder_q-layer.4": 193.9775, "encoder_q-layer.5": 188.4544, "encoder_q-layer.6": 207.2174, "encoder_q-layer.7": 155.3115, "encoder_q-layer.8": 112.7115, "encoder_q-layer.9": 85.7325, "epoch": 0.22, "inbatch_neg_score": 0.484, "inbatch_pos_score": 1.1543, "learning_rate": 4.2888888888888886e-05, "loss": 3.1129, "norm_diff": 0.1079, "norm_loss": 0.0, "num_token_doc": 66.7175, "num_token_overlap": 17.9925, "num_token_query": 52.2214, "num_token_union": 73.4475, "num_word_context": 202.4089, "num_word_doc": 49.7578, "num_word_query": 39.8141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 247.9052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4849, "query_norm": 1.5875, "queue_k_norm": 1.4764, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2214, "sent_len_1": 66.7175, "sent_len_max_0": 127.995, "sent_len_max_1": 191.1763, "stdk": 0.0474, "stdq": 0.0459, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.0831, "doc_norm": 1.4762, "encoder_q-embeddings": 1208.1199, "encoder_q-layer.0": 890.9359, "encoder_q-layer.1": 948.5192, "encoder_q-layer.10": 87.7847, "encoder_q-layer.11": 217.6699, "encoder_q-layer.2": 999.6017, "encoder_q-layer.3": 1113.9401, "encoder_q-layer.4": 972.0968, "encoder_q-layer.5": 679.501, "encoder_q-layer.6": 638.4525, "encoder_q-layer.7": 446.0575, "encoder_q-layer.8": 256.2713, "encoder_q-layer.9": 102.7836, "epoch": 0.22, "inbatch_neg_score": 0.5146, "inbatch_pos_score": 1.1602, "learning_rate": 4.2833333333333335e-05, "loss": 3.0831, "norm_diff": 0.0946, "norm_loss": 0.0, "num_token_doc": 66.8781, "num_token_overlap": 17.9904, "num_token_query": 52.0967, "num_token_union": 73.4271, "num_word_context": 201.9943, "num_word_doc": 49.8603, "num_word_query": 39.6974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1187.59, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5142, "query_norm": 1.5708, "queue_k_norm": 1.4777, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0967, "sent_len_1": 66.8781, "sent_len_max_0": 128.0, "sent_len_max_1": 192.96, "stdk": 0.0472, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0885, "doc_norm": 1.4814, "encoder_q-embeddings": 329.1317, "encoder_q-layer.0": 235.7286, "encoder_q-layer.1": 252.4352, "encoder_q-layer.10": 84.4763, "encoder_q-layer.11": 200.2721, "encoder_q-layer.2": 281.0733, "encoder_q-layer.3": 289.7133, "encoder_q-layer.4": 272.3019, "encoder_q-layer.5": 203.0167, "encoder_q-layer.6": 180.7799, "encoder_q-layer.7": 148.0781, "encoder_q-layer.8": 130.5185, "encoder_q-layer.9": 93.7898, "epoch": 0.22, "inbatch_neg_score": 0.4945, "inbatch_pos_score": 1.1611, "learning_rate": 4.277777777777778e-05, "loss": 3.0885, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.6, "num_token_overlap": 18.0433, "num_token_query": 52.3449, "num_token_union": 73.3923, "num_word_context": 201.7063, "num_word_doc": 49.6905, "num_word_query": 39.9101, "postclip_grad_norm": 1.0, "preclip_grad_norm": 336.6307, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4937, "query_norm": 1.6008, "queue_k_norm": 1.48, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3449, "sent_len_1": 66.6, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7912, "stdk": 0.0473, "stdq": 0.046, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1082, "doc_norm": 1.486, "encoder_q-embeddings": 205.2889, "encoder_q-layer.0": 143.6405, "encoder_q-layer.1": 165.7166, "encoder_q-layer.10": 89.2541, "encoder_q-layer.11": 191.613, "encoder_q-layer.2": 190.2095, "encoder_q-layer.3": 204.9322, "encoder_q-layer.4": 210.0544, "encoder_q-layer.5": 208.7235, "encoder_q-layer.6": 189.3333, "encoder_q-layer.7": 205.0838, "encoder_q-layer.8": 172.5997, "encoder_q-layer.9": 112.1672, "epoch": 0.23, "inbatch_neg_score": 0.482, "inbatch_pos_score": 1.1699, "learning_rate": 4.272222222222223e-05, "loss": 3.1082, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.9529, "num_token_overlap": 17.9747, "num_token_query": 52.136, "num_token_union": 73.5436, "num_word_context": 202.6493, "num_word_doc": 50.0069, "num_word_query": 39.7648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 265.5557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4807, "query_norm": 1.5911, "queue_k_norm": 1.481, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.136, "sent_len_1": 66.9529, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.68, "stdk": 0.0474, "stdq": 0.0459, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.0837, "doc_norm": 1.4848, "encoder_q-embeddings": 566.7886, "encoder_q-layer.0": 424.9199, "encoder_q-layer.1": 361.5637, "encoder_q-layer.10": 94.654, "encoder_q-layer.11": 205.8689, "encoder_q-layer.2": 354.8081, "encoder_q-layer.3": 350.6543, "encoder_q-layer.4": 308.4051, "encoder_q-layer.5": 253.7876, "encoder_q-layer.6": 223.9309, "encoder_q-layer.7": 207.4672, "encoder_q-layer.8": 151.7598, "encoder_q-layer.9": 91.652, "epoch": 0.23, "inbatch_neg_score": 0.4799, "inbatch_pos_score": 1.1406, "learning_rate": 4.266666666666667e-05, "loss": 3.0837, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.9532, "num_token_overlap": 17.9907, "num_token_query": 52.2168, "num_token_union": 73.6221, "num_word_context": 202.4399, "num_word_doc": 49.9595, "num_word_query": 39.8059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 481.0915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4785, "query_norm": 1.582, "queue_k_norm": 1.4821, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2168, "sent_len_1": 66.9532, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1188, "stdk": 0.0474, "stdq": 0.0463, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0982, "doc_norm": 1.485, "encoder_q-embeddings": 274.8328, "encoder_q-layer.0": 189.8619, "encoder_q-layer.1": 235.3315, "encoder_q-layer.10": 84.8593, "encoder_q-layer.11": 190.91, "encoder_q-layer.2": 225.7102, "encoder_q-layer.3": 236.6699, "encoder_q-layer.4": 269.9087, "encoder_q-layer.5": 311.9914, "encoder_q-layer.6": 284.6429, "encoder_q-layer.7": 256.0313, "encoder_q-layer.8": 167.9914, "encoder_q-layer.9": 92.5293, "epoch": 0.23, "inbatch_neg_score": 0.4776, "inbatch_pos_score": 1.1562, "learning_rate": 4.261111111111111e-05, "loss": 3.0982, "norm_diff": 0.0755, "norm_loss": 0.0, "num_token_doc": 66.5442, "num_token_overlap": 17.9303, "num_token_query": 52.2595, "num_token_union": 73.4396, "num_word_context": 202.3576, "num_word_doc": 49.6972, "num_word_query": 39.8586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 335.7034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.478, "query_norm": 1.5605, "queue_k_norm": 1.4811, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2595, "sent_len_1": 66.5442, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.08, "stdk": 0.0474, "stdq": 0.0456, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.0822, "doc_norm": 1.4879, "encoder_q-embeddings": 193.0284, "encoder_q-layer.0": 134.6991, "encoder_q-layer.1": 152.8991, "encoder_q-layer.10": 81.8181, "encoder_q-layer.11": 194.0113, "encoder_q-layer.2": 186.4364, "encoder_q-layer.3": 203.7603, "encoder_q-layer.4": 192.1024, "encoder_q-layer.5": 180.7133, "encoder_q-layer.6": 191.1046, "encoder_q-layer.7": 158.1634, "encoder_q-layer.8": 149.6643, "encoder_q-layer.9": 101.6703, "epoch": 0.23, "inbatch_neg_score": 0.4718, "inbatch_pos_score": 1.123, "learning_rate": 4.255555555555556e-05, "loss": 3.0822, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.7726, "num_token_overlap": 17.9393, "num_token_query": 52.1975, "num_token_union": 73.515, "num_word_context": 202.5962, "num_word_doc": 49.846, "num_word_query": 39.8022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 249.3651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.47, "query_norm": 1.5222, "queue_k_norm": 1.4856, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1975, "sent_len_1": 66.7726, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9638, "stdk": 0.0475, "stdq": 0.0449, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0793, "doc_norm": 1.4777, "encoder_q-embeddings": 448.9684, "encoder_q-layer.0": 262.7183, "encoder_q-layer.1": 236.5188, "encoder_q-layer.10": 86.7551, "encoder_q-layer.11": 195.7046, "encoder_q-layer.2": 236.0393, "encoder_q-layer.3": 255.8833, "encoder_q-layer.4": 242.9335, "encoder_q-layer.5": 206.1704, "encoder_q-layer.6": 207.1372, "encoder_q-layer.7": 178.5158, "encoder_q-layer.8": 137.6468, "encoder_q-layer.9": 94.2165, "epoch": 0.23, "inbatch_neg_score": 0.4687, "inbatch_pos_score": 1.126, "learning_rate": 4.25e-05, "loss": 3.0793, "norm_diff": 0.0585, "norm_loss": 0.0, "num_token_doc": 66.7791, "num_token_overlap": 18.0474, "num_token_query": 52.221, "num_token_union": 73.382, "num_word_context": 202.4675, "num_word_doc": 49.8395, "num_word_query": 39.8024, "postclip_grad_norm": 1.0, "preclip_grad_norm": 375.6216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4678, "query_norm": 1.5362, "queue_k_norm": 1.4829, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.221, "sent_len_1": 66.7791, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.425, "stdk": 0.0471, "stdq": 0.0454, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0836, "doc_norm": 1.4775, "encoder_q-embeddings": 301.3139, "encoder_q-layer.0": 193.1928, "encoder_q-layer.1": 219.7179, "encoder_q-layer.10": 80.5774, "encoder_q-layer.11": 186.4205, "encoder_q-layer.2": 276.4922, "encoder_q-layer.3": 301.9736, "encoder_q-layer.4": 289.4658, "encoder_q-layer.5": 250.3948, "encoder_q-layer.6": 269.9125, "encoder_q-layer.7": 211.0788, "encoder_q-layer.8": 146.7578, "encoder_q-layer.9": 81.3939, "epoch": 0.23, "inbatch_neg_score": 0.4386, "inbatch_pos_score": 1.084, "learning_rate": 4.2444444444444445e-05, "loss": 3.0836, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.634, "num_token_overlap": 18.0325, "num_token_query": 52.2244, "num_token_union": 73.3574, "num_word_context": 201.9576, "num_word_doc": 49.7315, "num_word_query": 39.8204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 345.8768, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.438, "query_norm": 1.4984, "queue_k_norm": 1.4815, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2244, "sent_len_1": 66.634, "sent_len_max_0": 128.0, "sent_len_max_1": 188.925, "stdk": 0.0471, "stdq": 0.0449, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0731, "doc_norm": 1.4867, "encoder_q-embeddings": 1146.4254, "encoder_q-layer.0": 824.1462, "encoder_q-layer.1": 868.6561, "encoder_q-layer.10": 179.1551, "encoder_q-layer.11": 363.3029, "encoder_q-layer.2": 1008.4242, "encoder_q-layer.3": 1158.5281, "encoder_q-layer.4": 1034.8939, "encoder_q-layer.5": 902.6648, "encoder_q-layer.6": 824.6379, "encoder_q-layer.7": 544.3973, "encoder_q-layer.8": 283.1235, "encoder_q-layer.9": 171.3972, "epoch": 0.23, "inbatch_neg_score": 0.4118, "inbatch_pos_score": 1.0889, "learning_rate": 4.238888888888889e-05, "loss": 3.0731, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.9363, "num_token_overlap": 18.0498, "num_token_query": 52.2831, "num_token_union": 73.5233, "num_word_context": 202.534, "num_word_doc": 49.9181, "num_word_query": 39.8213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1205.0435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4111, "query_norm": 1.5093, "queue_k_norm": 1.4823, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2831, "sent_len_1": 66.9363, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.93, "stdk": 0.0475, "stdq": 0.0456, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.0659, "doc_norm": 1.487, "encoder_q-embeddings": 740.8259, "encoder_q-layer.0": 538.7769, "encoder_q-layer.1": 620.5818, "encoder_q-layer.10": 204.3386, "encoder_q-layer.11": 406.8948, "encoder_q-layer.2": 727.1633, "encoder_q-layer.3": 709.401, "encoder_q-layer.4": 667.3841, "encoder_q-layer.5": 500.033, "encoder_q-layer.6": 440.9147, "encoder_q-layer.7": 380.8356, "encoder_q-layer.8": 315.734, "encoder_q-layer.9": 197.0892, "epoch": 0.23, "inbatch_neg_score": 0.4183, "inbatch_pos_score": 1.1035, "learning_rate": 4.233333333333334e-05, "loss": 3.0659, "norm_diff": 0.0308, "norm_loss": 0.0, "num_token_doc": 66.8817, "num_token_overlap": 18.1503, "num_token_query": 52.5929, "num_token_union": 73.6152, "num_word_context": 202.5354, "num_word_doc": 49.9353, "num_word_query": 40.1044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 793.8455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.418, "query_norm": 1.5178, "queue_k_norm": 1.4791, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.5929, "sent_len_1": 66.8817, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4025, "stdk": 0.0476, "stdq": 0.0458, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0888, "doc_norm": 1.475, "encoder_q-embeddings": 640.7091, "encoder_q-layer.0": 465.9058, "encoder_q-layer.1": 509.1301, "encoder_q-layer.10": 162.9137, "encoder_q-layer.11": 356.3428, "encoder_q-layer.2": 590.9955, "encoder_q-layer.3": 601.702, "encoder_q-layer.4": 633.7676, "encoder_q-layer.5": 573.8953, "encoder_q-layer.6": 544.4233, "encoder_q-layer.7": 583.5178, "encoder_q-layer.8": 420.3629, "encoder_q-layer.9": 245.6542, "epoch": 0.23, "inbatch_neg_score": 0.4094, "inbatch_pos_score": 1.0703, "learning_rate": 4.227777777777778e-05, "loss": 3.0888, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.759, "num_token_overlap": 17.9642, "num_token_query": 52.1886, "num_token_union": 73.4091, "num_word_context": 201.8703, "num_word_doc": 49.755, "num_word_query": 39.7577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 757.3262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4082, "query_norm": 1.4968, "queue_k_norm": 1.4776, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1886, "sent_len_1": 66.759, "sent_len_max_0": 127.9762, "sent_len_max_1": 190.7375, "stdk": 0.0471, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.085, "doc_norm": 1.4862, "encoder_q-embeddings": 285.2789, "encoder_q-layer.0": 189.4059, "encoder_q-layer.1": 224.2461, "encoder_q-layer.10": 173.349, "encoder_q-layer.11": 376.7312, "encoder_q-layer.2": 257.5588, "encoder_q-layer.3": 270.6287, "encoder_q-layer.4": 278.6102, "encoder_q-layer.5": 243.3283, "encoder_q-layer.6": 261.4431, "encoder_q-layer.7": 239.4786, "encoder_q-layer.8": 257.2597, "encoder_q-layer.9": 212.6516, "epoch": 0.23, "inbatch_neg_score": 0.3982, "inbatch_pos_score": 1.083, "learning_rate": 4.222222222222222e-05, "loss": 3.085, "norm_diff": 0.0267, "norm_loss": 0.0, "num_token_doc": 66.6381, "num_token_overlap": 17.9844, "num_token_query": 52.1567, "num_token_union": 73.3748, "num_word_context": 202.2571, "num_word_doc": 49.7469, "num_word_query": 39.7775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 379.9806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3972, "query_norm": 1.5129, "queue_k_norm": 1.4766, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1567, "sent_len_1": 66.6381, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.93, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0877, "doc_norm": 1.4777, "encoder_q-embeddings": 408.3729, "encoder_q-layer.0": 292.3329, "encoder_q-layer.1": 318.8008, "encoder_q-layer.10": 174.7412, "encoder_q-layer.11": 424.5184, "encoder_q-layer.2": 375.0935, "encoder_q-layer.3": 390.7892, "encoder_q-layer.4": 394.8119, "encoder_q-layer.5": 344.1252, "encoder_q-layer.6": 330.7174, "encoder_q-layer.7": 250.6802, "encoder_q-layer.8": 217.2907, "encoder_q-layer.9": 164.1678, "epoch": 0.24, "inbatch_neg_score": 0.4171, "inbatch_pos_score": 1.0938, "learning_rate": 4.216666666666667e-05, "loss": 3.0877, "norm_diff": 0.0468, "norm_loss": 0.0, "num_token_doc": 66.8711, "num_token_overlap": 18.0649, "num_token_query": 52.2807, "num_token_union": 73.5147, "num_word_context": 202.4429, "num_word_doc": 49.9009, "num_word_query": 39.8381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 489.4791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4158, "query_norm": 1.5245, "queue_k_norm": 1.4767, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2807, "sent_len_1": 66.8711, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3075, "stdk": 0.0475, "stdq": 0.0463, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0818, "doc_norm": 1.4717, "encoder_q-embeddings": 2055.5454, "encoder_q-layer.0": 1448.2904, "encoder_q-layer.1": 1560.7885, "encoder_q-layer.10": 174.5951, "encoder_q-layer.11": 388.4536, "encoder_q-layer.2": 1698.6815, "encoder_q-layer.3": 1777.791, "encoder_q-layer.4": 1906.3176, "encoder_q-layer.5": 1643.5049, "encoder_q-layer.6": 1400.0726, "encoder_q-layer.7": 923.6398, "encoder_q-layer.8": 549.6808, "encoder_q-layer.9": 176.1926, "epoch": 0.24, "inbatch_neg_score": 0.4061, "inbatch_pos_score": 1.0889, "learning_rate": 4.211111111111111e-05, "loss": 3.0818, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.9599, "num_token_overlap": 18.0094, "num_token_query": 52.4083, "num_token_union": 73.7312, "num_word_context": 202.8306, "num_word_doc": 49.9761, "num_word_query": 40.006, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2102.0007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4043, "query_norm": 1.488, "queue_k_norm": 1.4736, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4083, "sent_len_1": 66.9599, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5962, "stdk": 0.0473, "stdq": 0.0456, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.0924, "doc_norm": 1.4687, "encoder_q-embeddings": 1314.6216, "encoder_q-layer.0": 1025.7914, "encoder_q-layer.1": 1086.819, "encoder_q-layer.10": 179.3866, "encoder_q-layer.11": 416.0447, "encoder_q-layer.2": 1184.623, "encoder_q-layer.3": 1285.334, "encoder_q-layer.4": 1416.8396, "encoder_q-layer.5": 1517.036, "encoder_q-layer.6": 1430.5396, "encoder_q-layer.7": 1021.864, "encoder_q-layer.8": 448.6671, "encoder_q-layer.9": 186.5857, "epoch": 0.24, "inbatch_neg_score": 0.4076, "inbatch_pos_score": 1.0547, "learning_rate": 4.205555555555556e-05, "loss": 3.0924, "norm_diff": 0.023, "norm_loss": 0.0, "num_token_doc": 66.5806, "num_token_overlap": 18.0012, "num_token_query": 52.4389, "num_token_union": 73.4633, "num_word_context": 202.4146, "num_word_doc": 49.7213, "num_word_query": 40.0034, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1571.2348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4077, "query_norm": 1.4867, "queue_k_norm": 1.4706, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4389, "sent_len_1": 66.5806, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2375, "stdk": 0.0473, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0674, "doc_norm": 1.4706, "encoder_q-embeddings": 696.1378, "encoder_q-layer.0": 470.7051, "encoder_q-layer.1": 532.0753, "encoder_q-layer.10": 196.4542, "encoder_q-layer.11": 394.8355, "encoder_q-layer.2": 603.7975, "encoder_q-layer.3": 640.3605, "encoder_q-layer.4": 586.3193, "encoder_q-layer.5": 477.6132, "encoder_q-layer.6": 486.5731, "encoder_q-layer.7": 418.6686, "encoder_q-layer.8": 257.5147, "encoder_q-layer.9": 171.852, "epoch": 0.24, "inbatch_neg_score": 0.399, "inbatch_pos_score": 1.0654, "learning_rate": 4.2e-05, "loss": 3.0674, "norm_diff": 0.0205, "norm_loss": 0.0, "num_token_doc": 66.7956, "num_token_overlap": 18.0358, "num_token_query": 52.0395, "num_token_union": 73.3442, "num_word_context": 202.0011, "num_word_doc": 49.8534, "num_word_query": 39.6398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 744.4092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3982, "query_norm": 1.4911, "queue_k_norm": 1.4715, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0395, "sent_len_1": 66.7956, "sent_len_max_0": 127.9963, "sent_len_max_1": 186.785, "stdk": 0.0474, "stdq": 0.0456, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0927, "doc_norm": 1.4654, "encoder_q-embeddings": 1934.062, "encoder_q-layer.0": 1374.6498, "encoder_q-layer.1": 1427.9517, "encoder_q-layer.10": 178.3819, "encoder_q-layer.11": 403.8871, "encoder_q-layer.2": 1391.6914, "encoder_q-layer.3": 1324.5753, "encoder_q-layer.4": 1235.6305, "encoder_q-layer.5": 1190.2307, "encoder_q-layer.6": 1105.288, "encoder_q-layer.7": 886.2276, "encoder_q-layer.8": 438.035, "encoder_q-layer.9": 173.6656, "epoch": 0.24, "inbatch_neg_score": 0.3929, "inbatch_pos_score": 1.0576, "learning_rate": 4.194444444444445e-05, "loss": 3.0927, "norm_diff": 0.0256, "norm_loss": 0.0, "num_token_doc": 66.6721, "num_token_overlap": 17.976, "num_token_query": 52.2103, "num_token_union": 73.3861, "num_word_context": 202.054, "num_word_doc": 49.7331, "num_word_query": 39.7981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1765.066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3923, "query_norm": 1.491, "queue_k_norm": 1.4685, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2103, "sent_len_1": 66.6721, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.52, "stdk": 0.0472, "stdq": 0.0452, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0833, "doc_norm": 1.4679, "encoder_q-embeddings": 192.8746, "encoder_q-layer.0": 137.8992, "encoder_q-layer.1": 143.0804, "encoder_q-layer.10": 167.7784, "encoder_q-layer.11": 391.9118, "encoder_q-layer.2": 147.6704, "encoder_q-layer.3": 150.8228, "encoder_q-layer.4": 162.4998, "encoder_q-layer.5": 154.5892, "encoder_q-layer.6": 171.5388, "encoder_q-layer.7": 173.4062, "encoder_q-layer.8": 178.9436, "encoder_q-layer.9": 167.2029, "epoch": 0.24, "inbatch_neg_score": 0.4079, "inbatch_pos_score": 1.0752, "learning_rate": 4.188888888888889e-05, "loss": 3.0833, "norm_diff": 0.0609, "norm_loss": 0.0, "num_token_doc": 66.9015, "num_token_overlap": 18.0602, "num_token_query": 52.1545, "num_token_union": 73.4431, "num_word_context": 202.2878, "num_word_doc": 49.9002, "num_word_query": 39.7503, "postclip_grad_norm": 1.0, "preclip_grad_norm": 288.6322, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4062, "query_norm": 1.5288, "queue_k_norm": 1.4689, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1545, "sent_len_1": 66.9015, "sent_len_max_0": 127.99, "sent_len_max_1": 190.585, "stdk": 0.0474, "stdq": 0.046, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0769, "doc_norm": 1.4663, "encoder_q-embeddings": 594.2791, "encoder_q-layer.0": 412.6227, "encoder_q-layer.1": 357.5323, "encoder_q-layer.10": 176.5187, "encoder_q-layer.11": 368.8151, "encoder_q-layer.2": 324.4788, "encoder_q-layer.3": 324.3996, "encoder_q-layer.4": 291.1202, "encoder_q-layer.5": 265.0558, "encoder_q-layer.6": 276.4675, "encoder_q-layer.7": 277.6774, "encoder_q-layer.8": 212.2176, "encoder_q-layer.9": 166.0934, "epoch": 0.24, "inbatch_neg_score": 0.3976, "inbatch_pos_score": 1.0684, "learning_rate": 4.183333333333334e-05, "loss": 3.0769, "norm_diff": 0.0373, "norm_loss": 0.0, "num_token_doc": 66.8445, "num_token_overlap": 17.9972, "num_token_query": 52.2402, "num_token_union": 73.5046, "num_word_context": 202.3488, "num_word_doc": 49.8806, "num_word_query": 39.8305, "postclip_grad_norm": 1.0, "preclip_grad_norm": 522.6508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3967, "query_norm": 1.5036, "queue_k_norm": 1.4678, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2402, "sent_len_1": 66.8445, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.21, "stdk": 0.0473, "stdq": 0.0453, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0873, "doc_norm": 1.4689, "encoder_q-embeddings": 705.9901, "encoder_q-layer.0": 482.597, "encoder_q-layer.1": 558.2458, "encoder_q-layer.10": 172.1736, "encoder_q-layer.11": 373.651, "encoder_q-layer.2": 676.6336, "encoder_q-layer.3": 693.0427, "encoder_q-layer.4": 745.3134, "encoder_q-layer.5": 723.0857, "encoder_q-layer.6": 421.4293, "encoder_q-layer.7": 281.0582, "encoder_q-layer.8": 244.6503, "encoder_q-layer.9": 171.1113, "epoch": 0.24, "inbatch_neg_score": 0.4065, "inbatch_pos_score": 1.0781, "learning_rate": 4.177777777777778e-05, "loss": 3.0873, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 66.6022, "num_token_overlap": 17.9153, "num_token_query": 52.1846, "num_token_union": 73.3867, "num_word_context": 202.1727, "num_word_doc": 49.6873, "num_word_query": 39.7646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 784.4528, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4053, "query_norm": 1.5125, "queue_k_norm": 1.4671, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1846, "sent_len_1": 66.6022, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6587, "stdk": 0.0475, "stdq": 0.0455, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.0681, "doc_norm": 1.4622, "encoder_q-embeddings": 6165.3936, "encoder_q-layer.0": 4655.833, "encoder_q-layer.1": 5591.3926, "encoder_q-layer.10": 211.3628, "encoder_q-layer.11": 380.9084, "encoder_q-layer.2": 6388.1821, "encoder_q-layer.3": 7198.2085, "encoder_q-layer.4": 8212.3516, "encoder_q-layer.5": 7357.1001, "encoder_q-layer.6": 6938.3018, "encoder_q-layer.7": 4461.2314, "encoder_q-layer.8": 2418.0706, "encoder_q-layer.9": 723.0977, "epoch": 0.24, "inbatch_neg_score": 0.427, "inbatch_pos_score": 1.1387, "learning_rate": 4.172222222222222e-05, "loss": 3.0681, "norm_diff": 0.0932, "norm_loss": 0.0, "num_token_doc": 66.9183, "num_token_overlap": 18.0811, "num_token_query": 52.2449, "num_token_union": 73.5043, "num_word_context": 202.6035, "num_word_doc": 49.9111, "num_word_query": 39.8381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7986.2123, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4265, "query_norm": 1.5555, "queue_k_norm": 1.4652, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2449, "sent_len_1": 66.9183, "sent_len_max_0": 128.0, "sent_len_max_1": 191.175, "stdk": 0.0472, "stdq": 0.047, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0782, "doc_norm": 1.4659, "encoder_q-embeddings": 867.9759, "encoder_q-layer.0": 628.0694, "encoder_q-layer.1": 764.8251, "encoder_q-layer.10": 177.465, "encoder_q-layer.11": 393.8936, "encoder_q-layer.2": 682.5244, "encoder_q-layer.3": 680.7083, "encoder_q-layer.4": 665.7143, "encoder_q-layer.5": 562.5139, "encoder_q-layer.6": 553.4585, "encoder_q-layer.7": 495.874, "encoder_q-layer.8": 411.62, "encoder_q-layer.9": 234.3676, "epoch": 0.24, "inbatch_neg_score": 0.4161, "inbatch_pos_score": 1.1016, "learning_rate": 4.166666666666667e-05, "loss": 3.0782, "norm_diff": 0.0778, "norm_loss": 0.0, "num_token_doc": 66.662, "num_token_overlap": 17.9605, "num_token_query": 52.0463, "num_token_union": 73.2952, "num_word_context": 202.3447, "num_word_doc": 49.7376, "num_word_query": 39.6853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 881.542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4148, "query_norm": 1.5438, "queue_k_norm": 1.4673, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0463, "sent_len_1": 66.662, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.6012, "stdk": 0.0473, "stdq": 0.0466, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0444, "doc_norm": 1.4682, "encoder_q-embeddings": 834.2312, "encoder_q-layer.0": 601.6187, "encoder_q-layer.1": 665.81, "encoder_q-layer.10": 171.4153, "encoder_q-layer.11": 387.3831, "encoder_q-layer.2": 898.5089, "encoder_q-layer.3": 841.7404, "encoder_q-layer.4": 724.9435, "encoder_q-layer.5": 500.6773, "encoder_q-layer.6": 380.7509, "encoder_q-layer.7": 292.2675, "encoder_q-layer.8": 263.0122, "encoder_q-layer.9": 198.5851, "epoch": 0.25, "inbatch_neg_score": 0.4321, "inbatch_pos_score": 1.1357, "learning_rate": 4.1611111111111114e-05, "loss": 3.0444, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.8678, "num_token_overlap": 18.012, "num_token_query": 52.2965, "num_token_union": 73.589, "num_word_context": 202.4722, "num_word_doc": 49.8933, "num_word_query": 39.8766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 885.1489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4304, "query_norm": 1.5607, "queue_k_norm": 1.4665, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2965, "sent_len_1": 66.8678, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.4825, "stdk": 0.0474, "stdq": 0.0471, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.0638, "doc_norm": 1.4647, "encoder_q-embeddings": 271.5754, "encoder_q-layer.0": 226.7864, "encoder_q-layer.1": 219.3616, "encoder_q-layer.10": 169.2865, "encoder_q-layer.11": 388.7791, "encoder_q-layer.2": 230.897, "encoder_q-layer.3": 239.7117, "encoder_q-layer.4": 239.8885, "encoder_q-layer.5": 221.572, "encoder_q-layer.6": 213.4257, "encoder_q-layer.7": 225.862, "encoder_q-layer.8": 234.169, "encoder_q-layer.9": 180.972, "epoch": 0.25, "inbatch_neg_score": 0.4257, "inbatch_pos_score": 1.1299, "learning_rate": 4.155555555555556e-05, "loss": 3.0638, "norm_diff": 0.081, "norm_loss": 0.0, "num_token_doc": 66.7986, "num_token_overlap": 18.0252, "num_token_query": 52.2407, "num_token_union": 73.4626, "num_word_context": 202.4123, "num_word_doc": 49.8318, "num_word_query": 39.8068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 366.4935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4253, "query_norm": 1.5457, "queue_k_norm": 1.4669, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2407, "sent_len_1": 66.7986, "sent_len_max_0": 127.995, "sent_len_max_1": 190.55, "stdk": 0.0473, "stdq": 0.0466, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0734, "doc_norm": 1.4682, "encoder_q-embeddings": 449.1355, "encoder_q-layer.0": 332.8273, "encoder_q-layer.1": 385.0852, "encoder_q-layer.10": 168.2255, "encoder_q-layer.11": 384.1333, "encoder_q-layer.2": 475.9796, "encoder_q-layer.3": 476.6636, "encoder_q-layer.4": 467.4928, "encoder_q-layer.5": 395.4899, "encoder_q-layer.6": 327.605, "encoder_q-layer.7": 281.059, "encoder_q-layer.8": 252.2096, "encoder_q-layer.9": 181.0627, "epoch": 0.25, "inbatch_neg_score": 0.4217, "inbatch_pos_score": 1.0889, "learning_rate": 4.15e-05, "loss": 3.0734, "norm_diff": 0.0467, "norm_loss": 0.0, "num_token_doc": 66.8472, "num_token_overlap": 17.9826, "num_token_query": 52.2576, "num_token_union": 73.546, "num_word_context": 202.5129, "num_word_doc": 49.8849, "num_word_query": 39.8426, "postclip_grad_norm": 1.0, "preclip_grad_norm": 548.0946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4207, "query_norm": 1.5149, "queue_k_norm": 1.4691, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2576, "sent_len_1": 66.8472, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.0762, "stdk": 0.0474, "stdq": 0.046, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0678, "doc_norm": 1.4686, "encoder_q-embeddings": 1088.4644, "encoder_q-layer.0": 769.6172, "encoder_q-layer.1": 1028.9476, "encoder_q-layer.10": 180.6922, "encoder_q-layer.11": 375.3108, "encoder_q-layer.2": 1121.1764, "encoder_q-layer.3": 1133.1962, "encoder_q-layer.4": 1135.3816, "encoder_q-layer.5": 994.632, "encoder_q-layer.6": 702.5886, "encoder_q-layer.7": 517.8927, "encoder_q-layer.8": 379.9826, "encoder_q-layer.9": 216.2842, "epoch": 0.25, "inbatch_neg_score": 0.4151, "inbatch_pos_score": 1.0869, "learning_rate": 4.144444444444445e-05, "loss": 3.0678, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.7657, "num_token_overlap": 18.0191, "num_token_query": 52.3374, "num_token_union": 73.4532, "num_word_context": 202.5418, "num_word_doc": 49.7637, "num_word_query": 39.8782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1218.6897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4143, "query_norm": 1.5078, "queue_k_norm": 1.4694, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3374, "sent_len_1": 66.7657, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5662, "stdk": 0.0475, "stdq": 0.0459, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0593, "doc_norm": 1.4729, "encoder_q-embeddings": 841.868, "encoder_q-layer.0": 573.3835, "encoder_q-layer.1": 675.0479, "encoder_q-layer.10": 162.4917, "encoder_q-layer.11": 355.1531, "encoder_q-layer.2": 799.2076, "encoder_q-layer.3": 835.2402, "encoder_q-layer.4": 858.4279, "encoder_q-layer.5": 820.1008, "encoder_q-layer.6": 602.8568, "encoder_q-layer.7": 419.9673, "encoder_q-layer.8": 350.5243, "encoder_q-layer.9": 213.1662, "epoch": 0.25, "inbatch_neg_score": 0.388, "inbatch_pos_score": 1.0566, "learning_rate": 4.138888888888889e-05, "loss": 3.0593, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.7284, "num_token_overlap": 17.9959, "num_token_query": 52.2643, "num_token_union": 73.477, "num_word_context": 202.0198, "num_word_doc": 49.7345, "num_word_query": 39.8358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 949.7284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3875, "query_norm": 1.4868, "queue_k_norm": 1.4678, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2643, "sent_len_1": 66.7284, "sent_len_max_0": 128.0, "sent_len_max_1": 189.175, "stdk": 0.0476, "stdq": 0.0454, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.0543, "doc_norm": 1.4706, "encoder_q-embeddings": 8390.2373, "encoder_q-layer.0": 6692.3403, "encoder_q-layer.1": 7102.4072, "encoder_q-layer.10": 391.2895, "encoder_q-layer.11": 432.7748, "encoder_q-layer.2": 7865.0132, "encoder_q-layer.3": 8340.7354, "encoder_q-layer.4": 9257.2793, "encoder_q-layer.5": 9266.3359, "encoder_q-layer.6": 9156.2432, "encoder_q-layer.7": 9467.4941, "encoder_q-layer.8": 9295.7051, "encoder_q-layer.9": 3827.9844, "epoch": 0.25, "inbatch_neg_score": 0.3971, "inbatch_pos_score": 1.0635, "learning_rate": 4.133333333333333e-05, "loss": 3.0543, "norm_diff": 0.046, "norm_loss": 0.0, "num_token_doc": 66.8244, "num_token_overlap": 18.0085, "num_token_query": 52.1695, "num_token_union": 73.484, "num_word_context": 202.0941, "num_word_doc": 49.8871, "num_word_query": 39.7565, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11890.0257, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3992, "query_norm": 1.5167, "queue_k_norm": 1.4657, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1695, "sent_len_1": 66.8244, "sent_len_max_0": 128.0, "sent_len_max_1": 187.2012, "stdk": 0.0476, "stdq": 0.046, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0753, "doc_norm": 1.4668, "encoder_q-embeddings": 1344.3036, "encoder_q-layer.0": 949.047, "encoder_q-layer.1": 1158.1729, "encoder_q-layer.10": 391.5417, "encoder_q-layer.11": 830.6161, "encoder_q-layer.2": 1339.7961, "encoder_q-layer.3": 1382.657, "encoder_q-layer.4": 1173.4813, "encoder_q-layer.5": 985.6564, "encoder_q-layer.6": 854.9368, "encoder_q-layer.7": 645.0363, "encoder_q-layer.8": 539.5333, "encoder_q-layer.9": 375.6522, "epoch": 0.25, "inbatch_neg_score": 0.4024, "inbatch_pos_score": 1.0703, "learning_rate": 4.127777777777778e-05, "loss": 3.0753, "norm_diff": 0.0185, "norm_loss": 0.0, "num_token_doc": 66.7405, "num_token_overlap": 17.9912, "num_token_query": 52.1712, "num_token_union": 73.4072, "num_word_context": 202.1275, "num_word_doc": 49.8092, "num_word_query": 39.7674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1481.9181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4028, "query_norm": 1.4818, "queue_k_norm": 1.4667, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1712, "sent_len_1": 66.7405, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.4762, "stdk": 0.0475, "stdq": 0.0452, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0577, "doc_norm": 1.4668, "encoder_q-embeddings": 749.7272, "encoder_q-layer.0": 533.287, "encoder_q-layer.1": 550.3793, "encoder_q-layer.10": 331.3935, "encoder_q-layer.11": 768.1472, "encoder_q-layer.2": 600.5662, "encoder_q-layer.3": 593.5965, "encoder_q-layer.4": 551.5179, "encoder_q-layer.5": 505.7109, "encoder_q-layer.6": 475.9231, "encoder_q-layer.7": 415.3048, "encoder_q-layer.8": 384.6439, "encoder_q-layer.9": 319.1674, "epoch": 0.25, "inbatch_neg_score": 0.3947, "inbatch_pos_score": 1.0762, "learning_rate": 4.1222222222222224e-05, "loss": 3.0577, "norm_diff": 0.0272, "norm_loss": 0.0, "num_token_doc": 66.8597, "num_token_overlap": 17.9642, "num_token_query": 52.0935, "num_token_union": 73.4747, "num_word_context": 202.4894, "num_word_doc": 49.8607, "num_word_query": 39.6672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 829.9698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3958, "query_norm": 1.491, "queue_k_norm": 1.4688, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0935, "sent_len_1": 66.8597, "sent_len_max_0": 127.99, "sent_len_max_1": 190.7812, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.0821, "doc_norm": 1.4581, "encoder_q-embeddings": 1359.9192, "encoder_q-layer.0": 977.4598, "encoder_q-layer.1": 1141.1365, "encoder_q-layer.10": 396.9026, "encoder_q-layer.11": 807.0627, "encoder_q-layer.2": 1348.0012, "encoder_q-layer.3": 1311.5212, "encoder_q-layer.4": 1406.7039, "encoder_q-layer.5": 1299.6193, "encoder_q-layer.6": 1206.4966, "encoder_q-layer.7": 886.2805, "encoder_q-layer.8": 606.9023, "encoder_q-layer.9": 371.2593, "epoch": 0.25, "inbatch_neg_score": 0.397, "inbatch_pos_score": 1.0527, "learning_rate": 4.116666666666667e-05, "loss": 3.0821, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.7636, "num_token_overlap": 17.9897, "num_token_query": 52.2331, "num_token_union": 73.4844, "num_word_context": 202.2094, "num_word_doc": 49.791, "num_word_query": 39.8327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1602.708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.395, "query_norm": 1.48, "queue_k_norm": 1.4651, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2331, "sent_len_1": 66.7636, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.375, "stdk": 0.0471, "stdq": 0.0458, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0528, "doc_norm": 1.465, "encoder_q-embeddings": 2103.375, "encoder_q-layer.0": 1565.1636, "encoder_q-layer.1": 1527.9465, "encoder_q-layer.10": 382.6788, "encoder_q-layer.11": 802.6389, "encoder_q-layer.2": 1587.5245, "encoder_q-layer.3": 1519.5872, "encoder_q-layer.4": 1579.1565, "encoder_q-layer.5": 1492.1007, "encoder_q-layer.6": 1559.7728, "encoder_q-layer.7": 1158.79, "encoder_q-layer.8": 537.6651, "encoder_q-layer.9": 338.4312, "epoch": 0.25, "inbatch_neg_score": 0.3972, "inbatch_pos_score": 1.0889, "learning_rate": 4.111111111111111e-05, "loss": 3.0528, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.7869, "num_token_overlap": 18.0078, "num_token_query": 52.5025, "num_token_union": 73.6133, "num_word_context": 202.5537, "num_word_doc": 49.8466, "num_word_query": 40.0304, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2075.5266, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3982, "query_norm": 1.5134, "queue_k_norm": 1.4664, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.5025, "sent_len_1": 66.7869, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4538, "stdk": 0.0474, "stdq": 0.0471, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0538, "doc_norm": 1.4664, "encoder_q-embeddings": 1610.3556, "encoder_q-layer.0": 1105.6865, "encoder_q-layer.1": 1382.8052, "encoder_q-layer.10": 169.3681, "encoder_q-layer.11": 390.5828, "encoder_q-layer.2": 1430.6124, "encoder_q-layer.3": 1475.9292, "encoder_q-layer.4": 1382.1367, "encoder_q-layer.5": 1083.0139, "encoder_q-layer.6": 1061.8467, "encoder_q-layer.7": 842.6334, "encoder_q-layer.8": 379.4025, "encoder_q-layer.9": 176.3412, "epoch": 0.25, "inbatch_neg_score": 0.3942, "inbatch_pos_score": 1.083, "learning_rate": 4.105555555555556e-05, "loss": 3.0538, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.7325, "num_token_overlap": 17.9983, "num_token_query": 52.2131, "num_token_union": 73.4211, "num_word_context": 202.1867, "num_word_doc": 49.7751, "num_word_query": 39.7795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1656.7123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3945, "query_norm": 1.4807, "queue_k_norm": 1.4664, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2131, "sent_len_1": 66.7325, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3225, "stdk": 0.0475, "stdq": 0.0458, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.069, "doc_norm": 1.4667, "encoder_q-embeddings": 560.0891, "encoder_q-layer.0": 419.7954, "encoder_q-layer.1": 442.9595, "encoder_q-layer.10": 187.6578, "encoder_q-layer.11": 380.5067, "encoder_q-layer.2": 544.9372, "encoder_q-layer.3": 495.5977, "encoder_q-layer.4": 483.2343, "encoder_q-layer.5": 442.5167, "encoder_q-layer.6": 335.7596, "encoder_q-layer.7": 291.1603, "encoder_q-layer.8": 260.6735, "encoder_q-layer.9": 178.6025, "epoch": 0.26, "inbatch_neg_score": 0.3872, "inbatch_pos_score": 1.0625, "learning_rate": 4.1e-05, "loss": 3.069, "norm_diff": 0.0236, "norm_loss": 0.0, "num_token_doc": 66.8041, "num_token_overlap": 18.0365, "num_token_query": 52.201, "num_token_union": 73.4245, "num_word_context": 202.1352, "num_word_doc": 49.8238, "num_word_query": 39.8083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 616.2379, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3862, "query_norm": 1.4903, "queue_k_norm": 1.4659, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.201, "sent_len_1": 66.8041, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.6687, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0663, "doc_norm": 1.4645, "encoder_q-embeddings": 233.9873, "encoder_q-layer.0": 154.0415, "encoder_q-layer.1": 173.8561, "encoder_q-layer.10": 176.7568, "encoder_q-layer.11": 396.5861, "encoder_q-layer.2": 193.1752, "encoder_q-layer.3": 199.7485, "encoder_q-layer.4": 211.8548, "encoder_q-layer.5": 200.4376, "encoder_q-layer.6": 210.642, "encoder_q-layer.7": 241.4871, "encoder_q-layer.8": 217.0682, "encoder_q-layer.9": 171.0254, "epoch": 0.26, "inbatch_neg_score": 0.3977, "inbatch_pos_score": 1.0615, "learning_rate": 4.094444444444445e-05, "loss": 3.0663, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.925, "num_token_overlap": 18.0473, "num_token_query": 52.335, "num_token_union": 73.5618, "num_word_context": 202.7373, "num_word_doc": 49.9172, "num_word_query": 39.9159, "postclip_grad_norm": 1.0, "preclip_grad_norm": 333.626, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3979, "query_norm": 1.4785, "queue_k_norm": 1.4628, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.335, "sent_len_1": 66.925, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2475, "stdk": 0.0475, "stdq": 0.045, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0618, "doc_norm": 1.4566, "encoder_q-embeddings": 257.1383, "encoder_q-layer.0": 174.6609, "encoder_q-layer.1": 187.3616, "encoder_q-layer.10": 178.5017, "encoder_q-layer.11": 367.291, "encoder_q-layer.2": 204.7046, "encoder_q-layer.3": 210.7661, "encoder_q-layer.4": 212.9462, "encoder_q-layer.5": 212.6463, "encoder_q-layer.6": 222.5403, "encoder_q-layer.7": 197.8082, "encoder_q-layer.8": 200.235, "encoder_q-layer.9": 168.43, "epoch": 0.26, "inbatch_neg_score": 0.3999, "inbatch_pos_score": 1.0713, "learning_rate": 4.088888888888889e-05, "loss": 3.0618, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.8363, "num_token_overlap": 18.0407, "num_token_query": 52.2576, "num_token_union": 73.4596, "num_word_context": 202.3568, "num_word_doc": 49.8862, "num_word_query": 39.8602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 328.0386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3994, "query_norm": 1.5078, "queue_k_norm": 1.4654, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2576, "sent_len_1": 66.8363, "sent_len_max_0": 127.9838, "sent_len_max_1": 191.0437, "stdk": 0.0472, "stdq": 0.0459, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0604, "doc_norm": 1.4617, "encoder_q-embeddings": 411.4898, "encoder_q-layer.0": 300.4529, "encoder_q-layer.1": 318.6606, "encoder_q-layer.10": 217.8308, "encoder_q-layer.11": 411.1024, "encoder_q-layer.2": 396.9825, "encoder_q-layer.3": 421.1217, "encoder_q-layer.4": 367.3596, "encoder_q-layer.5": 298.4291, "encoder_q-layer.6": 323.5248, "encoder_q-layer.7": 243.5538, "encoder_q-layer.8": 260.7877, "encoder_q-layer.9": 217.0623, "epoch": 0.26, "inbatch_neg_score": 0.3915, "inbatch_pos_score": 1.0723, "learning_rate": 4.0833333333333334e-05, "loss": 3.0604, "norm_diff": 0.0718, "norm_loss": 0.0, "num_token_doc": 66.5184, "num_token_overlap": 17.9887, "num_token_query": 52.2719, "num_token_union": 73.3811, "num_word_context": 202.0292, "num_word_doc": 49.6695, "num_word_query": 39.8673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 488.6725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3918, "query_norm": 1.5335, "queue_k_norm": 1.4661, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2719, "sent_len_1": 66.5184, "sent_len_max_0": 127.9988, "sent_len_max_1": 186.5588, "stdk": 0.0474, "stdq": 0.0474, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0803, "doc_norm": 1.4678, "encoder_q-embeddings": 1070.1559, "encoder_q-layer.0": 783.8254, "encoder_q-layer.1": 940.4069, "encoder_q-layer.10": 167.1505, "encoder_q-layer.11": 382.0571, "encoder_q-layer.2": 846.6689, "encoder_q-layer.3": 891.6753, "encoder_q-layer.4": 845.5353, "encoder_q-layer.5": 832.7848, "encoder_q-layer.6": 665.4041, "encoder_q-layer.7": 590.9902, "encoder_q-layer.8": 470.6806, "encoder_q-layer.9": 272.353, "epoch": 0.26, "inbatch_neg_score": 0.412, "inbatch_pos_score": 1.1006, "learning_rate": 4.0777777777777783e-05, "loss": 3.0803, "norm_diff": 0.0402, "norm_loss": 0.0, "num_token_doc": 66.651, "num_token_overlap": 17.8976, "num_token_query": 52.1116, "num_token_union": 73.4647, "num_word_context": 202.0252, "num_word_doc": 49.7998, "num_word_query": 39.7267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1104.4066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4111, "query_norm": 1.508, "queue_k_norm": 1.4652, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1116, "sent_len_1": 66.651, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8113, "stdk": 0.0476, "stdq": 0.0465, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0635, "doc_norm": 1.4658, "encoder_q-embeddings": 802.5167, "encoder_q-layer.0": 588.6282, "encoder_q-layer.1": 629.202, "encoder_q-layer.10": 166.5321, "encoder_q-layer.11": 381.1711, "encoder_q-layer.2": 747.716, "encoder_q-layer.3": 768.3688, "encoder_q-layer.4": 815.0511, "encoder_q-layer.5": 764.5928, "encoder_q-layer.6": 763.9109, "encoder_q-layer.7": 697.9861, "encoder_q-layer.8": 606.3805, "encoder_q-layer.9": 310.8983, "epoch": 0.26, "inbatch_neg_score": 0.4093, "inbatch_pos_score": 1.0762, "learning_rate": 4.0722222222222226e-05, "loss": 3.0635, "norm_diff": 0.0104, "norm_loss": 0.0, "num_token_doc": 66.9217, "num_token_overlap": 18.0466, "num_token_query": 52.2117, "num_token_union": 73.4718, "num_word_context": 202.2426, "num_word_doc": 49.9329, "num_word_query": 39.7859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 970.4833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4077, "query_norm": 1.4703, "queue_k_norm": 1.4656, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2117, "sent_len_1": 66.9217, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9263, "stdk": 0.0475, "stdq": 0.0451, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.0707, "doc_norm": 1.4667, "encoder_q-embeddings": 392.3487, "encoder_q-layer.0": 271.0556, "encoder_q-layer.1": 284.6894, "encoder_q-layer.10": 184.1104, "encoder_q-layer.11": 410.9031, "encoder_q-layer.2": 298.0558, "encoder_q-layer.3": 309.9895, "encoder_q-layer.4": 298.445, "encoder_q-layer.5": 270.2209, "encoder_q-layer.6": 281.0641, "encoder_q-layer.7": 254.9577, "encoder_q-layer.8": 226.0159, "encoder_q-layer.9": 174.4966, "epoch": 0.26, "inbatch_neg_score": 0.4135, "inbatch_pos_score": 1.0811, "learning_rate": 4.066666666666667e-05, "loss": 3.0707, "norm_diff": 0.0399, "norm_loss": 0.0, "num_token_doc": 66.6501, "num_token_overlap": 18.0094, "num_token_query": 52.2376, "num_token_union": 73.371, "num_word_context": 202.0495, "num_word_doc": 49.7531, "num_word_query": 39.8354, "postclip_grad_norm": 1.0, "preclip_grad_norm": 439.8637, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4128, "query_norm": 1.5066, "queue_k_norm": 1.462, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2376, "sent_len_1": 66.6501, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9988, "stdk": 0.0475, "stdq": 0.0465, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0419, "doc_norm": 1.4683, "encoder_q-embeddings": 2201.1208, "encoder_q-layer.0": 1661.527, "encoder_q-layer.1": 1953.9141, "encoder_q-layer.10": 182.8933, "encoder_q-layer.11": 375.7851, "encoder_q-layer.2": 2571.0786, "encoder_q-layer.3": 3385.6528, "encoder_q-layer.4": 4404.4106, "encoder_q-layer.5": 3810.1721, "encoder_q-layer.6": 2094.949, "encoder_q-layer.7": 929.9192, "encoder_q-layer.8": 654.4744, "encoder_q-layer.9": 345.6623, "epoch": 0.26, "inbatch_neg_score": 0.397, "inbatch_pos_score": 1.0801, "learning_rate": 4.061111111111111e-05, "loss": 3.0419, "norm_diff": 0.0371, "norm_loss": 0.0, "num_token_doc": 67.0837, "num_token_overlap": 18.0774, "num_token_query": 52.4224, "num_token_union": 73.706, "num_word_context": 202.7556, "num_word_doc": 50.0689, "num_word_query": 39.9519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3395.6329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3953, "query_norm": 1.5054, "queue_k_norm": 1.4645, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.4224, "sent_len_1": 67.0837, "sent_len_max_0": 128.0, "sent_len_max_1": 190.795, "stdk": 0.0476, "stdq": 0.0462, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0521, "doc_norm": 1.4702, "encoder_q-embeddings": 1441.8523, "encoder_q-layer.0": 1085.9589, "encoder_q-layer.1": 1087.7706, "encoder_q-layer.10": 206.1278, "encoder_q-layer.11": 379.8888, "encoder_q-layer.2": 1174.1666, "encoder_q-layer.3": 1118.9928, "encoder_q-layer.4": 1036.3339, "encoder_q-layer.5": 898.9897, "encoder_q-layer.6": 1001.3033, "encoder_q-layer.7": 810.6219, "encoder_q-layer.8": 583.1591, "encoder_q-layer.9": 286.3038, "epoch": 0.26, "inbatch_neg_score": 0.3936, "inbatch_pos_score": 1.0752, "learning_rate": 4.055555555555556e-05, "loss": 3.0521, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.7807, "num_token_overlap": 18.0359, "num_token_query": 52.2394, "num_token_union": 73.4392, "num_word_context": 202.347, "num_word_doc": 49.8204, "num_word_query": 39.8213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1441.083, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.5175, "queue_k_norm": 1.463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2394, "sent_len_1": 66.7807, "sent_len_max_0": 128.0, "sent_len_max_1": 188.97, "stdk": 0.0477, "stdq": 0.0465, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0677, "doc_norm": 1.4645, "encoder_q-embeddings": 317.0941, "encoder_q-layer.0": 215.5443, "encoder_q-layer.1": 240.242, "encoder_q-layer.10": 154.6544, "encoder_q-layer.11": 354.738, "encoder_q-layer.2": 266.9173, "encoder_q-layer.3": 283.6789, "encoder_q-layer.4": 276.8748, "encoder_q-layer.5": 274.0438, "encoder_q-layer.6": 288.513, "encoder_q-layer.7": 235.3443, "encoder_q-layer.8": 214.1201, "encoder_q-layer.9": 163.2818, "epoch": 0.26, "inbatch_neg_score": 0.4008, "inbatch_pos_score": 1.0801, "learning_rate": 4.05e-05, "loss": 3.0677, "norm_diff": 0.049, "norm_loss": 0.0, "num_token_doc": 66.6694, "num_token_overlap": 18.0013, "num_token_query": 52.3013, "num_token_union": 73.4509, "num_word_context": 202.4094, "num_word_doc": 49.7507, "num_word_query": 39.9092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 385.9363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3997, "query_norm": 1.5134, "queue_k_norm": 1.4627, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3013, "sent_len_1": 66.6694, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5175, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0664, "doc_norm": 1.4552, "encoder_q-embeddings": 4809.6284, "encoder_q-layer.0": 4171.4004, "encoder_q-layer.1": 5016.3462, "encoder_q-layer.10": 246.5916, "encoder_q-layer.11": 374.5891, "encoder_q-layer.2": 5338.6245, "encoder_q-layer.3": 5363.0171, "encoder_q-layer.4": 5850.7241, "encoder_q-layer.5": 5157.7681, "encoder_q-layer.6": 6696.0278, "encoder_q-layer.7": 7946.2578, "encoder_q-layer.8": 8233.083, "encoder_q-layer.9": 3102.4446, "epoch": 0.27, "inbatch_neg_score": 0.418, "inbatch_pos_score": 1.0693, "learning_rate": 4.0444444444444444e-05, "loss": 3.0664, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.8261, "num_token_overlap": 17.9346, "num_token_query": 52.1517, "num_token_union": 73.5034, "num_word_context": 202.2025, "num_word_doc": 49.8173, "num_word_query": 39.7308, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8245.1277, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4163, "query_norm": 1.4965, "queue_k_norm": 1.4633, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1517, "sent_len_1": 66.8261, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2688, "stdk": 0.0471, "stdq": 0.0454, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0585, "doc_norm": 1.4634, "encoder_q-embeddings": 719.2939, "encoder_q-layer.0": 547.1489, "encoder_q-layer.1": 595.2042, "encoder_q-layer.10": 182.9549, "encoder_q-layer.11": 363.5573, "encoder_q-layer.2": 666.5976, "encoder_q-layer.3": 619.7446, "encoder_q-layer.4": 581.4655, "encoder_q-layer.5": 490.9277, "encoder_q-layer.6": 484.727, "encoder_q-layer.7": 419.4321, "encoder_q-layer.8": 286.5667, "encoder_q-layer.9": 161.4921, "epoch": 0.27, "inbatch_neg_score": 0.4042, "inbatch_pos_score": 1.083, "learning_rate": 4.038888888888889e-05, "loss": 3.0585, "norm_diff": 0.0642, "norm_loss": 0.0, "num_token_doc": 66.6293, "num_token_overlap": 18.012, "num_token_query": 52.1494, "num_token_union": 73.3313, "num_word_context": 202.253, "num_word_doc": 49.7047, "num_word_query": 39.7672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 769.0203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4041, "query_norm": 1.5276, "queue_k_norm": 1.4625, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1494, "sent_len_1": 66.6293, "sent_len_max_0": 127.99, "sent_len_max_1": 189.475, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0645, "doc_norm": 1.4662, "encoder_q-embeddings": 891.6485, "encoder_q-layer.0": 634.4525, "encoder_q-layer.1": 725.0401, "encoder_q-layer.10": 164.768, "encoder_q-layer.11": 372.9057, "encoder_q-layer.2": 860.4438, "encoder_q-layer.3": 944.1427, "encoder_q-layer.4": 767.8719, "encoder_q-layer.5": 647.9073, "encoder_q-layer.6": 482.4154, "encoder_q-layer.7": 308.1, "encoder_q-layer.8": 223.2023, "encoder_q-layer.9": 157.8751, "epoch": 0.27, "inbatch_neg_score": 0.4263, "inbatch_pos_score": 1.1025, "learning_rate": 4.0333333333333336e-05, "loss": 3.0645, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 66.7549, "num_token_overlap": 18.0474, "num_token_query": 52.269, "num_token_union": 73.465, "num_word_context": 202.3505, "num_word_doc": 49.8472, "num_word_query": 39.853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 934.667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4272, "query_norm": 1.5306, "queue_k_norm": 1.4619, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.269, "sent_len_1": 66.7549, "sent_len_max_0": 127.99, "sent_len_max_1": 189.7163, "stdk": 0.0476, "stdq": 0.0462, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0567, "doc_norm": 1.463, "encoder_q-embeddings": 896.3506, "encoder_q-layer.0": 618.9962, "encoder_q-layer.1": 784.0607, "encoder_q-layer.10": 178.9286, "encoder_q-layer.11": 367.2807, "encoder_q-layer.2": 907.1776, "encoder_q-layer.3": 862.6478, "encoder_q-layer.4": 853.0958, "encoder_q-layer.5": 711.2925, "encoder_q-layer.6": 602.0759, "encoder_q-layer.7": 634.3596, "encoder_q-layer.8": 546.0963, "encoder_q-layer.9": 296.4851, "epoch": 0.27, "inbatch_neg_score": 0.429, "inbatch_pos_score": 1.1084, "learning_rate": 4.027777777777778e-05, "loss": 3.0567, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 67.0108, "num_token_overlap": 18.0001, "num_token_query": 52.3421, "num_token_union": 73.6493, "num_word_context": 203.039, "num_word_doc": 49.9971, "num_word_query": 39.9098, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1030.4868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4287, "query_norm": 1.5219, "queue_k_norm": 1.4636, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3421, "sent_len_1": 67.0108, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9613, "stdk": 0.0474, "stdq": 0.0457, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.056, "doc_norm": 1.4686, "encoder_q-embeddings": 1004.4515, "encoder_q-layer.0": 719.7049, "encoder_q-layer.1": 792.5265, "encoder_q-layer.10": 189.7714, "encoder_q-layer.11": 388.7274, "encoder_q-layer.2": 926.2301, "encoder_q-layer.3": 789.2581, "encoder_q-layer.4": 655.0876, "encoder_q-layer.5": 523.6417, "encoder_q-layer.6": 491.3724, "encoder_q-layer.7": 437.7107, "encoder_q-layer.8": 302.0423, "encoder_q-layer.9": 181.5033, "epoch": 0.27, "inbatch_neg_score": 0.4342, "inbatch_pos_score": 1.1387, "learning_rate": 4.022222222222222e-05, "loss": 3.056, "norm_diff": 0.0879, "norm_loss": 0.0, "num_token_doc": 66.6182, "num_token_overlap": 17.9714, "num_token_query": 52.1075, "num_token_union": 73.3642, "num_word_context": 202.0298, "num_word_doc": 49.6968, "num_word_query": 39.7041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 998.304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4331, "query_norm": 1.5565, "queue_k_norm": 1.4632, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1075, "sent_len_1": 66.6182, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.22, "stdk": 0.0476, "stdq": 0.0472, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.0513, "doc_norm": 1.4674, "encoder_q-embeddings": 967.0709, "encoder_q-layer.0": 720.994, "encoder_q-layer.1": 787.1708, "encoder_q-layer.10": 152.0163, "encoder_q-layer.11": 336.6769, "encoder_q-layer.2": 882.4779, "encoder_q-layer.3": 911.0207, "encoder_q-layer.4": 768.3673, "encoder_q-layer.5": 581.713, "encoder_q-layer.6": 518.579, "encoder_q-layer.7": 417.2386, "encoder_q-layer.8": 351.5531, "encoder_q-layer.9": 220.5789, "epoch": 0.27, "inbatch_neg_score": 0.4192, "inbatch_pos_score": 1.1064, "learning_rate": 4.016666666666667e-05, "loss": 3.0513, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.8456, "num_token_overlap": 18.0351, "num_token_query": 52.3162, "num_token_union": 73.5337, "num_word_context": 202.3354, "num_word_doc": 49.8603, "num_word_query": 39.8796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1000.7252, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4182, "query_norm": 1.5228, "queue_k_norm": 1.4652, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3162, "sent_len_1": 66.8456, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7887, "stdk": 0.0475, "stdq": 0.0461, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0715, "doc_norm": 1.4605, "encoder_q-embeddings": 988.5147, "encoder_q-layer.0": 677.4014, "encoder_q-layer.1": 827.8352, "encoder_q-layer.10": 158.4319, "encoder_q-layer.11": 351.8313, "encoder_q-layer.2": 1016.2031, "encoder_q-layer.3": 1140.2295, "encoder_q-layer.4": 1036.7263, "encoder_q-layer.5": 935.6019, "encoder_q-layer.6": 673.1567, "encoder_q-layer.7": 563.0353, "encoder_q-layer.8": 317.1559, "encoder_q-layer.9": 176.7581, "epoch": 0.27, "inbatch_neg_score": 0.4113, "inbatch_pos_score": 1.0615, "learning_rate": 4.011111111111111e-05, "loss": 3.0715, "norm_diff": 0.0201, "norm_loss": 0.0, "num_token_doc": 66.674, "num_token_overlap": 17.9603, "num_token_query": 52.0365, "num_token_union": 73.304, "num_word_context": 202.3359, "num_word_doc": 49.7442, "num_word_query": 39.6776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1136.2631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4106, "query_norm": 1.4803, "queue_k_norm": 1.4648, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0365, "sent_len_1": 66.674, "sent_len_max_0": 127.995, "sent_len_max_1": 188.9988, "stdk": 0.0472, "stdq": 0.0451, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0397, "doc_norm": 1.4709, "encoder_q-embeddings": 218.1309, "encoder_q-layer.0": 160.9251, "encoder_q-layer.1": 169.5769, "encoder_q-layer.10": 88.0892, "encoder_q-layer.11": 178.472, "encoder_q-layer.2": 176.6142, "encoder_q-layer.3": 177.0032, "encoder_q-layer.4": 173.1848, "encoder_q-layer.5": 145.3658, "encoder_q-layer.6": 114.8003, "encoder_q-layer.7": 113.9977, "encoder_q-layer.8": 106.1236, "encoder_q-layer.9": 84.2699, "epoch": 0.27, "inbatch_neg_score": 0.4156, "inbatch_pos_score": 1.0859, "learning_rate": 4.0055555555555554e-05, "loss": 3.0397, "norm_diff": 0.0262, "norm_loss": 0.0, "num_token_doc": 66.8281, "num_token_overlap": 18.0252, "num_token_query": 52.2556, "num_token_union": 73.4539, "num_word_context": 202.362, "num_word_doc": 49.8889, "num_word_query": 39.8384, "postclip_grad_norm": 1.0, "preclip_grad_norm": 228.5753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4136, "query_norm": 1.4971, "queue_k_norm": 1.4654, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2556, "sent_len_1": 66.8281, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.93, "stdk": 0.0477, "stdq": 0.0456, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.0545, "doc_norm": 1.4683, "encoder_q-embeddings": 129.7915, "encoder_q-layer.0": 92.9383, "encoder_q-layer.1": 100.916, "encoder_q-layer.10": 89.005, "encoder_q-layer.11": 188.4438, "encoder_q-layer.2": 110.133, "encoder_q-layer.3": 104.1451, "encoder_q-layer.4": 93.2028, "encoder_q-layer.5": 87.1394, "encoder_q-layer.6": 91.6614, "encoder_q-layer.7": 89.2539, "encoder_q-layer.8": 102.1453, "encoder_q-layer.9": 88.3997, "epoch": 0.27, "inbatch_neg_score": 0.4152, "inbatch_pos_score": 1.1289, "learning_rate": 4e-05, "loss": 3.0545, "norm_diff": 0.0788, "norm_loss": 0.0, "num_token_doc": 66.8289, "num_token_overlap": 18.054, "num_token_query": 52.2867, "num_token_union": 73.486, "num_word_context": 202.2595, "num_word_doc": 49.9177, "num_word_query": 39.8766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 160.713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4143, "query_norm": 1.5471, "queue_k_norm": 1.4655, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2867, "sent_len_1": 66.8289, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.345, "stdk": 0.0475, "stdq": 0.0477, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0291, "doc_norm": 1.4742, "encoder_q-embeddings": 241.8396, "encoder_q-layer.0": 209.6723, "encoder_q-layer.1": 256.1392, "encoder_q-layer.10": 89.0793, "encoder_q-layer.11": 189.2194, "encoder_q-layer.2": 281.5975, "encoder_q-layer.3": 280.1617, "encoder_q-layer.4": 206.1524, "encoder_q-layer.5": 113.772, "encoder_q-layer.6": 108.3032, "encoder_q-layer.7": 95.8624, "encoder_q-layer.8": 93.6019, "encoder_q-layer.9": 85.2566, "epoch": 0.27, "inbatch_neg_score": 0.4084, "inbatch_pos_score": 1.084, "learning_rate": 3.9944444444444446e-05, "loss": 3.0291, "norm_diff": 0.0522, "norm_loss": 0.0, "num_token_doc": 66.7752, "num_token_overlap": 18.0395, "num_token_query": 52.2867, "num_token_union": 73.467, "num_word_context": 202.377, "num_word_doc": 49.8033, "num_word_query": 39.8474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 289.8493, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.408, "query_norm": 1.5264, "queue_k_norm": 1.466, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2867, "sent_len_1": 66.7752, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3187, "stdk": 0.0478, "stdq": 0.047, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.0393, "doc_norm": 1.4648, "encoder_q-embeddings": 368.2809, "encoder_q-layer.0": 299.0151, "encoder_q-layer.1": 308.1232, "encoder_q-layer.10": 97.4268, "encoder_q-layer.11": 193.7528, "encoder_q-layer.2": 155.8219, "encoder_q-layer.3": 143.1481, "encoder_q-layer.4": 149.8606, "encoder_q-layer.5": 151.3029, "encoder_q-layer.6": 147.6606, "encoder_q-layer.7": 106.84, "encoder_q-layer.8": 107.7896, "encoder_q-layer.9": 86.7621, "epoch": 0.28, "inbatch_neg_score": 0.4068, "inbatch_pos_score": 1.0615, "learning_rate": 3.9888888888888895e-05, "loss": 3.0393, "norm_diff": 0.0271, "norm_loss": 0.0, "num_token_doc": 66.9394, "num_token_overlap": 18.0131, "num_token_query": 52.2063, "num_token_union": 73.5121, "num_word_context": 202.3125, "num_word_doc": 49.9486, "num_word_query": 39.8048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 315.718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4058, "query_norm": 1.4919, "queue_k_norm": 1.466, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2063, "sent_len_1": 66.9394, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3575, "stdk": 0.0474, "stdq": 0.0456, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.0262, "doc_norm": 1.4659, "encoder_q-embeddings": 134.7261, "encoder_q-layer.0": 95.9481, "encoder_q-layer.1": 101.9754, "encoder_q-layer.10": 90.1345, "encoder_q-layer.11": 196.3525, "encoder_q-layer.2": 110.6991, "encoder_q-layer.3": 109.3551, "encoder_q-layer.4": 102.6157, "encoder_q-layer.5": 97.2433, "encoder_q-layer.6": 124.8459, "encoder_q-layer.7": 144.4234, "encoder_q-layer.8": 211.0146, "encoder_q-layer.9": 135.9738, "epoch": 0.28, "inbatch_neg_score": 0.392, "inbatch_pos_score": 1.0713, "learning_rate": 3.983333333333333e-05, "loss": 3.0262, "norm_diff": 0.0439, "norm_loss": 0.0, "num_token_doc": 66.8133, "num_token_overlap": 18.0191, "num_token_query": 52.1679, "num_token_union": 73.4311, "num_word_context": 202.2426, "num_word_doc": 49.9139, "num_word_query": 39.7866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 209.3057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3936, "query_norm": 1.5098, "queue_k_norm": 1.4661, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1679, "sent_len_1": 66.8133, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.6325, "stdk": 0.0475, "stdq": 0.0464, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0227, "doc_norm": 1.4598, "encoder_q-embeddings": 739.049, "encoder_q-layer.0": 555.7176, "encoder_q-layer.1": 621.7178, "encoder_q-layer.10": 95.1777, "encoder_q-layer.11": 196.1071, "encoder_q-layer.2": 681.0898, "encoder_q-layer.3": 662.5941, "encoder_q-layer.4": 560.5577, "encoder_q-layer.5": 412.6178, "encoder_q-layer.6": 386.287, "encoder_q-layer.7": 263.3704, "encoder_q-layer.8": 161.6304, "encoder_q-layer.9": 96.528, "epoch": 0.28, "inbatch_neg_score": 0.3715, "inbatch_pos_score": 1.043, "learning_rate": 3.977777777777778e-05, "loss": 3.0227, "norm_diff": 0.0419, "norm_loss": 0.0, "num_token_doc": 66.786, "num_token_overlap": 18.0055, "num_token_query": 52.1141, "num_token_union": 73.4198, "num_word_context": 202.2667, "num_word_doc": 49.8768, "num_word_query": 39.7302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 734.1007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3701, "query_norm": 1.5017, "queue_k_norm": 1.4668, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1141, "sent_len_1": 66.786, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9538, "stdk": 0.0473, "stdq": 0.0465, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.0434, "doc_norm": 1.4632, "encoder_q-embeddings": 631.5769, "encoder_q-layer.0": 509.1712, "encoder_q-layer.1": 549.3727, "encoder_q-layer.10": 82.4678, "encoder_q-layer.11": 173.1944, "encoder_q-layer.2": 588.9308, "encoder_q-layer.3": 545.1847, "encoder_q-layer.4": 452.8846, "encoder_q-layer.5": 330.1509, "encoder_q-layer.6": 323.6542, "encoder_q-layer.7": 254.0416, "encoder_q-layer.8": 174.1494, "encoder_q-layer.9": 83.2886, "epoch": 0.28, "inbatch_neg_score": 0.3555, "inbatch_pos_score": 1.0342, "learning_rate": 3.972222222222222e-05, "loss": 3.0434, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.6963, "num_token_overlap": 18.0209, "num_token_query": 52.2495, "num_token_union": 73.4095, "num_word_context": 202.3892, "num_word_doc": 49.756, "num_word_query": 39.8371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 634.5503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3542, "query_norm": 1.4984, "queue_k_norm": 1.465, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2495, "sent_len_1": 66.6963, "sent_len_max_0": 128.0, "sent_len_max_1": 189.09, "stdk": 0.0474, "stdq": 0.0462, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.0403, "doc_norm": 1.4692, "encoder_q-embeddings": 312.345, "encoder_q-layer.0": 252.6348, "encoder_q-layer.1": 247.2649, "encoder_q-layer.10": 82.9348, "encoder_q-layer.11": 173.3411, "encoder_q-layer.2": 243.5411, "encoder_q-layer.3": 234.2089, "encoder_q-layer.4": 229.9478, "encoder_q-layer.5": 160.0822, "encoder_q-layer.6": 141.1036, "encoder_q-layer.7": 135.4482, "encoder_q-layer.8": 102.5998, "encoder_q-layer.9": 81.6098, "epoch": 0.28, "inbatch_neg_score": 0.3482, "inbatch_pos_score": 1.0234, "learning_rate": 3.966666666666667e-05, "loss": 3.0403, "norm_diff": 0.0095, "norm_loss": 0.0, "num_token_doc": 66.777, "num_token_overlap": 17.9767, "num_token_query": 52.1454, "num_token_union": 73.4189, "num_word_context": 202.2218, "num_word_doc": 49.8709, "num_word_query": 39.7573, "postclip_grad_norm": 1.0, "preclip_grad_norm": 309.5708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3477, "query_norm": 1.477, "queue_k_norm": 1.4628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1454, "sent_len_1": 66.777, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0387, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.0395, "doc_norm": 1.4612, "encoder_q-embeddings": 123.5706, "encoder_q-layer.0": 81.9766, "encoder_q-layer.1": 94.5266, "encoder_q-layer.10": 88.8982, "encoder_q-layer.11": 185.2948, "encoder_q-layer.2": 103.5903, "encoder_q-layer.3": 112.8127, "encoder_q-layer.4": 111.5891, "encoder_q-layer.5": 108.5542, "encoder_q-layer.6": 115.7012, "encoder_q-layer.7": 106.6885, "encoder_q-layer.8": 118.1073, "encoder_q-layer.9": 97.1691, "epoch": 0.28, "inbatch_neg_score": 0.3571, "inbatch_pos_score": 1.0166, "learning_rate": 3.961111111111111e-05, "loss": 3.0395, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.6341, "num_token_overlap": 17.9908, "num_token_query": 52.3919, "num_token_union": 73.4587, "num_word_context": 202.148, "num_word_doc": 49.6773, "num_word_query": 39.9388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 168.92, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3569, "query_norm": 1.495, "queue_k_norm": 1.4623, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3919, "sent_len_1": 66.6341, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.24, "stdk": 0.0474, "stdq": 0.0463, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 3.0396, "doc_norm": 1.4595, "encoder_q-embeddings": 417.6028, "encoder_q-layer.0": 324.8105, "encoder_q-layer.1": 367.8311, "encoder_q-layer.10": 81.4752, "encoder_q-layer.11": 176.7647, "encoder_q-layer.2": 453.7736, "encoder_q-layer.3": 442.5071, "encoder_q-layer.4": 389.448, "encoder_q-layer.5": 346.5737, "encoder_q-layer.6": 324.8369, "encoder_q-layer.7": 234.924, "encoder_q-layer.8": 155.4627, "encoder_q-layer.9": 91.745, "epoch": 0.28, "inbatch_neg_score": 0.3344, "inbatch_pos_score": 1.0273, "learning_rate": 3.9555555555555556e-05, "loss": 3.0396, "norm_diff": 0.0291, "norm_loss": 0.0, "num_token_doc": 66.7375, "num_token_overlap": 17.9391, "num_token_query": 52.2189, "num_token_union": 73.4877, "num_word_context": 202.3682, "num_word_doc": 49.8202, "num_word_query": 39.8148, "postclip_grad_norm": 1.0, "preclip_grad_norm": 477.2378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3325, "query_norm": 1.4885, "queue_k_norm": 1.4608, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2189, "sent_len_1": 66.7375, "sent_len_max_0": 127.995, "sent_len_max_1": 189.3413, "stdk": 0.0474, "stdq": 0.046, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0197, "doc_norm": 1.4664, "encoder_q-embeddings": 372.0701, "encoder_q-layer.0": 233.6911, "encoder_q-layer.1": 285.1377, "encoder_q-layer.10": 93.5319, "encoder_q-layer.11": 198.3623, "encoder_q-layer.2": 320.5409, "encoder_q-layer.3": 349.8958, "encoder_q-layer.4": 261.4095, "encoder_q-layer.5": 142.3987, "encoder_q-layer.6": 130.2156, "encoder_q-layer.7": 105.5342, "encoder_q-layer.8": 101.9708, "encoder_q-layer.9": 89.0965, "epoch": 0.28, "inbatch_neg_score": 0.3231, "inbatch_pos_score": 0.9785, "learning_rate": 3.9500000000000005e-05, "loss": 3.0197, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.6986, "num_token_overlap": 17.9837, "num_token_query": 51.9571, "num_token_union": 73.2339, "num_word_context": 201.8629, "num_word_doc": 49.7372, "num_word_query": 39.6016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 366.0499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3235, "query_norm": 1.4788, "queue_k_norm": 1.4593, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 51.9571, "sent_len_1": 66.6986, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1188, "stdk": 0.0478, "stdq": 0.0453, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.0306, "doc_norm": 1.46, "encoder_q-embeddings": 210.3169, "encoder_q-layer.0": 153.2259, "encoder_q-layer.1": 174.5848, "encoder_q-layer.10": 80.7756, "encoder_q-layer.11": 186.9871, "encoder_q-layer.2": 177.5994, "encoder_q-layer.3": 185.0503, "encoder_q-layer.4": 161.306, "encoder_q-layer.5": 124.4813, "encoder_q-layer.6": 141.0389, "encoder_q-layer.7": 113.5988, "encoder_q-layer.8": 96.3969, "encoder_q-layer.9": 83.6094, "epoch": 0.28, "inbatch_neg_score": 0.3298, "inbatch_pos_score": 0.9785, "learning_rate": 3.944444444444445e-05, "loss": 3.0306, "norm_diff": 0.016, "norm_loss": 0.0, "num_token_doc": 66.5535, "num_token_overlap": 18.0294, "num_token_query": 52.3811, "num_token_union": 73.3911, "num_word_context": 202.2443, "num_word_doc": 49.6931, "num_word_query": 39.9285, "postclip_grad_norm": 1.0, "preclip_grad_norm": 228.8068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.4653, "queue_k_norm": 1.4596, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3811, "sent_len_1": 66.5535, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7438, "stdk": 0.0476, "stdq": 0.045, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.048, "doc_norm": 1.4545, "encoder_q-embeddings": 384.4026, "encoder_q-layer.0": 303.3673, "encoder_q-layer.1": 349.9526, "encoder_q-layer.10": 84.0747, "encoder_q-layer.11": 206.6464, "encoder_q-layer.2": 363.6508, "encoder_q-layer.3": 282.7828, "encoder_q-layer.4": 179.4333, "encoder_q-layer.5": 118.4964, "encoder_q-layer.6": 122.0709, "encoder_q-layer.7": 123.4817, "encoder_q-layer.8": 117.4847, "encoder_q-layer.9": 82.4674, "epoch": 0.28, "inbatch_neg_score": 0.3236, "inbatch_pos_score": 0.9639, "learning_rate": 3.938888888888889e-05, "loss": 3.048, "norm_diff": 0.0145, "norm_loss": 0.0, "num_token_doc": 66.7313, "num_token_overlap": 17.9533, "num_token_query": 51.9713, "num_token_union": 73.3317, "num_word_context": 202.3665, "num_word_doc": 49.8301, "num_word_query": 39.6106, "postclip_grad_norm": 1.0, "preclip_grad_norm": 367.7498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3232, "query_norm": 1.4509, "queue_k_norm": 1.4587, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 51.9713, "sent_len_1": 66.7313, "sent_len_max_0": 128.0, "sent_len_max_1": 187.78, "stdk": 0.0474, "stdq": 0.0447, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0201, "doc_norm": 1.4582, "encoder_q-embeddings": 2327.2634, "encoder_q-layer.0": 1801.8232, "encoder_q-layer.1": 1957.6656, "encoder_q-layer.10": 86.7751, "encoder_q-layer.11": 181.0059, "encoder_q-layer.2": 1541.3177, "encoder_q-layer.3": 1452.1648, "encoder_q-layer.4": 878.2745, "encoder_q-layer.5": 168.0516, "encoder_q-layer.6": 91.8993, "encoder_q-layer.7": 90.8893, "encoder_q-layer.8": 93.7279, "encoder_q-layer.9": 79.1972, "epoch": 0.29, "inbatch_neg_score": 0.328, "inbatch_pos_score": 1.0088, "learning_rate": 3.933333333333333e-05, "loss": 3.0201, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.7756, "num_token_overlap": 18.0588, "num_token_query": 52.3289, "num_token_union": 73.4607, "num_word_context": 202.2429, "num_word_doc": 49.8231, "num_word_query": 39.8728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1881.9401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3271, "query_norm": 1.4735, "queue_k_norm": 1.4549, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3289, "sent_len_1": 66.7756, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6475, "stdk": 0.0476, "stdq": 0.0463, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.0398, "doc_norm": 1.458, "encoder_q-embeddings": 501.2141, "encoder_q-layer.0": 385.14, "encoder_q-layer.1": 413.7248, "encoder_q-layer.10": 82.2388, "encoder_q-layer.11": 191.4521, "encoder_q-layer.2": 467.936, "encoder_q-layer.3": 311.2096, "encoder_q-layer.4": 185.6399, "encoder_q-layer.5": 149.9047, "encoder_q-layer.6": 146.6664, "encoder_q-layer.7": 123.7792, "encoder_q-layer.8": 111.9084, "encoder_q-layer.9": 92.0427, "epoch": 0.29, "inbatch_neg_score": 0.3339, "inbatch_pos_score": 1.0176, "learning_rate": 3.927777777777778e-05, "loss": 3.0398, "norm_diff": 0.0128, "norm_loss": 0.0, "num_token_doc": 66.5394, "num_token_overlap": 17.9804, "num_token_query": 52.3029, "num_token_union": 73.3528, "num_word_context": 202.0134, "num_word_doc": 49.6443, "num_word_query": 39.8536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 464.2405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3342, "query_norm": 1.4527, "queue_k_norm": 1.4558, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3029, "sent_len_1": 66.5394, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5637, "stdk": 0.0476, "stdq": 0.0457, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0261, "doc_norm": 1.4582, "encoder_q-embeddings": 153.8006, "encoder_q-layer.0": 110.7359, "encoder_q-layer.1": 130.3949, "encoder_q-layer.10": 85.4252, "encoder_q-layer.11": 191.7256, "encoder_q-layer.2": 144.6531, "encoder_q-layer.3": 149.4798, "encoder_q-layer.4": 130.3692, "encoder_q-layer.5": 90.1825, "encoder_q-layer.6": 88.2043, "encoder_q-layer.7": 81.739, "encoder_q-layer.8": 91.8753, "encoder_q-layer.9": 80.8011, "epoch": 0.29, "inbatch_neg_score": 0.3411, "inbatch_pos_score": 1.0303, "learning_rate": 3.922222222222223e-05, "loss": 3.0261, "norm_diff": 0.0166, "norm_loss": 0.0, "num_token_doc": 66.5365, "num_token_overlap": 17.9607, "num_token_query": 52.2608, "num_token_union": 73.3231, "num_word_context": 202.302, "num_word_doc": 49.668, "num_word_query": 39.8401, "postclip_grad_norm": 1.0, "preclip_grad_norm": 186.815, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3411, "query_norm": 1.4737, "queue_k_norm": 1.4549, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2608, "sent_len_1": 66.5365, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2138, "stdk": 0.0477, "stdq": 0.0463, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0337, "doc_norm": 1.4568, "encoder_q-embeddings": 213.2253, "encoder_q-layer.0": 152.7315, "encoder_q-layer.1": 178.3718, "encoder_q-layer.10": 86.3575, "encoder_q-layer.11": 175.9593, "encoder_q-layer.2": 167.6035, "encoder_q-layer.3": 153.5344, "encoder_q-layer.4": 144.9448, "encoder_q-layer.5": 118.8236, "encoder_q-layer.6": 107.4269, "encoder_q-layer.7": 91.2327, "encoder_q-layer.8": 91.3745, "encoder_q-layer.9": 80.3426, "epoch": 0.29, "inbatch_neg_score": 0.3396, "inbatch_pos_score": 1.0283, "learning_rate": 3.9166666666666665e-05, "loss": 3.0337, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.6885, "num_token_overlap": 17.9654, "num_token_query": 52.1939, "num_token_union": 73.3565, "num_word_context": 202.305, "num_word_doc": 49.7313, "num_word_query": 39.7963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 217.8442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3411, "query_norm": 1.4717, "queue_k_norm": 1.4526, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1939, "sent_len_1": 66.6885, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9913, "stdk": 0.0477, "stdq": 0.046, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.055, "doc_norm": 1.4507, "encoder_q-embeddings": 499.9719, "encoder_q-layer.0": 366.8392, "encoder_q-layer.1": 438.2234, "encoder_q-layer.10": 83.3391, "encoder_q-layer.11": 179.7576, "encoder_q-layer.2": 536.3654, "encoder_q-layer.3": 522.5604, "encoder_q-layer.4": 385.7651, "encoder_q-layer.5": 321.6533, "encoder_q-layer.6": 296.9786, "encoder_q-layer.7": 203.6221, "encoder_q-layer.8": 113.9459, "encoder_q-layer.9": 83.5453, "epoch": 0.29, "inbatch_neg_score": 0.3436, "inbatch_pos_score": 1.0303, "learning_rate": 3.9111111111111115e-05, "loss": 3.055, "norm_diff": 0.0279, "norm_loss": 0.0, "num_token_doc": 66.6739, "num_token_overlap": 17.9217, "num_token_query": 52.1412, "num_token_union": 73.4167, "num_word_context": 202.1657, "num_word_doc": 49.7425, "num_word_query": 39.7602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 529.8528, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3433, "query_norm": 1.4787, "queue_k_norm": 1.4529, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1412, "sent_len_1": 66.6739, "sent_len_max_0": 127.99, "sent_len_max_1": 189.9575, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.0667, "doc_norm": 1.451, "encoder_q-embeddings": 1160.1492, "encoder_q-layer.0": 862.7386, "encoder_q-layer.1": 1036.3231, "encoder_q-layer.10": 84.5259, "encoder_q-layer.11": 177.571, "encoder_q-layer.2": 1189.0005, "encoder_q-layer.3": 1156.0228, "encoder_q-layer.4": 966.7683, "encoder_q-layer.5": 670.716, "encoder_q-layer.6": 484.7997, "encoder_q-layer.7": 308.9038, "encoder_q-layer.8": 144.2498, "encoder_q-layer.9": 87.9253, "epoch": 0.29, "inbatch_neg_score": 0.3438, "inbatch_pos_score": 1.002, "learning_rate": 3.905555555555556e-05, "loss": 3.0667, "norm_diff": 0.0159, "norm_loss": 0.0, "num_token_doc": 66.6391, "num_token_overlap": 18.0108, "num_token_query": 52.2863, "num_token_union": 73.4201, "num_word_context": 202.4697, "num_word_doc": 49.7303, "num_word_query": 39.8633, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1197.0491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3442, "query_norm": 1.4655, "queue_k_norm": 1.4518, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2863, "sent_len_1": 66.6391, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1587, "stdk": 0.0475, "stdq": 0.0458, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0343, "doc_norm": 1.4549, "encoder_q-embeddings": 1346.9679, "encoder_q-layer.0": 987.8334, "encoder_q-layer.1": 1114.71, "encoder_q-layer.10": 83.3788, "encoder_q-layer.11": 173.0095, "encoder_q-layer.2": 1273.2993, "encoder_q-layer.3": 1245.8187, "encoder_q-layer.4": 1246.067, "encoder_q-layer.5": 877.7505, "encoder_q-layer.6": 603.0756, "encoder_q-layer.7": 420.1309, "encoder_q-layer.8": 271.0326, "encoder_q-layer.9": 138.4763, "epoch": 0.29, "inbatch_neg_score": 0.3461, "inbatch_pos_score": 1.0361, "learning_rate": 3.9000000000000006e-05, "loss": 3.0343, "norm_diff": 0.0242, "norm_loss": 0.0, "num_token_doc": 66.8325, "num_token_overlap": 18.0317, "num_token_query": 52.1221, "num_token_union": 73.3966, "num_word_context": 202.2232, "num_word_doc": 49.8511, "num_word_query": 39.7352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1356.5789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3452, "query_norm": 1.4729, "queue_k_norm": 1.4534, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1221, "sent_len_1": 66.8325, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.1838, "stdk": 0.0477, "stdq": 0.0463, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0333, "doc_norm": 1.4588, "encoder_q-embeddings": 592.1195, "encoder_q-layer.0": 403.5971, "encoder_q-layer.1": 481.5439, "encoder_q-layer.10": 187.8217, "encoder_q-layer.11": 350.486, "encoder_q-layer.2": 553.8659, "encoder_q-layer.3": 552.909, "encoder_q-layer.4": 441.8415, "encoder_q-layer.5": 371.3025, "encoder_q-layer.6": 308.5117, "encoder_q-layer.7": 240.4299, "encoder_q-layer.8": 213.412, "encoder_q-layer.9": 171.5175, "epoch": 0.29, "inbatch_neg_score": 0.3447, "inbatch_pos_score": 1.0293, "learning_rate": 3.894444444444444e-05, "loss": 3.0333, "norm_diff": 0.0119, "norm_loss": 0.0, "num_token_doc": 66.9023, "num_token_overlap": 18.0295, "num_token_query": 52.1505, "num_token_union": 73.4566, "num_word_context": 202.5839, "num_word_doc": 49.9044, "num_word_query": 39.7698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 618.373, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3462, "query_norm": 1.4696, "queue_k_norm": 1.4545, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1505, "sent_len_1": 66.9023, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5288, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0291, "doc_norm": 1.4492, "encoder_q-embeddings": 560.0992, "encoder_q-layer.0": 401.4041, "encoder_q-layer.1": 442.7641, "encoder_q-layer.10": 168.6848, "encoder_q-layer.11": 369.6367, "encoder_q-layer.2": 535.3823, "encoder_q-layer.3": 532.7612, "encoder_q-layer.4": 489.063, "encoder_q-layer.5": 604.8239, "encoder_q-layer.6": 674.7397, "encoder_q-layer.7": 424.1485, "encoder_q-layer.8": 245.7482, "encoder_q-layer.9": 186.7309, "epoch": 0.29, "inbatch_neg_score": 0.3557, "inbatch_pos_score": 1.0117, "learning_rate": 3.888888888888889e-05, "loss": 3.0291, "norm_diff": 0.049, "norm_loss": 0.0, "num_token_doc": 66.5375, "num_token_overlap": 17.9471, "num_token_query": 52.0636, "num_token_union": 73.2803, "num_word_context": 202.0063, "num_word_doc": 49.6802, "num_word_query": 39.7027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 684.5741, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3538, "query_norm": 1.4982, "queue_k_norm": 1.4541, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0636, "sent_len_1": 66.5375, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1275, "stdk": 0.0475, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 28.5898, "dev_samples_per_second": 2.239, "dev_steps_per_second": 0.035, "epoch": 0.29, "step": 30000, "test_accuracy": 92.333984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.448369562625885, "test_doc_norm": 1.395767331123352, "test_inbatch_neg_score": 0.6642370223999023, "test_inbatch_pos_score": 1.510129690170288, "test_loss": 0.448369562625885, "test_loss_align": 1.2063122987747192, "test_loss_unif": 3.74872088432312, "test_loss_unif_q@queue": 3.748720645904541, "test_norm_diff": 0.1059337630867958, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.34607696533203125, "test_query_norm": 1.5017009973526, "test_queue_k_norm": 1.4541300535202026, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04029618948698044, "test_stdq": 0.04164886474609375, "test_stdqueue_k": 0.04781588912010193, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.5898, "dev_samples_per_second": 2.239, "dev_steps_per_second": 0.035, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.31777, "eval_beir-arguana_recall@10": 0.55121, "eval_beir-arguana_recall@100": 0.86273, "eval_beir-arguana_recall@20": 0.68492, "eval_beir-avg_ndcg@10": 0.32845624999999995, "eval_beir-avg_recall@10": 0.3942770833333334, "eval_beir-avg_recall@100": 0.5807025, "eval_beir-avg_recall@20": 0.45410200000000006, "eval_beir-cqadupstack_ndcg@10": 0.2243425, "eval_beir-cqadupstack_recall@10": 0.30994083333333333, "eval_beir-cqadupstack_recall@100": 0.5347350000000001, "eval_beir-cqadupstack_recall@20": 0.37094, "eval_beir-fiqa_ndcg@10": 0.18519, "eval_beir-fiqa_recall@10": 0.23723, "eval_beir-fiqa_recall@100": 0.4989, "eval_beir-fiqa_recall@20": 0.31314, "eval_beir-nfcorpus_ndcg@10": 0.24369, "eval_beir-nfcorpus_recall@10": 0.11988, "eval_beir-nfcorpus_recall@100": 0.24476, "eval_beir-nfcorpus_recall@20": 0.15536, "eval_beir-nq_ndcg@10": 0.21588, "eval_beir-nq_recall@10": 0.36457, "eval_beir-nq_recall@100": 0.70995, "eval_beir-nq_recall@20": 0.4781, "eval_beir-quora_ndcg@10": 0.73832, "eval_beir-quora_recall@10": 0.85484, "eval_beir-quora_recall@100": 0.96564, "eval_beir-quora_recall@20": 0.9029, "eval_beir-scidocs_ndcg@10": 0.13457, "eval_beir-scidocs_recall@10": 0.13908, "eval_beir-scidocs_recall@100": 0.32852, "eval_beir-scidocs_recall@20": 0.18757, "eval_beir-scifact_ndcg@10": 0.592, "eval_beir-scifact_recall@10": 0.74722, "eval_beir-scifact_recall@100": 0.90744, "eval_beir-scifact_recall@20": 0.79467, "eval_beir-trec-covid_ndcg@10": 0.47234, "eval_beir-trec-covid_recall@10": 0.506, "eval_beir-trec-covid_recall@100": 0.3522, "eval_beir-trec-covid_recall@20": 0.474, "eval_beir-webis-touche2020_ndcg@10": 0.16046, "eval_beir-webis-touche2020_recall@10": 0.1128, "eval_beir-webis-touche2020_recall@100": 0.40215, "eval_beir-webis-touche2020_recall@20": 0.17942, "eval_senteval-avg_sts": 0.7301401289956719, "eval_senteval-sickr_spearman": 0.6877805405143298, "eval_senteval-stsb_spearman": 0.7724997174770142, "step": 30000, "test_accuracy": 92.333984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.448369562625885, "test_doc_norm": 1.395767331123352, "test_inbatch_neg_score": 0.6642370223999023, "test_inbatch_pos_score": 1.510129690170288, "test_loss": 0.448369562625885, "test_loss_align": 1.2063122987747192, "test_loss_unif": 3.74872088432312, "test_loss_unif_q@queue": 3.748720645904541, "test_norm_diff": 0.1059337630867958, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.34607696533203125, "test_query_norm": 1.5017009973526, "test_queue_k_norm": 1.4541300535202026, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04029618948698044, "test_stdq": 0.04164886474609375, "test_stdqueue_k": 0.04781588912010193, "test_stdqueue_q": 0.0 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 3.0111, "doc_norm": 1.455, "encoder_q-embeddings": 282.2499, "encoder_q-layer.0": 201.7208, "encoder_q-layer.1": 223.2005, "encoder_q-layer.10": 169.5552, "encoder_q-layer.11": 344.136, "encoder_q-layer.2": 260.2471, "encoder_q-layer.3": 266.5605, "encoder_q-layer.4": 304.9241, "encoder_q-layer.5": 295.3226, "encoder_q-layer.6": 259.6276, "encoder_q-layer.7": 246.2534, "encoder_q-layer.8": 188.4437, "encoder_q-layer.9": 152.8806, "epoch": 0.29, "inbatch_neg_score": 0.3456, "inbatch_pos_score": 1.0693, "learning_rate": 3.883333333333333e-05, "loss": 3.0111, "norm_diff": 0.0309, "norm_loss": 0.0, "num_token_doc": 66.7766, "num_token_overlap": 18.0449, "num_token_query": 52.3048, "num_token_union": 73.5267, "num_word_context": 202.6147, "num_word_doc": 49.8285, "num_word_query": 39.8781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 375.4721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3452, "query_norm": 1.4859, "queue_k_norm": 1.4556, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3048, "sent_len_1": 66.7766, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2912, "stdk": 0.0478, "stdq": 0.0459, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0434, "doc_norm": 1.4546, "encoder_q-embeddings": 421.6446, "encoder_q-layer.0": 315.4263, "encoder_q-layer.1": 375.7909, "encoder_q-layer.10": 164.4112, "encoder_q-layer.11": 362.8544, "encoder_q-layer.2": 442.4923, "encoder_q-layer.3": 386.8304, "encoder_q-layer.4": 402.169, "encoder_q-layer.5": 362.2585, "encoder_q-layer.6": 387.762, "encoder_q-layer.7": 424.1862, "encoder_q-layer.8": 440.2083, "encoder_q-layer.9": 265.6116, "epoch": 0.29, "inbatch_neg_score": 0.3411, "inbatch_pos_score": 1.0088, "learning_rate": 3.877777777777778e-05, "loss": 3.0434, "norm_diff": 0.0148, "norm_loss": 0.0, "num_token_doc": 66.8393, "num_token_overlap": 17.9786, "num_token_query": 52.0988, "num_token_union": 73.4382, "num_word_context": 202.5798, "num_word_doc": 49.8537, "num_word_query": 39.7146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 560.7897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3403, "query_norm": 1.4684, "queue_k_norm": 1.453, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0988, "sent_len_1": 66.8393, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2337, "stdk": 0.0478, "stdq": 0.0451, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0247, "doc_norm": 1.4512, "encoder_q-embeddings": 3287.0295, "encoder_q-layer.0": 2672.6152, "encoder_q-layer.1": 2816.197, "encoder_q-layer.10": 176.4018, "encoder_q-layer.11": 378.8413, "encoder_q-layer.2": 1640.1135, "encoder_q-layer.3": 1392.9071, "encoder_q-layer.4": 1485.9313, "encoder_q-layer.5": 1300.9453, "encoder_q-layer.6": 1445.4004, "encoder_q-layer.7": 957.3608, "encoder_q-layer.8": 414.8994, "encoder_q-layer.9": 214.578, "epoch": 0.3, "inbatch_neg_score": 0.3538, "inbatch_pos_score": 1.0156, "learning_rate": 3.8722222222222225e-05, "loss": 3.0247, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.9623, "num_token_overlap": 18.0264, "num_token_query": 52.2642, "num_token_union": 73.5698, "num_word_context": 202.3064, "num_word_doc": 49.9437, "num_word_query": 39.8531, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2756.6909, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3528, "query_norm": 1.4732, "queue_k_norm": 1.4534, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2642, "sent_len_1": 66.9623, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.05, "stdk": 0.0477, "stdq": 0.0452, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.0184, "doc_norm": 1.4541, "encoder_q-embeddings": 1825.1725, "encoder_q-layer.0": 1443.5264, "encoder_q-layer.1": 1511.3186, "encoder_q-layer.10": 76.4011, "encoder_q-layer.11": 170.5183, "encoder_q-layer.2": 1190.7269, "encoder_q-layer.3": 1146.0005, "encoder_q-layer.4": 1029.0889, "encoder_q-layer.5": 756.6373, "encoder_q-layer.6": 574.2397, "encoder_q-layer.7": 355.3047, "encoder_q-layer.8": 144.7228, "encoder_q-layer.9": 78.8707, "epoch": 0.3, "inbatch_neg_score": 0.3428, "inbatch_pos_score": 1.0146, "learning_rate": 3.866666666666667e-05, "loss": 3.0184, "norm_diff": 0.0319, "norm_loss": 0.0, "num_token_doc": 66.7081, "num_token_overlap": 18.0534, "num_token_query": 52.3809, "num_token_union": 73.4502, "num_word_context": 202.0061, "num_word_doc": 49.7591, "num_word_query": 39.9252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1595.7898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3408, "query_norm": 1.486, "queue_k_norm": 1.4511, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3809, "sent_len_1": 66.7081, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.2363, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0114, "doc_norm": 1.4527, "encoder_q-embeddings": 281.1419, "encoder_q-layer.0": 208.4294, "encoder_q-layer.1": 243.6926, "encoder_q-layer.10": 92.5032, "encoder_q-layer.11": 183.012, "encoder_q-layer.2": 202.0839, "encoder_q-layer.3": 198.3667, "encoder_q-layer.4": 206.7683, "encoder_q-layer.5": 209.4664, "encoder_q-layer.6": 184.5109, "encoder_q-layer.7": 154.5654, "encoder_q-layer.8": 97.2677, "encoder_q-layer.9": 76.7683, "epoch": 0.3, "inbatch_neg_score": 0.3585, "inbatch_pos_score": 1.0537, "learning_rate": 3.8611111111111116e-05, "loss": 3.0114, "norm_diff": 0.0691, "norm_loss": 0.0, "num_token_doc": 66.7525, "num_token_overlap": 17.9511, "num_token_query": 52.142, "num_token_union": 73.4474, "num_word_context": 202.1317, "num_word_doc": 49.8295, "num_word_query": 39.7423, "postclip_grad_norm": 1.0, "preclip_grad_norm": 287.1438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3586, "query_norm": 1.5218, "queue_k_norm": 1.4529, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.142, "sent_len_1": 66.7525, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3562, "stdk": 0.0478, "stdq": 0.0464, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.019, "doc_norm": 1.4521, "encoder_q-embeddings": 439.8963, "encoder_q-layer.0": 339.1517, "encoder_q-layer.1": 365.1876, "encoder_q-layer.10": 84.7358, "encoder_q-layer.11": 182.8949, "encoder_q-layer.2": 401.1048, "encoder_q-layer.3": 400.6631, "encoder_q-layer.4": 291.0729, "encoder_q-layer.5": 250.8979, "encoder_q-layer.6": 256.1257, "encoder_q-layer.7": 190.3523, "encoder_q-layer.8": 115.6977, "encoder_q-layer.9": 78.1362, "epoch": 0.3, "inbatch_neg_score": 0.3743, "inbatch_pos_score": 1.0439, "learning_rate": 3.855555555555556e-05, "loss": 3.019, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.5684, "num_token_overlap": 17.972, "num_token_query": 52.4636, "num_token_union": 73.5411, "num_word_context": 202.3679, "num_word_doc": 49.695, "num_word_query": 40.0175, "postclip_grad_norm": 1.0, "preclip_grad_norm": 445.8504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.373, "query_norm": 1.5004, "queue_k_norm": 1.4524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4636, "sent_len_1": 66.5684, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5488, "stdk": 0.0477, "stdq": 0.0455, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0158, "doc_norm": 1.4547, "encoder_q-embeddings": 2669.2627, "encoder_q-layer.0": 1716.2858, "encoder_q-layer.1": 2259.3469, "encoder_q-layer.10": 88.3274, "encoder_q-layer.11": 190.2285, "encoder_q-layer.2": 2727.2075, "encoder_q-layer.3": 3133.3879, "encoder_q-layer.4": 3392.8333, "encoder_q-layer.5": 3399.0076, "encoder_q-layer.6": 3198.6555, "encoder_q-layer.7": 2386.218, "encoder_q-layer.8": 781.2035, "encoder_q-layer.9": 140.4595, "epoch": 0.3, "inbatch_neg_score": 0.3864, "inbatch_pos_score": 1.0645, "learning_rate": 3.85e-05, "loss": 3.0158, "norm_diff": 0.0406, "norm_loss": 0.0, "num_token_doc": 66.7485, "num_token_overlap": 18.0839, "num_token_query": 52.3984, "num_token_union": 73.4638, "num_word_context": 202.25, "num_word_doc": 49.8408, "num_word_query": 39.9612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3477.7843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.385, "query_norm": 1.4953, "queue_k_norm": 1.4527, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3984, "sent_len_1": 66.7485, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.38, "stdk": 0.0478, "stdq": 0.0451, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.016, "doc_norm": 1.4584, "encoder_q-embeddings": 556.3079, "encoder_q-layer.0": 408.7915, "encoder_q-layer.1": 479.7408, "encoder_q-layer.10": 89.3728, "encoder_q-layer.11": 184.8808, "encoder_q-layer.2": 523.0711, "encoder_q-layer.3": 588.3832, "encoder_q-layer.4": 487.9643, "encoder_q-layer.5": 426.0316, "encoder_q-layer.6": 314.6463, "encoder_q-layer.7": 244.1431, "encoder_q-layer.8": 155.817, "encoder_q-layer.9": 89.3318, "epoch": 0.3, "inbatch_neg_score": 0.3973, "inbatch_pos_score": 1.0869, "learning_rate": 3.844444444444444e-05, "loss": 3.016, "norm_diff": 0.087, "norm_loss": 0.0, "num_token_doc": 66.8215, "num_token_overlap": 17.9911, "num_token_query": 52.2119, "num_token_union": 73.5293, "num_word_context": 202.3454, "num_word_doc": 49.8852, "num_word_query": 39.8195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 598.2373, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.396, "query_norm": 1.5455, "queue_k_norm": 1.4532, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2119, "sent_len_1": 66.8215, "sent_len_max_0": 128.0, "sent_len_max_1": 189.705, "stdk": 0.0479, "stdq": 0.0468, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0261, "doc_norm": 1.4606, "encoder_q-embeddings": 501.2019, "encoder_q-layer.0": 381.4131, "encoder_q-layer.1": 397.7653, "encoder_q-layer.10": 84.4807, "encoder_q-layer.11": 178.7699, "encoder_q-layer.2": 353.6535, "encoder_q-layer.3": 312.3058, "encoder_q-layer.4": 308.1403, "encoder_q-layer.5": 236.9829, "encoder_q-layer.6": 216.3735, "encoder_q-layer.7": 173.3165, "encoder_q-layer.8": 103.6983, "encoder_q-layer.9": 77.2976, "epoch": 0.3, "inbatch_neg_score": 0.4025, "inbatch_pos_score": 1.0713, "learning_rate": 3.838888888888889e-05, "loss": 3.0261, "norm_diff": 0.0706, "norm_loss": 0.0, "num_token_doc": 66.714, "num_token_overlap": 17.9352, "num_token_query": 52.0176, "num_token_union": 73.3584, "num_word_context": 201.8313, "num_word_doc": 49.775, "num_word_query": 39.6725, "postclip_grad_norm": 1.0, "preclip_grad_norm": 453.0439, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4001, "query_norm": 1.5313, "queue_k_norm": 1.4522, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0176, "sent_len_1": 66.714, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.8862, "stdk": 0.0479, "stdq": 0.0457, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.9967, "doc_norm": 1.4512, "encoder_q-embeddings": 468.4765, "encoder_q-layer.0": 344.7563, "encoder_q-layer.1": 353.1888, "encoder_q-layer.10": 80.8789, "encoder_q-layer.11": 175.8225, "encoder_q-layer.2": 437.0034, "encoder_q-layer.3": 395.6164, "encoder_q-layer.4": 319.3244, "encoder_q-layer.5": 226.3306, "encoder_q-layer.6": 195.6138, "encoder_q-layer.7": 152.823, "encoder_q-layer.8": 100.2271, "encoder_q-layer.9": 80.0251, "epoch": 0.3, "inbatch_neg_score": 0.4148, "inbatch_pos_score": 1.0986, "learning_rate": 3.8333333333333334e-05, "loss": 2.9967, "norm_diff": 0.0811, "norm_loss": 0.0, "num_token_doc": 66.8921, "num_token_overlap": 18.0431, "num_token_query": 52.3071, "num_token_union": 73.5799, "num_word_context": 202.3547, "num_word_doc": 49.9076, "num_word_query": 39.8826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 454.9297, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4126, "query_norm": 1.5322, "queue_k_norm": 1.4557, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3071, "sent_len_1": 66.8921, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.97, "stdk": 0.0475, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.9939, "doc_norm": 1.4641, "encoder_q-embeddings": 177.1897, "encoder_q-layer.0": 122.3074, "encoder_q-layer.1": 136.9441, "encoder_q-layer.10": 81.3782, "encoder_q-layer.11": 185.2208, "encoder_q-layer.2": 151.5333, "encoder_q-layer.3": 140.4644, "encoder_q-layer.4": 127.1028, "encoder_q-layer.5": 107.9483, "encoder_q-layer.6": 90.5175, "encoder_q-layer.7": 87.875, "encoder_q-layer.8": 89.5373, "encoder_q-layer.9": 80.7268, "epoch": 0.3, "inbatch_neg_score": 0.4218, "inbatch_pos_score": 1.1152, "learning_rate": 3.827777777777778e-05, "loss": 2.9939, "norm_diff": 0.0721, "norm_loss": 0.0, "num_token_doc": 66.8855, "num_token_overlap": 18.014, "num_token_query": 52.2389, "num_token_union": 73.5037, "num_word_context": 202.4983, "num_word_doc": 49.9551, "num_word_query": 39.8311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 192.6682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4204, "query_norm": 1.5361, "queue_k_norm": 1.4575, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2389, "sent_len_1": 66.8855, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.235, "stdk": 0.0479, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0223, "doc_norm": 1.4593, "encoder_q-embeddings": 157.6818, "encoder_q-layer.0": 112.3927, "encoder_q-layer.1": 128.1528, "encoder_q-layer.10": 91.5197, "encoder_q-layer.11": 180.5879, "encoder_q-layer.2": 135.8138, "encoder_q-layer.3": 150.8048, "encoder_q-layer.4": 144.4952, "encoder_q-layer.5": 124.9396, "encoder_q-layer.6": 116.6012, "encoder_q-layer.7": 108.9263, "encoder_q-layer.8": 97.7691, "encoder_q-layer.9": 78.3717, "epoch": 0.3, "inbatch_neg_score": 0.4176, "inbatch_pos_score": 1.1152, "learning_rate": 3.8222222222222226e-05, "loss": 3.0223, "norm_diff": 0.0571, "norm_loss": 0.0, "num_token_doc": 66.7965, "num_token_overlap": 18.0139, "num_token_query": 52.1232, "num_token_union": 73.3961, "num_word_context": 202.013, "num_word_doc": 49.8219, "num_word_query": 39.7385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 189.0788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4165, "query_norm": 1.5164, "queue_k_norm": 1.4603, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1232, "sent_len_1": 66.7965, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6312, "stdk": 0.0477, "stdq": 0.0458, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0221, "doc_norm": 1.4628, "encoder_q-embeddings": 450.6064, "encoder_q-layer.0": 315.8982, "encoder_q-layer.1": 335.9113, "encoder_q-layer.10": 89.1849, "encoder_q-layer.11": 190.0015, "encoder_q-layer.2": 348.8176, "encoder_q-layer.3": 311.6352, "encoder_q-layer.4": 199.5463, "encoder_q-layer.5": 150.4395, "encoder_q-layer.6": 126.4359, "encoder_q-layer.7": 110.5185, "encoder_q-layer.8": 95.5161, "encoder_q-layer.9": 82.3321, "epoch": 0.31, "inbatch_neg_score": 0.4175, "inbatch_pos_score": 1.0938, "learning_rate": 3.816666666666667e-05, "loss": 3.0221, "norm_diff": 0.058, "norm_loss": 0.0, "num_token_doc": 66.6629, "num_token_overlap": 18.0187, "num_token_query": 52.3139, "num_token_union": 73.4342, "num_word_context": 202.1094, "num_word_doc": 49.7201, "num_word_query": 39.8719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 399.764, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4155, "query_norm": 1.5208, "queue_k_norm": 1.4591, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3139, "sent_len_1": 66.6629, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.66, "stdk": 0.0478, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0044, "doc_norm": 1.4596, "encoder_q-embeddings": 730.0692, "encoder_q-layer.0": 588.6527, "encoder_q-layer.1": 643.6683, "encoder_q-layer.10": 85.8789, "encoder_q-layer.11": 185.0947, "encoder_q-layer.2": 464.686, "encoder_q-layer.3": 421.9139, "encoder_q-layer.4": 422.8226, "encoder_q-layer.5": 415.6497, "encoder_q-layer.6": 297.9148, "encoder_q-layer.7": 254.2251, "encoder_q-layer.8": 174.3496, "encoder_q-layer.9": 96.3613, "epoch": 0.31, "inbatch_neg_score": 0.393, "inbatch_pos_score": 1.0889, "learning_rate": 3.811111111111112e-05, "loss": 3.0044, "norm_diff": 0.0432, "norm_loss": 0.0, "num_token_doc": 66.797, "num_token_overlap": 18.0298, "num_token_query": 52.2863, "num_token_union": 73.5226, "num_word_context": 202.5979, "num_word_doc": 49.8695, "num_word_query": 39.8702, "postclip_grad_norm": 1.0, "preclip_grad_norm": 661.2036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3933, "query_norm": 1.5028, "queue_k_norm": 1.4618, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2863, "sent_len_1": 66.797, "sent_len_max_0": 128.0, "sent_len_max_1": 187.48, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.9834, "doc_norm": 1.4671, "encoder_q-embeddings": 473.5431, "encoder_q-layer.0": 306.4335, "encoder_q-layer.1": 367.3633, "encoder_q-layer.10": 84.335, "encoder_q-layer.11": 188.0998, "encoder_q-layer.2": 438.4347, "encoder_q-layer.3": 422.5745, "encoder_q-layer.4": 372.595, "encoder_q-layer.5": 311.8112, "encoder_q-layer.6": 298.8407, "encoder_q-layer.7": 213.3572, "encoder_q-layer.8": 110.5183, "encoder_q-layer.9": 79.0163, "epoch": 0.31, "inbatch_neg_score": 0.4062, "inbatch_pos_score": 1.0977, "learning_rate": 3.805555555555555e-05, "loss": 2.9834, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.7174, "num_token_overlap": 18.0461, "num_token_query": 52.2455, "num_token_union": 73.3903, "num_word_context": 202.0176, "num_word_doc": 49.7426, "num_word_query": 39.8038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 479.6492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4062, "query_norm": 1.4845, "queue_k_norm": 1.464, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2455, "sent_len_1": 66.7174, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2562, "stdk": 0.048, "stdq": 0.0453, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.0242, "doc_norm": 1.4557, "encoder_q-embeddings": 844.3028, "encoder_q-layer.0": 603.9957, "encoder_q-layer.1": 685.4575, "encoder_q-layer.10": 107.6997, "encoder_q-layer.11": 216.9734, "encoder_q-layer.2": 730.8598, "encoder_q-layer.3": 740.9164, "encoder_q-layer.4": 665.2999, "encoder_q-layer.5": 657.1767, "encoder_q-layer.6": 730.2812, "encoder_q-layer.7": 662.1406, "encoder_q-layer.8": 234.9124, "encoder_q-layer.9": 112.9708, "epoch": 0.31, "inbatch_neg_score": 0.4089, "inbatch_pos_score": 1.084, "learning_rate": 3.8e-05, "loss": 3.0242, "norm_diff": 0.0526, "norm_loss": 0.0, "num_token_doc": 66.8471, "num_token_overlap": 18.0416, "num_token_query": 52.2883, "num_token_union": 73.5113, "num_word_context": 202.666, "num_word_doc": 49.8867, "num_word_query": 39.8688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 934.3362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4072, "query_norm": 1.5083, "queue_k_norm": 1.4642, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2883, "sent_len_1": 66.8471, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.525, "stdk": 0.0474, "stdq": 0.0464, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 3.0024, "doc_norm": 1.4705, "encoder_q-embeddings": 524.9838, "encoder_q-layer.0": 447.2785, "encoder_q-layer.1": 500.678, "encoder_q-layer.10": 81.3522, "encoder_q-layer.11": 185.1731, "encoder_q-layer.2": 502.3081, "encoder_q-layer.3": 481.5711, "encoder_q-layer.4": 405.9745, "encoder_q-layer.5": 375.119, "encoder_q-layer.6": 347.0463, "encoder_q-layer.7": 274.0279, "encoder_q-layer.8": 149.0075, "encoder_q-layer.9": 75.0208, "epoch": 0.31, "inbatch_neg_score": 0.3958, "inbatch_pos_score": 1.1162, "learning_rate": 3.7944444444444444e-05, "loss": 3.0024, "norm_diff": 0.0501, "norm_loss": 0.0, "num_token_doc": 66.7709, "num_token_overlap": 17.9863, "num_token_query": 52.0937, "num_token_union": 73.3945, "num_word_context": 202.102, "num_word_doc": 49.8147, "num_word_query": 39.7333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 559.3427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3965, "query_norm": 1.5206, "queue_k_norm": 1.463, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0937, "sent_len_1": 66.7709, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7562, "stdk": 0.048, "stdq": 0.0472, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0289, "doc_norm": 1.4665, "encoder_q-embeddings": 2648.2485, "encoder_q-layer.0": 2099.4705, "encoder_q-layer.1": 2347.1599, "encoder_q-layer.10": 358.8823, "encoder_q-layer.11": 206.9211, "encoder_q-layer.2": 2746.4861, "encoder_q-layer.3": 3255.3567, "encoder_q-layer.4": 3483.8853, "encoder_q-layer.5": 4147.6958, "encoder_q-layer.6": 6148.9199, "encoder_q-layer.7": 8351.7275, "encoder_q-layer.8": 13312.085, "encoder_q-layer.9": 6655.8433, "epoch": 0.31, "inbatch_neg_score": 0.3963, "inbatch_pos_score": 1.1055, "learning_rate": 3.7888888888888894e-05, "loss": 3.0289, "norm_diff": 0.048, "norm_loss": 0.0, "num_token_doc": 66.5727, "num_token_overlap": 17.9844, "num_token_query": 52.0672, "num_token_union": 73.2386, "num_word_context": 201.9846, "num_word_doc": 49.6883, "num_word_query": 39.6787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9940.5103, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3953, "query_norm": 1.5145, "queue_k_norm": 1.4628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0672, "sent_len_1": 66.5727, "sent_len_max_0": 128.0, "sent_len_max_1": 190.18, "stdk": 0.0478, "stdq": 0.047, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0092, "doc_norm": 1.4664, "encoder_q-embeddings": 3836.6074, "encoder_q-layer.0": 2625.0403, "encoder_q-layer.1": 1922.8541, "encoder_q-layer.10": 80.8918, "encoder_q-layer.11": 182.3872, "encoder_q-layer.2": 818.9792, "encoder_q-layer.3": 459.556, "encoder_q-layer.4": 379.7212, "encoder_q-layer.5": 315.6723, "encoder_q-layer.6": 332.3037, "encoder_q-layer.7": 251.7722, "encoder_q-layer.8": 143.2636, "encoder_q-layer.9": 85.1863, "epoch": 0.31, "inbatch_neg_score": 0.3884, "inbatch_pos_score": 1.0762, "learning_rate": 3.7833333333333336e-05, "loss": 3.0092, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 66.9271, "num_token_overlap": 18.0405, "num_token_query": 52.3132, "num_token_union": 73.6042, "num_word_context": 202.2792, "num_word_doc": 49.9596, "num_word_query": 39.8959, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2579.0681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3889, "query_norm": 1.4849, "queue_k_norm": 1.4634, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3132, "sent_len_1": 66.9271, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2488, "stdk": 0.0478, "stdq": 0.0463, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0038, "doc_norm": 1.4582, "encoder_q-embeddings": 224.0471, "encoder_q-layer.0": 155.2036, "encoder_q-layer.1": 171.7951, "encoder_q-layer.10": 79.572, "encoder_q-layer.11": 184.1731, "encoder_q-layer.2": 176.6632, "encoder_q-layer.3": 189.1149, "encoder_q-layer.4": 188.2414, "encoder_q-layer.5": 183.0708, "encoder_q-layer.6": 176.6486, "encoder_q-layer.7": 132.1867, "encoder_q-layer.8": 95.5586, "encoder_q-layer.9": 80.0635, "epoch": 0.31, "inbatch_neg_score": 0.3816, "inbatch_pos_score": 1.0479, "learning_rate": 3.777777777777778e-05, "loss": 3.0038, "norm_diff": 0.0235, "norm_loss": 0.0, "num_token_doc": 66.7608, "num_token_overlap": 18.0034, "num_token_query": 52.2636, "num_token_union": 73.4952, "num_word_context": 202.2749, "num_word_doc": 49.824, "num_word_query": 39.8248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 246.094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3818, "query_norm": 1.4817, "queue_k_norm": 1.4633, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2636, "sent_len_1": 66.7608, "sent_len_max_0": 127.995, "sent_len_max_1": 189.125, "stdk": 0.0475, "stdq": 0.0464, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.0209, "doc_norm": 1.4612, "encoder_q-embeddings": 161.0378, "encoder_q-layer.0": 105.744, "encoder_q-layer.1": 122.8582, "encoder_q-layer.10": 82.0008, "encoder_q-layer.11": 185.1866, "encoder_q-layer.2": 133.8008, "encoder_q-layer.3": 149.1845, "encoder_q-layer.4": 173.3373, "encoder_q-layer.5": 142.5342, "encoder_q-layer.6": 139.3254, "encoder_q-layer.7": 112.4401, "encoder_q-layer.8": 101.7787, "encoder_q-layer.9": 80.6088, "epoch": 0.31, "inbatch_neg_score": 0.3765, "inbatch_pos_score": 1.0537, "learning_rate": 3.772222222222223e-05, "loss": 3.0209, "norm_diff": 0.0133, "norm_loss": 0.0, "num_token_doc": 66.9291, "num_token_overlap": 17.981, "num_token_query": 52.2966, "num_token_union": 73.616, "num_word_context": 202.5759, "num_word_doc": 49.9202, "num_word_query": 39.868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 199.7592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3765, "query_norm": 1.4556, "queue_k_norm": 1.4621, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2966, "sent_len_1": 66.9291, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4013, "stdk": 0.0476, "stdq": 0.0452, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0044, "doc_norm": 1.4659, "encoder_q-embeddings": 581.3364, "encoder_q-layer.0": 636.6785, "encoder_q-layer.1": 566.0306, "encoder_q-layer.10": 82.7282, "encoder_q-layer.11": 180.7117, "encoder_q-layer.2": 400.7325, "encoder_q-layer.3": 326.3587, "encoder_q-layer.4": 284.7906, "encoder_q-layer.5": 246.4641, "encoder_q-layer.6": 253.7555, "encoder_q-layer.7": 198.3905, "encoder_q-layer.8": 123.379, "encoder_q-layer.9": 82.1197, "epoch": 0.31, "inbatch_neg_score": 0.3844, "inbatch_pos_score": 1.0811, "learning_rate": 3.766666666666667e-05, "loss": 3.0044, "norm_diff": 0.01, "norm_loss": 0.0, "num_token_doc": 66.9743, "num_token_overlap": 18.0707, "num_token_query": 52.3315, "num_token_union": 73.5579, "num_word_context": 202.4984, "num_word_doc": 49.9894, "num_word_query": 39.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 587.8807, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3826, "query_norm": 1.4726, "queue_k_norm": 1.4637, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3315, "sent_len_1": 66.9743, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.3137, "stdk": 0.0478, "stdq": 0.0461, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0248, "doc_norm": 1.4653, "encoder_q-embeddings": 247.9382, "encoder_q-layer.0": 177.4764, "encoder_q-layer.1": 219.3304, "encoder_q-layer.10": 98.8557, "encoder_q-layer.11": 201.0797, "encoder_q-layer.2": 148.3068, "encoder_q-layer.3": 120.3019, "encoder_q-layer.4": 111.9353, "encoder_q-layer.5": 103.8392, "encoder_q-layer.6": 114.481, "encoder_q-layer.7": 114.9775, "encoder_q-layer.8": 108.7313, "encoder_q-layer.9": 85.9613, "epoch": 0.32, "inbatch_neg_score": 0.3861, "inbatch_pos_score": 1.082, "learning_rate": 3.761111111111111e-05, "loss": 3.0248, "norm_diff": 0.0388, "norm_loss": 0.0, "num_token_doc": 66.6487, "num_token_overlap": 17.982, "num_token_query": 52.3455, "num_token_union": 73.4666, "num_word_context": 202.3485, "num_word_doc": 49.7229, "num_word_query": 39.9171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 238.8496, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3865, "query_norm": 1.5041, "queue_k_norm": 1.4611, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3455, "sent_len_1": 66.6487, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8887, "stdk": 0.0478, "stdq": 0.047, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.04, "doc_norm": 1.4651, "encoder_q-embeddings": 2515.4719, "encoder_q-layer.0": 1864.5707, "encoder_q-layer.1": 2054.9275, "encoder_q-layer.10": 165.6584, "encoder_q-layer.11": 365.1055, "encoder_q-layer.2": 1814.7097, "encoder_q-layer.3": 1974.6486, "encoder_q-layer.4": 2136.7202, "encoder_q-layer.5": 2189.7739, "encoder_q-layer.6": 1995.0669, "encoder_q-layer.7": 2051.7964, "encoder_q-layer.8": 957.3015, "encoder_q-layer.9": 241.5438, "epoch": 0.32, "inbatch_neg_score": 0.3853, "inbatch_pos_score": 1.0361, "learning_rate": 3.7555555555555554e-05, "loss": 3.04, "norm_diff": 0.0186, "norm_loss": 0.0, "num_token_doc": 66.6116, "num_token_overlap": 17.9787, "num_token_query": 52.2426, "num_token_union": 73.411, "num_word_context": 202.1176, "num_word_doc": 49.7255, "num_word_query": 39.8342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2667.0886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3857, "query_norm": 1.4482, "queue_k_norm": 1.4631, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2426, "sent_len_1": 66.6116, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6213, "stdk": 0.0478, "stdq": 0.045, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0156, "doc_norm": 1.4571, "encoder_q-embeddings": 530.1161, "encoder_q-layer.0": 395.7478, "encoder_q-layer.1": 428.0633, "encoder_q-layer.10": 195.2848, "encoder_q-layer.11": 411.6144, "encoder_q-layer.2": 436.6374, "encoder_q-layer.3": 416.9798, "encoder_q-layer.4": 372.3349, "encoder_q-layer.5": 307.1583, "encoder_q-layer.6": 300.8622, "encoder_q-layer.7": 282.8932, "encoder_q-layer.8": 252.2427, "encoder_q-layer.9": 199.053, "epoch": 0.32, "inbatch_neg_score": 0.3901, "inbatch_pos_score": 1.0615, "learning_rate": 3.7500000000000003e-05, "loss": 3.0156, "norm_diff": 0.0273, "norm_loss": 0.0, "num_token_doc": 66.7915, "num_token_overlap": 18.0307, "num_token_query": 52.3348, "num_token_union": 73.5183, "num_word_context": 202.6076, "num_word_doc": 49.8415, "num_word_query": 39.9054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 548.0373, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3904, "query_norm": 1.4825, "queue_k_norm": 1.4628, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3348, "sent_len_1": 66.7915, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9825, "stdk": 0.0475, "stdq": 0.0458, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0101, "doc_norm": 1.4598, "encoder_q-embeddings": 2704.1467, "encoder_q-layer.0": 1604.7501, "encoder_q-layer.1": 1494.4015, "encoder_q-layer.10": 173.465, "encoder_q-layer.11": 384.9439, "encoder_q-layer.2": 1547.9747, "encoder_q-layer.3": 1409.0112, "encoder_q-layer.4": 1193.1288, "encoder_q-layer.5": 970.6948, "encoder_q-layer.6": 1005.6869, "encoder_q-layer.7": 1035.0247, "encoder_q-layer.8": 483.0142, "encoder_q-layer.9": 217.7652, "epoch": 0.32, "inbatch_neg_score": 0.3763, "inbatch_pos_score": 1.0537, "learning_rate": 3.7444444444444446e-05, "loss": 3.0101, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.7109, "num_token_overlap": 18.0164, "num_token_query": 52.2577, "num_token_union": 73.4407, "num_word_context": 202.1352, "num_word_doc": 49.7295, "num_word_query": 39.8268, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2127.7193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3755, "query_norm": 1.4488, "queue_k_norm": 1.4625, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2577, "sent_len_1": 66.7109, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9575, "stdk": 0.0476, "stdq": 0.0452, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.0292, "doc_norm": 1.4616, "encoder_q-embeddings": 292.8881, "encoder_q-layer.0": 229.1294, "encoder_q-layer.1": 247.8742, "encoder_q-layer.10": 40.4771, "encoder_q-layer.11": 95.8201, "encoder_q-layer.2": 218.5431, "encoder_q-layer.3": 212.6879, "encoder_q-layer.4": 219.0625, "encoder_q-layer.5": 245.4765, "encoder_q-layer.6": 198.9176, "encoder_q-layer.7": 131.8443, "encoder_q-layer.8": 56.3827, "encoder_q-layer.9": 41.9167, "epoch": 0.32, "inbatch_neg_score": 0.3769, "inbatch_pos_score": 1.0625, "learning_rate": 3.738888888888889e-05, "loss": 3.0292, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.9032, "num_token_overlap": 18.0387, "num_token_query": 52.1116, "num_token_union": 73.4455, "num_word_context": 202.3215, "num_word_doc": 49.9723, "num_word_query": 39.7099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 291.0934, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3765, "query_norm": 1.4575, "queue_k_norm": 1.4619, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1116, "sent_len_1": 66.9032, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2937, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0215, "doc_norm": 1.461, "encoder_q-embeddings": 240.5204, "encoder_q-layer.0": 170.7858, "encoder_q-layer.1": 183.3489, "encoder_q-layer.10": 42.6297, "encoder_q-layer.11": 94.6343, "encoder_q-layer.2": 193.1744, "encoder_q-layer.3": 209.2316, "encoder_q-layer.4": 197.1889, "encoder_q-layer.5": 189.1575, "encoder_q-layer.6": 184.8047, "encoder_q-layer.7": 149.5309, "encoder_q-layer.8": 83.2552, "encoder_q-layer.9": 46.3116, "epoch": 0.32, "inbatch_neg_score": 0.3748, "inbatch_pos_score": 1.041, "learning_rate": 3.733333333333334e-05, "loss": 3.0215, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.9781, "num_token_overlap": 18.0204, "num_token_query": 52.2498, "num_token_union": 73.5993, "num_word_context": 202.4927, "num_word_doc": 49.9693, "num_word_query": 39.8364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 248.408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3738, "query_norm": 1.4526, "queue_k_norm": 1.4598, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2498, "sent_len_1": 66.9781, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5387, "stdk": 0.0476, "stdq": 0.0453, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0278, "doc_norm": 1.4652, "encoder_q-embeddings": 404.3059, "encoder_q-layer.0": 292.201, "encoder_q-layer.1": 373.1799, "encoder_q-layer.10": 48.5279, "encoder_q-layer.11": 98.9694, "encoder_q-layer.2": 425.8871, "encoder_q-layer.3": 447.295, "encoder_q-layer.4": 468.9087, "encoder_q-layer.5": 407.9379, "encoder_q-layer.6": 322.3299, "encoder_q-layer.7": 232.9991, "encoder_q-layer.8": 150.4319, "encoder_q-layer.9": 91.044, "epoch": 0.32, "inbatch_neg_score": 0.3692, "inbatch_pos_score": 1.0459, "learning_rate": 3.727777777777778e-05, "loss": 3.0278, "norm_diff": 0.0092, "norm_loss": 0.0, "num_token_doc": 66.8055, "num_token_overlap": 18.0168, "num_token_query": 52.3785, "num_token_union": 73.5374, "num_word_context": 202.3521, "num_word_doc": 49.8511, "num_word_query": 39.9578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 480.7676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3701, "query_norm": 1.4721, "queue_k_norm": 1.4613, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3785, "sent_len_1": 66.8055, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8975, "stdk": 0.0479, "stdq": 0.0461, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0168, "doc_norm": 1.4528, "encoder_q-embeddings": 52.3637, "encoder_q-layer.0": 35.1019, "encoder_q-layer.1": 37.0749, "encoder_q-layer.10": 47.5393, "encoder_q-layer.11": 92.6653, "encoder_q-layer.2": 39.3776, "encoder_q-layer.3": 38.5085, "encoder_q-layer.4": 38.4617, "encoder_q-layer.5": 36.1243, "encoder_q-layer.6": 38.5487, "encoder_q-layer.7": 40.7756, "encoder_q-layer.8": 44.0793, "encoder_q-layer.9": 39.2695, "epoch": 0.32, "inbatch_neg_score": 0.3611, "inbatch_pos_score": 1.0352, "learning_rate": 3.722222222222222e-05, "loss": 3.0168, "norm_diff": 0.0098, "norm_loss": 0.0, "num_token_doc": 66.6911, "num_token_overlap": 17.9888, "num_token_query": 51.9838, "num_token_union": 73.2586, "num_word_context": 201.9818, "num_word_doc": 49.7516, "num_word_query": 39.6379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 71.4527, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3618, "query_norm": 1.4497, "queue_k_norm": 1.4598, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 51.9838, "sent_len_1": 66.6911, "sent_len_max_0": 127.9775, "sent_len_max_1": 188.415, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0082, "doc_norm": 1.4592, "encoder_q-embeddings": 250.6536, "encoder_q-layer.0": 168.9919, "encoder_q-layer.1": 194.522, "encoder_q-layer.10": 45.4172, "encoder_q-layer.11": 96.0689, "encoder_q-layer.2": 208.0197, "encoder_q-layer.3": 194.1681, "encoder_q-layer.4": 185.9624, "encoder_q-layer.5": 176.1875, "encoder_q-layer.6": 183.4383, "encoder_q-layer.7": 158.4377, "encoder_q-layer.8": 118.3569, "encoder_q-layer.9": 63.7084, "epoch": 0.32, "inbatch_neg_score": 0.3512, "inbatch_pos_score": 1.0312, "learning_rate": 3.7166666666666664e-05, "loss": 3.0082, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.8871, "num_token_overlap": 18.0224, "num_token_query": 52.3096, "num_token_union": 73.5788, "num_word_context": 202.0857, "num_word_doc": 49.9193, "num_word_query": 39.8826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 256.5919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3516, "query_norm": 1.4742, "queue_k_norm": 1.4602, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3096, "sent_len_1": 66.8871, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.2837, "stdk": 0.0477, "stdq": 0.0464, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0315, "doc_norm": 1.4609, "encoder_q-embeddings": 1459.667, "encoder_q-layer.0": 984.3803, "encoder_q-layer.1": 1138.2943, "encoder_q-layer.10": 42.1429, "encoder_q-layer.11": 97.5855, "encoder_q-layer.2": 1274.6615, "encoder_q-layer.3": 1294.1904, "encoder_q-layer.4": 1130.4247, "encoder_q-layer.5": 1048.6335, "encoder_q-layer.6": 740.4664, "encoder_q-layer.7": 552.2416, "encoder_q-layer.8": 216.8569, "encoder_q-layer.9": 57.038, "epoch": 0.32, "inbatch_neg_score": 0.3596, "inbatch_pos_score": 1.0215, "learning_rate": 3.7111111111111113e-05, "loss": 3.0315, "norm_diff": 0.013, "norm_loss": 0.0, "num_token_doc": 66.8289, "num_token_overlap": 17.9526, "num_token_query": 52.1768, "num_token_union": 73.5742, "num_word_context": 202.5354, "num_word_doc": 49.8655, "num_word_query": 39.7586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1433.5056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3596, "query_norm": 1.4664, "queue_k_norm": 1.4588, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1768, "sent_len_1": 66.8289, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.5838, "stdk": 0.0478, "stdq": 0.0458, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0209, "doc_norm": 1.4574, "encoder_q-embeddings": 290.5339, "encoder_q-layer.0": 214.208, "encoder_q-layer.1": 251.416, "encoder_q-layer.10": 40.714, "encoder_q-layer.11": 85.2125, "encoder_q-layer.2": 275.144, "encoder_q-layer.3": 282.922, "encoder_q-layer.4": 268.3944, "encoder_q-layer.5": 265.5155, "encoder_q-layer.6": 223.2631, "encoder_q-layer.7": 179.7106, "encoder_q-layer.8": 75.4022, "encoder_q-layer.9": 46.1598, "epoch": 0.33, "inbatch_neg_score": 0.3396, "inbatch_pos_score": 1.0156, "learning_rate": 3.705555555555556e-05, "loss": 3.0209, "norm_diff": 0.0129, "norm_loss": 0.0, "num_token_doc": 66.9683, "num_token_overlap": 18.1024, "num_token_query": 52.3432, "num_token_union": 73.5196, "num_word_context": 202.6249, "num_word_doc": 49.9725, "num_word_query": 39.9404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 326.5801, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3389, "query_norm": 1.4607, "queue_k_norm": 1.4589, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3432, "sent_len_1": 66.9683, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1662, "stdk": 0.0476, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0042, "doc_norm": 1.4572, "encoder_q-embeddings": 87.4971, "encoder_q-layer.0": 63.6727, "encoder_q-layer.1": 72.1763, "encoder_q-layer.10": 42.3253, "encoder_q-layer.11": 91.8315, "encoder_q-layer.2": 85.4048, "encoder_q-layer.3": 88.2145, "encoder_q-layer.4": 81.055, "encoder_q-layer.5": 78.9958, "encoder_q-layer.6": 67.6187, "encoder_q-layer.7": 66.8853, "encoder_q-layer.8": 54.254, "encoder_q-layer.9": 43.1956, "epoch": 0.33, "inbatch_neg_score": 0.333, "inbatch_pos_score": 1.0215, "learning_rate": 3.7e-05, "loss": 3.0042, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.9503, "num_token_overlap": 18.095, "num_token_query": 52.4931, "num_token_union": 73.6572, "num_word_context": 202.8193, "num_word_doc": 49.9819, "num_word_query": 40.0117, "postclip_grad_norm": 1.0, "preclip_grad_norm": 109.0806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3318, "query_norm": 1.4864, "queue_k_norm": 1.4603, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4931, "sent_len_1": 66.9503, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5412, "stdk": 0.0477, "stdq": 0.0469, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0191, "doc_norm": 1.4593, "encoder_q-embeddings": 190.6108, "encoder_q-layer.0": 127.4869, "encoder_q-layer.1": 139.0575, "encoder_q-layer.10": 48.3772, "encoder_q-layer.11": 94.5952, "encoder_q-layer.2": 147.7219, "encoder_q-layer.3": 153.3204, "encoder_q-layer.4": 153.5738, "encoder_q-layer.5": 146.8602, "encoder_q-layer.6": 132.5237, "encoder_q-layer.7": 103.0442, "encoder_q-layer.8": 63.358, "encoder_q-layer.9": 44.526, "epoch": 0.33, "inbatch_neg_score": 0.3293, "inbatch_pos_score": 0.9946, "learning_rate": 3.694444444444445e-05, "loss": 3.0191, "norm_diff": 0.017, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 18.0268, "num_token_query": 52.4295, "num_token_union": 73.5889, "num_word_context": 202.3991, "num_word_doc": 49.8509, "num_word_query": 39.9609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 195.0127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3306, "query_norm": 1.4469, "queue_k_norm": 1.4585, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4295, "sent_len_1": 66.8218, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0813, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0094, "doc_norm": 1.4541, "encoder_q-embeddings": 125.7333, "encoder_q-layer.0": 92.0187, "encoder_q-layer.1": 107.6197, "encoder_q-layer.10": 47.6482, "encoder_q-layer.11": 98.3883, "encoder_q-layer.2": 113.3721, "encoder_q-layer.3": 102.5495, "encoder_q-layer.4": 108.7861, "encoder_q-layer.5": 96.0326, "encoder_q-layer.6": 101.2371, "encoder_q-layer.7": 86.5853, "encoder_q-layer.8": 99.3478, "encoder_q-layer.9": 62.9971, "epoch": 0.33, "inbatch_neg_score": 0.3529, "inbatch_pos_score": 1.0293, "learning_rate": 3.688888888888889e-05, "loss": 3.0094, "norm_diff": 0.0373, "norm_loss": 0.0, "num_token_doc": 66.8094, "num_token_overlap": 18.0532, "num_token_query": 52.4613, "num_token_union": 73.6216, "num_word_context": 202.6412, "num_word_doc": 49.8471, "num_word_query": 39.9983, "postclip_grad_norm": 1.0, "preclip_grad_norm": 149.6521, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3518, "query_norm": 1.4914, "queue_k_norm": 1.4571, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4613, "sent_len_1": 66.8094, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7063, "stdk": 0.0476, "stdq": 0.0468, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.0048, "doc_norm": 1.4572, "encoder_q-embeddings": 105.1378, "encoder_q-layer.0": 69.2403, "encoder_q-layer.1": 77.6571, "encoder_q-layer.10": 37.4272, "encoder_q-layer.11": 85.4135, "encoder_q-layer.2": 84.1811, "encoder_q-layer.3": 81.0145, "encoder_q-layer.4": 77.4989, "encoder_q-layer.5": 74.9842, "encoder_q-layer.6": 68.5869, "encoder_q-layer.7": 63.2221, "encoder_q-layer.8": 74.5356, "encoder_q-layer.9": 50.3569, "epoch": 0.33, "inbatch_neg_score": 0.324, "inbatch_pos_score": 1.0166, "learning_rate": 3.683333333333334e-05, "loss": 3.0048, "norm_diff": 0.0155, "norm_loss": 0.0, "num_token_doc": 66.4582, "num_token_overlap": 17.9314, "num_token_query": 52.2562, "num_token_union": 73.3429, "num_word_context": 202.3797, "num_word_doc": 49.5958, "num_word_query": 39.8406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 115.0526, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3245, "query_norm": 1.463, "queue_k_norm": 1.4558, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2562, "sent_len_1": 66.4582, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9087, "stdk": 0.0478, "stdq": 0.0463, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 3.016, "doc_norm": 1.4568, "encoder_q-embeddings": 289.1631, "encoder_q-layer.0": 287.4254, "encoder_q-layer.1": 393.8685, "encoder_q-layer.10": 48.3281, "encoder_q-layer.11": 100.4361, "encoder_q-layer.2": 568.361, "encoder_q-layer.3": 524.4071, "encoder_q-layer.4": 455.5882, "encoder_q-layer.5": 316.4457, "encoder_q-layer.6": 331.0132, "encoder_q-layer.7": 284.742, "encoder_q-layer.8": 205.3, "encoder_q-layer.9": 89.2564, "epoch": 0.33, "inbatch_neg_score": 0.3532, "inbatch_pos_score": 1.0518, "learning_rate": 3.677777777777778e-05, "loss": 3.016, "norm_diff": 0.0087, "norm_loss": 0.0, "num_token_doc": 66.532, "num_token_overlap": 17.9603, "num_token_query": 52.2792, "num_token_union": 73.3675, "num_word_context": 201.7585, "num_word_doc": 49.6401, "num_word_query": 39.8358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 551.4711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3528, "query_norm": 1.4614, "queue_k_norm": 1.4568, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2792, "sent_len_1": 66.532, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.1925, "stdk": 0.0477, "stdq": 0.0459, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0107, "doc_norm": 1.4622, "encoder_q-embeddings": 126.6134, "encoder_q-layer.0": 87.4813, "encoder_q-layer.1": 113.2476, "encoder_q-layer.10": 45.4471, "encoder_q-layer.11": 94.9409, "encoder_q-layer.2": 99.1392, "encoder_q-layer.3": 100.5503, "encoder_q-layer.4": 99.789, "encoder_q-layer.5": 84.5726, "encoder_q-layer.6": 77.9362, "encoder_q-layer.7": 58.9424, "encoder_q-layer.8": 54.0851, "encoder_q-layer.9": 42.5493, "epoch": 0.33, "inbatch_neg_score": 0.3331, "inbatch_pos_score": 1.04, "learning_rate": 3.672222222222222e-05, "loss": 3.0107, "norm_diff": 0.0312, "norm_loss": 0.0, "num_token_doc": 66.7191, "num_token_overlap": 17.9744, "num_token_query": 52.1069, "num_token_union": 73.3784, "num_word_context": 201.9931, "num_word_doc": 49.7416, "num_word_query": 39.7402, "postclip_grad_norm": 1.0, "preclip_grad_norm": 132.5207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3342, "query_norm": 1.4931, "queue_k_norm": 1.4559, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1069, "sent_len_1": 66.7191, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.51, "stdk": 0.048, "stdq": 0.0474, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0249, "doc_norm": 1.4541, "encoder_q-embeddings": 754.9185, "encoder_q-layer.0": 566.5757, "encoder_q-layer.1": 637.7765, "encoder_q-layer.10": 46.3773, "encoder_q-layer.11": 88.434, "encoder_q-layer.2": 707.5775, "encoder_q-layer.3": 780.8228, "encoder_q-layer.4": 837.7979, "encoder_q-layer.5": 880.3146, "encoder_q-layer.6": 701.564, "encoder_q-layer.7": 632.191, "encoder_q-layer.8": 460.7771, "encoder_q-layer.9": 190.3806, "epoch": 0.33, "inbatch_neg_score": 0.3246, "inbatch_pos_score": 1.0088, "learning_rate": 3.6666666666666666e-05, "loss": 3.0249, "norm_diff": 0.0118, "norm_loss": 0.0, "num_token_doc": 66.5777, "num_token_overlap": 17.969, "num_token_query": 52.3079, "num_token_union": 73.4047, "num_word_context": 202.1515, "num_word_doc": 49.6906, "num_word_query": 39.8768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 934.5464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3242, "query_norm": 1.4613, "queue_k_norm": 1.4563, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3079, "sent_len_1": 66.5777, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3575, "stdk": 0.0477, "stdq": 0.0462, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0269, "doc_norm": 1.4554, "encoder_q-embeddings": 168.1032, "encoder_q-layer.0": 116.5116, "encoder_q-layer.1": 129.5496, "encoder_q-layer.10": 42.0142, "encoder_q-layer.11": 95.8313, "encoder_q-layer.2": 145.8653, "encoder_q-layer.3": 147.1888, "encoder_q-layer.4": 152.7141, "encoder_q-layer.5": 119.0325, "encoder_q-layer.6": 109.3698, "encoder_q-layer.7": 112.4533, "encoder_q-layer.8": 111.6138, "encoder_q-layer.9": 65.7988, "epoch": 0.33, "inbatch_neg_score": 0.3395, "inbatch_pos_score": 1.0059, "learning_rate": 3.6611111111111115e-05, "loss": 3.0269, "norm_diff": 0.0109, "norm_loss": 0.0, "num_token_doc": 66.7682, "num_token_overlap": 17.9621, "num_token_query": 52.0428, "num_token_union": 73.3327, "num_word_context": 201.9925, "num_word_doc": 49.7962, "num_word_query": 39.6737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 183.7359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3394, "query_norm": 1.4592, "queue_k_norm": 1.4532, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0428, "sent_len_1": 66.7682, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.74, "stdk": 0.0478, "stdq": 0.046, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.0261, "doc_norm": 1.4528, "encoder_q-embeddings": 127.4238, "encoder_q-layer.0": 94.8571, "encoder_q-layer.1": 109.6179, "encoder_q-layer.10": 43.5975, "encoder_q-layer.11": 91.5311, "encoder_q-layer.2": 129.6627, "encoder_q-layer.3": 164.3157, "encoder_q-layer.4": 136.4566, "encoder_q-layer.5": 101.5364, "encoder_q-layer.6": 116.3779, "encoder_q-layer.7": 89.1263, "encoder_q-layer.8": 68.1479, "encoder_q-layer.9": 44.0662, "epoch": 0.33, "inbatch_neg_score": 0.3384, "inbatch_pos_score": 1.0283, "learning_rate": 3.655555555555556e-05, "loss": 3.0261, "norm_diff": 0.0086, "norm_loss": 0.0, "num_token_doc": 66.559, "num_token_overlap": 17.9416, "num_token_query": 52.1379, "num_token_union": 73.333, "num_word_context": 202.1884, "num_word_doc": 49.6781, "num_word_query": 39.7404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 158.3958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3396, "query_norm": 1.4512, "queue_k_norm": 1.4511, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1379, "sent_len_1": 66.559, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.9575, "stdk": 0.0477, "stdq": 0.0459, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0089, "doc_norm": 1.4495, "encoder_q-embeddings": 101.3746, "encoder_q-layer.0": 70.5856, "encoder_q-layer.1": 86.2999, "encoder_q-layer.10": 38.9223, "encoder_q-layer.11": 89.0827, "encoder_q-layer.2": 91.6542, "encoder_q-layer.3": 94.8072, "encoder_q-layer.4": 80.4881, "encoder_q-layer.5": 65.7294, "encoder_q-layer.6": 60.4924, "encoder_q-layer.7": 56.3116, "encoder_q-layer.8": 48.968, "encoder_q-layer.9": 39.1479, "epoch": 0.33, "inbatch_neg_score": 0.3352, "inbatch_pos_score": 1.0137, "learning_rate": 3.65e-05, "loss": 3.0089, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 66.6178, "num_token_overlap": 18.0236, "num_token_query": 52.2364, "num_token_union": 73.3287, "num_word_context": 202.0099, "num_word_doc": 49.7195, "num_word_query": 39.8237, "postclip_grad_norm": 1.0, "preclip_grad_norm": 113.8164, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3345, "query_norm": 1.439, "queue_k_norm": 1.4506, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2364, "sent_len_1": 66.6178, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0575, "stdk": 0.0476, "stdq": 0.0454, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0088, "doc_norm": 1.4552, "encoder_q-embeddings": 144.7133, "encoder_q-layer.0": 105.1073, "encoder_q-layer.1": 121.3157, "encoder_q-layer.10": 44.8648, "encoder_q-layer.11": 95.3501, "encoder_q-layer.2": 122.037, "encoder_q-layer.3": 106.7322, "encoder_q-layer.4": 97.1531, "encoder_q-layer.5": 83.435, "encoder_q-layer.6": 90.5613, "encoder_q-layer.7": 64.812, "encoder_q-layer.8": 60.5048, "encoder_q-layer.9": 47.3696, "epoch": 0.34, "inbatch_neg_score": 0.3387, "inbatch_pos_score": 1.041, "learning_rate": 3.644444444444445e-05, "loss": 3.0088, "norm_diff": 0.0139, "norm_loss": 0.0, "num_token_doc": 67.0042, "num_token_overlap": 18.0318, "num_token_query": 52.2223, "num_token_union": 73.5819, "num_word_context": 202.2951, "num_word_doc": 49.9795, "num_word_query": 39.7885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 148.7255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3389, "query_norm": 1.4688, "queue_k_norm": 1.4518, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2223, "sent_len_1": 67.0042, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4288, "stdk": 0.0479, "stdq": 0.0468, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 2.9862, "doc_norm": 1.4481, "encoder_q-embeddings": 97.0176, "encoder_q-layer.0": 71.7312, "encoder_q-layer.1": 80.9976, "encoder_q-layer.10": 39.5578, "encoder_q-layer.11": 98.1335, "encoder_q-layer.2": 86.3906, "encoder_q-layer.3": 79.0468, "encoder_q-layer.4": 78.9513, "encoder_q-layer.5": 62.2223, "encoder_q-layer.6": 61.8547, "encoder_q-layer.7": 57.4311, "encoder_q-layer.8": 56.8333, "encoder_q-layer.9": 46.048, "epoch": 0.34, "inbatch_neg_score": 0.3568, "inbatch_pos_score": 1.0303, "learning_rate": 3.638888888888889e-05, "loss": 2.9862, "norm_diff": 0.0302, "norm_loss": 0.0, "num_token_doc": 66.9616, "num_token_overlap": 18.0522, "num_token_query": 52.3379, "num_token_union": 73.5642, "num_word_context": 202.4684, "num_word_doc": 49.9516, "num_word_query": 39.8915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 112.4477, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3542, "query_norm": 1.4783, "queue_k_norm": 1.452, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3379, "sent_len_1": 66.9616, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8663, "stdk": 0.0476, "stdq": 0.0462, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0351, "doc_norm": 1.4531, "encoder_q-embeddings": 118.3577, "encoder_q-layer.0": 77.8375, "encoder_q-layer.1": 86.6954, "encoder_q-layer.10": 41.6345, "encoder_q-layer.11": 97.6092, "encoder_q-layer.2": 108.1833, "encoder_q-layer.3": 102.2607, "encoder_q-layer.4": 97.8432, "encoder_q-layer.5": 80.4343, "encoder_q-layer.6": 84.6862, "encoder_q-layer.7": 74.9377, "encoder_q-layer.8": 62.9673, "encoder_q-layer.9": 43.7926, "epoch": 0.34, "inbatch_neg_score": 0.3549, "inbatch_pos_score": 1.0146, "learning_rate": 3.633333333333333e-05, "loss": 3.0351, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.881, "num_token_overlap": 17.9862, "num_token_query": 52.2125, "num_token_union": 73.5595, "num_word_context": 202.5548, "num_word_doc": 49.8715, "num_word_query": 39.7951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 130.7597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3525, "query_norm": 1.4442, "queue_k_norm": 1.4526, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2125, "sent_len_1": 66.881, "sent_len_max_0": 127.99, "sent_len_max_1": 190.61, "stdk": 0.0477, "stdq": 0.0452, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.9991, "doc_norm": 1.4537, "encoder_q-embeddings": 285.7155, "encoder_q-layer.0": 204.2924, "encoder_q-layer.1": 211.2467, "encoder_q-layer.10": 88.1596, "encoder_q-layer.11": 172.5243, "encoder_q-layer.2": 204.9943, "encoder_q-layer.3": 213.0967, "encoder_q-layer.4": 194.5406, "encoder_q-layer.5": 206.4916, "encoder_q-layer.6": 174.0358, "encoder_q-layer.7": 138.8287, "encoder_q-layer.8": 133.2968, "encoder_q-layer.9": 98.9607, "epoch": 0.34, "inbatch_neg_score": 0.3394, "inbatch_pos_score": 1.0488, "learning_rate": 3.6277777777777776e-05, "loss": 2.9991, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.4929, "num_token_overlap": 18.0269, "num_token_query": 52.2288, "num_token_union": 73.2379, "num_word_context": 201.8215, "num_word_doc": 49.6153, "num_word_query": 39.8332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 285.4059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3394, "query_norm": 1.4899, "queue_k_norm": 1.4521, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2288, "sent_len_1": 66.4929, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4137, "stdk": 0.0478, "stdq": 0.0468, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0159, "doc_norm": 1.451, "encoder_q-embeddings": 1014.915, "encoder_q-layer.0": 786.6957, "encoder_q-layer.1": 807.0406, "encoder_q-layer.10": 86.0301, "encoder_q-layer.11": 182.9432, "encoder_q-layer.2": 917.3131, "encoder_q-layer.3": 811.0764, "encoder_q-layer.4": 641.6378, "encoder_q-layer.5": 592.2874, "encoder_q-layer.6": 429.9086, "encoder_q-layer.7": 263.5305, "encoder_q-layer.8": 136.4083, "encoder_q-layer.9": 100.214, "epoch": 0.34, "inbatch_neg_score": 0.3562, "inbatch_pos_score": 1.0469, "learning_rate": 3.6222222222222225e-05, "loss": 3.0159, "norm_diff": 0.0619, "norm_loss": 0.0, "num_token_doc": 66.8733, "num_token_overlap": 17.996, "num_token_query": 52.1377, "num_token_union": 73.4415, "num_word_context": 202.4363, "num_word_doc": 49.9188, "num_word_query": 39.74, "postclip_grad_norm": 1.0, "preclip_grad_norm": 953.2806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.356, "query_norm": 1.513, "queue_k_norm": 1.451, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1377, "sent_len_1": 66.8733, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.8762, "stdk": 0.0477, "stdq": 0.047, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0019, "doc_norm": 1.4508, "encoder_q-embeddings": 288.9843, "encoder_q-layer.0": 225.6183, "encoder_q-layer.1": 226.2928, "encoder_q-layer.10": 83.3163, "encoder_q-layer.11": 191.5792, "encoder_q-layer.2": 243.6763, "encoder_q-layer.3": 229.4632, "encoder_q-layer.4": 206.1331, "encoder_q-layer.5": 180.2128, "encoder_q-layer.6": 170.903, "encoder_q-layer.7": 152.3646, "encoder_q-layer.8": 153.9679, "encoder_q-layer.9": 107.2472, "epoch": 0.34, "inbatch_neg_score": 0.3613, "inbatch_pos_score": 1.041, "learning_rate": 3.6166666666666674e-05, "loss": 3.0019, "norm_diff": 0.0649, "norm_loss": 0.0, "num_token_doc": 66.9585, "num_token_overlap": 18.0413, "num_token_query": 52.2767, "num_token_union": 73.5654, "num_word_context": 202.369, "num_word_doc": 49.9201, "num_word_query": 39.8341, "postclip_grad_norm": 1.0, "preclip_grad_norm": 303.917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3584, "query_norm": 1.5157, "queue_k_norm": 1.4525, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2767, "sent_len_1": 66.9585, "sent_len_max_0": 128.0, "sent_len_max_1": 192.3088, "stdk": 0.0476, "stdq": 0.0469, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 2.9677, "doc_norm": 1.4477, "encoder_q-embeddings": 303.1339, "encoder_q-layer.0": 292.1107, "encoder_q-layer.1": 413.0881, "encoder_q-layer.10": 86.2999, "encoder_q-layer.11": 196.4655, "encoder_q-layer.2": 474.9152, "encoder_q-layer.3": 468.4622, "encoder_q-layer.4": 475.362, "encoder_q-layer.5": 342.7952, "encoder_q-layer.6": 418.9391, "encoder_q-layer.7": 394.9862, "encoder_q-layer.8": 447.7651, "encoder_q-layer.9": 246.744, "epoch": 0.34, "inbatch_neg_score": 0.3658, "inbatch_pos_score": 1.041, "learning_rate": 3.611111111111111e-05, "loss": 2.9677, "norm_diff": 0.0731, "norm_loss": 0.0, "num_token_doc": 66.8482, "num_token_overlap": 18.0114, "num_token_query": 52.2287, "num_token_union": 73.5203, "num_word_context": 202.3761, "num_word_doc": 49.8976, "num_word_query": 39.8421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 560.8007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3657, "query_norm": 1.5208, "queue_k_norm": 1.4529, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2287, "sent_len_1": 66.8482, "sent_len_max_0": 127.99, "sent_len_max_1": 189.7612, "stdk": 0.0476, "stdq": 0.0467, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.008, "doc_norm": 1.46, "encoder_q-embeddings": 99.8801, "encoder_q-layer.0": 68.4045, "encoder_q-layer.1": 74.7348, "encoder_q-layer.10": 82.8204, "encoder_q-layer.11": 178.1534, "encoder_q-layer.2": 82.8165, "encoder_q-layer.3": 80.1184, "encoder_q-layer.4": 78.1948, "encoder_q-layer.5": 76.1555, "encoder_q-layer.6": 97.3649, "encoder_q-layer.7": 122.034, "encoder_q-layer.8": 163.5335, "encoder_q-layer.9": 130.7695, "epoch": 0.34, "inbatch_neg_score": 0.3639, "inbatch_pos_score": 1.042, "learning_rate": 3.605555555555556e-05, "loss": 3.008, "norm_diff": 0.0373, "norm_loss": 0.0, "num_token_doc": 66.508, "num_token_overlap": 17.9308, "num_token_query": 52.0544, "num_token_union": 73.1949, "num_word_context": 201.9912, "num_word_doc": 49.582, "num_word_query": 39.641, "postclip_grad_norm": 1.0, "preclip_grad_norm": 164.3544, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3638, "query_norm": 1.4973, "queue_k_norm": 1.4526, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0544, "sent_len_1": 66.508, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.4238, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.9841, "doc_norm": 1.4609, "encoder_q-embeddings": 352.0466, "encoder_q-layer.0": 282.6097, "encoder_q-layer.1": 346.0053, "encoder_q-layer.10": 117.3024, "encoder_q-layer.11": 270.1695, "encoder_q-layer.2": 200.3416, "encoder_q-layer.3": 142.6246, "encoder_q-layer.4": 129.9775, "encoder_q-layer.5": 98.8136, "encoder_q-layer.6": 111.758, "encoder_q-layer.7": 110.0035, "encoder_q-layer.8": 115.6383, "encoder_q-layer.9": 97.9013, "epoch": 0.34, "inbatch_neg_score": 0.3684, "inbatch_pos_score": 1.0439, "learning_rate": 3.6e-05, "loss": 2.9841, "norm_diff": 0.0215, "norm_loss": 0.0, "num_token_doc": 66.8603, "num_token_overlap": 17.9935, "num_token_query": 52.0736, "num_token_union": 73.4546, "num_word_context": 202.2939, "num_word_doc": 49.8949, "num_word_query": 39.6909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 330.1068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3672, "query_norm": 1.4825, "queue_k_norm": 1.4536, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0736, "sent_len_1": 66.8603, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7337, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0239, "doc_norm": 1.4573, "encoder_q-embeddings": 388.8243, "encoder_q-layer.0": 268.834, "encoder_q-layer.1": 303.8142, "encoder_q-layer.10": 84.9111, "encoder_q-layer.11": 189.4672, "encoder_q-layer.2": 358.9023, "encoder_q-layer.3": 425.5203, "encoder_q-layer.4": 435.9047, "encoder_q-layer.5": 362.0709, "encoder_q-layer.6": 415.2477, "encoder_q-layer.7": 422.0471, "encoder_q-layer.8": 507.7985, "encoder_q-layer.9": 228.9446, "epoch": 0.34, "inbatch_neg_score": 0.3726, "inbatch_pos_score": 1.0557, "learning_rate": 3.594444444444445e-05, "loss": 3.0239, "norm_diff": 0.0599, "norm_loss": 0.0, "num_token_doc": 66.6559, "num_token_overlap": 17.9288, "num_token_query": 52.1928, "num_token_union": 73.4785, "num_word_context": 202.2788, "num_word_doc": 49.7305, "num_word_query": 39.7791, "postclip_grad_norm": 1.0, "preclip_grad_norm": 532.3024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3716, "query_norm": 1.5172, "queue_k_norm": 1.4535, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1928, "sent_len_1": 66.6559, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3438, "stdk": 0.0479, "stdq": 0.0464, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.0026, "doc_norm": 1.46, "encoder_q-embeddings": 365.9289, "encoder_q-layer.0": 255.1303, "encoder_q-layer.1": 271.5747, "encoder_q-layer.10": 78.7165, "encoder_q-layer.11": 188.5486, "encoder_q-layer.2": 298.3051, "encoder_q-layer.3": 265.4794, "encoder_q-layer.4": 246.718, "encoder_q-layer.5": 216.5692, "encoder_q-layer.6": 231.3687, "encoder_q-layer.7": 233.0567, "encoder_q-layer.8": 252.9177, "encoder_q-layer.9": 149.2801, "epoch": 0.35, "inbatch_neg_score": 0.3803, "inbatch_pos_score": 1.085, "learning_rate": 3.5888888888888886e-05, "loss": 3.0026, "norm_diff": 0.0697, "norm_loss": 0.0, "num_token_doc": 66.5367, "num_token_overlap": 18.0281, "num_token_query": 52.3111, "num_token_union": 73.407, "num_word_context": 202.3193, "num_word_doc": 49.6625, "num_word_query": 39.8885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 381.3257, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3809, "query_norm": 1.5297, "queue_k_norm": 1.4532, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3111, "sent_len_1": 66.5367, "sent_len_max_0": 127.995, "sent_len_max_1": 189.0062, "stdk": 0.048, "stdq": 0.0467, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.9999, "doc_norm": 1.4583, "encoder_q-embeddings": 249.602, "encoder_q-layer.0": 177.6158, "encoder_q-layer.1": 205.6607, "encoder_q-layer.10": 96.6225, "encoder_q-layer.11": 194.4602, "encoder_q-layer.2": 245.7454, "encoder_q-layer.3": 240.9021, "encoder_q-layer.4": 222.9657, "encoder_q-layer.5": 241.2283, "encoder_q-layer.6": 242.107, "encoder_q-layer.7": 188.9899, "encoder_q-layer.8": 202.1371, "encoder_q-layer.9": 126.1398, "epoch": 0.35, "inbatch_neg_score": 0.4003, "inbatch_pos_score": 1.0908, "learning_rate": 3.5833333333333335e-05, "loss": 2.9999, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.7325, "num_token_overlap": 17.9925, "num_token_query": 52.2121, "num_token_union": 73.4227, "num_word_context": 202.093, "num_word_doc": 49.7533, "num_word_query": 39.801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 312.5496, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3997, "query_norm": 1.5296, "queue_k_norm": 1.4562, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2121, "sent_len_1": 66.7325, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1738, "stdk": 0.0479, "stdq": 0.0464, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.9955, "doc_norm": 1.4581, "encoder_q-embeddings": 408.3426, "encoder_q-layer.0": 300.1718, "encoder_q-layer.1": 342.6395, "encoder_q-layer.10": 46.3562, "encoder_q-layer.11": 95.5755, "encoder_q-layer.2": 358.0555, "encoder_q-layer.3": 288.6353, "encoder_q-layer.4": 279.8439, "encoder_q-layer.5": 222.7262, "encoder_q-layer.6": 265.6559, "encoder_q-layer.7": 222.7842, "encoder_q-layer.8": 172.6083, "encoder_q-layer.9": 73.0419, "epoch": 0.35, "inbatch_neg_score": 0.4047, "inbatch_pos_score": 1.0791, "learning_rate": 3.577777777777778e-05, "loss": 2.9955, "norm_diff": 0.075, "norm_loss": 0.0, "num_token_doc": 66.7439, "num_token_overlap": 17.9948, "num_token_query": 52.248, "num_token_union": 73.4735, "num_word_context": 202.202, "num_word_doc": 49.8157, "num_word_query": 39.8431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 410.6532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4033, "query_norm": 1.5332, "queue_k_norm": 1.4587, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.248, "sent_len_1": 66.7439, "sent_len_max_0": 127.99, "sent_len_max_1": 187.3338, "stdk": 0.0478, "stdq": 0.0461, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.9895, "doc_norm": 1.4645, "encoder_q-embeddings": 1034.9124, "encoder_q-layer.0": 668.6311, "encoder_q-layer.1": 624.0494, "encoder_q-layer.10": 39.6987, "encoder_q-layer.11": 89.1718, "encoder_q-layer.2": 532.0615, "encoder_q-layer.3": 356.2411, "encoder_q-layer.4": 233.1629, "encoder_q-layer.5": 155.2009, "encoder_q-layer.6": 125.9615, "encoder_q-layer.7": 92.9773, "encoder_q-layer.8": 67.7192, "encoder_q-layer.9": 43.7554, "epoch": 0.35, "inbatch_neg_score": 0.4029, "inbatch_pos_score": 1.084, "learning_rate": 3.5722222222222226e-05, "loss": 2.9895, "norm_diff": 0.0559, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 18.0112, "num_token_query": 52.2201, "num_token_union": 73.4182, "num_word_context": 202.4558, "num_word_doc": 49.8142, "num_word_query": 39.8405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 727.1947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4006, "query_norm": 1.5204, "queue_k_norm": 1.4609, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2201, "sent_len_1": 66.7575, "sent_len_max_0": 128.0, "sent_len_max_1": 187.3512, "stdk": 0.048, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.9894, "doc_norm": 1.463, "encoder_q-embeddings": 246.9617, "encoder_q-layer.0": 169.9152, "encoder_q-layer.1": 209.3329, "encoder_q-layer.10": 49.5711, "encoder_q-layer.11": 101.258, "encoder_q-layer.2": 217.088, "encoder_q-layer.3": 192.12, "encoder_q-layer.4": 200.2996, "encoder_q-layer.5": 191.5907, "encoder_q-layer.6": 185.5774, "encoder_q-layer.7": 156.5994, "encoder_q-layer.8": 135.5393, "encoder_q-layer.9": 84.5091, "epoch": 0.35, "inbatch_neg_score": 0.4034, "inbatch_pos_score": 1.0918, "learning_rate": 3.566666666666667e-05, "loss": 2.9894, "norm_diff": 0.0771, "norm_loss": 0.0, "num_token_doc": 66.8407, "num_token_overlap": 17.9978, "num_token_query": 52.1453, "num_token_union": 73.509, "num_word_context": 202.288, "num_word_doc": 49.9177, "num_word_query": 39.777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 263.838, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4016, "query_norm": 1.5401, "queue_k_norm": 1.4617, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1453, "sent_len_1": 66.8407, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.4925, "stdk": 0.0479, "stdq": 0.0466, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9927, "doc_norm": 1.4612, "encoder_q-embeddings": 79.4725, "encoder_q-layer.0": 55.6466, "encoder_q-layer.1": 61.0653, "encoder_q-layer.10": 45.768, "encoder_q-layer.11": 96.7471, "encoder_q-layer.2": 67.5077, "encoder_q-layer.3": 71.8279, "encoder_q-layer.4": 67.298, "encoder_q-layer.5": 56.9765, "encoder_q-layer.6": 60.7777, "encoder_q-layer.7": 64.6157, "encoder_q-layer.8": 64.3796, "encoder_q-layer.9": 50.5333, "epoch": 0.35, "inbatch_neg_score": 0.3932, "inbatch_pos_score": 1.0977, "learning_rate": 3.561111111111111e-05, "loss": 2.9927, "norm_diff": 0.0861, "norm_loss": 0.0, "num_token_doc": 66.7918, "num_token_overlap": 17.9897, "num_token_query": 52.2784, "num_token_union": 73.4768, "num_word_context": 202.4654, "num_word_doc": 49.8025, "num_word_query": 39.8696, "postclip_grad_norm": 1.0, "preclip_grad_norm": 98.4397, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.5473, "queue_k_norm": 1.4656, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2784, "sent_len_1": 66.7918, "sent_len_max_0": 128.0, "sent_len_max_1": 191.64, "stdk": 0.0478, "stdq": 0.0474, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.997, "doc_norm": 1.4685, "encoder_q-embeddings": 329.4618, "encoder_q-layer.0": 259.0403, "encoder_q-layer.1": 300.2476, "encoder_q-layer.10": 41.4517, "encoder_q-layer.11": 95.9786, "encoder_q-layer.2": 308.3109, "encoder_q-layer.3": 221.4097, "encoder_q-layer.4": 178.6171, "encoder_q-layer.5": 155.1762, "encoder_q-layer.6": 136.4662, "encoder_q-layer.7": 152.518, "encoder_q-layer.8": 172.8185, "encoder_q-layer.9": 99.933, "epoch": 0.35, "inbatch_neg_score": 0.3964, "inbatch_pos_score": 1.0684, "learning_rate": 3.555555555555556e-05, "loss": 2.997, "norm_diff": 0.0086, "norm_loss": 0.0, "num_token_doc": 66.7106, "num_token_overlap": 17.9803, "num_token_query": 52.0756, "num_token_union": 73.3493, "num_word_context": 202.3206, "num_word_doc": 49.7949, "num_word_query": 39.7115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 332.1775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3945, "query_norm": 1.4732, "queue_k_norm": 1.4677, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0756, "sent_len_1": 66.7106, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4187, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.977, "doc_norm": 1.4679, "encoder_q-embeddings": 320.1683, "encoder_q-layer.0": 245.2667, "encoder_q-layer.1": 292.6979, "encoder_q-layer.10": 41.9641, "encoder_q-layer.11": 90.5692, "encoder_q-layer.2": 316.3846, "encoder_q-layer.3": 248.4366, "encoder_q-layer.4": 156.7937, "encoder_q-layer.5": 140.8371, "encoder_q-layer.6": 120.5421, "encoder_q-layer.7": 105.3558, "encoder_q-layer.8": 89.0169, "encoder_q-layer.9": 56.1361, "epoch": 0.35, "inbatch_neg_score": 0.3973, "inbatch_pos_score": 1.1094, "learning_rate": 3.55e-05, "loss": 2.977, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.8528, "num_token_overlap": 18.0238, "num_token_query": 52.3443, "num_token_union": 73.6006, "num_word_context": 202.7842, "num_word_doc": 49.9534, "num_word_query": 39.9139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 318.6061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3975, "query_norm": 1.5143, "queue_k_norm": 1.4673, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3443, "sent_len_1": 66.8528, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.4837, "stdk": 0.0481, "stdq": 0.0467, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.001, "doc_norm": 1.4741, "encoder_q-embeddings": 129.3871, "encoder_q-layer.0": 105.1682, "encoder_q-layer.1": 109.6574, "encoder_q-layer.10": 50.9585, "encoder_q-layer.11": 102.2859, "encoder_q-layer.2": 112.5527, "encoder_q-layer.3": 105.1914, "encoder_q-layer.4": 87.2914, "encoder_q-layer.5": 57.6184, "encoder_q-layer.6": 54.1006, "encoder_q-layer.7": 47.2121, "encoder_q-layer.8": 58.234, "encoder_q-layer.9": 49.4968, "epoch": 0.35, "inbatch_neg_score": 0.387, "inbatch_pos_score": 1.0479, "learning_rate": 3.5444444444444445e-05, "loss": 3.001, "norm_diff": 0.0167, "norm_loss": 0.0, "num_token_doc": 66.8803, "num_token_overlap": 18.0496, "num_token_query": 52.192, "num_token_union": 73.4775, "num_word_context": 202.0959, "num_word_doc": 49.9241, "num_word_query": 39.78, "postclip_grad_norm": 1.0, "preclip_grad_norm": 132.4821, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3862, "query_norm": 1.4625, "queue_k_norm": 1.4684, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.192, "sent_len_1": 66.8803, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.6612, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 2.9972, "doc_norm": 1.4602, "encoder_q-embeddings": 157.7244, "encoder_q-layer.0": 101.7909, "encoder_q-layer.1": 122.4459, "encoder_q-layer.10": 41.7853, "encoder_q-layer.11": 94.4113, "encoder_q-layer.2": 122.9032, "encoder_q-layer.3": 114.0033, "encoder_q-layer.4": 103.1969, "encoder_q-layer.5": 80.7838, "encoder_q-layer.6": 74.9169, "encoder_q-layer.7": 68.2617, "encoder_q-layer.8": 71.2561, "encoder_q-layer.9": 50.4191, "epoch": 0.35, "inbatch_neg_score": 0.3738, "inbatch_pos_score": 1.0459, "learning_rate": 3.538888888888889e-05, "loss": 2.9972, "norm_diff": 0.0084, "norm_loss": 0.0, "num_token_doc": 66.8724, "num_token_overlap": 18.0141, "num_token_query": 52.2087, "num_token_union": 73.5278, "num_word_context": 202.2607, "num_word_doc": 49.8976, "num_word_query": 39.7867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 151.9773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3745, "query_norm": 1.4644, "queue_k_norm": 1.4661, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2087, "sent_len_1": 66.8724, "sent_len_max_0": 128.0, "sent_len_max_1": 189.435, "stdk": 0.0477, "stdq": 0.0454, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.9837, "doc_norm": 1.471, "encoder_q-embeddings": 123.7081, "encoder_q-layer.0": 91.1264, "encoder_q-layer.1": 102.8188, "encoder_q-layer.10": 42.0587, "encoder_q-layer.11": 89.2144, "encoder_q-layer.2": 117.1238, "encoder_q-layer.3": 118.2737, "encoder_q-layer.4": 115.4456, "encoder_q-layer.5": 98.4799, "encoder_q-layer.6": 95.5131, "encoder_q-layer.7": 90.5654, "encoder_q-layer.8": 72.7235, "encoder_q-layer.9": 49.5893, "epoch": 0.36, "inbatch_neg_score": 0.3737, "inbatch_pos_score": 1.0664, "learning_rate": 3.5333333333333336e-05, "loss": 2.9837, "norm_diff": 0.0257, "norm_loss": 0.0, "num_token_doc": 66.9517, "num_token_overlap": 18.0819, "num_token_query": 52.3725, "num_token_union": 73.6249, "num_word_context": 202.4315, "num_word_doc": 49.9361, "num_word_query": 39.9221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 143.0932, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.373, "query_norm": 1.4958, "queue_k_norm": 1.4677, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3725, "sent_len_1": 66.9517, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.6612, "stdk": 0.0481, "stdq": 0.0468, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0075, "doc_norm": 1.4707, "encoder_q-embeddings": 225.4747, "encoder_q-layer.0": 156.962, "encoder_q-layer.1": 180.805, "encoder_q-layer.10": 40.5557, "encoder_q-layer.11": 92.9108, "encoder_q-layer.2": 211.4885, "encoder_q-layer.3": 229.6429, "encoder_q-layer.4": 240.4428, "encoder_q-layer.5": 221.9115, "encoder_q-layer.6": 198.8449, "encoder_q-layer.7": 149.1127, "encoder_q-layer.8": 104.0816, "encoder_q-layer.9": 57.2896, "epoch": 0.36, "inbatch_neg_score": 0.3697, "inbatch_pos_score": 1.041, "learning_rate": 3.527777777777778e-05, "loss": 3.0075, "norm_diff": 0.0155, "norm_loss": 0.0, "num_token_doc": 66.7155, "num_token_overlap": 17.9467, "num_token_query": 52.1103, "num_token_union": 73.4064, "num_word_context": 202.4313, "num_word_doc": 49.771, "num_word_query": 39.7408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 258.4194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3701, "query_norm": 1.4682, "queue_k_norm": 1.467, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1103, "sent_len_1": 66.7155, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.5525, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.9943, "doc_norm": 1.4721, "encoder_q-embeddings": 387.0024, "encoder_q-layer.0": 247.1092, "encoder_q-layer.1": 250.176, "encoder_q-layer.10": 40.8458, "encoder_q-layer.11": 83.9252, "encoder_q-layer.2": 273.7091, "encoder_q-layer.3": 272.417, "encoder_q-layer.4": 258.227, "encoder_q-layer.5": 260.9148, "encoder_q-layer.6": 285.3115, "encoder_q-layer.7": 282.3565, "encoder_q-layer.8": 158.532, "encoder_q-layer.9": 50.2417, "epoch": 0.36, "inbatch_neg_score": 0.3731, "inbatch_pos_score": 1.0732, "learning_rate": 3.522222222222222e-05, "loss": 2.9943, "norm_diff": 0.0092, "norm_loss": 0.0, "num_token_doc": 66.7361, "num_token_overlap": 18.0683, "num_token_query": 52.2336, "num_token_union": 73.4313, "num_word_context": 202.2791, "num_word_doc": 49.8516, "num_word_query": 39.8049, "postclip_grad_norm": 1.0, "preclip_grad_norm": 373.1774, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3743, "query_norm": 1.4795, "queue_k_norm": 1.4671, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2336, "sent_len_1": 66.7361, "sent_len_max_0": 127.9975, "sent_len_max_1": 185.715, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 2.9919, "doc_norm": 1.4688, "encoder_q-embeddings": 113.1771, "encoder_q-layer.0": 85.0631, "encoder_q-layer.1": 101.8358, "encoder_q-layer.10": 41.8129, "encoder_q-layer.11": 89.1981, "encoder_q-layer.2": 120.7237, "encoder_q-layer.3": 119.8101, "encoder_q-layer.4": 131.7212, "encoder_q-layer.5": 108.9467, "encoder_q-layer.6": 82.7253, "encoder_q-layer.7": 80.4088, "encoder_q-layer.8": 82.8837, "encoder_q-layer.9": 58.9447, "epoch": 0.36, "inbatch_neg_score": 0.3689, "inbatch_pos_score": 1.0547, "learning_rate": 3.516666666666667e-05, "loss": 2.9919, "norm_diff": 0.0188, "norm_loss": 0.0, "num_token_doc": 66.8037, "num_token_overlap": 17.9473, "num_token_query": 52.1122, "num_token_union": 73.5067, "num_word_context": 202.5844, "num_word_doc": 49.8723, "num_word_query": 39.7412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 144.5005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3674, "query_norm": 1.4876, "queue_k_norm": 1.4656, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1122, "sent_len_1": 66.8037, "sent_len_max_0": 128.0, "sent_len_max_1": 188.215, "stdk": 0.048, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 2.9915, "doc_norm": 1.4594, "encoder_q-embeddings": 767.657, "encoder_q-layer.0": 611.4558, "encoder_q-layer.1": 692.9245, "encoder_q-layer.10": 42.9879, "encoder_q-layer.11": 86.203, "encoder_q-layer.2": 732.419, "encoder_q-layer.3": 434.0968, "encoder_q-layer.4": 103.5509, "encoder_q-layer.5": 99.3225, "encoder_q-layer.6": 95.551, "encoder_q-layer.7": 79.6089, "encoder_q-layer.8": 78.9279, "encoder_q-layer.9": 55.0781, "epoch": 0.36, "inbatch_neg_score": 0.3546, "inbatch_pos_score": 1.0312, "learning_rate": 3.511111111111111e-05, "loss": 2.9915, "norm_diff": 0.009, "norm_loss": 0.0, "num_token_doc": 66.8361, "num_token_overlap": 17.9697, "num_token_query": 52.1818, "num_token_union": 73.5242, "num_word_context": 202.3821, "num_word_doc": 49.9362, "num_word_query": 39.8016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 719.3012, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3528, "query_norm": 1.4636, "queue_k_norm": 1.4663, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1818, "sent_len_1": 66.8361, "sent_len_max_0": 128.0, "sent_len_max_1": 187.765, "stdk": 0.0476, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.9813, "doc_norm": 1.4618, "encoder_q-embeddings": 93.8806, "encoder_q-layer.0": 67.0777, "encoder_q-layer.1": 73.9944, "encoder_q-layer.10": 42.3565, "encoder_q-layer.11": 90.4723, "encoder_q-layer.2": 81.9719, "encoder_q-layer.3": 51.6584, "encoder_q-layer.4": 43.8116, "encoder_q-layer.5": 35.6256, "encoder_q-layer.6": 36.8626, "encoder_q-layer.7": 37.0628, "encoder_q-layer.8": 43.2462, "encoder_q-layer.9": 39.693, "epoch": 0.36, "inbatch_neg_score": 0.3604, "inbatch_pos_score": 1.0498, "learning_rate": 3.505555555555556e-05, "loss": 2.9813, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 67.0981, "num_token_overlap": 17.9708, "num_token_query": 52.2382, "num_token_union": 73.6875, "num_word_context": 202.5711, "num_word_doc": 50.0067, "num_word_query": 39.8223, "postclip_grad_norm": 1.0, "preclip_grad_norm": 92.0996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3589, "query_norm": 1.4923, "queue_k_norm": 1.4661, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2382, "sent_len_1": 67.0981, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.5788, "stdk": 0.0477, "stdq": 0.0464, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.9836, "doc_norm": 1.4696, "encoder_q-embeddings": 81.5039, "encoder_q-layer.0": 57.6623, "encoder_q-layer.1": 69.2632, "encoder_q-layer.10": 42.2815, "encoder_q-layer.11": 96.8809, "encoder_q-layer.2": 76.9031, "encoder_q-layer.3": 74.7724, "encoder_q-layer.4": 70.6871, "encoder_q-layer.5": 58.022, "encoder_q-layer.6": 49.2406, "encoder_q-layer.7": 51.0495, "encoder_q-layer.8": 51.1189, "encoder_q-layer.9": 45.6353, "epoch": 0.36, "inbatch_neg_score": 0.3542, "inbatch_pos_score": 1.0527, "learning_rate": 3.5e-05, "loss": 2.9836, "norm_diff": 0.0252, "norm_loss": 0.0, "num_token_doc": 66.7913, "num_token_overlap": 18.0241, "num_token_query": 52.196, "num_token_union": 73.4266, "num_word_context": 201.8489, "num_word_doc": 49.8194, "num_word_query": 39.749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 96.9121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3535, "query_norm": 1.4941, "queue_k_norm": 1.4644, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.196, "sent_len_1": 66.7913, "sent_len_max_0": 128.0, "sent_len_max_1": 189.02, "stdk": 0.0481, "stdq": 0.0464, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0224, "doc_norm": 1.4587, "encoder_q-embeddings": 121.9765, "encoder_q-layer.0": 82.1618, "encoder_q-layer.1": 93.222, "encoder_q-layer.10": 44.5578, "encoder_q-layer.11": 93.919, "encoder_q-layer.2": 97.4366, "encoder_q-layer.3": 98.6838, "encoder_q-layer.4": 99.7934, "encoder_q-layer.5": 91.8682, "encoder_q-layer.6": 99.2509, "encoder_q-layer.7": 111.1285, "encoder_q-layer.8": 129.3305, "encoder_q-layer.9": 86.6929, "epoch": 0.36, "inbatch_neg_score": 0.3532, "inbatch_pos_score": 1.0039, "learning_rate": 3.4944444444444446e-05, "loss": 3.0224, "norm_diff": 0.009, "norm_loss": 0.0, "num_token_doc": 66.7205, "num_token_overlap": 17.9658, "num_token_query": 52.2288, "num_token_union": 73.4401, "num_word_context": 202.2895, "num_word_doc": 49.7928, "num_word_query": 39.8066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 148.1957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3511, "query_norm": 1.4514, "queue_k_norm": 1.4618, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2288, "sent_len_1": 66.7205, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5987, "stdk": 0.0477, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.9971, "doc_norm": 1.4663, "encoder_q-embeddings": 1950.9508, "encoder_q-layer.0": 1439.4138, "encoder_q-layer.1": 1828.9354, "encoder_q-layer.10": 45.8186, "encoder_q-layer.11": 87.2803, "encoder_q-layer.2": 536.1823, "encoder_q-layer.3": 233.441, "encoder_q-layer.4": 162.1238, "encoder_q-layer.5": 131.4902, "encoder_q-layer.6": 109.25, "encoder_q-layer.7": 84.3728, "encoder_q-layer.8": 66.6024, "encoder_q-layer.9": 45.7127, "epoch": 0.36, "inbatch_neg_score": 0.3387, "inbatch_pos_score": 1.0342, "learning_rate": 3.4888888888888895e-05, "loss": 2.9971, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.8778, "num_token_overlap": 17.9975, "num_token_query": 52.1231, "num_token_union": 73.4577, "num_word_context": 202.4474, "num_word_doc": 49.9011, "num_word_query": 39.7494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1469.8146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3376, "query_norm": 1.4845, "queue_k_norm": 1.4618, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1231, "sent_len_1": 66.8778, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8363, "stdk": 0.048, "stdq": 0.0464, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.9772, "doc_norm": 1.4556, "encoder_q-embeddings": 111.5016, "encoder_q-layer.0": 82.4952, "encoder_q-layer.1": 86.6261, "encoder_q-layer.10": 38.5317, "encoder_q-layer.11": 83.3327, "encoder_q-layer.2": 94.8547, "encoder_q-layer.3": 98.6118, "encoder_q-layer.4": 95.6025, "encoder_q-layer.5": 99.2911, "encoder_q-layer.6": 92.4518, "encoder_q-layer.7": 71.7185, "encoder_q-layer.8": 82.2642, "encoder_q-layer.9": 58.0638, "epoch": 0.36, "inbatch_neg_score": 0.35, "inbatch_pos_score": 1.041, "learning_rate": 3.483333333333334e-05, "loss": 2.9772, "norm_diff": 0.0354, "norm_loss": 0.0, "num_token_doc": 66.7743, "num_token_overlap": 17.9598, "num_token_query": 52.1136, "num_token_union": 73.4544, "num_word_context": 201.9839, "num_word_doc": 49.8418, "num_word_query": 39.7138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 128.2169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3479, "query_norm": 1.491, "queue_k_norm": 1.4597, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1136, "sent_len_1": 66.7743, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.8363, "stdk": 0.0476, "stdq": 0.0466, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.9823, "doc_norm": 1.4552, "encoder_q-embeddings": 167.569, "encoder_q-layer.0": 118.8593, "encoder_q-layer.1": 138.6316, "encoder_q-layer.10": 45.7844, "encoder_q-layer.11": 91.6051, "encoder_q-layer.2": 132.8063, "encoder_q-layer.3": 120.5502, "encoder_q-layer.4": 116.8205, "encoder_q-layer.5": 113.2591, "encoder_q-layer.6": 122.3838, "encoder_q-layer.7": 112.2443, "encoder_q-layer.8": 95.0392, "encoder_q-layer.9": 67.8847, "epoch": 0.37, "inbatch_neg_score": 0.3341, "inbatch_pos_score": 1.001, "learning_rate": 3.477777777777778e-05, "loss": 2.9823, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 66.9683, "num_token_overlap": 18.0363, "num_token_query": 52.1568, "num_token_union": 73.4829, "num_word_context": 202.0994, "num_word_doc": 49.9476, "num_word_query": 39.7306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 176.9396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3323, "query_norm": 1.4827, "queue_k_norm": 1.4583, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1568, "sent_len_1": 66.9683, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.65, "stdk": 0.0476, "stdq": 0.0456, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.9825, "doc_norm": 1.4649, "encoder_q-embeddings": 296.2531, "encoder_q-layer.0": 201.8835, "encoder_q-layer.1": 257.5618, "encoder_q-layer.10": 37.5859, "encoder_q-layer.11": 85.4877, "encoder_q-layer.2": 295.5013, "encoder_q-layer.3": 272.2739, "encoder_q-layer.4": 255.3683, "encoder_q-layer.5": 216.91, "encoder_q-layer.6": 215.392, "encoder_q-layer.7": 146.9955, "encoder_q-layer.8": 83.097, "encoder_q-layer.9": 50.4072, "epoch": 0.37, "inbatch_neg_score": 0.3308, "inbatch_pos_score": 1.0371, "learning_rate": 3.472222222222222e-05, "loss": 2.9825, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 66.7407, "num_token_overlap": 18.0175, "num_token_query": 52.2244, "num_token_union": 73.3801, "num_word_context": 202.469, "num_word_doc": 49.8263, "num_word_query": 39.8331, "postclip_grad_norm": 1.0, "preclip_grad_norm": 312.5902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3303, "query_norm": 1.506, "queue_k_norm": 1.4595, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2244, "sent_len_1": 66.7407, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.6188, "stdk": 0.048, "stdq": 0.0465, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 2.9934, "doc_norm": 1.4552, "encoder_q-embeddings": 486.5942, "encoder_q-layer.0": 350.8703, "encoder_q-layer.1": 353.9455, "encoder_q-layer.10": 81.8854, "encoder_q-layer.11": 179.4594, "encoder_q-layer.2": 411.1243, "encoder_q-layer.3": 380.8012, "encoder_q-layer.4": 347.3558, "encoder_q-layer.5": 312.4785, "encoder_q-layer.6": 356.9447, "encoder_q-layer.7": 339.5161, "encoder_q-layer.8": 369.814, "encoder_q-layer.9": 215.7307, "epoch": 0.37, "inbatch_neg_score": 0.3245, "inbatch_pos_score": 1.0059, "learning_rate": 3.466666666666667e-05, "loss": 2.9934, "norm_diff": 0.0472, "norm_loss": 0.0, "num_token_doc": 66.5657, "num_token_overlap": 17.9432, "num_token_query": 52.2019, "num_token_union": 73.3941, "num_word_context": 202.2247, "num_word_doc": 49.689, "num_word_query": 39.806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 513.5261, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.5024, "queue_k_norm": 1.4572, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2019, "sent_len_1": 66.5657, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9588, "stdk": 0.0477, "stdq": 0.0465, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.972, "doc_norm": 1.455, "encoder_q-embeddings": 673.3268, "encoder_q-layer.0": 425.1052, "encoder_q-layer.1": 548.9591, "encoder_q-layer.10": 94.3627, "encoder_q-layer.11": 188.9005, "encoder_q-layer.2": 675.794, "encoder_q-layer.3": 663.0207, "encoder_q-layer.4": 490.2745, "encoder_q-layer.5": 459.4628, "encoder_q-layer.6": 362.2203, "encoder_q-layer.7": 269.9711, "encoder_q-layer.8": 136.8418, "encoder_q-layer.9": 88.5366, "epoch": 0.37, "inbatch_neg_score": 0.3299, "inbatch_pos_score": 1.0312, "learning_rate": 3.4611111111111114e-05, "loss": 2.972, "norm_diff": 0.0613, "norm_loss": 0.0, "num_token_doc": 67.0266, "num_token_overlap": 18.0546, "num_token_query": 52.1463, "num_token_union": 73.5387, "num_word_context": 202.4657, "num_word_doc": 50.0478, "num_word_query": 39.7483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 692.903, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3293, "query_norm": 1.5163, "queue_k_norm": 1.4563, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1463, "sent_len_1": 67.0266, "sent_len_max_0": 127.9963, "sent_len_max_1": 186.97, "stdk": 0.0478, "stdq": 0.0466, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.9908, "doc_norm": 1.4549, "encoder_q-embeddings": 1473.201, "encoder_q-layer.0": 1058.4491, "encoder_q-layer.1": 1054.3014, "encoder_q-layer.10": 80.8867, "encoder_q-layer.11": 180.3783, "encoder_q-layer.2": 1151.8538, "encoder_q-layer.3": 1077.0625, "encoder_q-layer.4": 906.0912, "encoder_q-layer.5": 769.1415, "encoder_q-layer.6": 865.1909, "encoder_q-layer.7": 697.7573, "encoder_q-layer.8": 269.541, "encoder_q-layer.9": 109.7942, "epoch": 0.37, "inbatch_neg_score": 0.3375, "inbatch_pos_score": 1.0342, "learning_rate": 3.4555555555555556e-05, "loss": 2.9908, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.7235, "num_token_overlap": 17.9917, "num_token_query": 52.1509, "num_token_union": 73.413, "num_word_context": 202.2916, "num_word_doc": 49.7703, "num_word_query": 39.7576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1350.4924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3362, "query_norm": 1.5064, "queue_k_norm": 1.4568, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1509, "sent_len_1": 66.7235, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6488, "stdk": 0.0478, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.9858, "doc_norm": 1.454, "encoder_q-embeddings": 123.0774, "encoder_q-layer.0": 79.7322, "encoder_q-layer.1": 92.2292, "encoder_q-layer.10": 85.5918, "encoder_q-layer.11": 189.0974, "encoder_q-layer.2": 101.0336, "encoder_q-layer.3": 122.7321, "encoder_q-layer.4": 105.2293, "encoder_q-layer.5": 99.4998, "encoder_q-layer.6": 114.4723, "encoder_q-layer.7": 111.9206, "encoder_q-layer.8": 96.7324, "encoder_q-layer.9": 84.0017, "epoch": 0.37, "inbatch_neg_score": 0.3377, "inbatch_pos_score": 1.0518, "learning_rate": 3.45e-05, "loss": 2.9858, "norm_diff": 0.0796, "norm_loss": 0.0, "num_token_doc": 66.7354, "num_token_overlap": 18.0023, "num_token_query": 52.2019, "num_token_union": 73.4224, "num_word_context": 202.238, "num_word_doc": 49.8216, "num_word_query": 39.806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 167.8566, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3364, "query_norm": 1.5336, "queue_k_norm": 1.4555, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2019, "sent_len_1": 66.7354, "sent_len_max_0": 128.0, "sent_len_max_1": 188.25, "stdk": 0.0478, "stdq": 0.0474, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.9768, "doc_norm": 1.4611, "encoder_q-embeddings": 213.659, "encoder_q-layer.0": 156.141, "encoder_q-layer.1": 169.5673, "encoder_q-layer.10": 76.3886, "encoder_q-layer.11": 180.3048, "encoder_q-layer.2": 179.6424, "encoder_q-layer.3": 177.6015, "encoder_q-layer.4": 180.0224, "encoder_q-layer.5": 158.3433, "encoder_q-layer.6": 137.7077, "encoder_q-layer.7": 119.1479, "encoder_q-layer.8": 106.4018, "encoder_q-layer.9": 78.8996, "epoch": 0.37, "inbatch_neg_score": 0.3409, "inbatch_pos_score": 1.0293, "learning_rate": 3.444444444444445e-05, "loss": 2.9768, "norm_diff": 0.0285, "norm_loss": 0.0, "num_token_doc": 66.9314, "num_token_overlap": 18.0179, "num_token_query": 52.1518, "num_token_union": 73.4935, "num_word_context": 202.3423, "num_word_doc": 49.9156, "num_word_query": 39.7124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 232.8703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3411, "query_norm": 1.4897, "queue_k_norm": 1.4573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1518, "sent_len_1": 66.9314, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.2688, "stdk": 0.0481, "stdq": 0.0457, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.9886, "doc_norm": 1.458, "encoder_q-embeddings": 217.7143, "encoder_q-layer.0": 158.0629, "encoder_q-layer.1": 181.0335, "encoder_q-layer.10": 87.8161, "encoder_q-layer.11": 174.2283, "encoder_q-layer.2": 209.5321, "encoder_q-layer.3": 207.2999, "encoder_q-layer.4": 187.8114, "encoder_q-layer.5": 167.2628, "encoder_q-layer.6": 174.2618, "encoder_q-layer.7": 150.5779, "encoder_q-layer.8": 104.7897, "encoder_q-layer.9": 86.7346, "epoch": 0.37, "inbatch_neg_score": 0.3627, "inbatch_pos_score": 1.041, "learning_rate": 3.438888888888889e-05, "loss": 2.9886, "norm_diff": 0.0429, "norm_loss": 0.0, "num_token_doc": 66.7116, "num_token_overlap": 17.979, "num_token_query": 52.1236, "num_token_union": 73.3069, "num_word_context": 202.2502, "num_word_doc": 49.7487, "num_word_query": 39.715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 253.6523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3604, "query_norm": 1.501, "queue_k_norm": 1.4549, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1236, "sent_len_1": 66.7116, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3975, "stdk": 0.048, "stdq": 0.0462, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.9946, "doc_norm": 1.4502, "encoder_q-embeddings": 213.1028, "encoder_q-layer.0": 147.4312, "encoder_q-layer.1": 182.4108, "encoder_q-layer.10": 78.9756, "encoder_q-layer.11": 174.2919, "encoder_q-layer.2": 250.4506, "encoder_q-layer.3": 217.4672, "encoder_q-layer.4": 212.504, "encoder_q-layer.5": 137.0743, "encoder_q-layer.6": 128.8442, "encoder_q-layer.7": 111.2982, "encoder_q-layer.8": 121.095, "encoder_q-layer.9": 97.8474, "epoch": 0.37, "inbatch_neg_score": 0.355, "inbatch_pos_score": 1.0293, "learning_rate": 3.433333333333333e-05, "loss": 2.9946, "norm_diff": 0.0284, "norm_loss": 0.0, "num_token_doc": 66.8019, "num_token_overlap": 17.9541, "num_token_query": 52.0146, "num_token_union": 73.3787, "num_word_context": 202.1118, "num_word_doc": 49.839, "num_word_query": 39.657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 251.2162, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3538, "query_norm": 1.4786, "queue_k_norm": 1.4577, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0146, "sent_len_1": 66.8019, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.345, "stdk": 0.0476, "stdq": 0.0457, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.9725, "doc_norm": 1.4563, "encoder_q-embeddings": 915.0755, "encoder_q-layer.0": 707.0639, "encoder_q-layer.1": 798.6567, "encoder_q-layer.10": 90.9806, "encoder_q-layer.11": 199.2746, "encoder_q-layer.2": 279.872, "encoder_q-layer.3": 128.8327, "encoder_q-layer.4": 105.6187, "encoder_q-layer.5": 94.1342, "encoder_q-layer.6": 94.0674, "encoder_q-layer.7": 94.6063, "encoder_q-layer.8": 103.0802, "encoder_q-layer.9": 86.158, "epoch": 0.37, "inbatch_neg_score": 0.3656, "inbatch_pos_score": 1.0527, "learning_rate": 3.427777777777778e-05, "loss": 2.9725, "norm_diff": 0.0486, "norm_loss": 0.0, "num_token_doc": 66.7443, "num_token_overlap": 18.0039, "num_token_query": 52.1111, "num_token_union": 73.3518, "num_word_context": 201.8643, "num_word_doc": 49.7974, "num_word_query": 39.7338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 699.5429, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3647, "query_norm": 1.5049, "queue_k_norm": 1.4553, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1111, "sent_len_1": 66.7443, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.8325, "stdk": 0.0479, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.9979, "doc_norm": 1.4552, "encoder_q-embeddings": 154.9331, "encoder_q-layer.0": 109.4738, "encoder_q-layer.1": 125.2325, "encoder_q-layer.10": 84.3401, "encoder_q-layer.11": 199.4397, "encoder_q-layer.2": 139.1354, "encoder_q-layer.3": 137.0289, "encoder_q-layer.4": 121.198, "encoder_q-layer.5": 111.1892, "encoder_q-layer.6": 104.5479, "encoder_q-layer.7": 112.937, "encoder_q-layer.8": 103.1813, "encoder_q-layer.9": 81.3627, "epoch": 0.37, "inbatch_neg_score": 0.363, "inbatch_pos_score": 1.0684, "learning_rate": 3.4222222222222224e-05, "loss": 2.9979, "norm_diff": 0.0236, "norm_loss": 0.0, "num_token_doc": 66.8788, "num_token_overlap": 17.9982, "num_token_query": 52.1508, "num_token_union": 73.4906, "num_word_context": 202.2363, "num_word_doc": 49.9036, "num_word_query": 39.7412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 188.2934, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3625, "query_norm": 1.4787, "queue_k_norm": 1.4584, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1508, "sent_len_1": 66.8788, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0312, "stdk": 0.0478, "stdq": 0.0459, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 2.9739, "doc_norm": 1.4525, "encoder_q-embeddings": 225.4909, "encoder_q-layer.0": 168.1804, "encoder_q-layer.1": 177.6526, "encoder_q-layer.10": 84.1244, "encoder_q-layer.11": 181.5755, "encoder_q-layer.2": 179.8679, "encoder_q-layer.3": 164.9344, "encoder_q-layer.4": 174.364, "encoder_q-layer.5": 148.3918, "encoder_q-layer.6": 140.0171, "encoder_q-layer.7": 136.7477, "encoder_q-layer.8": 153.8264, "encoder_q-layer.9": 110.9115, "epoch": 0.38, "inbatch_neg_score": 0.361, "inbatch_pos_score": 1.042, "learning_rate": 3.4166666666666666e-05, "loss": 2.9739, "norm_diff": 0.0365, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 17.9854, "num_token_query": 52.1908, "num_token_union": 73.5052, "num_word_context": 202.5394, "num_word_doc": 49.8492, "num_word_query": 39.7887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 243.6299, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3596, "query_norm": 1.4891, "queue_k_norm": 1.4584, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1908, "sent_len_1": 66.8029, "sent_len_max_0": 127.99, "sent_len_max_1": 189.8313, "stdk": 0.0477, "stdq": 0.0465, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.9642, "doc_norm": 1.4604, "encoder_q-embeddings": 172.2502, "encoder_q-layer.0": 121.0394, "encoder_q-layer.1": 132.9653, "encoder_q-layer.10": 81.307, "encoder_q-layer.11": 183.2095, "encoder_q-layer.2": 140.9979, "encoder_q-layer.3": 138.8386, "encoder_q-layer.4": 119.3903, "encoder_q-layer.5": 118.4901, "encoder_q-layer.6": 104.9356, "encoder_q-layer.7": 105.6573, "encoder_q-layer.8": 104.8305, "encoder_q-layer.9": 83.0548, "epoch": 0.38, "inbatch_neg_score": 0.3557, "inbatch_pos_score": 1.0469, "learning_rate": 3.411111111111111e-05, "loss": 2.9642, "norm_diff": 0.0113, "norm_loss": 0.0, "num_token_doc": 66.6962, "num_token_overlap": 18.0226, "num_token_query": 52.287, "num_token_union": 73.4321, "num_word_context": 202.4253, "num_word_doc": 49.8041, "num_word_query": 39.8587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 191.124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3545, "query_norm": 1.4658, "queue_k_norm": 1.4583, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.287, "sent_len_1": 66.6962, "sent_len_max_0": 127.995, "sent_len_max_1": 187.6838, "stdk": 0.048, "stdq": 0.046, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.9844, "doc_norm": 1.4639, "encoder_q-embeddings": 2370.001, "encoder_q-layer.0": 1631.5662, "encoder_q-layer.1": 1753.8208, "encoder_q-layer.10": 77.7481, "encoder_q-layer.11": 183.6245, "encoder_q-layer.2": 2003.8497, "encoder_q-layer.3": 1720.4458, "encoder_q-layer.4": 1453.6564, "encoder_q-layer.5": 1201.2422, "encoder_q-layer.6": 1090.7051, "encoder_q-layer.7": 1081.7026, "encoder_q-layer.8": 358.6124, "encoder_q-layer.9": 135.2628, "epoch": 0.38, "inbatch_neg_score": 0.3685, "inbatch_pos_score": 1.0723, "learning_rate": 3.405555555555556e-05, "loss": 2.9844, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.9503, "num_token_overlap": 18.0467, "num_token_query": 52.4151, "num_token_union": 73.6448, "num_word_context": 202.6413, "num_word_doc": 49.9515, "num_word_query": 39.9632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2192.5922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3682, "query_norm": 1.4931, "queue_k_norm": 1.457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4151, "sent_len_1": 66.9503, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.585, "stdk": 0.0481, "stdq": 0.0468, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.9494, "doc_norm": 1.4604, "encoder_q-embeddings": 173.0504, "encoder_q-layer.0": 111.6182, "encoder_q-layer.1": 133.2881, "encoder_q-layer.10": 86.0213, "encoder_q-layer.11": 187.9289, "encoder_q-layer.2": 155.6505, "encoder_q-layer.3": 155.0984, "encoder_q-layer.4": 159.9276, "encoder_q-layer.5": 147.0399, "encoder_q-layer.6": 135.7253, "encoder_q-layer.7": 135.4499, "encoder_q-layer.8": 119.0615, "encoder_q-layer.9": 89.5747, "epoch": 0.38, "inbatch_neg_score": 0.3684, "inbatch_pos_score": 1.0635, "learning_rate": 3.4000000000000007e-05, "loss": 2.9494, "norm_diff": 0.03, "norm_loss": 0.0, "num_token_doc": 66.8802, "num_token_overlap": 18.0599, "num_token_query": 52.4083, "num_token_union": 73.6116, "num_word_context": 202.5297, "num_word_doc": 49.9295, "num_word_query": 39.9843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 212.8363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3679, "query_norm": 1.4904, "queue_k_norm": 1.4593, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4083, "sent_len_1": 66.8802, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2188, "stdk": 0.048, "stdq": 0.0468, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.9734, "doc_norm": 1.455, "encoder_q-embeddings": 1850.3822, "encoder_q-layer.0": 1343.8186, "encoder_q-layer.1": 1483.2314, "encoder_q-layer.10": 80.247, "encoder_q-layer.11": 195.5902, "encoder_q-layer.2": 1846.5952, "encoder_q-layer.3": 1678.2833, "encoder_q-layer.4": 1449.5643, "encoder_q-layer.5": 1025.2878, "encoder_q-layer.6": 540.9947, "encoder_q-layer.7": 408.1065, "encoder_q-layer.8": 244.6742, "encoder_q-layer.9": 113.4131, "epoch": 0.38, "inbatch_neg_score": 0.3771, "inbatch_pos_score": 1.0586, "learning_rate": 3.394444444444444e-05, "loss": 2.9734, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.777, "num_token_overlap": 18.0428, "num_token_query": 52.252, "num_token_union": 73.4442, "num_word_context": 202.2526, "num_word_doc": 49.8329, "num_word_query": 39.8432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1809.1665, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3757, "query_norm": 1.4638, "queue_k_norm": 1.4581, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.252, "sent_len_1": 66.777, "sent_len_max_0": 127.995, "sent_len_max_1": 189.4638, "stdk": 0.0478, "stdq": 0.0458, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.9812, "doc_norm": 1.4636, "encoder_q-embeddings": 531.3442, "encoder_q-layer.0": 362.144, "encoder_q-layer.1": 369.7237, "encoder_q-layer.10": 86.7432, "encoder_q-layer.11": 195.4496, "encoder_q-layer.2": 455.4042, "encoder_q-layer.3": 441.8841, "encoder_q-layer.4": 364.8458, "encoder_q-layer.5": 214.1311, "encoder_q-layer.6": 162.039, "encoder_q-layer.7": 149.7328, "encoder_q-layer.8": 146.4028, "encoder_q-layer.9": 103.6456, "epoch": 0.38, "inbatch_neg_score": 0.3745, "inbatch_pos_score": 1.0742, "learning_rate": 3.388888888888889e-05, "loss": 2.9812, "norm_diff": 0.0139, "norm_loss": 0.0, "num_token_doc": 66.9405, "num_token_overlap": 18.0915, "num_token_query": 52.308, "num_token_union": 73.5174, "num_word_context": 202.4214, "num_word_doc": 49.9448, "num_word_query": 39.8765, "postclip_grad_norm": 1.0, "preclip_grad_norm": 487.9716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3765, "query_norm": 1.4776, "queue_k_norm": 1.4593, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.308, "sent_len_1": 66.9405, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1312, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 2.9952, "doc_norm": 1.4597, "encoder_q-embeddings": 493.2948, "encoder_q-layer.0": 309.7891, "encoder_q-layer.1": 348.6654, "encoder_q-layer.10": 87.9894, "encoder_q-layer.11": 193.5182, "encoder_q-layer.2": 407.64, "encoder_q-layer.3": 402.9446, "encoder_q-layer.4": 313.6033, "encoder_q-layer.5": 162.9421, "encoder_q-layer.6": 113.3392, "encoder_q-layer.7": 104.8972, "encoder_q-layer.8": 110.4702, "encoder_q-layer.9": 88.75, "epoch": 0.38, "inbatch_neg_score": 0.3956, "inbatch_pos_score": 1.0625, "learning_rate": 3.3833333333333334e-05, "loss": 2.9952, "norm_diff": 0.0316, "norm_loss": 0.0, "num_token_doc": 66.8474, "num_token_overlap": 18.0409, "num_token_query": 52.3017, "num_token_union": 73.5181, "num_word_context": 202.6818, "num_word_doc": 49.836, "num_word_query": 39.8606, "postclip_grad_norm": 1.0, "preclip_grad_norm": 446.3834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3943, "query_norm": 1.4913, "queue_k_norm": 1.4586, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3017, "sent_len_1": 66.8474, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.5962, "stdk": 0.0479, "stdq": 0.0465, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.9861, "doc_norm": 1.4603, "encoder_q-embeddings": 449.9981, "encoder_q-layer.0": 319.174, "encoder_q-layer.1": 362.7307, "encoder_q-layer.10": 81.2176, "encoder_q-layer.11": 183.5376, "encoder_q-layer.2": 445.7032, "encoder_q-layer.3": 376.9693, "encoder_q-layer.4": 253.243, "encoder_q-layer.5": 134.904, "encoder_q-layer.6": 112.2808, "encoder_q-layer.7": 99.7455, "encoder_q-layer.8": 95.6539, "encoder_q-layer.9": 75.2236, "epoch": 0.38, "inbatch_neg_score": 0.382, "inbatch_pos_score": 1.0703, "learning_rate": 3.377777777777778e-05, "loss": 2.9861, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.7656, "num_token_overlap": 18.0321, "num_token_query": 52.2191, "num_token_union": 73.4047, "num_word_context": 201.9044, "num_word_doc": 49.7891, "num_word_query": 39.8043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 425.1211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3823, "query_norm": 1.4626, "queue_k_norm": 1.4628, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2191, "sent_len_1": 66.7656, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.5975, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 2.9797, "doc_norm": 1.4584, "encoder_q-embeddings": 244.4962, "encoder_q-layer.0": 170.075, "encoder_q-layer.1": 185.4688, "encoder_q-layer.10": 87.5407, "encoder_q-layer.11": 190.8482, "encoder_q-layer.2": 185.0213, "encoder_q-layer.3": 195.0375, "encoder_q-layer.4": 157.7721, "encoder_q-layer.5": 129.5915, "encoder_q-layer.6": 111.0148, "encoder_q-layer.7": 125.0232, "encoder_q-layer.8": 95.4591, "encoder_q-layer.9": 83.9453, "epoch": 0.38, "inbatch_neg_score": 0.3688, "inbatch_pos_score": 1.0459, "learning_rate": 3.3722222222222225e-05, "loss": 2.9797, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 67.2138, "num_token_overlap": 18.1886, "num_token_query": 52.5238, "num_token_union": 73.7371, "num_word_context": 202.9116, "num_word_doc": 50.1175, "num_word_query": 40.0504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 245.3516, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3674, "query_norm": 1.4545, "queue_k_norm": 1.462, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.5238, "sent_len_1": 67.2138, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8137, "stdk": 0.0479, "stdq": 0.0458, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 2.9678, "doc_norm": 1.4659, "encoder_q-embeddings": 152.695, "encoder_q-layer.0": 113.501, "encoder_q-layer.1": 124.2884, "encoder_q-layer.10": 97.6981, "encoder_q-layer.11": 211.0981, "encoder_q-layer.2": 104.3458, "encoder_q-layer.3": 93.0893, "encoder_q-layer.4": 88.2121, "encoder_q-layer.5": 80.8361, "encoder_q-layer.6": 78.036, "encoder_q-layer.7": 85.8642, "encoder_q-layer.8": 97.0754, "encoder_q-layer.9": 85.3017, "epoch": 0.38, "inbatch_neg_score": 0.3753, "inbatch_pos_score": 1.0176, "learning_rate": 3.366666666666667e-05, "loss": 2.9678, "norm_diff": 0.0536, "norm_loss": 0.0, "num_token_doc": 66.9726, "num_token_overlap": 18.0413, "num_token_query": 52.2099, "num_token_union": 73.5264, "num_word_context": 202.3482, "num_word_doc": 49.9999, "num_word_query": 39.7888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 177.4781, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3748, "query_norm": 1.4122, "queue_k_norm": 1.4602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2099, "sent_len_1": 66.9726, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1325, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 2.9791, "doc_norm": 1.4666, "encoder_q-embeddings": 102.6266, "encoder_q-layer.0": 66.0119, "encoder_q-layer.1": 71.448, "encoder_q-layer.10": 82.9248, "encoder_q-layer.11": 200.405, "encoder_q-layer.2": 79.6654, "encoder_q-layer.3": 79.594, "encoder_q-layer.4": 81.3355, "encoder_q-layer.5": 82.2828, "encoder_q-layer.6": 90.1162, "encoder_q-layer.7": 93.0388, "encoder_q-layer.8": 97.5547, "encoder_q-layer.9": 85.0395, "epoch": 0.39, "inbatch_neg_score": 0.3648, "inbatch_pos_score": 1.0488, "learning_rate": 3.3611111111111116e-05, "loss": 2.9791, "norm_diff": 0.0097, "norm_loss": 0.0, "num_token_doc": 66.825, "num_token_overlap": 18.042, "num_token_query": 52.2675, "num_token_union": 73.4702, "num_word_context": 202.4924, "num_word_doc": 49.8263, "num_word_query": 39.8645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 148.1167, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3633, "query_norm": 1.4741, "queue_k_norm": 1.4609, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2675, "sent_len_1": 66.825, "sent_len_max_0": 127.985, "sent_len_max_1": 192.1813, "stdk": 0.0481, "stdq": 0.0466, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 2.9778, "doc_norm": 1.4581, "encoder_q-embeddings": 425.8424, "encoder_q-layer.0": 301.7735, "encoder_q-layer.1": 311.1436, "encoder_q-layer.10": 160.4507, "encoder_q-layer.11": 383.6029, "encoder_q-layer.2": 245.4117, "encoder_q-layer.3": 214.3359, "encoder_q-layer.4": 196.6754, "encoder_q-layer.5": 173.9026, "encoder_q-layer.6": 175.5302, "encoder_q-layer.7": 168.2031, "encoder_q-layer.8": 180.4938, "encoder_q-layer.9": 157.718, "epoch": 0.39, "inbatch_neg_score": 0.3575, "inbatch_pos_score": 1.0225, "learning_rate": 3.355555555555556e-05, "loss": 2.9778, "norm_diff": 0.0063, "norm_loss": 0.0, "num_token_doc": 66.7439, "num_token_overlap": 17.9269, "num_token_query": 52.0435, "num_token_union": 73.3395, "num_word_context": 202.1543, "num_word_doc": 49.7505, "num_word_query": 39.6982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 399.5432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3574, "query_norm": 1.456, "queue_k_norm": 1.4594, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0435, "sent_len_1": 66.7439, "sent_len_max_0": 127.9963, "sent_len_max_1": 193.2713, "stdk": 0.0478, "stdq": 0.046, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.9742, "doc_norm": 1.4602, "encoder_q-embeddings": 6005.2944, "encoder_q-layer.0": 4358.4878, "encoder_q-layer.1": 4724.6094, "encoder_q-layer.10": 169.9156, "encoder_q-layer.11": 361.1049, "encoder_q-layer.2": 4710.5986, "encoder_q-layer.3": 4486.1265, "encoder_q-layer.4": 4304.9624, "encoder_q-layer.5": 2716.1445, "encoder_q-layer.6": 1037.719, "encoder_q-layer.7": 710.9615, "encoder_q-layer.8": 390.3327, "encoder_q-layer.9": 216.6511, "epoch": 0.39, "inbatch_neg_score": 0.3501, "inbatch_pos_score": 1.0381, "learning_rate": 3.35e-05, "loss": 2.9742, "norm_diff": 0.0262, "norm_loss": 0.0, "num_token_doc": 66.7262, "num_token_overlap": 17.9197, "num_token_query": 52.1494, "num_token_union": 73.4433, "num_word_context": 202.055, "num_word_doc": 49.7771, "num_word_query": 39.7367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5304.1584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3496, "query_norm": 1.434, "queue_k_norm": 1.4607, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1494, "sent_len_1": 66.7262, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1188, "stdk": 0.048, "stdq": 0.0454, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9648, "doc_norm": 1.4605, "encoder_q-embeddings": 815.6692, "encoder_q-layer.0": 652.8548, "encoder_q-layer.1": 745.7742, "encoder_q-layer.10": 177.9491, "encoder_q-layer.11": 387.9748, "encoder_q-layer.2": 895.7206, "encoder_q-layer.3": 625.8253, "encoder_q-layer.4": 402.434, "encoder_q-layer.5": 391.3657, "encoder_q-layer.6": 336.1595, "encoder_q-layer.7": 323.301, "encoder_q-layer.8": 316.3793, "encoder_q-layer.9": 233.3114, "epoch": 0.39, "inbatch_neg_score": 0.3499, "inbatch_pos_score": 1.0566, "learning_rate": 3.3444444444444443e-05, "loss": 2.9648, "norm_diff": 0.0191, "norm_loss": 0.0, "num_token_doc": 66.8776, "num_token_overlap": 18.0028, "num_token_query": 52.2232, "num_token_union": 73.4831, "num_word_context": 202.2461, "num_word_doc": 49.8868, "num_word_query": 39.8027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 861.5133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3491, "query_norm": 1.4796, "queue_k_norm": 1.4596, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2232, "sent_len_1": 66.8776, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5563, "stdk": 0.048, "stdq": 0.0471, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.9737, "doc_norm": 1.4572, "encoder_q-embeddings": 640.8055, "encoder_q-layer.0": 468.822, "encoder_q-layer.1": 545.1307, "encoder_q-layer.10": 174.1645, "encoder_q-layer.11": 371.8625, "encoder_q-layer.2": 675.6299, "encoder_q-layer.3": 579.7874, "encoder_q-layer.4": 503.352, "encoder_q-layer.5": 506.1308, "encoder_q-layer.6": 462.9708, "encoder_q-layer.7": 271.0207, "encoder_q-layer.8": 223.3218, "encoder_q-layer.9": 187.4675, "epoch": 0.39, "inbatch_neg_score": 0.3433, "inbatch_pos_score": 1.0391, "learning_rate": 3.338888888888889e-05, "loss": 2.9737, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.9628, "num_token_overlap": 18.0111, "num_token_query": 52.2343, "num_token_union": 73.5753, "num_word_context": 202.3765, "num_word_doc": 49.9508, "num_word_query": 39.8068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 701.489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3418, "query_norm": 1.4447, "queue_k_norm": 1.4605, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2343, "sent_len_1": 66.9628, "sent_len_max_0": 127.9838, "sent_len_max_1": 190.2713, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.963, "doc_norm": 1.4621, "encoder_q-embeddings": 741.6102, "encoder_q-layer.0": 571.502, "encoder_q-layer.1": 681.5982, "encoder_q-layer.10": 175.6204, "encoder_q-layer.11": 386.7587, "encoder_q-layer.2": 774.4953, "encoder_q-layer.3": 610.209, "encoder_q-layer.4": 522.1873, "encoder_q-layer.5": 368.442, "encoder_q-layer.6": 332.7088, "encoder_q-layer.7": 203.034, "encoder_q-layer.8": 194.5994, "encoder_q-layer.9": 169.0672, "epoch": 0.39, "inbatch_neg_score": 0.3472, "inbatch_pos_score": 1.0371, "learning_rate": 3.3333333333333335e-05, "loss": 2.963, "norm_diff": 0.025, "norm_loss": 0.0, "num_token_doc": 66.5613, "num_token_overlap": 18.042, "num_token_query": 52.3159, "num_token_union": 73.3248, "num_word_context": 202.0361, "num_word_doc": 49.6673, "num_word_query": 39.9187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 748.5858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3464, "query_norm": 1.4372, "queue_k_norm": 1.4577, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3159, "sent_len_1": 66.5613, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.465, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 28.4863, "dev_samples_per_second": 2.247, "dev_steps_per_second": 0.035, "epoch": 0.39, "step": 40000, "test_accuracy": 92.39501953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.43980616331100464, "test_doc_norm": 1.4081811904907227, "test_inbatch_neg_score": 0.6561570167541504, "test_inbatch_pos_score": 1.5005125999450684, "test_loss": 0.43980616331100464, "test_loss_align": 1.0798273086547852, "test_loss_unif": 3.758479595184326, "test_loss_unif_q@queue": 3.7584798336029053, "test_norm_diff": 0.031445108354091644, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.33899059891700745, "test_query_norm": 1.4396235942840576, "test_queue_k_norm": 1.457575798034668, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04092756658792496, "test_stdq": 0.04074352979660034, "test_stdqueue_k": 0.0480077788233757, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.4863, "dev_samples_per_second": 2.247, "dev_steps_per_second": 0.035, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.32882, "eval_beir-arguana_recall@10": 0.55334, "eval_beir-arguana_recall@100": 0.85917, "eval_beir-arguana_recall@20": 0.68634, "eval_beir-avg_ndcg@10": 0.34290883333333333, "eval_beir-avg_recall@10": 0.40919, "eval_beir-avg_recall@100": 0.5957565, "eval_beir-avg_recall@20": 0.4710770833333333, "eval_beir-cqadupstack_ndcg@10": 0.23067833333333335, "eval_beir-cqadupstack_recall@10": 0.31548000000000004, "eval_beir-cqadupstack_recall@100": 0.541845, "eval_beir-cqadupstack_recall@20": 0.38021083333333333, "eval_beir-fiqa_ndcg@10": 0.22013, "eval_beir-fiqa_recall@10": 0.27763, "eval_beir-fiqa_recall@100": 0.52869, "eval_beir-fiqa_recall@20": 0.34673, "eval_beir-nfcorpus_ndcg@10": 0.26083, "eval_beir-nfcorpus_recall@10": 0.12936, "eval_beir-nfcorpus_recall@100": 0.24594, "eval_beir-nfcorpus_recall@20": 0.15703, "eval_beir-nq_ndcg@10": 0.24548, "eval_beir-nq_recall@10": 0.40911, "eval_beir-nq_recall@100": 0.7499, "eval_beir-nq_recall@20": 0.52668, "eval_beir-quora_ndcg@10": 0.72344, "eval_beir-quora_recall@10": 0.84163, "eval_beir-quora_recall@100": 0.9631, "eval_beir-quora_recall@20": 0.89681, "eval_beir-scidocs_ndcg@10": 0.13933, "eval_beir-scidocs_recall@10": 0.1447, "eval_beir-scidocs_recall@100": 0.3359, "eval_beir-scidocs_recall@20": 0.19478, "eval_beir-scifact_ndcg@10": 0.59754, "eval_beir-scifact_recall@10": 0.73167, "eval_beir-scifact_recall@100": 0.91144, "eval_beir-scifact_recall@20": 0.80733, "eval_beir-trec-covid_ndcg@10": 0.52752, "eval_beir-trec-covid_recall@10": 0.572, "eval_beir-trec-covid_recall@100": 0.402, "eval_beir-trec-covid_recall@20": 0.529, "eval_beir-webis-touche2020_ndcg@10": 0.15532, "eval_beir-webis-touche2020_recall@10": 0.11698, "eval_beir-webis-touche2020_recall@100": 0.41958, "eval_beir-webis-touche2020_recall@20": 0.18586, "eval_senteval-avg_sts": 0.7328126320645281, "eval_senteval-sickr_spearman": 0.7054196262997665, "eval_senteval-stsb_spearman": 0.7602056378292896, "step": 40000, "test_accuracy": 92.39501953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.43980616331100464, "test_doc_norm": 1.4081811904907227, "test_inbatch_neg_score": 0.6561570167541504, "test_inbatch_pos_score": 1.5005125999450684, "test_loss": 0.43980616331100464, "test_loss_align": 1.0798273086547852, "test_loss_unif": 3.758479595184326, "test_loss_unif_q@queue": 3.7584798336029053, "test_norm_diff": 0.031445108354091644, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.33899059891700745, "test_query_norm": 1.4396235942840576, "test_queue_k_norm": 1.457575798034668, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04092756658792496, "test_stdq": 0.04074352979660034, "test_stdqueue_k": 0.0480077788233757, "test_stdqueue_q": 0.0 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.9622, "doc_norm": 1.4583, "encoder_q-embeddings": 894.3094, "encoder_q-layer.0": 611.534, "encoder_q-layer.1": 756.0311, "encoder_q-layer.10": 148.3864, "encoder_q-layer.11": 327.5704, "encoder_q-layer.2": 903.9437, "encoder_q-layer.3": 703.7903, "encoder_q-layer.4": 569.7455, "encoder_q-layer.5": 457.0333, "encoder_q-layer.6": 430.2604, "encoder_q-layer.7": 255.0587, "encoder_q-layer.8": 192.6491, "encoder_q-layer.9": 151.2713, "epoch": 0.39, "inbatch_neg_score": 0.3273, "inbatch_pos_score": 1.0049, "learning_rate": 3.327777777777778e-05, "loss": 2.9622, "norm_diff": 0.029, "norm_loss": 0.0, "num_token_doc": 66.8378, "num_token_overlap": 18.1123, "num_token_query": 52.3392, "num_token_union": 73.4455, "num_word_context": 201.9837, "num_word_doc": 49.8597, "num_word_query": 39.8863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 879.3275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3271, "query_norm": 1.4293, "queue_k_norm": 1.4596, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3392, "sent_len_1": 66.8378, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5525, "stdk": 0.048, "stdq": 0.0457, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.9882, "doc_norm": 1.4548, "encoder_q-embeddings": 3877.5054, "encoder_q-layer.0": 2897.5994, "encoder_q-layer.1": 3628.2847, "encoder_q-layer.10": 183.2575, "encoder_q-layer.11": 365.6117, "encoder_q-layer.2": 3679.8855, "encoder_q-layer.3": 3018.4182, "encoder_q-layer.4": 2506.4517, "encoder_q-layer.5": 2221.729, "encoder_q-layer.6": 1374.6808, "encoder_q-layer.7": 672.9727, "encoder_q-layer.8": 255.3555, "encoder_q-layer.9": 160.093, "epoch": 0.39, "inbatch_neg_score": 0.3297, "inbatch_pos_score": 1.0176, "learning_rate": 3.322222222222222e-05, "loss": 2.9882, "norm_diff": 0.0277, "norm_loss": 0.0, "num_token_doc": 66.5588, "num_token_overlap": 17.9213, "num_token_query": 52.1001, "num_token_union": 73.333, "num_word_context": 202.0514, "num_word_doc": 49.6508, "num_word_query": 39.6982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3702.4001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3301, "query_norm": 1.4271, "queue_k_norm": 1.4588, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1001, "sent_len_1": 66.5588, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.715, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.9756, "doc_norm": 1.4554, "encoder_q-embeddings": 525.3254, "encoder_q-layer.0": 396.0167, "encoder_q-layer.1": 478.5317, "encoder_q-layer.10": 185.2079, "encoder_q-layer.11": 384.5032, "encoder_q-layer.2": 559.7111, "encoder_q-layer.3": 540.461, "encoder_q-layer.4": 621.4214, "encoder_q-layer.5": 642.4384, "encoder_q-layer.6": 460.8509, "encoder_q-layer.7": 236.8724, "encoder_q-layer.8": 214.2893, "encoder_q-layer.9": 185.3872, "epoch": 0.39, "inbatch_neg_score": 0.3404, "inbatch_pos_score": 0.9839, "learning_rate": 3.316666666666667e-05, "loss": 2.9756, "norm_diff": 0.0424, "norm_loss": 0.0, "num_token_doc": 66.7865, "num_token_overlap": 17.9804, "num_token_query": 52.2858, "num_token_union": 73.5309, "num_word_context": 202.3722, "num_word_doc": 49.8294, "num_word_query": 39.8406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 659.7458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3394, "query_norm": 1.413, "queue_k_norm": 1.4565, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2858, "sent_len_1": 66.7865, "sent_len_max_0": 127.985, "sent_len_max_1": 189.1175, "stdk": 0.0479, "stdq": 0.0443, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.9583, "doc_norm": 1.4593, "encoder_q-embeddings": 367.5526, "encoder_q-layer.0": 256.4657, "encoder_q-layer.1": 308.6454, "encoder_q-layer.10": 166.3717, "encoder_q-layer.11": 391.9643, "encoder_q-layer.2": 355.0465, "encoder_q-layer.3": 356.4958, "encoder_q-layer.4": 392.5332, "encoder_q-layer.5": 311.8235, "encoder_q-layer.6": 231.038, "encoder_q-layer.7": 185.0226, "encoder_q-layer.8": 193.685, "encoder_q-layer.9": 167.5228, "epoch": 0.39, "inbatch_neg_score": 0.339, "inbatch_pos_score": 1.043, "learning_rate": 3.311111111111112e-05, "loss": 2.9583, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.7553, "num_token_overlap": 18.0193, "num_token_query": 52.2195, "num_token_union": 73.4772, "num_word_context": 202.526, "num_word_doc": 49.8421, "num_word_query": 39.8435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 442.8703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3381, "query_norm": 1.4546, "queue_k_norm": 1.4572, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2195, "sent_len_1": 66.7553, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2125, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.9706, "doc_norm": 1.4526, "encoder_q-embeddings": 318.0629, "encoder_q-layer.0": 221.8973, "encoder_q-layer.1": 242.6189, "encoder_q-layer.10": 151.0115, "encoder_q-layer.11": 350.826, "encoder_q-layer.2": 249.541, "encoder_q-layer.3": 214.5457, "encoder_q-layer.4": 180.321, "encoder_q-layer.5": 164.234, "encoder_q-layer.6": 155.4423, "encoder_q-layer.7": 157.3004, "encoder_q-layer.8": 170.0409, "encoder_q-layer.9": 149.6916, "epoch": 0.4, "inbatch_neg_score": 0.3385, "inbatch_pos_score": 1.043, "learning_rate": 3.3055555555555553e-05, "loss": 2.9706, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.662, "num_token_overlap": 18.0037, "num_token_query": 52.2416, "num_token_union": 73.4181, "num_word_context": 202.094, "num_word_doc": 49.7273, "num_word_query": 39.8093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 338.6129, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3411, "query_norm": 1.4733, "queue_k_norm": 1.4564, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2416, "sent_len_1": 66.662, "sent_len_max_0": 128.0, "sent_len_max_1": 189.32, "stdk": 0.0479, "stdq": 0.0468, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.9493, "doc_norm": 1.4597, "encoder_q-embeddings": 468.1262, "encoder_q-layer.0": 335.7797, "encoder_q-layer.1": 383.6491, "encoder_q-layer.10": 161.0123, "encoder_q-layer.11": 354.0261, "encoder_q-layer.2": 427.2463, "encoder_q-layer.3": 389.6576, "encoder_q-layer.4": 324.3201, "encoder_q-layer.5": 271.4319, "encoder_q-layer.6": 270.7873, "encoder_q-layer.7": 245.6709, "encoder_q-layer.8": 206.4972, "encoder_q-layer.9": 159.3615, "epoch": 0.4, "inbatch_neg_score": 0.3444, "inbatch_pos_score": 1.0391, "learning_rate": 3.3e-05, "loss": 2.9493, "norm_diff": 0.0097, "norm_loss": 0.0, "num_token_doc": 66.7349, "num_token_overlap": 17.9957, "num_token_query": 52.2399, "num_token_union": 73.3854, "num_word_context": 202.097, "num_word_doc": 49.7454, "num_word_query": 39.8016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 494.6346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3435, "query_norm": 1.4547, "queue_k_norm": 1.455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2399, "sent_len_1": 66.7349, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.4638, "stdk": 0.0481, "stdq": 0.0459, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.9458, "doc_norm": 1.4531, "encoder_q-embeddings": 870.3427, "encoder_q-layer.0": 540.2261, "encoder_q-layer.1": 577.155, "encoder_q-layer.10": 155.1611, "encoder_q-layer.11": 364.6013, "encoder_q-layer.2": 625.6957, "encoder_q-layer.3": 576.6531, "encoder_q-layer.4": 363.789, "encoder_q-layer.5": 206.987, "encoder_q-layer.6": 201.1697, "encoder_q-layer.7": 172.313, "encoder_q-layer.8": 185.8051, "encoder_q-layer.9": 167.1195, "epoch": 0.4, "inbatch_neg_score": 0.3298, "inbatch_pos_score": 1.0264, "learning_rate": 3.2944444444444445e-05, "loss": 2.9458, "norm_diff": 0.0287, "norm_loss": 0.0, "num_token_doc": 66.8364, "num_token_overlap": 18.0417, "num_token_query": 52.2387, "num_token_union": 73.5183, "num_word_context": 202.2003, "num_word_doc": 49.8919, "num_word_query": 39.8483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 715.4642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3289, "query_norm": 1.4747, "queue_k_norm": 1.4569, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2387, "sent_len_1": 66.8364, "sent_len_max_0": 128.0, "sent_len_max_1": 187.335, "stdk": 0.0479, "stdq": 0.0468, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.9564, "doc_norm": 1.4539, "encoder_q-embeddings": 339.5021, "encoder_q-layer.0": 223.8894, "encoder_q-layer.1": 244.0187, "encoder_q-layer.10": 158.0316, "encoder_q-layer.11": 356.7222, "encoder_q-layer.2": 270.7024, "encoder_q-layer.3": 295.9159, "encoder_q-layer.4": 262.718, "encoder_q-layer.5": 248.2932, "encoder_q-layer.6": 236.578, "encoder_q-layer.7": 208.5665, "encoder_q-layer.8": 198.9131, "encoder_q-layer.9": 161.8386, "epoch": 0.4, "inbatch_neg_score": 0.3282, "inbatch_pos_score": 1.0273, "learning_rate": 3.2888888888888894e-05, "loss": 2.9564, "norm_diff": 0.0109, "norm_loss": 0.0, "num_token_doc": 66.858, "num_token_overlap": 18.0703, "num_token_query": 52.4205, "num_token_union": 73.5274, "num_word_context": 202.727, "num_word_doc": 49.8713, "num_word_query": 39.9471, "postclip_grad_norm": 1.0, "preclip_grad_norm": 388.3762, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3276, "query_norm": 1.4629, "queue_k_norm": 1.4543, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4205, "sent_len_1": 66.858, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4762, "stdk": 0.0479, "stdq": 0.0463, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.9353, "doc_norm": 1.4503, "encoder_q-embeddings": 963.0166, "encoder_q-layer.0": 678.015, "encoder_q-layer.1": 705.3965, "encoder_q-layer.10": 179.9291, "encoder_q-layer.11": 357.5707, "encoder_q-layer.2": 829.9238, "encoder_q-layer.3": 869.4064, "encoder_q-layer.4": 875.4518, "encoder_q-layer.5": 856.4977, "encoder_q-layer.6": 744.8124, "encoder_q-layer.7": 674.9078, "encoder_q-layer.8": 370.6071, "encoder_q-layer.9": 205.151, "epoch": 0.4, "inbatch_neg_score": 0.32, "inbatch_pos_score": 1.0264, "learning_rate": 3.283333333333333e-05, "loss": 2.9353, "norm_diff": 0.0244, "norm_loss": 0.0, "num_token_doc": 66.7726, "num_token_overlap": 18.0516, "num_token_query": 52.2898, "num_token_union": 73.3907, "num_word_context": 202.2517, "num_word_doc": 49.826, "num_word_query": 39.8719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1049.6671, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3208, "query_norm": 1.4747, "queue_k_norm": 1.456, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2898, "sent_len_1": 66.7726, "sent_len_max_0": 128.0, "sent_len_max_1": 189.52, "stdk": 0.0478, "stdq": 0.0464, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.9497, "doc_norm": 1.4556, "encoder_q-embeddings": 315.0064, "encoder_q-layer.0": 210.6318, "encoder_q-layer.1": 232.1438, "encoder_q-layer.10": 157.9896, "encoder_q-layer.11": 382.8891, "encoder_q-layer.2": 278.6656, "encoder_q-layer.3": 287.8268, "encoder_q-layer.4": 291.0578, "encoder_q-layer.5": 279.956, "encoder_q-layer.6": 271.2787, "encoder_q-layer.7": 257.4677, "encoder_q-layer.8": 286.5865, "encoder_q-layer.9": 205.1072, "epoch": 0.4, "inbatch_neg_score": 0.3301, "inbatch_pos_score": 1.0098, "learning_rate": 3.277777777777778e-05, "loss": 2.9497, "norm_diff": 0.0189, "norm_loss": 0.0, "num_token_doc": 66.8148, "num_token_overlap": 18.0004, "num_token_query": 52.2619, "num_token_union": 73.4549, "num_word_context": 202.2051, "num_word_doc": 49.8177, "num_word_query": 39.8494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 406.2138, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3279, "query_norm": 1.4668, "queue_k_norm": 1.4554, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2619, "sent_len_1": 66.8148, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3713, "stdk": 0.048, "stdq": 0.0459, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.9598, "doc_norm": 1.4523, "encoder_q-embeddings": 612.7255, "encoder_q-layer.0": 429.0487, "encoder_q-layer.1": 471.8383, "encoder_q-layer.10": 153.4729, "encoder_q-layer.11": 357.7635, "encoder_q-layer.2": 495.2534, "encoder_q-layer.3": 496.037, "encoder_q-layer.4": 460.2012, "encoder_q-layer.5": 459.2458, "encoder_q-layer.6": 406.0139, "encoder_q-layer.7": 388.5008, "encoder_q-layer.8": 407.3805, "encoder_q-layer.9": 303.3562, "epoch": 0.4, "inbatch_neg_score": 0.3352, "inbatch_pos_score": 1.0459, "learning_rate": 3.272222222222223e-05, "loss": 2.9598, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.7722, "num_token_overlap": 17.9819, "num_token_query": 52.1477, "num_token_union": 73.4165, "num_word_context": 202.1735, "num_word_doc": 49.7807, "num_word_query": 39.7377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 658.1247, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3347, "query_norm": 1.494, "queue_k_norm": 1.4546, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1477, "sent_len_1": 66.7722, "sent_len_max_0": 128.0, "sent_len_max_1": 190.87, "stdk": 0.0479, "stdq": 0.0469, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.9497, "doc_norm": 1.4571, "encoder_q-embeddings": 703.052, "encoder_q-layer.0": 519.417, "encoder_q-layer.1": 571.0341, "encoder_q-layer.10": 162.0702, "encoder_q-layer.11": 345.4462, "encoder_q-layer.2": 522.2274, "encoder_q-layer.3": 494.5301, "encoder_q-layer.4": 455.7441, "encoder_q-layer.5": 447.0277, "encoder_q-layer.6": 399.5265, "encoder_q-layer.7": 329.6395, "encoder_q-layer.8": 314.0013, "encoder_q-layer.9": 220.8027, "epoch": 0.4, "inbatch_neg_score": 0.3393, "inbatch_pos_score": 1.0264, "learning_rate": 3.266666666666667e-05, "loss": 2.9497, "norm_diff": 0.0382, "norm_loss": 0.0, "num_token_doc": 66.5537, "num_token_overlap": 17.9767, "num_token_query": 52.1487, "num_token_union": 73.2876, "num_word_context": 202.1475, "num_word_doc": 49.641, "num_word_query": 39.7408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 690.5994, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3396, "query_norm": 1.4953, "queue_k_norm": 1.4544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1487, "sent_len_1": 66.5537, "sent_len_max_0": 127.9737, "sent_len_max_1": 189.7725, "stdk": 0.0481, "stdq": 0.0467, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.9355, "doc_norm": 1.4584, "encoder_q-embeddings": 1490.9503, "encoder_q-layer.0": 1123.3094, "encoder_q-layer.1": 1257.9629, "encoder_q-layer.10": 154.4106, "encoder_q-layer.11": 348.2274, "encoder_q-layer.2": 1513.4796, "encoder_q-layer.3": 1521.9781, "encoder_q-layer.4": 1506.969, "encoder_q-layer.5": 1247.481, "encoder_q-layer.6": 1298.5776, "encoder_q-layer.7": 1225.005, "encoder_q-layer.8": 875.3767, "encoder_q-layer.9": 387.3813, "epoch": 0.4, "inbatch_neg_score": 0.3488, "inbatch_pos_score": 1.0664, "learning_rate": 3.261111111111111e-05, "loss": 2.9355, "norm_diff": 0.0506, "norm_loss": 0.0, "num_token_doc": 66.8677, "num_token_overlap": 18.059, "num_token_query": 52.2624, "num_token_union": 73.4935, "num_word_context": 202.358, "num_word_doc": 49.9005, "num_word_query": 39.8519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1742.4609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3479, "query_norm": 1.509, "queue_k_norm": 1.4555, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2624, "sent_len_1": 66.8677, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7575, "stdk": 0.0481, "stdq": 0.0472, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 2.9364, "doc_norm": 1.4558, "encoder_q-embeddings": 852.7042, "encoder_q-layer.0": 564.5797, "encoder_q-layer.1": 663.7699, "encoder_q-layer.10": 174.0677, "encoder_q-layer.11": 386.0337, "encoder_q-layer.2": 754.1935, "encoder_q-layer.3": 692.3926, "encoder_q-layer.4": 610.6023, "encoder_q-layer.5": 528.3071, "encoder_q-layer.6": 487.9181, "encoder_q-layer.7": 387.5909, "encoder_q-layer.8": 314.3967, "encoder_q-layer.9": 175.4019, "epoch": 0.4, "inbatch_neg_score": 0.3509, "inbatch_pos_score": 1.0352, "learning_rate": 3.2555555555555555e-05, "loss": 2.9364, "norm_diff": 0.0316, "norm_loss": 0.0, "num_token_doc": 66.9688, "num_token_overlap": 18.0437, "num_token_query": 52.2586, "num_token_union": 73.6162, "num_word_context": 202.8307, "num_word_doc": 50.0078, "num_word_query": 39.8317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 841.9564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3499, "query_norm": 1.4874, "queue_k_norm": 1.456, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2586, "sent_len_1": 66.9688, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7562, "stdk": 0.048, "stdq": 0.0465, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.9267, "doc_norm": 1.4509, "encoder_q-embeddings": 5286.3789, "encoder_q-layer.0": 3723.7976, "encoder_q-layer.1": 4131.834, "encoder_q-layer.10": 186.0399, "encoder_q-layer.11": 385.5989, "encoder_q-layer.2": 3773.072, "encoder_q-layer.3": 3188.3147, "encoder_q-layer.4": 2179.7302, "encoder_q-layer.5": 1542.7953, "encoder_q-layer.6": 874.6256, "encoder_q-layer.7": 380.5903, "encoder_q-layer.8": 268.5883, "encoder_q-layer.9": 197.3901, "epoch": 0.41, "inbatch_neg_score": 0.3559, "inbatch_pos_score": 1.0703, "learning_rate": 3.2500000000000004e-05, "loss": 2.9267, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 67.0825, "num_token_overlap": 18.0982, "num_token_query": 52.2706, "num_token_union": 73.5659, "num_word_context": 202.5167, "num_word_doc": 50.0675, "num_word_query": 39.8722, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4271.2969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3555, "query_norm": 1.5063, "queue_k_norm": 1.4561, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2706, "sent_len_1": 67.0825, "sent_len_max_0": 128.0, "sent_len_max_1": 190.61, "stdk": 0.0478, "stdq": 0.0471, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.9548, "doc_norm": 1.4593, "encoder_q-embeddings": 787.162, "encoder_q-layer.0": 529.0602, "encoder_q-layer.1": 628.9886, "encoder_q-layer.10": 399.4401, "encoder_q-layer.11": 808.8459, "encoder_q-layer.2": 655.0512, "encoder_q-layer.3": 580.1489, "encoder_q-layer.4": 567.0344, "encoder_q-layer.5": 609.0709, "encoder_q-layer.6": 611.2495, "encoder_q-layer.7": 630.0588, "encoder_q-layer.8": 640.9388, "encoder_q-layer.9": 542.2291, "epoch": 0.41, "inbatch_neg_score": 0.3539, "inbatch_pos_score": 1.0381, "learning_rate": 3.2444444444444446e-05, "loss": 2.9548, "norm_diff": 0.0216, "norm_loss": 0.0, "num_token_doc": 66.7687, "num_token_overlap": 17.882, "num_token_query": 51.9497, "num_token_union": 73.375, "num_word_context": 202.3456, "num_word_doc": 49.8458, "num_word_query": 39.5885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.5676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.353, "query_norm": 1.4798, "queue_k_norm": 1.4567, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 51.9497, "sent_len_1": 66.7687, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9837, "stdk": 0.0482, "stdq": 0.0462, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.9654, "doc_norm": 1.4624, "encoder_q-embeddings": 1042.53, "encoder_q-layer.0": 692.3611, "encoder_q-layer.1": 752.3666, "encoder_q-layer.10": 325.856, "encoder_q-layer.11": 753.4454, "encoder_q-layer.2": 827.2199, "encoder_q-layer.3": 879.2281, "encoder_q-layer.4": 887.1673, "encoder_q-layer.5": 740.0604, "encoder_q-layer.6": 604.3646, "encoder_q-layer.7": 551.2963, "encoder_q-layer.8": 455.173, "encoder_q-layer.9": 326.5507, "epoch": 0.41, "inbatch_neg_score": 0.3573, "inbatch_pos_score": 1.0723, "learning_rate": 3.238888888888889e-05, "loss": 2.9654, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.747, "num_token_overlap": 17.916, "num_token_query": 52.0164, "num_token_union": 73.4059, "num_word_context": 201.9378, "num_word_doc": 49.8035, "num_word_query": 39.6429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1100.2637, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3552, "query_norm": 1.485, "queue_k_norm": 1.4584, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0164, "sent_len_1": 66.747, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9475, "stdk": 0.0483, "stdq": 0.0466, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.925, "doc_norm": 1.4579, "encoder_q-embeddings": 839.2682, "encoder_q-layer.0": 571.3542, "encoder_q-layer.1": 678.6556, "encoder_q-layer.10": 403.9322, "encoder_q-layer.11": 795.2249, "encoder_q-layer.2": 773.3588, "encoder_q-layer.3": 763.2228, "encoder_q-layer.4": 695.1353, "encoder_q-layer.5": 638.5809, "encoder_q-layer.6": 558.5237, "encoder_q-layer.7": 483.9089, "encoder_q-layer.8": 418.768, "encoder_q-layer.9": 351.8888, "epoch": 0.41, "inbatch_neg_score": 0.358, "inbatch_pos_score": 1.0547, "learning_rate": 3.233333333333333e-05, "loss": 2.925, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 66.7749, "num_token_overlap": 18.0789, "num_token_query": 52.3581, "num_token_union": 73.4843, "num_word_context": 202.3291, "num_word_doc": 49.8432, "num_word_query": 39.9299, "postclip_grad_norm": 1.0, "preclip_grad_norm": 955.6683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3577, "query_norm": 1.4756, "queue_k_norm": 1.4593, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3581, "sent_len_1": 66.7749, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.9913, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.9435, "doc_norm": 1.4628, "encoder_q-embeddings": 1196.412, "encoder_q-layer.0": 856.1859, "encoder_q-layer.1": 891.3526, "encoder_q-layer.10": 326.7909, "encoder_q-layer.11": 767.7285, "encoder_q-layer.2": 1072.5388, "encoder_q-layer.3": 1069.5168, "encoder_q-layer.4": 864.3631, "encoder_q-layer.5": 738.4191, "encoder_q-layer.6": 624.8782, "encoder_q-layer.7": 559.1514, "encoder_q-layer.8": 516.2735, "encoder_q-layer.9": 388.1225, "epoch": 0.41, "inbatch_neg_score": 0.354, "inbatch_pos_score": 1.0576, "learning_rate": 3.227777777777778e-05, "loss": 2.9435, "norm_diff": 0.026, "norm_loss": 0.0, "num_token_doc": 66.9104, "num_token_overlap": 18.0295, "num_token_query": 52.2222, "num_token_union": 73.4758, "num_word_context": 202.443, "num_word_doc": 49.9452, "num_word_query": 39.8074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1232.1689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3542, "query_norm": 1.4888, "queue_k_norm": 1.4578, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2222, "sent_len_1": 66.9104, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9275, "stdk": 0.0482, "stdq": 0.0467, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.9548, "doc_norm": 1.452, "encoder_q-embeddings": 3114.2683, "encoder_q-layer.0": 2328.3452, "encoder_q-layer.1": 2519.2756, "encoder_q-layer.10": 393.5065, "encoder_q-layer.11": 775.8361, "encoder_q-layer.2": 2851.9658, "encoder_q-layer.3": 2870.5923, "encoder_q-layer.4": 2828.6338, "encoder_q-layer.5": 2785.5176, "encoder_q-layer.6": 2235.9507, "encoder_q-layer.7": 2461.5459, "encoder_q-layer.8": 4117.4448, "encoder_q-layer.9": 1951.5431, "epoch": 0.41, "inbatch_neg_score": 0.3535, "inbatch_pos_score": 1.0293, "learning_rate": 3.222222222222223e-05, "loss": 2.9548, "norm_diff": 0.0289, "norm_loss": 0.0, "num_token_doc": 66.615, "num_token_overlap": 17.971, "num_token_query": 52.0495, "num_token_union": 73.3057, "num_word_context": 202.0219, "num_word_doc": 49.6931, "num_word_query": 39.6845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4055.8308, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.354, "query_norm": 1.4774, "queue_k_norm": 1.4597, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0495, "sent_len_1": 66.615, "sent_len_max_0": 128.0, "sent_len_max_1": 188.72, "stdk": 0.0478, "stdq": 0.0466, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 2.9477, "doc_norm": 1.4625, "encoder_q-embeddings": 563.0463, "encoder_q-layer.0": 392.8465, "encoder_q-layer.1": 436.8148, "encoder_q-layer.10": 356.3802, "encoder_q-layer.11": 778.8074, "encoder_q-layer.2": 504.4469, "encoder_q-layer.3": 506.2505, "encoder_q-layer.4": 520.8302, "encoder_q-layer.5": 576.5812, "encoder_q-layer.6": 578.6657, "encoder_q-layer.7": 515.3118, "encoder_q-layer.8": 543.1012, "encoder_q-layer.9": 404.4478, "epoch": 0.41, "inbatch_neg_score": 0.3665, "inbatch_pos_score": 1.0381, "learning_rate": 3.2166666666666665e-05, "loss": 2.9477, "norm_diff": 0.0192, "norm_loss": 0.0, "num_token_doc": 66.8879, "num_token_overlap": 17.9955, "num_token_query": 52.3251, "num_token_union": 73.5628, "num_word_context": 202.5466, "num_word_doc": 49.8969, "num_word_query": 39.8833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 782.108, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3652, "query_norm": 1.4686, "queue_k_norm": 1.46, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3251, "sent_len_1": 66.8879, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.0087, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.9543, "doc_norm": 1.4586, "encoder_q-embeddings": 1893.4312, "encoder_q-layer.0": 1278.1958, "encoder_q-layer.1": 1505.6643, "encoder_q-layer.10": 345.4911, "encoder_q-layer.11": 759.926, "encoder_q-layer.2": 1721.2758, "encoder_q-layer.3": 1818.6912, "encoder_q-layer.4": 1649.5801, "encoder_q-layer.5": 1490.6405, "encoder_q-layer.6": 1660.6143, "encoder_q-layer.7": 1303.6023, "encoder_q-layer.8": 1251.6062, "encoder_q-layer.9": 845.2272, "epoch": 0.41, "inbatch_neg_score": 0.3568, "inbatch_pos_score": 1.0391, "learning_rate": 3.2111111111111114e-05, "loss": 2.9543, "norm_diff": 0.0088, "norm_loss": 0.0, "num_token_doc": 66.6346, "num_token_overlap": 17.9082, "num_token_query": 52.1115, "num_token_union": 73.3839, "num_word_context": 202.0441, "num_word_doc": 49.6873, "num_word_query": 39.7, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2131.1993, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3562, "query_norm": 1.4641, "queue_k_norm": 1.4609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1115, "sent_len_1": 66.6346, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0925, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 2.9399, "doc_norm": 1.4572, "encoder_q-embeddings": 1579.4495, "encoder_q-layer.0": 1177.4581, "encoder_q-layer.1": 1270.5729, "encoder_q-layer.10": 376.9042, "encoder_q-layer.11": 768.8428, "encoder_q-layer.2": 1491.359, "encoder_q-layer.3": 1074.0833, "encoder_q-layer.4": 836.8121, "encoder_q-layer.5": 549.6014, "encoder_q-layer.6": 477.1059, "encoder_q-layer.7": 425.1364, "encoder_q-layer.8": 426.5767, "encoder_q-layer.9": 333.2723, "epoch": 0.41, "inbatch_neg_score": 0.3479, "inbatch_pos_score": 1.0303, "learning_rate": 3.2055555555555556e-05, "loss": 2.9399, "norm_diff": 0.0098, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 17.9732, "num_token_query": 52.1778, "num_token_union": 73.462, "num_word_context": 201.9497, "num_word_doc": 49.7865, "num_word_query": 39.7981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1453.9409, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3474, "query_norm": 1.4635, "queue_k_norm": 1.4599, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1778, "sent_len_1": 66.6695, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.7562, "stdk": 0.048, "stdq": 0.0466, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.9358, "doc_norm": 1.4605, "encoder_q-embeddings": 1103.2463, "encoder_q-layer.0": 745.9929, "encoder_q-layer.1": 890.1789, "encoder_q-layer.10": 332.7142, "encoder_q-layer.11": 777.3916, "encoder_q-layer.2": 916.2919, "encoder_q-layer.3": 884.6849, "encoder_q-layer.4": 773.826, "encoder_q-layer.5": 777.3712, "encoder_q-layer.6": 741.4908, "encoder_q-layer.7": 502.882, "encoder_q-layer.8": 412.511, "encoder_q-layer.9": 313.6244, "epoch": 0.41, "inbatch_neg_score": 0.3445, "inbatch_pos_score": 1.0449, "learning_rate": 3.2000000000000005e-05, "loss": 2.9358, "norm_diff": 0.012, "norm_loss": 0.0, "num_token_doc": 67.0747, "num_token_overlap": 17.9844, "num_token_query": 52.2515, "num_token_union": 73.7228, "num_word_context": 202.3792, "num_word_doc": 50.0449, "num_word_query": 39.8404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1134.6884, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3423, "query_norm": 1.4596, "queue_k_norm": 1.4602, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2515, "sent_len_1": 67.0747, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0488, "stdk": 0.0481, "stdq": 0.0464, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.9504, "doc_norm": 1.4641, "encoder_q-embeddings": 1070.9298, "encoder_q-layer.0": 758.9458, "encoder_q-layer.1": 853.2174, "encoder_q-layer.10": 309.9019, "encoder_q-layer.11": 741.3568, "encoder_q-layer.2": 989.2436, "encoder_q-layer.3": 904.051, "encoder_q-layer.4": 704.895, "encoder_q-layer.5": 639.3496, "encoder_q-layer.6": 589.0623, "encoder_q-layer.7": 457.6081, "encoder_q-layer.8": 455.9867, "encoder_q-layer.9": 332.5895, "epoch": 0.41, "inbatch_neg_score": 0.3347, "inbatch_pos_score": 1.0537, "learning_rate": 3.194444444444444e-05, "loss": 2.9504, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.7537, "num_token_overlap": 17.9837, "num_token_query": 52.2476, "num_token_union": 73.4804, "num_word_context": 202.0829, "num_word_doc": 49.8174, "num_word_query": 39.846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1099.9727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3335, "query_norm": 1.4663, "queue_k_norm": 1.4586, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2476, "sent_len_1": 66.7537, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8025, "stdk": 0.0483, "stdq": 0.0472, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.9565, "doc_norm": 1.4616, "encoder_q-embeddings": 837.8906, "encoder_q-layer.0": 561.9467, "encoder_q-layer.1": 707.0396, "encoder_q-layer.10": 329.7921, "encoder_q-layer.11": 741.7461, "encoder_q-layer.2": 767.2223, "encoder_q-layer.3": 715.4288, "encoder_q-layer.4": 701.3531, "encoder_q-layer.5": 753.1797, "encoder_q-layer.6": 703.7913, "encoder_q-layer.7": 446.1141, "encoder_q-layer.8": 420.1231, "encoder_q-layer.9": 329.9904, "epoch": 0.42, "inbatch_neg_score": 0.3166, "inbatch_pos_score": 1.0205, "learning_rate": 3.188888888888889e-05, "loss": 2.9565, "norm_diff": 0.0186, "norm_loss": 0.0, "num_token_doc": 66.7894, "num_token_overlap": 18.0512, "num_token_query": 52.3631, "num_token_union": 73.5662, "num_word_context": 202.6953, "num_word_doc": 49.8549, "num_word_query": 39.9317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 963.0231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3176, "query_norm": 1.4444, "queue_k_norm": 1.4586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3631, "sent_len_1": 66.7894, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.3288, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.9456, "doc_norm": 1.4553, "encoder_q-embeddings": 684.4003, "encoder_q-layer.0": 446.016, "encoder_q-layer.1": 485.3333, "encoder_q-layer.10": 347.474, "encoder_q-layer.11": 783.2516, "encoder_q-layer.2": 564.629, "encoder_q-layer.3": 518.3748, "encoder_q-layer.4": 495.4624, "encoder_q-layer.5": 474.1857, "encoder_q-layer.6": 425.4693, "encoder_q-layer.7": 409.3801, "encoder_q-layer.8": 388.512, "encoder_q-layer.9": 338.3643, "epoch": 0.42, "inbatch_neg_score": 0.3038, "inbatch_pos_score": 0.9697, "learning_rate": 3.183333333333334e-05, "loss": 2.9456, "norm_diff": 0.0491, "norm_loss": 0.0, "num_token_doc": 66.7196, "num_token_overlap": 18.0235, "num_token_query": 52.1509, "num_token_union": 73.3432, "num_word_context": 201.9535, "num_word_doc": 49.7239, "num_word_query": 39.7614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 774.2126, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4062, "queue_k_norm": 1.4575, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1509, "sent_len_1": 66.7196, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4075, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.9424, "doc_norm": 1.4594, "encoder_q-embeddings": 789.1039, "encoder_q-layer.0": 571.9761, "encoder_q-layer.1": 784.3759, "encoder_q-layer.10": 306.8825, "encoder_q-layer.11": 723.5605, "encoder_q-layer.2": 649.2205, "encoder_q-layer.3": 589.3423, "encoder_q-layer.4": 590.8606, "encoder_q-layer.5": 624.7559, "encoder_q-layer.6": 615.2206, "encoder_q-layer.7": 560.3254, "encoder_q-layer.8": 501.9889, "encoder_q-layer.9": 345.2076, "epoch": 0.42, "inbatch_neg_score": 0.2953, "inbatch_pos_score": 1.0098, "learning_rate": 3.177777777777778e-05, "loss": 2.9424, "norm_diff": 0.0128, "norm_loss": 0.0, "num_token_doc": 66.8507, "num_token_overlap": 18.0545, "num_token_query": 52.4492, "num_token_union": 73.5274, "num_word_context": 202.428, "num_word_doc": 49.8773, "num_word_query": 39.9812, "postclip_grad_norm": 1.0, "preclip_grad_norm": 903.7615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2961, "query_norm": 1.4542, "queue_k_norm": 1.4556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4492, "sent_len_1": 66.8507, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9575, "stdk": 0.0482, "stdq": 0.0467, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.9585, "doc_norm": 1.4575, "encoder_q-embeddings": 3806.584, "encoder_q-layer.0": 2654.6987, "encoder_q-layer.1": 3711.166, "encoder_q-layer.10": 331.8859, "encoder_q-layer.11": 710.7267, "encoder_q-layer.2": 3067.2432, "encoder_q-layer.3": 2977.1714, "encoder_q-layer.4": 2869.4758, "encoder_q-layer.5": 2379.0151, "encoder_q-layer.6": 1894.3232, "encoder_q-layer.7": 1315.418, "encoder_q-layer.8": 657.6768, "encoder_q-layer.9": 394.0509, "epoch": 0.42, "inbatch_neg_score": 0.2781, "inbatch_pos_score": 0.9683, "learning_rate": 3.1722222222222224e-05, "loss": 2.9585, "norm_diff": 0.0178, "norm_loss": 0.0, "num_token_doc": 66.5904, "num_token_overlap": 17.9573, "num_token_query": 52.2308, "num_token_union": 73.3802, "num_word_context": 202.0633, "num_word_doc": 49.6882, "num_word_query": 39.8064, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3661.6194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2786, "query_norm": 1.4408, "queue_k_norm": 1.454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2308, "sent_len_1": 66.5904, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1075, "stdk": 0.0482, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.9166, "doc_norm": 1.4585, "encoder_q-embeddings": 1758.0482, "encoder_q-layer.0": 1338.5745, "encoder_q-layer.1": 1655.0469, "encoder_q-layer.10": 323.0961, "encoder_q-layer.11": 721.493, "encoder_q-layer.2": 1693.2953, "encoder_q-layer.3": 1749.9668, "encoder_q-layer.4": 1166.2823, "encoder_q-layer.5": 921.5968, "encoder_q-layer.6": 801.925, "encoder_q-layer.7": 682.4846, "encoder_q-layer.8": 542.9431, "encoder_q-layer.9": 425.774, "epoch": 0.42, "inbatch_neg_score": 0.2932, "inbatch_pos_score": 1.0137, "learning_rate": 3.1666666666666666e-05, "loss": 2.9166, "norm_diff": 0.0082, "norm_loss": 0.0, "num_token_doc": 66.9074, "num_token_overlap": 18.0612, "num_token_query": 52.3628, "num_token_union": 73.5424, "num_word_context": 202.3075, "num_word_doc": 49.9, "num_word_query": 39.9089, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1812.4947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.4636, "queue_k_norm": 1.4534, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3628, "sent_len_1": 66.9074, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1925, "stdk": 0.0483, "stdq": 0.0467, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.9424, "doc_norm": 1.4502, "encoder_q-embeddings": 1386.9083, "encoder_q-layer.0": 970.931, "encoder_q-layer.1": 1210.9119, "encoder_q-layer.10": 300.6274, "encoder_q-layer.11": 670.2869, "encoder_q-layer.2": 1471.624, "encoder_q-layer.3": 1382.9841, "encoder_q-layer.4": 1140.6587, "encoder_q-layer.5": 1018.1222, "encoder_q-layer.6": 762.4038, "encoder_q-layer.7": 657.769, "encoder_q-layer.8": 609.2869, "encoder_q-layer.9": 456.008, "epoch": 0.42, "inbatch_neg_score": 0.2942, "inbatch_pos_score": 1.0205, "learning_rate": 3.1611111111111115e-05, "loss": 2.9424, "norm_diff": 0.0193, "norm_loss": 0.0, "num_token_doc": 66.7511, "num_token_overlap": 18.0056, "num_token_query": 52.2572, "num_token_union": 73.4301, "num_word_context": 202.2021, "num_word_doc": 49.8064, "num_word_query": 39.8496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1522.4152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.4691, "queue_k_norm": 1.4511, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2572, "sent_len_1": 66.7511, "sent_len_max_0": 127.995, "sent_len_max_1": 189.7975, "stdk": 0.048, "stdq": 0.0465, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.9488, "doc_norm": 1.4459, "encoder_q-embeddings": 1120.6129, "encoder_q-layer.0": 734.8006, "encoder_q-layer.1": 790.7892, "encoder_q-layer.10": 317.2636, "encoder_q-layer.11": 725.6803, "encoder_q-layer.2": 883.7693, "encoder_q-layer.3": 1063.2792, "encoder_q-layer.4": 1147.4957, "encoder_q-layer.5": 1245.0444, "encoder_q-layer.6": 870.2645, "encoder_q-layer.7": 552.4736, "encoder_q-layer.8": 420.1592, "encoder_q-layer.9": 336.7766, "epoch": 0.42, "inbatch_neg_score": 0.3012, "inbatch_pos_score": 0.978, "learning_rate": 3.155555555555556e-05, "loss": 2.9488, "norm_diff": 0.0243, "norm_loss": 0.0, "num_token_doc": 66.6626, "num_token_overlap": 17.9407, "num_token_query": 52.2025, "num_token_union": 73.4422, "num_word_context": 202.4427, "num_word_doc": 49.7677, "num_word_query": 39.8012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1278.8966, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.467, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2025, "sent_len_1": 66.6626, "sent_len_max_0": 128.0, "sent_len_max_1": 187.89, "stdk": 0.0479, "stdq": 0.0463, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.9318, "doc_norm": 1.4499, "encoder_q-embeddings": 2821.6462, "encoder_q-layer.0": 1898.1, "encoder_q-layer.1": 2005.3893, "encoder_q-layer.10": 330.469, "encoder_q-layer.11": 744.469, "encoder_q-layer.2": 1663.4272, "encoder_q-layer.3": 1317.3632, "encoder_q-layer.4": 937.795, "encoder_q-layer.5": 672.2925, "encoder_q-layer.6": 523.7432, "encoder_q-layer.7": 367.367, "encoder_q-layer.8": 386.2148, "encoder_q-layer.9": 328.5459, "epoch": 0.42, "inbatch_neg_score": 0.3, "inbatch_pos_score": 0.9731, "learning_rate": 3.15e-05, "loss": 2.9318, "norm_diff": 0.0202, "norm_loss": 0.0, "num_token_doc": 66.6925, "num_token_overlap": 18.0277, "num_token_query": 52.1869, "num_token_union": 73.4241, "num_word_context": 202.2267, "num_word_doc": 49.8052, "num_word_query": 39.7753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2205.0866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2991, "query_norm": 1.4671, "queue_k_norm": 1.4494, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1869, "sent_len_1": 66.6925, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.0163, "stdk": 0.048, "stdq": 0.0462, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9444, "doc_norm": 1.444, "encoder_q-embeddings": 561.7166, "encoder_q-layer.0": 408.3666, "encoder_q-layer.1": 436.4272, "encoder_q-layer.10": 359.6046, "encoder_q-layer.11": 776.5523, "encoder_q-layer.2": 463.458, "encoder_q-layer.3": 464.6464, "encoder_q-layer.4": 482.7864, "encoder_q-layer.5": 536.7382, "encoder_q-layer.6": 513.8436, "encoder_q-layer.7": 525.7856, "encoder_q-layer.8": 541.8627, "encoder_q-layer.9": 397.1631, "epoch": 0.42, "inbatch_neg_score": 0.2994, "inbatch_pos_score": 0.9829, "learning_rate": 3.144444444444445e-05, "loss": 2.9444, "norm_diff": 0.0074, "norm_loss": 0.0, "num_token_doc": 66.6979, "num_token_overlap": 18.0083, "num_token_query": 52.1628, "num_token_union": 73.3479, "num_word_context": 202.3589, "num_word_doc": 49.7924, "num_word_query": 39.7585, "postclip_grad_norm": 1.0, "preclip_grad_norm": 767.395, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4471, "queue_k_norm": 1.4494, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1628, "sent_len_1": 66.6979, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1438, "stdk": 0.0479, "stdq": 0.0457, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.9387, "doc_norm": 1.4475, "encoder_q-embeddings": 850.5879, "encoder_q-layer.0": 593.855, "encoder_q-layer.1": 702.3981, "encoder_q-layer.10": 307.7745, "encoder_q-layer.11": 722.0118, "encoder_q-layer.2": 836.3257, "encoder_q-layer.3": 761.5699, "encoder_q-layer.4": 519.9806, "encoder_q-layer.5": 370.8059, "encoder_q-layer.6": 378.8903, "encoder_q-layer.7": 374.0912, "encoder_q-layer.8": 407.0553, "encoder_q-layer.9": 344.1426, "epoch": 0.42, "inbatch_neg_score": 0.3076, "inbatch_pos_score": 1.001, "learning_rate": 3.138888888888889e-05, "loss": 2.9387, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 66.8871, "num_token_overlap": 18.0133, "num_token_query": 52.2423, "num_token_union": 73.5566, "num_word_context": 202.4167, "num_word_doc": 49.9367, "num_word_query": 39.8375, "postclip_grad_norm": 1.0, "preclip_grad_norm": 895.7191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.4707, "queue_k_norm": 1.4493, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2423, "sent_len_1": 66.8871, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5825, "stdk": 0.048, "stdq": 0.0465, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.9374, "doc_norm": 1.4456, "encoder_q-embeddings": 1571.9564, "encoder_q-layer.0": 1105.1294, "encoder_q-layer.1": 1225.8058, "encoder_q-layer.10": 784.1759, "encoder_q-layer.11": 1532.2629, "encoder_q-layer.2": 1159.0599, "encoder_q-layer.3": 1109.9419, "encoder_q-layer.4": 1055.1678, "encoder_q-layer.5": 1048.7725, "encoder_q-layer.6": 980.5278, "encoder_q-layer.7": 952.0271, "encoder_q-layer.8": 1170.9388, "encoder_q-layer.9": 864.6051, "epoch": 0.43, "inbatch_neg_score": 0.3245, "inbatch_pos_score": 1.0127, "learning_rate": 3.1333333333333334e-05, "loss": 2.9374, "norm_diff": 0.0257, "norm_loss": 0.0, "num_token_doc": 66.5464, "num_token_overlap": 17.9734, "num_token_query": 52.0776, "num_token_union": 73.2406, "num_word_context": 201.7714, "num_word_doc": 49.6751, "num_word_query": 39.6978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1741.6643, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3237, "query_norm": 1.4713, "queue_k_norm": 1.4482, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0776, "sent_len_1": 66.5464, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7925, "stdk": 0.048, "stdq": 0.0464, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 2.948, "doc_norm": 1.452, "encoder_q-embeddings": 1721.8506, "encoder_q-layer.0": 1316.7737, "encoder_q-layer.1": 1425.4625, "encoder_q-layer.10": 741.3004, "encoder_q-layer.11": 1590.5891, "encoder_q-layer.2": 1625.8674, "encoder_q-layer.3": 1634.6851, "encoder_q-layer.4": 1583.5677, "encoder_q-layer.5": 1630.3456, "encoder_q-layer.6": 1597.23, "encoder_q-layer.7": 1308.1318, "encoder_q-layer.8": 1284.7576, "encoder_q-layer.9": 966.7345, "epoch": 0.43, "inbatch_neg_score": 0.3317, "inbatch_pos_score": 1.0215, "learning_rate": 3.1277777777777776e-05, "loss": 2.948, "norm_diff": 0.0235, "norm_loss": 0.0, "num_token_doc": 66.8104, "num_token_overlap": 17.9615, "num_token_query": 52.1063, "num_token_union": 73.4194, "num_word_context": 202.4951, "num_word_doc": 49.8622, "num_word_query": 39.6954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2152.9491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3289, "query_norm": 1.4748, "queue_k_norm": 1.4499, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1063, "sent_len_1": 66.8104, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.2713, "stdk": 0.0482, "stdq": 0.0466, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.9714, "doc_norm": 1.4378, "encoder_q-embeddings": 915.4144, "encoder_q-layer.0": 606.3568, "encoder_q-layer.1": 685.1372, "encoder_q-layer.10": 762.1553, "encoder_q-layer.11": 1689.7725, "encoder_q-layer.2": 776.7158, "encoder_q-layer.3": 783.4608, "encoder_q-layer.4": 756.0952, "encoder_q-layer.5": 698.8289, "encoder_q-layer.6": 728.5181, "encoder_q-layer.7": 805.0245, "encoder_q-layer.8": 865.049, "encoder_q-layer.9": 743.6679, "epoch": 0.43, "inbatch_neg_score": 0.332, "inbatch_pos_score": 1.0059, "learning_rate": 3.1222222222222225e-05, "loss": 2.9714, "norm_diff": 0.0263, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 17.9393, "num_token_query": 52.0154, "num_token_union": 73.3759, "num_word_context": 202.1657, "num_word_doc": 49.8198, "num_word_query": 39.614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1317.7154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3301, "query_norm": 1.4641, "queue_k_norm": 1.4503, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0154, "sent_len_1": 66.7773, "sent_len_max_0": 127.99, "sent_len_max_1": 190.7675, "stdk": 0.0476, "stdq": 0.0465, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.9432, "doc_norm": 1.4453, "encoder_q-embeddings": 6363.6445, "encoder_q-layer.0": 4978.2114, "encoder_q-layer.1": 5701.7461, "encoder_q-layer.10": 660.4738, "encoder_q-layer.11": 1518.209, "encoder_q-layer.2": 6396.0493, "encoder_q-layer.3": 6346.5977, "encoder_q-layer.4": 6496.2061, "encoder_q-layer.5": 6963.5615, "encoder_q-layer.6": 5579.604, "encoder_q-layer.7": 3749.1001, "encoder_q-layer.8": 3505.5183, "encoder_q-layer.9": 1997.113, "epoch": 0.43, "inbatch_neg_score": 0.327, "inbatch_pos_score": 1.0195, "learning_rate": 3.116666666666667e-05, "loss": 2.9432, "norm_diff": 0.0218, "norm_loss": 0.0, "num_token_doc": 66.879, "num_token_overlap": 18.0662, "num_token_query": 52.3716, "num_token_union": 73.5168, "num_word_context": 202.2914, "num_word_doc": 49.8652, "num_word_query": 39.9266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7421.931, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3271, "query_norm": 1.4235, "queue_k_norm": 1.449, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3716, "sent_len_1": 66.879, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5838, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.9406, "doc_norm": 1.4504, "encoder_q-embeddings": 1944.882, "encoder_q-layer.0": 1332.5712, "encoder_q-layer.1": 1540.6709, "encoder_q-layer.10": 665.6151, "encoder_q-layer.11": 1427.1899, "encoder_q-layer.2": 1723.5984, "encoder_q-layer.3": 1636.2803, "encoder_q-layer.4": 1521.8315, "encoder_q-layer.5": 1345.2136, "encoder_q-layer.6": 1392.2115, "encoder_q-layer.7": 1257.7708, "encoder_q-layer.8": 1153.5466, "encoder_q-layer.9": 751.8602, "epoch": 0.43, "inbatch_neg_score": 0.3244, "inbatch_pos_score": 1.0127, "learning_rate": 3.111111111111111e-05, "loss": 2.9406, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.8524, "num_token_overlap": 18.0367, "num_token_query": 52.2576, "num_token_union": 73.457, "num_word_context": 202.1823, "num_word_doc": 49.8343, "num_word_query": 39.8409, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2133.8627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.325, "query_norm": 1.4374, "queue_k_norm": 1.4496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2576, "sent_len_1": 66.8524, "sent_len_max_0": 127.9988, "sent_len_max_1": 193.1575, "stdk": 0.0481, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 2.9385, "doc_norm": 1.4461, "encoder_q-embeddings": 2523.1892, "encoder_q-layer.0": 1803.5801, "encoder_q-layer.1": 1847.2936, "encoder_q-layer.10": 703.1658, "encoder_q-layer.11": 1617.1776, "encoder_q-layer.2": 1927.6512, "encoder_q-layer.3": 1831.6431, "encoder_q-layer.4": 1650.3705, "encoder_q-layer.5": 1448.9963, "encoder_q-layer.6": 1434.354, "encoder_q-layer.7": 1345.8536, "encoder_q-layer.8": 1189.5734, "encoder_q-layer.9": 793.4438, "epoch": 0.43, "inbatch_neg_score": 0.3122, "inbatch_pos_score": 0.9863, "learning_rate": 3.105555555555555e-05, "loss": 2.9385, "norm_diff": 0.0116, "norm_loss": 0.0, "num_token_doc": 66.8063, "num_token_overlap": 18.0057, "num_token_query": 52.1961, "num_token_union": 73.4158, "num_word_context": 202.0605, "num_word_doc": 49.8444, "num_word_query": 39.7854, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2475.6907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3108, "query_norm": 1.4349, "queue_k_norm": 1.4507, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1961, "sent_len_1": 66.8063, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.59, "stdk": 0.048, "stdq": 0.0462, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.9285, "doc_norm": 1.4512, "encoder_q-embeddings": 2541.6729, "encoder_q-layer.0": 2018.9446, "encoder_q-layer.1": 2299.5654, "encoder_q-layer.10": 333.6725, "encoder_q-layer.11": 741.7627, "encoder_q-layer.2": 2465.1409, "encoder_q-layer.3": 2566.5056, "encoder_q-layer.4": 2324.9673, "encoder_q-layer.5": 2495.1125, "encoder_q-layer.6": 2355.7722, "encoder_q-layer.7": 2858.77, "encoder_q-layer.8": 2156.062, "encoder_q-layer.9": 499.9994, "epoch": 0.43, "inbatch_neg_score": 0.3166, "inbatch_pos_score": 1.0088, "learning_rate": 3.1e-05, "loss": 2.9285, "norm_diff": 0.0216, "norm_loss": 0.0, "num_token_doc": 66.7707, "num_token_overlap": 18.0016, "num_token_query": 52.2167, "num_token_union": 73.4842, "num_word_context": 202.1852, "num_word_doc": 49.8302, "num_word_query": 39.8411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3147.016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3164, "query_norm": 1.4314, "queue_k_norm": 1.4495, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2167, "sent_len_1": 66.7707, "sent_len_max_0": 127.995, "sent_len_max_1": 189.0213, "stdk": 0.0482, "stdq": 0.046, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.9641, "doc_norm": 1.4478, "encoder_q-embeddings": 2315.0635, "encoder_q-layer.0": 1606.1492, "encoder_q-layer.1": 1718.2693, "encoder_q-layer.10": 375.5915, "encoder_q-layer.11": 763.1391, "encoder_q-layer.2": 2039.7584, "encoder_q-layer.3": 2001.4166, "encoder_q-layer.4": 1778.0166, "encoder_q-layer.5": 1236.3379, "encoder_q-layer.6": 1166.5946, "encoder_q-layer.7": 753.0711, "encoder_q-layer.8": 696.1294, "encoder_q-layer.9": 453.1002, "epoch": 0.43, "inbatch_neg_score": 0.3132, "inbatch_pos_score": 0.9922, "learning_rate": 3.094444444444445e-05, "loss": 2.9641, "norm_diff": 0.0328, "norm_loss": 0.0, "num_token_doc": 66.6733, "num_token_overlap": 17.9411, "num_token_query": 52.1607, "num_token_union": 73.4108, "num_word_context": 202.0786, "num_word_doc": 49.7239, "num_word_query": 39.7834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2237.0601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3127, "query_norm": 1.415, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1607, "sent_len_1": 66.6733, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.645, "stdk": 0.048, "stdq": 0.0454, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.9411, "doc_norm": 1.4499, "encoder_q-embeddings": 675.3638, "encoder_q-layer.0": 490.3549, "encoder_q-layer.1": 581.0438, "encoder_q-layer.10": 340.1745, "encoder_q-layer.11": 774.2705, "encoder_q-layer.2": 658.2672, "encoder_q-layer.3": 651.3356, "encoder_q-layer.4": 770.0159, "encoder_q-layer.5": 541.8019, "encoder_q-layer.6": 457.9202, "encoder_q-layer.7": 471.5454, "encoder_q-layer.8": 498.6875, "encoder_q-layer.9": 382.3645, "epoch": 0.43, "inbatch_neg_score": 0.3197, "inbatch_pos_score": 0.998, "learning_rate": 3.088888888888889e-05, "loss": 2.9411, "norm_diff": 0.0254, "norm_loss": 0.0, "num_token_doc": 66.7906, "num_token_overlap": 18.0137, "num_token_query": 52.4106, "num_token_union": 73.5717, "num_word_context": 202.4938, "num_word_doc": 49.8547, "num_word_query": 39.941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 873.1102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3188, "query_norm": 1.4245, "queue_k_norm": 1.4482, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4106, "sent_len_1": 66.7906, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2012, "stdk": 0.0481, "stdq": 0.0457, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 2.9331, "doc_norm": 1.4455, "encoder_q-embeddings": 3654.1753, "encoder_q-layer.0": 2560.6365, "encoder_q-layer.1": 3022.8887, "encoder_q-layer.10": 344.3842, "encoder_q-layer.11": 749.5791, "encoder_q-layer.2": 3068.28, "encoder_q-layer.3": 2587.4729, "encoder_q-layer.4": 1048.0929, "encoder_q-layer.5": 541.5563, "encoder_q-layer.6": 489.6184, "encoder_q-layer.7": 461.2831, "encoder_q-layer.8": 443.123, "encoder_q-layer.9": 341.7763, "epoch": 0.43, "inbatch_neg_score": 0.3044, "inbatch_pos_score": 0.9937, "learning_rate": 3.0833333333333335e-05, "loss": 2.9331, "norm_diff": 0.0275, "norm_loss": 0.0, "num_token_doc": 66.9746, "num_token_overlap": 18.0618, "num_token_query": 52.2202, "num_token_union": 73.5401, "num_word_context": 202.4709, "num_word_doc": 49.9471, "num_word_query": 39.8293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3076.5577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.4187, "queue_k_norm": 1.4511, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2202, "sent_len_1": 66.9746, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.3512, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.9395, "doc_norm": 1.4535, "encoder_q-embeddings": 1445.324, "encoder_q-layer.0": 1101.3835, "encoder_q-layer.1": 1218.9861, "encoder_q-layer.10": 334.3254, "encoder_q-layer.11": 759.0322, "encoder_q-layer.2": 1182.3978, "encoder_q-layer.3": 1228.2025, "encoder_q-layer.4": 961.6581, "encoder_q-layer.5": 598.6373, "encoder_q-layer.6": 558.9573, "encoder_q-layer.7": 565.4709, "encoder_q-layer.8": 520.8837, "encoder_q-layer.9": 402.5641, "epoch": 0.44, "inbatch_neg_score": 0.3065, "inbatch_pos_score": 0.998, "learning_rate": 3.077777777777778e-05, "loss": 2.9395, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.7216, "num_token_overlap": 17.9897, "num_token_query": 52.2162, "num_token_union": 73.4532, "num_word_context": 202.3929, "num_word_doc": 49.8371, "num_word_query": 39.8663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1434.2906, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3054, "query_norm": 1.4183, "queue_k_norm": 1.45, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2162, "sent_len_1": 66.7216, "sent_len_max_0": 127.9975, "sent_len_max_1": 186.7075, "stdk": 0.0483, "stdq": 0.0456, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.9401, "doc_norm": 1.4456, "encoder_q-embeddings": 5426.1816, "encoder_q-layer.0": 3991.4646, "encoder_q-layer.1": 4281.3193, "encoder_q-layer.10": 319.3873, "encoder_q-layer.11": 755.8904, "encoder_q-layer.2": 4523.0869, "encoder_q-layer.3": 4100.7314, "encoder_q-layer.4": 3562.1816, "encoder_q-layer.5": 2493.9836, "encoder_q-layer.6": 2021.385, "encoder_q-layer.7": 1137.5503, "encoder_q-layer.8": 665.4417, "encoder_q-layer.9": 372.3003, "epoch": 0.44, "inbatch_neg_score": 0.3117, "inbatch_pos_score": 1.0234, "learning_rate": 3.0722222222222227e-05, "loss": 2.9401, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.721, "num_token_overlap": 17.9496, "num_token_query": 52.2205, "num_token_union": 73.4887, "num_word_context": 202.3814, "num_word_doc": 49.8175, "num_word_query": 39.8249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4921.3878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.311, "query_norm": 1.4512, "queue_k_norm": 1.4488, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2205, "sent_len_1": 66.721, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.3288, "stdk": 0.048, "stdq": 0.0468, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.9476, "doc_norm": 1.4497, "encoder_q-embeddings": 1276.9109, "encoder_q-layer.0": 922.5941, "encoder_q-layer.1": 1048.3579, "encoder_q-layer.10": 357.4152, "encoder_q-layer.11": 771.1125, "encoder_q-layer.2": 1215.2704, "encoder_q-layer.3": 1279.0564, "encoder_q-layer.4": 1178.1824, "encoder_q-layer.5": 1150.3978, "encoder_q-layer.6": 749.4182, "encoder_q-layer.7": 444.2309, "encoder_q-layer.8": 409.0685, "encoder_q-layer.9": 348.3442, "epoch": 0.44, "inbatch_neg_score": 0.3216, "inbatch_pos_score": 1.0088, "learning_rate": 3.066666666666667e-05, "loss": 2.9476, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.6886, "num_token_overlap": 17.9901, "num_token_query": 52.2093, "num_token_union": 73.3722, "num_word_context": 202.2229, "num_word_doc": 49.7348, "num_word_query": 39.8019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1410.7606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3191, "query_norm": 1.4439, "queue_k_norm": 1.4497, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2093, "sent_len_1": 66.6886, "sent_len_max_0": 128.0, "sent_len_max_1": 192.0925, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 2.9355, "doc_norm": 1.4468, "encoder_q-embeddings": 854.5039, "encoder_q-layer.0": 615.9526, "encoder_q-layer.1": 702.9695, "encoder_q-layer.10": 343.5753, "encoder_q-layer.11": 758.5073, "encoder_q-layer.2": 783.2192, "encoder_q-layer.3": 797.97, "encoder_q-layer.4": 698.198, "encoder_q-layer.5": 604.7902, "encoder_q-layer.6": 492.0114, "encoder_q-layer.7": 439.5916, "encoder_q-layer.8": 435.1924, "encoder_q-layer.9": 348.612, "epoch": 0.44, "inbatch_neg_score": 0.3278, "inbatch_pos_score": 1.0166, "learning_rate": 3.061111111111111e-05, "loss": 2.9355, "norm_diff": 0.0103, "norm_loss": 0.0, "num_token_doc": 66.7143, "num_token_overlap": 17.9814, "num_token_query": 52.1503, "num_token_union": 73.3895, "num_word_context": 202.1162, "num_word_doc": 49.8225, "num_word_query": 39.7506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 961.3001, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3267, "query_norm": 1.4465, "queue_k_norm": 1.4487, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1503, "sent_len_1": 66.7143, "sent_len_max_0": 127.98, "sent_len_max_1": 189.3812, "stdk": 0.0481, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.9411, "doc_norm": 1.4518, "encoder_q-embeddings": 1568.3766, "encoder_q-layer.0": 1087.8331, "encoder_q-layer.1": 1251.816, "encoder_q-layer.10": 286.6562, "encoder_q-layer.11": 699.1234, "encoder_q-layer.2": 1507.0225, "encoder_q-layer.3": 1555.8273, "encoder_q-layer.4": 1607.6163, "encoder_q-layer.5": 1495.1656, "encoder_q-layer.6": 1527.2126, "encoder_q-layer.7": 1497.3577, "encoder_q-layer.8": 1219.3872, "encoder_q-layer.9": 571.9731, "epoch": 0.44, "inbatch_neg_score": 0.3164, "inbatch_pos_score": 1.0322, "learning_rate": 3.055555555555556e-05, "loss": 2.9411, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.756, "num_token_overlap": 17.9988, "num_token_query": 52.178, "num_token_union": 73.4885, "num_word_context": 202.4344, "num_word_doc": 49.8579, "num_word_query": 39.8046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.7317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3154, "query_norm": 1.439, "queue_k_norm": 1.4516, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.178, "sent_len_1": 66.756, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5925, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.9297, "doc_norm": 1.4529, "encoder_q-embeddings": 1481.5452, "encoder_q-layer.0": 1064.7491, "encoder_q-layer.1": 1242.0687, "encoder_q-layer.10": 327.3437, "encoder_q-layer.11": 756.6922, "encoder_q-layer.2": 1533.4207, "encoder_q-layer.3": 1631.9135, "encoder_q-layer.4": 1782.6906, "encoder_q-layer.5": 1905.2649, "encoder_q-layer.6": 1632.0364, "encoder_q-layer.7": 1636.7687, "encoder_q-layer.8": 901.9178, "encoder_q-layer.9": 402.219, "epoch": 0.44, "inbatch_neg_score": 0.3226, "inbatch_pos_score": 1.0225, "learning_rate": 3.05e-05, "loss": 2.9297, "norm_diff": 0.0106, "norm_loss": 0.0, "num_token_doc": 66.748, "num_token_overlap": 18.0519, "num_token_query": 52.1626, "num_token_union": 73.3976, "num_word_context": 201.9003, "num_word_doc": 49.7966, "num_word_query": 39.7475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1989.9484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.4447, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1626, "sent_len_1": 66.748, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6175, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.9509, "doc_norm": 1.4472, "encoder_q-embeddings": 2466.6912, "encoder_q-layer.0": 1849.2721, "encoder_q-layer.1": 2005.0615, "encoder_q-layer.10": 325.0795, "encoder_q-layer.11": 805.7144, "encoder_q-layer.2": 2128.718, "encoder_q-layer.3": 1826.3691, "encoder_q-layer.4": 1582.224, "encoder_q-layer.5": 1348.8064, "encoder_q-layer.6": 1255.8846, "encoder_q-layer.7": 1115.3848, "encoder_q-layer.8": 889.2075, "encoder_q-layer.9": 449.2014, "epoch": 0.44, "inbatch_neg_score": 0.3308, "inbatch_pos_score": 1.0156, "learning_rate": 3.044444444444445e-05, "loss": 2.9509, "norm_diff": 0.0186, "norm_loss": 0.0, "num_token_doc": 66.9773, "num_token_overlap": 17.9614, "num_token_query": 52.0674, "num_token_union": 73.5445, "num_word_context": 202.5176, "num_word_doc": 50.0161, "num_word_query": 39.7167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2390.2615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3318, "query_norm": 1.4658, "queue_k_norm": 1.4519, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0674, "sent_len_1": 66.9773, "sent_len_max_0": 128.0, "sent_len_max_1": 188.57, "stdk": 0.0481, "stdq": 0.0465, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 2.9541, "doc_norm": 1.456, "encoder_q-embeddings": 13856.042, "encoder_q-layer.0": 10459.1299, "encoder_q-layer.1": 11201.6953, "encoder_q-layer.10": 389.5926, "encoder_q-layer.11": 786.0285, "encoder_q-layer.2": 9397.0967, "encoder_q-layer.3": 9010.6113, "encoder_q-layer.4": 5910.0532, "encoder_q-layer.5": 3894.0391, "encoder_q-layer.6": 3921.5527, "encoder_q-layer.7": 3343.012, "encoder_q-layer.8": 3124.6528, "encoder_q-layer.9": 1895.8428, "epoch": 0.44, "inbatch_neg_score": 0.3118, "inbatch_pos_score": 0.98, "learning_rate": 3.0388888888888887e-05, "loss": 2.9541, "norm_diff": 0.0423, "norm_loss": 0.0, "num_token_doc": 66.7517, "num_token_overlap": 17.9612, "num_token_query": 52.1823, "num_token_union": 73.5083, "num_word_context": 202.5719, "num_word_doc": 49.8313, "num_word_query": 39.7473, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12100.5461, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.312, "query_norm": 1.4137, "queue_k_norm": 1.4518, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1823, "sent_len_1": 66.7517, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8025, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.9303, "doc_norm": 1.4474, "encoder_q-embeddings": 2163.7229, "encoder_q-layer.0": 1597.6626, "encoder_q-layer.1": 1692.0875, "encoder_q-layer.10": 337.6782, "encoder_q-layer.11": 755.3092, "encoder_q-layer.2": 1822.4066, "encoder_q-layer.3": 2037.4171, "encoder_q-layer.4": 1865.1897, "encoder_q-layer.5": 1849.8462, "encoder_q-layer.6": 1536.4209, "encoder_q-layer.7": 1519.5359, "encoder_q-layer.8": 935.864, "encoder_q-layer.9": 321.2413, "epoch": 0.44, "inbatch_neg_score": 0.3077, "inbatch_pos_score": 0.9902, "learning_rate": 3.0333333333333337e-05, "loss": 2.9303, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 66.8374, "num_token_overlap": 18.0066, "num_token_query": 52.2296, "num_token_union": 73.4805, "num_word_context": 202.3862, "num_word_doc": 49.8748, "num_word_query": 39.8194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2333.5346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3059, "query_norm": 1.4424, "queue_k_norm": 1.4498, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2296, "sent_len_1": 66.8374, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.61, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 2.9617, "doc_norm": 1.4486, "encoder_q-embeddings": 5671.8657, "encoder_q-layer.0": 4379.1304, "encoder_q-layer.1": 5481.1987, "encoder_q-layer.10": 341.1415, "encoder_q-layer.11": 809.771, "encoder_q-layer.2": 3235.1182, "encoder_q-layer.3": 3387.2046, "encoder_q-layer.4": 3282.2161, "encoder_q-layer.5": 2198.2368, "encoder_q-layer.6": 1430.386, "encoder_q-layer.7": 1308.3364, "encoder_q-layer.8": 1245.3276, "encoder_q-layer.9": 632.8948, "epoch": 0.44, "inbatch_neg_score": 0.3022, "inbatch_pos_score": 0.9824, "learning_rate": 3.0277777777777776e-05, "loss": 2.9617, "norm_diff": 0.0188, "norm_loss": 0.0, "num_token_doc": 66.6396, "num_token_overlap": 17.9719, "num_token_query": 52.3043, "num_token_union": 73.4487, "num_word_context": 202.1884, "num_word_doc": 49.6893, "num_word_query": 39.878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4912.6563, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.431, "queue_k_norm": 1.4503, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3043, "sent_len_1": 66.6396, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4325, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 2.9323, "doc_norm": 1.4464, "encoder_q-embeddings": 931.4536, "encoder_q-layer.0": 603.3522, "encoder_q-layer.1": 650.8054, "encoder_q-layer.10": 357.631, "encoder_q-layer.11": 782.1538, "encoder_q-layer.2": 686.7283, "encoder_q-layer.3": 756.023, "encoder_q-layer.4": 783.7189, "encoder_q-layer.5": 740.3793, "encoder_q-layer.6": 674.4294, "encoder_q-layer.7": 611.5114, "encoder_q-layer.8": 551.4993, "encoder_q-layer.9": 378.0309, "epoch": 0.45, "inbatch_neg_score": 0.3034, "inbatch_pos_score": 0.9888, "learning_rate": 3.0222222222222225e-05, "loss": 2.9323, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.7462, "num_token_overlap": 17.9842, "num_token_query": 52.2399, "num_token_union": 73.4841, "num_word_context": 202.3736, "num_word_doc": 49.8339, "num_word_query": 39.8311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1020.7985, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3035, "query_norm": 1.4289, "queue_k_norm": 1.4481, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2399, "sent_len_1": 66.7462, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2575, "stdk": 0.0481, "stdq": 0.0459, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.9406, "doc_norm": 1.4537, "encoder_q-embeddings": 2046.3602, "encoder_q-layer.0": 1492.8678, "encoder_q-layer.1": 1688.0586, "encoder_q-layer.10": 342.371, "encoder_q-layer.11": 793.8545, "encoder_q-layer.2": 1657.1467, "encoder_q-layer.3": 1643.438, "encoder_q-layer.4": 1622.6263, "encoder_q-layer.5": 1442.6304, "encoder_q-layer.6": 1211.0251, "encoder_q-layer.7": 1031.4482, "encoder_q-layer.8": 658.6267, "encoder_q-layer.9": 349.7879, "epoch": 0.45, "inbatch_neg_score": 0.2996, "inbatch_pos_score": 1.0029, "learning_rate": 3.016666666666667e-05, "loss": 2.9406, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.8225, "num_token_overlap": 17.9923, "num_token_query": 52.1476, "num_token_union": 73.4016, "num_word_context": 202.238, "num_word_doc": 49.8658, "num_word_query": 39.7336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2048.6137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3, "query_norm": 1.439, "queue_k_norm": 1.448, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1476, "sent_len_1": 66.8225, "sent_len_max_0": 127.99, "sent_len_max_1": 189.9112, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 2.9234, "doc_norm": 1.4414, "encoder_q-embeddings": 838.3296, "encoder_q-layer.0": 680.053, "encoder_q-layer.1": 641.0179, "encoder_q-layer.10": 369.0473, "encoder_q-layer.11": 876.3621, "encoder_q-layer.2": 759.8751, "encoder_q-layer.3": 889.7617, "encoder_q-layer.4": 817.4674, "encoder_q-layer.5": 791.6589, "encoder_q-layer.6": 593.9572, "encoder_q-layer.7": 600.9625, "encoder_q-layer.8": 634.9863, "encoder_q-layer.9": 407.5612, "epoch": 0.45, "inbatch_neg_score": 0.3063, "inbatch_pos_score": 0.9976, "learning_rate": 3.0111111111111113e-05, "loss": 2.9234, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.8241, "num_token_overlap": 18.0221, "num_token_query": 52.1709, "num_token_union": 73.4267, "num_word_context": 202.292, "num_word_doc": 49.8982, "num_word_query": 39.7594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1082.9614, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3049, "query_norm": 1.453, "queue_k_norm": 1.4482, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1709, "sent_len_1": 66.8241, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2075, "stdk": 0.0479, "stdq": 0.0468, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 2.9568, "doc_norm": 1.4468, "encoder_q-embeddings": 1302.0322, "encoder_q-layer.0": 951.9086, "encoder_q-layer.1": 1051.1907, "encoder_q-layer.10": 351.6991, "encoder_q-layer.11": 788.6795, "encoder_q-layer.2": 1223.9929, "encoder_q-layer.3": 1250.943, "encoder_q-layer.4": 1194.8228, "encoder_q-layer.5": 1200.9225, "encoder_q-layer.6": 1051.4084, "encoder_q-layer.7": 922.5549, "encoder_q-layer.8": 799.6989, "encoder_q-layer.9": 490.5641, "epoch": 0.45, "inbatch_neg_score": 0.3024, "inbatch_pos_score": 0.9937, "learning_rate": 3.005555555555556e-05, "loss": 2.9568, "norm_diff": 0.013, "norm_loss": 0.0, "num_token_doc": 66.7412, "num_token_overlap": 17.9801, "num_token_query": 52.0965, "num_token_union": 73.407, "num_word_context": 202.3165, "num_word_doc": 49.8155, "num_word_query": 39.7104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1532.0715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3027, "query_norm": 1.4469, "queue_k_norm": 1.448, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0965, "sent_len_1": 66.7412, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5188, "stdk": 0.0481, "stdq": 0.0466, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.939, "doc_norm": 1.4411, "encoder_q-embeddings": 1596.53, "encoder_q-layer.0": 1191.2013, "encoder_q-layer.1": 1309.7041, "encoder_q-layer.10": 366.5126, "encoder_q-layer.11": 773.447, "encoder_q-layer.2": 1542.8508, "encoder_q-layer.3": 1539.1055, "encoder_q-layer.4": 1530.7445, "encoder_q-layer.5": 1336.3357, "encoder_q-layer.6": 991.3078, "encoder_q-layer.7": 785.4623, "encoder_q-layer.8": 652.9616, "encoder_q-layer.9": 360.2431, "epoch": 0.45, "inbatch_neg_score": 0.3069, "inbatch_pos_score": 0.9941, "learning_rate": 3e-05, "loss": 2.939, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.5263, "num_token_overlap": 17.9611, "num_token_query": 52.2461, "num_token_union": 73.35, "num_word_context": 202.3287, "num_word_doc": 49.6961, "num_word_query": 39.8429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1751.9812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.439, "queue_k_norm": 1.4468, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2461, "sent_len_1": 66.5263, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9162, "stdk": 0.0479, "stdq": 0.0463, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.9369, "doc_norm": 1.4501, "encoder_q-embeddings": 978.36, "encoder_q-layer.0": 664.2216, "encoder_q-layer.1": 764.3091, "encoder_q-layer.10": 397.6898, "encoder_q-layer.11": 778.8994, "encoder_q-layer.2": 914.4004, "encoder_q-layer.3": 960.4841, "encoder_q-layer.4": 867.589, "encoder_q-layer.5": 834.9966, "encoder_q-layer.6": 698.5174, "encoder_q-layer.7": 734.2228, "encoder_q-layer.8": 819.6522, "encoder_q-layer.9": 443.5935, "epoch": 0.45, "inbatch_neg_score": 0.3001, "inbatch_pos_score": 0.9873, "learning_rate": 2.9944444444444446e-05, "loss": 2.9369, "norm_diff": 0.0184, "norm_loss": 0.0, "num_token_doc": 66.3306, "num_token_overlap": 17.9486, "num_token_query": 52.1189, "num_token_union": 73.2446, "num_word_context": 202.3195, "num_word_doc": 49.5328, "num_word_query": 39.7104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1183.462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.301, "query_norm": 1.4324, "queue_k_norm": 1.4502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1189, "sent_len_1": 66.3306, "sent_len_max_0": 128.0, "sent_len_max_1": 186.5788, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.9381, "doc_norm": 1.4482, "encoder_q-embeddings": 1743.3247, "encoder_q-layer.0": 1247.1808, "encoder_q-layer.1": 1302.9294, "encoder_q-layer.10": 700.8187, "encoder_q-layer.11": 1595.6177, "encoder_q-layer.2": 1476.3435, "encoder_q-layer.3": 1528.3832, "encoder_q-layer.4": 1547.7524, "encoder_q-layer.5": 1703.3608, "encoder_q-layer.6": 1645.2458, "encoder_q-layer.7": 1906.8372, "encoder_q-layer.8": 1939.463, "encoder_q-layer.9": 863.1023, "epoch": 0.45, "inbatch_neg_score": 0.3173, "inbatch_pos_score": 1.0088, "learning_rate": 2.988888888888889e-05, "loss": 2.9381, "norm_diff": 0.0175, "norm_loss": 0.0, "num_token_doc": 66.9227, "num_token_overlap": 17.959, "num_token_query": 52.2012, "num_token_union": 73.5607, "num_word_context": 202.3785, "num_word_doc": 49.9524, "num_word_query": 39.778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2293.3312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3169, "query_norm": 1.4464, "queue_k_norm": 1.4483, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2012, "sent_len_1": 66.9227, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9487, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.937, "doc_norm": 1.4494, "encoder_q-embeddings": 2297.4587, "encoder_q-layer.0": 1733.9382, "encoder_q-layer.1": 1690.2439, "encoder_q-layer.10": 644.5336, "encoder_q-layer.11": 1568.3339, "encoder_q-layer.2": 1970.8922, "encoder_q-layer.3": 2088.2144, "encoder_q-layer.4": 1858.8639, "encoder_q-layer.5": 1830.4333, "encoder_q-layer.6": 1684.8861, "encoder_q-layer.7": 1706.8909, "encoder_q-layer.8": 1366.1322, "encoder_q-layer.9": 684.9474, "epoch": 0.45, "inbatch_neg_score": 0.3148, "inbatch_pos_score": 1.0205, "learning_rate": 2.9833333333333335e-05, "loss": 2.937, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.7403, "num_token_overlap": 18.0716, "num_token_query": 52.4483, "num_token_union": 73.5153, "num_word_context": 202.3134, "num_word_doc": 49.7556, "num_word_query": 39.9997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2604.3107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3135, "query_norm": 1.4343, "queue_k_norm": 1.449, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4483, "sent_len_1": 66.7403, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5962, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.9537, "doc_norm": 1.4507, "encoder_q-embeddings": 1709.1355, "encoder_q-layer.0": 1162.9426, "encoder_q-layer.1": 1297.6914, "encoder_q-layer.10": 665.6479, "encoder_q-layer.11": 1620.8289, "encoder_q-layer.2": 1460.325, "encoder_q-layer.3": 1512.9287, "encoder_q-layer.4": 1453.5894, "encoder_q-layer.5": 1534.6433, "encoder_q-layer.6": 1485.4015, "encoder_q-layer.7": 1528.6508, "encoder_q-layer.8": 1429.4835, "encoder_q-layer.9": 780.0386, "epoch": 0.45, "inbatch_neg_score": 0.3224, "inbatch_pos_score": 1.0068, "learning_rate": 2.9777777777777777e-05, "loss": 2.9537, "norm_diff": 0.0161, "norm_loss": 0.0, "num_token_doc": 66.9666, "num_token_overlap": 18.014, "num_token_query": 52.2366, "num_token_union": 73.6045, "num_word_context": 202.2737, "num_word_doc": 49.9884, "num_word_query": 39.832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2117.5585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3225, "query_norm": 1.4533, "queue_k_norm": 1.4498, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2366, "sent_len_1": 66.9666, "sent_len_max_0": 128.0, "sent_len_max_1": 187.79, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.9473, "doc_norm": 1.4456, "encoder_q-embeddings": 7354.2935, "encoder_q-layer.0": 5249.3628, "encoder_q-layer.1": 4848.3677, "encoder_q-layer.10": 672.8999, "encoder_q-layer.11": 1608.2855, "encoder_q-layer.2": 5475.6572, "encoder_q-layer.3": 6107.6724, "encoder_q-layer.4": 4990.708, "encoder_q-layer.5": 5857.4595, "encoder_q-layer.6": 5209.0996, "encoder_q-layer.7": 4753.3228, "encoder_q-layer.8": 4645.7988, "encoder_q-layer.9": 1629.4841, "epoch": 0.45, "inbatch_neg_score": 0.3126, "inbatch_pos_score": 1.0039, "learning_rate": 2.9722222222222223e-05, "loss": 2.9473, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.7665, "num_token_overlap": 18.0616, "num_token_query": 52.3004, "num_token_union": 73.4745, "num_word_context": 201.9528, "num_word_doc": 49.8522, "num_word_query": 39.8707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7620.1449, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3127, "query_norm": 1.4292, "queue_k_norm": 1.4502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3004, "sent_len_1": 66.7665, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0225, "stdk": 0.0481, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.9296, "doc_norm": 1.4514, "encoder_q-embeddings": 2022.8888, "encoder_q-layer.0": 1427.7305, "encoder_q-layer.1": 1484.0466, "encoder_q-layer.10": 314.1625, "encoder_q-layer.11": 758.0084, "encoder_q-layer.2": 1703.7191, "encoder_q-layer.3": 1523.0081, "encoder_q-layer.4": 1261.6431, "encoder_q-layer.5": 1099.4246, "encoder_q-layer.6": 860.8203, "encoder_q-layer.7": 760.8713, "encoder_q-layer.8": 538.5693, "encoder_q-layer.9": 355.5127, "epoch": 0.45, "inbatch_neg_score": 0.3236, "inbatch_pos_score": 1.0215, "learning_rate": 2.9666666666666672e-05, "loss": 2.9296, "norm_diff": 0.0078, "norm_loss": 0.0, "num_token_doc": 66.7673, "num_token_overlap": 17.9808, "num_token_query": 52.1709, "num_token_union": 73.3993, "num_word_context": 202.3341, "num_word_doc": 49.8272, "num_word_query": 39.7582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1887.8411, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3223, "query_norm": 1.4502, "queue_k_norm": 1.4526, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1709, "sent_len_1": 66.7673, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3575, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.9591, "doc_norm": 1.4516, "encoder_q-embeddings": 1213.9309, "encoder_q-layer.0": 888.1832, "encoder_q-layer.1": 999.4083, "encoder_q-layer.10": 172.8039, "encoder_q-layer.11": 368.2025, "encoder_q-layer.2": 730.9263, "encoder_q-layer.3": 619.6822, "encoder_q-layer.4": 618.9945, "encoder_q-layer.5": 563.2109, "encoder_q-layer.6": 538.4524, "encoder_q-layer.7": 518.6063, "encoder_q-layer.8": 434.358, "encoder_q-layer.9": 180.9288, "epoch": 0.46, "inbatch_neg_score": 0.3098, "inbatch_pos_score": 1.0, "learning_rate": 2.961111111111111e-05, "loss": 2.9591, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.9026, "num_token_overlap": 18.004, "num_token_query": 52.2842, "num_token_union": 73.5795, "num_word_context": 202.454, "num_word_doc": 49.9056, "num_word_query": 39.856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1069.244, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3093, "query_norm": 1.4364, "queue_k_norm": 1.4528, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2842, "sent_len_1": 66.9026, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.4462, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.9239, "doc_norm": 1.4514, "encoder_q-embeddings": 498.5031, "encoder_q-layer.0": 353.7595, "encoder_q-layer.1": 383.8025, "encoder_q-layer.10": 170.4033, "encoder_q-layer.11": 402.6673, "encoder_q-layer.2": 415.503, "encoder_q-layer.3": 379.7991, "encoder_q-layer.4": 346.9976, "encoder_q-layer.5": 358.9713, "encoder_q-layer.6": 301.6545, "encoder_q-layer.7": 336.8072, "encoder_q-layer.8": 309.995, "encoder_q-layer.9": 185.1094, "epoch": 0.46, "inbatch_neg_score": 0.3174, "inbatch_pos_score": 1.0234, "learning_rate": 2.955555555555556e-05, "loss": 2.9239, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.7528, "num_token_overlap": 18.0642, "num_token_query": 52.4156, "num_token_union": 73.4864, "num_word_context": 202.4415, "num_word_doc": 49.7852, "num_word_query": 39.964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 544.8687, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3162, "query_norm": 1.4611, "queue_k_norm": 1.452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4156, "sent_len_1": 66.7528, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4363, "stdk": 0.0483, "stdq": 0.0466, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.9604, "doc_norm": 1.451, "encoder_q-embeddings": 1176.4607, "encoder_q-layer.0": 804.122, "encoder_q-layer.1": 965.8107, "encoder_q-layer.10": 174.3487, "encoder_q-layer.11": 397.8187, "encoder_q-layer.2": 1057.8947, "encoder_q-layer.3": 1038.7739, "encoder_q-layer.4": 935.7351, "encoder_q-layer.5": 1023.1574, "encoder_q-layer.6": 1420.6663, "encoder_q-layer.7": 1809.3588, "encoder_q-layer.8": 1940.4323, "encoder_q-layer.9": 691.0256, "epoch": 0.46, "inbatch_neg_score": 0.329, "inbatch_pos_score": 1.0264, "learning_rate": 2.95e-05, "loss": 2.9604, "norm_diff": 0.0214, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 17.9341, "num_token_query": 52.1544, "num_token_union": 73.3875, "num_word_context": 202.157, "num_word_doc": 49.7314, "num_word_query": 39.768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1840.6999, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3276, "query_norm": 1.4722, "queue_k_norm": 1.4523, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1544, "sent_len_1": 66.6342, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5238, "stdk": 0.0483, "stdq": 0.0467, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 2.9446, "doc_norm": 1.4474, "encoder_q-embeddings": 407.481, "encoder_q-layer.0": 286.9333, "encoder_q-layer.1": 346.1809, "encoder_q-layer.10": 153.8135, "encoder_q-layer.11": 386.6666, "encoder_q-layer.2": 392.9668, "encoder_q-layer.3": 387.5842, "encoder_q-layer.4": 334.6978, "encoder_q-layer.5": 290.7157, "encoder_q-layer.6": 297.5224, "encoder_q-layer.7": 300.7264, "encoder_q-layer.8": 263.8939, "encoder_q-layer.9": 155.1274, "epoch": 0.46, "inbatch_neg_score": 0.3227, "inbatch_pos_score": 1.0127, "learning_rate": 2.9444444444444448e-05, "loss": 2.9446, "norm_diff": 0.0123, "norm_loss": 0.0, "num_token_doc": 66.7404, "num_token_overlap": 17.9842, "num_token_query": 52.2717, "num_token_union": 73.4617, "num_word_context": 202.4739, "num_word_doc": 49.8089, "num_word_query": 39.8666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 488.3521, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3208, "query_norm": 1.4466, "queue_k_norm": 1.4531, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2717, "sent_len_1": 66.7404, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5213, "stdk": 0.0481, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9306, "doc_norm": 1.453, "encoder_q-embeddings": 1150.884, "encoder_q-layer.0": 840.1723, "encoder_q-layer.1": 931.4175, "encoder_q-layer.10": 163.8738, "encoder_q-layer.11": 395.4666, "encoder_q-layer.2": 1042.516, "encoder_q-layer.3": 782.2304, "encoder_q-layer.4": 554.2942, "encoder_q-layer.5": 472.7434, "encoder_q-layer.6": 419.6928, "encoder_q-layer.7": 374.1735, "encoder_q-layer.8": 249.436, "encoder_q-layer.9": 155.5852, "epoch": 0.46, "inbatch_neg_score": 0.3228, "inbatch_pos_score": 1.0234, "learning_rate": 2.9388888888888887e-05, "loss": 2.9306, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.8077, "num_token_overlap": 17.982, "num_token_query": 52.1492, "num_token_union": 73.5158, "num_word_context": 202.2125, "num_word_doc": 49.8154, "num_word_query": 39.7014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1025.4512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3215, "query_norm": 1.4494, "queue_k_norm": 1.4502, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1492, "sent_len_1": 66.8077, "sent_len_max_0": 127.995, "sent_len_max_1": 191.085, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.9324, "doc_norm": 1.4479, "encoder_q-embeddings": 377.3009, "encoder_q-layer.0": 271.8897, "encoder_q-layer.1": 300.2668, "encoder_q-layer.10": 177.666, "encoder_q-layer.11": 397.3117, "encoder_q-layer.2": 342.7105, "encoder_q-layer.3": 347.9446, "encoder_q-layer.4": 356.0513, "encoder_q-layer.5": 350.38, "encoder_q-layer.6": 427.5832, "encoder_q-layer.7": 506.1866, "encoder_q-layer.8": 547.0958, "encoder_q-layer.9": 195.2509, "epoch": 0.46, "inbatch_neg_score": 0.3117, "inbatch_pos_score": 1.0254, "learning_rate": 2.9333333333333336e-05, "loss": 2.9324, "norm_diff": 0.009, "norm_loss": 0.0, "num_token_doc": 66.8134, "num_token_overlap": 18.0666, "num_token_query": 52.3793, "num_token_union": 73.5461, "num_word_context": 202.573, "num_word_doc": 49.8674, "num_word_query": 39.944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 574.2899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3113, "query_norm": 1.4514, "queue_k_norm": 1.45, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3793, "sent_len_1": 66.8134, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.6075, "stdk": 0.0482, "stdq": 0.0467, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.9122, "doc_norm": 1.454, "encoder_q-embeddings": 1386.7382, "encoder_q-layer.0": 904.1224, "encoder_q-layer.1": 978.7102, "encoder_q-layer.10": 158.0932, "encoder_q-layer.11": 379.5595, "encoder_q-layer.2": 1154.7433, "encoder_q-layer.3": 1116.3472, "encoder_q-layer.4": 966.4547, "encoder_q-layer.5": 861.9417, "encoder_q-layer.6": 1081.1597, "encoder_q-layer.7": 1162.3618, "encoder_q-layer.8": 1341.79, "encoder_q-layer.9": 267.5704, "epoch": 0.46, "inbatch_neg_score": 0.3111, "inbatch_pos_score": 1.0078, "learning_rate": 2.927777777777778e-05, "loss": 2.9122, "norm_diff": 0.02, "norm_loss": 0.0, "num_token_doc": 66.7228, "num_token_overlap": 18.1456, "num_token_query": 52.5318, "num_token_union": 73.502, "num_word_context": 202.1539, "num_word_doc": 49.7956, "num_word_query": 40.0538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1563.2884, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3098, "query_norm": 1.4372, "queue_k_norm": 1.4486, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.5318, "sent_len_1": 66.7228, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4837, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 2.946, "doc_norm": 1.4389, "encoder_q-embeddings": 888.9083, "encoder_q-layer.0": 560.7832, "encoder_q-layer.1": 627.8878, "encoder_q-layer.10": 182.8291, "encoder_q-layer.11": 449.7415, "encoder_q-layer.2": 739.6695, "encoder_q-layer.3": 895.2629, "encoder_q-layer.4": 820.0151, "encoder_q-layer.5": 822.0239, "encoder_q-layer.6": 740.4518, "encoder_q-layer.7": 672.4437, "encoder_q-layer.8": 488.5988, "encoder_q-layer.9": 197.5972, "epoch": 0.46, "inbatch_neg_score": 0.3188, "inbatch_pos_score": 0.9854, "learning_rate": 2.9222222222222224e-05, "loss": 2.946, "norm_diff": 0.016, "norm_loss": 0.0, "num_token_doc": 66.8519, "num_token_overlap": 18.1151, "num_token_query": 52.4341, "num_token_union": 73.509, "num_word_context": 202.7911, "num_word_doc": 49.9027, "num_word_query": 39.958, "postclip_grad_norm": 1.0, "preclip_grad_norm": 991.6512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3193, "query_norm": 1.4341, "queue_k_norm": 1.4495, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4341, "sent_len_1": 66.8519, "sent_len_max_0": 128.0, "sent_len_max_1": 189.595, "stdk": 0.0478, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 2.9488, "doc_norm": 1.4458, "encoder_q-embeddings": 2851.7097, "encoder_q-layer.0": 1966.821, "encoder_q-layer.1": 2243.8677, "encoder_q-layer.10": 190.2906, "encoder_q-layer.11": 409.5082, "encoder_q-layer.2": 2795.9426, "encoder_q-layer.3": 3238.5171, "encoder_q-layer.4": 3153.2854, "encoder_q-layer.5": 3603.1667, "encoder_q-layer.6": 2267.8535, "encoder_q-layer.7": 1353.1586, "encoder_q-layer.8": 730.7493, "encoder_q-layer.9": 236.1987, "epoch": 0.46, "inbatch_neg_score": 0.305, "inbatch_pos_score": 0.9976, "learning_rate": 2.916666666666667e-05, "loss": 2.9488, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.9136, "num_token_overlap": 18.0464, "num_token_query": 52.2641, "num_token_union": 73.5272, "num_word_context": 202.5938, "num_word_doc": 49.9872, "num_word_query": 39.8533, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3361.2694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.4356, "queue_k_norm": 1.4518, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2641, "sent_len_1": 66.9136, "sent_len_max_0": 127.9862, "sent_len_max_1": 186.8462, "stdk": 0.0481, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.9649, "doc_norm": 1.4462, "encoder_q-embeddings": 289.6473, "encoder_q-layer.0": 198.8487, "encoder_q-layer.1": 225.3824, "encoder_q-layer.10": 188.1525, "encoder_q-layer.11": 398.3607, "encoder_q-layer.2": 248.0623, "encoder_q-layer.3": 246.5368, "encoder_q-layer.4": 248.3147, "encoder_q-layer.5": 233.1452, "encoder_q-layer.6": 251.263, "encoder_q-layer.7": 278.7451, "encoder_q-layer.8": 343.1151, "encoder_q-layer.9": 214.9164, "epoch": 0.46, "inbatch_neg_score": 0.3033, "inbatch_pos_score": 1.0059, "learning_rate": 2.9111111111111112e-05, "loss": 2.9649, "norm_diff": 0.0126, "norm_loss": 0.0, "num_token_doc": 66.7945, "num_token_overlap": 17.9891, "num_token_query": 52.1953, "num_token_union": 73.518, "num_word_context": 202.3189, "num_word_doc": 49.8303, "num_word_query": 39.7922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 396.4156, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.303, "query_norm": 1.4502, "queue_k_norm": 1.45, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1953, "sent_len_1": 66.7945, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9062, "stdk": 0.0481, "stdq": 0.047, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.9427, "doc_norm": 1.4471, "encoder_q-embeddings": 2170.9033, "encoder_q-layer.0": 1783.854, "encoder_q-layer.1": 1671.5173, "encoder_q-layer.10": 163.7128, "encoder_q-layer.11": 367.0253, "encoder_q-layer.2": 727.174, "encoder_q-layer.3": 535.2379, "encoder_q-layer.4": 517.5737, "encoder_q-layer.5": 438.006, "encoder_q-layer.6": 376.4531, "encoder_q-layer.7": 350.5727, "encoder_q-layer.8": 339.2857, "encoder_q-layer.9": 237.2302, "epoch": 0.47, "inbatch_neg_score": 0.2909, "inbatch_pos_score": 0.9995, "learning_rate": 2.9055555555555558e-05, "loss": 2.9427, "norm_diff": 0.014, "norm_loss": 0.0, "num_token_doc": 66.7031, "num_token_overlap": 18.039, "num_token_query": 52.3455, "num_token_union": 73.4652, "num_word_context": 201.9346, "num_word_doc": 49.8022, "num_word_query": 39.905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1639.4726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.433, "queue_k_norm": 1.4492, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3455, "sent_len_1": 66.7031, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.5437, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.9539, "doc_norm": 1.4482, "encoder_q-embeddings": 442.3357, "encoder_q-layer.0": 301.0665, "encoder_q-layer.1": 318.2068, "encoder_q-layer.10": 165.7404, "encoder_q-layer.11": 355.2654, "encoder_q-layer.2": 357.4662, "encoder_q-layer.3": 370.7072, "encoder_q-layer.4": 344.393, "encoder_q-layer.5": 383.4134, "encoder_q-layer.6": 320.2942, "encoder_q-layer.7": 245.9874, "encoder_q-layer.8": 221.2909, "encoder_q-layer.9": 163.4931, "epoch": 0.47, "inbatch_neg_score": 0.2838, "inbatch_pos_score": 0.9854, "learning_rate": 2.9e-05, "loss": 2.9539, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.6075, "num_token_overlap": 17.9581, "num_token_query": 52.2067, "num_token_union": 73.3571, "num_word_context": 202.3279, "num_word_doc": 49.7142, "num_word_query": 39.814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 479.3131, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2849, "query_norm": 1.4381, "queue_k_norm": 1.4477, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2067, "sent_len_1": 66.6075, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.555, "stdk": 0.0482, "stdq": 0.0464, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.9319, "doc_norm": 1.445, "encoder_q-embeddings": 435.8658, "encoder_q-layer.0": 311.6464, "encoder_q-layer.1": 331.3346, "encoder_q-layer.10": 166.3466, "encoder_q-layer.11": 363.6566, "encoder_q-layer.2": 342.5095, "encoder_q-layer.3": 361.661, "encoder_q-layer.4": 338.1325, "encoder_q-layer.5": 319.056, "encoder_q-layer.6": 310.9106, "encoder_q-layer.7": 316.2223, "encoder_q-layer.8": 288.6183, "encoder_q-layer.9": 185.1118, "epoch": 0.47, "inbatch_neg_score": 0.2686, "inbatch_pos_score": 0.9595, "learning_rate": 2.8944444444444446e-05, "loss": 2.9319, "norm_diff": 0.0335, "norm_loss": 0.0, "num_token_doc": 66.9259, "num_token_overlap": 18.0938, "num_token_query": 52.3409, "num_token_union": 73.5668, "num_word_context": 202.5035, "num_word_doc": 49.9584, "num_word_query": 39.9153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 491.2919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2666, "query_norm": 1.4115, "queue_k_norm": 1.4496, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3409, "sent_len_1": 66.9259, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2688, "stdk": 0.0481, "stdq": 0.0456, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 2.9403, "doc_norm": 1.4422, "encoder_q-embeddings": 285.3091, "encoder_q-layer.0": 199.2302, "encoder_q-layer.1": 226.5535, "encoder_q-layer.10": 151.7879, "encoder_q-layer.11": 352.6481, "encoder_q-layer.2": 253.3895, "encoder_q-layer.3": 260.0153, "encoder_q-layer.4": 247.0073, "encoder_q-layer.5": 250.6552, "encoder_q-layer.6": 221.7011, "encoder_q-layer.7": 208.4273, "encoder_q-layer.8": 199.1566, "encoder_q-layer.9": 151.889, "epoch": 0.47, "inbatch_neg_score": 0.2656, "inbatch_pos_score": 0.9541, "learning_rate": 2.8888888888888888e-05, "loss": 2.9403, "norm_diff": 0.0108, "norm_loss": 0.0, "num_token_doc": 66.7753, "num_token_overlap": 18.0803, "num_token_query": 52.2301, "num_token_union": 73.3514, "num_word_context": 202.1987, "num_word_doc": 49.8225, "num_word_query": 39.8336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 356.1007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2654, "query_norm": 1.4395, "queue_k_norm": 1.4458, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2301, "sent_len_1": 66.7753, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6425, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.9448, "doc_norm": 1.4458, "encoder_q-embeddings": 1917.1324, "encoder_q-layer.0": 1547.1765, "encoder_q-layer.1": 1639.249, "encoder_q-layer.10": 162.7416, "encoder_q-layer.11": 337.5437, "encoder_q-layer.2": 1767.205, "encoder_q-layer.3": 1564.8599, "encoder_q-layer.4": 1584.6987, "encoder_q-layer.5": 1585.7902, "encoder_q-layer.6": 1491.5725, "encoder_q-layer.7": 1566.416, "encoder_q-layer.8": 1894.4457, "encoder_q-layer.9": 651.952, "epoch": 0.47, "inbatch_neg_score": 0.2651, "inbatch_pos_score": 1.0098, "learning_rate": 2.8833333333333334e-05, "loss": 2.9448, "norm_diff": 0.0377, "norm_loss": 0.0, "num_token_doc": 66.8368, "num_token_overlap": 18.0137, "num_token_query": 52.0948, "num_token_union": 73.4401, "num_word_context": 202.1005, "num_word_doc": 49.872, "num_word_query": 39.6884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2387.194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.4835, "queue_k_norm": 1.4438, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0948, "sent_len_1": 66.8368, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5687, "stdk": 0.0483, "stdq": 0.0477, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.9472, "doc_norm": 1.4451, "encoder_q-embeddings": 902.2852, "encoder_q-layer.0": 678.1991, "encoder_q-layer.1": 753.6191, "encoder_q-layer.10": 171.3984, "encoder_q-layer.11": 388.5358, "encoder_q-layer.2": 883.5037, "encoder_q-layer.3": 947.007, "encoder_q-layer.4": 931.5082, "encoder_q-layer.5": 842.9722, "encoder_q-layer.6": 598.8045, "encoder_q-layer.7": 520.4763, "encoder_q-layer.8": 370.5986, "encoder_q-layer.9": 203.9691, "epoch": 0.47, "inbatch_neg_score": 0.2714, "inbatch_pos_score": 0.9639, "learning_rate": 2.877777777777778e-05, "loss": 2.9472, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 66.6583, "num_token_overlap": 17.9634, "num_token_query": 52.2004, "num_token_union": 73.4642, "num_word_context": 202.4268, "num_word_doc": 49.7724, "num_word_query": 39.8245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1027.3061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.27, "query_norm": 1.4517, "queue_k_norm": 1.4439, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2004, "sent_len_1": 66.6583, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.84, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.9436, "doc_norm": 1.4438, "encoder_q-embeddings": 949.0474, "encoder_q-layer.0": 712.9749, "encoder_q-layer.1": 702.3741, "encoder_q-layer.10": 167.7969, "encoder_q-layer.11": 351.2724, "encoder_q-layer.2": 562.451, "encoder_q-layer.3": 520.8141, "encoder_q-layer.4": 461.4615, "encoder_q-layer.5": 488.0186, "encoder_q-layer.6": 426.3454, "encoder_q-layer.7": 318.7775, "encoder_q-layer.8": 290.5392, "encoder_q-layer.9": 184.3642, "epoch": 0.47, "inbatch_neg_score": 0.2706, "inbatch_pos_score": 0.9956, "learning_rate": 2.8722222222222222e-05, "loss": 2.9436, "norm_diff": 0.0242, "norm_loss": 0.0, "num_token_doc": 66.8991, "num_token_overlap": 18.0414, "num_token_query": 52.2483, "num_token_union": 73.5187, "num_word_context": 202.1601, "num_word_doc": 49.8961, "num_word_query": 39.8187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 814.7818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.269, "query_norm": 1.468, "queue_k_norm": 1.4439, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2483, "sent_len_1": 66.8991, "sent_len_max_0": 127.995, "sent_len_max_1": 191.6387, "stdk": 0.0482, "stdq": 0.047, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.9353, "doc_norm": 1.4373, "encoder_q-embeddings": 173.0401, "encoder_q-layer.0": 114.9226, "encoder_q-layer.1": 125.4861, "encoder_q-layer.10": 174.1417, "encoder_q-layer.11": 369.8665, "encoder_q-layer.2": 134.8858, "encoder_q-layer.3": 138.8811, "encoder_q-layer.4": 139.1946, "encoder_q-layer.5": 125.9863, "encoder_q-layer.6": 142.3062, "encoder_q-layer.7": 158.4597, "encoder_q-layer.8": 192.1236, "encoder_q-layer.9": 165.9036, "epoch": 0.47, "inbatch_neg_score": 0.2693, "inbatch_pos_score": 0.9648, "learning_rate": 2.8666666666666668e-05, "loss": 2.9353, "norm_diff": 0.0141, "norm_loss": 0.0, "num_token_doc": 66.8947, "num_token_overlap": 18.0545, "num_token_query": 52.32, "num_token_union": 73.55, "num_word_context": 202.5044, "num_word_doc": 49.897, "num_word_query": 39.8934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 265.4322, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2693, "query_norm": 1.4415, "queue_k_norm": 1.443, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.32, "sent_len_1": 66.8947, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8688, "stdk": 0.048, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.9557, "doc_norm": 1.4416, "encoder_q-embeddings": 191.8262, "encoder_q-layer.0": 126.7144, "encoder_q-layer.1": 143.0744, "encoder_q-layer.10": 165.076, "encoder_q-layer.11": 383.463, "encoder_q-layer.2": 153.7589, "encoder_q-layer.3": 149.9037, "encoder_q-layer.4": 147.1672, "encoder_q-layer.5": 146.3618, "encoder_q-layer.6": 154.2328, "encoder_q-layer.7": 174.8046, "encoder_q-layer.8": 184.3219, "encoder_q-layer.9": 158.5424, "epoch": 0.47, "inbatch_neg_score": 0.286, "inbatch_pos_score": 0.9727, "learning_rate": 2.861111111111111e-05, "loss": 2.9557, "norm_diff": 0.0117, "norm_loss": 0.0, "num_token_doc": 66.7772, "num_token_overlap": 17.929, "num_token_query": 52.1572, "num_token_union": 73.4646, "num_word_context": 202.314, "num_word_doc": 49.8318, "num_word_query": 39.7835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 288.0708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2827, "query_norm": 1.4522, "queue_k_norm": 1.4409, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1572, "sent_len_1": 66.7772, "sent_len_max_0": 128.0, "sent_len_max_1": 189.61, "stdk": 0.0482, "stdq": 0.0467, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.9414, "doc_norm": 1.4428, "encoder_q-embeddings": 318.0751, "encoder_q-layer.0": 238.2545, "encoder_q-layer.1": 253.2952, "encoder_q-layer.10": 175.342, "encoder_q-layer.11": 403.1609, "encoder_q-layer.2": 299.089, "encoder_q-layer.3": 317.0933, "encoder_q-layer.4": 301.9193, "encoder_q-layer.5": 304.4572, "encoder_q-layer.6": 294.4875, "encoder_q-layer.7": 292.9258, "encoder_q-layer.8": 303.6298, "encoder_q-layer.9": 202.8706, "epoch": 0.47, "inbatch_neg_score": 0.2742, "inbatch_pos_score": 0.9668, "learning_rate": 2.855555555555556e-05, "loss": 2.9414, "norm_diff": 0.0145, "norm_loss": 0.0, "num_token_doc": 66.7517, "num_token_overlap": 17.955, "num_token_query": 52.1149, "num_token_union": 73.4307, "num_word_context": 202.2584, "num_word_doc": 49.7992, "num_word_query": 39.7329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 440.4537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2739, "query_norm": 1.4346, "queue_k_norm": 1.4395, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1149, "sent_len_1": 66.7517, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5188, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 2.9387, "doc_norm": 1.4392, "encoder_q-embeddings": 765.9114, "encoder_q-layer.0": 531.2741, "encoder_q-layer.1": 601.505, "encoder_q-layer.10": 341.2893, "encoder_q-layer.11": 773.9821, "encoder_q-layer.2": 687.5427, "encoder_q-layer.3": 706.8852, "encoder_q-layer.4": 655.4219, "encoder_q-layer.5": 509.9218, "encoder_q-layer.6": 492.3005, "encoder_q-layer.7": 409.6945, "encoder_q-layer.8": 437.184, "encoder_q-layer.9": 337.0851, "epoch": 0.48, "inbatch_neg_score": 0.2728, "inbatch_pos_score": 0.959, "learning_rate": 2.8499999999999998e-05, "loss": 2.9387, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 66.4541, "num_token_overlap": 17.9634, "num_token_query": 52.1843, "num_token_union": 73.2806, "num_word_context": 201.6436, "num_word_doc": 49.5781, "num_word_query": 39.7887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 891.512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2715, "query_norm": 1.4198, "queue_k_norm": 1.4402, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1843, "sent_len_1": 66.4541, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2562, "stdk": 0.0482, "stdq": 0.046, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.9376, "doc_norm": 1.4401, "encoder_q-embeddings": 650.5378, "encoder_q-layer.0": 416.0008, "encoder_q-layer.1": 502.3067, "encoder_q-layer.10": 314.2779, "encoder_q-layer.11": 767.1962, "encoder_q-layer.2": 591.9031, "encoder_q-layer.3": 604.9645, "encoder_q-layer.4": 623.626, "encoder_q-layer.5": 762.1382, "encoder_q-layer.6": 775.0255, "encoder_q-layer.7": 753.8028, "encoder_q-layer.8": 916.3922, "encoder_q-layer.9": 420.77, "epoch": 0.48, "inbatch_neg_score": 0.2599, "inbatch_pos_score": 0.96, "learning_rate": 2.8444444444444447e-05, "loss": 2.9376, "norm_diff": 0.0294, "norm_loss": 0.0, "num_token_doc": 66.8024, "num_token_overlap": 18.0211, "num_token_query": 52.2477, "num_token_union": 73.4567, "num_word_context": 202.3235, "num_word_doc": 49.8225, "num_word_query": 39.8201, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.1455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2615, "query_norm": 1.4107, "queue_k_norm": 1.4404, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2477, "sent_len_1": 66.8024, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7388, "stdk": 0.0483, "stdq": 0.0456, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.9328, "doc_norm": 1.4399, "encoder_q-embeddings": 724.9651, "encoder_q-layer.0": 519.2653, "encoder_q-layer.1": 608.9271, "encoder_q-layer.10": 287.4797, "encoder_q-layer.11": 702.5333, "encoder_q-layer.2": 707.416, "encoder_q-layer.3": 704.0129, "encoder_q-layer.4": 691.6406, "encoder_q-layer.5": 666.4251, "encoder_q-layer.6": 638.5527, "encoder_q-layer.7": 519.4166, "encoder_q-layer.8": 459.9554, "encoder_q-layer.9": 303.4632, "epoch": 0.48, "inbatch_neg_score": 0.2684, "inbatch_pos_score": 0.981, "learning_rate": 2.8388888888888893e-05, "loss": 2.9328, "norm_diff": 0.0203, "norm_loss": 0.0, "num_token_doc": 66.6989, "num_token_overlap": 18.0417, "num_token_query": 52.3352, "num_token_union": 73.4149, "num_word_context": 202.3385, "num_word_doc": 49.781, "num_word_query": 39.9066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 904.1912, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2683, "query_norm": 1.4197, "queue_k_norm": 1.4389, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3352, "sent_len_1": 66.6989, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7388, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.9331, "doc_norm": 1.4336, "encoder_q-embeddings": 797.7397, "encoder_q-layer.0": 575.3158, "encoder_q-layer.1": 682.6133, "encoder_q-layer.10": 325.989, "encoder_q-layer.11": 758.97, "encoder_q-layer.2": 786.6846, "encoder_q-layer.3": 783.3688, "encoder_q-layer.4": 776.9053, "encoder_q-layer.5": 797.2411, "encoder_q-layer.6": 853.5836, "encoder_q-layer.7": 732.9633, "encoder_q-layer.8": 684.528, "encoder_q-layer.9": 384.3993, "epoch": 0.48, "inbatch_neg_score": 0.2739, "inbatch_pos_score": 0.9771, "learning_rate": 2.8333333333333335e-05, "loss": 2.9331, "norm_diff": 0.0168, "norm_loss": 0.0, "num_token_doc": 67.0141, "num_token_overlap": 18.0343, "num_token_query": 52.2893, "num_token_union": 73.6047, "num_word_context": 202.8741, "num_word_doc": 49.9599, "num_word_query": 39.817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1062.8656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2751, "query_norm": 1.4478, "queue_k_norm": 1.4386, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2893, "sent_len_1": 67.0141, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8862, "stdk": 0.0481, "stdq": 0.0467, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.9339, "doc_norm": 1.4319, "encoder_q-embeddings": 2417.2412, "encoder_q-layer.0": 1654.1737, "encoder_q-layer.1": 1933.9021, "encoder_q-layer.10": 322.4445, "encoder_q-layer.11": 712.2492, "encoder_q-layer.2": 2187.947, "encoder_q-layer.3": 2139.9443, "encoder_q-layer.4": 1945.2097, "encoder_q-layer.5": 1639.4595, "encoder_q-layer.6": 1427.3798, "encoder_q-layer.7": 1047.166, "encoder_q-layer.8": 413.3423, "encoder_q-layer.9": 300.9183, "epoch": 0.48, "inbatch_neg_score": 0.272, "inbatch_pos_score": 0.9824, "learning_rate": 2.827777777777778e-05, "loss": 2.9339, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.6662, "num_token_overlap": 17.9396, "num_token_query": 52.0547, "num_token_union": 73.3277, "num_word_context": 202.3027, "num_word_doc": 49.7135, "num_word_query": 39.6683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2452.3545, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2725, "query_norm": 1.4456, "queue_k_norm": 1.437, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0547, "sent_len_1": 66.6662, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9225, "stdk": 0.048, "stdq": 0.0465, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.9066, "doc_norm": 1.4361, "encoder_q-embeddings": 984.7462, "encoder_q-layer.0": 648.2471, "encoder_q-layer.1": 772.3593, "encoder_q-layer.10": 368.448, "encoder_q-layer.11": 764.4605, "encoder_q-layer.2": 934.4536, "encoder_q-layer.3": 931.3237, "encoder_q-layer.4": 1047.196, "encoder_q-layer.5": 1072.822, "encoder_q-layer.6": 958.1467, "encoder_q-layer.7": 663.2518, "encoder_q-layer.8": 597.4968, "encoder_q-layer.9": 373.8575, "epoch": 0.48, "inbatch_neg_score": 0.274, "inbatch_pos_score": 0.9829, "learning_rate": 2.8222222222222223e-05, "loss": 2.9066, "norm_diff": 0.0122, "norm_loss": 0.0, "num_token_doc": 66.857, "num_token_overlap": 18.0675, "num_token_query": 52.3173, "num_token_union": 73.4578, "num_word_context": 202.2324, "num_word_doc": 49.8711, "num_word_query": 39.9004, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1211.4253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2729, "query_norm": 1.4476, "queue_k_norm": 1.4389, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3173, "sent_len_1": 66.857, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9, "stdk": 0.0481, "stdq": 0.0468, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.9217, "doc_norm": 1.4335, "encoder_q-embeddings": 440.7316, "encoder_q-layer.0": 306.8963, "encoder_q-layer.1": 331.366, "encoder_q-layer.10": 383.3058, "encoder_q-layer.11": 870.2136, "encoder_q-layer.2": 364.1699, "encoder_q-layer.3": 354.4974, "encoder_q-layer.4": 374.9851, "encoder_q-layer.5": 380.6341, "encoder_q-layer.6": 400.0782, "encoder_q-layer.7": 426.2603, "encoder_q-layer.8": 493.0781, "encoder_q-layer.9": 335.9834, "epoch": 0.48, "inbatch_neg_score": 0.2812, "inbatch_pos_score": 0.9487, "learning_rate": 2.816666666666667e-05, "loss": 2.9217, "norm_diff": 0.0093, "norm_loss": 0.0, "num_token_doc": 66.7118, "num_token_overlap": 17.9608, "num_token_query": 52.2216, "num_token_union": 73.5067, "num_word_context": 202.6773, "num_word_doc": 49.7895, "num_word_query": 39.7903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 676.5092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2786, "query_norm": 1.4268, "queue_k_norm": 1.4356, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2216, "sent_len_1": 66.7118, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.4712, "stdk": 0.048, "stdq": 0.0457, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.9229, "doc_norm": 1.4399, "encoder_q-embeddings": 618.4429, "encoder_q-layer.0": 433.3876, "encoder_q-layer.1": 512.25, "encoder_q-layer.10": 316.9945, "encoder_q-layer.11": 753.261, "encoder_q-layer.2": 555.8785, "encoder_q-layer.3": 500.621, "encoder_q-layer.4": 460.7967, "encoder_q-layer.5": 451.4447, "encoder_q-layer.6": 455.2357, "encoder_q-layer.7": 452.6676, "encoder_q-layer.8": 457.7757, "encoder_q-layer.9": 341.2628, "epoch": 0.48, "inbatch_neg_score": 0.2804, "inbatch_pos_score": 1.0, "learning_rate": 2.811111111111111e-05, "loss": 2.9229, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.7446, "num_token_overlap": 18.0233, "num_token_query": 52.2187, "num_token_union": 73.4312, "num_word_context": 202.2273, "num_word_doc": 49.7989, "num_word_query": 39.8211, "postclip_grad_norm": 1.0, "preclip_grad_norm": 758.895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2793, "query_norm": 1.4501, "queue_k_norm": 1.4378, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2187, "sent_len_1": 66.7446, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.8762, "stdk": 0.0483, "stdq": 0.0466, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.9107, "doc_norm": 1.4371, "encoder_q-embeddings": 1037.2859, "encoder_q-layer.0": 720.8308, "encoder_q-layer.1": 802.5339, "encoder_q-layer.10": 311.0871, "encoder_q-layer.11": 726.1478, "encoder_q-layer.2": 878.7706, "encoder_q-layer.3": 927.4095, "encoder_q-layer.4": 798.0802, "encoder_q-layer.5": 746.6893, "encoder_q-layer.6": 643.5981, "encoder_q-layer.7": 552.1051, "encoder_q-layer.8": 443.8072, "encoder_q-layer.9": 292.9648, "epoch": 0.48, "inbatch_neg_score": 0.283, "inbatch_pos_score": 0.9785, "learning_rate": 2.8055555555555557e-05, "loss": 2.9107, "norm_diff": 0.0069, "norm_loss": 0.0, "num_token_doc": 66.9232, "num_token_overlap": 18.0733, "num_token_query": 52.4252, "num_token_union": 73.5815, "num_word_context": 202.608, "num_word_doc": 49.932, "num_word_query": 39.9604, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1100.49, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2825, "query_norm": 1.4393, "queue_k_norm": 1.4375, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4252, "sent_len_1": 66.9232, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3013, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.9107, "doc_norm": 1.4373, "encoder_q-embeddings": 9192.7295, "encoder_q-layer.0": 6695.7759, "encoder_q-layer.1": 7447.7773, "encoder_q-layer.10": 305.776, "encoder_q-layer.11": 703.0659, "encoder_q-layer.2": 7912.6514, "encoder_q-layer.3": 8859.2314, "encoder_q-layer.4": 9997.0088, "encoder_q-layer.5": 9312.4541, "encoder_q-layer.6": 5640.6992, "encoder_q-layer.7": 3233.4583, "encoder_q-layer.8": 1409.8107, "encoder_q-layer.9": 355.5841, "epoch": 0.48, "inbatch_neg_score": 0.2758, "inbatch_pos_score": 0.9727, "learning_rate": 2.8000000000000003e-05, "loss": 2.9107, "norm_diff": 0.0094, "norm_loss": 0.0, "num_token_doc": 66.9889, "num_token_overlap": 18.0149, "num_token_query": 52.0862, "num_token_union": 73.5162, "num_word_context": 202.4358, "num_word_doc": 49.9356, "num_word_query": 39.6758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9890.656, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2751, "query_norm": 1.4341, "queue_k_norm": 1.4383, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0862, "sent_len_1": 66.9889, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.9575, "stdk": 0.0482, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.9331, "doc_norm": 1.4387, "encoder_q-embeddings": 836.0511, "encoder_q-layer.0": 559.6503, "encoder_q-layer.1": 641.1522, "encoder_q-layer.10": 353.5923, "encoder_q-layer.11": 763.8346, "encoder_q-layer.2": 778.5009, "encoder_q-layer.3": 821.4329, "encoder_q-layer.4": 871.489, "encoder_q-layer.5": 815.7789, "encoder_q-layer.6": 646.8369, "encoder_q-layer.7": 674.4459, "encoder_q-layer.8": 491.7276, "encoder_q-layer.9": 309.573, "epoch": 0.49, "inbatch_neg_score": 0.2804, "inbatch_pos_score": 0.98, "learning_rate": 2.7944444444444445e-05, "loss": 2.9331, "norm_diff": 0.0165, "norm_loss": 0.0, "num_token_doc": 66.681, "num_token_overlap": 18.0076, "num_token_query": 52.2552, "num_token_union": 73.366, "num_word_context": 202.1182, "num_word_doc": 49.7453, "num_word_query": 39.8066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1028.2683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2805, "query_norm": 1.4376, "queue_k_norm": 1.4369, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2552, "sent_len_1": 66.681, "sent_len_max_0": 128.0, "sent_len_max_1": 192.58, "stdk": 0.0483, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.9419, "doc_norm": 1.4327, "encoder_q-embeddings": 841.1396, "encoder_q-layer.0": 627.3513, "encoder_q-layer.1": 632.9268, "encoder_q-layer.10": 336.7077, "encoder_q-layer.11": 773.9298, "encoder_q-layer.2": 711.5857, "encoder_q-layer.3": 675.1454, "encoder_q-layer.4": 648.0214, "encoder_q-layer.5": 623.7129, "encoder_q-layer.6": 499.9271, "encoder_q-layer.7": 497.0621, "encoder_q-layer.8": 440.3832, "encoder_q-layer.9": 309.2757, "epoch": 0.49, "inbatch_neg_score": 0.2882, "inbatch_pos_score": 0.981, "learning_rate": 2.788888888888889e-05, "loss": 2.9419, "norm_diff": 0.0142, "norm_loss": 0.0, "num_token_doc": 66.7921, "num_token_overlap": 17.922, "num_token_query": 52.1678, "num_token_union": 73.4627, "num_word_context": 202.4396, "num_word_doc": 49.8269, "num_word_query": 39.7635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 927.383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.4427, "queue_k_norm": 1.4375, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1678, "sent_len_1": 66.7921, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.14, "stdk": 0.048, "stdq": 0.0461, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.9184, "doc_norm": 1.44, "encoder_q-embeddings": 1143.5348, "encoder_q-layer.0": 839.6434, "encoder_q-layer.1": 867.898, "encoder_q-layer.10": 341.7595, "encoder_q-layer.11": 750.6246, "encoder_q-layer.2": 1057.5594, "encoder_q-layer.3": 1106.4677, "encoder_q-layer.4": 1168.2046, "encoder_q-layer.5": 1209.1375, "encoder_q-layer.6": 1352.1635, "encoder_q-layer.7": 1594.3324, "encoder_q-layer.8": 1148.8868, "encoder_q-layer.9": 351.4597, "epoch": 0.49, "inbatch_neg_score": 0.2776, "inbatch_pos_score": 0.9741, "learning_rate": 2.7833333333333333e-05, "loss": 2.9184, "norm_diff": 0.0189, "norm_loss": 0.0, "num_token_doc": 66.7077, "num_token_overlap": 18.0598, "num_token_query": 52.3118, "num_token_union": 73.4449, "num_word_context": 202.3592, "num_word_doc": 49.7698, "num_word_query": 39.8798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1590.4107, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2773, "query_norm": 1.4248, "queue_k_norm": 1.4385, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3118, "sent_len_1": 66.7077, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4938, "stdk": 0.0483, "stdq": 0.0454, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.9214, "doc_norm": 1.4382, "encoder_q-embeddings": 5962.3398, "encoder_q-layer.0": 4330.6191, "encoder_q-layer.1": 5396.8423, "encoder_q-layer.10": 298.7983, "encoder_q-layer.11": 676.6602, "encoder_q-layer.2": 5937.8721, "encoder_q-layer.3": 5917.9092, "encoder_q-layer.4": 6242.8389, "encoder_q-layer.5": 5484.6968, "encoder_q-layer.6": 3951.8201, "encoder_q-layer.7": 3258.488, "encoder_q-layer.8": 2337.624, "encoder_q-layer.9": 530.206, "epoch": 0.49, "inbatch_neg_score": 0.2718, "inbatch_pos_score": 0.9824, "learning_rate": 2.777777777777778e-05, "loss": 2.9214, "norm_diff": 0.0193, "norm_loss": 0.0, "num_token_doc": 66.7161, "num_token_overlap": 18.0117, "num_token_query": 52.2403, "num_token_union": 73.4533, "num_word_context": 201.8495, "num_word_doc": 49.7839, "num_word_query": 39.8442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6752.2656, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.271, "query_norm": 1.4575, "queue_k_norm": 1.4389, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2403, "sent_len_1": 66.7161, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8663, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 28.0739, "dev_samples_per_second": 2.28, "dev_steps_per_second": 0.036, "epoch": 0.49, "step": 50000, "test_accuracy": 92.724609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4196408987045288, "test_doc_norm": 1.3949134349822998, "test_inbatch_neg_score": 0.6061748266220093, "test_inbatch_pos_score": 1.4897311925888062, "test_loss": 0.4196408987045288, "test_loss_align": 1.1742693185806274, "test_loss_unif": 3.838325023651123, "test_loss_unif_q@queue": 3.838324785232544, "test_norm_diff": 0.08178526163101196, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2703973054885864, "test_query_norm": 1.476698637008667, "test_queue_k_norm": 1.438612937927246, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04146450757980347, "test_stdq": 0.04182903468608856, "test_stdqueue_k": 0.0483328215777874, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.0739, "dev_samples_per_second": 2.28, "dev_steps_per_second": 0.036, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.34472, "eval_beir-arguana_recall@10": 0.57895, "eval_beir-arguana_recall@100": 0.87909, "eval_beir-arguana_recall@20": 0.71124, "eval_beir-avg_ndcg@10": 0.34186516666666666, "eval_beir-avg_recall@10": 0.4108739166666667, "eval_beir-avg_recall@100": 0.5950670833333334, "eval_beir-avg_recall@20": 0.4724614166666667, "eval_beir-cqadupstack_ndcg@10": 0.2446016666666667, "eval_beir-cqadupstack_recall@10": 0.3346291666666667, "eval_beir-cqadupstack_recall@100": 0.5591608333333333, "eval_beir-cqadupstack_recall@20": 0.39995416666666667, "eval_beir-fiqa_ndcg@10": 0.21756, "eval_beir-fiqa_recall@10": 0.27529, "eval_beir-fiqa_recall@100": 0.52411, "eval_beir-fiqa_recall@20": 0.34343, "eval_beir-nfcorpus_ndcg@10": 0.27308, "eval_beir-nfcorpus_recall@10": 0.13342, "eval_beir-nfcorpus_recall@100": 0.2541, "eval_beir-nfcorpus_recall@20": 0.1643, "eval_beir-nq_ndcg@10": 0.23647, "eval_beir-nq_recall@10": 0.39315, "eval_beir-nq_recall@100": 0.73349, "eval_beir-nq_recall@20": 0.50647, "eval_beir-quora_ndcg@10": 0.69671, "eval_beir-quora_recall@10": 0.81411, "eval_beir-quora_recall@100": 0.95036, "eval_beir-quora_recall@20": 0.86929, "eval_beir-scidocs_ndcg@10": 0.14092, "eval_beir-scidocs_recall@10": 0.14683, "eval_beir-scidocs_recall@100": 0.34115, "eval_beir-scidocs_recall@20": 0.19672, "eval_beir-scifact_ndcg@10": 0.58922, "eval_beir-scifact_recall@10": 0.75872, "eval_beir-scifact_recall@100": 0.91389, "eval_beir-scifact_recall@20": 0.82467, "eval_beir-trec-covid_ndcg@10": 0.51713, "eval_beir-trec-covid_recall@10": 0.56, "eval_beir-trec-covid_recall@100": 0.3946, "eval_beir-trec-covid_recall@20": 0.532, "eval_beir-webis-touche2020_ndcg@10": 0.15824, "eval_beir-webis-touche2020_recall@10": 0.11364, "eval_beir-webis-touche2020_recall@100": 0.40072, "eval_beir-webis-touche2020_recall@20": 0.17654, "eval_senteval-avg_sts": 0.7432295212911197, "eval_senteval-sickr_spearman": 0.7130202599157393, "eval_senteval-stsb_spearman": 0.7734387826665003, "step": 50000, "test_accuracy": 92.724609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4196408987045288, "test_doc_norm": 1.3949134349822998, "test_inbatch_neg_score": 0.6061748266220093, "test_inbatch_pos_score": 1.4897311925888062, "test_loss": 0.4196408987045288, "test_loss_align": 1.1742693185806274, "test_loss_unif": 3.838325023651123, "test_loss_unif_q@queue": 3.838324785232544, "test_norm_diff": 0.08178526163101196, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2703973054885864, "test_query_norm": 1.476698637008667, "test_queue_k_norm": 1.438612937927246, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04146450757980347, "test_stdq": 0.04182903468608856, "test_stdqueue_k": 0.0483328215777874, "test_stdqueue_q": 0.0 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.9028, "doc_norm": 1.4354, "encoder_q-embeddings": 537.0109, "encoder_q-layer.0": 358.4528, "encoder_q-layer.1": 406.5782, "encoder_q-layer.10": 317.3464, "encoder_q-layer.11": 723.2837, "encoder_q-layer.2": 487.1134, "encoder_q-layer.3": 527.5281, "encoder_q-layer.4": 521.7921, "encoder_q-layer.5": 470.6071, "encoder_q-layer.6": 437.3537, "encoder_q-layer.7": 392.757, "encoder_q-layer.8": 395.0898, "encoder_q-layer.9": 328.2053, "epoch": 0.49, "inbatch_neg_score": 0.2748, "inbatch_pos_score": 0.96, "learning_rate": 2.772222222222222e-05, "loss": 2.9028, "norm_diff": 0.0083, "norm_loss": 0.0, "num_token_doc": 66.9153, "num_token_overlap": 18.0567, "num_token_query": 52.3103, "num_token_union": 73.5458, "num_word_context": 202.6428, "num_word_doc": 49.9737, "num_word_query": 39.9084, "postclip_grad_norm": 1.0, "preclip_grad_norm": 705.2595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2742, "query_norm": 1.443, "queue_k_norm": 1.4386, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3103, "sent_len_1": 66.9153, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0387, "stdk": 0.0481, "stdq": 0.0459, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.9071, "doc_norm": 1.434, "encoder_q-embeddings": 726.2627, "encoder_q-layer.0": 495.0348, "encoder_q-layer.1": 545.9384, "encoder_q-layer.10": 315.721, "encoder_q-layer.11": 702.4811, "encoder_q-layer.2": 590.3867, "encoder_q-layer.3": 620.3971, "encoder_q-layer.4": 648.5148, "encoder_q-layer.5": 728.3973, "encoder_q-layer.6": 955.1431, "encoder_q-layer.7": 1199.8381, "encoder_q-layer.8": 1486.3721, "encoder_q-layer.9": 523.0106, "epoch": 0.49, "inbatch_neg_score": 0.285, "inbatch_pos_score": 0.9683, "learning_rate": 2.7666666666666667e-05, "loss": 2.9071, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.9619, "num_token_overlap": 18.0334, "num_token_query": 52.2585, "num_token_union": 73.5246, "num_word_context": 202.5148, "num_word_doc": 50.0067, "num_word_query": 39.859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1308.0621, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2837, "query_norm": 1.4446, "queue_k_norm": 1.4387, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2585, "sent_len_1": 66.9619, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0412, "stdk": 0.048, "stdq": 0.046, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.9112, "doc_norm": 1.437, "encoder_q-embeddings": 1715.5292, "encoder_q-layer.0": 1270.8479, "encoder_q-layer.1": 1382.8711, "encoder_q-layer.10": 352.3346, "encoder_q-layer.11": 799.1551, "encoder_q-layer.2": 1549.9562, "encoder_q-layer.3": 1603.6367, "encoder_q-layer.4": 1697.5299, "encoder_q-layer.5": 1751.7992, "encoder_q-layer.6": 1247.6737, "encoder_q-layer.7": 804.617, "encoder_q-layer.8": 578.5749, "encoder_q-layer.9": 362.8986, "epoch": 0.49, "inbatch_neg_score": 0.2911, "inbatch_pos_score": 0.9697, "learning_rate": 2.761111111111111e-05, "loss": 2.9112, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.6379, "num_token_overlap": 17.9957, "num_token_query": 52.2885, "num_token_union": 73.4197, "num_word_context": 202.3146, "num_word_doc": 49.7551, "num_word_query": 39.8825, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1879.4313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.446, "queue_k_norm": 1.4399, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2885, "sent_len_1": 66.6379, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1637, "stdk": 0.0482, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.9028, "doc_norm": 1.4334, "encoder_q-embeddings": 463.5266, "encoder_q-layer.0": 326.9698, "encoder_q-layer.1": 360.3557, "encoder_q-layer.10": 345.9194, "encoder_q-layer.11": 765.7211, "encoder_q-layer.2": 399.7808, "encoder_q-layer.3": 430.4082, "encoder_q-layer.4": 381.4376, "encoder_q-layer.5": 352.6484, "encoder_q-layer.6": 355.5184, "encoder_q-layer.7": 381.1123, "encoder_q-layer.8": 416.5076, "encoder_q-layer.9": 335.4344, "epoch": 0.49, "inbatch_neg_score": 0.2964, "inbatch_pos_score": 0.9751, "learning_rate": 2.7555555555555555e-05, "loss": 2.9028, "norm_diff": 0.0165, "norm_loss": 0.0, "num_token_doc": 66.8618, "num_token_overlap": 18.031, "num_token_query": 52.4037, "num_token_union": 73.5873, "num_word_context": 202.4635, "num_word_doc": 49.8998, "num_word_query": 39.9528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 639.1763, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.4469, "queue_k_norm": 1.4393, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4037, "sent_len_1": 66.8618, "sent_len_max_0": 127.995, "sent_len_max_1": 190.0962, "stdk": 0.048, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.8923, "doc_norm": 1.4412, "encoder_q-embeddings": 5448.2012, "encoder_q-layer.0": 3761.0701, "encoder_q-layer.1": 3015.3135, "encoder_q-layer.10": 367.2249, "encoder_q-layer.11": 779.8644, "encoder_q-layer.2": 3197.822, "encoder_q-layer.3": 2790.7825, "encoder_q-layer.4": 1680.564, "encoder_q-layer.5": 964.8918, "encoder_q-layer.6": 841.2042, "encoder_q-layer.7": 558.6395, "encoder_q-layer.8": 419.0306, "encoder_q-layer.9": 300.3017, "epoch": 0.49, "inbatch_neg_score": 0.295, "inbatch_pos_score": 0.9937, "learning_rate": 2.7500000000000004e-05, "loss": 2.8923, "norm_diff": 0.0123, "norm_loss": 0.0, "num_token_doc": 67.0718, "num_token_overlap": 18.0706, "num_token_query": 52.3501, "num_token_union": 73.6466, "num_word_context": 202.2209, "num_word_doc": 50.0567, "num_word_query": 39.8994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4100.6018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.4395, "queue_k_norm": 1.4412, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3501, "sent_len_1": 67.0718, "sent_len_max_0": 128.0, "sent_len_max_1": 191.285, "stdk": 0.0483, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.9215, "doc_norm": 1.4458, "encoder_q-embeddings": 1088.3741, "encoder_q-layer.0": 745.1572, "encoder_q-layer.1": 800.9445, "encoder_q-layer.10": 374.3969, "encoder_q-layer.11": 747.2311, "encoder_q-layer.2": 885.3853, "encoder_q-layer.3": 857.9889, "encoder_q-layer.4": 842.6978, "encoder_q-layer.5": 658.7135, "encoder_q-layer.6": 561.6793, "encoder_q-layer.7": 564.0769, "encoder_q-layer.8": 568.5649, "encoder_q-layer.9": 418.9778, "epoch": 0.49, "inbatch_neg_score": 0.3056, "inbatch_pos_score": 1.0264, "learning_rate": 2.7444444444444443e-05, "loss": 2.9215, "norm_diff": 0.0207, "norm_loss": 0.0, "num_token_doc": 66.6014, "num_token_overlap": 18.0093, "num_token_query": 52.293, "num_token_union": 73.3918, "num_word_context": 202.1532, "num_word_doc": 49.7363, "num_word_query": 39.8517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1117.7995, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.4665, "queue_k_norm": 1.4414, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.293, "sent_len_1": 66.6014, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.0, "stdk": 0.0485, "stdq": 0.0468, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.8912, "doc_norm": 1.4465, "encoder_q-embeddings": 1463.3645, "encoder_q-layer.0": 1062.1267, "encoder_q-layer.1": 1138.657, "encoder_q-layer.10": 675.222, "encoder_q-layer.11": 1578.0143, "encoder_q-layer.2": 1342.0408, "encoder_q-layer.3": 1422.8807, "encoder_q-layer.4": 1641.4626, "encoder_q-layer.5": 1527.6455, "encoder_q-layer.6": 1657.4377, "encoder_q-layer.7": 1933.0538, "encoder_q-layer.8": 2047.8839, "encoder_q-layer.9": 831.1685, "epoch": 0.49, "inbatch_neg_score": 0.2947, "inbatch_pos_score": 0.9805, "learning_rate": 2.7388888888888892e-05, "loss": 2.8912, "norm_diff": 0.0199, "norm_loss": 0.0, "num_token_doc": 66.9973, "num_token_overlap": 18.0605, "num_token_query": 52.36, "num_token_union": 73.6426, "num_word_context": 202.5956, "num_word_doc": 49.969, "num_word_query": 39.8934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2224.1398, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.4345, "queue_k_norm": 1.4424, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.36, "sent_len_1": 66.9973, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.9, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.9017, "doc_norm": 1.4404, "encoder_q-embeddings": 2505.2861, "encoder_q-layer.0": 1826.8107, "encoder_q-layer.1": 2337.8601, "encoder_q-layer.10": 683.4299, "encoder_q-layer.11": 1612.5515, "encoder_q-layer.2": 2785.8596, "encoder_q-layer.3": 3471.6045, "encoder_q-layer.4": 3337.9397, "encoder_q-layer.5": 1946.6749, "encoder_q-layer.6": 1825.5696, "encoder_q-layer.7": 1727.8781, "encoder_q-layer.8": 1210.7318, "encoder_q-layer.9": 731.6221, "epoch": 0.5, "inbatch_neg_score": 0.3042, "inbatch_pos_score": 1.0137, "learning_rate": 2.733333333333333e-05, "loss": 2.9017, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.6733, "num_token_overlap": 17.996, "num_token_query": 52.1617, "num_token_union": 73.351, "num_word_context": 202.4098, "num_word_doc": 49.7762, "num_word_query": 39.7469, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3312.3404, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.4746, "queue_k_norm": 1.4411, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1617, "sent_len_1": 66.6733, "sent_len_max_0": 127.9862, "sent_len_max_1": 187.0838, "stdk": 0.0482, "stdq": 0.0472, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 2.9137, "doc_norm": 1.4387, "encoder_q-embeddings": 2520.3301, "encoder_q-layer.0": 1859.8887, "encoder_q-layer.1": 2104.1887, "encoder_q-layer.10": 682.2574, "encoder_q-layer.11": 1592.2931, "encoder_q-layer.2": 2252.7554, "encoder_q-layer.3": 2712.3108, "encoder_q-layer.4": 2875.5432, "encoder_q-layer.5": 2570.3308, "encoder_q-layer.6": 2386.5764, "encoder_q-layer.7": 2210.4116, "encoder_q-layer.8": 1565.2214, "encoder_q-layer.9": 764.7748, "epoch": 0.5, "inbatch_neg_score": 0.3091, "inbatch_pos_score": 0.999, "learning_rate": 2.727777777777778e-05, "loss": 2.9137, "norm_diff": 0.0106, "norm_loss": 0.0, "num_token_doc": 66.8953, "num_token_overlap": 18.0039, "num_token_query": 52.3502, "num_token_union": 73.5796, "num_word_context": 202.7568, "num_word_doc": 49.892, "num_word_query": 39.8994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3173.4223, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3081, "query_norm": 1.4491, "queue_k_norm": 1.4445, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3502, "sent_len_1": 66.8953, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0513, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.9144, "doc_norm": 1.4454, "encoder_q-embeddings": 1008.6769, "encoder_q-layer.0": 776.3313, "encoder_q-layer.1": 791.9998, "encoder_q-layer.10": 611.0173, "encoder_q-layer.11": 1436.1007, "encoder_q-layer.2": 848.8449, "encoder_q-layer.3": 804.7079, "encoder_q-layer.4": 839.0889, "encoder_q-layer.5": 855.5687, "encoder_q-layer.6": 973.9408, "encoder_q-layer.7": 1106.2195, "encoder_q-layer.8": 1135.0238, "encoder_q-layer.9": 770.5551, "epoch": 0.5, "inbatch_neg_score": 0.3026, "inbatch_pos_score": 1.0078, "learning_rate": 2.7222222222222223e-05, "loss": 2.9144, "norm_diff": 0.013, "norm_loss": 0.0, "num_token_doc": 66.7919, "num_token_overlap": 17.9272, "num_token_query": 52.1169, "num_token_union": 73.4597, "num_word_context": 202.6617, "num_word_doc": 49.8274, "num_word_query": 39.7429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1418.4202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4372, "queue_k_norm": 1.4423, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1169, "sent_len_1": 66.7919, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.745, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.9166, "doc_norm": 1.4484, "encoder_q-embeddings": 2475.334, "encoder_q-layer.0": 1801.6826, "encoder_q-layer.1": 2005.0997, "encoder_q-layer.10": 639.756, "encoder_q-layer.11": 1484.3502, "encoder_q-layer.2": 2161.6611, "encoder_q-layer.3": 1907.7103, "encoder_q-layer.4": 1920.6951, "encoder_q-layer.5": 1758.4835, "encoder_q-layer.6": 1743.5999, "encoder_q-layer.7": 1478.4476, "encoder_q-layer.8": 1003.8093, "encoder_q-layer.9": 652.8934, "epoch": 0.5, "inbatch_neg_score": 0.3049, "inbatch_pos_score": 0.9956, "learning_rate": 2.716666666666667e-05, "loss": 2.9166, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.5751, "num_token_overlap": 17.9555, "num_token_query": 52.1641, "num_token_union": 73.3406, "num_word_context": 201.9378, "num_word_doc": 49.6942, "num_word_query": 39.7756, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2606.8204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3057, "query_norm": 1.4431, "queue_k_norm": 1.4445, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1641, "sent_len_1": 66.5751, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.6037, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 2.8907, "doc_norm": 1.4461, "encoder_q-embeddings": 2608.0732, "encoder_q-layer.0": 2070.8752, "encoder_q-layer.1": 2273.5774, "encoder_q-layer.10": 685.4066, "encoder_q-layer.11": 1531.8112, "encoder_q-layer.2": 2711.4092, "encoder_q-layer.3": 2688.1221, "encoder_q-layer.4": 2449.5444, "encoder_q-layer.5": 2128.824, "encoder_q-layer.6": 2237.2454, "encoder_q-layer.7": 2128.2534, "encoder_q-layer.8": 2341.9248, "encoder_q-layer.9": 1381.1252, "epoch": 0.5, "inbatch_neg_score": 0.2973, "inbatch_pos_score": 0.9834, "learning_rate": 2.7111111111111114e-05, "loss": 2.8907, "norm_diff": 0.0269, "norm_loss": 0.0, "num_token_doc": 66.8655, "num_token_overlap": 18.0019, "num_token_query": 52.1971, "num_token_union": 73.4998, "num_word_context": 202.2714, "num_word_doc": 49.8853, "num_word_query": 39.798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3256.313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2961, "query_norm": 1.4716, "queue_k_norm": 1.4465, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1971, "sent_len_1": 66.8655, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.8325, "stdk": 0.0484, "stdq": 0.0472, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.8953, "doc_norm": 1.4463, "encoder_q-embeddings": 2325.4812, "encoder_q-layer.0": 1691.4966, "encoder_q-layer.1": 1943.9081, "encoder_q-layer.10": 675.3932, "encoder_q-layer.11": 1455.3414, "encoder_q-layer.2": 2521.6738, "encoder_q-layer.3": 2925.8867, "encoder_q-layer.4": 3439.127, "encoder_q-layer.5": 3361.7754, "encoder_q-layer.6": 3420.1958, "encoder_q-layer.7": 2067.0356, "encoder_q-layer.8": 1689.816, "encoder_q-layer.9": 738.6926, "epoch": 0.5, "inbatch_neg_score": 0.291, "inbatch_pos_score": 0.9912, "learning_rate": 2.7055555555555557e-05, "loss": 2.8953, "norm_diff": 0.0283, "norm_loss": 0.0, "num_token_doc": 67.0512, "num_token_overlap": 18.076, "num_token_query": 52.2968, "num_token_union": 73.5464, "num_word_context": 202.4239, "num_word_doc": 49.9746, "num_word_query": 39.8499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3432.29, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.418, "queue_k_norm": 1.4456, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2968, "sent_len_1": 67.0512, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1425, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.8979, "doc_norm": 1.446, "encoder_q-embeddings": 1257.1641, "encoder_q-layer.0": 884.3362, "encoder_q-layer.1": 1022.3442, "encoder_q-layer.10": 636.4166, "encoder_q-layer.11": 1404.4436, "encoder_q-layer.2": 1160.9738, "encoder_q-layer.3": 1273.1797, "encoder_q-layer.4": 1376.8969, "encoder_q-layer.5": 1271.8568, "encoder_q-layer.6": 1249.6272, "encoder_q-layer.7": 1004.4391, "encoder_q-layer.8": 844.3627, "encoder_q-layer.9": 584.2707, "epoch": 0.5, "inbatch_neg_score": 0.2756, "inbatch_pos_score": 0.9805, "learning_rate": 2.7000000000000002e-05, "loss": 2.8979, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.9241, "num_token_overlap": 18.0193, "num_token_query": 52.1969, "num_token_union": 73.5416, "num_word_context": 202.413, "num_word_doc": 49.9244, "num_word_query": 39.7717, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1644.5224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.4414, "queue_k_norm": 1.4439, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1969, "sent_len_1": 66.9241, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.085, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.9028, "doc_norm": 1.4438, "encoder_q-embeddings": 1814.886, "encoder_q-layer.0": 1297.0352, "encoder_q-layer.1": 1451.7343, "encoder_q-layer.10": 628.892, "encoder_q-layer.11": 1486.6074, "encoder_q-layer.2": 1724.7322, "encoder_q-layer.3": 1897.3577, "encoder_q-layer.4": 2035.2026, "encoder_q-layer.5": 2092.8457, "encoder_q-layer.6": 1816.3665, "encoder_q-layer.7": 1588.9467, "encoder_q-layer.8": 1239.6669, "encoder_q-layer.9": 679.4373, "epoch": 0.5, "inbatch_neg_score": 0.2851, "inbatch_pos_score": 0.9756, "learning_rate": 2.6944444444444445e-05, "loss": 2.9028, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.9282, "num_token_overlap": 18.0385, "num_token_query": 52.2624, "num_token_union": 73.5869, "num_word_context": 202.5894, "num_word_doc": 49.9497, "num_word_query": 39.8521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2376.0039, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2844, "query_norm": 1.4462, "queue_k_norm": 1.4453, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2624, "sent_len_1": 66.9282, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.0037, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.8905, "doc_norm": 1.4312, "encoder_q-embeddings": 783.446, "encoder_q-layer.0": 506.3916, "encoder_q-layer.1": 589.5538, "encoder_q-layer.10": 692.7335, "encoder_q-layer.11": 1449.8578, "encoder_q-layer.2": 639.9376, "encoder_q-layer.3": 640.6722, "encoder_q-layer.4": 705.0541, "encoder_q-layer.5": 727.5081, "encoder_q-layer.6": 810.9276, "encoder_q-layer.7": 775.2231, "encoder_q-layer.8": 877.3541, "encoder_q-layer.9": 629.5786, "epoch": 0.5, "inbatch_neg_score": 0.2729, "inbatch_pos_score": 0.9697, "learning_rate": 2.688888888888889e-05, "loss": 2.8905, "norm_diff": 0.0097, "norm_loss": 0.0, "num_token_doc": 66.7012, "num_token_overlap": 18.0315, "num_token_query": 52.286, "num_token_union": 73.3971, "num_word_context": 202.3889, "num_word_doc": 49.811, "num_word_query": 39.8674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1176.0851, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.272, "query_norm": 1.4322, "queue_k_norm": 1.4442, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.286, "sent_len_1": 66.7012, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9288, "stdk": 0.0478, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.899, "doc_norm": 1.4376, "encoder_q-embeddings": 2499.2466, "encoder_q-layer.0": 1829.4558, "encoder_q-layer.1": 1736.3622, "encoder_q-layer.10": 676.3901, "encoder_q-layer.11": 1556.8236, "encoder_q-layer.2": 1993.9382, "encoder_q-layer.3": 1711.9385, "encoder_q-layer.4": 1548.2384, "encoder_q-layer.5": 1589.3132, "encoder_q-layer.6": 1379.1371, "encoder_q-layer.7": 1161.6042, "encoder_q-layer.8": 959.4645, "encoder_q-layer.9": 618.5576, "epoch": 0.5, "inbatch_neg_score": 0.2855, "inbatch_pos_score": 0.9824, "learning_rate": 2.6833333333333333e-05, "loss": 2.899, "norm_diff": 0.0108, "norm_loss": 0.0, "num_token_doc": 66.625, "num_token_overlap": 18.0395, "num_token_query": 52.2792, "num_token_union": 73.3196, "num_word_context": 201.8579, "num_word_doc": 49.6758, "num_word_query": 39.872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2449.3596, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.4484, "queue_k_norm": 1.4446, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2792, "sent_len_1": 66.625, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.2862, "stdk": 0.048, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.8977, "doc_norm": 1.4491, "encoder_q-embeddings": 1023.9949, "encoder_q-layer.0": 700.6323, "encoder_q-layer.1": 767.651, "encoder_q-layer.10": 661.6569, "encoder_q-layer.11": 1512.4709, "encoder_q-layer.2": 907.7819, "encoder_q-layer.3": 945.4867, "encoder_q-layer.4": 1045.432, "encoder_q-layer.5": 1091.3645, "encoder_q-layer.6": 1233.4325, "encoder_q-layer.7": 1381.7395, "encoder_q-layer.8": 1650.535, "encoder_q-layer.9": 873.1734, "epoch": 0.51, "inbatch_neg_score": 0.2862, "inbatch_pos_score": 1.0088, "learning_rate": 2.677777777777778e-05, "loss": 2.8977, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.764, "num_token_overlap": 17.9749, "num_token_query": 52.0967, "num_token_union": 73.4353, "num_word_context": 202.0591, "num_word_doc": 49.8304, "num_word_query": 39.6856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1675.1689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.4642, "queue_k_norm": 1.444, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0967, "sent_len_1": 66.764, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.9938, "stdk": 0.0486, "stdq": 0.0471, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.8997, "doc_norm": 1.4482, "encoder_q-embeddings": 3248.2708, "encoder_q-layer.0": 2205.6233, "encoder_q-layer.1": 2441.709, "encoder_q-layer.10": 698.6635, "encoder_q-layer.11": 1564.1589, "encoder_q-layer.2": 2607.0642, "encoder_q-layer.3": 2443.8423, "encoder_q-layer.4": 2323.137, "encoder_q-layer.5": 2335.1223, "encoder_q-layer.6": 2107.8792, "encoder_q-layer.7": 1857.2947, "encoder_q-layer.8": 1424.1208, "encoder_q-layer.9": 839.6113, "epoch": 0.51, "inbatch_neg_score": 0.285, "inbatch_pos_score": 0.9868, "learning_rate": 2.6722222222222228e-05, "loss": 2.8997, "norm_diff": 0.0166, "norm_loss": 0.0, "num_token_doc": 66.6549, "num_token_overlap": 17.963, "num_token_query": 52.1557, "num_token_union": 73.4182, "num_word_context": 202.4792, "num_word_doc": 49.7413, "num_word_query": 39.7496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3258.1946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2849, "query_norm": 1.4419, "queue_k_norm": 1.4425, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1557, "sent_len_1": 66.6549, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.275, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.8736, "doc_norm": 1.4426, "encoder_q-embeddings": 738.4327, "encoder_q-layer.0": 499.5642, "encoder_q-layer.1": 570.6121, "encoder_q-layer.10": 711.522, "encoder_q-layer.11": 1561.8796, "encoder_q-layer.2": 728.4628, "encoder_q-layer.3": 675.3767, "encoder_q-layer.4": 657.6398, "encoder_q-layer.5": 708.1599, "encoder_q-layer.6": 691.2543, "encoder_q-layer.7": 719.3043, "encoder_q-layer.8": 765.5875, "encoder_q-layer.9": 630.634, "epoch": 0.51, "inbatch_neg_score": 0.2857, "inbatch_pos_score": 0.998, "learning_rate": 2.6666666666666667e-05, "loss": 2.8736, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.8523, "num_token_overlap": 17.9647, "num_token_query": 52.2182, "num_token_union": 73.5201, "num_word_context": 202.5535, "num_word_doc": 49.8446, "num_word_query": 39.8203, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1198.0604, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2871, "query_norm": 1.4515, "queue_k_norm": 1.4422, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2182, "sent_len_1": 66.8523, "sent_len_max_0": 127.99, "sent_len_max_1": 190.7875, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 2.8589, "doc_norm": 1.4482, "encoder_q-embeddings": 2567.3979, "encoder_q-layer.0": 1801.0879, "encoder_q-layer.1": 2039.3127, "encoder_q-layer.10": 688.9874, "encoder_q-layer.11": 1503.65, "encoder_q-layer.2": 2218.436, "encoder_q-layer.3": 2030.3898, "encoder_q-layer.4": 1958.6907, "encoder_q-layer.5": 1798.8123, "encoder_q-layer.6": 1364.1967, "encoder_q-layer.7": 886.2627, "encoder_q-layer.8": 791.3376, "encoder_q-layer.9": 650.7123, "epoch": 0.51, "inbatch_neg_score": 0.2914, "inbatch_pos_score": 0.9937, "learning_rate": 2.6611111111111116e-05, "loss": 2.8589, "norm_diff": 0.0327, "norm_loss": 0.0, "num_token_doc": 66.9082, "num_token_overlap": 18.1149, "num_token_query": 52.3771, "num_token_union": 73.5213, "num_word_context": 202.266, "num_word_doc": 49.9506, "num_word_query": 39.9608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2587.5294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.4809, "queue_k_norm": 1.4438, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3771, "sent_len_1": 66.9082, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9462, "stdk": 0.0485, "stdq": 0.0473, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.8859, "doc_norm": 1.4434, "encoder_q-embeddings": 1304.0514, "encoder_q-layer.0": 889.931, "encoder_q-layer.1": 1118.4695, "encoder_q-layer.10": 617.573, "encoder_q-layer.11": 1408.8274, "encoder_q-layer.2": 1207.4354, "encoder_q-layer.3": 1153.4285, "encoder_q-layer.4": 1057.5292, "encoder_q-layer.5": 1106.7473, "encoder_q-layer.6": 1106.8458, "encoder_q-layer.7": 1063.4906, "encoder_q-layer.8": 909.5299, "encoder_q-layer.9": 619.1722, "epoch": 0.51, "inbatch_neg_score": 0.2992, "inbatch_pos_score": 1.0391, "learning_rate": 2.6555555555555555e-05, "loss": 2.8859, "norm_diff": 0.041, "norm_loss": 0.0, "num_token_doc": 66.6433, "num_token_overlap": 18.0627, "num_token_query": 52.3339, "num_token_union": 73.4023, "num_word_context": 202.3609, "num_word_doc": 49.7719, "num_word_query": 39.8943, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1610.3342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4844, "queue_k_norm": 1.4433, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3339, "sent_len_1": 66.6433, "sent_len_max_0": 128.0, "sent_len_max_1": 186.44, "stdk": 0.0483, "stdq": 0.0472, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.8588, "doc_norm": 1.4452, "encoder_q-embeddings": 1932.1086, "encoder_q-layer.0": 1352.8582, "encoder_q-layer.1": 1366.3383, "encoder_q-layer.10": 664.7368, "encoder_q-layer.11": 1479.4066, "encoder_q-layer.2": 1532.6248, "encoder_q-layer.3": 1656.2404, "encoder_q-layer.4": 1592.4532, "encoder_q-layer.5": 1634.1677, "encoder_q-layer.6": 2198.0454, "encoder_q-layer.7": 3489.3467, "encoder_q-layer.8": 4386.6133, "encoder_q-layer.9": 1496.6388, "epoch": 0.51, "inbatch_neg_score": 0.3037, "inbatch_pos_score": 1.0039, "learning_rate": 2.6500000000000004e-05, "loss": 2.8588, "norm_diff": 0.0181, "norm_loss": 0.0, "num_token_doc": 66.8614, "num_token_overlap": 18.0756, "num_token_query": 52.2697, "num_token_union": 73.4516, "num_word_context": 202.4508, "num_word_doc": 49.8563, "num_word_query": 39.8336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3540.6789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.303, "query_norm": 1.4614, "queue_k_norm": 1.4439, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2697, "sent_len_1": 66.8614, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.3938, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.8761, "doc_norm": 1.4402, "encoder_q-embeddings": 609.0621, "encoder_q-layer.0": 416.6591, "encoder_q-layer.1": 464.3449, "encoder_q-layer.10": 635.8068, "encoder_q-layer.11": 1532.368, "encoder_q-layer.2": 501.7723, "encoder_q-layer.3": 511.0597, "encoder_q-layer.4": 504.5789, "encoder_q-layer.5": 511.3469, "encoder_q-layer.6": 578.638, "encoder_q-layer.7": 621.636, "encoder_q-layer.8": 752.7479, "encoder_q-layer.9": 642.5096, "epoch": 0.51, "inbatch_neg_score": 0.3128, "inbatch_pos_score": 1.0127, "learning_rate": 2.6444444444444443e-05, "loss": 2.8761, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.5978, "num_token_overlap": 17.9796, "num_token_query": 52.02, "num_token_union": 73.2592, "num_word_context": 201.9269, "num_word_doc": 49.753, "num_word_query": 39.6848, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1047.3592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3118, "query_norm": 1.4609, "queue_k_norm": 1.4442, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.02, "sent_len_1": 66.5978, "sent_len_max_0": 127.97, "sent_len_max_1": 186.6738, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.8795, "doc_norm": 1.4458, "encoder_q-embeddings": 684.9451, "encoder_q-layer.0": 463.0541, "encoder_q-layer.1": 527.028, "encoder_q-layer.10": 638.0474, "encoder_q-layer.11": 1403.6145, "encoder_q-layer.2": 602.2437, "encoder_q-layer.3": 585.8364, "encoder_q-layer.4": 582.8268, "encoder_q-layer.5": 585.5116, "encoder_q-layer.6": 653.1063, "encoder_q-layer.7": 650.0836, "encoder_q-layer.8": 811.6504, "encoder_q-layer.9": 644.5429, "epoch": 0.51, "inbatch_neg_score": 0.3139, "inbatch_pos_score": 1.0283, "learning_rate": 2.6388888888888892e-05, "loss": 2.8795, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 67.0854, "num_token_overlap": 18.0118, "num_token_query": 52.2671, "num_token_union": 73.6837, "num_word_context": 202.8128, "num_word_doc": 50.0782, "num_word_query": 39.8569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1082.8495, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3145, "query_norm": 1.4632, "queue_k_norm": 1.4459, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2671, "sent_len_1": 67.0854, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0362, "stdk": 0.0483, "stdq": 0.0467, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.8841, "doc_norm": 1.4503, "encoder_q-embeddings": 5236.9878, "encoder_q-layer.0": 3844.6155, "encoder_q-layer.1": 3939.6284, "encoder_q-layer.10": 618.5358, "encoder_q-layer.11": 1404.1858, "encoder_q-layer.2": 4135.4102, "encoder_q-layer.3": 4143.1636, "encoder_q-layer.4": 2990.4575, "encoder_q-layer.5": 2647.187, "encoder_q-layer.6": 2060.3125, "encoder_q-layer.7": 1350.6899, "encoder_q-layer.8": 1125.7555, "encoder_q-layer.9": 705.5525, "epoch": 0.51, "inbatch_neg_score": 0.318, "inbatch_pos_score": 1.0361, "learning_rate": 2.633333333333333e-05, "loss": 2.8841, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.6139, "num_token_overlap": 17.9243, "num_token_query": 52.0879, "num_token_union": 73.3513, "num_word_context": 202.104, "num_word_doc": 49.6794, "num_word_query": 39.7026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4749.471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3169, "query_norm": 1.4586, "queue_k_norm": 1.4464, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0879, "sent_len_1": 66.6139, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.22, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.8982, "doc_norm": 1.4539, "encoder_q-embeddings": 9606.623, "encoder_q-layer.0": 7173.4473, "encoder_q-layer.1": 8239.6484, "encoder_q-layer.10": 1338.9363, "encoder_q-layer.11": 2923.1772, "encoder_q-layer.2": 7420.624, "encoder_q-layer.3": 4603.4502, "encoder_q-layer.4": 3345.9155, "encoder_q-layer.5": 2219.1726, "encoder_q-layer.6": 1843.4376, "encoder_q-layer.7": 1541.8734, "encoder_q-layer.8": 1510.7222, "encoder_q-layer.9": 1349.6521, "epoch": 0.51, "inbatch_neg_score": 0.316, "inbatch_pos_score": 1.0098, "learning_rate": 2.627777777777778e-05, "loss": 2.8982, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.7812, "num_token_overlap": 18.0309, "num_token_query": 52.284, "num_token_union": 73.4665, "num_word_context": 202.4998, "num_word_doc": 49.8833, "num_word_query": 39.8612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8031.5679, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3152, "query_norm": 1.4454, "queue_k_norm": 1.4482, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.284, "sent_len_1": 66.7812, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5213, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.8705, "doc_norm": 1.4441, "encoder_q-embeddings": 1243.6351, "encoder_q-layer.0": 794.9989, "encoder_q-layer.1": 898.3564, "encoder_q-layer.10": 1296.3818, "encoder_q-layer.11": 2974.8809, "encoder_q-layer.2": 1016.7589, "encoder_q-layer.3": 1032.0785, "encoder_q-layer.4": 1089.8314, "encoder_q-layer.5": 991.6116, "encoder_q-layer.6": 1024.7124, "encoder_q-layer.7": 1110.4491, "encoder_q-layer.8": 1314.9375, "encoder_q-layer.9": 1187.9258, "epoch": 0.52, "inbatch_neg_score": 0.3108, "inbatch_pos_score": 1.0176, "learning_rate": 2.6222222222222226e-05, "loss": 2.8705, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.9912, "num_token_overlap": 18.1254, "num_token_query": 52.4275, "num_token_union": 73.5853, "num_word_context": 202.312, "num_word_doc": 49.9751, "num_word_query": 39.9788, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2043.8456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3105, "query_norm": 1.4395, "queue_k_norm": 1.4488, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4275, "sent_len_1": 66.9912, "sent_len_max_0": 128.0, "sent_len_max_1": 189.975, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.8715, "doc_norm": 1.4484, "encoder_q-embeddings": 1359.2722, "encoder_q-layer.0": 944.7675, "encoder_q-layer.1": 1053.7365, "encoder_q-layer.10": 1292.3282, "encoder_q-layer.11": 2891.1118, "encoder_q-layer.2": 1108.8617, "encoder_q-layer.3": 1099.4248, "encoder_q-layer.4": 998.4524, "encoder_q-layer.5": 1014.4463, "encoder_q-layer.6": 1053.8953, "encoder_q-layer.7": 1109.9905, "encoder_q-layer.8": 1269.0145, "encoder_q-layer.9": 1180.8397, "epoch": 0.52, "inbatch_neg_score": 0.3089, "inbatch_pos_score": 1.0293, "learning_rate": 2.6166666666666668e-05, "loss": 2.8715, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.7655, "num_token_overlap": 18.0347, "num_token_query": 52.2262, "num_token_union": 73.4532, "num_word_context": 202.5168, "num_word_doc": 49.8175, "num_word_query": 39.8263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2026.6295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3101, "query_norm": 1.4543, "queue_k_norm": 1.4487, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2262, "sent_len_1": 66.7655, "sent_len_max_0": 127.99, "sent_len_max_1": 190.0188, "stdk": 0.0484, "stdq": 0.0469, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.8801, "doc_norm": 1.4551, "encoder_q-embeddings": 2000.8452, "encoder_q-layer.0": 1346.2285, "encoder_q-layer.1": 1475.7026, "encoder_q-layer.10": 1308.749, "encoder_q-layer.11": 2833.5403, "encoder_q-layer.2": 1678.0992, "encoder_q-layer.3": 1871.9436, "encoder_q-layer.4": 1749.5259, "encoder_q-layer.5": 1745.894, "encoder_q-layer.6": 1455.0453, "encoder_q-layer.7": 1563.6542, "encoder_q-layer.8": 1623.6362, "encoder_q-layer.9": 1347.8408, "epoch": 0.52, "inbatch_neg_score": 0.3126, "inbatch_pos_score": 1.0312, "learning_rate": 2.6111111111111114e-05, "loss": 2.8801, "norm_diff": 0.0266, "norm_loss": 0.0, "num_token_doc": 66.7091, "num_token_overlap": 18.0115, "num_token_query": 52.0734, "num_token_union": 73.3409, "num_word_context": 202.1461, "num_word_doc": 49.7401, "num_word_query": 39.6841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2638.6718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3118, "query_norm": 1.4285, "queue_k_norm": 1.4483, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0734, "sent_len_1": 66.7091, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0625, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.8677, "doc_norm": 1.4499, "encoder_q-embeddings": 1249.6053, "encoder_q-layer.0": 824.6915, "encoder_q-layer.1": 863.1036, "encoder_q-layer.10": 1314.9611, "encoder_q-layer.11": 2911.9678, "encoder_q-layer.2": 962.2355, "encoder_q-layer.3": 962.0417, "encoder_q-layer.4": 950.8613, "encoder_q-layer.5": 905.3561, "encoder_q-layer.6": 994.0507, "encoder_q-layer.7": 1116.9407, "encoder_q-layer.8": 1325.9449, "encoder_q-layer.9": 1192.3878, "epoch": 0.52, "inbatch_neg_score": 0.3167, "inbatch_pos_score": 1.0186, "learning_rate": 2.6055555555555556e-05, "loss": 2.8677, "norm_diff": 0.0094, "norm_loss": 0.0, "num_token_doc": 66.6045, "num_token_overlap": 17.9857, "num_token_query": 52.2054, "num_token_union": 73.36, "num_word_context": 202.1359, "num_word_doc": 49.695, "num_word_query": 39.8215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2003.0021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3159, "query_norm": 1.4479, "queue_k_norm": 1.4479, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2054, "sent_len_1": 66.6045, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2925, "stdk": 0.0483, "stdq": 0.0465, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.8727, "doc_norm": 1.451, "encoder_q-embeddings": 1173.5907, "encoder_q-layer.0": 744.1599, "encoder_q-layer.1": 825.1329, "encoder_q-layer.10": 1320.9086, "encoder_q-layer.11": 2960.1086, "encoder_q-layer.2": 883.0341, "encoder_q-layer.3": 876.9339, "encoder_q-layer.4": 900.8442, "encoder_q-layer.5": 907.1689, "encoder_q-layer.6": 1009.231, "encoder_q-layer.7": 1104.7649, "encoder_q-layer.8": 1394.6023, "encoder_q-layer.9": 1201.9364, "epoch": 0.52, "inbatch_neg_score": 0.3063, "inbatch_pos_score": 1.0176, "learning_rate": 2.6000000000000002e-05, "loss": 2.8727, "norm_diff": 0.0358, "norm_loss": 0.0, "num_token_doc": 66.7567, "num_token_overlap": 17.9801, "num_token_query": 52.2014, "num_token_union": 73.4525, "num_word_context": 201.9214, "num_word_doc": 49.7328, "num_word_query": 39.7425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2023.2221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.4152, "queue_k_norm": 1.4491, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2014, "sent_len_1": 66.7567, "sent_len_max_0": 127.9912, "sent_len_max_1": 192.795, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 2.8704, "doc_norm": 1.4497, "encoder_q-embeddings": 2521.2656, "encoder_q-layer.0": 1723.6873, "encoder_q-layer.1": 1879.3441, "encoder_q-layer.10": 1306.2582, "encoder_q-layer.11": 3124.646, "encoder_q-layer.2": 2162.2485, "encoder_q-layer.3": 2281.2056, "encoder_q-layer.4": 2432.2915, "encoder_q-layer.5": 2580.7839, "encoder_q-layer.6": 3084.5725, "encoder_q-layer.7": 3238.7363, "encoder_q-layer.8": 3627.9353, "encoder_q-layer.9": 1955.6763, "epoch": 0.52, "inbatch_neg_score": 0.302, "inbatch_pos_score": 0.9912, "learning_rate": 2.5944444444444444e-05, "loss": 2.8704, "norm_diff": 0.0232, "norm_loss": 0.0, "num_token_doc": 66.8162, "num_token_overlap": 17.9871, "num_token_query": 52.2275, "num_token_union": 73.5183, "num_word_context": 202.3563, "num_word_doc": 49.8432, "num_word_query": 39.8038, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3898.1744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4397, "queue_k_norm": 1.4485, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2275, "sent_len_1": 66.8162, "sent_len_max_0": 128.0, "sent_len_max_1": 190.935, "stdk": 0.0483, "stdq": 0.0465, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.8976, "doc_norm": 1.4538, "encoder_q-embeddings": 1954.2007, "encoder_q-layer.0": 1345.9956, "encoder_q-layer.1": 1394.4115, "encoder_q-layer.10": 1260.0687, "encoder_q-layer.11": 2900.7896, "encoder_q-layer.2": 1561.5973, "encoder_q-layer.3": 1499.515, "encoder_q-layer.4": 1524.8365, "encoder_q-layer.5": 1486.4991, "encoder_q-layer.6": 1490.8131, "encoder_q-layer.7": 1615.0461, "encoder_q-layer.8": 1570.7284, "encoder_q-layer.9": 1283.6643, "epoch": 0.52, "inbatch_neg_score": 0.3069, "inbatch_pos_score": 0.9917, "learning_rate": 2.588888888888889e-05, "loss": 2.8976, "norm_diff": 0.0267, "norm_loss": 0.0, "num_token_doc": 66.6885, "num_token_overlap": 17.9399, "num_token_query": 52.1846, "num_token_union": 73.3859, "num_word_context": 202.4243, "num_word_doc": 49.7497, "num_word_query": 39.7772, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2543.4718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.4271, "queue_k_norm": 1.4506, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1846, "sent_len_1": 66.6885, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6625, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.8805, "doc_norm": 1.4506, "encoder_q-embeddings": 1959.771, "encoder_q-layer.0": 1316.0922, "encoder_q-layer.1": 1286.8192, "encoder_q-layer.10": 1316.0262, "encoder_q-layer.11": 2958.7825, "encoder_q-layer.2": 1512.3063, "encoder_q-layer.3": 1534.4156, "encoder_q-layer.4": 1539.5302, "encoder_q-layer.5": 1450.0623, "encoder_q-layer.6": 1398.7379, "encoder_q-layer.7": 1361.1399, "encoder_q-layer.8": 1371.6646, "encoder_q-layer.9": 1189.9084, "epoch": 0.52, "inbatch_neg_score": 0.3028, "inbatch_pos_score": 1.0225, "learning_rate": 2.5833333333333336e-05, "loss": 2.8805, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.5856, "num_token_overlap": 17.9459, "num_token_query": 52.0842, "num_token_union": 73.3677, "num_word_context": 202.0249, "num_word_doc": 49.6707, "num_word_query": 39.6798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2453.6281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4368, "queue_k_norm": 1.448, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0842, "sent_len_1": 66.5856, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6675, "stdk": 0.0484, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.8839, "doc_norm": 1.458, "encoder_q-embeddings": 1770.2395, "encoder_q-layer.0": 1217.1012, "encoder_q-layer.1": 1270.8064, "encoder_q-layer.10": 1460.4061, "encoder_q-layer.11": 2971.7598, "encoder_q-layer.2": 1506.208, "encoder_q-layer.3": 1547.1479, "encoder_q-layer.4": 1534.2426, "encoder_q-layer.5": 1526.2838, "encoder_q-layer.6": 1465.0278, "encoder_q-layer.7": 1416.4358, "encoder_q-layer.8": 1481.4932, "encoder_q-layer.9": 1280.5352, "epoch": 0.52, "inbatch_neg_score": 0.3061, "inbatch_pos_score": 1.0332, "learning_rate": 2.5777777777777778e-05, "loss": 2.8839, "norm_diff": 0.0207, "norm_loss": 0.0, "num_token_doc": 66.5309, "num_token_overlap": 17.9772, "num_token_query": 52.2277, "num_token_union": 73.3438, "num_word_context": 202.129, "num_word_doc": 49.6925, "num_word_query": 39.8334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2446.8583, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3069, "query_norm": 1.4373, "queue_k_norm": 1.4493, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2277, "sent_len_1": 66.5309, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.1538, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.859, "doc_norm": 1.459, "encoder_q-embeddings": 1888.5618, "encoder_q-layer.0": 1249.2836, "encoder_q-layer.1": 1335.6901, "encoder_q-layer.10": 1352.2413, "encoder_q-layer.11": 2932.3953, "encoder_q-layer.2": 1467.2092, "encoder_q-layer.3": 1401.844, "encoder_q-layer.4": 1407.5203, "encoder_q-layer.5": 1374.0626, "encoder_q-layer.6": 1266.4023, "encoder_q-layer.7": 1279.3179, "encoder_q-layer.8": 1444.9916, "encoder_q-layer.9": 1236.8365, "epoch": 0.52, "inbatch_neg_score": 0.3016, "inbatch_pos_score": 1.0078, "learning_rate": 2.5722222222222224e-05, "loss": 2.859, "norm_diff": 0.0236, "norm_loss": 0.0, "num_token_doc": 66.8962, "num_token_overlap": 18.0502, "num_token_query": 52.1904, "num_token_union": 73.4707, "num_word_context": 202.1326, "num_word_doc": 49.9242, "num_word_query": 39.7688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2407.8359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3025, "query_norm": 1.4353, "queue_k_norm": 1.4505, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1904, "sent_len_1": 66.8962, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0687, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.8625, "doc_norm": 1.4539, "encoder_q-embeddings": 1677.3104, "encoder_q-layer.0": 1128.4742, "encoder_q-layer.1": 1327.6915, "encoder_q-layer.10": 1336.4288, "encoder_q-layer.11": 2839.8452, "encoder_q-layer.2": 1520.0784, "encoder_q-layer.3": 1619.1887, "encoder_q-layer.4": 1636.6124, "encoder_q-layer.5": 1357.2002, "encoder_q-layer.6": 1594.8199, "encoder_q-layer.7": 1657.0642, "encoder_q-layer.8": 1627.5667, "encoder_q-layer.9": 1251.338, "epoch": 0.53, "inbatch_neg_score": 0.3143, "inbatch_pos_score": 1.0449, "learning_rate": 2.5666666666666666e-05, "loss": 2.8625, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.6412, "num_token_overlap": 18.0158, "num_token_query": 52.1625, "num_token_union": 73.3646, "num_word_context": 201.9634, "num_word_doc": 49.7111, "num_word_query": 39.7526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2409.4822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3127, "query_norm": 1.4493, "queue_k_norm": 1.4496, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1625, "sent_len_1": 66.6412, "sent_len_max_0": 128.0, "sent_len_max_1": 188.935, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.8791, "doc_norm": 1.4423, "encoder_q-embeddings": 2903.0354, "encoder_q-layer.0": 2122.7419, "encoder_q-layer.1": 2314.8818, "encoder_q-layer.10": 1297.2961, "encoder_q-layer.11": 2818.1736, "encoder_q-layer.2": 2665.1685, "encoder_q-layer.3": 2848.1716, "encoder_q-layer.4": 3060.9736, "encoder_q-layer.5": 2763.9058, "encoder_q-layer.6": 2724.0105, "encoder_q-layer.7": 2040.8267, "encoder_q-layer.8": 2155.5764, "encoder_q-layer.9": 1432.0745, "epoch": 0.53, "inbatch_neg_score": 0.2983, "inbatch_pos_score": 1.0244, "learning_rate": 2.5611111111111115e-05, "loss": 2.8791, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 66.7892, "num_token_overlap": 17.9785, "num_token_query": 52.1794, "num_token_union": 73.4212, "num_word_context": 202.1768, "num_word_doc": 49.8344, "num_word_query": 39.7821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3705.2592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.4699, "queue_k_norm": 1.4501, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1794, "sent_len_1": 66.7892, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.9487, "stdk": 0.048, "stdq": 0.0475, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.859, "doc_norm": 1.4508, "encoder_q-embeddings": 3313.3623, "encoder_q-layer.0": 2395.9998, "encoder_q-layer.1": 2690.2319, "encoder_q-layer.10": 1227.6012, "encoder_q-layer.11": 2754.7078, "encoder_q-layer.2": 2883.9246, "encoder_q-layer.3": 2646.3892, "encoder_q-layer.4": 2511.6165, "encoder_q-layer.5": 2173.2104, "encoder_q-layer.6": 2156.7334, "encoder_q-layer.7": 2259.0991, "encoder_q-layer.8": 2272.9648, "encoder_q-layer.9": 1621.8284, "epoch": 0.53, "inbatch_neg_score": 0.2928, "inbatch_pos_score": 0.9868, "learning_rate": 2.5555555555555554e-05, "loss": 2.859, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.7994, "num_token_overlap": 18.0397, "num_token_query": 52.2238, "num_token_union": 73.4394, "num_word_context": 202.1893, "num_word_doc": 49.8473, "num_word_query": 39.8292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3753.5204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.4139, "queue_k_norm": 1.4502, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2238, "sent_len_1": 66.7994, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8425, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.8629, "doc_norm": 1.4504, "encoder_q-embeddings": 6432.1963, "encoder_q-layer.0": 4759.1826, "encoder_q-layer.1": 5214.6855, "encoder_q-layer.10": 1249.9249, "encoder_q-layer.11": 3017.4128, "encoder_q-layer.2": 5606.8374, "encoder_q-layer.3": 5514.0386, "encoder_q-layer.4": 3662.2617, "encoder_q-layer.5": 2354.7886, "encoder_q-layer.6": 2113.8127, "encoder_q-layer.7": 1836.3804, "encoder_q-layer.8": 1597.0391, "encoder_q-layer.9": 1230.4346, "epoch": 0.53, "inbatch_neg_score": 0.3063, "inbatch_pos_score": 1.0166, "learning_rate": 2.5500000000000003e-05, "loss": 2.8629, "norm_diff": 0.0065, "norm_loss": 0.0, "num_token_doc": 66.8328, "num_token_overlap": 17.9758, "num_token_query": 52.1038, "num_token_union": 73.4216, "num_word_context": 202.2426, "num_word_doc": 49.8544, "num_word_query": 39.7125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6156.1264, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3057, "query_norm": 1.4474, "queue_k_norm": 1.4502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1038, "sent_len_1": 66.8328, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8913, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 2.883, "doc_norm": 1.4515, "encoder_q-embeddings": 2614.05, "encoder_q-layer.0": 1856.2056, "encoder_q-layer.1": 1840.0317, "encoder_q-layer.10": 1425.6249, "encoder_q-layer.11": 3005.8755, "encoder_q-layer.2": 1926.3113, "encoder_q-layer.3": 1931.1685, "encoder_q-layer.4": 1752.1375, "encoder_q-layer.5": 1494.791, "encoder_q-layer.6": 1568.87, "encoder_q-layer.7": 1370.3473, "encoder_q-layer.8": 1543.7932, "encoder_q-layer.9": 1297.5515, "epoch": 0.53, "inbatch_neg_score": 0.3101, "inbatch_pos_score": 0.998, "learning_rate": 2.5444444444444442e-05, "loss": 2.883, "norm_diff": 0.0111, "norm_loss": 0.0, "num_token_doc": 66.7168, "num_token_overlap": 17.9641, "num_token_query": 52.2097, "num_token_union": 73.4455, "num_word_context": 202.3311, "num_word_doc": 49.8497, "num_word_query": 39.8263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2894.0463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3088, "query_norm": 1.443, "queue_k_norm": 1.4498, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2097, "sent_len_1": 66.7168, "sent_len_max_0": 128.0, "sent_len_max_1": 187.545, "stdk": 0.0484, "stdq": 0.046, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.8628, "doc_norm": 1.4569, "encoder_q-embeddings": 3117.4163, "encoder_q-layer.0": 2103.4788, "encoder_q-layer.1": 2523.1121, "encoder_q-layer.10": 1252.1208, "encoder_q-layer.11": 2908.5093, "encoder_q-layer.2": 2918.6897, "encoder_q-layer.3": 2651.4851, "encoder_q-layer.4": 2303.5938, "encoder_q-layer.5": 1650.9717, "encoder_q-layer.6": 1536.1451, "encoder_q-layer.7": 1374.2679, "encoder_q-layer.8": 1382.3965, "encoder_q-layer.9": 1222.1008, "epoch": 0.53, "inbatch_neg_score": 0.3145, "inbatch_pos_score": 1.0322, "learning_rate": 2.538888888888889e-05, "loss": 2.8628, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.8015, "num_token_overlap": 17.9726, "num_token_query": 52.1884, "num_token_union": 73.4257, "num_word_context": 202.332, "num_word_doc": 49.8187, "num_word_query": 39.7659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3387.9914, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3145, "query_norm": 1.4648, "queue_k_norm": 1.4535, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1884, "sent_len_1": 66.8015, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5588, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.8689, "doc_norm": 1.4525, "encoder_q-embeddings": 1614.1613, "encoder_q-layer.0": 1082.9958, "encoder_q-layer.1": 1205.8248, "encoder_q-layer.10": 1299.686, "encoder_q-layer.11": 2920.0789, "encoder_q-layer.2": 1341.8651, "encoder_q-layer.3": 1303.2621, "encoder_q-layer.4": 1339.05, "encoder_q-layer.5": 1259.8929, "encoder_q-layer.6": 1353.3588, "encoder_q-layer.7": 1430.3439, "encoder_q-layer.8": 1592.9219, "encoder_q-layer.9": 1384.7034, "epoch": 0.53, "inbatch_neg_score": 0.3183, "inbatch_pos_score": 1.0068, "learning_rate": 2.5333333333333337e-05, "loss": 2.8689, "norm_diff": 0.0088, "norm_loss": 0.0, "num_token_doc": 66.532, "num_token_overlap": 17.8785, "num_token_query": 52.0221, "num_token_union": 73.3017, "num_word_context": 202.0531, "num_word_doc": 49.6289, "num_word_query": 39.6517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2336.1505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3174, "query_norm": 1.4531, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0221, "sent_len_1": 66.532, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1463, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.8588, "doc_norm": 1.4534, "encoder_q-embeddings": 1169.8101, "encoder_q-layer.0": 730.3197, "encoder_q-layer.1": 794.3738, "encoder_q-layer.10": 1366.657, "encoder_q-layer.11": 2965.8811, "encoder_q-layer.2": 881.2746, "encoder_q-layer.3": 928.5133, "encoder_q-layer.4": 947.693, "encoder_q-layer.5": 1006.7005, "encoder_q-layer.6": 1117.1847, "encoder_q-layer.7": 1291.2971, "encoder_q-layer.8": 1582.4867, "encoder_q-layer.9": 1278.3499, "epoch": 0.53, "inbatch_neg_score": 0.3198, "inbatch_pos_score": 1.0371, "learning_rate": 2.527777777777778e-05, "loss": 2.8588, "norm_diff": 0.0072, "norm_loss": 0.0, "num_token_doc": 66.7452, "num_token_overlap": 17.9957, "num_token_query": 52.1365, "num_token_union": 73.3686, "num_word_context": 202.2133, "num_word_doc": 49.7692, "num_word_query": 39.7306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2057.3942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3188, "query_norm": 1.4542, "queue_k_norm": 1.4518, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1365, "sent_len_1": 66.7452, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1763, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.8555, "doc_norm": 1.4451, "encoder_q-embeddings": 2556.4561, "encoder_q-layer.0": 1753.5171, "encoder_q-layer.1": 2053.7939, "encoder_q-layer.10": 1291.2649, "encoder_q-layer.11": 2830.7039, "encoder_q-layer.2": 2219.8225, "encoder_q-layer.3": 2316.7627, "encoder_q-layer.4": 2029.3815, "encoder_q-layer.5": 2116.1428, "encoder_q-layer.6": 2097.2849, "encoder_q-layer.7": 2226.7922, "encoder_q-layer.8": 2038.0334, "encoder_q-layer.9": 1241.6185, "epoch": 0.53, "inbatch_neg_score": 0.3237, "inbatch_pos_score": 1.0215, "learning_rate": 2.5222222222222225e-05, "loss": 2.8555, "norm_diff": 0.0113, "norm_loss": 0.0, "num_token_doc": 66.6701, "num_token_overlap": 18.0499, "num_token_query": 52.4226, "num_token_union": 73.4878, "num_word_context": 202.6306, "num_word_doc": 49.7828, "num_word_query": 39.9776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3172.5159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3223, "query_norm": 1.454, "queue_k_norm": 1.4498, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4226, "sent_len_1": 66.6701, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1725, "stdk": 0.0481, "stdq": 0.0461, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.8707, "doc_norm": 1.4472, "encoder_q-embeddings": 3007.9539, "encoder_q-layer.0": 2102.0286, "encoder_q-layer.1": 2369.8914, "encoder_q-layer.10": 2572.9651, "encoder_q-layer.11": 5869.7925, "encoder_q-layer.2": 2455.0627, "encoder_q-layer.3": 2557.4077, "encoder_q-layer.4": 2605.6973, "encoder_q-layer.5": 2488.7356, "encoder_q-layer.6": 2435.1223, "encoder_q-layer.7": 2745.0447, "encoder_q-layer.8": 3199.9548, "encoder_q-layer.9": 2594.5251, "epoch": 0.53, "inbatch_neg_score": 0.3253, "inbatch_pos_score": 1.0215, "learning_rate": 2.5166666666666667e-05, "loss": 2.8707, "norm_diff": 0.0084, "norm_loss": 0.0, "num_token_doc": 66.6037, "num_token_overlap": 17.9539, "num_token_query": 52.0296, "num_token_union": 73.2371, "num_word_context": 201.8559, "num_word_doc": 49.6843, "num_word_query": 39.6396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4535.6281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3257, "query_norm": 1.4541, "queue_k_norm": 1.4522, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0296, "sent_len_1": 66.6037, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8363, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.8618, "doc_norm": 1.4542, "encoder_q-embeddings": 7071.5908, "encoder_q-layer.0": 5067.7935, "encoder_q-layer.1": 5707.5811, "encoder_q-layer.10": 2747.3428, "encoder_q-layer.11": 5543.6533, "encoder_q-layer.2": 8005.0698, "encoder_q-layer.3": 6978.6201, "encoder_q-layer.4": 7763.9902, "encoder_q-layer.5": 7326.7852, "encoder_q-layer.6": 6279.4868, "encoder_q-layer.7": 3891.1135, "encoder_q-layer.8": 3295.2856, "encoder_q-layer.9": 2426.9041, "epoch": 0.54, "inbatch_neg_score": 0.3239, "inbatch_pos_score": 1.0391, "learning_rate": 2.5111111111111113e-05, "loss": 2.8618, "norm_diff": 0.0122, "norm_loss": 0.0, "num_token_doc": 66.5195, "num_token_overlap": 17.9174, "num_token_query": 52.1098, "num_token_union": 73.3199, "num_word_context": 202.0356, "num_word_doc": 49.6319, "num_word_query": 39.7251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8702.678, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3252, "query_norm": 1.4459, "queue_k_norm": 1.4534, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1098, "sent_len_1": 66.5195, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.1738, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.8403, "doc_norm": 1.4512, "encoder_q-embeddings": 4060.9446, "encoder_q-layer.0": 2703.7324, "encoder_q-layer.1": 3017.8174, "encoder_q-layer.10": 2373.9658, "encoder_q-layer.11": 5737.1294, "encoder_q-layer.2": 3699.626, "encoder_q-layer.3": 3578.874, "encoder_q-layer.4": 3438.2974, "encoder_q-layer.5": 3324.158, "encoder_q-layer.6": 3771.4521, "encoder_q-layer.7": 4350.7432, "encoder_q-layer.8": 4308.9941, "encoder_q-layer.9": 3049.0989, "epoch": 0.54, "inbatch_neg_score": 0.3292, "inbatch_pos_score": 1.042, "learning_rate": 2.5055555555555555e-05, "loss": 2.8403, "norm_diff": 0.0079, "norm_loss": 0.0, "num_token_doc": 66.5657, "num_token_overlap": 18.0315, "num_token_query": 52.3645, "num_token_union": 73.4295, "num_word_context": 202.1944, "num_word_doc": 49.6998, "num_word_query": 39.9493, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5571.7696, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3286, "query_norm": 1.4461, "queue_k_norm": 1.4523, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3645, "sent_len_1": 66.5657, "sent_len_max_0": 127.995, "sent_len_max_1": 187.9212, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.8515, "doc_norm": 1.4627, "encoder_q-embeddings": 2605.3176, "encoder_q-layer.0": 1742.8392, "encoder_q-layer.1": 2013.4567, "encoder_q-layer.10": 2777.3569, "encoder_q-layer.11": 6120.3301, "encoder_q-layer.2": 2258.6409, "encoder_q-layer.3": 2377.7961, "encoder_q-layer.4": 2519.9243, "encoder_q-layer.5": 2582.1277, "encoder_q-layer.6": 2502.8904, "encoder_q-layer.7": 2868.5779, "encoder_q-layer.8": 2991.8811, "encoder_q-layer.9": 2429.0742, "epoch": 0.54, "inbatch_neg_score": 0.3305, "inbatch_pos_score": 1.0498, "learning_rate": 2.5e-05, "loss": 2.8515, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.8258, "num_token_overlap": 18.0043, "num_token_query": 52.1959, "num_token_union": 73.4697, "num_word_context": 202.3845, "num_word_doc": 49.8687, "num_word_query": 39.7843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4437.8787, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.332, "query_norm": 1.4266, "queue_k_norm": 1.4525, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1959, "sent_len_1": 66.8258, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6463, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.8488, "doc_norm": 1.4548, "encoder_q-embeddings": 5575.5063, "encoder_q-layer.0": 4025.4905, "encoder_q-layer.1": 4407.1499, "encoder_q-layer.10": 2410.9321, "encoder_q-layer.11": 5713.5703, "encoder_q-layer.2": 5690.0288, "encoder_q-layer.3": 4561.3823, "encoder_q-layer.4": 4009.5144, "encoder_q-layer.5": 4012.8408, "encoder_q-layer.6": 3366.9656, "encoder_q-layer.7": 2878.9309, "encoder_q-layer.8": 2749.0127, "encoder_q-layer.9": 2332.5645, "epoch": 0.54, "inbatch_neg_score": 0.3388, "inbatch_pos_score": 1.0547, "learning_rate": 2.4944444444444447e-05, "loss": 2.8488, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.6607, "num_token_overlap": 18.0046, "num_token_query": 52.2393, "num_token_union": 73.4608, "num_word_context": 202.3068, "num_word_doc": 49.7805, "num_word_query": 39.8431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6394.5952, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3369, "query_norm": 1.445, "queue_k_norm": 1.4556, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2393, "sent_len_1": 66.6607, "sent_len_max_0": 128.0, "sent_len_max_1": 187.3175, "stdk": 0.0484, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.8557, "doc_norm": 1.4525, "encoder_q-embeddings": 2868.0444, "encoder_q-layer.0": 1972.0793, "encoder_q-layer.1": 2160.563, "encoder_q-layer.10": 2585.3994, "encoder_q-layer.11": 6248.0742, "encoder_q-layer.2": 2484.1033, "encoder_q-layer.3": 2492.4016, "encoder_q-layer.4": 2556.8137, "encoder_q-layer.5": 2564.0237, "encoder_q-layer.6": 2847.3398, "encoder_q-layer.7": 3432.5015, "encoder_q-layer.8": 4122.2856, "encoder_q-layer.9": 2861.5044, "epoch": 0.54, "inbatch_neg_score": 0.348, "inbatch_pos_score": 1.0459, "learning_rate": 2.488888888888889e-05, "loss": 2.8557, "norm_diff": 0.0168, "norm_loss": 0.0, "num_token_doc": 66.8106, "num_token_overlap": 18.0042, "num_token_query": 52.3715, "num_token_union": 73.5586, "num_word_context": 202.3276, "num_word_doc": 49.8605, "num_word_query": 39.9222, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4908.4168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3462, "query_norm": 1.4685, "queue_k_norm": 1.456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3715, "sent_len_1": 66.8106, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.3075, "stdk": 0.0482, "stdq": 0.047, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.8578, "doc_norm": 1.4597, "encoder_q-embeddings": 2653.3347, "encoder_q-layer.0": 1922.1531, "encoder_q-layer.1": 2057.2119, "encoder_q-layer.10": 1279.045, "encoder_q-layer.11": 2945.2881, "encoder_q-layer.2": 2290.7954, "encoder_q-layer.3": 2244.7646, "encoder_q-layer.4": 2287.624, "encoder_q-layer.5": 2668.6299, "encoder_q-layer.6": 2948.9163, "encoder_q-layer.7": 3461.7212, "encoder_q-layer.8": 2145.8171, "encoder_q-layer.9": 1170.493, "epoch": 0.54, "inbatch_neg_score": 0.3337, "inbatch_pos_score": 1.0518, "learning_rate": 2.4833333333333335e-05, "loss": 2.8578, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.7729, "num_token_overlap": 17.964, "num_token_query": 52.1973, "num_token_union": 73.4768, "num_word_context": 202.1414, "num_word_doc": 49.8013, "num_word_query": 39.7689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3585.8784, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3337, "query_norm": 1.4317, "queue_k_norm": 1.4576, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1973, "sent_len_1": 66.7729, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1725, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.8551, "doc_norm": 1.4608, "encoder_q-embeddings": 1269.1599, "encoder_q-layer.0": 822.4719, "encoder_q-layer.1": 896.7978, "encoder_q-layer.10": 1260.8673, "encoder_q-layer.11": 2802.8049, "encoder_q-layer.2": 1051.6761, "encoder_q-layer.3": 1193.9352, "encoder_q-layer.4": 1253.0505, "encoder_q-layer.5": 1073.368, "encoder_q-layer.6": 1101.2339, "encoder_q-layer.7": 1231.5024, "encoder_q-layer.8": 1513.1951, "encoder_q-layer.9": 1357.715, "epoch": 0.54, "inbatch_neg_score": 0.3295, "inbatch_pos_score": 1.0557, "learning_rate": 2.477777777777778e-05, "loss": 2.8551, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.5631, "num_token_overlap": 17.9555, "num_token_query": 52.0621, "num_token_union": 73.3079, "num_word_context": 202.2739, "num_word_doc": 49.7099, "num_word_query": 39.7066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2117.6715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3306, "query_norm": 1.4496, "queue_k_norm": 1.4564, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0621, "sent_len_1": 66.5631, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5813, "stdk": 0.0486, "stdq": 0.0468, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.8396, "doc_norm": 1.4563, "encoder_q-embeddings": 9134.5791, "encoder_q-layer.0": 6348.8804, "encoder_q-layer.1": 7503.2861, "encoder_q-layer.10": 1270.3506, "encoder_q-layer.11": 2774.8467, "encoder_q-layer.2": 8745.7969, "encoder_q-layer.3": 8638.0957, "encoder_q-layer.4": 9381.0439, "encoder_q-layer.5": 8019.0298, "encoder_q-layer.6": 6505.1807, "encoder_q-layer.7": 5194.8257, "encoder_q-layer.8": 2814.1277, "encoder_q-layer.9": 1312.5085, "epoch": 0.54, "inbatch_neg_score": 0.3301, "inbatch_pos_score": 1.0391, "learning_rate": 2.4722222222222223e-05, "loss": 2.8396, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.7492, "num_token_overlap": 18.0267, "num_token_query": 52.2726, "num_token_union": 73.455, "num_word_context": 202.0403, "num_word_doc": 49.8213, "num_word_query": 39.8336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10138.9966, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3298, "query_norm": 1.4294, "queue_k_norm": 1.4575, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2726, "sent_len_1": 66.7492, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9212, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.8912, "doc_norm": 1.453, "encoder_q-embeddings": 1219.8608, "encoder_q-layer.0": 823.7864, "encoder_q-layer.1": 959.248, "encoder_q-layer.10": 1209.7412, "encoder_q-layer.11": 2655.2461, "encoder_q-layer.2": 986.4561, "encoder_q-layer.3": 973.1949, "encoder_q-layer.4": 986.2042, "encoder_q-layer.5": 1036.2653, "encoder_q-layer.6": 1095.9169, "encoder_q-layer.7": 1176.4296, "encoder_q-layer.8": 1365.6722, "encoder_q-layer.9": 1153.4799, "epoch": 0.54, "inbatch_neg_score": 0.3205, "inbatch_pos_score": 1.0273, "learning_rate": 2.466666666666667e-05, "loss": 2.8912, "norm_diff": 0.0301, "norm_loss": 0.0, "num_token_doc": 66.5536, "num_token_overlap": 17.875, "num_token_query": 51.8863, "num_token_union": 73.2023, "num_word_context": 202.0012, "num_word_doc": 49.6731, "num_word_query": 39.5749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1910.8166, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3201, "query_norm": 1.4267, "queue_k_norm": 1.4583, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 51.8863, "sent_len_1": 66.5536, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5175, "stdk": 0.0482, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.8311, "doc_norm": 1.4559, "encoder_q-embeddings": 1389.205, "encoder_q-layer.0": 974.7853, "encoder_q-layer.1": 1011.3016, "encoder_q-layer.10": 1267.4581, "encoder_q-layer.11": 2840.0181, "encoder_q-layer.2": 1178.1312, "encoder_q-layer.3": 1179.0435, "encoder_q-layer.4": 1135.8108, "encoder_q-layer.5": 1070.2913, "encoder_q-layer.6": 1094.6339, "encoder_q-layer.7": 1129.0166, "encoder_q-layer.8": 1323.7258, "encoder_q-layer.9": 1189.3251, "epoch": 0.54, "inbatch_neg_score": 0.3234, "inbatch_pos_score": 1.0264, "learning_rate": 2.461111111111111e-05, "loss": 2.8311, "norm_diff": 0.0225, "norm_loss": 0.0, "num_token_doc": 66.7228, "num_token_overlap": 18.0873, "num_token_query": 52.411, "num_token_union": 73.4726, "num_word_context": 202.3313, "num_word_doc": 49.8234, "num_word_query": 39.9947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2083.1693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.4333, "queue_k_norm": 1.4563, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.411, "sent_len_1": 66.7228, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0863, "stdk": 0.0484, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.8588, "doc_norm": 1.4586, "encoder_q-embeddings": 1941.0323, "encoder_q-layer.0": 1294.9792, "encoder_q-layer.1": 1418.2892, "encoder_q-layer.10": 1400.4387, "encoder_q-layer.11": 2793.5596, "encoder_q-layer.2": 1485.2095, "encoder_q-layer.3": 1507.8317, "encoder_q-layer.4": 1461.3149, "encoder_q-layer.5": 1355.469, "encoder_q-layer.6": 1504.5623, "encoder_q-layer.7": 1498.3329, "encoder_q-layer.8": 1462.5123, "encoder_q-layer.9": 1204.5708, "epoch": 0.54, "inbatch_neg_score": 0.3156, "inbatch_pos_score": 1.0332, "learning_rate": 2.4555555555555557e-05, "loss": 2.8588, "norm_diff": 0.0202, "norm_loss": 0.0, "num_token_doc": 66.7311, "num_token_overlap": 17.9659, "num_token_query": 52.1514, "num_token_union": 73.3822, "num_word_context": 202.1621, "num_word_doc": 49.7597, "num_word_query": 39.7618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2455.5729, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3157, "query_norm": 1.4383, "queue_k_norm": 1.4587, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1514, "sent_len_1": 66.7311, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7287, "stdk": 0.0484, "stdq": 0.0468, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.8365, "doc_norm": 1.4551, "encoder_q-embeddings": 1311.212, "encoder_q-layer.0": 828.4593, "encoder_q-layer.1": 904.8741, "encoder_q-layer.10": 1218.7184, "encoder_q-layer.11": 2787.084, "encoder_q-layer.2": 945.0436, "encoder_q-layer.3": 912.5176, "encoder_q-layer.4": 943.2042, "encoder_q-layer.5": 892.61, "encoder_q-layer.6": 1049.5146, "encoder_q-layer.7": 1126.2965, "encoder_q-layer.8": 1269.5659, "encoder_q-layer.9": 1164.2782, "epoch": 0.55, "inbatch_neg_score": 0.3206, "inbatch_pos_score": 1.0107, "learning_rate": 2.45e-05, "loss": 2.8365, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.7825, "num_token_overlap": 18.0811, "num_token_query": 52.3569, "num_token_union": 73.4347, "num_word_context": 202.1586, "num_word_doc": 49.8019, "num_word_query": 39.9336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1970.7967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.4283, "queue_k_norm": 1.4567, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3569, "sent_len_1": 66.7825, "sent_len_max_0": 128.0, "sent_len_max_1": 191.795, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 2.8415, "doc_norm": 1.4521, "encoder_q-embeddings": 2654.1333, "encoder_q-layer.0": 1969.8252, "encoder_q-layer.1": 1950.1671, "encoder_q-layer.10": 1332.2085, "encoder_q-layer.11": 3062.5073, "encoder_q-layer.2": 2245.217, "encoder_q-layer.3": 2513.4834, "encoder_q-layer.4": 2366.657, "encoder_q-layer.5": 1940.8818, "encoder_q-layer.6": 1865.4261, "encoder_q-layer.7": 1760.0139, "encoder_q-layer.8": 1674.3745, "encoder_q-layer.9": 1284.4974, "epoch": 0.55, "inbatch_neg_score": 0.3147, "inbatch_pos_score": 0.9951, "learning_rate": 2.4444444444444445e-05, "loss": 2.8415, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.6838, "num_token_overlap": 18.0512, "num_token_query": 52.3131, "num_token_union": 73.4475, "num_word_context": 202.4672, "num_word_doc": 49.7465, "num_word_query": 39.8594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3172.1145, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3125, "query_norm": 1.4146, "queue_k_norm": 1.4573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3131, "sent_len_1": 66.6838, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1425, "stdk": 0.0482, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.848, "doc_norm": 1.4558, "encoder_q-embeddings": 5305.4272, "encoder_q-layer.0": 3710.2224, "encoder_q-layer.1": 4159.2964, "encoder_q-layer.10": 1281.8768, "encoder_q-layer.11": 2812.4014, "encoder_q-layer.2": 4487.6333, "encoder_q-layer.3": 4211.2793, "encoder_q-layer.4": 4515.7979, "encoder_q-layer.5": 4121.0205, "encoder_q-layer.6": 3626.2795, "encoder_q-layer.7": 2696.4158, "encoder_q-layer.8": 2053.4231, "encoder_q-layer.9": 1298.282, "epoch": 0.55, "inbatch_neg_score": 0.3198, "inbatch_pos_score": 1.0225, "learning_rate": 2.4388888888888887e-05, "loss": 2.848, "norm_diff": 0.0347, "norm_loss": 0.0, "num_token_doc": 66.7253, "num_token_overlap": 18.0628, "num_token_query": 52.2749, "num_token_union": 73.4503, "num_word_context": 202.1854, "num_word_doc": 49.8388, "num_word_query": 39.8727, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5450.6596, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3196, "query_norm": 1.421, "queue_k_norm": 1.4581, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2749, "sent_len_1": 66.7253, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8313, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.8594, "doc_norm": 1.4539, "encoder_q-embeddings": 1459.2338, "encoder_q-layer.0": 972.9214, "encoder_q-layer.1": 1124.6677, "encoder_q-layer.10": 1228.6226, "encoder_q-layer.11": 2732.3938, "encoder_q-layer.2": 1246.8884, "encoder_q-layer.3": 1223.915, "encoder_q-layer.4": 1186.3665, "encoder_q-layer.5": 1111.0597, "encoder_q-layer.6": 1164.2441, "encoder_q-layer.7": 1269.1288, "encoder_q-layer.8": 1340.4995, "encoder_q-layer.9": 1168.1685, "epoch": 0.55, "inbatch_neg_score": 0.3125, "inbatch_pos_score": 1.0215, "learning_rate": 2.4333333333333336e-05, "loss": 2.8594, "norm_diff": 0.0284, "norm_loss": 0.0, "num_token_doc": 66.7772, "num_token_overlap": 18.0129, "num_token_query": 52.2338, "num_token_union": 73.4637, "num_word_context": 202.6724, "num_word_doc": 49.7955, "num_word_query": 39.8132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2107.728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3127, "query_norm": 1.4255, "queue_k_norm": 1.4582, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2338, "sent_len_1": 66.7772, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0275, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.843, "doc_norm": 1.4579, "encoder_q-embeddings": 1876.3831, "encoder_q-layer.0": 1302.6487, "encoder_q-layer.1": 1401.7454, "encoder_q-layer.10": 1480.3893, "encoder_q-layer.11": 2921.5403, "encoder_q-layer.2": 1673.4729, "encoder_q-layer.3": 1655.0042, "encoder_q-layer.4": 1585.7566, "encoder_q-layer.5": 1325.7631, "encoder_q-layer.6": 1512.3024, "encoder_q-layer.7": 1923.5089, "encoder_q-layer.8": 2463.3804, "encoder_q-layer.9": 1478.5999, "epoch": 0.55, "inbatch_neg_score": 0.3124, "inbatch_pos_score": 1.0088, "learning_rate": 2.427777777777778e-05, "loss": 2.843, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.8022, "num_token_overlap": 18.0296, "num_token_query": 52.2864, "num_token_union": 73.4866, "num_word_context": 202.5888, "num_word_doc": 49.8408, "num_word_query": 39.8694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2806.5907, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3118, "query_norm": 1.4238, "queue_k_norm": 1.4568, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2864, "sent_len_1": 66.8022, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9963, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 2.8294, "doc_norm": 1.4573, "encoder_q-embeddings": 1592.1392, "encoder_q-layer.0": 1100.803, "encoder_q-layer.1": 1235.7008, "encoder_q-layer.10": 1360.5812, "encoder_q-layer.11": 2843.7905, "encoder_q-layer.2": 1254.3257, "encoder_q-layer.3": 1195.7362, "encoder_q-layer.4": 1263.8545, "encoder_q-layer.5": 1253.5027, "encoder_q-layer.6": 1216.7203, "encoder_q-layer.7": 1242.5806, "encoder_q-layer.8": 1395.7058, "encoder_q-layer.9": 1244.8857, "epoch": 0.55, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 1.0166, "learning_rate": 2.4222222222222224e-05, "loss": 2.8294, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.8034, "num_token_overlap": 18.0242, "num_token_query": 52.3443, "num_token_union": 73.5699, "num_word_context": 202.6207, "num_word_doc": 49.8423, "num_word_query": 39.8816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2213.1147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.4423, "queue_k_norm": 1.4573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3443, "sent_len_1": 66.8034, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4613, "stdk": 0.0485, "stdq": 0.047, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.8271, "doc_norm": 1.4534, "encoder_q-embeddings": 1845.9407, "encoder_q-layer.0": 1343.8824, "encoder_q-layer.1": 1504.4784, "encoder_q-layer.10": 1184.8463, "encoder_q-layer.11": 2629.5862, "encoder_q-layer.2": 1698.0731, "encoder_q-layer.3": 1733.718, "encoder_q-layer.4": 1824.9043, "encoder_q-layer.5": 1791.6517, "encoder_q-layer.6": 1783.9492, "encoder_q-layer.7": 1469.864, "encoder_q-layer.8": 1375.8466, "encoder_q-layer.9": 1175.3071, "epoch": 0.55, "inbatch_neg_score": 0.3049, "inbatch_pos_score": 0.9971, "learning_rate": 2.4166666666666667e-05, "loss": 2.8271, "norm_diff": 0.0207, "norm_loss": 0.0, "num_token_doc": 66.7546, "num_token_overlap": 17.9843, "num_token_query": 52.2811, "num_token_union": 73.5121, "num_word_context": 202.2096, "num_word_doc": 49.818, "num_word_query": 39.8694, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2506.1367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3037, "query_norm": 1.4347, "queue_k_norm": 1.457, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2811, "sent_len_1": 66.7546, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.3137, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.8123, "doc_norm": 1.4564, "encoder_q-embeddings": 1987.37, "encoder_q-layer.0": 1364.6898, "encoder_q-layer.1": 1645.759, "encoder_q-layer.10": 1442.7324, "encoder_q-layer.11": 2886.5032, "encoder_q-layer.2": 1911.3938, "encoder_q-layer.3": 1971.3938, "encoder_q-layer.4": 1935.2748, "encoder_q-layer.5": 2187.9436, "encoder_q-layer.6": 1755.9135, "encoder_q-layer.7": 1539.0815, "encoder_q-layer.8": 1612.0808, "encoder_q-layer.9": 1285.6666, "epoch": 0.55, "inbatch_neg_score": 0.2996, "inbatch_pos_score": 0.9971, "learning_rate": 2.4111111111111113e-05, "loss": 2.8123, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.8304, "num_token_overlap": 18.0137, "num_token_query": 52.3611, "num_token_union": 73.5989, "num_word_context": 202.886, "num_word_doc": 49.8645, "num_word_query": 39.9194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2764.2618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2991, "query_norm": 1.4169, "queue_k_norm": 1.4579, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3611, "sent_len_1": 66.8304, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5062, "stdk": 0.0484, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.8268, "doc_norm": 1.4543, "encoder_q-embeddings": 6441.7368, "encoder_q-layer.0": 4222.6709, "encoder_q-layer.1": 4786.4077, "encoder_q-layer.10": 1354.0511, "encoder_q-layer.11": 2700.9868, "encoder_q-layer.2": 6525.7051, "encoder_q-layer.3": 7287.1553, "encoder_q-layer.4": 6411.4604, "encoder_q-layer.5": 5695.3174, "encoder_q-layer.6": 5680.1797, "encoder_q-layer.7": 5261.1616, "encoder_q-layer.8": 2396.3538, "encoder_q-layer.9": 1296.0986, "epoch": 0.55, "inbatch_neg_score": 0.2985, "inbatch_pos_score": 0.9985, "learning_rate": 2.4055555555555555e-05, "loss": 2.8268, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 66.9877, "num_token_overlap": 18.0355, "num_token_query": 52.2388, "num_token_union": 73.5599, "num_word_context": 202.2727, "num_word_doc": 49.9865, "num_word_query": 39.8238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7613.7576, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2974, "query_norm": 1.4267, "queue_k_norm": 1.4556, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2388, "sent_len_1": 66.9877, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7562, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.8371, "doc_norm": 1.451, "encoder_q-embeddings": 70439.5703, "encoder_q-layer.0": 47046.5938, "encoder_q-layer.1": 54670.0859, "encoder_q-layer.10": 1296.4906, "encoder_q-layer.11": 2792.356, "encoder_q-layer.2": 63117.8945, "encoder_q-layer.3": 64510.5273, "encoder_q-layer.4": 64594.0586, "encoder_q-layer.5": 53241.0078, "encoder_q-layer.6": 41536.3906, "encoder_q-layer.7": 31508.5762, "encoder_q-layer.8": 5468.0469, "encoder_q-layer.9": 1978.3177, "epoch": 0.55, "inbatch_neg_score": 0.2997, "inbatch_pos_score": 0.999, "learning_rate": 2.4e-05, "loss": 2.8371, "norm_diff": 0.0198, "norm_loss": 0.0, "num_token_doc": 66.7957, "num_token_overlap": 18.0151, "num_token_query": 52.1828, "num_token_union": 73.465, "num_word_context": 202.4493, "num_word_doc": 49.8473, "num_word_query": 39.7804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 71326.9545, "preclip_grad_norm_avg": 0.0007, "q@queue_neg_score": 0.2988, "query_norm": 1.4312, "queue_k_norm": 1.4545, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1828, "sent_len_1": 66.7957, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.9338, "stdk": 0.0483, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.8441, "doc_norm": 1.4584, "encoder_q-embeddings": 1139.5892, "encoder_q-layer.0": 700.6183, "encoder_q-layer.1": 754.9235, "encoder_q-layer.10": 1258.8971, "encoder_q-layer.11": 2748.4653, "encoder_q-layer.2": 877.2374, "encoder_q-layer.3": 842.6167, "encoder_q-layer.4": 880.7103, "encoder_q-layer.5": 883.7446, "encoder_q-layer.6": 1005.0429, "encoder_q-layer.7": 1176.7589, "encoder_q-layer.8": 1330.826, "encoder_q-layer.9": 1147.5347, "epoch": 0.56, "inbatch_neg_score": 0.302, "inbatch_pos_score": 1.0127, "learning_rate": 2.3944444444444443e-05, "loss": 2.8441, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.8458, "num_token_overlap": 18.0429, "num_token_query": 52.3297, "num_token_union": 73.571, "num_word_context": 202.4248, "num_word_doc": 49.8598, "num_word_query": 39.9247, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1893.5945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.4259, "queue_k_norm": 1.4559, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3297, "sent_len_1": 66.8458, "sent_len_max_0": 128.0, "sent_len_max_1": 191.11, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.8556, "doc_norm": 1.4529, "encoder_q-embeddings": 1790.986, "encoder_q-layer.0": 1138.421, "encoder_q-layer.1": 1406.6964, "encoder_q-layer.10": 1261.6337, "encoder_q-layer.11": 2807.6899, "encoder_q-layer.2": 1379.9041, "encoder_q-layer.3": 1217.1846, "encoder_q-layer.4": 1185.0325, "encoder_q-layer.5": 1172.8027, "encoder_q-layer.6": 1220.4451, "encoder_q-layer.7": 1290.5006, "encoder_q-layer.8": 1322.9454, "encoder_q-layer.9": 1248.9849, "epoch": 0.56, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 1.0244, "learning_rate": 2.3888888888888892e-05, "loss": 2.8556, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.8481, "num_token_overlap": 18.0463, "num_token_query": 52.1938, "num_token_union": 73.4223, "num_word_context": 202.6424, "num_word_doc": 49.872, "num_word_query": 39.7824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2253.9984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2981, "query_norm": 1.4161, "queue_k_norm": 1.4557, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1938, "sent_len_1": 66.8481, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6562, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.8111, "doc_norm": 1.4555, "encoder_q-embeddings": 1150.5496, "encoder_q-layer.0": 731.8836, "encoder_q-layer.1": 811.9027, "encoder_q-layer.10": 1289.4858, "encoder_q-layer.11": 2838.8545, "encoder_q-layer.2": 897.1129, "encoder_q-layer.3": 894.2648, "encoder_q-layer.4": 885.9703, "encoder_q-layer.5": 925.4968, "encoder_q-layer.6": 1022.1862, "encoder_q-layer.7": 1164.8553, "encoder_q-layer.8": 1303.2026, "encoder_q-layer.9": 1294.6106, "epoch": 0.56, "inbatch_neg_score": 0.3024, "inbatch_pos_score": 1.0176, "learning_rate": 2.3833333333333334e-05, "loss": 2.8111, "norm_diff": 0.0244, "norm_loss": 0.0, "num_token_doc": 66.76, "num_token_overlap": 17.9936, "num_token_query": 52.1979, "num_token_union": 73.411, "num_word_context": 202.4032, "num_word_doc": 49.7956, "num_word_query": 39.7894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1931.6641, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.302, "query_norm": 1.4328, "queue_k_norm": 1.4555, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1979, "sent_len_1": 66.76, "sent_len_max_0": 128.0, "sent_len_max_1": 190.71, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.8424, "doc_norm": 1.4575, "encoder_q-embeddings": 1274.4197, "encoder_q-layer.0": 766.9784, "encoder_q-layer.1": 875.3175, "encoder_q-layer.10": 1323.394, "encoder_q-layer.11": 2919.2043, "encoder_q-layer.2": 969.6957, "encoder_q-layer.3": 991.2415, "encoder_q-layer.4": 1043.3104, "encoder_q-layer.5": 1103.9559, "encoder_q-layer.6": 1140.454, "encoder_q-layer.7": 1249.0281, "encoder_q-layer.8": 1495.5376, "encoder_q-layer.9": 1320.7942, "epoch": 0.56, "inbatch_neg_score": 0.3053, "inbatch_pos_score": 1.0078, "learning_rate": 2.377777777777778e-05, "loss": 2.8424, "norm_diff": 0.0301, "norm_loss": 0.0, "num_token_doc": 66.728, "num_token_overlap": 17.968, "num_token_query": 52.2297, "num_token_union": 73.4199, "num_word_context": 202.1814, "num_word_doc": 49.791, "num_word_query": 39.8218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2097.2577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3035, "query_norm": 1.4303, "queue_k_norm": 1.4539, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2297, "sent_len_1": 66.728, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8825, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.8406, "doc_norm": 1.4585, "encoder_q-embeddings": 3678.1353, "encoder_q-layer.0": 2592.3452, "encoder_q-layer.1": 3268.5376, "encoder_q-layer.10": 3215.0422, "encoder_q-layer.11": 5956.9263, "encoder_q-layer.2": 2895.4185, "encoder_q-layer.3": 2699.6245, "encoder_q-layer.4": 2613.157, "encoder_q-layer.5": 2673.0557, "encoder_q-layer.6": 2835.4749, "encoder_q-layer.7": 2966.8506, "encoder_q-layer.8": 3035.8, "encoder_q-layer.9": 2566.9224, "epoch": 0.56, "inbatch_neg_score": 0.3023, "inbatch_pos_score": 1.0117, "learning_rate": 2.3722222222222222e-05, "loss": 2.8406, "norm_diff": 0.0398, "norm_loss": 0.0, "num_token_doc": 66.8617, "num_token_overlap": 18.0253, "num_token_query": 52.2073, "num_token_union": 73.4705, "num_word_context": 202.5056, "num_word_doc": 49.892, "num_word_query": 39.7993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5037.8345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3025, "query_norm": 1.4187, "queue_k_norm": 1.453, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2073, "sent_len_1": 66.8617, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.905, "stdk": 0.0486, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.8342, "doc_norm": 1.4515, "encoder_q-embeddings": 1177.8929, "encoder_q-layer.0": 757.0454, "encoder_q-layer.1": 801.4745, "encoder_q-layer.10": 1213.8298, "encoder_q-layer.11": 2794.9653, "encoder_q-layer.2": 899.5739, "encoder_q-layer.3": 943.9528, "encoder_q-layer.4": 985.5266, "encoder_q-layer.5": 1013.4075, "encoder_q-layer.6": 1092.955, "encoder_q-layer.7": 1162.0336, "encoder_q-layer.8": 1346.207, "encoder_q-layer.9": 1173.9058, "epoch": 0.56, "inbatch_neg_score": 0.298, "inbatch_pos_score": 1.0195, "learning_rate": 2.3666666666666668e-05, "loss": 2.8342, "norm_diff": 0.0453, "norm_loss": 0.0, "num_token_doc": 66.5647, "num_token_overlap": 17.9434, "num_token_query": 52.0861, "num_token_union": 73.3013, "num_word_context": 201.9791, "num_word_doc": 49.6982, "num_word_query": 39.7306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1948.6499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.4063, "queue_k_norm": 1.4536, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0861, "sent_len_1": 66.5647, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8725, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 2.829, "doc_norm": 1.4469, "encoder_q-embeddings": 1520.8927, "encoder_q-layer.0": 1041.8173, "encoder_q-layer.1": 1164.6212, "encoder_q-layer.10": 1243.5302, "encoder_q-layer.11": 2754.4573, "encoder_q-layer.2": 1233.0182, "encoder_q-layer.3": 1237.6403, "encoder_q-layer.4": 1218.0382, "encoder_q-layer.5": 1217.4828, "encoder_q-layer.6": 1229.8621, "encoder_q-layer.7": 1239.0458, "encoder_q-layer.8": 1326.6062, "encoder_q-layer.9": 1193.8821, "epoch": 0.56, "inbatch_neg_score": 0.2929, "inbatch_pos_score": 0.9912, "learning_rate": 2.361111111111111e-05, "loss": 2.829, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 66.7274, "num_token_overlap": 18.0316, "num_token_query": 52.2201, "num_token_union": 73.4224, "num_word_context": 202.1438, "num_word_doc": 49.8018, "num_word_query": 39.8321, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2168.0492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.4067, "queue_k_norm": 1.4522, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2201, "sent_len_1": 66.7274, "sent_len_max_0": 127.98, "sent_len_max_1": 189.1887, "stdk": 0.0482, "stdq": 0.046, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.8247, "doc_norm": 1.4567, "encoder_q-embeddings": 1263.6051, "encoder_q-layer.0": 762.9982, "encoder_q-layer.1": 845.269, "encoder_q-layer.10": 1394.3102, "encoder_q-layer.11": 3119.4868, "encoder_q-layer.2": 918.6445, "encoder_q-layer.3": 898.958, "encoder_q-layer.4": 880.9574, "encoder_q-layer.5": 905.1733, "encoder_q-layer.6": 1021.9423, "encoder_q-layer.7": 1182.3385, "encoder_q-layer.8": 1515.5173, "encoder_q-layer.9": 1288.9841, "epoch": 0.56, "inbatch_neg_score": 0.2885, "inbatch_pos_score": 0.9951, "learning_rate": 2.3555555555555556e-05, "loss": 2.8247, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.6397, "num_token_overlap": 18.0475, "num_token_query": 52.2733, "num_token_union": 73.3753, "num_word_context": 202.4975, "num_word_doc": 49.7562, "num_word_query": 39.8544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2063.8646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.4116, "queue_k_norm": 1.4542, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2733, "sent_len_1": 66.6397, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.175, "stdk": 0.0486, "stdq": 0.0462, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.8237, "doc_norm": 1.4455, "encoder_q-embeddings": 1688.2605, "encoder_q-layer.0": 1193.3145, "encoder_q-layer.1": 1341.369, "encoder_q-layer.10": 1359.9478, "encoder_q-layer.11": 2881.72, "encoder_q-layer.2": 1634.0975, "encoder_q-layer.3": 1523.0637, "encoder_q-layer.4": 1529.8827, "encoder_q-layer.5": 1601.7679, "encoder_q-layer.6": 1600.9712, "encoder_q-layer.7": 1486.5413, "encoder_q-layer.8": 1471.0768, "encoder_q-layer.9": 1225.8472, "epoch": 0.56, "inbatch_neg_score": 0.2848, "inbatch_pos_score": 0.9888, "learning_rate": 2.35e-05, "loss": 2.8237, "norm_diff": 0.0455, "norm_loss": 0.0, "num_token_doc": 66.626, "num_token_overlap": 17.961, "num_token_query": 52.1723, "num_token_union": 73.3402, "num_word_context": 202.212, "num_word_doc": 49.7026, "num_word_query": 39.7681, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2455.4136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.4, "queue_k_norm": 1.452, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1723, "sent_len_1": 66.626, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.235, "stdk": 0.0481, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.8392, "doc_norm": 1.454, "encoder_q-embeddings": 2215.748, "encoder_q-layer.0": 1507.6591, "encoder_q-layer.1": 1805.0129, "encoder_q-layer.10": 1217.5519, "encoder_q-layer.11": 2818.2485, "encoder_q-layer.2": 2195.2422, "encoder_q-layer.3": 2209.7498, "encoder_q-layer.4": 2217.9634, "encoder_q-layer.5": 2189.1733, "encoder_q-layer.6": 1893.5259, "encoder_q-layer.7": 1734.9696, "encoder_q-layer.8": 1513.1665, "encoder_q-layer.9": 1190.576, "epoch": 0.56, "inbatch_neg_score": 0.284, "inbatch_pos_score": 1.0088, "learning_rate": 2.3444444444444448e-05, "loss": 2.8392, "norm_diff": 0.0354, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 18.0741, "num_token_query": 52.4008, "num_token_union": 73.5331, "num_word_context": 202.232, "num_word_doc": 49.8269, "num_word_query": 39.955, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2910.656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2827, "query_norm": 1.4186, "queue_k_norm": 1.4519, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4008, "sent_len_1": 66.7993, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4712, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.8204, "doc_norm": 1.4488, "encoder_q-embeddings": 2160.8506, "encoder_q-layer.0": 1554.9867, "encoder_q-layer.1": 1593.0919, "encoder_q-layer.10": 1439.2175, "encoder_q-layer.11": 2848.9326, "encoder_q-layer.2": 1594.3013, "encoder_q-layer.3": 1442.0723, "encoder_q-layer.4": 1318.371, "encoder_q-layer.5": 1277.5798, "encoder_q-layer.6": 1357.8855, "encoder_q-layer.7": 1340.5334, "encoder_q-layer.8": 1440.5239, "encoder_q-layer.9": 1246.0635, "epoch": 0.57, "inbatch_neg_score": 0.2881, "inbatch_pos_score": 1.001, "learning_rate": 2.338888888888889e-05, "loss": 2.8204, "norm_diff": 0.0386, "norm_loss": 0.0, "num_token_doc": 66.8692, "num_token_overlap": 18.0329, "num_token_query": 52.3111, "num_token_union": 73.536, "num_word_context": 202.5268, "num_word_doc": 49.8981, "num_word_query": 39.868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2520.0503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2896, "query_norm": 1.4102, "queue_k_norm": 1.4524, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3111, "sent_len_1": 66.8692, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.445, "stdk": 0.0484, "stdq": 0.046, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.8131, "doc_norm": 1.449, "encoder_q-embeddings": 1101.7195, "encoder_q-layer.0": 680.2471, "encoder_q-layer.1": 739.7314, "encoder_q-layer.10": 1211.4968, "encoder_q-layer.11": 2827.1353, "encoder_q-layer.2": 839.1953, "encoder_q-layer.3": 855.2874, "encoder_q-layer.4": 886.4125, "encoder_q-layer.5": 920.0115, "encoder_q-layer.6": 1048.3604, "encoder_q-layer.7": 1163.6958, "encoder_q-layer.8": 1544.61, "encoder_q-layer.9": 1307.3741, "epoch": 0.57, "inbatch_neg_score": 0.2838, "inbatch_pos_score": 1.0059, "learning_rate": 2.3333333333333336e-05, "loss": 2.8131, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.8949, "num_token_overlap": 18.0263, "num_token_query": 52.3332, "num_token_union": 73.5441, "num_word_context": 202.2756, "num_word_doc": 49.9052, "num_word_query": 39.8993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1918.8834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.4157, "queue_k_norm": 1.4507, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3332, "sent_len_1": 66.8949, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7825, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.8325, "doc_norm": 1.4438, "encoder_q-embeddings": 1597.3943, "encoder_q-layer.0": 1093.0111, "encoder_q-layer.1": 1191.9351, "encoder_q-layer.10": 1323.6326, "encoder_q-layer.11": 2726.7383, "encoder_q-layer.2": 1376.7483, "encoder_q-layer.3": 1367.9772, "encoder_q-layer.4": 1441.9413, "encoder_q-layer.5": 1529.545, "encoder_q-layer.6": 1517.7253, "encoder_q-layer.7": 1384.0248, "encoder_q-layer.8": 1483.8538, "encoder_q-layer.9": 1248.3759, "epoch": 0.57, "inbatch_neg_score": 0.2778, "inbatch_pos_score": 0.9609, "learning_rate": 2.3277777777777778e-05, "loss": 2.8325, "norm_diff": 0.0324, "norm_loss": 0.0, "num_token_doc": 66.7105, "num_token_overlap": 17.974, "num_token_query": 52.2303, "num_token_union": 73.4652, "num_word_context": 202.3913, "num_word_doc": 49.7905, "num_word_query": 39.8168, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2272.0047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2759, "query_norm": 1.4118, "queue_k_norm": 1.4504, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2303, "sent_len_1": 66.7105, "sent_len_max_0": 127.9925, "sent_len_max_1": 186.65, "stdk": 0.0482, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.8225, "doc_norm": 1.4484, "encoder_q-embeddings": 1442.9263, "encoder_q-layer.0": 900.5107, "encoder_q-layer.1": 1070.3352, "encoder_q-layer.10": 1355.1525, "encoder_q-layer.11": 2785.373, "encoder_q-layer.2": 1159.3219, "encoder_q-layer.3": 1262.1917, "encoder_q-layer.4": 1345.6619, "encoder_q-layer.5": 1473.8718, "encoder_q-layer.6": 1303.0107, "encoder_q-layer.7": 1152.6833, "encoder_q-layer.8": 1300.7402, "encoder_q-layer.9": 1191.9816, "epoch": 0.57, "inbatch_neg_score": 0.2762, "inbatch_pos_score": 0.9839, "learning_rate": 2.3222222222222224e-05, "loss": 2.8225, "norm_diff": 0.0345, "norm_loss": 0.0, "num_token_doc": 66.6529, "num_token_overlap": 17.9817, "num_token_query": 52.2346, "num_token_union": 73.3988, "num_word_context": 201.6953, "num_word_doc": 49.718, "num_word_query": 39.8519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2149.9942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2751, "query_norm": 1.4139, "queue_k_norm": 1.4495, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2346, "sent_len_1": 66.6529, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.6912, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.8281, "doc_norm": 1.4522, "encoder_q-embeddings": 7181.7178, "encoder_q-layer.0": 5077.9175, "encoder_q-layer.1": 5966.377, "encoder_q-layer.10": 1231.354, "encoder_q-layer.11": 2705.8352, "encoder_q-layer.2": 7157.3496, "encoder_q-layer.3": 5863.2925, "encoder_q-layer.4": 4876.5728, "encoder_q-layer.5": 3691.0613, "encoder_q-layer.6": 3312.1931, "encoder_q-layer.7": 3263.5513, "encoder_q-layer.8": 2477.9426, "encoder_q-layer.9": 1320.3617, "epoch": 0.57, "inbatch_neg_score": 0.2732, "inbatch_pos_score": 0.979, "learning_rate": 2.3166666666666666e-05, "loss": 2.8281, "norm_diff": 0.0536, "norm_loss": 0.0, "num_token_doc": 66.7219, "num_token_overlap": 18.0138, "num_token_query": 52.2476, "num_token_union": 73.4242, "num_word_context": 202.2612, "num_word_doc": 49.7994, "num_word_query": 39.826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7059.6859, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2729, "query_norm": 1.3986, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2476, "sent_len_1": 66.7219, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9575, "stdk": 0.0486, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.8252, "doc_norm": 1.4521, "encoder_q-embeddings": 3709.8745, "encoder_q-layer.0": 2661.7021, "encoder_q-layer.1": 3069.3323, "encoder_q-layer.10": 1302.8878, "encoder_q-layer.11": 2802.6763, "encoder_q-layer.2": 3268.0527, "encoder_q-layer.3": 3328.7791, "encoder_q-layer.4": 3133.3145, "encoder_q-layer.5": 2723.377, "encoder_q-layer.6": 2600.5745, "encoder_q-layer.7": 2492.2456, "encoder_q-layer.8": 1689.6927, "encoder_q-layer.9": 1266.9391, "epoch": 0.57, "inbatch_neg_score": 0.2802, "inbatch_pos_score": 1.0127, "learning_rate": 2.3111111111111112e-05, "loss": 2.8252, "norm_diff": 0.0136, "norm_loss": 0.0, "num_token_doc": 66.5254, "num_token_overlap": 17.9507, "num_token_query": 52.1632, "num_token_union": 73.3365, "num_word_context": 201.9387, "num_word_doc": 49.6677, "num_word_query": 39.7495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4106.5618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2795, "query_norm": 1.4448, "queue_k_norm": 1.4495, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1632, "sent_len_1": 66.5254, "sent_len_max_0": 128.0, "sent_len_max_1": 186.84, "stdk": 0.0486, "stdq": 0.0473, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.8387, "doc_norm": 1.4488, "encoder_q-embeddings": 3289.6533, "encoder_q-layer.0": 2267.3369, "encoder_q-layer.1": 2557.5625, "encoder_q-layer.10": 1315.4022, "encoder_q-layer.11": 2840.8115, "encoder_q-layer.2": 2920.6331, "encoder_q-layer.3": 3207.4648, "encoder_q-layer.4": 3265.8567, "encoder_q-layer.5": 3038.7781, "encoder_q-layer.6": 3153.26, "encoder_q-layer.7": 3095.1182, "encoder_q-layer.8": 2344.3281, "encoder_q-layer.9": 1354.4203, "epoch": 0.57, "inbatch_neg_score": 0.2866, "inbatch_pos_score": 0.9727, "learning_rate": 2.3055555555555558e-05, "loss": 2.8387, "norm_diff": 0.0345, "norm_loss": 0.0, "num_token_doc": 66.5708, "num_token_overlap": 17.9946, "num_token_query": 52.26, "num_token_union": 73.3721, "num_word_context": 201.9947, "num_word_doc": 49.71, "num_word_query": 39.8685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4153.7323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2842, "query_norm": 1.4143, "queue_k_norm": 1.449, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.26, "sent_len_1": 66.5708, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3775, "stdk": 0.0484, "stdq": 0.0458, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.8366, "doc_norm": 1.4463, "encoder_q-embeddings": 2642.1621, "encoder_q-layer.0": 1776.2756, "encoder_q-layer.1": 2046.51, "encoder_q-layer.10": 1294.3392, "encoder_q-layer.11": 2728.7312, "encoder_q-layer.2": 2402.1553, "encoder_q-layer.3": 2686.5769, "encoder_q-layer.4": 2697.6313, "encoder_q-layer.5": 2427.5354, "encoder_q-layer.6": 2381.6592, "encoder_q-layer.7": 2356.1165, "encoder_q-layer.8": 1863.1853, "encoder_q-layer.9": 1266.765, "epoch": 0.57, "inbatch_neg_score": 0.2828, "inbatch_pos_score": 0.9756, "learning_rate": 2.3000000000000003e-05, "loss": 2.8366, "norm_diff": 0.0342, "norm_loss": 0.0, "num_token_doc": 66.7077, "num_token_overlap": 18.0186, "num_token_query": 52.1904, "num_token_union": 73.3759, "num_word_context": 202.4258, "num_word_doc": 49.8352, "num_word_query": 39.8022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3356.3367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.4121, "queue_k_norm": 1.4485, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1904, "sent_len_1": 66.7077, "sent_len_max_0": 127.995, "sent_len_max_1": 187.795, "stdk": 0.0484, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.8293, "doc_norm": 1.4468, "encoder_q-embeddings": 2261.8464, "encoder_q-layer.0": 1652.2263, "encoder_q-layer.1": 1942.0553, "encoder_q-layer.10": 1384.1018, "encoder_q-layer.11": 2858.925, "encoder_q-layer.2": 2323.5503, "encoder_q-layer.3": 2570.415, "encoder_q-layer.4": 2586.468, "encoder_q-layer.5": 2817.4072, "encoder_q-layer.6": 2524.9028, "encoder_q-layer.7": 1792.014, "encoder_q-layer.8": 1623.2028, "encoder_q-layer.9": 1324.2235, "epoch": 0.57, "inbatch_neg_score": 0.2867, "inbatch_pos_score": 0.9907, "learning_rate": 2.2944444444444446e-05, "loss": 2.8293, "norm_diff": 0.0086, "norm_loss": 0.0, "num_token_doc": 66.842, "num_token_overlap": 17.9941, "num_token_query": 52.3522, "num_token_union": 73.5946, "num_word_context": 202.5128, "num_word_doc": 49.9084, "num_word_query": 39.9014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3220.8688, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2869, "query_norm": 1.4463, "queue_k_norm": 1.4487, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3522, "sent_len_1": 66.842, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.7937, "stdk": 0.0484, "stdq": 0.0468, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.837, "doc_norm": 1.4399, "encoder_q-embeddings": 3017.1167, "encoder_q-layer.0": 2179.3748, "encoder_q-layer.1": 2381.709, "encoder_q-layer.10": 1332.681, "encoder_q-layer.11": 2696.552, "encoder_q-layer.2": 2781.9001, "encoder_q-layer.3": 2722.4888, "encoder_q-layer.4": 3293.4255, "encoder_q-layer.5": 3137.0203, "encoder_q-layer.6": 3395.811, "encoder_q-layer.7": 2698.8276, "encoder_q-layer.8": 1737.936, "encoder_q-layer.9": 1301.2794, "epoch": 0.57, "inbatch_neg_score": 0.2862, "inbatch_pos_score": 0.9922, "learning_rate": 2.288888888888889e-05, "loss": 2.837, "norm_diff": 0.0101, "norm_loss": 0.0, "num_token_doc": 66.7183, "num_token_overlap": 17.9614, "num_token_query": 52.1426, "num_token_union": 73.4875, "num_word_context": 202.4093, "num_word_doc": 49.7979, "num_word_query": 39.7382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3852.805, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2859, "query_norm": 1.4317, "queue_k_norm": 1.4478, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1426, "sent_len_1": 66.7183, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.535, "stdk": 0.0481, "stdq": 0.046, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.8253, "doc_norm": 1.4431, "encoder_q-embeddings": 3178.2886, "encoder_q-layer.0": 2134.0037, "encoder_q-layer.1": 2539.4314, "encoder_q-layer.10": 1348.4032, "encoder_q-layer.11": 2726.8748, "encoder_q-layer.2": 2524.7588, "encoder_q-layer.3": 2508.9353, "encoder_q-layer.4": 2425.4844, "encoder_q-layer.5": 2340.3691, "encoder_q-layer.6": 2511.5278, "encoder_q-layer.7": 2234.1328, "encoder_q-layer.8": 1923.3625, "encoder_q-layer.9": 1318.1067, "epoch": 0.58, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 1.0, "learning_rate": 2.2833333333333334e-05, "loss": 2.8253, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.7244, "num_token_overlap": 17.9764, "num_token_query": 52.3212, "num_token_union": 73.57, "num_word_context": 202.7612, "num_word_doc": 49.8167, "num_word_query": 39.9066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3534.0147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2915, "query_norm": 1.4436, "queue_k_norm": 1.4481, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3212, "sent_len_1": 66.7244, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.0588, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.8311, "doc_norm": 1.4451, "encoder_q-embeddings": 2035.8997, "encoder_q-layer.0": 1389.7736, "encoder_q-layer.1": 1700.897, "encoder_q-layer.10": 1227.9719, "encoder_q-layer.11": 2842.6003, "encoder_q-layer.2": 2027.9722, "encoder_q-layer.3": 2000.1514, "encoder_q-layer.4": 2008.677, "encoder_q-layer.5": 1966.2292, "encoder_q-layer.6": 1598.2095, "encoder_q-layer.7": 1378.1503, "encoder_q-layer.8": 1415.6228, "encoder_q-layer.9": 1258.3412, "epoch": 0.58, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 1.0068, "learning_rate": 2.277777777777778e-05, "loss": 2.8311, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.7099, "num_token_overlap": 17.9924, "num_token_query": 52.1569, "num_token_union": 73.4235, "num_word_context": 202.3232, "num_word_doc": 49.7991, "num_word_query": 39.7542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2721.4839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2915, "query_norm": 1.4458, "queue_k_norm": 1.4469, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1569, "sent_len_1": 66.7099, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9938, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.8209, "doc_norm": 1.4443, "encoder_q-embeddings": 1297.4258, "encoder_q-layer.0": 831.3081, "encoder_q-layer.1": 918.6305, "encoder_q-layer.10": 1260.5834, "encoder_q-layer.11": 2798.5967, "encoder_q-layer.2": 1048.6541, "encoder_q-layer.3": 1118.0748, "encoder_q-layer.4": 1080.3662, "encoder_q-layer.5": 1035.1545, "encoder_q-layer.6": 1159.2716, "encoder_q-layer.7": 1165.9078, "encoder_q-layer.8": 1352.8163, "encoder_q-layer.9": 1185.5892, "epoch": 0.58, "inbatch_neg_score": 0.3008, "inbatch_pos_score": 0.9893, "learning_rate": 2.2722222222222222e-05, "loss": 2.8209, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.7307, "num_token_overlap": 18.0491, "num_token_query": 52.2705, "num_token_union": 73.3929, "num_word_context": 202.0266, "num_word_doc": 49.7796, "num_word_query": 39.8389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1987.2117, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.4369, "queue_k_norm": 1.4464, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2705, "sent_len_1": 66.7307, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.4338, "stdk": 0.0483, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.831, "doc_norm": 1.4448, "encoder_q-embeddings": 2152.4614, "encoder_q-layer.0": 1596.7537, "encoder_q-layer.1": 1900.5078, "encoder_q-layer.10": 1300.5155, "encoder_q-layer.11": 2765.8118, "encoder_q-layer.2": 1701.3467, "encoder_q-layer.3": 1445.9888, "encoder_q-layer.4": 1329.4584, "encoder_q-layer.5": 1282.0078, "encoder_q-layer.6": 1297.4355, "encoder_q-layer.7": 1284.3052, "encoder_q-layer.8": 1405.9175, "encoder_q-layer.9": 1263.156, "epoch": 0.58, "inbatch_neg_score": 0.2966, "inbatch_pos_score": 1.0146, "learning_rate": 2.2666666666666668e-05, "loss": 2.831, "norm_diff": 0.0088, "norm_loss": 0.0, "num_token_doc": 67.0037, "num_token_overlap": 18.0071, "num_token_query": 52.125, "num_token_union": 73.5001, "num_word_context": 202.5864, "num_word_doc": 50.0165, "num_word_query": 39.7472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2552.9345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.44, "queue_k_norm": 1.4496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.125, "sent_len_1": 67.0037, "sent_len_max_0": 127.9963, "sent_len_max_1": 192.2438, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.8141, "doc_norm": 1.4517, "encoder_q-embeddings": 1137.4839, "encoder_q-layer.0": 721.2142, "encoder_q-layer.1": 759.6733, "encoder_q-layer.10": 1276.8175, "encoder_q-layer.11": 2746.4609, "encoder_q-layer.2": 843.9075, "encoder_q-layer.3": 850.5105, "encoder_q-layer.4": 843.2027, "encoder_q-layer.5": 844.8553, "encoder_q-layer.6": 989.3116, "encoder_q-layer.7": 1093.6924, "encoder_q-layer.8": 1325.7578, "encoder_q-layer.9": 1177.9144, "epoch": 0.58, "inbatch_neg_score": 0.3019, "inbatch_pos_score": 1.0098, "learning_rate": 2.2611111111111113e-05, "loss": 2.8141, "norm_diff": 0.0272, "norm_loss": 0.0, "num_token_doc": 66.8153, "num_token_overlap": 18.028, "num_token_query": 52.2142, "num_token_union": 73.4408, "num_word_context": 202.1198, "num_word_doc": 49.8533, "num_word_query": 39.7878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1864.5514, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.4245, "queue_k_norm": 1.4507, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2142, "sent_len_1": 66.8153, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1775, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.8201, "doc_norm": 1.4474, "encoder_q-embeddings": 2216.3386, "encoder_q-layer.0": 1379.6794, "encoder_q-layer.1": 1486.9774, "encoder_q-layer.10": 2622.3665, "encoder_q-layer.11": 5667.0913, "encoder_q-layer.2": 1618.1306, "encoder_q-layer.3": 1673.3348, "encoder_q-layer.4": 1698.6523, "encoder_q-layer.5": 1742.0293, "encoder_q-layer.6": 1997.0599, "encoder_q-layer.7": 2238.1794, "encoder_q-layer.8": 2774.7065, "encoder_q-layer.9": 2447.449, "epoch": 0.58, "inbatch_neg_score": 0.2995, "inbatch_pos_score": 1.0059, "learning_rate": 2.255555555555556e-05, "loss": 2.8201, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.7872, "num_token_overlap": 18.0321, "num_token_query": 52.3516, "num_token_union": 73.5093, "num_word_context": 202.5402, "num_word_doc": 49.8066, "num_word_query": 39.928, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3818.1055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.4268, "queue_k_norm": 1.4488, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3516, "sent_len_1": 66.7872, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7875, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.833, "doc_norm": 1.4494, "encoder_q-embeddings": 6403.7788, "encoder_q-layer.0": 4832.7759, "encoder_q-layer.1": 5145.1807, "encoder_q-layer.10": 2340.2344, "encoder_q-layer.11": 5483.3154, "encoder_q-layer.2": 4406.0332, "encoder_q-layer.3": 4237.6997, "encoder_q-layer.4": 4266.1851, "encoder_q-layer.5": 4520.6831, "encoder_q-layer.6": 3871.2961, "encoder_q-layer.7": 2924.0703, "encoder_q-layer.8": 2764.3169, "encoder_q-layer.9": 2246.1145, "epoch": 0.58, "inbatch_neg_score": 0.2993, "inbatch_pos_score": 1.0127, "learning_rate": 2.25e-05, "loss": 2.833, "norm_diff": 0.0221, "norm_loss": 0.0, "num_token_doc": 66.8316, "num_token_overlap": 18.0315, "num_token_query": 52.16, "num_token_union": 73.41, "num_word_context": 202.5582, "num_word_doc": 49.8855, "num_word_query": 39.7495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6641.9023, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2983, "query_norm": 1.4281, "queue_k_norm": 1.4472, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.16, "sent_len_1": 66.8316, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8938, "stdk": 0.0485, "stdq": 0.0463, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.8243, "doc_norm": 1.4465, "encoder_q-embeddings": 3587.5444, "encoder_q-layer.0": 2338.2671, "encoder_q-layer.1": 2701.1011, "encoder_q-layer.10": 2398.4502, "encoder_q-layer.11": 5753.7876, "encoder_q-layer.2": 3078.4141, "encoder_q-layer.3": 3271.7109, "encoder_q-layer.4": 3349.1702, "encoder_q-layer.5": 2928.1682, "encoder_q-layer.6": 2855.571, "encoder_q-layer.7": 2614.3611, "encoder_q-layer.8": 2723.8691, "encoder_q-layer.9": 2432.498, "epoch": 0.58, "inbatch_neg_score": 0.2928, "inbatch_pos_score": 1.0049, "learning_rate": 2.2444444444444447e-05, "loss": 2.8243, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.6309, "num_token_overlap": 17.9075, "num_token_query": 51.9415, "num_token_union": 73.2299, "num_word_context": 202.0279, "num_word_doc": 49.6914, "num_word_query": 39.6138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4827.2472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.4393, "queue_k_norm": 1.4495, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 51.9415, "sent_len_1": 66.6309, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7075, "stdk": 0.0484, "stdq": 0.047, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.8357, "doc_norm": 1.4466, "encoder_q-embeddings": 3338.8298, "encoder_q-layer.0": 2312.145, "encoder_q-layer.1": 2593.6978, "encoder_q-layer.10": 2423.3979, "encoder_q-layer.11": 5768.9551, "encoder_q-layer.2": 2960.8494, "encoder_q-layer.3": 3153.8677, "encoder_q-layer.4": 3321.3289, "encoder_q-layer.5": 3451.156, "encoder_q-layer.6": 3153.1675, "encoder_q-layer.7": 3149.0933, "encoder_q-layer.8": 3301.6562, "encoder_q-layer.9": 2469.3965, "epoch": 0.58, "inbatch_neg_score": 0.2915, "inbatch_pos_score": 0.9985, "learning_rate": 2.238888888888889e-05, "loss": 2.8357, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.8316, "num_token_overlap": 17.9337, "num_token_query": 52.0123, "num_token_union": 73.3844, "num_word_context": 202.1944, "num_word_doc": 49.855, "num_word_query": 39.6387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4870.4322, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2905, "query_norm": 1.436, "queue_k_norm": 1.4489, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0123, "sent_len_1": 66.8316, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0538, "stdk": 0.0483, "stdq": 0.047, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.804, "doc_norm": 1.4414, "encoder_q-embeddings": 2331.9185, "encoder_q-layer.0": 1542.7925, "encoder_q-layer.1": 1722.251, "encoder_q-layer.10": 2555.4924, "encoder_q-layer.11": 5598.4453, "encoder_q-layer.2": 1900.1604, "encoder_q-layer.3": 1923.2715, "encoder_q-layer.4": 2138.8757, "encoder_q-layer.5": 2241.4116, "encoder_q-layer.6": 2539.1201, "encoder_q-layer.7": 3236.6025, "encoder_q-layer.8": 3744.7969, "encoder_q-layer.9": 2909.2021, "epoch": 0.58, "inbatch_neg_score": 0.2856, "inbatch_pos_score": 0.9946, "learning_rate": 2.2333333333333335e-05, "loss": 2.804, "norm_diff": 0.0365, "norm_loss": 0.0, "num_token_doc": 66.7219, "num_token_overlap": 18.0872, "num_token_query": 52.2037, "num_token_union": 73.3662, "num_word_context": 202.1196, "num_word_doc": 49.7845, "num_word_query": 39.8256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4298.382, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2859, "query_norm": 1.4049, "queue_k_norm": 1.45, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2037, "sent_len_1": 66.7219, "sent_len_max_0": 127.995, "sent_len_max_1": 188.635, "stdk": 0.0482, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.8134, "doc_norm": 1.4464, "encoder_q-embeddings": 1597.9396, "encoder_q-layer.0": 1072.4069, "encoder_q-layer.1": 1266.2302, "encoder_q-layer.10": 1300.9677, "encoder_q-layer.11": 2778.25, "encoder_q-layer.2": 1485.4176, "encoder_q-layer.3": 1506.7316, "encoder_q-layer.4": 1522.9387, "encoder_q-layer.5": 1516.3599, "encoder_q-layer.6": 1457.4261, "encoder_q-layer.7": 1431.6519, "encoder_q-layer.8": 1512.3916, "encoder_q-layer.9": 1276.5194, "epoch": 0.58, "inbatch_neg_score": 0.2882, "inbatch_pos_score": 1.0137, "learning_rate": 2.2277777777777778e-05, "loss": 2.8134, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.9761, "num_token_overlap": 18.105, "num_token_query": 52.3752, "num_token_union": 73.5712, "num_word_context": 202.6933, "num_word_doc": 49.9746, "num_word_query": 39.9241, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2314.9506, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4236, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3752, "sent_len_1": 66.9761, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6362, "stdk": 0.0484, "stdq": 0.0466, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 2.8252, "doc_norm": 1.4412, "encoder_q-embeddings": 1617.1364, "encoder_q-layer.0": 1147.4932, "encoder_q-layer.1": 1306.9896, "encoder_q-layer.10": 1311.951, "encoder_q-layer.11": 2953.5249, "encoder_q-layer.2": 1366.7688, "encoder_q-layer.3": 1355.802, "encoder_q-layer.4": 1256.9579, "encoder_q-layer.5": 1029.7085, "encoder_q-layer.6": 1197.3835, "encoder_q-layer.7": 1290.006, "encoder_q-layer.8": 1543.172, "encoder_q-layer.9": 1343.2515, "epoch": 0.59, "inbatch_neg_score": 0.2817, "inbatch_pos_score": 0.9839, "learning_rate": 2.2222222222222223e-05, "loss": 2.8252, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.7578, "num_token_overlap": 18.0031, "num_token_query": 52.3017, "num_token_union": 73.4968, "num_word_context": 202.3691, "num_word_doc": 49.7749, "num_word_query": 39.8871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2277.7364, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.4132, "queue_k_norm": 1.4489, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3017, "sent_len_1": 66.7578, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.9688, "stdk": 0.0482, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 28.969, "dev_samples_per_second": 2.209, "dev_steps_per_second": 0.035, "epoch": 0.59, "step": 60000, "test_accuracy": 93.3349609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3941124379634857, "test_doc_norm": 1.412374496459961, "test_inbatch_neg_score": 0.6232419013977051, "test_inbatch_pos_score": 1.5427241325378418, "test_loss": 0.3941124379634857, "test_loss_align": 1.032270908355713, "test_loss_unif": 3.834125518798828, "test_loss_unif_q@queue": 3.8341257572174072, "test_norm_diff": 0.03486579284071922, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.27379167079925537, "test_query_norm": 1.4472403526306152, "test_queue_k_norm": 1.4486746788024902, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04193231835961342, "test_stdq": 0.04248354956507683, "test_stdqueue_k": 0.04857736453413963, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.969, "dev_samples_per_second": 2.209, "dev_steps_per_second": 0.035, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.37716, "eval_beir-arguana_recall@10": 0.63442, "eval_beir-arguana_recall@100": 0.91181, "eval_beir-arguana_recall@20": 0.75605, "eval_beir-avg_ndcg@10": 0.3597008333333333, "eval_beir-avg_recall@10": 0.43060425, "eval_beir-avg_recall@100": 0.6143195, "eval_beir-avg_recall@20": 0.4911464166666667, "eval_beir-cqadupstack_ndcg@10": 0.2518283333333333, "eval_beir-cqadupstack_recall@10": 0.34261250000000004, "eval_beir-cqadupstack_recall@100": 0.578825, "eval_beir-cqadupstack_recall@20": 0.4126541666666667, "eval_beir-fiqa_ndcg@10": 0.24019, "eval_beir-fiqa_recall@10": 0.30247, "eval_beir-fiqa_recall@100": 0.55557, "eval_beir-fiqa_recall@20": 0.38368, "eval_beir-nfcorpus_ndcg@10": 0.28167, "eval_beir-nfcorpus_recall@10": 0.13771, "eval_beir-nfcorpus_recall@100": 0.26352, "eval_beir-nfcorpus_recall@20": 0.16559, "eval_beir-nq_ndcg@10": 0.25209, "eval_beir-nq_recall@10": 0.41783, "eval_beir-nq_recall@100": 0.76755, "eval_beir-nq_recall@20": 0.54193, "eval_beir-quora_ndcg@10": 0.7668, "eval_beir-quora_recall@10": 0.87609, "eval_beir-quora_recall@100": 0.9724, "eval_beir-quora_recall@20": 0.91904, "eval_beir-scidocs_ndcg@10": 0.14873, "eval_beir-scidocs_recall@10": 0.15553, "eval_beir-scidocs_recall@100": 0.35628, "eval_beir-scidocs_recall@20": 0.20978, "eval_beir-scifact_ndcg@10": 0.61686, "eval_beir-scifact_recall@10": 0.77067, "eval_beir-scifact_recall@100": 0.91822, "eval_beir-scifact_recall@20": 0.81689, "eval_beir-trec-covid_ndcg@10": 0.5078, "eval_beir-trec-covid_recall@10": 0.552, "eval_beir-trec-covid_recall@100": 0.405, "eval_beir-trec-covid_recall@20": 0.524, "eval_beir-webis-touche2020_ndcg@10": 0.15388, "eval_beir-webis-touche2020_recall@10": 0.11671, "eval_beir-webis-touche2020_recall@100": 0.41402, "eval_beir-webis-touche2020_recall@20": 0.18185, "eval_senteval-avg_sts": 0.7462285425583493, "eval_senteval-sickr_spearman": 0.7103462724364767, "eval_senteval-stsb_spearman": 0.7821108126802216, "step": 60000, "test_accuracy": 93.3349609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3941124379634857, "test_doc_norm": 1.412374496459961, "test_inbatch_neg_score": 0.6232419013977051, "test_inbatch_pos_score": 1.5427241325378418, "test_loss": 0.3941124379634857, "test_loss_align": 1.032270908355713, "test_loss_unif": 3.834125518798828, "test_loss_unif_q@queue": 3.8341257572174072, "test_norm_diff": 0.03486579284071922, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.27379167079925537, "test_query_norm": 1.4472403526306152, "test_queue_k_norm": 1.4486746788024902, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04193231835961342, "test_stdq": 0.04248354956507683, "test_stdqueue_k": 0.04857736453413963, "test_stdqueue_q": 0.0 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.8124, "doc_norm": 1.4498, "encoder_q-embeddings": 3029.668, "encoder_q-layer.0": 2210.2476, "encoder_q-layer.1": 2477.9421, "encoder_q-layer.10": 1283.1436, "encoder_q-layer.11": 2673.9834, "encoder_q-layer.2": 2606.5669, "encoder_q-layer.3": 2722.4456, "encoder_q-layer.4": 3054.3442, "encoder_q-layer.5": 3312.5923, "encoder_q-layer.6": 3655.5081, "encoder_q-layer.7": 4478.9116, "encoder_q-layer.8": 3355.0222, "encoder_q-layer.9": 1422.012, "epoch": 0.59, "inbatch_neg_score": 0.2868, "inbatch_pos_score": 0.9932, "learning_rate": 2.216666666666667e-05, "loss": 2.8124, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.6721, "num_token_overlap": 17.9926, "num_token_query": 52.3252, "num_token_union": 73.4931, "num_word_context": 202.3758, "num_word_doc": 49.7661, "num_word_query": 39.8993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4412.5622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2871, "query_norm": 1.3983, "queue_k_norm": 1.4494, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3252, "sent_len_1": 66.6721, "sent_len_max_0": 128.0, "sent_len_max_1": 187.7763, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.8349, "doc_norm": 1.4493, "encoder_q-embeddings": 3212.9399, "encoder_q-layer.0": 2295.1658, "encoder_q-layer.1": 2562.5054, "encoder_q-layer.10": 1406.9711, "encoder_q-layer.11": 2705.4211, "encoder_q-layer.2": 2669.3193, "encoder_q-layer.3": 2476.7834, "encoder_q-layer.4": 2632.6606, "encoder_q-layer.5": 2863.0129, "encoder_q-layer.6": 3580.4595, "encoder_q-layer.7": 3912.9612, "encoder_q-layer.8": 5072.1128, "encoder_q-layer.9": 2556.6035, "epoch": 0.59, "inbatch_neg_score": 0.2886, "inbatch_pos_score": 1.0029, "learning_rate": 2.211111111111111e-05, "loss": 2.8349, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.6232, "num_token_overlap": 17.9063, "num_token_query": 52.089, "num_token_union": 73.3507, "num_word_context": 202.2782, "num_word_doc": 49.7271, "num_word_query": 39.7086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4653.6449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2893, "query_norm": 1.4109, "queue_k_norm": 1.4477, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.089, "sent_len_1": 66.6232, "sent_len_max_0": 127.995, "sent_len_max_1": 189.535, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 2.8228, "doc_norm": 1.4467, "encoder_q-embeddings": 1793.0269, "encoder_q-layer.0": 1222.5189, "encoder_q-layer.1": 1370.9314, "encoder_q-layer.10": 1399.3448, "encoder_q-layer.11": 2780.6113, "encoder_q-layer.2": 1610.2605, "encoder_q-layer.3": 1655.1068, "encoder_q-layer.4": 1619.9746, "encoder_q-layer.5": 1639.9896, "encoder_q-layer.6": 1802.2726, "encoder_q-layer.7": 1817.9929, "encoder_q-layer.8": 1940.1862, "encoder_q-layer.9": 1391.1979, "epoch": 0.59, "inbatch_neg_score": 0.278, "inbatch_pos_score": 0.979, "learning_rate": 2.2055555555555557e-05, "loss": 2.8228, "norm_diff": 0.0295, "norm_loss": 0.0, "num_token_doc": 66.7408, "num_token_overlap": 17.9526, "num_token_query": 52.1651, "num_token_union": 73.4139, "num_word_context": 202.6482, "num_word_doc": 49.8381, "num_word_query": 39.7595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2578.7927, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.4172, "queue_k_norm": 1.449, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1651, "sent_len_1": 66.7408, "sent_len_max_0": 127.98, "sent_len_max_1": 191.8212, "stdk": 0.0484, "stdq": 0.0467, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.8128, "doc_norm": 1.453, "encoder_q-embeddings": 1561.9613, "encoder_q-layer.0": 986.7236, "encoder_q-layer.1": 1162.1443, "encoder_q-layer.10": 1276.427, "encoder_q-layer.11": 2527.0479, "encoder_q-layer.2": 1420.9418, "encoder_q-layer.3": 1374.2987, "encoder_q-layer.4": 1315.8064, "encoder_q-layer.5": 1254.4199, "encoder_q-layer.6": 1250.901, "encoder_q-layer.7": 1260.8096, "encoder_q-layer.8": 1385.6547, "encoder_q-layer.9": 1141.0924, "epoch": 0.59, "inbatch_neg_score": 0.286, "inbatch_pos_score": 1.0322, "learning_rate": 2.2000000000000003e-05, "loss": 2.8128, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.7893, "num_token_overlap": 18.0197, "num_token_query": 52.1045, "num_token_union": 73.3864, "num_word_context": 202.1687, "num_word_doc": 49.8246, "num_word_query": 39.6972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2152.1714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2859, "query_norm": 1.4282, "queue_k_norm": 1.4486, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1045, "sent_len_1": 66.7893, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.3675, "stdk": 0.0487, "stdq": 0.047, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.8186, "doc_norm": 1.4445, "encoder_q-embeddings": 1136.5844, "encoder_q-layer.0": 736.9612, "encoder_q-layer.1": 842.9706, "encoder_q-layer.10": 1373.1835, "encoder_q-layer.11": 2698.7983, "encoder_q-layer.2": 953.6512, "encoder_q-layer.3": 958.9674, "encoder_q-layer.4": 929.8972, "encoder_q-layer.5": 897.4481, "encoder_q-layer.6": 1002.4935, "encoder_q-layer.7": 1079.6381, "encoder_q-layer.8": 1212.6173, "encoder_q-layer.9": 1147.8992, "epoch": 0.59, "inbatch_neg_score": 0.2874, "inbatch_pos_score": 1.0283, "learning_rate": 2.1944444444444445e-05, "loss": 2.8186, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 66.6362, "num_token_overlap": 17.9841, "num_token_query": 52.3543, "num_token_union": 73.4845, "num_word_context": 202.2246, "num_word_doc": 49.7374, "num_word_query": 39.887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1832.6108, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2874, "query_norm": 1.4268, "queue_k_norm": 1.4472, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3543, "sent_len_1": 66.6362, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3462, "stdk": 0.0483, "stdq": 0.0469, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.8179, "doc_norm": 1.4418, "encoder_q-embeddings": 1516.6171, "encoder_q-layer.0": 1013.5807, "encoder_q-layer.1": 1169.2587, "encoder_q-layer.10": 1239.6531, "encoder_q-layer.11": 2773.406, "encoder_q-layer.2": 1320.6389, "encoder_q-layer.3": 1374.9778, "encoder_q-layer.4": 1392.1078, "encoder_q-layer.5": 1252.7574, "encoder_q-layer.6": 1315.1204, "encoder_q-layer.7": 1397.6167, "encoder_q-layer.8": 1415.9934, "encoder_q-layer.9": 1129.8545, "epoch": 0.59, "inbatch_neg_score": 0.2823, "inbatch_pos_score": 0.9907, "learning_rate": 2.188888888888889e-05, "loss": 2.8179, "norm_diff": 0.0496, "norm_loss": 0.0, "num_token_doc": 66.761, "num_token_overlap": 17.991, "num_token_query": 52.2254, "num_token_union": 73.4028, "num_word_context": 202.3756, "num_word_doc": 49.7965, "num_word_query": 39.814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2210.9627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.3921, "queue_k_norm": 1.4484, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2254, "sent_len_1": 66.761, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9263, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.7951, "doc_norm": 1.4466, "encoder_q-embeddings": 5890.0396, "encoder_q-layer.0": 4137.0312, "encoder_q-layer.1": 4809.73, "encoder_q-layer.10": 1335.0387, "encoder_q-layer.11": 2818.8848, "encoder_q-layer.2": 5599.4834, "encoder_q-layer.3": 5827.0229, "encoder_q-layer.4": 5306.2212, "encoder_q-layer.5": 5372.4292, "encoder_q-layer.6": 5110.8506, "encoder_q-layer.7": 3766.5718, "encoder_q-layer.8": 2190.0876, "encoder_q-layer.9": 1295.938, "epoch": 0.59, "inbatch_neg_score": 0.282, "inbatch_pos_score": 0.9883, "learning_rate": 2.1833333333333333e-05, "loss": 2.7951, "norm_diff": 0.0423, "norm_loss": 0.0, "num_token_doc": 66.9862, "num_token_overlap": 18.0698, "num_token_query": 52.2976, "num_token_union": 73.5654, "num_word_context": 202.3037, "num_word_doc": 49.9903, "num_word_query": 39.8542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6558.0188, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2808, "query_norm": 1.4043, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2976, "sent_len_1": 66.9862, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6575, "stdk": 0.0484, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.8088, "doc_norm": 1.4414, "encoder_q-embeddings": 1281.7321, "encoder_q-layer.0": 853.9232, "encoder_q-layer.1": 957.6357, "encoder_q-layer.10": 1287.5851, "encoder_q-layer.11": 2756.022, "encoder_q-layer.2": 1111.1287, "encoder_q-layer.3": 1121.1813, "encoder_q-layer.4": 1167.5233, "encoder_q-layer.5": 1201.4493, "encoder_q-layer.6": 1214.22, "encoder_q-layer.7": 1232.8815, "encoder_q-layer.8": 1337.6627, "encoder_q-layer.9": 1245.0889, "epoch": 0.59, "inbatch_neg_score": 0.2827, "inbatch_pos_score": 1.0117, "learning_rate": 2.177777777777778e-05, "loss": 2.8088, "norm_diff": 0.0262, "norm_loss": 0.0, "num_token_doc": 66.7213, "num_token_overlap": 18.0218, "num_token_query": 52.2685, "num_token_union": 73.4421, "num_word_context": 202.1377, "num_word_doc": 49.7959, "num_word_query": 39.8644, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2034.8253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.4153, "queue_k_norm": 1.4496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2685, "sent_len_1": 66.7213, "sent_len_max_0": 128.0, "sent_len_max_1": 188.19, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7937, "doc_norm": 1.4475, "encoder_q-embeddings": 1973.5426, "encoder_q-layer.0": 1383.3936, "encoder_q-layer.1": 1529.3097, "encoder_q-layer.10": 1186.9946, "encoder_q-layer.11": 2711.3362, "encoder_q-layer.2": 1765.2489, "encoder_q-layer.3": 1616.0939, "encoder_q-layer.4": 1385.3912, "encoder_q-layer.5": 1210.7451, "encoder_q-layer.6": 1427.2909, "encoder_q-layer.7": 1319.6897, "encoder_q-layer.8": 1382.9908, "encoder_q-layer.9": 1196.783, "epoch": 0.59, "inbatch_neg_score": 0.2802, "inbatch_pos_score": 0.9971, "learning_rate": 2.1722222222222225e-05, "loss": 2.7937, "norm_diff": 0.0457, "norm_loss": 0.0, "num_token_doc": 66.7203, "num_token_overlap": 18.0741, "num_token_query": 52.1937, "num_token_union": 73.2918, "num_word_context": 202.5164, "num_word_doc": 49.7948, "num_word_query": 39.8078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2438.9608, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2786, "query_norm": 1.4018, "queue_k_norm": 1.4489, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1937, "sent_len_1": 66.7203, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.4375, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7962, "doc_norm": 1.4433, "encoder_q-embeddings": 1457.6754, "encoder_q-layer.0": 1011.8529, "encoder_q-layer.1": 1170.8551, "encoder_q-layer.10": 1320.3756, "encoder_q-layer.11": 2798.2046, "encoder_q-layer.2": 1312.625, "encoder_q-layer.3": 1229.6631, "encoder_q-layer.4": 1138.3469, "encoder_q-layer.5": 1139.3086, "encoder_q-layer.6": 1295.1328, "encoder_q-layer.7": 1410.8481, "encoder_q-layer.8": 1505.8381, "encoder_q-layer.9": 1238.6698, "epoch": 0.6, "inbatch_neg_score": 0.2821, "inbatch_pos_score": 0.9941, "learning_rate": 2.1666666666666667e-05, "loss": 2.7962, "norm_diff": 0.0195, "norm_loss": 0.0, "num_token_doc": 66.6662, "num_token_overlap": 18.014, "num_token_query": 52.2651, "num_token_union": 73.4429, "num_word_context": 202.629, "num_word_doc": 49.7815, "num_word_query": 39.8398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2201.7043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.4269, "queue_k_norm": 1.4488, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2651, "sent_len_1": 66.6662, "sent_len_max_0": 128.0, "sent_len_max_1": 188.73, "stdk": 0.0483, "stdq": 0.0468, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.8176, "doc_norm": 1.444, "encoder_q-embeddings": 13919.5, "encoder_q-layer.0": 9773.3271, "encoder_q-layer.1": 11635.9902, "encoder_q-layer.10": 1313.7736, "encoder_q-layer.11": 2767.7542, "encoder_q-layer.2": 13440.8672, "encoder_q-layer.3": 13657.3809, "encoder_q-layer.4": 14704.3564, "encoder_q-layer.5": 14382.2266, "encoder_q-layer.6": 17962.5605, "encoder_q-layer.7": 13627.6602, "encoder_q-layer.8": 6137.8267, "encoder_q-layer.9": 1644.2944, "epoch": 0.6, "inbatch_neg_score": 0.2825, "inbatch_pos_score": 0.9883, "learning_rate": 2.1611111111111113e-05, "loss": 2.8176, "norm_diff": 0.0258, "norm_loss": 0.0, "num_token_doc": 66.7628, "num_token_overlap": 17.952, "num_token_query": 52.1385, "num_token_union": 73.3888, "num_word_context": 202.3305, "num_word_doc": 49.7815, "num_word_query": 39.7302, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17482.216, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2808, "query_norm": 1.4208, "queue_k_norm": 1.448, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1385, "sent_len_1": 66.7628, "sent_len_max_0": 128.0, "sent_len_max_1": 189.65, "stdk": 0.0483, "stdq": 0.0466, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.8064, "doc_norm": 1.443, "encoder_q-embeddings": 1465.5568, "encoder_q-layer.0": 931.5396, "encoder_q-layer.1": 1048.4071, "encoder_q-layer.10": 1271.1881, "encoder_q-layer.11": 2809.1621, "encoder_q-layer.2": 1220.3062, "encoder_q-layer.3": 1209.1871, "encoder_q-layer.4": 1219.4229, "encoder_q-layer.5": 1338.0438, "encoder_q-layer.6": 1258.6505, "encoder_q-layer.7": 1147.5348, "encoder_q-layer.8": 1318.4968, "encoder_q-layer.9": 1206.6721, "epoch": 0.6, "inbatch_neg_score": 0.2821, "inbatch_pos_score": 0.9834, "learning_rate": 2.1555555555555555e-05, "loss": 2.8064, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.8222, "num_token_overlap": 18.0043, "num_token_query": 52.3577, "num_token_union": 73.5648, "num_word_context": 202.308, "num_word_doc": 49.8787, "num_word_query": 39.9476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2148.9956, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.4036, "queue_k_norm": 1.4478, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3577, "sent_len_1": 66.8222, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1562, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.7834, "doc_norm": 1.4491, "encoder_q-embeddings": 1501.463, "encoder_q-layer.0": 957.8265, "encoder_q-layer.1": 1153.7877, "encoder_q-layer.10": 1311.3937, "encoder_q-layer.11": 2562.1057, "encoder_q-layer.2": 1296.1482, "encoder_q-layer.3": 1409.8923, "encoder_q-layer.4": 1436.0499, "encoder_q-layer.5": 1343.5483, "encoder_q-layer.6": 1553.3018, "encoder_q-layer.7": 1503.8507, "encoder_q-layer.8": 1624.3197, "encoder_q-layer.9": 1264.4626, "epoch": 0.6, "inbatch_neg_score": 0.2758, "inbatch_pos_score": 1.0186, "learning_rate": 2.15e-05, "loss": 2.7834, "norm_diff": 0.0185, "norm_loss": 0.0, "num_token_doc": 66.7343, "num_token_overlap": 18.0942, "num_token_query": 52.2999, "num_token_union": 73.4362, "num_word_context": 202.0443, "num_word_doc": 49.8406, "num_word_query": 39.8692, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2212.6178, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2754, "query_norm": 1.4325, "queue_k_norm": 1.4477, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2999, "sent_len_1": 66.7343, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.1325, "stdk": 0.0486, "stdq": 0.047, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.8094, "doc_norm": 1.448, "encoder_q-embeddings": 1093.7123, "encoder_q-layer.0": 708.6924, "encoder_q-layer.1": 787.2784, "encoder_q-layer.10": 1210.7358, "encoder_q-layer.11": 2761.4912, "encoder_q-layer.2": 875.0381, "encoder_q-layer.3": 856.4417, "encoder_q-layer.4": 875.1245, "encoder_q-layer.5": 933.3562, "encoder_q-layer.6": 1032.124, "encoder_q-layer.7": 1112.6289, "encoder_q-layer.8": 1304.5007, "encoder_q-layer.9": 1160.5044, "epoch": 0.6, "inbatch_neg_score": 0.2792, "inbatch_pos_score": 0.9937, "learning_rate": 2.1444444444444443e-05, "loss": 2.8094, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.7863, "num_token_overlap": 18.0317, "num_token_query": 52.1959, "num_token_union": 73.4335, "num_word_context": 202.1623, "num_word_doc": 49.845, "num_word_query": 39.784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1885.3456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2791, "query_norm": 1.4129, "queue_k_norm": 1.4454, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1959, "sent_len_1": 66.7863, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.1937, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 2.8106, "doc_norm": 1.4534, "encoder_q-embeddings": 1576.4465, "encoder_q-layer.0": 1026.9634, "encoder_q-layer.1": 1061.7163, "encoder_q-layer.10": 1327.4613, "encoder_q-layer.11": 2969.6023, "encoder_q-layer.2": 1261.4629, "encoder_q-layer.3": 1353.2994, "encoder_q-layer.4": 1395.3213, "encoder_q-layer.5": 1356.196, "encoder_q-layer.6": 1440.1335, "encoder_q-layer.7": 1474.6445, "encoder_q-layer.8": 1657.8638, "encoder_q-layer.9": 1335.4696, "epoch": 0.6, "inbatch_neg_score": 0.2821, "inbatch_pos_score": 0.9746, "learning_rate": 2.138888888888889e-05, "loss": 2.8106, "norm_diff": 0.048, "norm_loss": 0.0, "num_token_doc": 66.9414, "num_token_overlap": 18.0415, "num_token_query": 52.2754, "num_token_union": 73.5552, "num_word_context": 202.4461, "num_word_doc": 49.9018, "num_word_query": 39.8313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2349.0529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.4054, "queue_k_norm": 1.446, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2754, "sent_len_1": 66.9414, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.93, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.8013, "doc_norm": 1.4367, "encoder_q-embeddings": 1169.9513, "encoder_q-layer.0": 766.2446, "encoder_q-layer.1": 843.3297, "encoder_q-layer.10": 1332.5552, "encoder_q-layer.11": 2691.8936, "encoder_q-layer.2": 945.641, "encoder_q-layer.3": 949.5907, "encoder_q-layer.4": 914.7134, "encoder_q-layer.5": 901.7709, "encoder_q-layer.6": 1043.0779, "encoder_q-layer.7": 1254.0088, "encoder_q-layer.8": 1336.6532, "encoder_q-layer.9": 1219.7039, "epoch": 0.6, "inbatch_neg_score": 0.2815, "inbatch_pos_score": 0.9902, "learning_rate": 2.1333333333333335e-05, "loss": 2.8013, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.7696, "num_token_overlap": 18.05, "num_token_query": 52.2047, "num_token_union": 73.3937, "num_word_context": 202.2452, "num_word_doc": 49.8324, "num_word_query": 39.7878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1895.7152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.282, "query_norm": 1.4225, "queue_k_norm": 1.446, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2047, "sent_len_1": 66.7696, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1612, "stdk": 0.0481, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7804, "doc_norm": 1.4434, "encoder_q-embeddings": 1821.2662, "encoder_q-layer.0": 1307.9642, "encoder_q-layer.1": 1542.7101, "encoder_q-layer.10": 1284.3521, "encoder_q-layer.11": 2744.0439, "encoder_q-layer.2": 1659.02, "encoder_q-layer.3": 1707.4023, "encoder_q-layer.4": 1845.231, "encoder_q-layer.5": 1763.7032, "encoder_q-layer.6": 1793.4106, "encoder_q-layer.7": 1692.3889, "encoder_q-layer.8": 1600.9602, "encoder_q-layer.9": 1260.0249, "epoch": 0.6, "inbatch_neg_score": 0.2792, "inbatch_pos_score": 0.9902, "learning_rate": 2.127777777777778e-05, "loss": 2.7804, "norm_diff": 0.0221, "norm_loss": 0.0, "num_token_doc": 66.8939, "num_token_overlap": 18.0253, "num_token_query": 52.163, "num_token_union": 73.4164, "num_word_context": 202.1993, "num_word_doc": 49.8962, "num_word_query": 39.7668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2572.2374, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2773, "query_norm": 1.4213, "queue_k_norm": 1.4482, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.163, "sent_len_1": 66.8939, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.625, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7805, "doc_norm": 1.445, "encoder_q-embeddings": 2793.7244, "encoder_q-layer.0": 1816.7146, "encoder_q-layer.1": 1864.8384, "encoder_q-layer.10": 1352.7454, "encoder_q-layer.11": 2773.2136, "encoder_q-layer.2": 2329.9065, "encoder_q-layer.3": 2290.9583, "encoder_q-layer.4": 1776.3431, "encoder_q-layer.5": 1718.4153, "encoder_q-layer.6": 2074.3665, "encoder_q-layer.7": 2198.6953, "encoder_q-layer.8": 2072.9922, "encoder_q-layer.9": 1315.4865, "epoch": 0.6, "inbatch_neg_score": 0.2817, "inbatch_pos_score": 0.9941, "learning_rate": 2.1222222222222223e-05, "loss": 2.7805, "norm_diff": 0.0212, "norm_loss": 0.0, "num_token_doc": 66.7781, "num_token_overlap": 18.0411, "num_token_query": 52.2727, "num_token_union": 73.5038, "num_word_context": 202.4944, "num_word_doc": 49.8588, "num_word_query": 39.8466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3195.1502, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.4237, "queue_k_norm": 1.4485, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2727, "sent_len_1": 66.7781, "sent_len_max_0": 127.995, "sent_len_max_1": 189.3862, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7983, "doc_norm": 1.4555, "encoder_q-embeddings": 1976.286, "encoder_q-layer.0": 1314.1825, "encoder_q-layer.1": 1416.7737, "encoder_q-layer.10": 2633.688, "encoder_q-layer.11": 5330.7134, "encoder_q-layer.2": 1528.9907, "encoder_q-layer.3": 1606.0627, "encoder_q-layer.4": 1632.884, "encoder_q-layer.5": 1667.257, "encoder_q-layer.6": 1887.9067, "encoder_q-layer.7": 2058.0125, "encoder_q-layer.8": 2658.1306, "encoder_q-layer.9": 2349.9658, "epoch": 0.6, "inbatch_neg_score": 0.2835, "inbatch_pos_score": 1.0137, "learning_rate": 2.116666666666667e-05, "loss": 2.7983, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 67.0919, "num_token_overlap": 18.0991, "num_token_query": 52.3717, "num_token_union": 73.6432, "num_word_context": 202.5876, "num_word_doc": 50.067, "num_word_query": 39.9516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3592.7857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.425, "queue_k_norm": 1.4484, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3717, "sent_len_1": 67.0919, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2637, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7968, "doc_norm": 1.4373, "encoder_q-embeddings": 2507.4106, "encoder_q-layer.0": 1603.3879, "encoder_q-layer.1": 1803.5917, "encoder_q-layer.10": 2614.3701, "encoder_q-layer.11": 5828.2524, "encoder_q-layer.2": 2151.2866, "encoder_q-layer.3": 2124.509, "encoder_q-layer.4": 2238.4893, "encoder_q-layer.5": 2480.7673, "encoder_q-layer.6": 2522.7527, "encoder_q-layer.7": 2438.8254, "encoder_q-layer.8": 2749.137, "encoder_q-layer.9": 2427.0227, "epoch": 0.61, "inbatch_neg_score": 0.2887, "inbatch_pos_score": 1.0, "learning_rate": 2.111111111111111e-05, "loss": 2.7968, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.7946, "num_token_overlap": 18.0272, "num_token_query": 52.3679, "num_token_union": 73.5722, "num_word_context": 202.4425, "num_word_doc": 49.8419, "num_word_query": 39.9221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4117.5755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2874, "query_norm": 1.4253, "queue_k_norm": 1.448, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3679, "sent_len_1": 66.7946, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1287, "stdk": 0.0481, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.8059, "doc_norm": 1.4485, "encoder_q-embeddings": 2541.8389, "encoder_q-layer.0": 1733.225, "encoder_q-layer.1": 1903.637, "encoder_q-layer.10": 2690.8611, "encoder_q-layer.11": 5920.6807, "encoder_q-layer.2": 2386.551, "encoder_q-layer.3": 2340.9138, "encoder_q-layer.4": 2297.0505, "encoder_q-layer.5": 2338.3689, "encoder_q-layer.6": 2471.073, "encoder_q-layer.7": 2627.6921, "encoder_q-layer.8": 2849.6677, "encoder_q-layer.9": 2562.0239, "epoch": 0.61, "inbatch_neg_score": 0.2902, "inbatch_pos_score": 0.9951, "learning_rate": 2.1055555555555556e-05, "loss": 2.8059, "norm_diff": 0.0239, "norm_loss": 0.0, "num_token_doc": 66.5322, "num_token_overlap": 17.8787, "num_token_query": 52.0122, "num_token_union": 73.2975, "num_word_context": 201.6357, "num_word_doc": 49.6359, "num_word_query": 39.6415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4334.6334, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.4247, "queue_k_norm": 1.4481, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0122, "sent_len_1": 66.5322, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.0875, "stdk": 0.0486, "stdq": 0.0465, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 2.8253, "doc_norm": 1.4472, "encoder_q-embeddings": 2689.4014, "encoder_q-layer.0": 1863.0367, "encoder_q-layer.1": 2240.6716, "encoder_q-layer.10": 2646.5417, "encoder_q-layer.11": 5777.5396, "encoder_q-layer.2": 2883.051, "encoder_q-layer.3": 2829.4238, "encoder_q-layer.4": 3078.8757, "encoder_q-layer.5": 2633.855, "encoder_q-layer.6": 2661.3518, "encoder_q-layer.7": 2412.7861, "encoder_q-layer.8": 2670.1777, "encoder_q-layer.9": 2327.9172, "epoch": 0.61, "inbatch_neg_score": 0.286, "inbatch_pos_score": 0.9644, "learning_rate": 2.1e-05, "loss": 2.8253, "norm_diff": 0.0522, "norm_loss": 0.0, "num_token_doc": 66.8043, "num_token_overlap": 18.0017, "num_token_query": 52.1737, "num_token_union": 73.4022, "num_word_context": 202.6312, "num_word_doc": 49.8259, "num_word_query": 39.7253, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4417.5649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.395, "queue_k_norm": 1.4463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1737, "sent_len_1": 66.8043, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.5362, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.7891, "doc_norm": 1.4479, "encoder_q-embeddings": 12799.4805, "encoder_q-layer.0": 10129.4258, "encoder_q-layer.1": 11278.5166, "encoder_q-layer.10": 2835.6599, "encoder_q-layer.11": 5568.937, "encoder_q-layer.2": 14352.2129, "encoder_q-layer.3": 15125.7041, "encoder_q-layer.4": 11780.5957, "encoder_q-layer.5": 8757.9619, "encoder_q-layer.6": 10412.9141, "encoder_q-layer.7": 8031.2231, "encoder_q-layer.8": 5456.9375, "encoder_q-layer.9": 3239.3818, "epoch": 0.61, "inbatch_neg_score": 0.2863, "inbatch_pos_score": 1.0059, "learning_rate": 2.0944444444444445e-05, "loss": 2.7891, "norm_diff": 0.0385, "norm_loss": 0.0, "num_token_doc": 66.8231, "num_token_overlap": 18.0829, "num_token_query": 52.2804, "num_token_union": 73.4168, "num_word_context": 202.2125, "num_word_doc": 49.832, "num_word_query": 39.8672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15097.7831, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2871, "query_norm": 1.4094, "queue_k_norm": 1.4482, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2804, "sent_len_1": 66.8231, "sent_len_max_0": 127.995, "sent_len_max_1": 188.6213, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7947, "doc_norm": 1.4521, "encoder_q-embeddings": 3495.3833, "encoder_q-layer.0": 2426.1604, "encoder_q-layer.1": 2561.092, "encoder_q-layer.10": 2753.2566, "encoder_q-layer.11": 5773.6504, "encoder_q-layer.2": 3086.48, "encoder_q-layer.3": 3039.5002, "encoder_q-layer.4": 2759.6936, "encoder_q-layer.5": 2469.6904, "encoder_q-layer.6": 2581.8684, "encoder_q-layer.7": 2527.9463, "encoder_q-layer.8": 2875.6917, "encoder_q-layer.9": 2522.5193, "epoch": 0.61, "inbatch_neg_score": 0.288, "inbatch_pos_score": 1.0059, "learning_rate": 2.088888888888889e-05, "loss": 2.7947, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.5986, "num_token_overlap": 17.9732, "num_token_query": 52.1683, "num_token_union": 73.3281, "num_word_context": 202.2316, "num_word_doc": 49.687, "num_word_query": 39.7952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4706.4144, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2883, "query_norm": 1.3965, "queue_k_norm": 1.447, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1683, "sent_len_1": 66.5986, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.15, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7812, "doc_norm": 1.4461, "encoder_q-embeddings": 2629.4724, "encoder_q-layer.0": 1781.8574, "encoder_q-layer.1": 1963.6406, "encoder_q-layer.10": 2628.9312, "encoder_q-layer.11": 5566.665, "encoder_q-layer.2": 2043.5908, "encoder_q-layer.3": 2029.2871, "encoder_q-layer.4": 2053.7412, "encoder_q-layer.5": 2100.583, "encoder_q-layer.6": 2347.8174, "encoder_q-layer.7": 2423.5681, "encoder_q-layer.8": 2750.6782, "encoder_q-layer.9": 2591.6409, "epoch": 0.61, "inbatch_neg_score": 0.2878, "inbatch_pos_score": 1.0195, "learning_rate": 2.0833333333333336e-05, "loss": 2.7812, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.6879, "num_token_overlap": 18.0118, "num_token_query": 52.3556, "num_token_union": 73.464, "num_word_context": 202.3559, "num_word_doc": 49.7892, "num_word_query": 39.9015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4035.2098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.4208, "queue_k_norm": 1.4493, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3556, "sent_len_1": 66.6879, "sent_len_max_0": 127.9838, "sent_len_max_1": 189.0938, "stdk": 0.0485, "stdq": 0.0466, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7817, "doc_norm": 1.4472, "encoder_q-embeddings": 2387.1426, "encoder_q-layer.0": 1653.1908, "encoder_q-layer.1": 1709.213, "encoder_q-layer.10": 2460.4761, "encoder_q-layer.11": 5686.1807, "encoder_q-layer.2": 1876.6946, "encoder_q-layer.3": 1745.6626, "encoder_q-layer.4": 1692.4269, "encoder_q-layer.5": 1660.491, "encoder_q-layer.6": 1870.8673, "encoder_q-layer.7": 2082.0488, "encoder_q-layer.8": 2489.1658, "encoder_q-layer.9": 2359.7449, "epoch": 0.61, "inbatch_neg_score": 0.2967, "inbatch_pos_score": 1.0293, "learning_rate": 2.077777777777778e-05, "loss": 2.7817, "norm_diff": 0.0252, "norm_loss": 0.0, "num_token_doc": 66.8107, "num_token_overlap": 17.9822, "num_token_query": 52.2579, "num_token_union": 73.5089, "num_word_context": 202.2048, "num_word_doc": 49.858, "num_word_query": 39.8464, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3764.9505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.422, "queue_k_norm": 1.4495, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2579, "sent_len_1": 66.8107, "sent_len_max_0": 127.995, "sent_len_max_1": 188.2587, "stdk": 0.0485, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.7927, "doc_norm": 1.4411, "encoder_q-embeddings": 2717.7461, "encoder_q-layer.0": 1889.6572, "encoder_q-layer.1": 2150.6997, "encoder_q-layer.10": 2570.499, "encoder_q-layer.11": 5773.1597, "encoder_q-layer.2": 2471.0957, "encoder_q-layer.3": 2476.7461, "encoder_q-layer.4": 2705.1135, "encoder_q-layer.5": 2822.8777, "encoder_q-layer.6": 3000.8254, "encoder_q-layer.7": 2913.1685, "encoder_q-layer.8": 3180.5261, "encoder_q-layer.9": 2540.8999, "epoch": 0.61, "inbatch_neg_score": 0.3005, "inbatch_pos_score": 1.0146, "learning_rate": 2.0722222222222224e-05, "loss": 2.7927, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.6431, "num_token_overlap": 17.9429, "num_token_query": 52.1231, "num_token_union": 73.3484, "num_word_context": 201.9783, "num_word_doc": 49.7433, "num_word_query": 39.716, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4442.7091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.4278, "queue_k_norm": 1.4495, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1231, "sent_len_1": 66.6431, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3625, "stdk": 0.0482, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7775, "doc_norm": 1.4475, "encoder_q-embeddings": 3923.6416, "encoder_q-layer.0": 2553.8379, "encoder_q-layer.1": 3154.4568, "encoder_q-layer.10": 2554.5454, "encoder_q-layer.11": 5357.5103, "encoder_q-layer.2": 3279.5505, "encoder_q-layer.3": 2401.551, "encoder_q-layer.4": 2424.7522, "encoder_q-layer.5": 2101.6658, "encoder_q-layer.6": 2390.0769, "encoder_q-layer.7": 2530.3076, "encoder_q-layer.8": 2795.5063, "encoder_q-layer.9": 2391.6182, "epoch": 0.61, "inbatch_neg_score": 0.2969, "inbatch_pos_score": 1.0186, "learning_rate": 2.0666666666666666e-05, "loss": 2.7775, "norm_diff": 0.034, "norm_loss": 0.0, "num_token_doc": 66.5539, "num_token_overlap": 18.0128, "num_token_query": 52.21, "num_token_union": 73.2567, "num_word_context": 201.8558, "num_word_doc": 49.6839, "num_word_query": 39.8088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4623.5347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.4135, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.21, "sent_len_1": 66.5539, "sent_len_max_0": 127.995, "sent_len_max_1": 188.175, "stdk": 0.0485, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7914, "doc_norm": 1.4458, "encoder_q-embeddings": 3013.05, "encoder_q-layer.0": 2095.1418, "encoder_q-layer.1": 2451.1294, "encoder_q-layer.10": 2341.9097, "encoder_q-layer.11": 5485.3516, "encoder_q-layer.2": 2831.5386, "encoder_q-layer.3": 2902.5986, "encoder_q-layer.4": 3097.7419, "encoder_q-layer.5": 2851.4773, "encoder_q-layer.6": 3215.3059, "encoder_q-layer.7": 3385.6248, "encoder_q-layer.8": 3148.6377, "encoder_q-layer.9": 2566.9861, "epoch": 0.61, "inbatch_neg_score": 0.2985, "inbatch_pos_score": 1.0078, "learning_rate": 2.0611111111111112e-05, "loss": 2.7914, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.6304, "num_token_overlap": 17.9276, "num_token_query": 52.059, "num_token_union": 73.3971, "num_word_context": 202.0562, "num_word_doc": 49.7154, "num_word_query": 39.6572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4616.1345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.4137, "queue_k_norm": 1.4496, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.059, "sent_len_1": 66.6304, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.9787, "stdk": 0.0484, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.7936, "doc_norm": 1.4507, "encoder_q-embeddings": 2469.8188, "encoder_q-layer.0": 1645.5948, "encoder_q-layer.1": 1812.1483, "encoder_q-layer.10": 2483.9431, "encoder_q-layer.11": 5532.2617, "encoder_q-layer.2": 2069.3542, "encoder_q-layer.3": 2089.2708, "encoder_q-layer.4": 2200.7114, "encoder_q-layer.5": 2211.8586, "encoder_q-layer.6": 2513.145, "encoder_q-layer.7": 2611.8262, "encoder_q-layer.8": 2755.1511, "encoder_q-layer.9": 2400.137, "epoch": 0.62, "inbatch_neg_score": 0.297, "inbatch_pos_score": 1.0186, "learning_rate": 2.0555555555555555e-05, "loss": 2.7936, "norm_diff": 0.0327, "norm_loss": 0.0, "num_token_doc": 66.8208, "num_token_overlap": 18.0014, "num_token_query": 52.271, "num_token_union": 73.5277, "num_word_context": 202.5863, "num_word_doc": 49.8281, "num_word_query": 39.8399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4042.8659, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.4179, "queue_k_norm": 1.4511, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.271, "sent_len_1": 66.8208, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.9938, "stdk": 0.0486, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.7996, "doc_norm": 1.4494, "encoder_q-embeddings": 2810.2515, "encoder_q-layer.0": 1734.8218, "encoder_q-layer.1": 1886.4038, "encoder_q-layer.10": 2471.1367, "encoder_q-layer.11": 5413.6826, "encoder_q-layer.2": 2271.3662, "encoder_q-layer.3": 2402.718, "encoder_q-layer.4": 2497.2437, "encoder_q-layer.5": 2594.7275, "encoder_q-layer.6": 2786.7151, "encoder_q-layer.7": 2894.6448, "encoder_q-layer.8": 3028.957, "encoder_q-layer.9": 2464.5039, "epoch": 0.62, "inbatch_neg_score": 0.2991, "inbatch_pos_score": 1.0146, "learning_rate": 2.05e-05, "loss": 2.7996, "norm_diff": 0.0259, "norm_loss": 0.0, "num_token_doc": 66.6482, "num_token_overlap": 17.9524, "num_token_query": 52.1718, "num_token_union": 73.422, "num_word_context": 202.1782, "num_word_doc": 49.7359, "num_word_query": 39.7583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4295.9744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.4235, "queue_k_norm": 1.4518, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1718, "sent_len_1": 66.6482, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.2525, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7879, "doc_norm": 1.4508, "encoder_q-embeddings": 2120.71, "encoder_q-layer.0": 1351.0492, "encoder_q-layer.1": 1449.1139, "encoder_q-layer.10": 2558.0286, "encoder_q-layer.11": 5494.9629, "encoder_q-layer.2": 1637.9966, "encoder_q-layer.3": 1662.4938, "encoder_q-layer.4": 1771.1993, "encoder_q-layer.5": 1820.912, "encoder_q-layer.6": 1975.8783, "encoder_q-layer.7": 2167.6772, "encoder_q-layer.8": 2588.8677, "encoder_q-layer.9": 2374.3494, "epoch": 0.62, "inbatch_neg_score": 0.2933, "inbatch_pos_score": 1.001, "learning_rate": 2.0444444444444446e-05, "loss": 2.7879, "norm_diff": 0.0475, "norm_loss": 0.0, "num_token_doc": 66.6575, "num_token_overlap": 17.9755, "num_token_query": 52.1682, "num_token_union": 73.3683, "num_word_context": 202.1106, "num_word_doc": 49.7355, "num_word_query": 39.7701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3687.6893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.4034, "queue_k_norm": 1.4517, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1682, "sent_len_1": 66.6575, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5762, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.8047, "doc_norm": 1.4551, "encoder_q-embeddings": 2985.1072, "encoder_q-layer.0": 1951.7009, "encoder_q-layer.1": 2260.8882, "encoder_q-layer.10": 2808.7561, "encoder_q-layer.11": 5713.0586, "encoder_q-layer.2": 2684.8506, "encoder_q-layer.3": 2912.7766, "encoder_q-layer.4": 3074.6025, "encoder_q-layer.5": 3095.8103, "encoder_q-layer.6": 3320.7212, "encoder_q-layer.7": 3175.0432, "encoder_q-layer.8": 3237.0632, "encoder_q-layer.9": 2471.0042, "epoch": 0.62, "inbatch_neg_score": 0.2909, "inbatch_pos_score": 0.9912, "learning_rate": 2.0388888888888892e-05, "loss": 2.8047, "norm_diff": 0.0323, "norm_loss": 0.0, "num_token_doc": 66.7193, "num_token_overlap": 17.9438, "num_token_query": 51.9603, "num_token_union": 73.3303, "num_word_context": 201.9219, "num_word_doc": 49.7818, "num_word_query": 39.6274, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4679.1622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.292, "query_norm": 1.4228, "queue_k_norm": 1.4516, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 51.9603, "sent_len_1": 66.7193, "sent_len_max_0": 127.99, "sent_len_max_1": 191.8587, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.7803, "doc_norm": 1.4485, "encoder_q-embeddings": 7095.5405, "encoder_q-layer.0": 5049.4683, "encoder_q-layer.1": 5960.0845, "encoder_q-layer.10": 2441.4275, "encoder_q-layer.11": 5534.9346, "encoder_q-layer.2": 6728.1592, "encoder_q-layer.3": 7276.5117, "encoder_q-layer.4": 8004.5142, "encoder_q-layer.5": 7199.6738, "encoder_q-layer.6": 5955.9565, "encoder_q-layer.7": 5471.5835, "encoder_q-layer.8": 4709.2607, "encoder_q-layer.9": 3126.9116, "epoch": 0.62, "inbatch_neg_score": 0.2888, "inbatch_pos_score": 1.0098, "learning_rate": 2.0333333333333334e-05, "loss": 2.7803, "norm_diff": 0.0239, "norm_loss": 0.0, "num_token_doc": 66.7853, "num_token_overlap": 18.0461, "num_token_query": 52.2553, "num_token_union": 73.4776, "num_word_context": 202.4103, "num_word_doc": 49.8671, "num_word_query": 39.8568, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8745.3207, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2893, "query_norm": 1.4246, "queue_k_norm": 1.4539, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2553, "sent_len_1": 66.7853, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0538, "stdk": 0.0485, "stdq": 0.0468, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.7879, "doc_norm": 1.4553, "encoder_q-embeddings": 4279.8286, "encoder_q-layer.0": 2893.5789, "encoder_q-layer.1": 3515.7607, "encoder_q-layer.10": 2496.4402, "encoder_q-layer.11": 5415.2764, "encoder_q-layer.2": 3994.7637, "encoder_q-layer.3": 3734.9592, "encoder_q-layer.4": 4054.9858, "encoder_q-layer.5": 3818.6011, "encoder_q-layer.6": 3229.6055, "encoder_q-layer.7": 2982.562, "encoder_q-layer.8": 2764.6289, "encoder_q-layer.9": 2365.8967, "epoch": 0.62, "inbatch_neg_score": 0.289, "inbatch_pos_score": 0.9961, "learning_rate": 2.027777777777778e-05, "loss": 2.7879, "norm_diff": 0.0587, "norm_loss": 0.0, "num_token_doc": 66.6583, "num_token_overlap": 17.9592, "num_token_query": 52.196, "num_token_union": 73.3991, "num_word_context": 201.9076, "num_word_doc": 49.7516, "num_word_query": 39.7786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5428.8975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.3966, "queue_k_norm": 1.4529, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.196, "sent_len_1": 66.6583, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.945, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 2.7881, "doc_norm": 1.441, "encoder_q-embeddings": 2633.1765, "encoder_q-layer.0": 1690.0146, "encoder_q-layer.1": 1941.1649, "encoder_q-layer.10": 2581.2258, "encoder_q-layer.11": 5854.3564, "encoder_q-layer.2": 2303.1638, "encoder_q-layer.3": 2484.7988, "encoder_q-layer.4": 2607.6377, "encoder_q-layer.5": 2634.375, "encoder_q-layer.6": 2707.3645, "encoder_q-layer.7": 2821.7439, "encoder_q-layer.8": 3010.1699, "encoder_q-layer.9": 2389.834, "epoch": 0.62, "inbatch_neg_score": 0.2889, "inbatch_pos_score": 0.9941, "learning_rate": 2.0222222222222222e-05, "loss": 2.7881, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 66.6905, "num_token_overlap": 17.9581, "num_token_query": 52.1436, "num_token_union": 73.3957, "num_word_context": 202.8168, "num_word_doc": 49.801, "num_word_query": 39.7275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4315.2541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2891, "query_norm": 1.4134, "queue_k_norm": 1.4501, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1436, "sent_len_1": 66.6905, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3525, "stdk": 0.0482, "stdq": 0.0463, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.8011, "doc_norm": 1.4533, "encoder_q-embeddings": 2412.2131, "encoder_q-layer.0": 1595.5935, "encoder_q-layer.1": 1765.2889, "encoder_q-layer.10": 2662.7012, "encoder_q-layer.11": 5606.6948, "encoder_q-layer.2": 2024.6754, "encoder_q-layer.3": 2235.5996, "encoder_q-layer.4": 2384.9397, "encoder_q-layer.5": 2543.4836, "encoder_q-layer.6": 2504.5149, "encoder_q-layer.7": 2448.9927, "encoder_q-layer.8": 2713.6326, "encoder_q-layer.9": 2394.8625, "epoch": 0.62, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0205, "learning_rate": 2.0166666666666668e-05, "loss": 2.8011, "norm_diff": 0.0402, "norm_loss": 0.0, "num_token_doc": 66.5734, "num_token_overlap": 17.9818, "num_token_query": 52.1174, "num_token_union": 73.2572, "num_word_context": 202.2269, "num_word_doc": 49.6571, "num_word_query": 39.7153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4061.1845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.4131, "queue_k_norm": 1.4487, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1174, "sent_len_1": 66.5734, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.2725, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 2.8109, "doc_norm": 1.4455, "encoder_q-embeddings": 5661.9204, "encoder_q-layer.0": 3634.2498, "encoder_q-layer.1": 3678.1677, "encoder_q-layer.10": 2635.1887, "encoder_q-layer.11": 5683.6011, "encoder_q-layer.2": 4173.5835, "encoder_q-layer.3": 4274.311, "encoder_q-layer.4": 4541.8804, "encoder_q-layer.5": 5031.4121, "encoder_q-layer.6": 5611.2783, "encoder_q-layer.7": 5009.3545, "encoder_q-layer.8": 4220.7324, "encoder_q-layer.9": 2908.4856, "epoch": 0.62, "inbatch_neg_score": 0.2915, "inbatch_pos_score": 0.9966, "learning_rate": 2.011111111111111e-05, "loss": 2.8109, "norm_diff": 0.0266, "norm_loss": 0.0, "num_token_doc": 66.5567, "num_token_overlap": 17.9058, "num_token_query": 52.0075, "num_token_union": 73.2988, "num_word_context": 202.025, "num_word_doc": 49.7141, "num_word_query": 39.6607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6782.5165, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2913, "query_norm": 1.4189, "queue_k_norm": 1.4494, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0075, "sent_len_1": 66.5567, "sent_len_max_0": 128.0, "sent_len_max_1": 187.81, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.7668, "doc_norm": 1.4436, "encoder_q-embeddings": 5973.0225, "encoder_q-layer.0": 4005.5215, "encoder_q-layer.1": 4566.7129, "encoder_q-layer.10": 4969.4321, "encoder_q-layer.11": 10900.2832, "encoder_q-layer.2": 4996.8599, "encoder_q-layer.3": 5241.3091, "encoder_q-layer.4": 5231.5078, "encoder_q-layer.5": 5713.8203, "encoder_q-layer.6": 6414.0493, "encoder_q-layer.7": 7063.2231, "encoder_q-layer.8": 6728.6392, "encoder_q-layer.9": 5096.9497, "epoch": 0.62, "inbatch_neg_score": 0.2948, "inbatch_pos_score": 1.0068, "learning_rate": 2.0055555555555556e-05, "loss": 2.7668, "norm_diff": 0.0231, "norm_loss": 0.0, "num_token_doc": 66.9208, "num_token_overlap": 18.019, "num_token_query": 52.3211, "num_token_union": 73.5886, "num_word_context": 202.5559, "num_word_doc": 49.9737, "num_word_query": 39.8917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9081.6006, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2942, "query_norm": 1.4205, "queue_k_norm": 1.451, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3211, "sent_len_1": 66.9208, "sent_len_max_0": 127.9775, "sent_len_max_1": 188.1662, "stdk": 0.0483, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7785, "doc_norm": 1.4578, "encoder_q-embeddings": 2481.1135, "encoder_q-layer.0": 1531.7561, "encoder_q-layer.1": 1697.7546, "encoder_q-layer.10": 2874.2314, "encoder_q-layer.11": 5887.1597, "encoder_q-layer.2": 1879.6274, "encoder_q-layer.3": 1914.3352, "encoder_q-layer.4": 1979.8524, "encoder_q-layer.5": 2002.3057, "encoder_q-layer.6": 2377.7805, "encoder_q-layer.7": 2347.3335, "encoder_q-layer.8": 2772.5273, "encoder_q-layer.9": 2571.4424, "epoch": 0.62, "inbatch_neg_score": 0.2879, "inbatch_pos_score": 1.0137, "learning_rate": 2e-05, "loss": 2.7785, "norm_diff": 0.0427, "norm_loss": 0.0, "num_token_doc": 66.8726, "num_token_overlap": 17.9847, "num_token_query": 52.0876, "num_token_union": 73.4409, "num_word_context": 202.2944, "num_word_doc": 49.8756, "num_word_query": 39.7072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4056.0612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4151, "queue_k_norm": 1.4529, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0876, "sent_len_1": 66.8726, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.0938, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7793, "doc_norm": 1.4507, "encoder_q-embeddings": 3236.2512, "encoder_q-layer.0": 2163.6914, "encoder_q-layer.1": 2440.001, "encoder_q-layer.10": 2365.6377, "encoder_q-layer.11": 5419.1284, "encoder_q-layer.2": 2822.6475, "encoder_q-layer.3": 2905.0925, "encoder_q-layer.4": 3163.7163, "encoder_q-layer.5": 3225.2148, "encoder_q-layer.6": 3342.947, "encoder_q-layer.7": 3477.322, "encoder_q-layer.8": 3337.4836, "encoder_q-layer.9": 2490.9363, "epoch": 0.63, "inbatch_neg_score": 0.2846, "inbatch_pos_score": 0.9775, "learning_rate": 1.9944444444444447e-05, "loss": 2.7793, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.7934, "num_token_overlap": 17.9996, "num_token_query": 52.3507, "num_token_union": 73.6038, "num_word_context": 202.3738, "num_word_doc": 49.8508, "num_word_query": 39.9424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4812.4183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.4046, "queue_k_norm": 1.4515, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3507, "sent_len_1": 66.7934, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0387, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7818, "doc_norm": 1.4573, "encoder_q-embeddings": 2319.0752, "encoder_q-layer.0": 1507.7483, "encoder_q-layer.1": 1600.7865, "encoder_q-layer.10": 2497.6562, "encoder_q-layer.11": 5041.5835, "encoder_q-layer.2": 1846.6497, "encoder_q-layer.3": 1911.3853, "encoder_q-layer.4": 1981.1095, "encoder_q-layer.5": 2153.8711, "encoder_q-layer.6": 2449.3904, "encoder_q-layer.7": 3074.9692, "encoder_q-layer.8": 4084.6208, "encoder_q-layer.9": 2970.4421, "epoch": 0.63, "inbatch_neg_score": 0.2848, "inbatch_pos_score": 1.0049, "learning_rate": 1.988888888888889e-05, "loss": 2.7818, "norm_diff": 0.0423, "norm_loss": 0.0, "num_token_doc": 66.7502, "num_token_overlap": 17.9542, "num_token_query": 52.1896, "num_token_union": 73.4611, "num_word_context": 202.0955, "num_word_doc": 49.8224, "num_word_query": 39.819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.3531, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2844, "query_norm": 1.415, "queue_k_norm": 1.4515, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1896, "sent_len_1": 66.7502, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.665, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.7841, "doc_norm": 1.4531, "encoder_q-embeddings": 3245.7524, "encoder_q-layer.0": 2155.5745, "encoder_q-layer.1": 2466.5085, "encoder_q-layer.10": 2481.0146, "encoder_q-layer.11": 5376.396, "encoder_q-layer.2": 2718.5923, "encoder_q-layer.3": 3070.8491, "encoder_q-layer.4": 2937.5366, "encoder_q-layer.5": 3017.0149, "encoder_q-layer.6": 3066.9148, "encoder_q-layer.7": 3205.6331, "encoder_q-layer.8": 3246.3823, "encoder_q-layer.9": 2554.4509, "epoch": 0.63, "inbatch_neg_score": 0.2965, "inbatch_pos_score": 1.0234, "learning_rate": 1.9833333333333335e-05, "loss": 2.7841, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.4687, "num_token_overlap": 17.8866, "num_token_query": 51.9994, "num_token_union": 73.2672, "num_word_context": 202.1384, "num_word_doc": 49.5962, "num_word_query": 39.6063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4710.4346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.4358, "queue_k_norm": 1.4512, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 51.9994, "sent_len_1": 66.4687, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.815, "stdk": 0.0487, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7795, "doc_norm": 1.4446, "encoder_q-embeddings": 1874.3995, "encoder_q-layer.0": 1165.5328, "encoder_q-layer.1": 1250.6936, "encoder_q-layer.10": 2459.3169, "encoder_q-layer.11": 5383.4795, "encoder_q-layer.2": 1416.8656, "encoder_q-layer.3": 1471.6343, "encoder_q-layer.4": 1552.751, "encoder_q-layer.5": 1576.5172, "encoder_q-layer.6": 1857.1665, "encoder_q-layer.7": 2058.988, "encoder_q-layer.8": 2540.0696, "encoder_q-layer.9": 2306.7344, "epoch": 0.63, "inbatch_neg_score": 0.2937, "inbatch_pos_score": 1.0352, "learning_rate": 1.9777777777777778e-05, "loss": 2.7795, "norm_diff": 0.007, "norm_loss": 0.0, "num_token_doc": 66.9609, "num_token_overlap": 18.0269, "num_token_query": 52.2761, "num_token_union": 73.5909, "num_word_context": 202.8663, "num_word_doc": 49.9907, "num_word_query": 39.8746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3472.9259, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.4377, "queue_k_norm": 1.452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2761, "sent_len_1": 66.9609, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3137, "stdk": 0.0483, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.7673, "doc_norm": 1.4544, "encoder_q-embeddings": 1877.9497, "encoder_q-layer.0": 1323.5526, "encoder_q-layer.1": 1387.7632, "encoder_q-layer.10": 2439.8174, "encoder_q-layer.11": 5562.8696, "encoder_q-layer.2": 1523.6196, "encoder_q-layer.3": 1597.5569, "encoder_q-layer.4": 1655.1089, "encoder_q-layer.5": 1741.1857, "encoder_q-layer.6": 2061.3789, "encoder_q-layer.7": 2332.7356, "encoder_q-layer.8": 2535.0986, "encoder_q-layer.9": 2242.3999, "epoch": 0.63, "inbatch_neg_score": 0.2966, "inbatch_pos_score": 1.0166, "learning_rate": 1.9722222222222224e-05, "loss": 2.7673, "norm_diff": 0.0301, "norm_loss": 0.0, "num_token_doc": 66.7905, "num_token_overlap": 18.063, "num_token_query": 52.3907, "num_token_union": 73.5129, "num_word_context": 202.4249, "num_word_doc": 49.8848, "num_word_query": 39.9773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3596.8206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.4243, "queue_k_norm": 1.4522, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3907, "sent_len_1": 66.7905, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5175, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7771, "doc_norm": 1.4468, "encoder_q-embeddings": 2366.4028, "encoder_q-layer.0": 1543.3759, "encoder_q-layer.1": 1826.3049, "encoder_q-layer.10": 2623.4553, "encoder_q-layer.11": 5678.3203, "encoder_q-layer.2": 1996.901, "encoder_q-layer.3": 2040.5312, "encoder_q-layer.4": 2060.9233, "encoder_q-layer.5": 2106.6582, "encoder_q-layer.6": 2363.6199, "encoder_q-layer.7": 2462.9827, "encoder_q-layer.8": 2759.2771, "encoder_q-layer.9": 2461.0706, "epoch": 0.63, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 1.0371, "learning_rate": 1.9666666666666666e-05, "loss": 2.7771, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.832, "num_token_overlap": 18.0495, "num_token_query": 52.3185, "num_token_union": 73.4744, "num_word_context": 202.4009, "num_word_doc": 49.837, "num_word_query": 39.8707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4054.8219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.4366, "queue_k_norm": 1.4515, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3185, "sent_len_1": 66.832, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.7875, "stdk": 0.0484, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.7712, "doc_norm": 1.4461, "encoder_q-embeddings": 3678.2297, "encoder_q-layer.0": 2597.95, "encoder_q-layer.1": 2775.2524, "encoder_q-layer.10": 2575.5181, "encoder_q-layer.11": 5444.4785, "encoder_q-layer.2": 3308.6638, "encoder_q-layer.3": 3638.0615, "encoder_q-layer.4": 4108.561, "encoder_q-layer.5": 4560.2202, "encoder_q-layer.6": 5094.0903, "encoder_q-layer.7": 6394.6743, "encoder_q-layer.8": 5076.9292, "encoder_q-layer.9": 2725.0176, "epoch": 0.63, "inbatch_neg_score": 0.303, "inbatch_pos_score": 1.0469, "learning_rate": 1.9611111111111115e-05, "loss": 2.7712, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.8785, "num_token_overlap": 18.0619, "num_token_query": 52.3279, "num_token_union": 73.5104, "num_word_context": 202.5164, "num_word_doc": 49.8878, "num_word_query": 39.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6218.8509, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3027, "query_norm": 1.4445, "queue_k_norm": 1.4532, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3279, "sent_len_1": 66.8785, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.6037, "stdk": 0.0484, "stdq": 0.0468, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7634, "doc_norm": 1.4564, "encoder_q-embeddings": 2129.8743, "encoder_q-layer.0": 1302.8462, "encoder_q-layer.1": 1397.7412, "encoder_q-layer.10": 2446.7317, "encoder_q-layer.11": 5327.687, "encoder_q-layer.2": 1635.7793, "encoder_q-layer.3": 1685.9548, "encoder_q-layer.4": 1746.7224, "encoder_q-layer.5": 1766.3359, "encoder_q-layer.6": 2176.0059, "encoder_q-layer.7": 2375.1519, "encoder_q-layer.8": 2650.9219, "encoder_q-layer.9": 2381.4058, "epoch": 0.63, "inbatch_neg_score": 0.3109, "inbatch_pos_score": 1.0586, "learning_rate": 1.9555555555555557e-05, "loss": 2.7634, "norm_diff": 0.01, "norm_loss": 0.0, "num_token_doc": 66.4787, "num_token_overlap": 17.9884, "num_token_query": 52.2463, "num_token_union": 73.3024, "num_word_context": 202.2724, "num_word_doc": 49.6305, "num_word_query": 39.8331, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3659.7982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3096, "query_norm": 1.4555, "queue_k_norm": 1.4525, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2463, "sent_len_1": 66.4787, "sent_len_max_0": 127.9838, "sent_len_max_1": 188.335, "stdk": 0.0487, "stdq": 0.047, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 2.7755, "doc_norm": 1.4541, "encoder_q-embeddings": 3734.5813, "encoder_q-layer.0": 2469.1035, "encoder_q-layer.1": 2650.8118, "encoder_q-layer.10": 2658.0049, "encoder_q-layer.11": 5571.1333, "encoder_q-layer.2": 3013.3235, "encoder_q-layer.3": 2892.5339, "encoder_q-layer.4": 2998.543, "encoder_q-layer.5": 3008.9902, "encoder_q-layer.6": 2784.2239, "encoder_q-layer.7": 2826.7229, "encoder_q-layer.8": 2582.3931, "encoder_q-layer.9": 2433.0632, "epoch": 0.63, "inbatch_neg_score": 0.3066, "inbatch_pos_score": 1.0273, "learning_rate": 1.9500000000000003e-05, "loss": 2.7755, "norm_diff": 0.0221, "norm_loss": 0.0, "num_token_doc": 66.6933, "num_token_overlap": 18.0136, "num_token_query": 52.2103, "num_token_union": 73.3642, "num_word_context": 202.3041, "num_word_doc": 49.7538, "num_word_query": 39.8051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4739.159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3074, "query_norm": 1.434, "queue_k_norm": 1.4529, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2103, "sent_len_1": 66.6933, "sent_len_max_0": 128.0, "sent_len_max_1": 191.7212, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.7869, "doc_norm": 1.4459, "encoder_q-embeddings": 1787.2745, "encoder_q-layer.0": 1168.994, "encoder_q-layer.1": 1303.5365, "encoder_q-layer.10": 1246.8068, "encoder_q-layer.11": 2770.9199, "encoder_q-layer.2": 1532.4337, "encoder_q-layer.3": 1577.0126, "encoder_q-layer.4": 1731.311, "encoder_q-layer.5": 1548.4958, "encoder_q-layer.6": 1832.0049, "encoder_q-layer.7": 1730.7129, "encoder_q-layer.8": 1595.8323, "encoder_q-layer.9": 1220.9939, "epoch": 0.63, "inbatch_neg_score": 0.3131, "inbatch_pos_score": 1.0293, "learning_rate": 1.9444444444444445e-05, "loss": 2.7869, "norm_diff": 0.0098, "norm_loss": 0.0, "num_token_doc": 66.7182, "num_token_overlap": 17.9943, "num_token_query": 52.3823, "num_token_union": 73.5374, "num_word_context": 202.5753, "num_word_doc": 49.7973, "num_word_query": 39.9509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2478.9535, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3127, "query_norm": 1.4509, "queue_k_norm": 1.4559, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3823, "sent_len_1": 66.7182, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.0137, "stdk": 0.0483, "stdq": 0.0471, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.7767, "doc_norm": 1.4592, "encoder_q-embeddings": 1086.7839, "encoder_q-layer.0": 685.8618, "encoder_q-layer.1": 746.4498, "encoder_q-layer.10": 1334.0502, "encoder_q-layer.11": 2741.625, "encoder_q-layer.2": 836.6058, "encoder_q-layer.3": 893.6281, "encoder_q-layer.4": 939.8101, "encoder_q-layer.5": 985.1179, "encoder_q-layer.6": 1070.2368, "encoder_q-layer.7": 1152.3516, "encoder_q-layer.8": 1269.4159, "encoder_q-layer.9": 1159.8025, "epoch": 0.64, "inbatch_neg_score": 0.3082, "inbatch_pos_score": 1.0342, "learning_rate": 1.938888888888889e-05, "loss": 2.7767, "norm_diff": 0.0224, "norm_loss": 0.0, "num_token_doc": 66.7328, "num_token_overlap": 18.0439, "num_token_query": 52.385, "num_token_union": 73.5173, "num_word_context": 202.3546, "num_word_doc": 49.8093, "num_word_query": 39.9464, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1856.0062, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3079, "query_norm": 1.4368, "queue_k_norm": 1.4546, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.385, "sent_len_1": 66.7328, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0462, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7937, "doc_norm": 1.4553, "encoder_q-embeddings": 1154.8691, "encoder_q-layer.0": 763.1609, "encoder_q-layer.1": 815.8049, "encoder_q-layer.10": 1254.8217, "encoder_q-layer.11": 2775.6436, "encoder_q-layer.2": 928.4027, "encoder_q-layer.3": 958.6046, "encoder_q-layer.4": 1040.3698, "encoder_q-layer.5": 947.6887, "encoder_q-layer.6": 1014.7525, "encoder_q-layer.7": 1080.3352, "encoder_q-layer.8": 1267.9833, "encoder_q-layer.9": 1134.9313, "epoch": 0.64, "inbatch_neg_score": 0.3108, "inbatch_pos_score": 1.042, "learning_rate": 1.9333333333333333e-05, "loss": 2.7937, "norm_diff": 0.0249, "norm_loss": 0.0, "num_token_doc": 66.7614, "num_token_overlap": 18.0348, "num_token_query": 52.343, "num_token_union": 73.5037, "num_word_context": 202.6159, "num_word_doc": 49.8021, "num_word_query": 39.9021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1886.9591, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.311, "query_norm": 1.4317, "queue_k_norm": 1.4548, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.343, "sent_len_1": 66.7614, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.795, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.7853, "doc_norm": 1.4492, "encoder_q-embeddings": 1104.0505, "encoder_q-layer.0": 709.3962, "encoder_q-layer.1": 761.0221, "encoder_q-layer.10": 1183.425, "encoder_q-layer.11": 2740.0881, "encoder_q-layer.2": 869.3546, "encoder_q-layer.3": 912.6322, "encoder_q-layer.4": 1038.8766, "encoder_q-layer.5": 1102.5437, "encoder_q-layer.6": 1145.0886, "encoder_q-layer.7": 1167.0979, "encoder_q-layer.8": 1298.417, "encoder_q-layer.9": 1155.6511, "epoch": 0.64, "inbatch_neg_score": 0.2987, "inbatch_pos_score": 1.0176, "learning_rate": 1.927777777777778e-05, "loss": 2.7853, "norm_diff": 0.035, "norm_loss": 0.0, "num_token_doc": 66.6196, "num_token_overlap": 17.9425, "num_token_query": 51.9763, "num_token_union": 73.2292, "num_word_context": 201.9878, "num_word_doc": 49.7111, "num_word_query": 39.6294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1898.2418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4142, "queue_k_norm": 1.4555, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 51.9763, "sent_len_1": 66.6196, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1813, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.8076, "doc_norm": 1.4573, "encoder_q-embeddings": 2705.6194, "encoder_q-layer.0": 1892.4713, "encoder_q-layer.1": 2007.4524, "encoder_q-layer.10": 1232.1656, "encoder_q-layer.11": 2698.8008, "encoder_q-layer.2": 2062.6382, "encoder_q-layer.3": 2163.1492, "encoder_q-layer.4": 2128.8235, "encoder_q-layer.5": 2267.3186, "encoder_q-layer.6": 2189.9551, "encoder_q-layer.7": 2017.9362, "encoder_q-layer.8": 1746.6317, "encoder_q-layer.9": 1198.2512, "epoch": 0.64, "inbatch_neg_score": 0.3045, "inbatch_pos_score": 1.001, "learning_rate": 1.922222222222222e-05, "loss": 2.8076, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.8591, "num_token_overlap": 17.966, "num_token_query": 52.1804, "num_token_union": 73.5367, "num_word_context": 202.5198, "num_word_doc": 49.8977, "num_word_query": 39.8013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3155.0612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.408, "queue_k_norm": 1.4566, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1804, "sent_len_1": 66.8591, "sent_len_max_0": 128.0, "sent_len_max_1": 189.355, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.779, "doc_norm": 1.457, "encoder_q-embeddings": 1653.8469, "encoder_q-layer.0": 1075.3081, "encoder_q-layer.1": 1226.7123, "encoder_q-layer.10": 1166.1785, "encoder_q-layer.11": 2705.782, "encoder_q-layer.2": 1447.3818, "encoder_q-layer.3": 1442.2495, "encoder_q-layer.4": 1419.0869, "encoder_q-layer.5": 1468.0298, "encoder_q-layer.6": 1364.9841, "encoder_q-layer.7": 1278.382, "encoder_q-layer.8": 1320.6063, "encoder_q-layer.9": 1192.7804, "epoch": 0.64, "inbatch_neg_score": 0.2941, "inbatch_pos_score": 1.0234, "learning_rate": 1.9166666666666667e-05, "loss": 2.779, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.8099, "num_token_overlap": 18.0773, "num_token_query": 52.3665, "num_token_union": 73.5186, "num_word_context": 202.2297, "num_word_doc": 49.8571, "num_word_query": 39.9262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2260.7453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.4097, "queue_k_norm": 1.4563, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3665, "sent_len_1": 66.8099, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0675, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.7936, "doc_norm": 1.4621, "encoder_q-embeddings": 1412.4587, "encoder_q-layer.0": 973.0233, "encoder_q-layer.1": 1085.8112, "encoder_q-layer.10": 1146.8477, "encoder_q-layer.11": 2537.6394, "encoder_q-layer.2": 1145.8304, "encoder_q-layer.3": 1199.4718, "encoder_q-layer.4": 1322.568, "encoder_q-layer.5": 1366.3256, "encoder_q-layer.6": 1528.7759, "encoder_q-layer.7": 1403.4895, "encoder_q-layer.8": 1401.2737, "encoder_q-layer.9": 1122.9659, "epoch": 0.64, "inbatch_neg_score": 0.2985, "inbatch_pos_score": 1.0391, "learning_rate": 1.9111111111111113e-05, "loss": 2.7936, "norm_diff": 0.0474, "norm_loss": 0.0, "num_token_doc": 66.7779, "num_token_overlap": 17.9592, "num_token_query": 52.2322, "num_token_union": 73.5283, "num_word_context": 202.423, "num_word_doc": 49.8506, "num_word_query": 39.8478, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2110.1388, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2991, "query_norm": 1.4147, "queue_k_norm": 1.4558, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2322, "sent_len_1": 66.7779, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3375, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 2.778, "doc_norm": 1.4645, "encoder_q-embeddings": 1034.6978, "encoder_q-layer.0": 682.7767, "encoder_q-layer.1": 711.8134, "encoder_q-layer.10": 1390.0149, "encoder_q-layer.11": 2876.0364, "encoder_q-layer.2": 801.7812, "encoder_q-layer.3": 814.136, "encoder_q-layer.4": 855.897, "encoder_q-layer.5": 863.5699, "encoder_q-layer.6": 967.8682, "encoder_q-layer.7": 1109.0621, "encoder_q-layer.8": 1279.3835, "encoder_q-layer.9": 1179.0846, "epoch": 0.64, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 1.0039, "learning_rate": 1.905555555555556e-05, "loss": 2.778, "norm_diff": 0.0608, "norm_loss": 0.0, "num_token_doc": 66.8288, "num_token_overlap": 18.0195, "num_token_query": 52.2955, "num_token_union": 73.5402, "num_word_context": 202.003, "num_word_doc": 49.8614, "num_word_query": 39.8738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1868.6285, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.4036, "queue_k_norm": 1.455, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2955, "sent_len_1": 66.8288, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5175, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.787, "doc_norm": 1.4562, "encoder_q-embeddings": 1145.3748, "encoder_q-layer.0": 726.3959, "encoder_q-layer.1": 794.8083, "encoder_q-layer.10": 1148.8242, "encoder_q-layer.11": 2651.1443, "encoder_q-layer.2": 909.3495, "encoder_q-layer.3": 895.534, "encoder_q-layer.4": 951.7124, "encoder_q-layer.5": 962.6411, "encoder_q-layer.6": 1062.1279, "encoder_q-layer.7": 1135.413, "encoder_q-layer.8": 1290.1796, "encoder_q-layer.9": 1143.1101, "epoch": 0.64, "inbatch_neg_score": 0.2926, "inbatch_pos_score": 1.0088, "learning_rate": 1.9e-05, "loss": 2.787, "norm_diff": 0.0618, "norm_loss": 0.0, "num_token_doc": 66.6141, "num_token_overlap": 17.9285, "num_token_query": 52.2191, "num_token_union": 73.4721, "num_word_context": 202.304, "num_word_doc": 49.6937, "num_word_query": 39.8111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1871.3748, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.3944, "queue_k_norm": 1.4545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2191, "sent_len_1": 66.6141, "sent_len_max_0": 127.995, "sent_len_max_1": 192.1362, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.7691, "doc_norm": 1.454, "encoder_q-embeddings": 1385.7858, "encoder_q-layer.0": 897.0159, "encoder_q-layer.1": 1064.2631, "encoder_q-layer.10": 1190.2679, "encoder_q-layer.11": 2622.6392, "encoder_q-layer.2": 1256.6599, "encoder_q-layer.3": 1418.3367, "encoder_q-layer.4": 1446.1077, "encoder_q-layer.5": 1541.743, "encoder_q-layer.6": 1563.1372, "encoder_q-layer.7": 1550.8251, "encoder_q-layer.8": 1495.7006, "encoder_q-layer.9": 1161.8596, "epoch": 0.64, "inbatch_neg_score": 0.2904, "inbatch_pos_score": 1.0088, "learning_rate": 1.8944444444444447e-05, "loss": 2.7691, "norm_diff": 0.0592, "norm_loss": 0.0, "num_token_doc": 66.779, "num_token_overlap": 18.0112, "num_token_query": 52.3279, "num_token_union": 73.5202, "num_word_context": 202.254, "num_word_doc": 49.8518, "num_word_query": 39.8977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2213.8727, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.3948, "queue_k_norm": 1.4544, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3279, "sent_len_1": 66.779, "sent_len_max_0": 128.0, "sent_len_max_1": 187.725, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.7703, "doc_norm": 1.4557, "encoder_q-embeddings": 937.5085, "encoder_q-layer.0": 630.0259, "encoder_q-layer.1": 659.5764, "encoder_q-layer.10": 1452.1045, "encoder_q-layer.11": 2981.1216, "encoder_q-layer.2": 753.0179, "encoder_q-layer.3": 760.1039, "encoder_q-layer.4": 822.7474, "encoder_q-layer.5": 849.5505, "encoder_q-layer.6": 998.6681, "encoder_q-layer.7": 1071.6139, "encoder_q-layer.8": 1335.5331, "encoder_q-layer.9": 1338.9535, "epoch": 0.64, "inbatch_neg_score": 0.2882, "inbatch_pos_score": 1.0088, "learning_rate": 1.888888888888889e-05, "loss": 2.7703, "norm_diff": 0.03, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 18.0393, "num_token_query": 52.2984, "num_token_union": 73.4339, "num_word_context": 202.3066, "num_word_doc": 49.8141, "num_word_query": 39.8672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1895.8467, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4257, "queue_k_norm": 1.4558, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2984, "sent_len_1": 66.778, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8013, "stdk": 0.0487, "stdq": 0.047, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7744, "doc_norm": 1.4465, "encoder_q-embeddings": 1601.9712, "encoder_q-layer.0": 1085.8151, "encoder_q-layer.1": 1182.7501, "encoder_q-layer.10": 1398.7404, "encoder_q-layer.11": 2805.3604, "encoder_q-layer.2": 1280.578, "encoder_q-layer.3": 1364.8676, "encoder_q-layer.4": 1461.2034, "encoder_q-layer.5": 1412.4221, "encoder_q-layer.6": 1431.5702, "encoder_q-layer.7": 1343.6725, "encoder_q-layer.8": 1461.453, "encoder_q-layer.9": 1218.0256, "epoch": 0.65, "inbatch_neg_score": 0.2898, "inbatch_pos_score": 1.0117, "learning_rate": 1.8833333333333335e-05, "loss": 2.7744, "norm_diff": 0.0463, "norm_loss": 0.0, "num_token_doc": 66.6224, "num_token_overlap": 17.975, "num_token_query": 52.2024, "num_token_union": 73.3441, "num_word_context": 201.7299, "num_word_doc": 49.6822, "num_word_query": 39.7975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2281.5139, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2891, "query_norm": 1.4002, "queue_k_norm": 1.4538, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2024, "sent_len_1": 66.6224, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2537, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7846, "doc_norm": 1.4566, "encoder_q-embeddings": 1427.5238, "encoder_q-layer.0": 962.0321, "encoder_q-layer.1": 995.7765, "encoder_q-layer.10": 1227.8824, "encoder_q-layer.11": 2750.8699, "encoder_q-layer.2": 1014.7772, "encoder_q-layer.3": 1032.4988, "encoder_q-layer.4": 1049.9965, "encoder_q-layer.5": 1017.3154, "encoder_q-layer.6": 1157.592, "encoder_q-layer.7": 1231.5006, "encoder_q-layer.8": 1319.1937, "encoder_q-layer.9": 1187.031, "epoch": 0.65, "inbatch_neg_score": 0.2932, "inbatch_pos_score": 0.9995, "learning_rate": 1.8777777777777777e-05, "loss": 2.7846, "norm_diff": 0.0514, "norm_loss": 0.0, "num_token_doc": 66.7004, "num_token_overlap": 18.031, "num_token_query": 52.2858, "num_token_union": 73.3983, "num_word_context": 202.3127, "num_word_doc": 49.78, "num_word_query": 39.834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2044.494, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.292, "query_norm": 1.4052, "queue_k_norm": 1.455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2858, "sent_len_1": 66.7004, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.1375, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.7629, "doc_norm": 1.451, "encoder_q-embeddings": 1503.9171, "encoder_q-layer.0": 904.683, "encoder_q-layer.1": 1044.4713, "encoder_q-layer.10": 1218.9711, "encoder_q-layer.11": 2643.1448, "encoder_q-layer.2": 1235.5752, "encoder_q-layer.3": 1217.2542, "encoder_q-layer.4": 1286.7845, "encoder_q-layer.5": 1257.8502, "encoder_q-layer.6": 1352.0767, "encoder_q-layer.7": 1240.1497, "encoder_q-layer.8": 1342.0278, "encoder_q-layer.9": 1181.0587, "epoch": 0.65, "inbatch_neg_score": 0.2941, "inbatch_pos_score": 1.0156, "learning_rate": 1.8722222222222223e-05, "loss": 2.7629, "norm_diff": 0.0513, "norm_loss": 0.0, "num_token_doc": 66.8535, "num_token_overlap": 18.0645, "num_token_query": 52.3761, "num_token_union": 73.5099, "num_word_context": 202.6445, "num_word_doc": 49.9002, "num_word_query": 39.9327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.6401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.3996, "queue_k_norm": 1.4548, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3761, "sent_len_1": 66.8535, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7937, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7843, "doc_norm": 1.4562, "encoder_q-embeddings": 1914.0021, "encoder_q-layer.0": 1318.504, "encoder_q-layer.1": 1345.6276, "encoder_q-layer.10": 1272.6707, "encoder_q-layer.11": 2883.2185, "encoder_q-layer.2": 1346.0588, "encoder_q-layer.3": 1276.1854, "encoder_q-layer.4": 1231.9055, "encoder_q-layer.5": 1312.1793, "encoder_q-layer.6": 1429.1787, "encoder_q-layer.7": 1414.749, "encoder_q-layer.8": 1700.1995, "encoder_q-layer.9": 1257.3867, "epoch": 0.65, "inbatch_neg_score": 0.298, "inbatch_pos_score": 1.0137, "learning_rate": 1.866666666666667e-05, "loss": 2.7843, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.8784, "num_token_overlap": 17.9463, "num_token_query": 52.1212, "num_token_union": 73.5136, "num_word_context": 202.4576, "num_word_doc": 49.8986, "num_word_query": 39.7238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2412.6768, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.3988, "queue_k_norm": 1.4535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1212, "sent_len_1": 66.8784, "sent_len_max_0": 128.0, "sent_len_max_1": 191.69, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7776, "doc_norm": 1.4558, "encoder_q-embeddings": 1173.083, "encoder_q-layer.0": 786.1652, "encoder_q-layer.1": 820.8047, "encoder_q-layer.10": 1105.5536, "encoder_q-layer.11": 2506.3442, "encoder_q-layer.2": 956.6758, "encoder_q-layer.3": 925.5525, "encoder_q-layer.4": 1005.2155, "encoder_q-layer.5": 1010.3945, "encoder_q-layer.6": 1058.0551, "encoder_q-layer.7": 1148.9625, "encoder_q-layer.8": 1246.4467, "encoder_q-layer.9": 1102.5494, "epoch": 0.65, "inbatch_neg_score": 0.2933, "inbatch_pos_score": 1.0195, "learning_rate": 1.861111111111111e-05, "loss": 2.7776, "norm_diff": 0.0399, "norm_loss": 0.0, "num_token_doc": 66.7333, "num_token_overlap": 17.9724, "num_token_query": 52.0618, "num_token_union": 73.376, "num_word_context": 202.2501, "num_word_doc": 49.7761, "num_word_query": 39.6817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1828.0292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.4159, "queue_k_norm": 1.4557, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0618, "sent_len_1": 66.7333, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6525, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.7866, "doc_norm": 1.4522, "encoder_q-embeddings": 1035.6604, "encoder_q-layer.0": 648.0887, "encoder_q-layer.1": 722.3629, "encoder_q-layer.10": 1259.2009, "encoder_q-layer.11": 2828.4534, "encoder_q-layer.2": 818.1191, "encoder_q-layer.3": 844.6724, "encoder_q-layer.4": 908.3381, "encoder_q-layer.5": 909.2163, "encoder_q-layer.6": 1028.4036, "encoder_q-layer.7": 1130.8123, "encoder_q-layer.8": 1361.9766, "encoder_q-layer.9": 1224.6592, "epoch": 0.65, "inbatch_neg_score": 0.303, "inbatch_pos_score": 1.0137, "learning_rate": 1.8555555555555557e-05, "loss": 2.7866, "norm_diff": 0.0316, "norm_loss": 0.0, "num_token_doc": 66.7866, "num_token_overlap": 17.9683, "num_token_query": 52.2444, "num_token_union": 73.4737, "num_word_context": 202.286, "num_word_doc": 49.827, "num_word_query": 39.8233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1891.582, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4206, "queue_k_norm": 1.4542, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2444, "sent_len_1": 66.7866, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9575, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7597, "doc_norm": 1.4479, "encoder_q-embeddings": 1173.981, "encoder_q-layer.0": 790.6396, "encoder_q-layer.1": 866.1105, "encoder_q-layer.10": 1252.2214, "encoder_q-layer.11": 2721.1211, "encoder_q-layer.2": 955.3245, "encoder_q-layer.3": 964.5788, "encoder_q-layer.4": 1028.0879, "encoder_q-layer.5": 973.5499, "encoder_q-layer.6": 1069.6023, "encoder_q-layer.7": 1209.8181, "encoder_q-layer.8": 1434.3462, "encoder_q-layer.9": 1165.7644, "epoch": 0.65, "inbatch_neg_score": 0.3022, "inbatch_pos_score": 1.0283, "learning_rate": 1.85e-05, "loss": 2.7597, "norm_diff": 0.021, "norm_loss": 0.0, "num_token_doc": 66.9025, "num_token_overlap": 18.0591, "num_token_query": 52.3203, "num_token_union": 73.5103, "num_word_context": 202.1719, "num_word_doc": 49.9023, "num_word_query": 39.9002, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1944.7451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3003, "query_norm": 1.4269, "queue_k_norm": 1.4562, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3203, "sent_len_1": 66.9025, "sent_len_max_0": 128.0, "sent_len_max_1": 189.075, "stdk": 0.0483, "stdq": 0.0468, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7654, "doc_norm": 1.4533, "encoder_q-embeddings": 1116.9176, "encoder_q-layer.0": 745.5552, "encoder_q-layer.1": 782.8291, "encoder_q-layer.10": 1268.0636, "encoder_q-layer.11": 2727.6484, "encoder_q-layer.2": 843.8342, "encoder_q-layer.3": 848.4802, "encoder_q-layer.4": 924.4744, "encoder_q-layer.5": 963.7095, "encoder_q-layer.6": 1123.7766, "encoder_q-layer.7": 1265.771, "encoder_q-layer.8": 1570.9669, "encoder_q-layer.9": 1255.4091, "epoch": 0.65, "inbatch_neg_score": 0.2989, "inbatch_pos_score": 1.0273, "learning_rate": 1.8444444444444445e-05, "loss": 2.7654, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.7881, "num_token_overlap": 18.0818, "num_token_query": 52.4296, "num_token_union": 73.4672, "num_word_context": 202.8276, "num_word_doc": 49.8265, "num_word_query": 39.9693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1951.2335, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2981, "query_norm": 1.3984, "queue_k_norm": 1.4573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4296, "sent_len_1": 66.7881, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.2575, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.7933, "doc_norm": 1.4585, "encoder_q-embeddings": 1093.131, "encoder_q-layer.0": 715.1212, "encoder_q-layer.1": 778.838, "encoder_q-layer.10": 1299.7146, "encoder_q-layer.11": 2754.4751, "encoder_q-layer.2": 852.0481, "encoder_q-layer.3": 894.3032, "encoder_q-layer.4": 953.8177, "encoder_q-layer.5": 936.8149, "encoder_q-layer.6": 1069.0739, "encoder_q-layer.7": 1093.5992, "encoder_q-layer.8": 1266.0753, "encoder_q-layer.9": 1172.3326, "epoch": 0.65, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 1.0039, "learning_rate": 1.838888888888889e-05, "loss": 2.7933, "norm_diff": 0.0634, "norm_loss": 0.0, "num_token_doc": 66.6308, "num_token_overlap": 17.9313, "num_token_query": 52.2003, "num_token_union": 73.3735, "num_word_context": 202.4565, "num_word_doc": 49.6722, "num_word_query": 39.7714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1880.9756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.395, "queue_k_norm": 1.454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2003, "sent_len_1": 66.6308, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1375, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.7763, "doc_norm": 1.455, "encoder_q-embeddings": 2293.3318, "encoder_q-layer.0": 1558.0405, "encoder_q-layer.1": 1607.5079, "encoder_q-layer.10": 2553.834, "encoder_q-layer.11": 5700.9854, "encoder_q-layer.2": 1884.4879, "encoder_q-layer.3": 1940.1414, "encoder_q-layer.4": 2122.9824, "encoder_q-layer.5": 2129.1487, "encoder_q-layer.6": 2408.4907, "encoder_q-layer.7": 2483.5215, "encoder_q-layer.8": 2756.2727, "encoder_q-layer.9": 2465.7356, "epoch": 0.65, "inbatch_neg_score": 0.2987, "inbatch_pos_score": 1.0088, "learning_rate": 1.8333333333333333e-05, "loss": 2.7763, "norm_diff": 0.0405, "norm_loss": 0.0, "num_token_doc": 66.8323, "num_token_overlap": 17.9528, "num_token_query": 52.0677, "num_token_union": 73.3881, "num_word_context": 202.1671, "num_word_doc": 49.8987, "num_word_query": 39.6915, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3935.2761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.4145, "queue_k_norm": 1.4548, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0677, "sent_len_1": 66.8323, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6163, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 2.769, "doc_norm": 1.4547, "encoder_q-embeddings": 4837.2534, "encoder_q-layer.0": 3192.2026, "encoder_q-layer.1": 3363.7681, "encoder_q-layer.10": 2814.7854, "encoder_q-layer.11": 5648.6733, "encoder_q-layer.2": 3868.7676, "encoder_q-layer.3": 4044.9099, "encoder_q-layer.4": 4047.1079, "encoder_q-layer.5": 4075.4521, "encoder_q-layer.6": 3646.74, "encoder_q-layer.7": 3470.0774, "encoder_q-layer.8": 3538.2305, "encoder_q-layer.9": 2701.7742, "epoch": 0.66, "inbatch_neg_score": 0.2971, "inbatch_pos_score": 1.0117, "learning_rate": 1.827777777777778e-05, "loss": 2.769, "norm_diff": 0.0318, "norm_loss": 0.0, "num_token_doc": 66.7786, "num_token_overlap": 18.0085, "num_token_query": 52.3709, "num_token_union": 73.5668, "num_word_context": 202.7124, "num_word_doc": 49.8431, "num_word_query": 39.9431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5846.4309, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2981, "query_norm": 1.4228, "queue_k_norm": 1.4555, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3709, "sent_len_1": 66.7786, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.92, "stdk": 0.0487, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.7695, "doc_norm": 1.4526, "encoder_q-embeddings": 2340.5693, "encoder_q-layer.0": 1632.8123, "encoder_q-layer.1": 1757.6823, "encoder_q-layer.10": 2702.5786, "encoder_q-layer.11": 5265.2563, "encoder_q-layer.2": 1946.9025, "encoder_q-layer.3": 2016.2987, "encoder_q-layer.4": 2144.9817, "encoder_q-layer.5": 2157.2847, "encoder_q-layer.6": 2137.8225, "encoder_q-layer.7": 2317.4048, "encoder_q-layer.8": 2598.2339, "encoder_q-layer.9": 2308.2834, "epoch": 0.66, "inbatch_neg_score": 0.2981, "inbatch_pos_score": 0.9971, "learning_rate": 1.8222222222222224e-05, "loss": 2.7695, "norm_diff": 0.0502, "norm_loss": 0.0, "num_token_doc": 66.5212, "num_token_overlap": 17.991, "num_token_query": 52.169, "num_token_union": 73.2275, "num_word_context": 202.0421, "num_word_doc": 49.6637, "num_word_query": 39.7969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3782.4674, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.4024, "queue_k_norm": 1.4548, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.169, "sent_len_1": 66.5212, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0275, "stdk": 0.0485, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.7735, "doc_norm": 1.4618, "encoder_q-embeddings": 1392.6147, "encoder_q-layer.0": 919.709, "encoder_q-layer.1": 993.2045, "encoder_q-layer.10": 1304.2151, "encoder_q-layer.11": 2760.2742, "encoder_q-layer.2": 1125.1111, "encoder_q-layer.3": 1109.46, "encoder_q-layer.4": 1133.6697, "encoder_q-layer.5": 1109.1853, "encoder_q-layer.6": 1269.8757, "encoder_q-layer.7": 1280.6398, "encoder_q-layer.8": 1475.7611, "encoder_q-layer.9": 1230.4138, "epoch": 0.66, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 1.0459, "learning_rate": 1.8166666666666667e-05, "loss": 2.7735, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.7205, "num_token_overlap": 17.9655, "num_token_query": 52.2067, "num_token_union": 73.4015, "num_word_context": 202.1653, "num_word_doc": 49.8181, "num_word_query": 39.7837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2103.2999, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.424, "queue_k_norm": 1.4552, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2067, "sent_len_1": 66.7205, "sent_len_max_0": 127.99, "sent_len_max_1": 189.1362, "stdk": 0.0489, "stdq": 0.0468, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7796, "doc_norm": 1.454, "encoder_q-embeddings": 1242.7975, "encoder_q-layer.0": 791.412, "encoder_q-layer.1": 891.7367, "encoder_q-layer.10": 1186.8107, "encoder_q-layer.11": 2656.072, "encoder_q-layer.2": 1009.2551, "encoder_q-layer.3": 1074.3264, "encoder_q-layer.4": 1142.5354, "encoder_q-layer.5": 1079.512, "encoder_q-layer.6": 1143.2111, "encoder_q-layer.7": 1186.5881, "encoder_q-layer.8": 1279.8773, "encoder_q-layer.9": 1147.3411, "epoch": 0.66, "inbatch_neg_score": 0.299, "inbatch_pos_score": 1.0166, "learning_rate": 1.8111111111111112e-05, "loss": 2.7796, "norm_diff": 0.0498, "norm_loss": 0.0, "num_token_doc": 66.6307, "num_token_overlap": 17.9586, "num_token_query": 52.225, "num_token_union": 73.4197, "num_word_context": 202.3459, "num_word_doc": 49.7015, "num_word_query": 39.833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.0038, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.4041, "queue_k_norm": 1.4558, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.225, "sent_len_1": 66.6307, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.61, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.757, "doc_norm": 1.4569, "encoder_q-embeddings": 2992.1218, "encoder_q-layer.0": 2089.6741, "encoder_q-layer.1": 2739.0552, "encoder_q-layer.10": 1270.7449, "encoder_q-layer.11": 2812.6919, "encoder_q-layer.2": 3489.4299, "encoder_q-layer.3": 3496.175, "encoder_q-layer.4": 3261.9368, "encoder_q-layer.5": 2734.2935, "encoder_q-layer.6": 2567.6721, "encoder_q-layer.7": 2358.0918, "encoder_q-layer.8": 2142.491, "encoder_q-layer.9": 1271.5061, "epoch": 0.66, "inbatch_neg_score": 0.295, "inbatch_pos_score": 1.0166, "learning_rate": 1.8055555555555555e-05, "loss": 2.757, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.9142, "num_token_overlap": 18.0548, "num_token_query": 52.4921, "num_token_union": 73.6542, "num_word_context": 202.7009, "num_word_doc": 49.9468, "num_word_query": 40.0178, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3966.5858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.4108, "queue_k_norm": 1.4557, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4921, "sent_len_1": 66.9142, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0387, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7804, "doc_norm": 1.4553, "encoder_q-embeddings": 1381.8882, "encoder_q-layer.0": 880.8937, "encoder_q-layer.1": 979.9785, "encoder_q-layer.10": 1258.8282, "encoder_q-layer.11": 2792.0869, "encoder_q-layer.2": 1170.2136, "encoder_q-layer.3": 1179.9745, "encoder_q-layer.4": 1138.9338, "encoder_q-layer.5": 1122.4354, "encoder_q-layer.6": 1170.4623, "encoder_q-layer.7": 1262.2838, "encoder_q-layer.8": 1404.3214, "encoder_q-layer.9": 1217.9117, "epoch": 0.66, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 1.0068, "learning_rate": 1.8e-05, "loss": 2.7804, "norm_diff": 0.066, "norm_loss": 0.0, "num_token_doc": 66.5097, "num_token_overlap": 17.9155, "num_token_query": 52.2333, "num_token_union": 73.3722, "num_word_context": 202.4463, "num_word_doc": 49.647, "num_word_query": 39.8404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2081.6791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.3894, "queue_k_norm": 1.4532, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2333, "sent_len_1": 66.5097, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9062, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7561, "doc_norm": 1.446, "encoder_q-embeddings": 964.0497, "encoder_q-layer.0": 610.1404, "encoder_q-layer.1": 660.5527, "encoder_q-layer.10": 1405.6542, "encoder_q-layer.11": 2858.1589, "encoder_q-layer.2": 745.8024, "encoder_q-layer.3": 755.4278, "encoder_q-layer.4": 800.6057, "encoder_q-layer.5": 835.8075, "encoder_q-layer.6": 1000.6499, "encoder_q-layer.7": 1071.1432, "encoder_q-layer.8": 1294.3986, "encoder_q-layer.9": 1315.3451, "epoch": 0.66, "inbatch_neg_score": 0.2995, "inbatch_pos_score": 1.001, "learning_rate": 1.7944444444444443e-05, "loss": 2.7561, "norm_diff": 0.0621, "norm_loss": 0.0, "num_token_doc": 66.7615, "num_token_overlap": 18.0495, "num_token_query": 52.3694, "num_token_union": 73.534, "num_word_context": 202.217, "num_word_doc": 49.8398, "num_word_query": 39.952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1822.9359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2991, "query_norm": 1.3839, "queue_k_norm": 1.4548, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3694, "sent_len_1": 66.7615, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.7812, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.7495, "doc_norm": 1.4522, "encoder_q-embeddings": 1150.4348, "encoder_q-layer.0": 733.5315, "encoder_q-layer.1": 798.7374, "encoder_q-layer.10": 1448.0641, "encoder_q-layer.11": 2829.9106, "encoder_q-layer.2": 893.8357, "encoder_q-layer.3": 950.7589, "encoder_q-layer.4": 1000.9714, "encoder_q-layer.5": 1030.1497, "encoder_q-layer.6": 1136.0242, "encoder_q-layer.7": 1277.5626, "encoder_q-layer.8": 1526.5359, "encoder_q-layer.9": 1314.0468, "epoch": 0.66, "inbatch_neg_score": 0.2956, "inbatch_pos_score": 1.0156, "learning_rate": 1.788888888888889e-05, "loss": 2.7495, "norm_diff": 0.0433, "norm_loss": 0.0, "num_token_doc": 66.9432, "num_token_overlap": 18.1144, "num_token_query": 52.2123, "num_token_union": 73.4773, "num_word_context": 202.2538, "num_word_doc": 49.959, "num_word_query": 39.7884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2006.2902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.4089, "queue_k_norm": 1.4565, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2123, "sent_len_1": 66.9432, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7337, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.7779, "doc_norm": 1.456, "encoder_q-embeddings": 1257.2653, "encoder_q-layer.0": 862.6942, "encoder_q-layer.1": 943.318, "encoder_q-layer.10": 1224.8309, "encoder_q-layer.11": 2761.3176, "encoder_q-layer.2": 1044.2936, "encoder_q-layer.3": 1026.5088, "encoder_q-layer.4": 1066.34, "encoder_q-layer.5": 1187.6276, "encoder_q-layer.6": 1240.0726, "encoder_q-layer.7": 1192.1046, "encoder_q-layer.8": 1352.9181, "encoder_q-layer.9": 1171.2148, "epoch": 0.66, "inbatch_neg_score": 0.2944, "inbatch_pos_score": 1.0117, "learning_rate": 1.7833333333333334e-05, "loss": 2.7779, "norm_diff": 0.0531, "norm_loss": 0.0, "num_token_doc": 66.5822, "num_token_overlap": 17.9478, "num_token_query": 52.0102, "num_token_union": 73.2896, "num_word_context": 201.6835, "num_word_doc": 49.7024, "num_word_query": 39.6411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2010.1034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.403, "queue_k_norm": 1.4547, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0102, "sent_len_1": 66.5822, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9787, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.7573, "doc_norm": 1.4596, "encoder_q-embeddings": 1126.3766, "encoder_q-layer.0": 722.3678, "encoder_q-layer.1": 814.3462, "encoder_q-layer.10": 1386.5554, "encoder_q-layer.11": 2649.6558, "encoder_q-layer.2": 901.9274, "encoder_q-layer.3": 884.7326, "encoder_q-layer.4": 899.7871, "encoder_q-layer.5": 915.5313, "encoder_q-layer.6": 1034.7252, "encoder_q-layer.7": 1184.7964, "encoder_q-layer.8": 1401.1243, "encoder_q-layer.9": 1300.6079, "epoch": 0.66, "inbatch_neg_score": 0.2911, "inbatch_pos_score": 1.0352, "learning_rate": 1.777777777777778e-05, "loss": 2.7573, "norm_diff": 0.0224, "norm_loss": 0.0, "num_token_doc": 66.8672, "num_token_overlap": 18.031, "num_token_query": 52.2882, "num_token_union": 73.4797, "num_word_context": 202.1771, "num_word_doc": 49.8879, "num_word_query": 39.9009, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1869.9715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.291, "query_norm": 1.4372, "queue_k_norm": 1.4554, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2882, "sent_len_1": 66.8672, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4212, "stdk": 0.0488, "stdq": 0.0474, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7688, "doc_norm": 1.4564, "encoder_q-embeddings": 954.7453, "encoder_q-layer.0": 595.5602, "encoder_q-layer.1": 629.2014, "encoder_q-layer.10": 1213.1285, "encoder_q-layer.11": 2644.9106, "encoder_q-layer.2": 720.3141, "encoder_q-layer.3": 736.0012, "encoder_q-layer.4": 812.7568, "encoder_q-layer.5": 807.2302, "encoder_q-layer.6": 876.3967, "encoder_q-layer.7": 935.561, "encoder_q-layer.8": 1183.2777, "encoder_q-layer.9": 1105.1315, "epoch": 0.66, "inbatch_neg_score": 0.2827, "inbatch_pos_score": 1.0098, "learning_rate": 1.7722222222222222e-05, "loss": 2.7688, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.6401, "num_token_overlap": 18.0112, "num_token_query": 52.1886, "num_token_union": 73.2783, "num_word_context": 202.2332, "num_word_doc": 49.6963, "num_word_query": 39.7381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1698.6135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2837, "query_norm": 1.4052, "queue_k_norm": 1.4542, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1886, "sent_len_1": 66.6401, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.7337, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.765, "doc_norm": 1.4561, "encoder_q-embeddings": 2475.6829, "encoder_q-layer.0": 1698.928, "encoder_q-layer.1": 2010.473, "encoder_q-layer.10": 1177.1433, "encoder_q-layer.11": 2672.1899, "encoder_q-layer.2": 2185.8235, "encoder_q-layer.3": 2570.228, "encoder_q-layer.4": 2444.7319, "encoder_q-layer.5": 2516.2041, "encoder_q-layer.6": 2820.6282, "encoder_q-layer.7": 2283.8525, "encoder_q-layer.8": 1790.1534, "encoder_q-layer.9": 1183.0887, "epoch": 0.67, "inbatch_neg_score": 0.2883, "inbatch_pos_score": 1.0381, "learning_rate": 1.7666666666666668e-05, "loss": 2.765, "norm_diff": 0.029, "norm_loss": 0.0, "num_token_doc": 66.8069, "num_token_overlap": 18.013, "num_token_query": 52.177, "num_token_union": 73.4394, "num_word_context": 202.0196, "num_word_doc": 49.869, "num_word_query": 39.7713, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3369.5617, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.4271, "queue_k_norm": 1.4519, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.177, "sent_len_1": 66.8069, "sent_len_max_0": 127.995, "sent_len_max_1": 187.655, "stdk": 0.0488, "stdq": 0.047, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.7646, "doc_norm": 1.455, "encoder_q-embeddings": 1056.9269, "encoder_q-layer.0": 656.9004, "encoder_q-layer.1": 705.0876, "encoder_q-layer.10": 1247.5968, "encoder_q-layer.11": 2583.9624, "encoder_q-layer.2": 785.3265, "encoder_q-layer.3": 821.2953, "encoder_q-layer.4": 883.5806, "encoder_q-layer.5": 922.2829, "encoder_q-layer.6": 1088.5115, "encoder_q-layer.7": 1256.26, "encoder_q-layer.8": 1339.0483, "encoder_q-layer.9": 1165.2314, "epoch": 0.67, "inbatch_neg_score": 0.2833, "inbatch_pos_score": 1.001, "learning_rate": 1.761111111111111e-05, "loss": 2.7646, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.8499, "num_token_overlap": 18.0286, "num_token_query": 52.3057, "num_token_union": 73.536, "num_word_context": 202.2326, "num_word_doc": 49.8937, "num_word_query": 39.8927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1826.8219, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.3993, "queue_k_norm": 1.4554, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3057, "sent_len_1": 66.8499, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0588, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.7507, "doc_norm": 1.4554, "encoder_q-embeddings": 1532.6615, "encoder_q-layer.0": 996.8864, "encoder_q-layer.1": 1082.5172, "encoder_q-layer.10": 1213.2922, "encoder_q-layer.11": 2737.6868, "encoder_q-layer.2": 1286.6909, "encoder_q-layer.3": 1379.0916, "encoder_q-layer.4": 1511.5159, "encoder_q-layer.5": 1601.5151, "encoder_q-layer.6": 1676.1913, "encoder_q-layer.7": 1627.765, "encoder_q-layer.8": 1584.9526, "encoder_q-layer.9": 1151.9515, "epoch": 0.67, "inbatch_neg_score": 0.2758, "inbatch_pos_score": 0.9863, "learning_rate": 1.7555555555555556e-05, "loss": 2.7507, "norm_diff": 0.0665, "norm_loss": 0.0, "num_token_doc": 67.1077, "num_token_overlap": 18.09, "num_token_query": 52.3896, "num_token_union": 73.6544, "num_word_context": 202.6982, "num_word_doc": 50.0809, "num_word_query": 39.9273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2336.124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2781, "query_norm": 1.3889, "queue_k_norm": 1.4544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3896, "sent_len_1": 67.1077, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9625, "stdk": 0.0487, "stdq": 0.0457, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7488, "doc_norm": 1.4617, "encoder_q-embeddings": 1060.7369, "encoder_q-layer.0": 694.9835, "encoder_q-layer.1": 738.334, "encoder_q-layer.10": 1357.6844, "encoder_q-layer.11": 3054.6926, "encoder_q-layer.2": 865.1098, "encoder_q-layer.3": 901.0588, "encoder_q-layer.4": 902.4542, "encoder_q-layer.5": 922.4153, "encoder_q-layer.6": 1083.6053, "encoder_q-layer.7": 1168.4918, "encoder_q-layer.8": 1428.5017, "encoder_q-layer.9": 1336.3077, "epoch": 0.67, "inbatch_neg_score": 0.2818, "inbatch_pos_score": 1.0117, "learning_rate": 1.75e-05, "loss": 2.7488, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.974, "num_token_overlap": 18.0835, "num_token_query": 52.254, "num_token_union": 73.5692, "num_word_context": 202.3354, "num_word_doc": 49.975, "num_word_query": 39.8198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1983.5984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2825, "query_norm": 1.4021, "queue_k_norm": 1.4533, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.254, "sent_len_1": 66.974, "sent_len_max_0": 127.9838, "sent_len_max_1": 188.9525, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 2.7358, "doc_norm": 1.4539, "encoder_q-embeddings": 1690.3566, "encoder_q-layer.0": 1210.9667, "encoder_q-layer.1": 1261.6206, "encoder_q-layer.10": 1405.873, "encoder_q-layer.11": 2861.6572, "encoder_q-layer.2": 1497.2384, "encoder_q-layer.3": 1391.6941, "encoder_q-layer.4": 1357.381, "encoder_q-layer.5": 1349.3318, "encoder_q-layer.6": 1343.0813, "encoder_q-layer.7": 1286.3396, "encoder_q-layer.8": 1524.2277, "encoder_q-layer.9": 1353.6162, "epoch": 0.67, "inbatch_neg_score": 0.2832, "inbatch_pos_score": 0.9985, "learning_rate": 1.7444444444444448e-05, "loss": 2.7358, "norm_diff": 0.0369, "norm_loss": 0.0, "num_token_doc": 66.8749, "num_token_overlap": 18.0977, "num_token_query": 52.3688, "num_token_union": 73.5592, "num_word_context": 202.6175, "num_word_doc": 49.9136, "num_word_query": 39.9218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2354.5256, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.417, "queue_k_norm": 1.4538, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3688, "sent_len_1": 66.8749, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3837, "stdk": 0.0486, "stdq": 0.0466, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.7596, "doc_norm": 1.4552, "encoder_q-embeddings": 981.0418, "encoder_q-layer.0": 644.069, "encoder_q-layer.1": 685.3669, "encoder_q-layer.10": 1277.5424, "encoder_q-layer.11": 2767.697, "encoder_q-layer.2": 818.6052, "encoder_q-layer.3": 838.2947, "encoder_q-layer.4": 888.8694, "encoder_q-layer.5": 910.0251, "encoder_q-layer.6": 1001.7188, "encoder_q-layer.7": 1058.2233, "encoder_q-layer.8": 1280.1483, "encoder_q-layer.9": 1243.0043, "epoch": 0.67, "inbatch_neg_score": 0.2813, "inbatch_pos_score": 1.0039, "learning_rate": 1.738888888888889e-05, "loss": 2.7596, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.6365, "num_token_overlap": 17.9696, "num_token_query": 52.1565, "num_token_union": 73.3283, "num_word_context": 201.8886, "num_word_doc": 49.7088, "num_word_query": 39.7678, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1825.2537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2822, "query_norm": 1.4017, "queue_k_norm": 1.4541, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1565, "sent_len_1": 66.6365, "sent_len_max_0": 128.0, "sent_len_max_1": 188.53, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.741, "doc_norm": 1.4517, "encoder_q-embeddings": 2293.2883, "encoder_q-layer.0": 1604.449, "encoder_q-layer.1": 1896.4243, "encoder_q-layer.10": 1311.0702, "encoder_q-layer.11": 2763.4109, "encoder_q-layer.2": 1882.5737, "encoder_q-layer.3": 1923.7599, "encoder_q-layer.4": 1990.4371, "encoder_q-layer.5": 1852.0487, "encoder_q-layer.6": 1925.39, "encoder_q-layer.7": 1738.2837, "encoder_q-layer.8": 1646.4327, "encoder_q-layer.9": 1240.8081, "epoch": 0.67, "inbatch_neg_score": 0.2795, "inbatch_pos_score": 1.0059, "learning_rate": 1.7333333333333336e-05, "loss": 2.741, "norm_diff": 0.0379, "norm_loss": 0.0, "num_token_doc": 66.7535, "num_token_overlap": 18.0088, "num_token_query": 52.2601, "num_token_union": 73.4275, "num_word_context": 202.1163, "num_word_doc": 49.7877, "num_word_query": 39.8367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2840.2479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.281, "query_norm": 1.4138, "queue_k_norm": 1.4522, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2601, "sent_len_1": 66.7535, "sent_len_max_0": 127.9862, "sent_len_max_1": 192.8038, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 63.4766, "active_queue_size": 16384.0, "cl_loss": 2.7616, "doc_norm": 1.4564, "encoder_q-embeddings": 1111.4624, "encoder_q-layer.0": 694.2844, "encoder_q-layer.1": 772.6993, "encoder_q-layer.10": 1301.0532, "encoder_q-layer.11": 2540.634, "encoder_q-layer.2": 871.789, "encoder_q-layer.3": 914.2667, "encoder_q-layer.4": 917.7905, "encoder_q-layer.5": 960.2596, "encoder_q-layer.6": 1038.8663, "encoder_q-layer.7": 1097.0815, "encoder_q-layer.8": 1227.8242, "encoder_q-layer.9": 1173.9266, "epoch": 0.67, "inbatch_neg_score": 0.2858, "inbatch_pos_score": 1.0605, "learning_rate": 1.7277777777777778e-05, "loss": 2.7616, "norm_diff": 0.0249, "norm_loss": 0.0, "num_token_doc": 66.618, "num_token_overlap": 17.9559, "num_token_query": 52.2021, "num_token_union": 73.3871, "num_word_context": 202.2741, "num_word_doc": 49.6797, "num_word_query": 39.782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1807.43, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2854, "query_norm": 1.4316, "queue_k_norm": 1.4513, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2021, "sent_len_1": 66.618, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.5925, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.7586, "doc_norm": 1.4501, "encoder_q-embeddings": 991.3627, "encoder_q-layer.0": 638.512, "encoder_q-layer.1": 682.002, "encoder_q-layer.10": 1301.269, "encoder_q-layer.11": 2762.3489, "encoder_q-layer.2": 768.0671, "encoder_q-layer.3": 824.0035, "encoder_q-layer.4": 898.2994, "encoder_q-layer.5": 958.4055, "encoder_q-layer.6": 1098.439, "encoder_q-layer.7": 1235.1599, "encoder_q-layer.8": 1283.4681, "encoder_q-layer.9": 1168.803, "epoch": 0.67, "inbatch_neg_score": 0.2796, "inbatch_pos_score": 0.9946, "learning_rate": 1.7222222222222224e-05, "loss": 2.7586, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.9071, "num_token_overlap": 18.0333, "num_token_query": 52.2812, "num_token_union": 73.5501, "num_word_context": 202.597, "num_word_doc": 49.9265, "num_word_query": 39.8489, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1847.9824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2803, "query_norm": 1.4049, "queue_k_norm": 1.4526, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2812, "sent_len_1": 66.9071, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.225, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7377, "doc_norm": 1.4596, "encoder_q-embeddings": 1049.5519, "encoder_q-layer.0": 678.2117, "encoder_q-layer.1": 730.8, "encoder_q-layer.10": 1353.8169, "encoder_q-layer.11": 2790.8596, "encoder_q-layer.2": 807.5619, "encoder_q-layer.3": 826.317, "encoder_q-layer.4": 850.0207, "encoder_q-layer.5": 927.577, "encoder_q-layer.6": 1074.9069, "encoder_q-layer.7": 1111.3275, "encoder_q-layer.8": 1378.1931, "encoder_q-layer.9": 1248.5219, "epoch": 0.67, "inbatch_neg_score": 0.2831, "inbatch_pos_score": 1.0049, "learning_rate": 1.7166666666666666e-05, "loss": 2.7377, "norm_diff": 0.0396, "norm_loss": 0.0, "num_token_doc": 67.0904, "num_token_overlap": 18.1366, "num_token_query": 52.3547, "num_token_union": 73.5918, "num_word_context": 202.5437, "num_word_doc": 50.0409, "num_word_query": 39.9075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1854.8378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2827, "query_norm": 1.42, "queue_k_norm": 1.453, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3547, "sent_len_1": 67.0904, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.0, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7407, "doc_norm": 1.4537, "encoder_q-embeddings": 1809.47, "encoder_q-layer.0": 1264.8506, "encoder_q-layer.1": 1390.5424, "encoder_q-layer.10": 1296.2552, "encoder_q-layer.11": 2789.5039, "encoder_q-layer.2": 1743.1747, "encoder_q-layer.3": 1910.2554, "encoder_q-layer.4": 1905.3527, "encoder_q-layer.5": 1761.86, "encoder_q-layer.6": 1817.5941, "encoder_q-layer.7": 1734.9285, "encoder_q-layer.8": 1589.4297, "encoder_q-layer.9": 1202.7568, "epoch": 0.68, "inbatch_neg_score": 0.2904, "inbatch_pos_score": 1.002, "learning_rate": 1.7111111111111112e-05, "loss": 2.7407, "norm_diff": 0.0402, "norm_loss": 0.0, "num_token_doc": 66.7977, "num_token_overlap": 18.0201, "num_token_query": 52.1773, "num_token_union": 73.4147, "num_word_context": 202.2059, "num_word_doc": 49.8646, "num_word_query": 39.7597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2616.407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.4135, "queue_k_norm": 1.4521, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1773, "sent_len_1": 66.7977, "sent_len_max_0": 128.0, "sent_len_max_1": 188.195, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.7658, "doc_norm": 1.4487, "encoder_q-embeddings": 2379.5625, "encoder_q-layer.0": 1538.2267, "encoder_q-layer.1": 1719.9447, "encoder_q-layer.10": 2554.042, "encoder_q-layer.11": 5310.1758, "encoder_q-layer.2": 1958.2333, "encoder_q-layer.3": 2050.3186, "encoder_q-layer.4": 2179.8027, "encoder_q-layer.5": 2296.2842, "encoder_q-layer.6": 2512.0698, "encoder_q-layer.7": 2398.4641, "encoder_q-layer.8": 2604.1792, "encoder_q-layer.9": 2338.8975, "epoch": 0.68, "inbatch_neg_score": 0.2946, "inbatch_pos_score": 1.04, "learning_rate": 1.7055555555555554e-05, "loss": 2.7658, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.7691, "num_token_overlap": 18.0381, "num_token_query": 52.372, "num_token_union": 73.5032, "num_word_context": 202.5596, "num_word_doc": 49.8329, "num_word_query": 39.9187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3915.2631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.4391, "queue_k_norm": 1.4527, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.372, "sent_len_1": 66.7691, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.7125, "stdk": 0.0485, "stdq": 0.047, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.7531, "doc_norm": 1.4606, "encoder_q-embeddings": 2135.2756, "encoder_q-layer.0": 1395.0416, "encoder_q-layer.1": 1495.0428, "encoder_q-layer.10": 2458.771, "encoder_q-layer.11": 5520.4834, "encoder_q-layer.2": 1742.7968, "encoder_q-layer.3": 1827.4424, "encoder_q-layer.4": 2032.8054, "encoder_q-layer.5": 1973.1979, "encoder_q-layer.6": 2283.5823, "encoder_q-layer.7": 2496.9429, "encoder_q-layer.8": 2678.2185, "encoder_q-layer.9": 2429.2156, "epoch": 0.68, "inbatch_neg_score": 0.3025, "inbatch_pos_score": 1.0254, "learning_rate": 1.7000000000000003e-05, "loss": 2.7531, "norm_diff": 0.0385, "norm_loss": 0.0, "num_token_doc": 66.8604, "num_token_overlap": 18.0191, "num_token_query": 52.2326, "num_token_union": 73.4815, "num_word_context": 202.3341, "num_word_doc": 49.9218, "num_word_query": 39.8351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3827.0639, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3022, "query_norm": 1.4222, "queue_k_norm": 1.4531, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2326, "sent_len_1": 66.8604, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9325, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.742, "doc_norm": 1.4506, "encoder_q-embeddings": 2645.1023, "encoder_q-layer.0": 1861.8772, "encoder_q-layer.1": 1955.5052, "encoder_q-layer.10": 2552.9827, "encoder_q-layer.11": 5389.4907, "encoder_q-layer.2": 2223.8584, "encoder_q-layer.3": 2115.5156, "encoder_q-layer.4": 2109.325, "encoder_q-layer.5": 2013.108, "encoder_q-layer.6": 2078.7358, "encoder_q-layer.7": 2116.1765, "encoder_q-layer.8": 2409.3184, "encoder_q-layer.9": 2272.5452, "epoch": 0.68, "inbatch_neg_score": 0.3055, "inbatch_pos_score": 1.0391, "learning_rate": 1.6944444444444446e-05, "loss": 2.742, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 67.1737, "num_token_overlap": 18.0978, "num_token_query": 52.281, "num_token_union": 73.668, "num_word_context": 202.7344, "num_word_doc": 50.1438, "num_word_query": 39.8405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3942.9384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.4359, "queue_k_norm": 1.4537, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.281, "sent_len_1": 67.1737, "sent_len_max_0": 128.0, "sent_len_max_1": 189.315, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7352, "doc_norm": 1.4525, "encoder_q-embeddings": 3616.908, "encoder_q-layer.0": 2576.8372, "encoder_q-layer.1": 2630.7229, "encoder_q-layer.10": 2751.5251, "encoder_q-layer.11": 5536.2227, "encoder_q-layer.2": 3032.5273, "encoder_q-layer.3": 3500.6248, "encoder_q-layer.4": 3533.8413, "encoder_q-layer.5": 3232.1458, "encoder_q-layer.6": 3900.0244, "encoder_q-layer.7": 3292.1746, "encoder_q-layer.8": 3422.5144, "encoder_q-layer.9": 2684.1689, "epoch": 0.68, "inbatch_neg_score": 0.3023, "inbatch_pos_score": 1.0371, "learning_rate": 1.688888888888889e-05, "loss": 2.7352, "norm_diff": 0.0249, "norm_loss": 0.0, "num_token_doc": 66.7111, "num_token_overlap": 18.0899, "num_token_query": 52.3894, "num_token_union": 73.4584, "num_word_context": 202.026, "num_word_doc": 49.743, "num_word_query": 39.9061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5190.8415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3022, "query_norm": 1.4276, "queue_k_norm": 1.4562, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3894, "sent_len_1": 66.7111, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.8288, "stdk": 0.0486, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7522, "doc_norm": 1.4582, "encoder_q-embeddings": 2435.5029, "encoder_q-layer.0": 1609.7094, "encoder_q-layer.1": 1783.6857, "encoder_q-layer.10": 2673.2913, "encoder_q-layer.11": 5805.9028, "encoder_q-layer.2": 1973.7252, "encoder_q-layer.3": 2052.1785, "encoder_q-layer.4": 2165.5137, "encoder_q-layer.5": 2377.4187, "encoder_q-layer.6": 2580.2917, "encoder_q-layer.7": 2664.9194, "encoder_q-layer.8": 2978.5127, "encoder_q-layer.9": 2472.7507, "epoch": 0.68, "inbatch_neg_score": 0.3057, "inbatch_pos_score": 1.0361, "learning_rate": 1.6833333333333334e-05, "loss": 2.7522, "norm_diff": 0.0287, "norm_loss": 0.0, "num_token_doc": 66.8032, "num_token_overlap": 18.0026, "num_token_query": 52.1755, "num_token_union": 73.4333, "num_word_context": 202.3118, "num_word_doc": 49.896, "num_word_query": 39.7945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4179.5785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3059, "query_norm": 1.4295, "queue_k_norm": 1.4551, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1755, "sent_len_1": 66.8032, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.5637, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.7531, "doc_norm": 1.4639, "encoder_q-embeddings": 3067.9546, "encoder_q-layer.0": 2029.442, "encoder_q-layer.1": 2094.502, "encoder_q-layer.10": 2532.5815, "encoder_q-layer.11": 5424.4414, "encoder_q-layer.2": 2374.2974, "encoder_q-layer.3": 2451.2021, "encoder_q-layer.4": 2323.2036, "encoder_q-layer.5": 2077.0063, "encoder_q-layer.6": 2227.4553, "encoder_q-layer.7": 2284.5708, "encoder_q-layer.8": 2627.1602, "encoder_q-layer.9": 2321.0513, "epoch": 0.68, "inbatch_neg_score": 0.3081, "inbatch_pos_score": 1.0498, "learning_rate": 1.677777777777778e-05, "loss": 2.7531, "norm_diff": 0.0369, "norm_loss": 0.0, "num_token_doc": 66.8264, "num_token_overlap": 17.968, "num_token_query": 52.0987, "num_token_union": 73.4081, "num_word_context": 202.359, "num_word_doc": 49.8995, "num_word_query": 39.7177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4202.6955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3096, "query_norm": 1.4269, "queue_k_norm": 1.4552, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0987, "sent_len_1": 66.8264, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0662, "stdk": 0.0491, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7475, "doc_norm": 1.459, "encoder_q-embeddings": 2559.0688, "encoder_q-layer.0": 1695.8652, "encoder_q-layer.1": 1793.3051, "encoder_q-layer.10": 2382.394, "encoder_q-layer.11": 5208.2266, "encoder_q-layer.2": 1945.2242, "encoder_q-layer.3": 2023.0289, "encoder_q-layer.4": 2126.3804, "encoder_q-layer.5": 2077.7661, "encoder_q-layer.6": 2262.0835, "encoder_q-layer.7": 2568.7146, "encoder_q-layer.8": 2661.9199, "encoder_q-layer.9": 2310.5103, "epoch": 0.68, "inbatch_neg_score": 0.2987, "inbatch_pos_score": 1.0352, "learning_rate": 1.6722222222222222e-05, "loss": 2.7475, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.9002, "num_token_overlap": 18.0526, "num_token_query": 52.3657, "num_token_union": 73.5558, "num_word_context": 202.4604, "num_word_doc": 49.9305, "num_word_query": 39.9532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3948.3302, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.4055, "queue_k_norm": 1.4556, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3657, "sent_len_1": 66.9002, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9375, "stdk": 0.0489, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.7594, "doc_norm": 1.452, "encoder_q-embeddings": 2193.9709, "encoder_q-layer.0": 1399.6952, "encoder_q-layer.1": 1574.2358, "encoder_q-layer.10": 2534.2722, "encoder_q-layer.11": 5633.3735, "encoder_q-layer.2": 1795.2156, "encoder_q-layer.3": 1803.8257, "encoder_q-layer.4": 1898.5519, "encoder_q-layer.5": 2025.4808, "encoder_q-layer.6": 2201.7012, "encoder_q-layer.7": 2431.5513, "encoder_q-layer.8": 2597.686, "encoder_q-layer.9": 2285.0359, "epoch": 0.68, "inbatch_neg_score": 0.2976, "inbatch_pos_score": 1.0293, "learning_rate": 1.6666666666666667e-05, "loss": 2.7594, "norm_diff": 0.0422, "norm_loss": 0.0, "num_token_doc": 66.6068, "num_token_overlap": 17.9551, "num_token_query": 52.1398, "num_token_union": 73.3506, "num_word_context": 202.0646, "num_word_doc": 49.693, "num_word_query": 39.7448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3799.32, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2959, "query_norm": 1.4098, "queue_k_norm": 1.4556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1398, "sent_len_1": 66.6068, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.11, "stdk": 0.0486, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 28.3316, "dev_samples_per_second": 2.259, "dev_steps_per_second": 0.035, "epoch": 0.68, "step": 70000, "test_accuracy": 93.5546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.37200358510017395, "test_doc_norm": 1.4267178773880005, "test_inbatch_neg_score": 0.647799015045166, "test_inbatch_pos_score": 1.584402322769165, "test_loss": 0.37200358510017395, "test_loss_align": 1.0171136856079102, "test_loss_unif": 3.8173561096191406, "test_loss_unif_q@queue": 3.8173558712005615, "test_norm_diff": 0.029279479756951332, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2894432544708252, "test_query_norm": 1.455997347831726, "test_queue_k_norm": 1.455552101135254, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04236723482608795, "test_stdq": 0.042725399136543274, "test_stdqueue_k": 0.04880161210894585, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.3316, "dev_samples_per_second": 2.259, "dev_steps_per_second": 0.035, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.3866, "eval_beir-arguana_recall@10": 0.65789, "eval_beir-arguana_recall@100": 0.92674, "eval_beir-arguana_recall@20": 0.78094, "eval_beir-avg_ndcg@10": 0.3735948333333334, "eval_beir-avg_recall@10": 0.44479766666666676, "eval_beir-avg_recall@100": 0.6258459166666668, "eval_beir-avg_recall@20": 0.5056748333333333, "eval_beir-cqadupstack_ndcg@10": 0.2643683333333333, "eval_beir-cqadupstack_recall@10": 0.35763666666666666, "eval_beir-cqadupstack_recall@100": 0.5880691666666666, "eval_beir-cqadupstack_recall@20": 0.4254083333333334, "eval_beir-fiqa_ndcg@10": 0.2477, "eval_beir-fiqa_recall@10": 0.31043, "eval_beir-fiqa_recall@100": 0.58458, "eval_beir-fiqa_recall@20": 0.39731, "eval_beir-nfcorpus_ndcg@10": 0.28925, "eval_beir-nfcorpus_recall@10": 0.14067, "eval_beir-nfcorpus_recall@100": 0.26195, "eval_beir-nfcorpus_recall@20": 0.17105, "eval_beir-nq_ndcg@10": 0.26603, "eval_beir-nq_recall@10": 0.43634, "eval_beir-nq_recall@100": 0.78889, "eval_beir-nq_recall@20": 0.55796, "eval_beir-quora_ndcg@10": 0.78694, "eval_beir-quora_recall@10": 0.89234, "eval_beir-quora_recall@100": 0.97755, "eval_beir-quora_recall@20": 0.931, "eval_beir-scidocs_ndcg@10": 0.15093, "eval_beir-scidocs_recall@10": 0.15603, "eval_beir-scidocs_recall@100": 0.3612, "eval_beir-scidocs_recall@20": 0.21018, "eval_beir-scifact_ndcg@10": 0.6301, "eval_beir-scifact_recall@10": 0.78233, "eval_beir-scifact_recall@100": 0.92156, "eval_beir-scifact_recall@20": 0.83356, "eval_beir-trec-covid_ndcg@10": 0.54241, "eval_beir-trec-covid_recall@10": 0.582, "eval_beir-trec-covid_recall@100": 0.4202, "eval_beir-trec-covid_recall@20": 0.556, "eval_beir-webis-touche2020_ndcg@10": 0.17162, "eval_beir-webis-touche2020_recall@10": 0.13231, "eval_beir-webis-touche2020_recall@100": 0.42772, "eval_beir-webis-touche2020_recall@20": 0.19334, "eval_senteval-avg_sts": 0.7381008044225916, "eval_senteval-sickr_spearman": 0.7084336259461095, "eval_senteval-stsb_spearman": 0.7677679828990738, "step": 70000, "test_accuracy": 93.5546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.37200358510017395, "test_doc_norm": 1.4267178773880005, "test_inbatch_neg_score": 0.647799015045166, "test_inbatch_pos_score": 1.584402322769165, "test_loss": 0.37200358510017395, "test_loss_align": 1.0171136856079102, "test_loss_unif": 3.8173561096191406, "test_loss_unif_q@queue": 3.8173558712005615, "test_norm_diff": 0.029279479756951332, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2894432544708252, "test_query_norm": 1.455997347831726, "test_queue_k_norm": 1.455552101135254, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04236723482608795, "test_stdq": 0.042725399136543274, "test_stdqueue_k": 0.04880161210894585, "test_stdqueue_q": 0.0 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7565, "doc_norm": 1.4579, "encoder_q-embeddings": 2108.437, "encoder_q-layer.0": 1306.6071, "encoder_q-layer.1": 1454.0397, "encoder_q-layer.10": 2968.5872, "encoder_q-layer.11": 6194.4414, "encoder_q-layer.2": 1689.7634, "encoder_q-layer.3": 1675.2615, "encoder_q-layer.4": 1856.6097, "encoder_q-layer.5": 1949.5652, "encoder_q-layer.6": 2170.2673, "encoder_q-layer.7": 2555.3877, "encoder_q-layer.8": 3653.7908, "encoder_q-layer.9": 2917.2988, "epoch": 0.68, "inbatch_neg_score": 0.2947, "inbatch_pos_score": 1.0215, "learning_rate": 1.661111111111111e-05, "loss": 2.7565, "norm_diff": 0.059, "norm_loss": 0.0, "num_token_doc": 66.8442, "num_token_overlap": 18.0252, "num_token_query": 52.2515, "num_token_union": 73.5184, "num_word_context": 202.5218, "num_word_doc": 49.9014, "num_word_query": 39.8475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4148.4269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.3989, "queue_k_norm": 1.4557, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2515, "sent_len_1": 66.8442, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.4013, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7243, "doc_norm": 1.4574, "encoder_q-embeddings": 2970.3904, "encoder_q-layer.0": 1900.1302, "encoder_q-layer.1": 2151.2754, "encoder_q-layer.10": 2528.4307, "encoder_q-layer.11": 5283.5186, "encoder_q-layer.2": 2449.5605, "encoder_q-layer.3": 2680.864, "encoder_q-layer.4": 3231.1621, "encoder_q-layer.5": 3105.2168, "encoder_q-layer.6": 2749.8127, "encoder_q-layer.7": 2778.1736, "encoder_q-layer.8": 2608.7451, "encoder_q-layer.9": 2398.145, "epoch": 0.69, "inbatch_neg_score": 0.2997, "inbatch_pos_score": 1.0049, "learning_rate": 1.655555555555556e-05, "loss": 2.7243, "norm_diff": 0.0613, "norm_loss": 0.0, "num_token_doc": 66.887, "num_token_overlap": 18.0195, "num_token_query": 52.1093, "num_token_union": 73.4928, "num_word_context": 202.4671, "num_word_doc": 49.9242, "num_word_query": 39.7304, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4384.4522, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.3961, "queue_k_norm": 1.4579, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1093, "sent_len_1": 66.887, "sent_len_max_0": 128.0, "sent_len_max_1": 188.185, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7691, "doc_norm": 1.4577, "encoder_q-embeddings": 2177.0862, "encoder_q-layer.0": 1397.475, "encoder_q-layer.1": 1497.3123, "encoder_q-layer.10": 2634.5259, "encoder_q-layer.11": 5529.7754, "encoder_q-layer.2": 1707.3484, "encoder_q-layer.3": 1716.3701, "encoder_q-layer.4": 1852.373, "encoder_q-layer.5": 1852.5575, "encoder_q-layer.6": 2106.3447, "encoder_q-layer.7": 2411.8606, "encoder_q-layer.8": 2705.7847, "encoder_q-layer.9": 2352.0862, "epoch": 0.69, "inbatch_neg_score": 0.2983, "inbatch_pos_score": 1.0127, "learning_rate": 1.65e-05, "loss": 2.7691, "norm_diff": 0.0495, "norm_loss": 0.0, "num_token_doc": 66.8976, "num_token_overlap": 18.0019, "num_token_query": 52.3408, "num_token_union": 73.5661, "num_word_context": 202.3558, "num_word_doc": 49.911, "num_word_query": 39.8925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3751.5806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.4082, "queue_k_norm": 1.457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3408, "sent_len_1": 66.8976, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0775, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7402, "doc_norm": 1.4569, "encoder_q-embeddings": 1879.6779, "encoder_q-layer.0": 1140.7067, "encoder_q-layer.1": 1213.8226, "encoder_q-layer.10": 2569.4829, "encoder_q-layer.11": 5411.6035, "encoder_q-layer.2": 1347.9443, "encoder_q-layer.3": 1417.8806, "encoder_q-layer.4": 1484.2579, "encoder_q-layer.5": 1529.3081, "encoder_q-layer.6": 1822.2629, "encoder_q-layer.7": 2040.6726, "encoder_q-layer.8": 2495.2014, "encoder_q-layer.9": 2330.0278, "epoch": 0.69, "inbatch_neg_score": 0.3011, "inbatch_pos_score": 1.0283, "learning_rate": 1.6444444444444447e-05, "loss": 2.7402, "norm_diff": 0.0588, "norm_loss": 0.0, "num_token_doc": 66.8247, "num_token_overlap": 17.931, "num_token_query": 52.1623, "num_token_union": 73.5463, "num_word_context": 202.5233, "num_word_doc": 49.8831, "num_word_query": 39.7535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3497.4982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3005, "query_norm": 1.3981, "queue_k_norm": 1.4563, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1623, "sent_len_1": 66.8247, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.2312, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7546, "doc_norm": 1.4602, "encoder_q-embeddings": 2878.4998, "encoder_q-layer.0": 1904.0299, "encoder_q-layer.1": 2147.0522, "encoder_q-layer.10": 2588.0027, "encoder_q-layer.11": 5583.5122, "encoder_q-layer.2": 2642.7388, "encoder_q-layer.3": 2933.8718, "encoder_q-layer.4": 2895.771, "encoder_q-layer.5": 2855.4604, "encoder_q-layer.6": 2628.7112, "encoder_q-layer.7": 2320.1687, "encoder_q-layer.8": 2599.9722, "encoder_q-layer.9": 2430.7029, "epoch": 0.69, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0059, "learning_rate": 1.638888888888889e-05, "loss": 2.7546, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.8592, "num_token_overlap": 17.9464, "num_token_query": 52.1241, "num_token_union": 73.5165, "num_word_context": 202.5406, "num_word_doc": 49.9134, "num_word_query": 39.756, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4423.6996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.4028, "queue_k_norm": 1.4564, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1241, "sent_len_1": 66.8592, "sent_len_max_0": 127.9838, "sent_len_max_1": 187.7763, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7626, "doc_norm": 1.461, "encoder_q-embeddings": 8138.4897, "encoder_q-layer.0": 5724.354, "encoder_q-layer.1": 6296.6914, "encoder_q-layer.10": 2751.7671, "encoder_q-layer.11": 5918.8213, "encoder_q-layer.2": 6361.4414, "encoder_q-layer.3": 6203.6851, "encoder_q-layer.4": 6729.7339, "encoder_q-layer.5": 7019.2788, "encoder_q-layer.6": 6756.9561, "encoder_q-layer.7": 5730.8335, "encoder_q-layer.8": 4590.2124, "encoder_q-layer.9": 2577.0144, "epoch": 0.69, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 1.0352, "learning_rate": 1.6333333333333335e-05, "loss": 2.7626, "norm_diff": 0.0457, "norm_loss": 0.0, "num_token_doc": 66.7721, "num_token_overlap": 18.0254, "num_token_query": 52.1183, "num_token_union": 73.377, "num_word_context": 202.2656, "num_word_doc": 49.7958, "num_word_query": 39.7192, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9167.3802, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2983, "query_norm": 1.4152, "queue_k_norm": 1.455, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1183, "sent_len_1": 66.7721, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.5125, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.7629, "doc_norm": 1.4544, "encoder_q-embeddings": 2398.5833, "encoder_q-layer.0": 1535.6531, "encoder_q-layer.1": 1712.5114, "encoder_q-layer.10": 2852.6418, "encoder_q-layer.11": 5864.5825, "encoder_q-layer.2": 2021.9067, "encoder_q-layer.3": 2047.9695, "encoder_q-layer.4": 2212.0459, "encoder_q-layer.5": 2230.2644, "encoder_q-layer.6": 2415.9094, "encoder_q-layer.7": 2523.0046, "encoder_q-layer.8": 2827.021, "encoder_q-layer.9": 2365.7852, "epoch": 0.69, "inbatch_neg_score": 0.2999, "inbatch_pos_score": 0.999, "learning_rate": 1.6277777777777777e-05, "loss": 2.7629, "norm_diff": 0.0529, "norm_loss": 0.0, "num_token_doc": 66.7545, "num_token_overlap": 18.0184, "num_token_query": 52.3079, "num_token_union": 73.4633, "num_word_context": 202.5141, "num_word_doc": 49.7985, "num_word_query": 39.8711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4141.415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.4015, "queue_k_norm": 1.4564, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3079, "sent_len_1": 66.7545, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8887, "stdk": 0.0486, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.751, "doc_norm": 1.4597, "encoder_q-embeddings": 2434.1624, "encoder_q-layer.0": 1565.4304, "encoder_q-layer.1": 1785.7692, "encoder_q-layer.10": 2611.7397, "encoder_q-layer.11": 5616.8599, "encoder_q-layer.2": 2103.074, "encoder_q-layer.3": 2227.0847, "encoder_q-layer.4": 2363.6038, "encoder_q-layer.5": 2155.7046, "encoder_q-layer.6": 2616.9658, "encoder_q-layer.7": 2781.2493, "encoder_q-layer.8": 3071.8716, "encoder_q-layer.9": 2600.075, "epoch": 0.69, "inbatch_neg_score": 0.2989, "inbatch_pos_score": 1.0469, "learning_rate": 1.6222222222222223e-05, "loss": 2.751, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.8128, "num_token_overlap": 17.9956, "num_token_query": 52.1595, "num_token_union": 73.4345, "num_word_context": 202.547, "num_word_doc": 49.8492, "num_word_query": 39.7285, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4173.7949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2991, "query_norm": 1.4253, "queue_k_norm": 1.4566, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1595, "sent_len_1": 66.8128, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9913, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.7414, "doc_norm": 1.4547, "encoder_q-embeddings": 2141.5991, "encoder_q-layer.0": 1341.3628, "encoder_q-layer.1": 1482.7063, "encoder_q-layer.10": 2590.262, "encoder_q-layer.11": 5582.3643, "encoder_q-layer.2": 1622.1849, "encoder_q-layer.3": 1608.7089, "encoder_q-layer.4": 1665.6302, "encoder_q-layer.5": 1774.6079, "encoder_q-layer.6": 2071.6094, "encoder_q-layer.7": 2312.844, "encoder_q-layer.8": 2789.5854, "encoder_q-layer.9": 2515.0405, "epoch": 0.69, "inbatch_neg_score": 0.3016, "inbatch_pos_score": 1.0244, "learning_rate": 1.6166666666666665e-05, "loss": 2.7414, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 67.0029, "num_token_overlap": 18.0206, "num_token_query": 52.1852, "num_token_union": 73.5293, "num_word_context": 202.4277, "num_word_doc": 49.9743, "num_word_query": 39.7851, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3728.7873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.302, "query_norm": 1.4206, "queue_k_norm": 1.4551, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1852, "sent_len_1": 67.0029, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.2125, "stdk": 0.0487, "stdq": 0.0466, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7421, "doc_norm": 1.4563, "encoder_q-embeddings": 3070.5984, "encoder_q-layer.0": 2005.3353, "encoder_q-layer.1": 2380.6838, "encoder_q-layer.10": 2643.6055, "encoder_q-layer.11": 5880.1934, "encoder_q-layer.2": 2681.5293, "encoder_q-layer.3": 2739.9443, "encoder_q-layer.4": 2843.7227, "encoder_q-layer.5": 3148.0872, "encoder_q-layer.6": 3353.0452, "encoder_q-layer.7": 3297.0037, "encoder_q-layer.8": 3070.4751, "encoder_q-layer.9": 2415.0234, "epoch": 0.69, "inbatch_neg_score": 0.2992, "inbatch_pos_score": 1.0195, "learning_rate": 1.6111111111111115e-05, "loss": 2.7421, "norm_diff": 0.038, "norm_loss": 0.0, "num_token_doc": 66.7069, "num_token_overlap": 17.9697, "num_token_query": 52.3582, "num_token_union": 73.505, "num_word_context": 202.4336, "num_word_doc": 49.7193, "num_word_query": 39.9124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4674.3527, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2966, "query_norm": 1.4183, "queue_k_norm": 1.4556, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3582, "sent_len_1": 66.7069, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3187, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.7344, "doc_norm": 1.4605, "encoder_q-embeddings": 2222.5125, "encoder_q-layer.0": 1376.2526, "encoder_q-layer.1": 1495.011, "encoder_q-layer.10": 2657.3181, "encoder_q-layer.11": 5754.7671, "encoder_q-layer.2": 1705.9657, "encoder_q-layer.3": 1742.1271, "encoder_q-layer.4": 1823.4399, "encoder_q-layer.5": 1881.5278, "encoder_q-layer.6": 2157.5652, "encoder_q-layer.7": 2247.9946, "encoder_q-layer.8": 2720.4888, "encoder_q-layer.9": 2503.3997, "epoch": 0.69, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 1.0176, "learning_rate": 1.6055555555555557e-05, "loss": 2.7344, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 66.7486, "num_token_overlap": 18.0449, "num_token_query": 52.2119, "num_token_union": 73.3635, "num_word_context": 202.0088, "num_word_doc": 49.8027, "num_word_query": 39.7993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3851.0778, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.4168, "queue_k_norm": 1.455, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2119, "sent_len_1": 66.7486, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2488, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.7414, "doc_norm": 1.4583, "encoder_q-embeddings": 1949.6042, "encoder_q-layer.0": 1281.6619, "encoder_q-layer.1": 1370.0913, "encoder_q-layer.10": 2552.6641, "encoder_q-layer.11": 5398.2778, "encoder_q-layer.2": 1545.4673, "encoder_q-layer.3": 1545.7814, "encoder_q-layer.4": 1704.192, "encoder_q-layer.5": 1630.8043, "encoder_q-layer.6": 1952.7289, "encoder_q-layer.7": 2222.5051, "encoder_q-layer.8": 2522.3728, "encoder_q-layer.9": 2348.8596, "epoch": 0.7, "inbatch_neg_score": 0.2968, "inbatch_pos_score": 1.0449, "learning_rate": 1.6000000000000003e-05, "loss": 2.7414, "norm_diff": 0.0299, "norm_loss": 0.0, "num_token_doc": 66.7729, "num_token_overlap": 18.0033, "num_token_query": 52.3787, "num_token_union": 73.5521, "num_word_context": 202.1977, "num_word_doc": 49.8018, "num_word_query": 39.9494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3581.2413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2961, "query_norm": 1.4284, "queue_k_norm": 1.457, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3787, "sent_len_1": 66.7729, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.4225, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.74, "doc_norm": 1.4612, "encoder_q-embeddings": 4099.6655, "encoder_q-layer.0": 2719.8196, "encoder_q-layer.1": 2927.0415, "encoder_q-layer.10": 4707.0737, "encoder_q-layer.11": 10275.7754, "encoder_q-layer.2": 3360.689, "encoder_q-layer.3": 3786.8691, "encoder_q-layer.4": 4009.4753, "encoder_q-layer.5": 3828.6528, "encoder_q-layer.6": 4530.2314, "encoder_q-layer.7": 5151.2891, "encoder_q-layer.8": 5478.3608, "encoder_q-layer.9": 4420.0571, "epoch": 0.7, "inbatch_neg_score": 0.2873, "inbatch_pos_score": 1.0488, "learning_rate": 1.5944444444444445e-05, "loss": 2.74, "norm_diff": 0.0466, "norm_loss": 0.0, "num_token_doc": 66.8354, "num_token_overlap": 18.0226, "num_token_query": 52.3058, "num_token_union": 73.5832, "num_word_context": 202.4753, "num_word_doc": 49.9039, "num_word_query": 39.8747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7422.1258, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2866, "query_norm": 1.4146, "queue_k_norm": 1.4568, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3058, "sent_len_1": 66.8354, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.7925, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.7519, "doc_norm": 1.4579, "encoder_q-embeddings": 4198.1699, "encoder_q-layer.0": 2886.7483, "encoder_q-layer.1": 3168.8862, "encoder_q-layer.10": 4751.5317, "encoder_q-layer.11": 10346.5664, "encoder_q-layer.2": 3510.9946, "encoder_q-layer.3": 3437.6016, "encoder_q-layer.4": 3498.3652, "encoder_q-layer.5": 3459.8374, "encoder_q-layer.6": 3951.3274, "encoder_q-layer.7": 4106.9443, "encoder_q-layer.8": 4885.5879, "encoder_q-layer.9": 4380.3247, "epoch": 0.7, "inbatch_neg_score": 0.2937, "inbatch_pos_score": 1.0225, "learning_rate": 1.588888888888889e-05, "loss": 2.7519, "norm_diff": 0.0513, "norm_loss": 0.0, "num_token_doc": 66.8708, "num_token_overlap": 18.0063, "num_token_query": 52.2013, "num_token_union": 73.5215, "num_word_context": 202.7443, "num_word_doc": 49.8621, "num_word_query": 39.7789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7006.7061, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2942, "query_norm": 1.4066, "queue_k_norm": 1.4558, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2013, "sent_len_1": 66.8708, "sent_len_max_0": 128.0, "sent_len_max_1": 191.6113, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.7327, "doc_norm": 1.4613, "encoder_q-embeddings": 2272.9023, "encoder_q-layer.0": 1480.8423, "encoder_q-layer.1": 1668.1626, "encoder_q-layer.10": 2463.6687, "encoder_q-layer.11": 5159.4619, "encoder_q-layer.2": 1896.167, "encoder_q-layer.3": 1946.9995, "encoder_q-layer.4": 2038.8038, "encoder_q-layer.5": 2113.2402, "encoder_q-layer.6": 2347.3472, "encoder_q-layer.7": 2525.3711, "encoder_q-layer.8": 2695.2649, "encoder_q-layer.9": 2246.7512, "epoch": 0.7, "inbatch_neg_score": 0.2884, "inbatch_pos_score": 1.0078, "learning_rate": 1.5833333333333333e-05, "loss": 2.7327, "norm_diff": 0.0554, "norm_loss": 0.0, "num_token_doc": 66.5506, "num_token_overlap": 18.0471, "num_token_query": 52.2986, "num_token_union": 73.3393, "num_word_context": 202.067, "num_word_doc": 49.6613, "num_word_query": 39.8699, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3763.1917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2888, "query_norm": 1.4059, "queue_k_norm": 1.456, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2986, "sent_len_1": 66.5506, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.0737, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.7606, "doc_norm": 1.4564, "encoder_q-embeddings": 2889.2063, "encoder_q-layer.0": 2238.2849, "encoder_q-layer.1": 2181.9617, "encoder_q-layer.10": 2740.2378, "encoder_q-layer.11": 5425.6343, "encoder_q-layer.2": 2755.7954, "encoder_q-layer.3": 2667.5884, "encoder_q-layer.4": 2830.6316, "encoder_q-layer.5": 2888.0247, "encoder_q-layer.6": 2734.3164, "encoder_q-layer.7": 2778.0825, "encoder_q-layer.8": 2732.1267, "encoder_q-layer.9": 2303.7866, "epoch": 0.7, "inbatch_neg_score": 0.288, "inbatch_pos_score": 1.002, "learning_rate": 1.577777777777778e-05, "loss": 2.7606, "norm_diff": 0.0488, "norm_loss": 0.0, "num_token_doc": 66.7672, "num_token_overlap": 17.9386, "num_token_query": 52.1327, "num_token_union": 73.4469, "num_word_context": 202.2352, "num_word_doc": 49.7667, "num_word_query": 39.7379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4469.2342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2891, "query_norm": 1.4076, "queue_k_norm": 1.4558, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1327, "sent_len_1": 66.7672, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.9825, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.755, "doc_norm": 1.4532, "encoder_q-embeddings": 2136.2432, "encoder_q-layer.0": 1468.6472, "encoder_q-layer.1": 1585.5588, "encoder_q-layer.10": 2482.2432, "encoder_q-layer.11": 5225.6079, "encoder_q-layer.2": 1798.5187, "encoder_q-layer.3": 1851.0863, "encoder_q-layer.4": 1941.1655, "encoder_q-layer.5": 1982.6584, "encoder_q-layer.6": 2108.209, "encoder_q-layer.7": 2274.9668, "encoder_q-layer.8": 2628.4365, "encoder_q-layer.9": 2340.0366, "epoch": 0.7, "inbatch_neg_score": 0.2844, "inbatch_pos_score": 1.0049, "learning_rate": 1.5722222222222225e-05, "loss": 2.755, "norm_diff": 0.0441, "norm_loss": 0.0, "num_token_doc": 66.9038, "num_token_overlap": 17.945, "num_token_query": 52.029, "num_token_union": 73.4629, "num_word_context": 202.4936, "num_word_doc": 49.8835, "num_word_query": 39.6169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3700.4355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2844, "query_norm": 1.4091, "queue_k_norm": 1.4556, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.029, "sent_len_1": 66.9038, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4137, "stdk": 0.0486, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 2.7331, "doc_norm": 1.4505, "encoder_q-embeddings": 2347.4126, "encoder_q-layer.0": 1496.995, "encoder_q-layer.1": 1637.9235, "encoder_q-layer.10": 2662.3135, "encoder_q-layer.11": 5593.9307, "encoder_q-layer.2": 1641.832, "encoder_q-layer.3": 1682.3237, "encoder_q-layer.4": 1806.6152, "encoder_q-layer.5": 2014.4053, "encoder_q-layer.6": 2200.2891, "encoder_q-layer.7": 2328.1582, "encoder_q-layer.8": 2883.8469, "encoder_q-layer.9": 2473.2209, "epoch": 0.7, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 0.9746, "learning_rate": 1.5666666666666667e-05, "loss": 2.7331, "norm_diff": 0.0544, "norm_loss": 0.0, "num_token_doc": 66.957, "num_token_overlap": 18.104, "num_token_query": 52.3077, "num_token_union": 73.5121, "num_word_context": 202.2393, "num_word_doc": 49.9648, "num_word_query": 39.9023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3864.2244, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2847, "query_norm": 1.3962, "queue_k_norm": 1.4567, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3077, "sent_len_1": 66.957, "sent_len_max_0": 127.99, "sent_len_max_1": 190.4975, "stdk": 0.0485, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.7324, "doc_norm": 1.4519, "encoder_q-embeddings": 2352.7131, "encoder_q-layer.0": 1541.2985, "encoder_q-layer.1": 1725.9598, "encoder_q-layer.10": 2709.8325, "encoder_q-layer.11": 5587.6338, "encoder_q-layer.2": 2198.9446, "encoder_q-layer.3": 2149.7915, "encoder_q-layer.4": 2276.459, "encoder_q-layer.5": 2376.0383, "encoder_q-layer.6": 2673.677, "encoder_q-layer.7": 2512.8743, "encoder_q-layer.8": 2661.5908, "encoder_q-layer.9": 2454.3477, "epoch": 0.7, "inbatch_neg_score": 0.2835, "inbatch_pos_score": 1.001, "learning_rate": 1.5611111111111113e-05, "loss": 2.7324, "norm_diff": 0.03, "norm_loss": 0.0, "num_token_doc": 66.6741, "num_token_overlap": 17.9967, "num_token_query": 52.2669, "num_token_union": 73.4594, "num_word_context": 201.9573, "num_word_doc": 49.7607, "num_word_query": 39.8612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4077.5158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.4219, "queue_k_norm": 1.456, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2669, "sent_len_1": 66.6741, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.8375, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.7271, "doc_norm": 1.4543, "encoder_q-embeddings": 2257.5439, "encoder_q-layer.0": 1527.5917, "encoder_q-layer.1": 1654.2861, "encoder_q-layer.10": 2824.3577, "encoder_q-layer.11": 5618.6455, "encoder_q-layer.2": 1874.7439, "encoder_q-layer.3": 1998.3973, "encoder_q-layer.4": 2207.0032, "encoder_q-layer.5": 2384.8491, "encoder_q-layer.6": 2550.1968, "encoder_q-layer.7": 2927.9426, "encoder_q-layer.8": 3151.4761, "encoder_q-layer.9": 2595.8193, "epoch": 0.7, "inbatch_neg_score": 0.2817, "inbatch_pos_score": 1.0059, "learning_rate": 1.5555555555555555e-05, "loss": 2.7271, "norm_diff": 0.0416, "norm_loss": 0.0, "num_token_doc": 66.721, "num_token_overlap": 18.0781, "num_token_query": 52.2435, "num_token_union": 73.3711, "num_word_context": 202.2403, "num_word_doc": 49.884, "num_word_query": 39.8524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4098.0451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2805, "query_norm": 1.414, "queue_k_norm": 1.4544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2435, "sent_len_1": 66.721, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5412, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.7115, "doc_norm": 1.4561, "encoder_q-embeddings": 2713.4587, "encoder_q-layer.0": 1699.6692, "encoder_q-layer.1": 1888.3988, "encoder_q-layer.10": 2494.4136, "encoder_q-layer.11": 5597.1626, "encoder_q-layer.2": 2237.74, "encoder_q-layer.3": 2234.9541, "encoder_q-layer.4": 2210.8953, "encoder_q-layer.5": 2225.6301, "encoder_q-layer.6": 2390.8511, "encoder_q-layer.7": 2511.5544, "encoder_q-layer.8": 2793.1143, "encoder_q-layer.9": 2401.5461, "epoch": 0.7, "inbatch_neg_score": 0.2916, "inbatch_pos_score": 1.0059, "learning_rate": 1.55e-05, "loss": 2.7115, "norm_diff": 0.0377, "norm_loss": 0.0, "num_token_doc": 66.8951, "num_token_overlap": 18.1233, "num_token_query": 52.4557, "num_token_union": 73.5411, "num_word_context": 202.6971, "num_word_doc": 49.9067, "num_word_query": 40.0182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.6498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.4185, "queue_k_norm": 1.4554, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.4557, "sent_len_1": 66.8951, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8288, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7585, "doc_norm": 1.4598, "encoder_q-embeddings": 2485.6929, "encoder_q-layer.0": 1663.5494, "encoder_q-layer.1": 1851.9758, "encoder_q-layer.10": 2649.2798, "encoder_q-layer.11": 5549.8833, "encoder_q-layer.2": 2259.4978, "encoder_q-layer.3": 2235.5518, "encoder_q-layer.4": 2420.793, "encoder_q-layer.5": 2268.1841, "encoder_q-layer.6": 2491.4663, "encoder_q-layer.7": 2576.3311, "encoder_q-layer.8": 2707.3481, "encoder_q-layer.9": 2382.2688, "epoch": 0.7, "inbatch_neg_score": 0.2908, "inbatch_pos_score": 1.0146, "learning_rate": 1.5444444444444446e-05, "loss": 2.7585, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 66.8414, "num_token_overlap": 17.9649, "num_token_query": 52.243, "num_token_union": 73.4919, "num_word_context": 202.4736, "num_word_doc": 49.8468, "num_word_query": 39.8175, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4062.1717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2905, "query_norm": 1.416, "queue_k_norm": 1.457, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.243, "sent_len_1": 66.8414, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4025, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7213, "doc_norm": 1.4564, "encoder_q-embeddings": 2458.8777, "encoder_q-layer.0": 1568.1975, "encoder_q-layer.1": 1734.3408, "encoder_q-layer.10": 2694.9421, "encoder_q-layer.11": 5379.6411, "encoder_q-layer.2": 1944.8772, "encoder_q-layer.3": 1974.7083, "encoder_q-layer.4": 2050.627, "encoder_q-layer.5": 2073.4573, "encoder_q-layer.6": 2337.1416, "encoder_q-layer.7": 2581.1489, "encoder_q-layer.8": 2815.6921, "encoder_q-layer.9": 2556.9434, "epoch": 0.71, "inbatch_neg_score": 0.2996, "inbatch_pos_score": 1.0273, "learning_rate": 1.538888888888889e-05, "loss": 2.7213, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.702, "num_token_overlap": 18.0554, "num_token_query": 52.314, "num_token_union": 73.4443, "num_word_context": 201.9954, "num_word_doc": 49.8403, "num_word_query": 39.9215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3943.8981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.4225, "queue_k_norm": 1.4558, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.314, "sent_len_1": 66.702, "sent_len_max_0": 127.9862, "sent_len_max_1": 187.4613, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 63.0859, "active_queue_size": 16384.0, "cl_loss": 2.7147, "doc_norm": 1.4576, "encoder_q-embeddings": 2196.3936, "encoder_q-layer.0": 1411.1681, "encoder_q-layer.1": 1599.6343, "encoder_q-layer.10": 2561.1021, "encoder_q-layer.11": 5218.6465, "encoder_q-layer.2": 1913.6941, "encoder_q-layer.3": 2022.3779, "encoder_q-layer.4": 2174.0347, "encoder_q-layer.5": 2176.115, "encoder_q-layer.6": 2358.6135, "encoder_q-layer.7": 2288.1479, "encoder_q-layer.8": 2484.3667, "encoder_q-layer.9": 2281.3052, "epoch": 0.71, "inbatch_neg_score": 0.2968, "inbatch_pos_score": 1.0547, "learning_rate": 1.5333333333333334e-05, "loss": 2.7147, "norm_diff": 0.0261, "norm_loss": 0.0, "num_token_doc": 66.8477, "num_token_overlap": 17.9938, "num_token_query": 52.128, "num_token_union": 73.4364, "num_word_context": 202.1975, "num_word_doc": 49.8101, "num_word_query": 39.6887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3767.7564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.4315, "queue_k_norm": 1.4565, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.128, "sent_len_1": 66.8477, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.265, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.747, "doc_norm": 1.4551, "encoder_q-embeddings": 2829.9326, "encoder_q-layer.0": 1841.2737, "encoder_q-layer.1": 2061.0774, "encoder_q-layer.10": 2561.8696, "encoder_q-layer.11": 5619.5132, "encoder_q-layer.2": 2475.4702, "encoder_q-layer.3": 2553.5581, "encoder_q-layer.4": 2538.2158, "encoder_q-layer.5": 2557.5967, "encoder_q-layer.6": 2680.7832, "encoder_q-layer.7": 2605.1416, "encoder_q-layer.8": 2926.3269, "encoder_q-layer.9": 2494.6689, "epoch": 0.71, "inbatch_neg_score": 0.2969, "inbatch_pos_score": 1.0215, "learning_rate": 1.527777777777778e-05, "loss": 2.747, "norm_diff": 0.038, "norm_loss": 0.0, "num_token_doc": 66.6285, "num_token_overlap": 17.9026, "num_token_query": 52.0158, "num_token_union": 73.3234, "num_word_context": 202.1894, "num_word_doc": 49.7181, "num_word_query": 39.618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4370.8749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.4171, "queue_k_norm": 1.4556, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0158, "sent_len_1": 66.6285, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1113, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.7309, "doc_norm": 1.4606, "encoder_q-embeddings": 2057.9329, "encoder_q-layer.0": 1288.6461, "encoder_q-layer.1": 1347.1053, "encoder_q-layer.10": 2521.8337, "encoder_q-layer.11": 5428.7764, "encoder_q-layer.2": 1489.1536, "encoder_q-layer.3": 1495.3915, "encoder_q-layer.4": 1553.2983, "encoder_q-layer.5": 1579.205, "encoder_q-layer.6": 1917.2373, "encoder_q-layer.7": 2125.5913, "encoder_q-layer.8": 2504.9851, "encoder_q-layer.9": 2303.6775, "epoch": 0.71, "inbatch_neg_score": 0.3043, "inbatch_pos_score": 1.04, "learning_rate": 1.5222222222222224e-05, "loss": 2.7309, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 67.0155, "num_token_overlap": 18.0299, "num_token_query": 52.2746, "num_token_union": 73.5622, "num_word_context": 202.3218, "num_word_doc": 49.9816, "num_word_query": 39.837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3604.6227, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.303, "query_norm": 1.4239, "queue_k_norm": 1.4573, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2746, "sent_len_1": 67.0155, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6012, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.7543, "doc_norm": 1.4589, "encoder_q-embeddings": 3918.1243, "encoder_q-layer.0": 2480.0852, "encoder_q-layer.1": 2740.4072, "encoder_q-layer.10": 2542.522, "encoder_q-layer.11": 5647.3638, "encoder_q-layer.2": 3298.0154, "encoder_q-layer.3": 3591.0525, "encoder_q-layer.4": 3865.4468, "encoder_q-layer.5": 3653.3542, "encoder_q-layer.6": 3364.4375, "encoder_q-layer.7": 3310.5464, "encoder_q-layer.8": 3493.0293, "encoder_q-layer.9": 2465.9883, "epoch": 0.71, "inbatch_neg_score": 0.2999, "inbatch_pos_score": 1.0205, "learning_rate": 1.5166666666666668e-05, "loss": 2.7543, "norm_diff": 0.0548, "norm_loss": 0.0, "num_token_doc": 66.8043, "num_token_overlap": 17.9731, "num_token_query": 52.0542, "num_token_union": 73.3979, "num_word_context": 202.255, "num_word_doc": 49.8679, "num_word_query": 39.6664, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5233.4546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.4042, "queue_k_norm": 1.457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0542, "sent_len_1": 66.8043, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6325, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.7165, "doc_norm": 1.4592, "encoder_q-embeddings": 2197.6116, "encoder_q-layer.0": 1429.599, "encoder_q-layer.1": 1581.6729, "encoder_q-layer.10": 2470.9802, "encoder_q-layer.11": 5485.541, "encoder_q-layer.2": 1767.1323, "encoder_q-layer.3": 1895.4517, "encoder_q-layer.4": 2010.7733, "encoder_q-layer.5": 2039.075, "encoder_q-layer.6": 2173.6145, "encoder_q-layer.7": 2333.439, "encoder_q-layer.8": 2590.3196, "encoder_q-layer.9": 2345.7097, "epoch": 0.71, "inbatch_neg_score": 0.305, "inbatch_pos_score": 1.0352, "learning_rate": 1.5111111111111112e-05, "loss": 2.7165, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.8611, "num_token_overlap": 18.0332, "num_token_query": 52.2553, "num_token_union": 73.5231, "num_word_context": 202.4395, "num_word_doc": 49.9263, "num_word_query": 39.8362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3812.7644, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3052, "query_norm": 1.4383, "queue_k_norm": 1.4558, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2553, "sent_len_1": 66.8611, "sent_len_max_0": 127.9925, "sent_len_max_1": 187.9288, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.7465, "doc_norm": 1.4559, "encoder_q-embeddings": 2536.5227, "encoder_q-layer.0": 1608.3319, "encoder_q-layer.1": 1846.4647, "encoder_q-layer.10": 2636.4968, "encoder_q-layer.11": 5797.0117, "encoder_q-layer.2": 2184.9092, "encoder_q-layer.3": 2504.6226, "encoder_q-layer.4": 3042.2087, "encoder_q-layer.5": 3342.8418, "encoder_q-layer.6": 4527.3936, "encoder_q-layer.7": 6647.1494, "encoder_q-layer.8": 9350.1094, "encoder_q-layer.9": 4098.3203, "epoch": 0.71, "inbatch_neg_score": 0.3091, "inbatch_pos_score": 1.0088, "learning_rate": 1.5055555555555556e-05, "loss": 2.7465, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.5782, "num_token_overlap": 17.976, "num_token_query": 52.2688, "num_token_union": 73.3713, "num_word_context": 202.217, "num_word_doc": 49.6573, "num_word_query": 39.8297, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7330.3786, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3081, "query_norm": 1.4222, "queue_k_norm": 1.4573, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2688, "sent_len_1": 66.5782, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.3938, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.74, "doc_norm": 1.455, "encoder_q-embeddings": 2244.2, "encoder_q-layer.0": 1427.0488, "encoder_q-layer.1": 1654.1627, "encoder_q-layer.10": 2497.646, "encoder_q-layer.11": 5575.106, "encoder_q-layer.2": 1962.725, "encoder_q-layer.3": 2029.8766, "encoder_q-layer.4": 2032.0768, "encoder_q-layer.5": 2021.8566, "encoder_q-layer.6": 2126.804, "encoder_q-layer.7": 2223.2036, "encoder_q-layer.8": 2636.7927, "encoder_q-layer.9": 2322.9231, "epoch": 0.71, "inbatch_neg_score": 0.3075, "inbatch_pos_score": 1.0479, "learning_rate": 1.5e-05, "loss": 2.74, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.8623, "num_token_overlap": 18.0391, "num_token_query": 52.285, "num_token_union": 73.4863, "num_word_context": 202.4004, "num_word_doc": 49.8824, "num_word_query": 39.8668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3844.4869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3069, "query_norm": 1.4222, "queue_k_norm": 1.4587, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.285, "sent_len_1": 66.8623, "sent_len_max_0": 128.0, "sent_len_max_1": 192.2038, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 63.1836, "active_queue_size": 16384.0, "cl_loss": 2.7407, "doc_norm": 1.4609, "encoder_q-embeddings": 2447.4038, "encoder_q-layer.0": 1497.464, "encoder_q-layer.1": 1679.6306, "encoder_q-layer.10": 2606.5337, "encoder_q-layer.11": 5501.9473, "encoder_q-layer.2": 1955.5336, "encoder_q-layer.3": 2059.7122, "encoder_q-layer.4": 2250.1392, "encoder_q-layer.5": 2279.8438, "encoder_q-layer.6": 2568.2041, "encoder_q-layer.7": 2723.561, "encoder_q-layer.8": 2976.3071, "encoder_q-layer.9": 2425.8611, "epoch": 0.71, "inbatch_neg_score": 0.305, "inbatch_pos_score": 1.0635, "learning_rate": 1.4944444444444444e-05, "loss": 2.7407, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.7167, "num_token_overlap": 18.0006, "num_token_query": 52.2153, "num_token_union": 73.3811, "num_word_context": 201.8658, "num_word_doc": 49.7585, "num_word_query": 39.7995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4101.3079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3057, "query_norm": 1.423, "queue_k_norm": 1.4593, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2153, "sent_len_1": 66.7167, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5525, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.7318, "doc_norm": 1.4584, "encoder_q-embeddings": 3712.8267, "encoder_q-layer.0": 2522.7842, "encoder_q-layer.1": 2898.9014, "encoder_q-layer.10": 2775.0518, "encoder_q-layer.11": 5381.1157, "encoder_q-layer.2": 3547.1902, "encoder_q-layer.3": 3855.5845, "encoder_q-layer.4": 3928.2349, "encoder_q-layer.5": 4196.0688, "encoder_q-layer.6": 4279.0083, "encoder_q-layer.7": 3678.8452, "encoder_q-layer.8": 3298.1416, "encoder_q-layer.9": 2432.6108, "epoch": 0.71, "inbatch_neg_score": 0.3083, "inbatch_pos_score": 1.0596, "learning_rate": 1.4888888888888888e-05, "loss": 2.7318, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.8342, "num_token_overlap": 18.0179, "num_token_query": 52.2345, "num_token_union": 73.4746, "num_word_context": 202.309, "num_word_doc": 49.8663, "num_word_query": 39.8495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5387.9699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3086, "query_norm": 1.4216, "queue_k_norm": 1.4581, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2345, "sent_len_1": 66.8342, "sent_len_max_0": 128.0, "sent_len_max_1": 187.7463, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7239, "doc_norm": 1.4628, "encoder_q-embeddings": 3872.0068, "encoder_q-layer.0": 2603.093, "encoder_q-layer.1": 3013.4719, "encoder_q-layer.10": 2514.9521, "encoder_q-layer.11": 5722.0449, "encoder_q-layer.2": 3301.3899, "encoder_q-layer.3": 3330.4082, "encoder_q-layer.4": 3207.762, "encoder_q-layer.5": 3196.0354, "encoder_q-layer.6": 3382.9404, "encoder_q-layer.7": 3206.0229, "encoder_q-layer.8": 2907.2864, "encoder_q-layer.9": 2473.3669, "epoch": 0.72, "inbatch_neg_score": 0.3045, "inbatch_pos_score": 1.0127, "learning_rate": 1.4833333333333336e-05, "loss": 2.7239, "norm_diff": 0.0625, "norm_loss": 0.0, "num_token_doc": 66.7966, "num_token_overlap": 18.0647, "num_token_query": 52.3674, "num_token_union": 73.4852, "num_word_context": 202.3584, "num_word_doc": 49.8448, "num_word_query": 39.9239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5089.8319, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3049, "query_norm": 1.4003, "queue_k_norm": 1.4566, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3674, "sent_len_1": 66.7966, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0737, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.7469, "doc_norm": 1.4629, "encoder_q-embeddings": 1052.2231, "encoder_q-layer.0": 659.2485, "encoder_q-layer.1": 686.0032, "encoder_q-layer.10": 1242.8326, "encoder_q-layer.11": 2755.334, "encoder_q-layer.2": 774.6048, "encoder_q-layer.3": 801.7064, "encoder_q-layer.4": 863.2739, "encoder_q-layer.5": 878.3856, "encoder_q-layer.6": 974.5828, "encoder_q-layer.7": 1127.5588, "encoder_q-layer.8": 1353.2333, "encoder_q-layer.9": 1174.0204, "epoch": 0.72, "inbatch_neg_score": 0.3034, "inbatch_pos_score": 1.0469, "learning_rate": 1.477777777777778e-05, "loss": 2.7469, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.8216, "num_token_overlap": 18.0014, "num_token_query": 52.1142, "num_token_union": 73.3749, "num_word_context": 202.3553, "num_word_doc": 49.845, "num_word_query": 39.7199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1817.7775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.4168, "queue_k_norm": 1.458, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1142, "sent_len_1": 66.8216, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.0863, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7447, "doc_norm": 1.4582, "encoder_q-embeddings": 1345.8834, "encoder_q-layer.0": 908.5377, "encoder_q-layer.1": 1033.8326, "encoder_q-layer.10": 1568.0737, "encoder_q-layer.11": 2825.8862, "encoder_q-layer.2": 1121.7321, "encoder_q-layer.3": 1138.1266, "encoder_q-layer.4": 1175.1167, "encoder_q-layer.5": 1224.1909, "encoder_q-layer.6": 1362.8044, "encoder_q-layer.7": 1499.4277, "encoder_q-layer.8": 1741.6936, "encoder_q-layer.9": 1318.7229, "epoch": 0.72, "inbatch_neg_score": 0.3018, "inbatch_pos_score": 1.0176, "learning_rate": 1.4722222222222224e-05, "loss": 2.7447, "norm_diff": 0.0542, "norm_loss": 0.0, "num_token_doc": 66.6898, "num_token_overlap": 18.0092, "num_token_query": 52.2514, "num_token_union": 73.3859, "num_word_context": 202.3176, "num_word_doc": 49.7246, "num_word_query": 39.8074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2230.3741, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3047, "query_norm": 1.4039, "queue_k_norm": 1.4596, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2514, "sent_len_1": 66.6898, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6138, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7601, "doc_norm": 1.4603, "encoder_q-embeddings": 1631.5712, "encoder_q-layer.0": 1080.9941, "encoder_q-layer.1": 1240.2147, "encoder_q-layer.10": 1230.0292, "encoder_q-layer.11": 2748.103, "encoder_q-layer.2": 1437.7449, "encoder_q-layer.3": 1564.0615, "encoder_q-layer.4": 1625.3611, "encoder_q-layer.5": 1551.0364, "encoder_q-layer.6": 1676.9911, "encoder_q-layer.7": 1883.4459, "encoder_q-layer.8": 1798.4801, "encoder_q-layer.9": 1223.5602, "epoch": 0.72, "inbatch_neg_score": 0.305, "inbatch_pos_score": 1.0352, "learning_rate": 1.4666666666666668e-05, "loss": 2.7601, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 66.5395, "num_token_overlap": 17.9494, "num_token_query": 52.2908, "num_token_union": 73.3984, "num_word_context": 202.2941, "num_word_doc": 49.6334, "num_word_query": 39.8579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2474.3542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3044, "query_norm": 1.4191, "queue_k_norm": 1.4585, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2908, "sent_len_1": 66.5395, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2075, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.7468, "doc_norm": 1.4575, "encoder_q-embeddings": 998.3713, "encoder_q-layer.0": 659.6633, "encoder_q-layer.1": 747.3666, "encoder_q-layer.10": 1159.0287, "encoder_q-layer.11": 2702.6416, "encoder_q-layer.2": 833.7615, "encoder_q-layer.3": 849.2508, "encoder_q-layer.4": 925.741, "encoder_q-layer.5": 1006.5239, "encoder_q-layer.6": 1090.1062, "encoder_q-layer.7": 1173.0629, "encoder_q-layer.8": 1293.7301, "encoder_q-layer.9": 1146.8286, "epoch": 0.72, "inbatch_neg_score": 0.3004, "inbatch_pos_score": 1.0127, "learning_rate": 1.4611111111111112e-05, "loss": 2.7468, "norm_diff": 0.0596, "norm_loss": 0.0, "num_token_doc": 66.7801, "num_token_overlap": 17.9542, "num_token_query": 52.2219, "num_token_union": 73.5478, "num_word_context": 202.3807, "num_word_doc": 49.8583, "num_word_query": 39.8082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1812.4809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.3979, "queue_k_norm": 1.4587, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2219, "sent_len_1": 66.7801, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5525, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7188, "doc_norm": 1.4587, "encoder_q-embeddings": 1602.8032, "encoder_q-layer.0": 1134.2609, "encoder_q-layer.1": 1292.1206, "encoder_q-layer.10": 1303.1777, "encoder_q-layer.11": 2790.6218, "encoder_q-layer.2": 1494.201, "encoder_q-layer.3": 1494.8542, "encoder_q-layer.4": 1749.4768, "encoder_q-layer.5": 1614.4121, "encoder_q-layer.6": 1785.9509, "encoder_q-layer.7": 1507.0444, "encoder_q-layer.8": 1515.0919, "encoder_q-layer.9": 1215.0549, "epoch": 0.72, "inbatch_neg_score": 0.3007, "inbatch_pos_score": 1.0586, "learning_rate": 1.4555555555555556e-05, "loss": 2.7188, "norm_diff": 0.0327, "norm_loss": 0.0, "num_token_doc": 66.479, "num_token_overlap": 18.0255, "num_token_query": 52.2841, "num_token_union": 73.2643, "num_word_context": 201.7175, "num_word_doc": 49.6384, "num_word_query": 39.8774, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2430.3879, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.426, "queue_k_norm": 1.457, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2841, "sent_len_1": 66.479, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0938, "stdk": 0.0488, "stdq": 0.047, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.7171, "doc_norm": 1.4535, "encoder_q-embeddings": 948.1398, "encoder_q-layer.0": 591.0471, "encoder_q-layer.1": 641.0872, "encoder_q-layer.10": 1224.3988, "encoder_q-layer.11": 2715.6707, "encoder_q-layer.2": 744.6064, "encoder_q-layer.3": 764.1903, "encoder_q-layer.4": 810.7815, "encoder_q-layer.5": 889.0381, "encoder_q-layer.6": 993.1102, "encoder_q-layer.7": 1098.2009, "encoder_q-layer.8": 1326.8353, "encoder_q-layer.9": 1150.5138, "epoch": 0.72, "inbatch_neg_score": 0.2936, "inbatch_pos_score": 1.043, "learning_rate": 1.45e-05, "loss": 2.7171, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 66.7999, "num_token_overlap": 18.0584, "num_token_query": 52.3293, "num_token_union": 73.4723, "num_word_context": 202.2147, "num_word_doc": 49.8556, "num_word_query": 39.8783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1779.2088, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.423, "queue_k_norm": 1.4594, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3293, "sent_len_1": 66.7999, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1387, "stdk": 0.0486, "stdq": 0.047, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7327, "doc_norm": 1.4611, "encoder_q-embeddings": 2500.5925, "encoder_q-layer.0": 1722.788, "encoder_q-layer.1": 2258.4785, "encoder_q-layer.10": 1314.4122, "encoder_q-layer.11": 2634.6274, "encoder_q-layer.2": 2375.8169, "encoder_q-layer.3": 2338.9038, "encoder_q-layer.4": 2179.9812, "encoder_q-layer.5": 1726.9438, "encoder_q-layer.6": 1691.166, "encoder_q-layer.7": 1514.4452, "encoder_q-layer.8": 1521.9557, "encoder_q-layer.9": 1176.0657, "epoch": 0.72, "inbatch_neg_score": 0.2927, "inbatch_pos_score": 1.0234, "learning_rate": 1.4444444444444444e-05, "loss": 2.7327, "norm_diff": 0.0464, "norm_loss": 0.0, "num_token_doc": 66.7411, "num_token_overlap": 17.9757, "num_token_query": 52.1116, "num_token_union": 73.3964, "num_word_context": 201.8732, "num_word_doc": 49.8372, "num_word_query": 39.7294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2980.1736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.4147, "queue_k_norm": 1.4576, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1116, "sent_len_1": 66.7411, "sent_len_max_0": 127.9813, "sent_len_max_1": 188.36, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7247, "doc_norm": 1.4567, "encoder_q-embeddings": 1232.1979, "encoder_q-layer.0": 868.8057, "encoder_q-layer.1": 973.8293, "encoder_q-layer.10": 1265.3474, "encoder_q-layer.11": 2727.3899, "encoder_q-layer.2": 1125.3748, "encoder_q-layer.3": 1120.574, "encoder_q-layer.4": 1130.121, "encoder_q-layer.5": 1100.8506, "encoder_q-layer.6": 1106.7136, "encoder_q-layer.7": 1221.3019, "encoder_q-layer.8": 1424.965, "encoder_q-layer.9": 1182.3118, "epoch": 0.72, "inbatch_neg_score": 0.2963, "inbatch_pos_score": 1.0205, "learning_rate": 1.438888888888889e-05, "loss": 2.7247, "norm_diff": 0.0565, "norm_loss": 0.0, "num_token_doc": 66.6766, "num_token_overlap": 18.0048, "num_token_query": 52.3094, "num_token_union": 73.4133, "num_word_context": 201.9443, "num_word_doc": 49.7512, "num_word_query": 39.8625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.6724, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.4003, "queue_k_norm": 1.4595, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3094, "sent_len_1": 66.6766, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.845, "stdk": 0.0486, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.7415, "doc_norm": 1.4616, "encoder_q-embeddings": 1226.7499, "encoder_q-layer.0": 757.7464, "encoder_q-layer.1": 833.1066, "encoder_q-layer.10": 1197.4448, "encoder_q-layer.11": 2646.5791, "encoder_q-layer.2": 985.2285, "encoder_q-layer.3": 1036.8829, "encoder_q-layer.4": 1025.6899, "encoder_q-layer.5": 1007.3136, "encoder_q-layer.6": 1083.2894, "encoder_q-layer.7": 1217.86, "encoder_q-layer.8": 1297.1628, "encoder_q-layer.9": 1161.4169, "epoch": 0.72, "inbatch_neg_score": 0.2922, "inbatch_pos_score": 1.0283, "learning_rate": 1.4333333333333334e-05, "loss": 2.7415, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.8731, "num_token_overlap": 18.0396, "num_token_query": 52.2769, "num_token_union": 73.4854, "num_word_context": 202.5979, "num_word_doc": 49.8764, "num_word_query": 39.8804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1889.1128, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.4071, "queue_k_norm": 1.458, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2769, "sent_len_1": 66.8731, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9812, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7489, "doc_norm": 1.4553, "encoder_q-embeddings": 1305.4534, "encoder_q-layer.0": 894.0651, "encoder_q-layer.1": 977.2723, "encoder_q-layer.10": 1308.6936, "encoder_q-layer.11": 2794.3386, "encoder_q-layer.2": 1133.7283, "encoder_q-layer.3": 1164.8489, "encoder_q-layer.4": 1193.5862, "encoder_q-layer.5": 1170.5886, "encoder_q-layer.6": 1243.4974, "encoder_q-layer.7": 1376.0438, "encoder_q-layer.8": 1511.8809, "encoder_q-layer.9": 1186.802, "epoch": 0.73, "inbatch_neg_score": 0.2933, "inbatch_pos_score": 1.0166, "learning_rate": 1.427777777777778e-05, "loss": 2.7489, "norm_diff": 0.059, "norm_loss": 0.0, "num_token_doc": 66.7349, "num_token_overlap": 17.9298, "num_token_query": 52.213, "num_token_union": 73.5209, "num_word_context": 202.5149, "num_word_doc": 49.8143, "num_word_query": 39.8625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2103.3877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.3963, "queue_k_norm": 1.4571, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.213, "sent_len_1": 66.7349, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.8925, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7275, "doc_norm": 1.4647, "encoder_q-embeddings": 1223.2465, "encoder_q-layer.0": 813.2972, "encoder_q-layer.1": 876.2638, "encoder_q-layer.10": 1392.5914, "encoder_q-layer.11": 2841.3298, "encoder_q-layer.2": 1027.3196, "encoder_q-layer.3": 1080.3466, "encoder_q-layer.4": 1059.5785, "encoder_q-layer.5": 983.2667, "encoder_q-layer.6": 1102.4033, "encoder_q-layer.7": 1174.6093, "encoder_q-layer.8": 1387.5057, "encoder_q-layer.9": 1262.6084, "epoch": 0.73, "inbatch_neg_score": 0.2887, "inbatch_pos_score": 1.0312, "learning_rate": 1.4222222222222224e-05, "loss": 2.7275, "norm_diff": 0.0666, "norm_loss": 0.0, "num_token_doc": 66.789, "num_token_overlap": 18.0216, "num_token_query": 52.135, "num_token_union": 73.4079, "num_word_context": 202.282, "num_word_doc": 49.8173, "num_word_query": 39.7289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2031.4996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.398, "queue_k_norm": 1.4578, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.135, "sent_len_1": 66.789, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.2775, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7469, "doc_norm": 1.4527, "encoder_q-embeddings": 1921.8646, "encoder_q-layer.0": 1269.9026, "encoder_q-layer.1": 1424.4882, "encoder_q-layer.10": 1222.5712, "encoder_q-layer.11": 2704.3867, "encoder_q-layer.2": 1699.0051, "encoder_q-layer.3": 1802.9818, "encoder_q-layer.4": 1926.6107, "encoder_q-layer.5": 1781.3003, "encoder_q-layer.6": 1565.9392, "encoder_q-layer.7": 1509.0724, "encoder_q-layer.8": 1447.9664, "encoder_q-layer.9": 1172.0063, "epoch": 0.73, "inbatch_neg_score": 0.2923, "inbatch_pos_score": 1.0088, "learning_rate": 1.4166666666666668e-05, "loss": 2.7469, "norm_diff": 0.0554, "norm_loss": 0.0, "num_token_doc": 66.6115, "num_token_overlap": 17.9397, "num_token_query": 52.2281, "num_token_union": 73.3957, "num_word_context": 201.9881, "num_word_doc": 49.719, "num_word_query": 39.7974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2553.3118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.292, "query_norm": 1.3973, "queue_k_norm": 1.4578, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2281, "sent_len_1": 66.6115, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.5825, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.7266, "doc_norm": 1.4578, "encoder_q-embeddings": 1075.4204, "encoder_q-layer.0": 721.4901, "encoder_q-layer.1": 787.5269, "encoder_q-layer.10": 1397.7565, "encoder_q-layer.11": 3211.252, "encoder_q-layer.2": 897.0989, "encoder_q-layer.3": 914.1972, "encoder_q-layer.4": 968.0056, "encoder_q-layer.5": 987.4688, "encoder_q-layer.6": 1107.9097, "encoder_q-layer.7": 1155.7828, "encoder_q-layer.8": 1357.4777, "encoder_q-layer.9": 1255.801, "epoch": 0.73, "inbatch_neg_score": 0.2921, "inbatch_pos_score": 1.0186, "learning_rate": 1.4111111111111112e-05, "loss": 2.7266, "norm_diff": 0.0619, "norm_loss": 0.0, "num_token_doc": 66.8351, "num_token_overlap": 17.972, "num_token_query": 52.211, "num_token_union": 73.4884, "num_word_context": 202.5511, "num_word_doc": 49.8685, "num_word_query": 39.7971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2071.8631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.292, "query_norm": 1.3959, "queue_k_norm": 1.4582, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.211, "sent_len_1": 66.8351, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.7287, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.7496, "doc_norm": 1.454, "encoder_q-embeddings": 1260.9755, "encoder_q-layer.0": 820.576, "encoder_q-layer.1": 909.6795, "encoder_q-layer.10": 1268.6801, "encoder_q-layer.11": 2871.6624, "encoder_q-layer.2": 1101.9386, "encoder_q-layer.3": 1097.0208, "encoder_q-layer.4": 1195.2705, "encoder_q-layer.5": 1189.1851, "encoder_q-layer.6": 1256.4668, "encoder_q-layer.7": 1299.8379, "encoder_q-layer.8": 1460.49, "encoder_q-layer.9": 1217.4845, "epoch": 0.73, "inbatch_neg_score": 0.2914, "inbatch_pos_score": 1.0127, "learning_rate": 1.4055555555555556e-05, "loss": 2.7496, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.9286, "num_token_overlap": 17.9279, "num_token_query": 52.1098, "num_token_union": 73.5289, "num_word_context": 202.0195, "num_word_doc": 49.9239, "num_word_query": 39.7315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.4035, "queue_k_norm": 1.4565, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1098, "sent_len_1": 66.9286, "sent_len_max_0": 127.995, "sent_len_max_1": 188.9762, "stdk": 0.0486, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.7374, "doc_norm": 1.4593, "encoder_q-embeddings": 2648.0684, "encoder_q-layer.0": 1878.1978, "encoder_q-layer.1": 2103.3896, "encoder_q-layer.10": 1305.5647, "encoder_q-layer.11": 2657.158, "encoder_q-layer.2": 2339.6892, "encoder_q-layer.3": 2495.2749, "encoder_q-layer.4": 2501.4768, "encoder_q-layer.5": 2622.1287, "encoder_q-layer.6": 2573.0374, "encoder_q-layer.7": 2490.3621, "encoder_q-layer.8": 1935.9515, "encoder_q-layer.9": 1233.1439, "epoch": 0.73, "inbatch_neg_score": 0.2919, "inbatch_pos_score": 1.0098, "learning_rate": 1.4000000000000001e-05, "loss": 2.7374, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.5814, "num_token_overlap": 17.8671, "num_token_query": 52.0284, "num_token_union": 73.3291, "num_word_context": 202.1136, "num_word_doc": 49.6452, "num_word_query": 39.6417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3415.0688, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.4054, "queue_k_norm": 1.4572, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0284, "sent_len_1": 66.5814, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.8, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7313, "doc_norm": 1.463, "encoder_q-embeddings": 1334.5557, "encoder_q-layer.0": 900.64, "encoder_q-layer.1": 1069.7694, "encoder_q-layer.10": 1362.556, "encoder_q-layer.11": 2723.2551, "encoder_q-layer.2": 1130.91, "encoder_q-layer.3": 1167.2228, "encoder_q-layer.4": 1178.6426, "encoder_q-layer.5": 1165.8354, "encoder_q-layer.6": 1301.4004, "encoder_q-layer.7": 1400.2344, "encoder_q-layer.8": 1456.0983, "encoder_q-layer.9": 1219.7865, "epoch": 0.73, "inbatch_neg_score": 0.2923, "inbatch_pos_score": 1.0352, "learning_rate": 1.3944444444444446e-05, "loss": 2.7313, "norm_diff": 0.039, "norm_loss": 0.0, "num_token_doc": 66.5225, "num_token_overlap": 17.8912, "num_token_query": 52.1159, "num_token_union": 73.3149, "num_word_context": 202.1658, "num_word_doc": 49.629, "num_word_query": 39.7499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2110.7743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.424, "queue_k_norm": 1.457, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1159, "sent_len_1": 66.5225, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3113, "stdk": 0.049, "stdq": 0.0471, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.734, "doc_norm": 1.4504, "encoder_q-embeddings": 1070.6193, "encoder_q-layer.0": 637.7012, "encoder_q-layer.1": 712.2737, "encoder_q-layer.10": 1356.429, "encoder_q-layer.11": 2944.6011, "encoder_q-layer.2": 798.2567, "encoder_q-layer.3": 845.303, "encoder_q-layer.4": 880.6699, "encoder_q-layer.5": 938.9569, "encoder_q-layer.6": 1056.1748, "encoder_q-layer.7": 1189.9685, "encoder_q-layer.8": 1398.9988, "encoder_q-layer.9": 1287.3087, "epoch": 0.73, "inbatch_neg_score": 0.2903, "inbatch_pos_score": 1.0127, "learning_rate": 1.388888888888889e-05, "loss": 2.734, "norm_diff": 0.04, "norm_loss": 0.0, "num_token_doc": 66.8584, "num_token_overlap": 18.0409, "num_token_query": 52.1515, "num_token_union": 73.4104, "num_word_context": 202.3463, "num_word_doc": 49.9185, "num_word_query": 39.7444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1929.8044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2915, "query_norm": 1.4104, "queue_k_norm": 1.4571, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1515, "sent_len_1": 66.8584, "sent_len_max_0": 128.0, "sent_len_max_1": 190.05, "stdk": 0.0485, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7382, "doc_norm": 1.4574, "encoder_q-embeddings": 1055.7542, "encoder_q-layer.0": 685.1146, "encoder_q-layer.1": 736.7531, "encoder_q-layer.10": 1227.3486, "encoder_q-layer.11": 2741.1113, "encoder_q-layer.2": 865.8424, "encoder_q-layer.3": 881.4272, "encoder_q-layer.4": 930.7975, "encoder_q-layer.5": 1045.4573, "encoder_q-layer.6": 1094.9586, "encoder_q-layer.7": 1235.4705, "encoder_q-layer.8": 1296.2849, "encoder_q-layer.9": 1141.304, "epoch": 0.73, "inbatch_neg_score": 0.2918, "inbatch_pos_score": 1.0322, "learning_rate": 1.3833333333333334e-05, "loss": 2.7382, "norm_diff": 0.0471, "norm_loss": 0.0, "num_token_doc": 66.6083, "num_token_overlap": 17.959, "num_token_query": 52.1229, "num_token_union": 73.3074, "num_word_context": 202.0303, "num_word_doc": 49.6791, "num_word_query": 39.718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1866.366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.4103, "queue_k_norm": 1.4561, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1229, "sent_len_1": 66.6083, "sent_len_max_0": 127.99, "sent_len_max_1": 189.0037, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.7377, "doc_norm": 1.458, "encoder_q-embeddings": 1178.3568, "encoder_q-layer.0": 774.8721, "encoder_q-layer.1": 825.711, "encoder_q-layer.10": 1262.9966, "encoder_q-layer.11": 2742.2222, "encoder_q-layer.2": 920.1483, "encoder_q-layer.3": 965.5135, "encoder_q-layer.4": 942.8071, "encoder_q-layer.5": 984.1129, "encoder_q-layer.6": 1070.1178, "encoder_q-layer.7": 1139.9741, "encoder_q-layer.8": 1243.1641, "encoder_q-layer.9": 1153.3229, "epoch": 0.73, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 1.041, "learning_rate": 1.3777777777777778e-05, "loss": 2.7377, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.8547, "num_token_overlap": 18.0075, "num_token_query": 52.3128, "num_token_union": 73.5168, "num_word_context": 202.5882, "num_word_doc": 49.8734, "num_word_query": 39.8849, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1912.0124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.4098, "queue_k_norm": 1.4575, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3128, "sent_len_1": 66.8547, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.7012, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.7381, "doc_norm": 1.4525, "encoder_q-embeddings": 1058.5709, "encoder_q-layer.0": 687.4422, "encoder_q-layer.1": 768.0286, "encoder_q-layer.10": 1331.691, "encoder_q-layer.11": 2717.1726, "encoder_q-layer.2": 875.6118, "encoder_q-layer.3": 865.9478, "encoder_q-layer.4": 919.2577, "encoder_q-layer.5": 943.3683, "encoder_q-layer.6": 1048.4951, "encoder_q-layer.7": 1094.613, "encoder_q-layer.8": 1315.959, "encoder_q-layer.9": 1210.5009, "epoch": 0.74, "inbatch_neg_score": 0.286, "inbatch_pos_score": 1.0049, "learning_rate": 1.3722222222222222e-05, "loss": 2.7381, "norm_diff": 0.0377, "norm_loss": 0.0, "num_token_doc": 66.689, "num_token_overlap": 17.9485, "num_token_query": 52.0722, "num_token_union": 73.3128, "num_word_context": 202.098, "num_word_doc": 49.7601, "num_word_query": 39.6868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1896.1658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.4147, "queue_k_norm": 1.4573, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0722, "sent_len_1": 66.689, "sent_len_max_0": 127.99, "sent_len_max_1": 188.7325, "stdk": 0.0486, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.7213, "doc_norm": 1.4595, "encoder_q-embeddings": 2079.0513, "encoder_q-layer.0": 1345.0387, "encoder_q-layer.1": 1418.0524, "encoder_q-layer.10": 2647.625, "encoder_q-layer.11": 5286.688, "encoder_q-layer.2": 1630.2148, "encoder_q-layer.3": 1675.6726, "encoder_q-layer.4": 1734.5677, "encoder_q-layer.5": 1782.2952, "encoder_q-layer.6": 1988.2982, "encoder_q-layer.7": 2186.7842, "encoder_q-layer.8": 2527.7466, "encoder_q-layer.9": 2269.4121, "epoch": 0.74, "inbatch_neg_score": 0.2906, "inbatch_pos_score": 1.0254, "learning_rate": 1.3666666666666666e-05, "loss": 2.7213, "norm_diff": 0.0552, "norm_loss": 0.0, "num_token_doc": 66.8064, "num_token_overlap": 18.0215, "num_token_query": 52.1463, "num_token_union": 73.4188, "num_word_context": 202.0852, "num_word_doc": 49.8612, "num_word_query": 39.7449, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3639.3422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.4042, "queue_k_norm": 1.4562, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1463, "sent_len_1": 66.8064, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.1887, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.7299, "doc_norm": 1.4555, "encoder_q-embeddings": 1976.1478, "encoder_q-layer.0": 1342.0604, "encoder_q-layer.1": 1444.77, "encoder_q-layer.10": 2605.1226, "encoder_q-layer.11": 5411.1558, "encoder_q-layer.2": 1718.5273, "encoder_q-layer.3": 1649.5151, "encoder_q-layer.4": 1751.0237, "encoder_q-layer.5": 1793.7401, "encoder_q-layer.6": 1985.1536, "encoder_q-layer.7": 2291.6365, "encoder_q-layer.8": 2511.6152, "encoder_q-layer.9": 2336.364, "epoch": 0.74, "inbatch_neg_score": 0.2925, "inbatch_pos_score": 1.0146, "learning_rate": 1.3611111111111111e-05, "loss": 2.7299, "norm_diff": 0.0531, "norm_loss": 0.0, "num_token_doc": 66.4955, "num_token_overlap": 17.9392, "num_token_query": 52.0746, "num_token_union": 73.2508, "num_word_context": 202.1898, "num_word_doc": 49.6672, "num_word_query": 39.7092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3638.076, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.4024, "queue_k_norm": 1.4564, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0746, "sent_len_1": 66.4955, "sent_len_max_0": 128.0, "sent_len_max_1": 186.8425, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 2.7136, "doc_norm": 1.4483, "encoder_q-embeddings": 5137.6323, "encoder_q-layer.0": 3519.9333, "encoder_q-layer.1": 4272.3584, "encoder_q-layer.10": 2533.3904, "encoder_q-layer.11": 5613.5254, "encoder_q-layer.2": 5129.6978, "encoder_q-layer.3": 4955.1558, "encoder_q-layer.4": 4969.0684, "encoder_q-layer.5": 4813.8223, "encoder_q-layer.6": 3641.0564, "encoder_q-layer.7": 3710.6357, "encoder_q-layer.8": 3150.1736, "encoder_q-layer.9": 2479.3679, "epoch": 0.74, "inbatch_neg_score": 0.2915, "inbatch_pos_score": 0.9902, "learning_rate": 1.3555555555555557e-05, "loss": 2.7136, "norm_diff": 0.0469, "norm_loss": 0.0, "num_token_doc": 66.8023, "num_token_overlap": 18.0372, "num_token_query": 52.1864, "num_token_union": 73.4066, "num_word_context": 202.196, "num_word_doc": 49.8489, "num_word_query": 39.7888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6424.9739, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2915, "query_norm": 1.4015, "queue_k_norm": 1.4556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1864, "sent_len_1": 66.8023, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4137, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.714, "doc_norm": 1.455, "encoder_q-embeddings": 2094.781, "encoder_q-layer.0": 1380.4781, "encoder_q-layer.1": 1481.287, "encoder_q-layer.10": 2488.5283, "encoder_q-layer.11": 5279.5928, "encoder_q-layer.2": 1763.66, "encoder_q-layer.3": 1805.1648, "encoder_q-layer.4": 1954.3923, "encoder_q-layer.5": 1998.6277, "encoder_q-layer.6": 2336.4204, "encoder_q-layer.7": 2532.2783, "encoder_q-layer.8": 2657.5911, "encoder_q-layer.9": 2306.594, "epoch": 0.74, "inbatch_neg_score": 0.2881, "inbatch_pos_score": 1.0312, "learning_rate": 1.3500000000000001e-05, "loss": 2.714, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 67.0709, "num_token_overlap": 18.0403, "num_token_query": 52.1898, "num_token_union": 73.5855, "num_word_context": 202.409, "num_word_doc": 50.0302, "num_word_query": 39.7838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3750.3972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2883, "query_norm": 1.4057, "queue_k_norm": 1.4566, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1898, "sent_len_1": 67.0709, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.96, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.704, "doc_norm": 1.4528, "encoder_q-embeddings": 2214.4475, "encoder_q-layer.0": 1427.6957, "encoder_q-layer.1": 1533.1504, "encoder_q-layer.10": 2755.6926, "encoder_q-layer.11": 5297.4551, "encoder_q-layer.2": 1705.5144, "encoder_q-layer.3": 1836.411, "encoder_q-layer.4": 1940.1514, "encoder_q-layer.5": 1934.2761, "encoder_q-layer.6": 2040.6293, "encoder_q-layer.7": 2313.5369, "encoder_q-layer.8": 2497.5496, "encoder_q-layer.9": 2369.4761, "epoch": 0.74, "inbatch_neg_score": 0.2833, "inbatch_pos_score": 0.9927, "learning_rate": 1.3444444444444445e-05, "loss": 2.704, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.7887, "num_token_overlap": 18.0131, "num_token_query": 52.3885, "num_token_union": 73.5215, "num_word_context": 202.5708, "num_word_doc": 49.8402, "num_word_query": 39.931, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3675.6606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2827, "query_norm": 1.3925, "queue_k_norm": 1.4569, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3885, "sent_len_1": 66.7887, "sent_len_max_0": 128.0, "sent_len_max_1": 189.775, "stdk": 0.0486, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 2.7239, "doc_norm": 1.4547, "encoder_q-embeddings": 3961.8066, "encoder_q-layer.0": 2518.9727, "encoder_q-layer.1": 3117.0759, "encoder_q-layer.10": 2731.0166, "encoder_q-layer.11": 5672.7549, "encoder_q-layer.2": 3639.8696, "encoder_q-layer.3": 3942.3335, "encoder_q-layer.4": 3973.1494, "encoder_q-layer.5": 3664.2463, "encoder_q-layer.6": 3899.9795, "encoder_q-layer.7": 4245.7295, "encoder_q-layer.8": 3844.3428, "encoder_q-layer.9": 2476.0088, "epoch": 0.74, "inbatch_neg_score": 0.2787, "inbatch_pos_score": 1.0029, "learning_rate": 1.338888888888889e-05, "loss": 2.7239, "norm_diff": 0.0426, "norm_loss": 0.0, "num_token_doc": 66.6667, "num_token_overlap": 17.9116, "num_token_query": 51.9875, "num_token_union": 73.3198, "num_word_context": 202.0024, "num_word_doc": 49.7187, "num_word_query": 39.6213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5662.5926, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2803, "query_norm": 1.4122, "queue_k_norm": 1.4561, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 51.9875, "sent_len_1": 66.6667, "sent_len_max_0": 128.0, "sent_len_max_1": 189.92, "stdk": 0.0488, "stdq": 0.047, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.7231, "doc_norm": 1.4538, "encoder_q-embeddings": 3924.2524, "encoder_q-layer.0": 2896.4243, "encoder_q-layer.1": 3373.3066, "encoder_q-layer.10": 2481.2136, "encoder_q-layer.11": 5600.6143, "encoder_q-layer.2": 3962.3044, "encoder_q-layer.3": 4432.0464, "encoder_q-layer.4": 4426.4141, "encoder_q-layer.5": 4113.0757, "encoder_q-layer.6": 4791.0591, "encoder_q-layer.7": 3856.1777, "encoder_q-layer.8": 3945.4854, "encoder_q-layer.9": 2421.0127, "epoch": 0.74, "inbatch_neg_score": 0.2796, "inbatch_pos_score": 0.9868, "learning_rate": 1.3333333333333333e-05, "loss": 2.7231, "norm_diff": 0.0526, "norm_loss": 0.0, "num_token_doc": 66.8898, "num_token_overlap": 18.0864, "num_token_query": 52.425, "num_token_union": 73.496, "num_word_context": 202.3007, "num_word_doc": 49.8488, "num_word_query": 39.9507, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5975.2601, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.281, "query_norm": 1.4012, "queue_k_norm": 1.4564, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.425, "sent_len_1": 66.8898, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3088, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.7431, "doc_norm": 1.4595, "encoder_q-embeddings": 1865.5632, "encoder_q-layer.0": 1233.7902, "encoder_q-layer.1": 1326.7941, "encoder_q-layer.10": 2499.9446, "encoder_q-layer.11": 5169.8828, "encoder_q-layer.2": 1482.1372, "encoder_q-layer.3": 1486.8457, "encoder_q-layer.4": 1577.4839, "encoder_q-layer.5": 1837.9011, "encoder_q-layer.6": 2003.28, "encoder_q-layer.7": 1900.4017, "encoder_q-layer.8": 2402.9224, "encoder_q-layer.9": 2219.6602, "epoch": 0.74, "inbatch_neg_score": 0.2784, "inbatch_pos_score": 1.0312, "learning_rate": 1.3277777777777777e-05, "loss": 2.7431, "norm_diff": 0.0591, "norm_loss": 0.0, "num_token_doc": 66.604, "num_token_overlap": 17.9354, "num_token_query": 52.1821, "num_token_union": 73.4105, "num_word_context": 202.068, "num_word_doc": 49.7116, "num_word_query": 39.8057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3413.3234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.4004, "queue_k_norm": 1.4546, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1821, "sent_len_1": 66.604, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8975, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7288, "doc_norm": 1.4572, "encoder_q-embeddings": 2260.7263, "encoder_q-layer.0": 1547.4531, "encoder_q-layer.1": 1652.749, "encoder_q-layer.10": 2784.3406, "encoder_q-layer.11": 5404.5435, "encoder_q-layer.2": 1879.756, "encoder_q-layer.3": 1889.7305, "encoder_q-layer.4": 1979.4755, "encoder_q-layer.5": 2020.7072, "encoder_q-layer.6": 2132.7344, "encoder_q-layer.7": 2280.6094, "encoder_q-layer.8": 2607.8633, "encoder_q-layer.9": 2403.2524, "epoch": 0.74, "inbatch_neg_score": 0.2791, "inbatch_pos_score": 1.0039, "learning_rate": 1.3222222222222221e-05, "loss": 2.7288, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.6538, "num_token_overlap": 18.0351, "num_token_query": 52.1838, "num_token_union": 73.3397, "num_word_context": 201.946, "num_word_doc": 49.7733, "num_word_query": 39.7846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3796.1647, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.3901, "queue_k_norm": 1.4543, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1838, "sent_len_1": 66.6538, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.295, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.734, "doc_norm": 1.4546, "encoder_q-embeddings": 2066.3743, "encoder_q-layer.0": 1275.4556, "encoder_q-layer.1": 1407.1447, "encoder_q-layer.10": 2648.4861, "encoder_q-layer.11": 5598.9438, "encoder_q-layer.2": 1575.8174, "encoder_q-layer.3": 1607.9268, "encoder_q-layer.4": 1757.7212, "encoder_q-layer.5": 1806.6552, "encoder_q-layer.6": 2116.4141, "encoder_q-layer.7": 2467.1499, "encoder_q-layer.8": 2753.123, "encoder_q-layer.9": 2619.7793, "epoch": 0.74, "inbatch_neg_score": 0.274, "inbatch_pos_score": 1.0166, "learning_rate": 1.3166666666666665e-05, "loss": 2.734, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.6024, "num_token_overlap": 17.9807, "num_token_query": 52.2329, "num_token_union": 73.3874, "num_word_context": 202.187, "num_word_doc": 49.7587, "num_word_query": 39.8367, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3745.0553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2751, "query_norm": 1.4064, "queue_k_norm": 1.454, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2329, "sent_len_1": 66.6024, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9875, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7212, "doc_norm": 1.4509, "encoder_q-embeddings": 2247.4407, "encoder_q-layer.0": 1499.3049, "encoder_q-layer.1": 1646.295, "encoder_q-layer.10": 2517.4419, "encoder_q-layer.11": 5338.6299, "encoder_q-layer.2": 1914.3961, "encoder_q-layer.3": 2043.7014, "encoder_q-layer.4": 2232.7837, "encoder_q-layer.5": 2388.5596, "encoder_q-layer.6": 2708.4551, "encoder_q-layer.7": 2749.8889, "encoder_q-layer.8": 2861.3574, "encoder_q-layer.9": 2504.0376, "epoch": 0.75, "inbatch_neg_score": 0.2728, "inbatch_pos_score": 1.0049, "learning_rate": 1.3111111111111113e-05, "loss": 2.7212, "norm_diff": 0.0533, "norm_loss": 0.0, "num_token_doc": 66.7533, "num_token_overlap": 18.0029, "num_token_query": 52.2816, "num_token_union": 73.4911, "num_word_context": 202.3923, "num_word_doc": 49.8348, "num_word_query": 39.8352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3984.7439, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2739, "query_norm": 1.3976, "queue_k_norm": 1.4549, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2816, "sent_len_1": 66.7533, "sent_len_max_0": 127.995, "sent_len_max_1": 190.2862, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.7453, "doc_norm": 1.4587, "encoder_q-embeddings": 2402.9705, "encoder_q-layer.0": 1583.2554, "encoder_q-layer.1": 1818.4652, "encoder_q-layer.10": 2696.8123, "encoder_q-layer.11": 5060.3691, "encoder_q-layer.2": 2238.7852, "encoder_q-layer.3": 2109.0085, "encoder_q-layer.4": 1909.0537, "encoder_q-layer.5": 2003.7627, "encoder_q-layer.6": 2162.8528, "encoder_q-layer.7": 2311.3289, "encoder_q-layer.8": 2575.1716, "encoder_q-layer.9": 2273.9077, "epoch": 0.75, "inbatch_neg_score": 0.2741, "inbatch_pos_score": 1.0088, "learning_rate": 1.3055555555555557e-05, "loss": 2.7453, "norm_diff": 0.0623, "norm_loss": 0.0, "num_token_doc": 66.5809, "num_token_overlap": 17.9673, "num_token_query": 52.2518, "num_token_union": 73.4233, "num_word_context": 202.4363, "num_word_doc": 49.7378, "num_word_query": 39.8431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3790.8651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2742, "query_norm": 1.3965, "queue_k_norm": 1.4548, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2518, "sent_len_1": 66.5809, "sent_len_max_0": 128.0, "sent_len_max_1": 188.18, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 62.6953, "active_queue_size": 16384.0, "cl_loss": 2.7251, "doc_norm": 1.4584, "encoder_q-embeddings": 13559.0859, "encoder_q-layer.0": 9498.2793, "encoder_q-layer.1": 12383.2559, "encoder_q-layer.10": 1337.6392, "encoder_q-layer.11": 2739.1875, "encoder_q-layer.2": 13876.9814, "encoder_q-layer.3": 12404.6436, "encoder_q-layer.4": 9784.8887, "encoder_q-layer.5": 6380.9629, "encoder_q-layer.6": 6307.7886, "encoder_q-layer.7": 4522.1772, "encoder_q-layer.8": 3315.6289, "encoder_q-layer.9": 1336.0017, "epoch": 0.75, "inbatch_neg_score": 0.2818, "inbatch_pos_score": 1.0498, "learning_rate": 1.3000000000000001e-05, "loss": 2.7251, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.751, "num_token_overlap": 18.0231, "num_token_query": 52.3447, "num_token_union": 73.4989, "num_word_context": 202.1869, "num_word_doc": 49.7813, "num_word_query": 39.9108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13919.1362, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.281, "query_norm": 1.4105, "queue_k_norm": 1.4551, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3447, "sent_len_1": 66.751, "sent_len_max_0": 128.0, "sent_len_max_1": 189.205, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7379, "doc_norm": 1.4553, "encoder_q-embeddings": 2924.5293, "encoder_q-layer.0": 2144.0813, "encoder_q-layer.1": 2164.2139, "encoder_q-layer.10": 1494.083, "encoder_q-layer.11": 2786.8296, "encoder_q-layer.2": 2614.0825, "encoder_q-layer.3": 2999.896, "encoder_q-layer.4": 2798.2122, "encoder_q-layer.5": 2684.9482, "encoder_q-layer.6": 2856.3062, "encoder_q-layer.7": 2334.9722, "encoder_q-layer.8": 1829.2625, "encoder_q-layer.9": 1347.3666, "epoch": 0.75, "inbatch_neg_score": 0.2726, "inbatch_pos_score": 1.0283, "learning_rate": 1.2944444444444445e-05, "loss": 2.7379, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 66.6187, "num_token_overlap": 17.9172, "num_token_query": 52.0589, "num_token_union": 73.346, "num_word_context": 202.178, "num_word_doc": 49.7236, "num_word_query": 39.6562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3781.9019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2727, "query_norm": 1.4172, "queue_k_norm": 1.4521, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0589, "sent_len_1": 66.6187, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.5362, "stdk": 0.0488, "stdq": 0.0472, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.7054, "doc_norm": 1.4539, "encoder_q-embeddings": 1461.8574, "encoder_q-layer.0": 974.3536, "encoder_q-layer.1": 1074.0052, "encoder_q-layer.10": 1377.4749, "encoder_q-layer.11": 2832.1775, "encoder_q-layer.2": 1278.45, "encoder_q-layer.3": 1353.5819, "encoder_q-layer.4": 1438.2545, "encoder_q-layer.5": 1484.9164, "encoder_q-layer.6": 1491.4788, "encoder_q-layer.7": 1342.0177, "encoder_q-layer.8": 1374.0469, "encoder_q-layer.9": 1166.0684, "epoch": 0.75, "inbatch_neg_score": 0.271, "inbatch_pos_score": 1.0186, "learning_rate": 1.2888888888888889e-05, "loss": 2.7054, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.6455, "num_token_overlap": 17.9909, "num_token_query": 52.1802, "num_token_union": 73.3126, "num_word_context": 202.12, "num_word_doc": 49.7189, "num_word_query": 39.7867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2226.6032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2715, "query_norm": 1.4155, "queue_k_norm": 1.452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1802, "sent_len_1": 66.6455, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6163, "stdk": 0.0488, "stdq": 0.047, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7195, "doc_norm": 1.4508, "encoder_q-embeddings": 4868.2329, "encoder_q-layer.0": 3254.105, "encoder_q-layer.1": 3864.5273, "encoder_q-layer.10": 1364.2452, "encoder_q-layer.11": 2981.6372, "encoder_q-layer.2": 4416.1157, "encoder_q-layer.3": 4140.584, "encoder_q-layer.4": 3877.2029, "encoder_q-layer.5": 3272.7178, "encoder_q-layer.6": 2814.9919, "encoder_q-layer.7": 2249.8057, "encoder_q-layer.8": 1651.3345, "encoder_q-layer.9": 1343.1183, "epoch": 0.75, "inbatch_neg_score": 0.2747, "inbatch_pos_score": 0.9951, "learning_rate": 1.2833333333333333e-05, "loss": 2.7195, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.7397, "num_token_overlap": 18.0175, "num_token_query": 52.2823, "num_token_union": 73.4323, "num_word_context": 202.5671, "num_word_doc": 49.8016, "num_word_query": 39.8626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5107.6347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2754, "query_norm": 1.4146, "queue_k_norm": 1.453, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2823, "sent_len_1": 66.7397, "sent_len_max_0": 128.0, "sent_len_max_1": 191.645, "stdk": 0.0487, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7072, "doc_norm": 1.4574, "encoder_q-embeddings": 1071.3049, "encoder_q-layer.0": 674.3763, "encoder_q-layer.1": 755.1987, "encoder_q-layer.10": 1254.1768, "encoder_q-layer.11": 2699.7517, "encoder_q-layer.2": 870.6291, "encoder_q-layer.3": 883.4755, "encoder_q-layer.4": 939.4125, "encoder_q-layer.5": 982.6141, "encoder_q-layer.6": 1061.9875, "encoder_q-layer.7": 1166.3807, "encoder_q-layer.8": 1306.4187, "encoder_q-layer.9": 1165.2689, "epoch": 0.75, "inbatch_neg_score": 0.2762, "inbatch_pos_score": 1.0107, "learning_rate": 1.2777777777777777e-05, "loss": 2.7072, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.9457, "num_token_overlap": 18.0686, "num_token_query": 52.2574, "num_token_union": 73.4982, "num_word_context": 202.3282, "num_word_doc": 49.9892, "num_word_query": 39.8327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1900.8798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2759, "query_norm": 1.3967, "queue_k_norm": 1.454, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2574, "sent_len_1": 66.9457, "sent_len_max_0": 127.985, "sent_len_max_1": 188.79, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7303, "doc_norm": 1.4507, "encoder_q-embeddings": 957.367, "encoder_q-layer.0": 612.9617, "encoder_q-layer.1": 665.7371, "encoder_q-layer.10": 1282.8136, "encoder_q-layer.11": 2851.8455, "encoder_q-layer.2": 759.4709, "encoder_q-layer.3": 774.8825, "encoder_q-layer.4": 805.2379, "encoder_q-layer.5": 847.9243, "encoder_q-layer.6": 978.1257, "encoder_q-layer.7": 1126.944, "encoder_q-layer.8": 1374.6866, "encoder_q-layer.9": 1221.0979, "epoch": 0.75, "inbatch_neg_score": 0.2787, "inbatch_pos_score": 1.0098, "learning_rate": 1.2722222222222221e-05, "loss": 2.7303, "norm_diff": 0.0362, "norm_loss": 0.0, "num_token_doc": 66.9207, "num_token_overlap": 18.0188, "num_token_query": 52.3074, "num_token_union": 73.5588, "num_word_context": 202.0523, "num_word_doc": 49.9175, "num_word_query": 39.8776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1825.6271, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2781, "query_norm": 1.4146, "queue_k_norm": 1.454, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3074, "sent_len_1": 66.9207, "sent_len_max_0": 128.0, "sent_len_max_1": 191.8525, "stdk": 0.0487, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.7243, "doc_norm": 1.456, "encoder_q-embeddings": 1109.351, "encoder_q-layer.0": 703.9092, "encoder_q-layer.1": 763.5487, "encoder_q-layer.10": 1232.8992, "encoder_q-layer.11": 2759.7896, "encoder_q-layer.2": 907.2136, "encoder_q-layer.3": 1000.6631, "encoder_q-layer.4": 1061.2961, "encoder_q-layer.5": 1036.854, "encoder_q-layer.6": 1088.4121, "encoder_q-layer.7": 1156.7343, "encoder_q-layer.8": 1286.9064, "encoder_q-layer.9": 1152.5299, "epoch": 0.75, "inbatch_neg_score": 0.2755, "inbatch_pos_score": 1.0098, "learning_rate": 1.2666666666666668e-05, "loss": 2.7243, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.9412, "num_token_overlap": 17.9626, "num_token_query": 52.1435, "num_token_union": 73.5347, "num_word_context": 202.6308, "num_word_doc": 49.9686, "num_word_query": 39.7422, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1928.1094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.4013, "queue_k_norm": 1.4528, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1435, "sent_len_1": 66.9412, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8075, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.7074, "doc_norm": 1.4587, "encoder_q-embeddings": 1181.1173, "encoder_q-layer.0": 749.7986, "encoder_q-layer.1": 822.82, "encoder_q-layer.10": 1389.4113, "encoder_q-layer.11": 2804.4443, "encoder_q-layer.2": 991.1027, "encoder_q-layer.3": 1082.202, "encoder_q-layer.4": 1146.6472, "encoder_q-layer.5": 1256.9906, "encoder_q-layer.6": 1499.7141, "encoder_q-layer.7": 1560.3339, "encoder_q-layer.8": 1601.4871, "encoder_q-layer.9": 1173.9906, "epoch": 0.75, "inbatch_neg_score": 0.2724, "inbatch_pos_score": 1.0244, "learning_rate": 1.2611111111111113e-05, "loss": 2.7074, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 67.0521, "num_token_overlap": 18.0614, "num_token_query": 52.2032, "num_token_union": 73.504, "num_word_context": 202.3686, "num_word_doc": 49.9834, "num_word_query": 39.7995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2116.7984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2722, "query_norm": 1.4076, "queue_k_norm": 1.4528, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2032, "sent_len_1": 67.0521, "sent_len_max_0": 128.0, "sent_len_max_1": 192.2012, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.7079, "doc_norm": 1.4536, "encoder_q-embeddings": 1498.9673, "encoder_q-layer.0": 982.3544, "encoder_q-layer.1": 1112.1149, "encoder_q-layer.10": 1195.8442, "encoder_q-layer.11": 2588.28, "encoder_q-layer.2": 1331.4344, "encoder_q-layer.3": 1387.1691, "encoder_q-layer.4": 1421.4667, "encoder_q-layer.5": 1405.2985, "encoder_q-layer.6": 1653.8977, "encoder_q-layer.7": 1571.1661, "encoder_q-layer.8": 1518.5366, "encoder_q-layer.9": 1126.1942, "epoch": 0.76, "inbatch_neg_score": 0.2779, "inbatch_pos_score": 0.998, "learning_rate": 1.2555555555555557e-05, "loss": 2.7079, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.8337, "num_token_overlap": 18.0186, "num_token_query": 52.2393, "num_token_union": 73.498, "num_word_context": 202.4318, "num_word_doc": 49.8913, "num_word_query": 39.8171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2234.1559, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2769, "query_norm": 1.4075, "queue_k_norm": 1.4523, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2393, "sent_len_1": 66.8337, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.3125, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 63.1836, "active_queue_size": 16384.0, "cl_loss": 2.7348, "doc_norm": 1.453, "encoder_q-embeddings": 1176.3938, "encoder_q-layer.0": 800.3547, "encoder_q-layer.1": 933.0085, "encoder_q-layer.10": 1324.7085, "encoder_q-layer.11": 2706.75, "encoder_q-layer.2": 1107.6908, "encoder_q-layer.3": 1130.7288, "encoder_q-layer.4": 1179.0587, "encoder_q-layer.5": 1223.6516, "encoder_q-layer.6": 1310.9648, "encoder_q-layer.7": 1302.276, "encoder_q-layer.8": 1411.3019, "encoder_q-layer.9": 1232.2407, "epoch": 0.76, "inbatch_neg_score": 0.2739, "inbatch_pos_score": 1.0225, "learning_rate": 1.25e-05, "loss": 2.7348, "norm_diff": 0.0475, "norm_loss": 0.0, "num_token_doc": 66.9287, "num_token_overlap": 17.9942, "num_token_query": 52.2608, "num_token_union": 73.5437, "num_word_context": 202.5112, "num_word_doc": 49.902, "num_word_query": 39.8568, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2003.937, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2744, "query_norm": 1.4055, "queue_k_norm": 1.4531, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2608, "sent_len_1": 66.9287, "sent_len_max_0": 128.0, "sent_len_max_1": 193.5163, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.7143, "doc_norm": 1.4501, "encoder_q-embeddings": 1019.5667, "encoder_q-layer.0": 639.6235, "encoder_q-layer.1": 704.1077, "encoder_q-layer.10": 1261.9287, "encoder_q-layer.11": 2745.1101, "encoder_q-layer.2": 790.7042, "encoder_q-layer.3": 844.5157, "encoder_q-layer.4": 911.2975, "encoder_q-layer.5": 865.851, "encoder_q-layer.6": 960.8386, "encoder_q-layer.7": 1058.8264, "encoder_q-layer.8": 1293.0623, "encoder_q-layer.9": 1200.3661, "epoch": 0.76, "inbatch_neg_score": 0.2749, "inbatch_pos_score": 0.9893, "learning_rate": 1.2444444444444445e-05, "loss": 2.7143, "norm_diff": 0.0419, "norm_loss": 0.0, "num_token_doc": 66.8835, "num_token_overlap": 18.0076, "num_token_query": 52.1121, "num_token_union": 73.4363, "num_word_context": 202.0008, "num_word_doc": 49.8992, "num_word_query": 39.7283, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1829.4329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2759, "query_norm": 1.4082, "queue_k_norm": 1.4535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1121, "sent_len_1": 66.8835, "sent_len_max_0": 127.99, "sent_len_max_1": 189.94, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.7154, "doc_norm": 1.4558, "encoder_q-embeddings": 1215.7471, "encoder_q-layer.0": 812.4129, "encoder_q-layer.1": 907.4594, "encoder_q-layer.10": 1199.7292, "encoder_q-layer.11": 2730.5503, "encoder_q-layer.2": 1044.4556, "encoder_q-layer.3": 1147.5732, "encoder_q-layer.4": 1258.1212, "encoder_q-layer.5": 1187.8126, "encoder_q-layer.6": 1263.103, "encoder_q-layer.7": 1277.9963, "encoder_q-layer.8": 1331.6093, "encoder_q-layer.9": 1156.1455, "epoch": 0.76, "inbatch_neg_score": 0.2829, "inbatch_pos_score": 0.9946, "learning_rate": 1.238888888888889e-05, "loss": 2.7154, "norm_diff": 0.0599, "norm_loss": 0.0, "num_token_doc": 66.7483, "num_token_overlap": 18.003, "num_token_query": 52.2565, "num_token_union": 73.4577, "num_word_context": 202.2104, "num_word_doc": 49.803, "num_word_query": 39.8582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2034.2194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2822, "query_norm": 1.3959, "queue_k_norm": 1.452, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2565, "sent_len_1": 66.7483, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0675, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.7293, "doc_norm": 1.4508, "encoder_q-embeddings": 1347.4865, "encoder_q-layer.0": 921.153, "encoder_q-layer.1": 1084.2679, "encoder_q-layer.10": 1239.5104, "encoder_q-layer.11": 2734.8945, "encoder_q-layer.2": 1286.9977, "encoder_q-layer.3": 1246.0243, "encoder_q-layer.4": 1335.1064, "encoder_q-layer.5": 1425.2982, "encoder_q-layer.6": 1346.7979, "encoder_q-layer.7": 1297.9968, "encoder_q-layer.8": 1354.519, "encoder_q-layer.9": 1176.9816, "epoch": 0.76, "inbatch_neg_score": 0.2829, "inbatch_pos_score": 1.0215, "learning_rate": 1.2333333333333334e-05, "loss": 2.7293, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 66.5422, "num_token_overlap": 17.9356, "num_token_query": 52.2039, "num_token_union": 73.3425, "num_word_context": 202.2386, "num_word_doc": 49.6626, "num_word_query": 39.7834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2164.6413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.4096, "queue_k_norm": 1.452, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2039, "sent_len_1": 66.5422, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.3963, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7251, "doc_norm": 1.4516, "encoder_q-embeddings": 927.3699, "encoder_q-layer.0": 592.6611, "encoder_q-layer.1": 645.0938, "encoder_q-layer.10": 1312.427, "encoder_q-layer.11": 2717.2144, "encoder_q-layer.2": 715.0463, "encoder_q-layer.3": 741.2772, "encoder_q-layer.4": 796.1975, "encoder_q-layer.5": 815.7591, "encoder_q-layer.6": 896.4356, "encoder_q-layer.7": 1027.6378, "encoder_q-layer.8": 1225.77, "encoder_q-layer.9": 1153.4728, "epoch": 0.76, "inbatch_neg_score": 0.286, "inbatch_pos_score": 1.0146, "learning_rate": 1.2277777777777778e-05, "loss": 2.7251, "norm_diff": 0.0339, "norm_loss": 0.0, "num_token_doc": 66.5318, "num_token_overlap": 18.0021, "num_token_query": 52.1348, "num_token_union": 73.3015, "num_word_context": 202.0723, "num_word_doc": 49.671, "num_word_query": 39.7642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1780.8179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.419, "queue_k_norm": 1.454, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1348, "sent_len_1": 66.5318, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2125, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.7084, "doc_norm": 1.4501, "encoder_q-embeddings": 1289.0043, "encoder_q-layer.0": 792.6633, "encoder_q-layer.1": 900.3349, "encoder_q-layer.10": 1347.0316, "encoder_q-layer.11": 2888.6501, "encoder_q-layer.2": 1040.65, "encoder_q-layer.3": 1044.7836, "encoder_q-layer.4": 1140.0763, "encoder_q-layer.5": 1159.8425, "encoder_q-layer.6": 1167.3979, "encoder_q-layer.7": 1266.7555, "encoder_q-layer.8": 1484.8088, "encoder_q-layer.9": 1255.9155, "epoch": 0.76, "inbatch_neg_score": 0.2822, "inbatch_pos_score": 0.9902, "learning_rate": 1.2222222222222222e-05, "loss": 2.7084, "norm_diff": 0.0471, "norm_loss": 0.0, "num_token_doc": 66.8004, "num_token_overlap": 18.0273, "num_token_query": 52.1292, "num_token_union": 73.3311, "num_word_context": 202.1223, "num_word_doc": 49.878, "num_word_query": 39.7288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.1834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.4029, "queue_k_norm": 1.4539, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1292, "sent_len_1": 66.8004, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6312, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.7295, "doc_norm": 1.4557, "encoder_q-embeddings": 1661.1135, "encoder_q-layer.0": 1227.3058, "encoder_q-layer.1": 1440.1477, "encoder_q-layer.10": 1371.4626, "encoder_q-layer.11": 2729.0681, "encoder_q-layer.2": 1594.1761, "encoder_q-layer.3": 1782.849, "encoder_q-layer.4": 1884.9547, "encoder_q-layer.5": 1888.6046, "encoder_q-layer.6": 2297.8738, "encoder_q-layer.7": 2584.4421, "encoder_q-layer.8": 3164.668, "encoder_q-layer.9": 2347.4944, "epoch": 0.76, "inbatch_neg_score": 0.2823, "inbatch_pos_score": 1.0068, "learning_rate": 1.2166666666666668e-05, "loss": 2.7295, "norm_diff": 0.0571, "norm_loss": 0.0, "num_token_doc": 66.756, "num_token_overlap": 17.9691, "num_token_query": 52.1925, "num_token_union": 73.4836, "num_word_context": 202.093, "num_word_doc": 49.8312, "num_word_query": 39.801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3162.0703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.3986, "queue_k_norm": 1.4507, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1925, "sent_len_1": 66.756, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.8762, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.7358, "doc_norm": 1.4546, "encoder_q-embeddings": 972.6237, "encoder_q-layer.0": 656.3541, "encoder_q-layer.1": 700.0739, "encoder_q-layer.10": 1309.7726, "encoder_q-layer.11": 2815.6045, "encoder_q-layer.2": 797.4626, "encoder_q-layer.3": 815.894, "encoder_q-layer.4": 832.4048, "encoder_q-layer.5": 866.2548, "encoder_q-layer.6": 995.293, "encoder_q-layer.7": 1077.5042, "encoder_q-layer.8": 1279.4154, "encoder_q-layer.9": 1211.4319, "epoch": 0.76, "inbatch_neg_score": 0.2835, "inbatch_pos_score": 1.0332, "learning_rate": 1.2111111111111112e-05, "loss": 2.7358, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 66.7211, "num_token_overlap": 17.9871, "num_token_query": 52.2163, "num_token_union": 73.4014, "num_word_context": 202.1706, "num_word_doc": 49.7615, "num_word_query": 39.8073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1809.9228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.4241, "queue_k_norm": 1.4544, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2163, "sent_len_1": 66.7211, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.7488, "stdk": 0.0489, "stdq": 0.0471, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6995, "doc_norm": 1.4445, "encoder_q-embeddings": 1490.391, "encoder_q-layer.0": 992.0523, "encoder_q-layer.1": 1113.1575, "encoder_q-layer.10": 1298.2654, "encoder_q-layer.11": 2768.8625, "encoder_q-layer.2": 1343.9481, "encoder_q-layer.3": 1485.4669, "encoder_q-layer.4": 1492.4785, "encoder_q-layer.5": 1365.447, "encoder_q-layer.6": 1425.2711, "encoder_q-layer.7": 1463.8589, "encoder_q-layer.8": 1491.8914, "encoder_q-layer.9": 1243.7721, "epoch": 0.76, "inbatch_neg_score": 0.2851, "inbatch_pos_score": 1.0117, "learning_rate": 1.2055555555555556e-05, "loss": 2.6995, "norm_diff": 0.0382, "norm_loss": 0.0, "num_token_doc": 66.6379, "num_token_overlap": 18.0203, "num_token_query": 52.337, "num_token_union": 73.4607, "num_word_context": 202.3808, "num_word_doc": 49.8025, "num_word_query": 39.879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2305.4253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.4063, "queue_k_norm": 1.4531, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.337, "sent_len_1": 66.6379, "sent_len_max_0": 128.0, "sent_len_max_1": 186.0813, "stdk": 0.0484, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.7258, "doc_norm": 1.4585, "encoder_q-embeddings": 860.3827, "encoder_q-layer.0": 551.9717, "encoder_q-layer.1": 596.7385, "encoder_q-layer.10": 1391.8114, "encoder_q-layer.11": 2788.5669, "encoder_q-layer.2": 678.5499, "encoder_q-layer.3": 685.7589, "encoder_q-layer.4": 757.4376, "encoder_q-layer.5": 759.8514, "encoder_q-layer.6": 897.5767, "encoder_q-layer.7": 1027.7246, "encoder_q-layer.8": 1228.2982, "encoder_q-layer.9": 1186.0536, "epoch": 0.77, "inbatch_neg_score": 0.2842, "inbatch_pos_score": 1.0215, "learning_rate": 1.2e-05, "loss": 2.7258, "norm_diff": 0.0662, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 17.9585, "num_token_query": 52.2009, "num_token_union": 73.4366, "num_word_context": 202.1622, "num_word_doc": 49.7716, "num_word_query": 39.7975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1757.6244, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.3923, "queue_k_norm": 1.4541, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2009, "sent_len_1": 66.7179, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.72, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.7188, "doc_norm": 1.4534, "encoder_q-embeddings": 1038.8435, "encoder_q-layer.0": 676.8956, "encoder_q-layer.1": 740.1647, "encoder_q-layer.10": 1407.3152, "encoder_q-layer.11": 2751.6487, "encoder_q-layer.2": 860.5892, "encoder_q-layer.3": 887.9553, "encoder_q-layer.4": 962.5106, "encoder_q-layer.5": 994.0905, "encoder_q-layer.6": 1092.3853, "encoder_q-layer.7": 1132.5515, "encoder_q-layer.8": 1283.5861, "encoder_q-layer.9": 1161.934, "epoch": 0.77, "inbatch_neg_score": 0.2798, "inbatch_pos_score": 1.0117, "learning_rate": 1.1944444444444446e-05, "loss": 2.7188, "norm_diff": 0.0525, "norm_loss": 0.0, "num_token_doc": 66.9931, "num_token_overlap": 18.066, "num_token_query": 52.2845, "num_token_union": 73.571, "num_word_context": 202.5218, "num_word_doc": 50.034, "num_word_query": 39.8494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1881.5171, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2791, "query_norm": 1.4008, "queue_k_norm": 1.4534, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2845, "sent_len_1": 66.9931, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4837, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.7229, "doc_norm": 1.4492, "encoder_q-embeddings": 1987.7588, "encoder_q-layer.0": 1267.7843, "encoder_q-layer.1": 1337.3345, "encoder_q-layer.10": 2602.8093, "encoder_q-layer.11": 5837.064, "encoder_q-layer.2": 1529.4794, "encoder_q-layer.3": 1581.156, "encoder_q-layer.4": 1753.2208, "encoder_q-layer.5": 1806.6669, "encoder_q-layer.6": 2133.4722, "encoder_q-layer.7": 2333.8684, "encoder_q-layer.8": 2816.3267, "encoder_q-layer.9": 2466.9102, "epoch": 0.77, "inbatch_neg_score": 0.2846, "inbatch_pos_score": 0.9976, "learning_rate": 1.188888888888889e-05, "loss": 2.7229, "norm_diff": 0.04, "norm_loss": 0.0, "num_token_doc": 66.9691, "num_token_overlap": 18.0403, "num_token_query": 52.2985, "num_token_union": 73.6116, "num_word_context": 202.6666, "num_word_doc": 49.978, "num_word_query": 39.8745, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3865.7858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2842, "query_norm": 1.4092, "queue_k_norm": 1.4547, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2985, "sent_len_1": 66.9691, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3675, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 62.9883, "active_queue_size": 16384.0, "cl_loss": 2.7318, "doc_norm": 1.4626, "encoder_q-embeddings": 5548.5664, "encoder_q-layer.0": 3732.2517, "encoder_q-layer.1": 4152.0718, "encoder_q-layer.10": 2745.9695, "encoder_q-layer.11": 5316.9126, "encoder_q-layer.2": 4652.1714, "encoder_q-layer.3": 5008.7095, "encoder_q-layer.4": 6411.959, "encoder_q-layer.5": 6808.6919, "encoder_q-layer.6": 6431.2593, "encoder_q-layer.7": 6152.5991, "encoder_q-layer.8": 4507.4653, "encoder_q-layer.9": 2567.302, "epoch": 0.77, "inbatch_neg_score": 0.2798, "inbatch_pos_score": 1.0439, "learning_rate": 1.1833333333333334e-05, "loss": 2.7318, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.6344, "num_token_overlap": 17.9609, "num_token_query": 52.2303, "num_token_union": 73.3976, "num_word_context": 202.2248, "num_word_doc": 49.6971, "num_word_query": 39.8411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7581.1634, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2805, "query_norm": 1.4098, "queue_k_norm": 1.4516, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2303, "sent_len_1": 66.6344, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6813, "stdk": 0.0492, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 62.8906, "active_queue_size": 16384.0, "cl_loss": 2.7242, "doc_norm": 1.4575, "encoder_q-embeddings": 2385.3896, "encoder_q-layer.0": 1492.6001, "encoder_q-layer.1": 1659.4469, "encoder_q-layer.10": 2444.4106, "encoder_q-layer.11": 5381.4673, "encoder_q-layer.2": 1849.1619, "encoder_q-layer.3": 1947.7704, "encoder_q-layer.4": 2036.9912, "encoder_q-layer.5": 2052.7642, "encoder_q-layer.6": 2393.0354, "encoder_q-layer.7": 2619.9932, "encoder_q-layer.8": 2893.0903, "encoder_q-layer.9": 2499.3374, "epoch": 0.77, "inbatch_neg_score": 0.279, "inbatch_pos_score": 1.0293, "learning_rate": 1.1777777777777778e-05, "loss": 2.7242, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.7664, "num_token_overlap": 17.9941, "num_token_query": 52.3385, "num_token_union": 73.5651, "num_word_context": 202.4763, "num_word_doc": 49.8414, "num_word_query": 39.9325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3966.0923, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2795, "query_norm": 1.4081, "queue_k_norm": 1.4538, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3385, "sent_len_1": 66.7664, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9825, "stdk": 0.049, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6985, "doc_norm": 1.4541, "encoder_q-embeddings": 1489.3284, "encoder_q-layer.0": 1002.8985, "encoder_q-layer.1": 1176.6271, "encoder_q-layer.10": 1369.7535, "encoder_q-layer.11": 2798.1748, "encoder_q-layer.2": 1417.9092, "encoder_q-layer.3": 1471.7159, "encoder_q-layer.4": 1664.8005, "encoder_q-layer.5": 1623.8219, "encoder_q-layer.6": 1693.7871, "encoder_q-layer.7": 1559.5256, "encoder_q-layer.8": 1550.3787, "encoder_q-layer.9": 1183.0664, "epoch": 0.77, "inbatch_neg_score": 0.2803, "inbatch_pos_score": 1.0156, "learning_rate": 1.1722222222222224e-05, "loss": 2.6985, "norm_diff": 0.0592, "norm_loss": 0.0, "num_token_doc": 66.7436, "num_token_overlap": 18.0431, "num_token_query": 52.2278, "num_token_union": 73.4407, "num_word_context": 202.303, "num_word_doc": 49.7683, "num_word_query": 39.8048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2334.471, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.3949, "queue_k_norm": 1.4543, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2278, "sent_len_1": 66.7436, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.875, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.713, "doc_norm": 1.462, "encoder_q-embeddings": 892.9482, "encoder_q-layer.0": 572.4625, "encoder_q-layer.1": 630.0477, "encoder_q-layer.10": 1305.3578, "encoder_q-layer.11": 2801.5237, "encoder_q-layer.2": 719.009, "encoder_q-layer.3": 736.6684, "encoder_q-layer.4": 804.4115, "encoder_q-layer.5": 873.9665, "encoder_q-layer.6": 977.2306, "encoder_q-layer.7": 1133.0527, "encoder_q-layer.8": 1312.4637, "encoder_q-layer.9": 1198.0869, "epoch": 0.77, "inbatch_neg_score": 0.2822, "inbatch_pos_score": 1.0342, "learning_rate": 1.1666666666666668e-05, "loss": 2.713, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.8458, "num_token_overlap": 18.0602, "num_token_query": 52.2934, "num_token_union": 73.4662, "num_word_context": 202.4237, "num_word_doc": 49.8344, "num_word_query": 39.8458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1816.8228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2822, "query_norm": 1.4105, "queue_k_norm": 1.4541, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2934, "sent_len_1": 66.8458, "sent_len_max_0": 128.0, "sent_len_max_1": 192.1325, "stdk": 0.0492, "stdq": 0.0468, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.7191, "doc_norm": 1.4563, "encoder_q-embeddings": 1166.7263, "encoder_q-layer.0": 812.3295, "encoder_q-layer.1": 883.0737, "encoder_q-layer.10": 1360.1794, "encoder_q-layer.11": 3006.4534, "encoder_q-layer.2": 979.8336, "encoder_q-layer.3": 988.57, "encoder_q-layer.4": 1068.0265, "encoder_q-layer.5": 1078.0739, "encoder_q-layer.6": 1096.3538, "encoder_q-layer.7": 1135.5225, "encoder_q-layer.8": 1329.6931, "encoder_q-layer.9": 1265.4999, "epoch": 0.77, "inbatch_neg_score": 0.2836, "inbatch_pos_score": 0.9941, "learning_rate": 1.1611111111111112e-05, "loss": 2.7191, "norm_diff": 0.0554, "norm_loss": 0.0, "num_token_doc": 66.7229, "num_token_overlap": 17.9933, "num_token_query": 52.3576, "num_token_union": 73.4598, "num_word_context": 202.1196, "num_word_doc": 49.7876, "num_word_query": 39.9259, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2041.2326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2815, "query_norm": 1.4009, "queue_k_norm": 1.4515, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3576, "sent_len_1": 66.7229, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.2825, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6999, "doc_norm": 1.4485, "encoder_q-embeddings": 3295.8647, "encoder_q-layer.0": 3736.1826, "encoder_q-layer.1": 4596.1953, "encoder_q-layer.10": 1344.0834, "encoder_q-layer.11": 2838.8955, "encoder_q-layer.2": 6553.8071, "encoder_q-layer.3": 4553.2783, "encoder_q-layer.4": 2473.4128, "encoder_q-layer.5": 1390.4434, "encoder_q-layer.6": 1346.0957, "encoder_q-layer.7": 1279.5961, "encoder_q-layer.8": 1389.3728, "encoder_q-layer.9": 1198.4246, "epoch": 0.77, "inbatch_neg_score": 0.2774, "inbatch_pos_score": 0.9858, "learning_rate": 1.1555555555555556e-05, "loss": 2.6999, "norm_diff": 0.0652, "norm_loss": 0.0, "num_token_doc": 66.823, "num_token_overlap": 17.9826, "num_token_query": 52.2287, "num_token_union": 73.4897, "num_word_context": 202.2973, "num_word_doc": 49.8561, "num_word_query": 39.7967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5262.743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2764, "query_norm": 1.3834, "queue_k_norm": 1.4533, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2287, "sent_len_1": 66.823, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1262, "stdk": 0.0486, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.7136, "doc_norm": 1.4577, "encoder_q-embeddings": 912.7161, "encoder_q-layer.0": 583.5549, "encoder_q-layer.1": 630.1874, "encoder_q-layer.10": 1316.2373, "encoder_q-layer.11": 2749.7402, "encoder_q-layer.2": 701.278, "encoder_q-layer.3": 707.9981, "encoder_q-layer.4": 740.2378, "encoder_q-layer.5": 764.3464, "encoder_q-layer.6": 939.5284, "encoder_q-layer.7": 1131.4343, "encoder_q-layer.8": 1284.8005, "encoder_q-layer.9": 1148.5825, "epoch": 0.77, "inbatch_neg_score": 0.2845, "inbatch_pos_score": 1.0342, "learning_rate": 1.1500000000000002e-05, "loss": 2.7136, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.8411, "num_token_overlap": 18.0597, "num_token_query": 52.3133, "num_token_union": 73.497, "num_word_context": 202.1529, "num_word_doc": 49.8634, "num_word_query": 39.8894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1791.4594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2834, "query_norm": 1.4048, "queue_k_norm": 1.4557, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3133, "sent_len_1": 66.8411, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.3738, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.73, "doc_norm": 1.4549, "encoder_q-embeddings": 1067.4028, "encoder_q-layer.0": 726.6381, "encoder_q-layer.1": 788.2988, "encoder_q-layer.10": 1430.0007, "encoder_q-layer.11": 2839.3762, "encoder_q-layer.2": 907.522, "encoder_q-layer.3": 934.7095, "encoder_q-layer.4": 1024.2064, "encoder_q-layer.5": 1005.3827, "encoder_q-layer.6": 990.8967, "encoder_q-layer.7": 1116.8652, "encoder_q-layer.8": 1281.9453, "encoder_q-layer.9": 1182.6553, "epoch": 0.78, "inbatch_neg_score": 0.2817, "inbatch_pos_score": 0.9976, "learning_rate": 1.1444444444444446e-05, "loss": 2.73, "norm_diff": 0.0667, "norm_loss": 0.0, "num_token_doc": 66.6939, "num_token_overlap": 17.9584, "num_token_query": 52.1026, "num_token_union": 73.3488, "num_word_context": 202.0677, "num_word_doc": 49.7809, "num_word_query": 39.7244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1935.0183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.3882, "queue_k_norm": 1.4534, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1026, "sent_len_1": 66.6939, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5375, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7143, "doc_norm": 1.4494, "encoder_q-embeddings": 1174.66, "encoder_q-layer.0": 755.9072, "encoder_q-layer.1": 833.7535, "encoder_q-layer.10": 1247.2174, "encoder_q-layer.11": 2877.6028, "encoder_q-layer.2": 969.0623, "encoder_q-layer.3": 999.4234, "encoder_q-layer.4": 1017.2628, "encoder_q-layer.5": 1084.5638, "encoder_q-layer.6": 1183.1953, "encoder_q-layer.7": 1231.4884, "encoder_q-layer.8": 1470.8622, "encoder_q-layer.9": 1225.3481, "epoch": 0.78, "inbatch_neg_score": 0.2802, "inbatch_pos_score": 1.001, "learning_rate": 1.138888888888889e-05, "loss": 2.7143, "norm_diff": 0.0419, "norm_loss": 0.0, "num_token_doc": 66.8024, "num_token_overlap": 17.9939, "num_token_query": 52.2228, "num_token_union": 73.4102, "num_word_context": 202.4955, "num_word_doc": 49.826, "num_word_query": 39.7896, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2006.6833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2803, "query_norm": 1.4075, "queue_k_norm": 1.4533, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2228, "sent_len_1": 66.8024, "sent_len_max_0": 128.0, "sent_len_max_1": 191.6912, "stdk": 0.0486, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7129, "doc_norm": 1.4496, "encoder_q-embeddings": 3286.1008, "encoder_q-layer.0": 2358.9351, "encoder_q-layer.1": 2517.3022, "encoder_q-layer.10": 1304.8123, "encoder_q-layer.11": 2813.7849, "encoder_q-layer.2": 2741.3845, "encoder_q-layer.3": 2518.1443, "encoder_q-layer.4": 2352.1672, "encoder_q-layer.5": 2056.3132, "encoder_q-layer.6": 2274.2825, "encoder_q-layer.7": 2235.979, "encoder_q-layer.8": 2431.4004, "encoder_q-layer.9": 1659.652, "epoch": 0.78, "inbatch_neg_score": 0.2813, "inbatch_pos_score": 1.0088, "learning_rate": 1.1333333333333334e-05, "loss": 2.7129, "norm_diff": 0.046, "norm_loss": 0.0, "num_token_doc": 66.7959, "num_token_overlap": 18.009, "num_token_query": 52.1696, "num_token_union": 73.4269, "num_word_context": 201.9302, "num_word_doc": 49.8094, "num_word_query": 39.7706, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3692.3139, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.4036, "queue_k_norm": 1.4562, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1696, "sent_len_1": 66.7959, "sent_len_max_0": 127.995, "sent_len_max_1": 189.6087, "stdk": 0.0486, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7138, "doc_norm": 1.4578, "encoder_q-embeddings": 920.2148, "encoder_q-layer.0": 604.3748, "encoder_q-layer.1": 644.5286, "encoder_q-layer.10": 1275.7531, "encoder_q-layer.11": 2786.8584, "encoder_q-layer.2": 739.1763, "encoder_q-layer.3": 752.1577, "encoder_q-layer.4": 770.2506, "encoder_q-layer.5": 766.6106, "encoder_q-layer.6": 907.9159, "encoder_q-layer.7": 1023.5198, "encoder_q-layer.8": 1220.4512, "encoder_q-layer.9": 1164.1641, "epoch": 0.78, "inbatch_neg_score": 0.2832, "inbatch_pos_score": 1.0156, "learning_rate": 1.127777777777778e-05, "loss": 2.7138, "norm_diff": 0.0493, "norm_loss": 0.0, "num_token_doc": 66.8184, "num_token_overlap": 17.9731, "num_token_query": 52.0823, "num_token_union": 73.4013, "num_word_context": 202.3666, "num_word_doc": 49.7996, "num_word_query": 39.6701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1758.4558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.282, "query_norm": 1.4086, "queue_k_norm": 1.4542, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0823, "sent_len_1": 66.8184, "sent_len_max_0": 127.995, "sent_len_max_1": 192.4725, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6997, "doc_norm": 1.454, "encoder_q-embeddings": 1017.7548, "encoder_q-layer.0": 646.4902, "encoder_q-layer.1": 741.839, "encoder_q-layer.10": 1474.8937, "encoder_q-layer.11": 2792.1597, "encoder_q-layer.2": 834.4485, "encoder_q-layer.3": 890.5054, "encoder_q-layer.4": 890.9164, "encoder_q-layer.5": 931.3468, "encoder_q-layer.6": 1046.0215, "encoder_q-layer.7": 1163.2501, "encoder_q-layer.8": 1331.0911, "encoder_q-layer.9": 1251.4614, "epoch": 0.78, "inbatch_neg_score": 0.2791, "inbatch_pos_score": 1.0176, "learning_rate": 1.1222222222222224e-05, "loss": 2.6997, "norm_diff": 0.0507, "norm_loss": 0.0, "num_token_doc": 66.8854, "num_token_overlap": 18.0002, "num_token_query": 52.2016, "num_token_union": 73.4945, "num_word_context": 202.4709, "num_word_doc": 49.912, "num_word_query": 39.7819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1922.1675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.4033, "queue_k_norm": 1.4546, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2016, "sent_len_1": 66.8854, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5513, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7142, "doc_norm": 1.4506, "encoder_q-embeddings": 1059.163, "encoder_q-layer.0": 672.1663, "encoder_q-layer.1": 739.6802, "encoder_q-layer.10": 1349.8662, "encoder_q-layer.11": 3004.8906, "encoder_q-layer.2": 871.0031, "encoder_q-layer.3": 882.044, "encoder_q-layer.4": 978.7052, "encoder_q-layer.5": 977.2263, "encoder_q-layer.6": 1050.9087, "encoder_q-layer.7": 1108.7915, "encoder_q-layer.8": 1384.5764, "encoder_q-layer.9": 1240.5455, "epoch": 0.78, "inbatch_neg_score": 0.2817, "inbatch_pos_score": 1.0156, "learning_rate": 1.1166666666666668e-05, "loss": 2.7142, "norm_diff": 0.0454, "norm_loss": 0.0, "num_token_doc": 66.706, "num_token_overlap": 17.9826, "num_token_query": 52.2894, "num_token_union": 73.4448, "num_word_context": 202.3347, "num_word_doc": 49.7895, "num_word_query": 39.8789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1972.5137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.4052, "queue_k_norm": 1.4559, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2894, "sent_len_1": 66.706, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3288, "stdk": 0.0487, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.7011, "doc_norm": 1.4563, "encoder_q-embeddings": 1101.9784, "encoder_q-layer.0": 742.6258, "encoder_q-layer.1": 832.1734, "encoder_q-layer.10": 1229.4828, "encoder_q-layer.11": 2647.2566, "encoder_q-layer.2": 914.1186, "encoder_q-layer.3": 925.5294, "encoder_q-layer.4": 914.2247, "encoder_q-layer.5": 885.1161, "encoder_q-layer.6": 987.452, "encoder_q-layer.7": 1146.603, "encoder_q-layer.8": 1312.6987, "encoder_q-layer.9": 1102.3916, "epoch": 0.78, "inbatch_neg_score": 0.2828, "inbatch_pos_score": 1.0186, "learning_rate": 1.1111111111111112e-05, "loss": 2.7011, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.5955, "num_token_overlap": 18.05, "num_token_query": 52.3683, "num_token_union": 73.3855, "num_word_context": 201.7155, "num_word_doc": 49.715, "num_word_query": 39.9345, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1874.4354, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2825, "query_norm": 1.3936, "queue_k_norm": 1.4558, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3683, "sent_len_1": 66.5955, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.1337, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 28.2176, "dev_samples_per_second": 2.268, "dev_steps_per_second": 0.035, "epoch": 0.78, "step": 80000, "test_accuracy": 93.75, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3571547567844391, "test_doc_norm": 1.4239628314971924, "test_inbatch_neg_score": 0.642953634262085, "test_inbatch_pos_score": 1.5806140899658203, "test_loss": 0.3571547567844391, "test_loss_align": 0.988666296005249, "test_loss_unif": 3.8291423320770264, "test_loss_unif_q@queue": 3.829141855239868, "test_norm_diff": 0.022874552756547928, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2785772383213043, "test_query_norm": 1.4466485977172852, "test_queue_k_norm": 1.4556074142456055, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042435623705387115, "test_stdq": 0.04254382103681564, "test_stdqueue_k": 0.048982929438352585, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.2176, "dev_samples_per_second": 2.268, "dev_steps_per_second": 0.035, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.39847, "eval_beir-arguana_recall@10": 0.66785, "eval_beir-arguana_recall@100": 0.93883, "eval_beir-arguana_recall@20": 0.78521, "eval_beir-avg_ndcg@10": 0.37488774999999996, "eval_beir-avg_recall@10": 0.4486479166666667, "eval_beir-avg_recall@100": 0.630854, "eval_beir-avg_recall@20": 0.5074159166666667, "eval_beir-cqadupstack_ndcg@10": 0.2651974999999999, "eval_beir-cqadupstack_recall@10": 0.35937916666666664, "eval_beir-cqadupstack_recall@100": 0.5892299999999999, "eval_beir-cqadupstack_recall@20": 0.4255791666666667, "eval_beir-fiqa_ndcg@10": 0.25467, "eval_beir-fiqa_recall@10": 0.31975, "eval_beir-fiqa_recall@100": 0.57755, "eval_beir-fiqa_recall@20": 0.39283, "eval_beir-nfcorpus_ndcg@10": 0.2929, "eval_beir-nfcorpus_recall@10": 0.1475, "eval_beir-nfcorpus_recall@100": 0.26887, "eval_beir-nfcorpus_recall@20": 0.17433, "eval_beir-nq_ndcg@10": 0.27468, "eval_beir-nq_recall@10": 0.45314, "eval_beir-nq_recall@100": 0.79138, "eval_beir-nq_recall@20": 0.56397, "eval_beir-quora_ndcg@10": 0.78518, "eval_beir-quora_recall@10": 0.88921, "eval_beir-quora_recall@100": 0.9774, "eval_beir-quora_recall@20": 0.93163, "eval_beir-scidocs_ndcg@10": 0.15242, "eval_beir-scidocs_recall@10": 0.15923, "eval_beir-scidocs_recall@100": 0.3685, "eval_beir-scidocs_recall@20": 0.21387, "eval_beir-scifact_ndcg@10": 0.61804, "eval_beir-scifact_recall@10": 0.7815, "eval_beir-scifact_recall@100": 0.92156, "eval_beir-scifact_recall@20": 0.83578, "eval_beir-trec-covid_ndcg@10": 0.54065, "eval_beir-trec-covid_recall@10": 0.576, "eval_beir-trec-covid_recall@100": 0.43, "eval_beir-trec-covid_recall@20": 0.552, "eval_beir-webis-touche2020_ndcg@10": 0.16667, "eval_beir-webis-touche2020_recall@10": 0.13292, "eval_beir-webis-touche2020_recall@100": 0.44522, "eval_beir-webis-touche2020_recall@20": 0.19896, "eval_senteval-avg_sts": 0.7509850905121829, "eval_senteval-sickr_spearman": 0.7224356046062468, "eval_senteval-stsb_spearman": 0.7795345764181191, "step": 80000, "test_accuracy": 93.75, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3571547567844391, "test_doc_norm": 1.4239628314971924, "test_inbatch_neg_score": 0.642953634262085, "test_inbatch_pos_score": 1.5806140899658203, "test_loss": 0.3571547567844391, "test_loss_align": 0.988666296005249, "test_loss_unif": 3.8291423320770264, "test_loss_unif_q@queue": 3.829141855239868, "test_norm_diff": 0.022874552756547928, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2785772383213043, "test_query_norm": 1.4466485977172852, "test_queue_k_norm": 1.4556074142456055, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042435623705387115, "test_stdq": 0.04254382103681564, "test_stdqueue_k": 0.048982929438352585, "test_stdqueue_q": 0.0 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 2.7399, "doc_norm": 1.4522, "encoder_q-embeddings": 999.3373, "encoder_q-layer.0": 639.1943, "encoder_q-layer.1": 693.4212, "encoder_q-layer.10": 1388.1932, "encoder_q-layer.11": 2915.1042, "encoder_q-layer.2": 800.1569, "encoder_q-layer.3": 849.6198, "encoder_q-layer.4": 886.7648, "encoder_q-layer.5": 904.1697, "encoder_q-layer.6": 966.1161, "encoder_q-layer.7": 1082.4556, "encoder_q-layer.8": 1318.2639, "encoder_q-layer.9": 1209.6105, "epoch": 0.78, "inbatch_neg_score": 0.282, "inbatch_pos_score": 0.9722, "learning_rate": 1.1055555555555556e-05, "loss": 2.7399, "norm_diff": 0.0679, "norm_loss": 0.0, "num_token_doc": 66.8441, "num_token_overlap": 17.9553, "num_token_query": 52.1574, "num_token_union": 73.4746, "num_word_context": 202.2125, "num_word_doc": 49.8832, "num_word_query": 39.7759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1889.109, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.3843, "queue_k_norm": 1.4538, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1574, "sent_len_1": 66.8441, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9925, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 62.9883, "active_queue_size": 16384.0, "cl_loss": 2.7057, "doc_norm": 1.454, "encoder_q-embeddings": 1039.2836, "encoder_q-layer.0": 659.4468, "encoder_q-layer.1": 714.2915, "encoder_q-layer.10": 1293.0128, "encoder_q-layer.11": 2764.2227, "encoder_q-layer.2": 814.3267, "encoder_q-layer.3": 875.0195, "encoder_q-layer.4": 970.0192, "encoder_q-layer.5": 933.5492, "encoder_q-layer.6": 1003.766, "encoder_q-layer.7": 1152.2075, "encoder_q-layer.8": 1357.8125, "encoder_q-layer.9": 1210.4907, "epoch": 0.78, "inbatch_neg_score": 0.2799, "inbatch_pos_score": 1.0371, "learning_rate": 1.1000000000000001e-05, "loss": 2.7057, "norm_diff": 0.0483, "norm_loss": 0.0, "num_token_doc": 66.9236, "num_token_overlap": 18.0027, "num_token_query": 52.41, "num_token_union": 73.6284, "num_word_context": 202.4241, "num_word_doc": 49.895, "num_word_query": 39.914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1873.4396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.4057, "queue_k_norm": 1.4562, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.41, "sent_len_1": 66.9236, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.81, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.7218, "doc_norm": 1.4468, "encoder_q-embeddings": 1168.8336, "encoder_q-layer.0": 712.5834, "encoder_q-layer.1": 780.6096, "encoder_q-layer.10": 1314.0648, "encoder_q-layer.11": 2795.6147, "encoder_q-layer.2": 917.6903, "encoder_q-layer.3": 983.8267, "encoder_q-layer.4": 1076.2345, "encoder_q-layer.5": 1092.2166, "encoder_q-layer.6": 1234.7374, "encoder_q-layer.7": 1387.9136, "encoder_q-layer.8": 1481.1167, "encoder_q-layer.9": 1214.5287, "epoch": 0.78, "inbatch_neg_score": 0.2795, "inbatch_pos_score": 1.0029, "learning_rate": 1.0944444444444445e-05, "loss": 2.7218, "norm_diff": 0.0498, "norm_loss": 0.0, "num_token_doc": 66.7717, "num_token_overlap": 17.9615, "num_token_query": 52.2607, "num_token_union": 73.5246, "num_word_context": 202.8098, "num_word_doc": 49.8456, "num_word_query": 39.8362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2023.6692, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.397, "queue_k_norm": 1.4547, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2607, "sent_len_1": 66.7717, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2463, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.7075, "doc_norm": 1.4518, "encoder_q-embeddings": 3915.8027, "encoder_q-layer.0": 2850.0342, "encoder_q-layer.1": 3066.0701, "encoder_q-layer.10": 1346.0967, "encoder_q-layer.11": 2884.6653, "encoder_q-layer.2": 3598.5928, "encoder_q-layer.3": 3438.0037, "encoder_q-layer.4": 3019.4219, "encoder_q-layer.5": 2833.627, "encoder_q-layer.6": 2654.96, "encoder_q-layer.7": 2322.6006, "encoder_q-layer.8": 1933.6086, "encoder_q-layer.9": 1267.0594, "epoch": 0.78, "inbatch_neg_score": 0.2742, "inbatch_pos_score": 0.9961, "learning_rate": 1.088888888888889e-05, "loss": 2.7075, "norm_diff": 0.0624, "norm_loss": 0.0, "num_token_doc": 66.7627, "num_token_overlap": 18.0281, "num_token_query": 52.3773, "num_token_union": 73.5483, "num_word_context": 202.008, "num_word_doc": 49.7942, "num_word_query": 39.9214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4475.8329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2754, "query_norm": 1.3894, "queue_k_norm": 1.4559, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3773, "sent_len_1": 66.7627, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.87, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.73, "doc_norm": 1.4504, "encoder_q-embeddings": 2111.3528, "encoder_q-layer.0": 1418.5643, "encoder_q-layer.1": 1626.2047, "encoder_q-layer.10": 1409.0096, "encoder_q-layer.11": 2819.7461, "encoder_q-layer.2": 1914.1942, "encoder_q-layer.3": 1971.0437, "encoder_q-layer.4": 2296.6187, "encoder_q-layer.5": 2454.0359, "encoder_q-layer.6": 2626.5662, "encoder_q-layer.7": 1639.3475, "encoder_q-layer.8": 1363.7585, "encoder_q-layer.9": 1217.5336, "epoch": 0.79, "inbatch_neg_score": 0.2762, "inbatch_pos_score": 0.9922, "learning_rate": 1.0833333333333334e-05, "loss": 2.73, "norm_diff": 0.0663, "norm_loss": 0.0, "num_token_doc": 66.6047, "num_token_overlap": 18.0128, "num_token_query": 52.1688, "num_token_union": 73.289, "num_word_context": 202.1692, "num_word_doc": 49.7044, "num_word_query": 39.7601, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2947.2716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2773, "query_norm": 1.3841, "queue_k_norm": 1.4531, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1688, "sent_len_1": 66.6047, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.1587, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.7195, "doc_norm": 1.454, "encoder_q-embeddings": 1218.3177, "encoder_q-layer.0": 766.8915, "encoder_q-layer.1": 834.4187, "encoder_q-layer.10": 1248.7557, "encoder_q-layer.11": 2695.6921, "encoder_q-layer.2": 917.5738, "encoder_q-layer.3": 929.1729, "encoder_q-layer.4": 954.5579, "encoder_q-layer.5": 923.0493, "encoder_q-layer.6": 1013.8842, "encoder_q-layer.7": 1091.2244, "encoder_q-layer.8": 1331.3689, "encoder_q-layer.9": 1159.1654, "epoch": 0.79, "inbatch_neg_score": 0.2775, "inbatch_pos_score": 1.0049, "learning_rate": 1.0777777777777778e-05, "loss": 2.7195, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.5696, "num_token_overlap": 17.9271, "num_token_query": 52.184, "num_token_union": 73.4277, "num_word_context": 202.6508, "num_word_doc": 49.6951, "num_word_query": 39.7954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1901.9926, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2773, "query_norm": 1.3938, "queue_k_norm": 1.4541, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.184, "sent_len_1": 66.5696, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.2663, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7205, "doc_norm": 1.4566, "encoder_q-embeddings": 1594.4899, "encoder_q-layer.0": 1010.6606, "encoder_q-layer.1": 1155.3673, "encoder_q-layer.10": 1417.4203, "encoder_q-layer.11": 3010.4458, "encoder_q-layer.2": 1375.3634, "encoder_q-layer.3": 1529.8032, "encoder_q-layer.4": 1643.1855, "encoder_q-layer.5": 1395.7816, "encoder_q-layer.6": 1582.1239, "encoder_q-layer.7": 1737.2394, "encoder_q-layer.8": 1729.5872, "encoder_q-layer.9": 1316.9163, "epoch": 0.79, "inbatch_neg_score": 0.2776, "inbatch_pos_score": 1.0088, "learning_rate": 1.0722222222222222e-05, "loss": 2.7205, "norm_diff": 0.0533, "norm_loss": 0.0, "num_token_doc": 66.8503, "num_token_overlap": 17.987, "num_token_query": 52.3467, "num_token_union": 73.57, "num_word_context": 202.712, "num_word_doc": 49.8833, "num_word_query": 39.8945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2440.4726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.4033, "queue_k_norm": 1.4545, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3467, "sent_len_1": 66.8503, "sent_len_max_0": 128.0, "sent_len_max_1": 187.2713, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6953, "doc_norm": 1.4505, "encoder_q-embeddings": 912.0484, "encoder_q-layer.0": 588.484, "encoder_q-layer.1": 645.444, "encoder_q-layer.10": 1492.178, "encoder_q-layer.11": 2912.8689, "encoder_q-layer.2": 731.7536, "encoder_q-layer.3": 771.8331, "encoder_q-layer.4": 852.5414, "encoder_q-layer.5": 935.2951, "encoder_q-layer.6": 1019.0132, "encoder_q-layer.7": 1186.6686, "encoder_q-layer.8": 1367.4735, "encoder_q-layer.9": 1237.7325, "epoch": 0.79, "inbatch_neg_score": 0.276, "inbatch_pos_score": 1.0127, "learning_rate": 1.0666666666666667e-05, "loss": 2.6953, "norm_diff": 0.0677, "norm_loss": 0.0, "num_token_doc": 66.8873, "num_token_overlap": 18.0279, "num_token_query": 52.2816, "num_token_union": 73.5452, "num_word_context": 202.8256, "num_word_doc": 49.8817, "num_word_query": 39.8668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1863.0695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2747, "query_norm": 1.3828, "queue_k_norm": 1.4553, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2816, "sent_len_1": 66.8873, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.6712, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7149, "doc_norm": 1.4521, "encoder_q-embeddings": 2567.4053, "encoder_q-layer.0": 1595.7876, "encoder_q-layer.1": 1814.6545, "encoder_q-layer.10": 2641.9583, "encoder_q-layer.11": 5571.4854, "encoder_q-layer.2": 2105.9807, "encoder_q-layer.3": 2169.1421, "encoder_q-layer.4": 2303.9041, "encoder_q-layer.5": 2333.0957, "encoder_q-layer.6": 2529.1978, "encoder_q-layer.7": 2515.123, "encoder_q-layer.8": 2862.813, "encoder_q-layer.9": 2612.0, "epoch": 0.79, "inbatch_neg_score": 0.2774, "inbatch_pos_score": 1.001, "learning_rate": 1.0611111111111111e-05, "loss": 2.7149, "norm_diff": 0.0617, "norm_loss": 0.0, "num_token_doc": 66.748, "num_token_overlap": 18.0015, "num_token_query": 52.2221, "num_token_union": 73.4656, "num_word_context": 202.5296, "num_word_doc": 49.79, "num_word_query": 39.8466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4123.0266, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2769, "query_norm": 1.3905, "queue_k_norm": 1.4544, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2221, "sent_len_1": 66.748, "sent_len_max_0": 127.9725, "sent_len_max_1": 188.5225, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.7132, "doc_norm": 1.4516, "encoder_q-embeddings": 2441.4329, "encoder_q-layer.0": 1601.0237, "encoder_q-layer.1": 1836.9283, "encoder_q-layer.10": 2774.1865, "encoder_q-layer.11": 5619.5693, "encoder_q-layer.2": 2188.1738, "encoder_q-layer.3": 2286.2087, "encoder_q-layer.4": 2427.437, "encoder_q-layer.5": 2732.408, "encoder_q-layer.6": 2793.8149, "encoder_q-layer.7": 2524.4875, "encoder_q-layer.8": 2730.3875, "encoder_q-layer.9": 2414.4988, "epoch": 0.79, "inbatch_neg_score": 0.2734, "inbatch_pos_score": 0.9814, "learning_rate": 1.0555555555555555e-05, "loss": 2.7132, "norm_diff": 0.051, "norm_loss": 0.0, "num_token_doc": 66.8014, "num_token_overlap": 18.0483, "num_token_query": 52.392, "num_token_union": 73.511, "num_word_context": 202.2171, "num_word_doc": 49.8281, "num_word_query": 39.9212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4138.806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2729, "query_norm": 1.4006, "queue_k_norm": 1.4541, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.392, "sent_len_1": 66.8014, "sent_len_max_0": 127.995, "sent_len_max_1": 191.5325, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.7142, "doc_norm": 1.4494, "encoder_q-embeddings": 2094.5969, "encoder_q-layer.0": 1440.1635, "encoder_q-layer.1": 1621.92, "encoder_q-layer.10": 2729.27, "encoder_q-layer.11": 5647.0103, "encoder_q-layer.2": 1925.3279, "encoder_q-layer.3": 1948.0088, "encoder_q-layer.4": 2149.6279, "encoder_q-layer.5": 2139.0886, "encoder_q-layer.6": 2392.9111, "encoder_q-layer.7": 2463.9326, "encoder_q-layer.8": 2644.0427, "encoder_q-layer.9": 2411.6597, "epoch": 0.79, "inbatch_neg_score": 0.2786, "inbatch_pos_score": 0.9971, "learning_rate": 1.05e-05, "loss": 2.7142, "norm_diff": 0.0765, "norm_loss": 0.0, "num_token_doc": 66.8874, "num_token_overlap": 17.9889, "num_token_query": 52.2043, "num_token_union": 73.502, "num_word_context": 202.2518, "num_word_doc": 49.8345, "num_word_query": 39.7932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3915.6327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2776, "query_norm": 1.3729, "queue_k_norm": 1.4544, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2043, "sent_len_1": 66.8874, "sent_len_max_0": 128.0, "sent_len_max_1": 193.3862, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6947, "doc_norm": 1.4521, "encoder_q-embeddings": 4440.1553, "encoder_q-layer.0": 3050.116, "encoder_q-layer.1": 3588.04, "encoder_q-layer.10": 2890.1421, "encoder_q-layer.11": 5599.3906, "encoder_q-layer.2": 4650.0762, "encoder_q-layer.3": 4903.4639, "encoder_q-layer.4": 4885.2739, "encoder_q-layer.5": 5508.7451, "encoder_q-layer.6": 5148.2983, "encoder_q-layer.7": 3709.3472, "encoder_q-layer.8": 3766.2371, "encoder_q-layer.9": 2746.4192, "epoch": 0.79, "inbatch_neg_score": 0.2753, "inbatch_pos_score": 1.0059, "learning_rate": 1.0444444444444445e-05, "loss": 2.6947, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 66.7256, "num_token_overlap": 18.1271, "num_token_query": 52.3869, "num_token_union": 73.4212, "num_word_context": 202.6403, "num_word_doc": 49.8064, "num_word_query": 39.9402, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6350.3814, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2751, "query_norm": 1.391, "queue_k_norm": 1.4543, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3869, "sent_len_1": 66.7256, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4187, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.7298, "doc_norm": 1.4471, "encoder_q-embeddings": 2336.145, "encoder_q-layer.0": 1447.4282, "encoder_q-layer.1": 1620.2329, "encoder_q-layer.10": 2946.2798, "encoder_q-layer.11": 5511.1099, "encoder_q-layer.2": 1917.4601, "encoder_q-layer.3": 1972.9066, "encoder_q-layer.4": 1943.3308, "encoder_q-layer.5": 1948.9536, "encoder_q-layer.6": 2128.3547, "encoder_q-layer.7": 2548.9099, "encoder_q-layer.8": 2692.3577, "encoder_q-layer.9": 2464.4675, "epoch": 0.79, "inbatch_neg_score": 0.2773, "inbatch_pos_score": 1.0156, "learning_rate": 1.038888888888889e-05, "loss": 2.7298, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.6201, "num_token_overlap": 17.9363, "num_token_query": 51.9956, "num_token_union": 73.3242, "num_word_context": 202.0435, "num_word_doc": 49.6999, "num_word_query": 39.6546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3945.1104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2783, "query_norm": 1.4138, "queue_k_norm": 1.4541, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 51.9956, "sent_len_1": 66.6201, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7937, "stdk": 0.0486, "stdq": 0.047, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.7213, "doc_norm": 1.4563, "encoder_q-embeddings": 2284.0234, "encoder_q-layer.0": 1488.4209, "encoder_q-layer.1": 1612.4551, "encoder_q-layer.10": 2376.9202, "encoder_q-layer.11": 5259.1504, "encoder_q-layer.2": 1886.1847, "encoder_q-layer.3": 2078.4419, "encoder_q-layer.4": 2093.1091, "encoder_q-layer.5": 2113.4709, "encoder_q-layer.6": 2352.3201, "encoder_q-layer.7": 2378.4861, "encoder_q-layer.8": 2718.3083, "encoder_q-layer.9": 2298.3152, "epoch": 0.79, "inbatch_neg_score": 0.2773, "inbatch_pos_score": 1.0215, "learning_rate": 1.0333333333333333e-05, "loss": 2.7213, "norm_diff": 0.0564, "norm_loss": 0.0, "num_token_doc": 66.9112, "num_token_overlap": 18.0053, "num_token_query": 52.2461, "num_token_union": 73.5449, "num_word_context": 202.5066, "num_word_doc": 49.9, "num_word_query": 39.8383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3853.7933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2778, "query_norm": 1.3999, "queue_k_norm": 1.4547, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2461, "sent_len_1": 66.9112, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.225, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 63.0859, "active_queue_size": 16384.0, "cl_loss": 2.7098, "doc_norm": 1.4535, "encoder_q-embeddings": 2185.3462, "encoder_q-layer.0": 1445.9396, "encoder_q-layer.1": 1649.6621, "encoder_q-layer.10": 2338.5115, "encoder_q-layer.11": 5198.0273, "encoder_q-layer.2": 1912.9703, "encoder_q-layer.3": 1989.8507, "encoder_q-layer.4": 2135.9463, "encoder_q-layer.5": 2188.7263, "encoder_q-layer.6": 2336.3857, "encoder_q-layer.7": 2259.7573, "encoder_q-layer.8": 2494.7463, "encoder_q-layer.9": 2178.9661, "epoch": 0.8, "inbatch_neg_score": 0.2819, "inbatch_pos_score": 1.0273, "learning_rate": 1.0277777777777777e-05, "loss": 2.7098, "norm_diff": 0.0509, "norm_loss": 0.0, "num_token_doc": 66.8572, "num_token_overlap": 18.0473, "num_token_query": 52.3056, "num_token_union": 73.5285, "num_word_context": 202.4598, "num_word_doc": 49.8508, "num_word_query": 39.8792, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3740.7123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.281, "query_norm": 1.4025, "queue_k_norm": 1.4525, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3056, "sent_len_1": 66.8572, "sent_len_max_0": 127.9788, "sent_len_max_1": 191.135, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.7222, "doc_norm": 1.4566, "encoder_q-embeddings": 2437.2458, "encoder_q-layer.0": 1576.6901, "encoder_q-layer.1": 1856.7465, "encoder_q-layer.10": 2480.4751, "encoder_q-layer.11": 5432.8604, "encoder_q-layer.2": 2172.6721, "encoder_q-layer.3": 2288.9238, "encoder_q-layer.4": 2425.0513, "encoder_q-layer.5": 2427.0784, "encoder_q-layer.6": 2581.2268, "encoder_q-layer.7": 2647.6289, "encoder_q-layer.8": 2708.1052, "encoder_q-layer.9": 2343.406, "epoch": 0.8, "inbatch_neg_score": 0.2865, "inbatch_pos_score": 1.0049, "learning_rate": 1.0222222222222223e-05, "loss": 2.7222, "norm_diff": 0.0633, "norm_loss": 0.0, "num_token_doc": 66.7654, "num_token_overlap": 17.9978, "num_token_query": 52.2073, "num_token_union": 73.4674, "num_word_context": 202.843, "num_word_doc": 49.8244, "num_word_query": 39.8139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4090.7829, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2854, "query_norm": 1.3933, "queue_k_norm": 1.4523, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2073, "sent_len_1": 66.7654, "sent_len_max_0": 127.9925, "sent_len_max_1": 187.9563, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.6845, "doc_norm": 1.4597, "encoder_q-embeddings": 2062.6863, "encoder_q-layer.0": 1235.5773, "encoder_q-layer.1": 1353.5276, "encoder_q-layer.10": 2635.6074, "encoder_q-layer.11": 5618.7451, "encoder_q-layer.2": 1512.4769, "encoder_q-layer.3": 1573.4629, "encoder_q-layer.4": 1651.6832, "encoder_q-layer.5": 1739.0131, "encoder_q-layer.6": 1955.8729, "encoder_q-layer.7": 2363.5581, "encoder_q-layer.8": 2873.145, "encoder_q-layer.9": 2532.8315, "epoch": 0.8, "inbatch_neg_score": 0.2854, "inbatch_pos_score": 1.0381, "learning_rate": 1.0166666666666667e-05, "loss": 2.6845, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 67.1853, "num_token_overlap": 18.1473, "num_token_query": 52.2861, "num_token_union": 73.6073, "num_word_context": 202.6067, "num_word_doc": 50.1379, "num_word_query": 39.8675, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3827.061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.4051, "queue_k_norm": 1.4547, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2861, "sent_len_1": 67.1853, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8525, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6848, "doc_norm": 1.4618, "encoder_q-embeddings": 7423.5259, "encoder_q-layer.0": 5066.999, "encoder_q-layer.1": 6306.606, "encoder_q-layer.10": 2555.1279, "encoder_q-layer.11": 5536.2002, "encoder_q-layer.2": 7353.6118, "encoder_q-layer.3": 7966.4214, "encoder_q-layer.4": 9181.3584, "encoder_q-layer.5": 10428.5898, "encoder_q-layer.6": 9978.7666, "encoder_q-layer.7": 7750.5938, "encoder_q-layer.8": 6385.9702, "encoder_q-layer.9": 3222.0339, "epoch": 0.8, "inbatch_neg_score": 0.2812, "inbatch_pos_score": 1.0371, "learning_rate": 1.0111111111111111e-05, "loss": 2.6848, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.8377, "num_token_overlap": 18.0984, "num_token_query": 52.4296, "num_token_union": 73.533, "num_word_context": 202.1901, "num_word_doc": 49.8765, "num_word_query": 39.9608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10875.8434, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.282, "query_norm": 1.4107, "queue_k_norm": 1.4548, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4296, "sent_len_1": 66.8377, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.3988, "stdk": 0.0492, "stdq": 0.0468, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.7192, "doc_norm": 1.4525, "encoder_q-embeddings": 2420.9519, "encoder_q-layer.0": 1638.2734, "encoder_q-layer.1": 1782.2532, "encoder_q-layer.10": 2467.4126, "encoder_q-layer.11": 5332.2148, "encoder_q-layer.2": 1988.783, "encoder_q-layer.3": 2002.2594, "encoder_q-layer.4": 2072.0916, "encoder_q-layer.5": 2113.4949, "encoder_q-layer.6": 2170.1587, "encoder_q-layer.7": 2412.1223, "encoder_q-layer.8": 2714.3176, "encoder_q-layer.9": 2428.042, "epoch": 0.8, "inbatch_neg_score": 0.2853, "inbatch_pos_score": 1.0225, "learning_rate": 1.0055555555555555e-05, "loss": 2.7192, "norm_diff": 0.0388, "norm_loss": 0.0, "num_token_doc": 66.7728, "num_token_overlap": 18.0127, "num_token_query": 52.3823, "num_token_union": 73.5275, "num_word_context": 202.4552, "num_word_doc": 49.8056, "num_word_query": 39.9229, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3878.2584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.4137, "queue_k_norm": 1.4535, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3823, "sent_len_1": 66.7728, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.7688, "stdk": 0.0488, "stdq": 0.0469, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6936, "doc_norm": 1.4541, "encoder_q-embeddings": 2116.4602, "encoder_q-layer.0": 1379.1414, "encoder_q-layer.1": 1483.8243, "encoder_q-layer.10": 2491.906, "encoder_q-layer.11": 5534.9336, "encoder_q-layer.2": 1737.8943, "encoder_q-layer.3": 1753.9808, "encoder_q-layer.4": 1922.7347, "encoder_q-layer.5": 1931.3251, "encoder_q-layer.6": 2085.4958, "encoder_q-layer.7": 2310.6543, "encoder_q-layer.8": 2683.2314, "encoder_q-layer.9": 2408.6292, "epoch": 0.8, "inbatch_neg_score": 0.2885, "inbatch_pos_score": 1.0361, "learning_rate": 1e-05, "loss": 2.6936, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.5994, "num_token_overlap": 17.9919, "num_token_query": 52.2808, "num_token_union": 73.3678, "num_word_context": 202.1193, "num_word_doc": 49.6847, "num_word_query": 39.8492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3796.4753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2883, "query_norm": 1.4024, "queue_k_norm": 1.4528, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2808, "sent_len_1": 66.5994, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.31, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7237, "doc_norm": 1.4578, "encoder_q-embeddings": 1903.4081, "encoder_q-layer.0": 1282.0164, "encoder_q-layer.1": 1407.1473, "encoder_q-layer.10": 2834.0903, "encoder_q-layer.11": 6037.7251, "encoder_q-layer.2": 1657.8306, "encoder_q-layer.3": 1676.9928, "encoder_q-layer.4": 1778.8251, "encoder_q-layer.5": 1849.4801, "encoder_q-layer.6": 2054.4028, "encoder_q-layer.7": 2196.6162, "encoder_q-layer.8": 2586.7524, "encoder_q-layer.9": 2458.6562, "epoch": 0.8, "inbatch_neg_score": 0.2877, "inbatch_pos_score": 1.0098, "learning_rate": 9.944444444444445e-06, "loss": 2.7237, "norm_diff": 0.0579, "norm_loss": 0.0, "num_token_doc": 66.6722, "num_token_overlap": 17.949, "num_token_query": 52.2345, "num_token_union": 73.4812, "num_word_context": 202.3716, "num_word_doc": 49.7464, "num_word_query": 39.8158, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3868.5233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.3999, "queue_k_norm": 1.4536, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2345, "sent_len_1": 66.6722, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.0762, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.6981, "doc_norm": 1.4534, "encoder_q-embeddings": 3262.3477, "encoder_q-layer.0": 2251.0273, "encoder_q-layer.1": 2753.3767, "encoder_q-layer.10": 2749.6272, "encoder_q-layer.11": 5709.7456, "encoder_q-layer.2": 3148.0833, "encoder_q-layer.3": 2840.6348, "encoder_q-layer.4": 2854.2725, "encoder_q-layer.5": 2763.7092, "encoder_q-layer.6": 2787.3889, "encoder_q-layer.7": 2737.0151, "encoder_q-layer.8": 2923.4487, "encoder_q-layer.9": 2517.0986, "epoch": 0.8, "inbatch_neg_score": 0.2869, "inbatch_pos_score": 1.001, "learning_rate": 9.888888888888889e-06, "loss": 2.6981, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.8208, "num_token_overlap": 17.9938, "num_token_query": 52.2977, "num_token_union": 73.5263, "num_word_context": 202.4871, "num_word_doc": 49.8785, "num_word_query": 39.8627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4707.4303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.4025, "queue_k_norm": 1.4551, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2977, "sent_len_1": 66.8208, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9462, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.7107, "doc_norm": 1.4533, "encoder_q-embeddings": 2899.459, "encoder_q-layer.0": 2032.4674, "encoder_q-layer.1": 2408.4656, "encoder_q-layer.10": 2692.3425, "encoder_q-layer.11": 5616.8711, "encoder_q-layer.2": 2432.9807, "encoder_q-layer.3": 2133.0674, "encoder_q-layer.4": 2079.5371, "encoder_q-layer.5": 2095.5923, "encoder_q-layer.6": 2221.6184, "encoder_q-layer.7": 2526.8447, "encoder_q-layer.8": 2936.814, "encoder_q-layer.9": 2510.3323, "epoch": 0.8, "inbatch_neg_score": 0.2894, "inbatch_pos_score": 1.0195, "learning_rate": 9.833333333333333e-06, "loss": 2.7107, "norm_diff": 0.053, "norm_loss": 0.0, "num_token_doc": 66.8935, "num_token_overlap": 18.0655, "num_token_query": 52.4505, "num_token_union": 73.5729, "num_word_context": 202.5819, "num_word_doc": 49.9246, "num_word_query": 39.9888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4288.6059, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2893, "query_norm": 1.4003, "queue_k_norm": 1.454, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4505, "sent_len_1": 66.8935, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6438, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6911, "doc_norm": 1.4582, "encoder_q-embeddings": 2471.8264, "encoder_q-layer.0": 1650.4336, "encoder_q-layer.1": 1871.1418, "encoder_q-layer.10": 2402.9719, "encoder_q-layer.11": 5409.4902, "encoder_q-layer.2": 2175.7446, "encoder_q-layer.3": 2253.4131, "encoder_q-layer.4": 2405.0049, "encoder_q-layer.5": 2414.5266, "encoder_q-layer.6": 2560.9927, "encoder_q-layer.7": 2520.1655, "encoder_q-layer.8": 2684.9905, "encoder_q-layer.9": 2316.918, "epoch": 0.8, "inbatch_neg_score": 0.2819, "inbatch_pos_score": 1.0176, "learning_rate": 9.777777777777779e-06, "loss": 2.6911, "norm_diff": 0.0697, "norm_loss": 0.0, "num_token_doc": 67.0115, "num_token_overlap": 18.0905, "num_token_query": 52.304, "num_token_union": 73.5735, "num_word_context": 202.268, "num_word_doc": 50.0042, "num_word_query": 39.8636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4075.1103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.3885, "queue_k_norm": 1.4567, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.304, "sent_len_1": 67.0115, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0188, "stdk": 0.049, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6867, "doc_norm": 1.4569, "encoder_q-embeddings": 2950.2344, "encoder_q-layer.0": 2060.9888, "encoder_q-layer.1": 2230.3706, "encoder_q-layer.10": 2636.6929, "encoder_q-layer.11": 5602.7129, "encoder_q-layer.2": 2637.6812, "encoder_q-layer.3": 2805.98, "encoder_q-layer.4": 2927.5547, "encoder_q-layer.5": 2727.561, "encoder_q-layer.6": 2717.1958, "encoder_q-layer.7": 2761.3984, "encoder_q-layer.8": 2852.7778, "encoder_q-layer.9": 2431.3604, "epoch": 0.81, "inbatch_neg_score": 0.2879, "inbatch_pos_score": 1.0195, "learning_rate": 9.722222222222223e-06, "loss": 2.6867, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.8337, "num_token_overlap": 18.0712, "num_token_query": 52.141, "num_token_union": 73.3732, "num_word_context": 202.0005, "num_word_doc": 49.8886, "num_word_query": 39.7528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4519.3717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.3963, "queue_k_norm": 1.4554, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.141, "sent_len_1": 66.8337, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2413, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 2.7008, "doc_norm": 1.4565, "encoder_q-embeddings": 1928.9358, "encoder_q-layer.0": 1213.9725, "encoder_q-layer.1": 1292.0046, "encoder_q-layer.10": 2521.6633, "encoder_q-layer.11": 5668.6709, "encoder_q-layer.2": 1437.4918, "encoder_q-layer.3": 1546.2333, "encoder_q-layer.4": 1626.7266, "encoder_q-layer.5": 1713.7932, "encoder_q-layer.6": 1920.9869, "encoder_q-layer.7": 2115.4619, "encoder_q-layer.8": 2623.8936, "encoder_q-layer.9": 2451.5071, "epoch": 0.81, "inbatch_neg_score": 0.2872, "inbatch_pos_score": 0.9956, "learning_rate": 9.666666666666667e-06, "loss": 2.7008, "norm_diff": 0.0618, "norm_loss": 0.0, "num_token_doc": 66.8662, "num_token_overlap": 18.0442, "num_token_query": 52.3089, "num_token_union": 73.518, "num_word_context": 202.5706, "num_word_doc": 49.9112, "num_word_query": 39.8572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3717.0173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2866, "query_norm": 1.3946, "queue_k_norm": 1.4555, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3089, "sent_len_1": 66.8662, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.4425, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.6975, "doc_norm": 1.4525, "encoder_q-embeddings": 2156.0232, "encoder_q-layer.0": 1415.8265, "encoder_q-layer.1": 1550.4529, "encoder_q-layer.10": 2718.353, "encoder_q-layer.11": 5950.498, "encoder_q-layer.2": 1785.0964, "encoder_q-layer.3": 1876.6814, "encoder_q-layer.4": 1966.8931, "encoder_q-layer.5": 1957.7742, "encoder_q-layer.6": 2083.5769, "encoder_q-layer.7": 2267.9202, "encoder_q-layer.8": 2626.0498, "encoder_q-layer.9": 2536.7444, "epoch": 0.81, "inbatch_neg_score": 0.2864, "inbatch_pos_score": 0.9932, "learning_rate": 9.61111111111111e-06, "loss": 2.6975, "norm_diff": 0.0495, "norm_loss": 0.0, "num_token_doc": 66.6956, "num_token_overlap": 17.9891, "num_token_query": 52.1434, "num_token_union": 73.3786, "num_word_context": 202.157, "num_word_doc": 49.7834, "num_word_query": 39.7729, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3950.6765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2866, "query_norm": 1.4029, "queue_k_norm": 1.4562, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1434, "sent_len_1": 66.6956, "sent_len_max_0": 128.0, "sent_len_max_1": 188.88, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 2.698, "doc_norm": 1.4548, "encoder_q-embeddings": 2058.3989, "encoder_q-layer.0": 1253.9298, "encoder_q-layer.1": 1371.4218, "encoder_q-layer.10": 2430.0864, "encoder_q-layer.11": 5797.6772, "encoder_q-layer.2": 1553.9117, "encoder_q-layer.3": 1629.1584, "encoder_q-layer.4": 1753.8828, "encoder_q-layer.5": 1765.5546, "encoder_q-layer.6": 2019.6046, "encoder_q-layer.7": 2183.2869, "encoder_q-layer.8": 2582.8416, "encoder_q-layer.9": 2364.3896, "epoch": 0.81, "inbatch_neg_score": 0.2871, "inbatch_pos_score": 1.0146, "learning_rate": 9.555555555555556e-06, "loss": 2.698, "norm_diff": 0.0648, "norm_loss": 0.0, "num_token_doc": 66.798, "num_token_overlap": 18.0123, "num_token_query": 52.2671, "num_token_union": 73.5036, "num_word_context": 202.2031, "num_word_doc": 49.8529, "num_word_query": 39.8342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3803.7013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2878, "query_norm": 1.39, "queue_k_norm": 1.456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2671, "sent_len_1": 66.798, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9938, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.7185, "doc_norm": 1.4562, "encoder_q-embeddings": 4604.2539, "encoder_q-layer.0": 2951.5427, "encoder_q-layer.1": 3326.6074, "encoder_q-layer.10": 5470.5327, "encoder_q-layer.11": 11495.7129, "encoder_q-layer.2": 3815.687, "encoder_q-layer.3": 4080.4653, "encoder_q-layer.4": 4356.5425, "encoder_q-layer.5": 4272.0864, "encoder_q-layer.6": 4509.6753, "encoder_q-layer.7": 4964.0527, "encoder_q-layer.8": 5625.6621, "encoder_q-layer.9": 4927.9683, "epoch": 0.81, "inbatch_neg_score": 0.2817, "inbatch_pos_score": 1.0146, "learning_rate": 9.5e-06, "loss": 2.7185, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.5832, "num_token_overlap": 17.9965, "num_token_query": 52.2507, "num_token_union": 73.3773, "num_word_context": 202.339, "num_word_doc": 49.6965, "num_word_query": 39.8442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8079.8901, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.283, "query_norm": 1.3978, "queue_k_norm": 1.4556, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2507, "sent_len_1": 66.5832, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3487, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.6923, "doc_norm": 1.4591, "encoder_q-embeddings": 3575.5234, "encoder_q-layer.0": 2360.489, "encoder_q-layer.1": 2562.3274, "encoder_q-layer.10": 5181.9287, "encoder_q-layer.11": 11140.7793, "encoder_q-layer.2": 2785.7178, "encoder_q-layer.3": 3005.3711, "encoder_q-layer.4": 3120.9863, "encoder_q-layer.5": 3195.5796, "encoder_q-layer.6": 3720.2375, "encoder_q-layer.7": 4183.6172, "encoder_q-layer.8": 5107.0801, "encoder_q-layer.9": 4844.0845, "epoch": 0.81, "inbatch_neg_score": 0.2834, "inbatch_pos_score": 1.0176, "learning_rate": 9.444444444444445e-06, "loss": 2.6923, "norm_diff": 0.0521, "norm_loss": 0.0, "num_token_doc": 66.8262, "num_token_overlap": 18.0219, "num_token_query": 52.2234, "num_token_union": 73.4411, "num_word_context": 202.4566, "num_word_doc": 49.9203, "num_word_query": 39.8167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7254.1988, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2827, "query_norm": 1.407, "queue_k_norm": 1.4556, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2234, "sent_len_1": 66.8262, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.0625, "stdk": 0.049, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.7241, "doc_norm": 1.4529, "encoder_q-embeddings": 5520.0127, "encoder_q-layer.0": 3675.7227, "encoder_q-layer.1": 4055.8748, "encoder_q-layer.10": 5234.915, "encoder_q-layer.11": 11482.1855, "encoder_q-layer.2": 4931.1768, "encoder_q-layer.3": 5189.2061, "encoder_q-layer.4": 5096.4814, "encoder_q-layer.5": 5221.4595, "encoder_q-layer.6": 5720.0464, "encoder_q-layer.7": 5325.877, "encoder_q-layer.8": 5865.2983, "encoder_q-layer.9": 5006.2495, "epoch": 0.81, "inbatch_neg_score": 0.2846, "inbatch_pos_score": 1.0156, "learning_rate": 9.388888888888889e-06, "loss": 2.7241, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.5705, "num_token_overlap": 17.9271, "num_token_query": 52.1536, "num_token_union": 73.3671, "num_word_context": 202.0038, "num_word_doc": 49.6975, "num_word_query": 39.7942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8910.582, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2839, "query_norm": 1.3983, "queue_k_norm": 1.4564, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1536, "sent_len_1": 66.5705, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.2212, "stdk": 0.0487, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7077, "doc_norm": 1.4549, "encoder_q-embeddings": 3664.832, "encoder_q-layer.0": 2314.3225, "encoder_q-layer.1": 2502.1963, "encoder_q-layer.10": 5373.3569, "encoder_q-layer.11": 11183.8926, "encoder_q-layer.2": 2797.5461, "encoder_q-layer.3": 2946.5945, "encoder_q-layer.4": 3083.5322, "encoder_q-layer.5": 3129.2163, "encoder_q-layer.6": 3620.9294, "encoder_q-layer.7": 4009.6016, "encoder_q-layer.8": 5006.8262, "encoder_q-layer.9": 4888.5615, "epoch": 0.81, "inbatch_neg_score": 0.2886, "inbatch_pos_score": 1.0215, "learning_rate": 9.333333333333334e-06, "loss": 2.7077, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.731, "num_token_overlap": 17.9932, "num_token_query": 52.1762, "num_token_union": 73.3668, "num_word_context": 201.9737, "num_word_doc": 49.798, "num_word_query": 39.7884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7183.5064, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2886, "query_norm": 1.4066, "queue_k_norm": 1.4548, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1762, "sent_len_1": 66.731, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5563, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.6933, "doc_norm": 1.4541, "encoder_q-embeddings": 3088.1252, "encoder_q-layer.0": 2049.584, "encoder_q-layer.1": 2295.4578, "encoder_q-layer.10": 2633.7842, "encoder_q-layer.11": 5644.7705, "encoder_q-layer.2": 2667.1406, "encoder_q-layer.3": 3118.4163, "encoder_q-layer.4": 3575.2617, "encoder_q-layer.5": 3113.8601, "encoder_q-layer.6": 3283.4673, "encoder_q-layer.7": 3050.4561, "encoder_q-layer.8": 3090.0095, "encoder_q-layer.9": 2473.8083, "epoch": 0.81, "inbatch_neg_score": 0.2893, "inbatch_pos_score": 1.0117, "learning_rate": 9.277777777777778e-06, "loss": 2.6933, "norm_diff": 0.0566, "norm_loss": 0.0, "num_token_doc": 66.6578, "num_token_overlap": 17.9937, "num_token_query": 52.2734, "num_token_union": 73.4219, "num_word_context": 202.0135, "num_word_doc": 49.766, "num_word_query": 39.8521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4809.5739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2903, "query_norm": 1.3975, "queue_k_norm": 1.4555, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2734, "sent_len_1": 66.6578, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4475, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.6992, "doc_norm": 1.4632, "encoder_q-embeddings": 2772.4858, "encoder_q-layer.0": 1830.3346, "encoder_q-layer.1": 1962.2927, "encoder_q-layer.10": 2581.7629, "encoder_q-layer.11": 5783.2275, "encoder_q-layer.2": 2210.1035, "encoder_q-layer.3": 2253.5798, "encoder_q-layer.4": 2390.4446, "encoder_q-layer.5": 2461.6423, "encoder_q-layer.6": 2686.075, "encoder_q-layer.7": 2880.8198, "encoder_q-layer.8": 2888.8755, "encoder_q-layer.9": 2441.4473, "epoch": 0.81, "inbatch_neg_score": 0.2913, "inbatch_pos_score": 1.0195, "learning_rate": 9.222222222222222e-06, "loss": 2.6992, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 66.9227, "num_token_overlap": 18.0308, "num_token_query": 52.319, "num_token_union": 73.5984, "num_word_context": 202.4025, "num_word_doc": 49.9204, "num_word_query": 39.8751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4355.1713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.292, "query_norm": 1.4079, "queue_k_norm": 1.4551, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.319, "sent_len_1": 66.9227, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0613, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.7161, "doc_norm": 1.4536, "encoder_q-embeddings": 2029.6346, "encoder_q-layer.0": 1328.6716, "encoder_q-layer.1": 1491.2405, "encoder_q-layer.10": 2606.4133, "encoder_q-layer.11": 5466.8315, "encoder_q-layer.2": 1755.7433, "encoder_q-layer.3": 1852.1279, "encoder_q-layer.4": 1890.219, "encoder_q-layer.5": 1893.8693, "encoder_q-layer.6": 2058.2559, "encoder_q-layer.7": 2177.6604, "encoder_q-layer.8": 2533.0076, "encoder_q-layer.9": 2319.2183, "epoch": 0.82, "inbatch_neg_score": 0.2926, "inbatch_pos_score": 1.0205, "learning_rate": 9.166666666666666e-06, "loss": 2.7161, "norm_diff": 0.0433, "norm_loss": 0.0, "num_token_doc": 66.5928, "num_token_overlap": 17.9711, "num_token_query": 52.278, "num_token_union": 73.4407, "num_word_context": 201.8701, "num_word_doc": 49.6999, "num_word_query": 39.8411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3736.711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.4103, "queue_k_norm": 1.457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.278, "sent_len_1": 66.5928, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5325, "stdk": 0.0487, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.6994, "doc_norm": 1.4597, "encoder_q-embeddings": 1842.2928, "encoder_q-layer.0": 1181.4346, "encoder_q-layer.1": 1252.6202, "encoder_q-layer.10": 2451.7661, "encoder_q-layer.11": 5350.9346, "encoder_q-layer.2": 1390.1691, "encoder_q-layer.3": 1433.2728, "encoder_q-layer.4": 1543.4769, "encoder_q-layer.5": 1558.147, "encoder_q-layer.6": 1828.5977, "encoder_q-layer.7": 2164.1289, "encoder_q-layer.8": 2611.041, "encoder_q-layer.9": 2317.2922, "epoch": 0.82, "inbatch_neg_score": 0.2952, "inbatch_pos_score": 1.0244, "learning_rate": 9.111111111111112e-06, "loss": 2.6994, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.9056, "num_token_overlap": 17.9329, "num_token_query": 52.0157, "num_token_union": 73.4672, "num_word_context": 202.3553, "num_word_doc": 49.8929, "num_word_query": 39.6431, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3533.7606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.4113, "queue_k_norm": 1.4581, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0157, "sent_len_1": 66.9056, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3462, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.7109, "doc_norm": 1.4566, "encoder_q-embeddings": 2433.8767, "encoder_q-layer.0": 1586.9347, "encoder_q-layer.1": 1715.1215, "encoder_q-layer.10": 2624.2729, "encoder_q-layer.11": 5600.6519, "encoder_q-layer.2": 1964.297, "encoder_q-layer.3": 2071.104, "encoder_q-layer.4": 2255.9719, "encoder_q-layer.5": 2372.1987, "encoder_q-layer.6": 2248.0425, "encoder_q-layer.7": 2251.1667, "encoder_q-layer.8": 2663.5952, "encoder_q-layer.9": 2394.1438, "epoch": 0.82, "inbatch_neg_score": 0.2978, "inbatch_pos_score": 1.0264, "learning_rate": 9.055555555555556e-06, "loss": 2.7109, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 66.6862, "num_token_overlap": 17.945, "num_token_query": 52.1943, "num_token_union": 73.4524, "num_word_context": 202.1336, "num_word_doc": 49.7653, "num_word_query": 39.8059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4018.7135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.4055, "queue_k_norm": 1.4571, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1943, "sent_len_1": 66.6862, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4725, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.6993, "doc_norm": 1.4557, "encoder_q-embeddings": 5428.6538, "encoder_q-layer.0": 4022.4543, "encoder_q-layer.1": 3860.5056, "encoder_q-layer.10": 2650.3613, "encoder_q-layer.11": 5673.4043, "encoder_q-layer.2": 4464.2627, "encoder_q-layer.3": 4577.7759, "encoder_q-layer.4": 4494.3105, "encoder_q-layer.5": 4529.8979, "encoder_q-layer.6": 4601.1431, "encoder_q-layer.7": 4486.332, "encoder_q-layer.8": 3789.2935, "encoder_q-layer.9": 2549.4443, "epoch": 0.82, "inbatch_neg_score": 0.2964, "inbatch_pos_score": 1.0293, "learning_rate": 9e-06, "loss": 2.6993, "norm_diff": 0.0514, "norm_loss": 0.0, "num_token_doc": 66.9325, "num_token_overlap": 18.0459, "num_token_query": 52.2245, "num_token_union": 73.5317, "num_word_context": 202.6107, "num_word_doc": 49.9584, "num_word_query": 39.838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6632.5784, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2959, "query_norm": 1.4043, "queue_k_norm": 1.4591, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2245, "sent_len_1": 66.9325, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.32, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.6753, "doc_norm": 1.4551, "encoder_q-embeddings": 2047.6517, "encoder_q-layer.0": 1346.6488, "encoder_q-layer.1": 1422.1295, "encoder_q-layer.10": 2402.311, "encoder_q-layer.11": 5245.6641, "encoder_q-layer.2": 1684.9427, "encoder_q-layer.3": 1760.6385, "encoder_q-layer.4": 1831.4486, "encoder_q-layer.5": 1812.9518, "encoder_q-layer.6": 1983.7699, "encoder_q-layer.7": 2127.9441, "encoder_q-layer.8": 2525.5, "encoder_q-layer.9": 2283.2974, "epoch": 0.82, "inbatch_neg_score": 0.2971, "inbatch_pos_score": 1.0488, "learning_rate": 8.944444444444444e-06, "loss": 2.6753, "norm_diff": 0.0486, "norm_loss": 0.0, "num_token_doc": 66.7703, "num_token_overlap": 18.0225, "num_token_query": 52.3569, "num_token_union": 73.4706, "num_word_context": 202.1168, "num_word_doc": 49.8071, "num_word_query": 39.9185, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3624.915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.4065, "queue_k_norm": 1.4586, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3569, "sent_len_1": 66.7703, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5037, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6956, "doc_norm": 1.4568, "encoder_q-embeddings": 1903.0443, "encoder_q-layer.0": 1229.6465, "encoder_q-layer.1": 1326.1758, "encoder_q-layer.10": 2701.1868, "encoder_q-layer.11": 5547.0273, "encoder_q-layer.2": 1558.4956, "encoder_q-layer.3": 1567.5863, "encoder_q-layer.4": 1670.4025, "encoder_q-layer.5": 1781.2732, "encoder_q-layer.6": 2077.4592, "encoder_q-layer.7": 2298.7668, "encoder_q-layer.8": 2599.4841, "encoder_q-layer.9": 2364.9395, "epoch": 0.82, "inbatch_neg_score": 0.2925, "inbatch_pos_score": 1.0459, "learning_rate": 8.88888888888889e-06, "loss": 2.6956, "norm_diff": 0.0569, "norm_loss": 0.0, "num_token_doc": 66.9923, "num_token_overlap": 18.0398, "num_token_query": 52.267, "num_token_union": 73.5544, "num_word_context": 202.8653, "num_word_doc": 50.038, "num_word_query": 39.8768, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3675.7212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.3999, "queue_k_norm": 1.4581, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.267, "sent_len_1": 66.9923, "sent_len_max_0": 127.9813, "sent_len_max_1": 188.7388, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 62.9883, "active_queue_size": 16384.0, "cl_loss": 2.6951, "doc_norm": 1.4673, "encoder_q-embeddings": 2109.8257, "encoder_q-layer.0": 1446.239, "encoder_q-layer.1": 1551.7434, "encoder_q-layer.10": 2508.0491, "encoder_q-layer.11": 5178.189, "encoder_q-layer.2": 1725.2852, "encoder_q-layer.3": 1723.3622, "encoder_q-layer.4": 1833.5007, "encoder_q-layer.5": 1617.2539, "encoder_q-layer.6": 1862.0762, "encoder_q-layer.7": 2016.6346, "encoder_q-layer.8": 2369.0337, "encoder_q-layer.9": 2198.7339, "epoch": 0.82, "inbatch_neg_score": 0.294, "inbatch_pos_score": 1.0518, "learning_rate": 8.833333333333334e-06, "loss": 2.6951, "norm_diff": 0.0601, "norm_loss": 0.0, "num_token_doc": 67.0301, "num_token_overlap": 18.0462, "num_token_query": 52.3693, "num_token_union": 73.6668, "num_word_context": 202.6341, "num_word_doc": 49.9951, "num_word_query": 39.914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3571.0048, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.4071, "queue_k_norm": 1.4585, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3693, "sent_len_1": 67.0301, "sent_len_max_0": 128.0, "sent_len_max_1": 190.375, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6995, "doc_norm": 1.4611, "encoder_q-embeddings": 2407.5754, "encoder_q-layer.0": 1674.1761, "encoder_q-layer.1": 1793.7142, "encoder_q-layer.10": 2796.2817, "encoder_q-layer.11": 5457.0269, "encoder_q-layer.2": 2067.989, "encoder_q-layer.3": 1980.2113, "encoder_q-layer.4": 2222.5959, "encoder_q-layer.5": 2131.1462, "encoder_q-layer.6": 2263.2634, "encoder_q-layer.7": 2357.394, "encoder_q-layer.8": 2650.5571, "encoder_q-layer.9": 2371.6304, "epoch": 0.82, "inbatch_neg_score": 0.2989, "inbatch_pos_score": 1.0371, "learning_rate": 8.777777777777778e-06, "loss": 2.6995, "norm_diff": 0.0525, "norm_loss": 0.0, "num_token_doc": 66.8255, "num_token_overlap": 18.0661, "num_token_query": 52.2699, "num_token_union": 73.4746, "num_word_context": 202.328, "num_word_doc": 49.8384, "num_word_query": 39.8442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3961.007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.4086, "queue_k_norm": 1.4592, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2699, "sent_len_1": 66.8255, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8088, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7112, "doc_norm": 1.4618, "encoder_q-embeddings": 2025.1722, "encoder_q-layer.0": 1304.9553, "encoder_q-layer.1": 1361.6031, "encoder_q-layer.10": 2667.1143, "encoder_q-layer.11": 5855.8105, "encoder_q-layer.2": 1548.0171, "encoder_q-layer.3": 1630.542, "encoder_q-layer.4": 1693.0099, "encoder_q-layer.5": 1795.233, "encoder_q-layer.6": 1967.9114, "encoder_q-layer.7": 2215.0708, "encoder_q-layer.8": 2668.8923, "encoder_q-layer.9": 2409.7942, "epoch": 0.82, "inbatch_neg_score": 0.2975, "inbatch_pos_score": 1.0195, "learning_rate": 8.722222222222224e-06, "loss": 2.7112, "norm_diff": 0.0722, "norm_loss": 0.0, "num_token_doc": 66.8397, "num_token_overlap": 17.9663, "num_token_query": 52.1443, "num_token_union": 73.4385, "num_word_context": 202.349, "num_word_doc": 49.878, "num_word_query": 39.7602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3844.387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.3896, "queue_k_norm": 1.4593, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1443, "sent_len_1": 66.8397, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6188, "stdk": 0.049, "stdq": 0.0458, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6927, "doc_norm": 1.4571, "encoder_q-embeddings": 2213.0737, "encoder_q-layer.0": 1460.4551, "encoder_q-layer.1": 1676.2133, "encoder_q-layer.10": 2681.0891, "encoder_q-layer.11": 5802.1279, "encoder_q-layer.2": 1911.5529, "encoder_q-layer.3": 1962.9491, "encoder_q-layer.4": 2123.8894, "encoder_q-layer.5": 2130.9321, "encoder_q-layer.6": 2245.5801, "encoder_q-layer.7": 2340.7373, "encoder_q-layer.8": 2918.6174, "encoder_q-layer.9": 2511.3174, "epoch": 0.82, "inbatch_neg_score": 0.3021, "inbatch_pos_score": 1.041, "learning_rate": 8.666666666666668e-06, "loss": 2.6927, "norm_diff": 0.0437, "norm_loss": 0.0, "num_token_doc": 66.8008, "num_token_overlap": 17.9591, "num_token_query": 52.0966, "num_token_union": 73.5153, "num_word_context": 202.2139, "num_word_doc": 49.8439, "num_word_query": 39.6912, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3948.8366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4134, "queue_k_norm": 1.4589, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0966, "sent_len_1": 66.8008, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.74, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 62.0117, "active_queue_size": 16384.0, "cl_loss": 2.6996, "doc_norm": 1.4582, "encoder_q-embeddings": 2284.8721, "encoder_q-layer.0": 1520.8328, "encoder_q-layer.1": 1726.4438, "encoder_q-layer.10": 2540.4211, "encoder_q-layer.11": 5625.0391, "encoder_q-layer.2": 2120.2573, "encoder_q-layer.3": 2031.1749, "encoder_q-layer.4": 2145.7512, "encoder_q-layer.5": 2253.5276, "encoder_q-layer.6": 2381.1492, "encoder_q-layer.7": 2406.1897, "encoder_q-layer.8": 2536.0427, "encoder_q-layer.9": 2359.7761, "epoch": 0.82, "inbatch_neg_score": 0.3005, "inbatch_pos_score": 1.0459, "learning_rate": 8.611111111111112e-06, "loss": 2.6996, "norm_diff": 0.0448, "norm_loss": 0.0, "num_token_doc": 66.7301, "num_token_overlap": 18.0272, "num_token_query": 52.3847, "num_token_union": 73.5064, "num_word_context": 202.6478, "num_word_doc": 49.8068, "num_word_query": 39.9247, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3965.1355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.4135, "queue_k_norm": 1.4584, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3847, "sent_len_1": 66.7301, "sent_len_max_0": 128.0, "sent_len_max_1": 187.4812, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6832, "doc_norm": 1.466, "encoder_q-embeddings": 1964.2552, "encoder_q-layer.0": 1302.1794, "encoder_q-layer.1": 1532.8179, "encoder_q-layer.10": 2741.1782, "encoder_q-layer.11": 5996.6436, "encoder_q-layer.2": 1668.6362, "encoder_q-layer.3": 1657.0614, "encoder_q-layer.4": 1759.7677, "encoder_q-layer.5": 1792.3885, "encoder_q-layer.6": 2006.4166, "encoder_q-layer.7": 2509.2185, "encoder_q-layer.8": 2745.3171, "encoder_q-layer.9": 2388.2209, "epoch": 0.83, "inbatch_neg_score": 0.3001, "inbatch_pos_score": 1.0459, "learning_rate": 8.555555555555556e-06, "loss": 2.6832, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.612, "num_token_overlap": 18.0501, "num_token_query": 52.2781, "num_token_union": 73.3551, "num_word_context": 201.8884, "num_word_doc": 49.7229, "num_word_query": 39.8434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3935.6503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.4062, "queue_k_norm": 1.4611, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2781, "sent_len_1": 66.612, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5913, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.6999, "doc_norm": 1.4575, "encoder_q-embeddings": 2657.6531, "encoder_q-layer.0": 1702.3198, "encoder_q-layer.1": 2006.0757, "encoder_q-layer.10": 2578.5095, "encoder_q-layer.11": 5800.0132, "encoder_q-layer.2": 2382.7241, "encoder_q-layer.3": 2556.6636, "encoder_q-layer.4": 2518.0911, "encoder_q-layer.5": 2639.6624, "encoder_q-layer.6": 2802.0049, "encoder_q-layer.7": 2888.821, "encoder_q-layer.8": 2904.6538, "encoder_q-layer.9": 2535.2751, "epoch": 0.83, "inbatch_neg_score": 0.3026, "inbatch_pos_score": 1.0312, "learning_rate": 8.500000000000002e-06, "loss": 2.6999, "norm_diff": 0.0506, "norm_loss": 0.0, "num_token_doc": 66.752, "num_token_overlap": 17.988, "num_token_query": 52.165, "num_token_union": 73.382, "num_word_context": 202.1096, "num_word_doc": 49.8144, "num_word_query": 39.7514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4378.0582, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3013, "query_norm": 1.4069, "queue_k_norm": 1.4617, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.165, "sent_len_1": 66.752, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8187, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.7097, "doc_norm": 1.4655, "encoder_q-embeddings": 2127.4111, "encoder_q-layer.0": 1371.6559, "encoder_q-layer.1": 1520.4635, "encoder_q-layer.10": 2585.5168, "encoder_q-layer.11": 5681.9902, "encoder_q-layer.2": 1746.8564, "encoder_q-layer.3": 1880.2598, "encoder_q-layer.4": 2123.9944, "encoder_q-layer.5": 2191.6594, "encoder_q-layer.6": 2422.1909, "encoder_q-layer.7": 2584.2202, "encoder_q-layer.8": 3005.6711, "encoder_q-layer.9": 2530.4521, "epoch": 0.83, "inbatch_neg_score": 0.301, "inbatch_pos_score": 1.0557, "learning_rate": 8.444444444444446e-06, "loss": 2.7097, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.6907, "num_token_overlap": 17.9778, "num_token_query": 52.2058, "num_token_union": 73.4125, "num_word_context": 202.2052, "num_word_doc": 49.7939, "num_word_query": 39.8062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4005.245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3018, "query_norm": 1.4208, "queue_k_norm": 1.4597, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2058, "sent_len_1": 66.6907, "sent_len_max_0": 128.0, "sent_len_max_1": 189.045, "stdk": 0.0491, "stdq": 0.047, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 2.7099, "doc_norm": 1.4642, "encoder_q-embeddings": 2487.9226, "encoder_q-layer.0": 1690.8237, "encoder_q-layer.1": 1939.7123, "encoder_q-layer.10": 3141.1013, "encoder_q-layer.11": 5953.9058, "encoder_q-layer.2": 2264.5593, "encoder_q-layer.3": 2196.4895, "encoder_q-layer.4": 2221.0801, "encoder_q-layer.5": 2283.8188, "encoder_q-layer.6": 2423.0732, "encoder_q-layer.7": 2643.3457, "encoder_q-layer.8": 2910.2036, "encoder_q-layer.9": 2596.9585, "epoch": 0.83, "inbatch_neg_score": 0.3022, "inbatch_pos_score": 1.0234, "learning_rate": 8.38888888888889e-06, "loss": 2.7099, "norm_diff": 0.0611, "norm_loss": 0.0, "num_token_doc": 66.9372, "num_token_overlap": 18.0618, "num_token_query": 52.4031, "num_token_union": 73.5782, "num_word_context": 202.5277, "num_word_doc": 49.9184, "num_word_query": 39.9679, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4281.475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3022, "query_norm": 1.403, "queue_k_norm": 1.4609, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.4031, "sent_len_1": 66.9372, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7862, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.7009, "doc_norm": 1.46, "encoder_q-embeddings": 2179.7109, "encoder_q-layer.0": 1368.297, "encoder_q-layer.1": 1438.4553, "encoder_q-layer.10": 2561.5234, "encoder_q-layer.11": 5662.6006, "encoder_q-layer.2": 1633.9025, "encoder_q-layer.3": 1688.457, "encoder_q-layer.4": 1731.6495, "encoder_q-layer.5": 1831.0239, "encoder_q-layer.6": 1962.3108, "encoder_q-layer.7": 2181.9023, "encoder_q-layer.8": 2488.7573, "encoder_q-layer.9": 2357.8921, "epoch": 0.83, "inbatch_neg_score": 0.2975, "inbatch_pos_score": 1.0273, "learning_rate": 8.333333333333334e-06, "loss": 2.7009, "norm_diff": 0.0698, "norm_loss": 0.0, "num_token_doc": 66.8562, "num_token_overlap": 18.0565, "num_token_query": 52.3281, "num_token_union": 73.5245, "num_word_context": 202.6516, "num_word_doc": 49.8726, "num_word_query": 39.8941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3763.1523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.3903, "queue_k_norm": 1.4575, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3281, "sent_len_1": 66.8562, "sent_len_max_0": 127.99, "sent_len_max_1": 189.69, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.7119, "doc_norm": 1.4617, "encoder_q-embeddings": 2035.6289, "encoder_q-layer.0": 1321.8407, "encoder_q-layer.1": 1446.9948, "encoder_q-layer.10": 2740.1313, "encoder_q-layer.11": 5707.0737, "encoder_q-layer.2": 1744.8257, "encoder_q-layer.3": 1816.6993, "encoder_q-layer.4": 1918.8577, "encoder_q-layer.5": 1823.1833, "encoder_q-layer.6": 2006.163, "encoder_q-layer.7": 2337.531, "encoder_q-layer.8": 2680.9509, "encoder_q-layer.9": 2371.7524, "epoch": 0.83, "inbatch_neg_score": 0.3009, "inbatch_pos_score": 1.0088, "learning_rate": 8.27777777777778e-06, "loss": 2.7119, "norm_diff": 0.066, "norm_loss": 0.0, "num_token_doc": 66.5183, "num_token_overlap": 17.9546, "num_token_query": 52.1793, "num_token_union": 73.3265, "num_word_context": 201.9336, "num_word_doc": 49.6412, "num_word_query": 39.8225, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3885.2983, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.3958, "queue_k_norm": 1.4612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1793, "sent_len_1": 66.5183, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.04, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6916, "doc_norm": 1.4633, "encoder_q-embeddings": 2118.6685, "encoder_q-layer.0": 1301.5809, "encoder_q-layer.1": 1464.9387, "encoder_q-layer.10": 2657.9558, "encoder_q-layer.11": 5836.001, "encoder_q-layer.2": 1709.0803, "encoder_q-layer.3": 1769.6666, "encoder_q-layer.4": 1961.8165, "encoder_q-layer.5": 1981.5635, "encoder_q-layer.6": 2302.4551, "encoder_q-layer.7": 2545.4426, "encoder_q-layer.8": 2889.8174, "encoder_q-layer.9": 2488.1851, "epoch": 0.83, "inbatch_neg_score": 0.301, "inbatch_pos_score": 1.0518, "learning_rate": 8.222222222222223e-06, "loss": 2.6916, "norm_diff": 0.0607, "norm_loss": 0.0, "num_token_doc": 66.9596, "num_token_overlap": 18.0526, "num_token_query": 52.2424, "num_token_union": 73.4579, "num_word_context": 201.891, "num_word_doc": 49.9892, "num_word_query": 39.8368, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3992.7436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3, "query_norm": 1.4025, "queue_k_norm": 1.4619, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2424, "sent_len_1": 66.9596, "sent_len_max_0": 127.995, "sent_len_max_1": 190.0037, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.6902, "doc_norm": 1.4555, "encoder_q-embeddings": 7917.9648, "encoder_q-layer.0": 6169.6484, "encoder_q-layer.1": 5532.1689, "encoder_q-layer.10": 5226.4331, "encoder_q-layer.11": 11538.1543, "encoder_q-layer.2": 4897.4199, "encoder_q-layer.3": 4773.2627, "encoder_q-layer.4": 4728.1943, "encoder_q-layer.5": 4254.2822, "encoder_q-layer.6": 5006.5166, "encoder_q-layer.7": 5215.2681, "encoder_q-layer.8": 5574.6626, "encoder_q-layer.9": 4906.1714, "epoch": 0.83, "inbatch_neg_score": 0.302, "inbatch_pos_score": 1.04, "learning_rate": 8.166666666666668e-06, "loss": 2.6902, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.9103, "num_token_overlap": 18.0483, "num_token_query": 52.3364, "num_token_union": 73.5672, "num_word_context": 202.4742, "num_word_doc": 49.9343, "num_word_query": 39.9105, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9501.5764, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3015, "query_norm": 1.418, "queue_k_norm": 1.4597, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3364, "sent_len_1": 66.9103, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4975, "stdk": 0.0487, "stdq": 0.0469, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 2.6903, "doc_norm": 1.4569, "encoder_q-embeddings": 3854.5647, "encoder_q-layer.0": 2489.2761, "encoder_q-layer.1": 2674.8667, "encoder_q-layer.10": 5268.9619, "encoder_q-layer.11": 11971.1211, "encoder_q-layer.2": 2972.9592, "encoder_q-layer.3": 3057.2861, "encoder_q-layer.4": 3184.4175, "encoder_q-layer.5": 3288.1333, "encoder_q-layer.6": 3764.0964, "encoder_q-layer.7": 4471.1172, "encoder_q-layer.8": 5194.8716, "encoder_q-layer.9": 4718.7676, "epoch": 0.83, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 0.9878, "learning_rate": 8.111111111111112e-06, "loss": 2.6903, "norm_diff": 0.0634, "norm_loss": 0.0, "num_token_doc": 66.7137, "num_token_overlap": 17.9696, "num_token_query": 52.2544, "num_token_union": 73.4514, "num_word_context": 202.0508, "num_word_doc": 49.7867, "num_word_query": 39.8632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7716.6431, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2988, "query_norm": 1.3935, "queue_k_norm": 1.4609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2544, "sent_len_1": 66.7137, "sent_len_max_0": 128.0, "sent_len_max_1": 188.83, "stdk": 0.0487, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.7114, "doc_norm": 1.4612, "encoder_q-embeddings": 5270.564, "encoder_q-layer.0": 3672.3254, "encoder_q-layer.1": 4207.4136, "encoder_q-layer.10": 5731.5708, "encoder_q-layer.11": 11409.3857, "encoder_q-layer.2": 4925.9375, "encoder_q-layer.3": 5155.9985, "encoder_q-layer.4": 5727.0259, "encoder_q-layer.5": 5624.6572, "encoder_q-layer.6": 6514.9321, "encoder_q-layer.7": 6129.3335, "encoder_q-layer.8": 6238.8091, "encoder_q-layer.9": 4885.2339, "epoch": 0.83, "inbatch_neg_score": 0.2942, "inbatch_pos_score": 1.0137, "learning_rate": 8.055555555555557e-06, "loss": 2.7114, "norm_diff": 0.0733, "norm_loss": 0.0, "num_token_doc": 66.7373, "num_token_overlap": 18.0033, "num_token_query": 52.2713, "num_token_union": 73.4883, "num_word_context": 202.3662, "num_word_doc": 49.7909, "num_word_query": 39.8294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9143.3833, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2944, "query_norm": 1.388, "queue_k_norm": 1.4615, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2713, "sent_len_1": 66.7373, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.6125, "stdk": 0.0489, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 61.3281, "active_queue_size": 16384.0, "cl_loss": 2.6973, "doc_norm": 1.4654, "encoder_q-embeddings": 4872.4795, "encoder_q-layer.0": 3310.3315, "encoder_q-layer.1": 3734.5728, "encoder_q-layer.10": 6117.208, "encoder_q-layer.11": 11190.46, "encoder_q-layer.2": 4226.8428, "encoder_q-layer.3": 4600.8032, "encoder_q-layer.4": 5233.3667, "encoder_q-layer.5": 4626.8091, "encoder_q-layer.6": 4798.2915, "encoder_q-layer.7": 5104.4219, "encoder_q-layer.8": 5662.998, "encoder_q-layer.9": 4909.396, "epoch": 0.84, "inbatch_neg_score": 0.2932, "inbatch_pos_score": 1.0391, "learning_rate": 8.000000000000001e-06, "loss": 2.6973, "norm_diff": 0.0631, "norm_loss": 0.0, "num_token_doc": 66.5016, "num_token_overlap": 17.9888, "num_token_query": 52.1841, "num_token_union": 73.3087, "num_word_context": 202.2949, "num_word_doc": 49.638, "num_word_query": 39.7941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8354.6978, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2939, "query_norm": 1.4023, "queue_k_norm": 1.4593, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1841, "sent_len_1": 66.5016, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2363, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.6887, "doc_norm": 1.4681, "encoder_q-embeddings": 2528.0369, "encoder_q-layer.0": 1633.6481, "encoder_q-layer.1": 1922.429, "encoder_q-layer.10": 2433.8955, "encoder_q-layer.11": 5507.1963, "encoder_q-layer.2": 2266.8872, "encoder_q-layer.3": 2438.7573, "encoder_q-layer.4": 2970.6213, "encoder_q-layer.5": 2520.5144, "encoder_q-layer.6": 2465.5173, "encoder_q-layer.7": 2725.8643, "encoder_q-layer.8": 2842.6035, "encoder_q-layer.9": 2507.5806, "epoch": 0.84, "inbatch_neg_score": 0.2917, "inbatch_pos_score": 1.04, "learning_rate": 7.944444444444445e-06, "loss": 2.6887, "norm_diff": 0.0624, "norm_loss": 0.0, "num_token_doc": 66.7146, "num_token_overlap": 17.9989, "num_token_query": 52.1883, "num_token_union": 73.3651, "num_word_context": 202.2438, "num_word_doc": 49.7622, "num_word_query": 39.7875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4220.7389, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2903, "query_norm": 1.4057, "queue_k_norm": 1.4612, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1883, "sent_len_1": 66.7146, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.0563, "stdk": 0.0491, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.6909, "doc_norm": 1.4593, "encoder_q-embeddings": 3895.6567, "encoder_q-layer.0": 2688.7983, "encoder_q-layer.1": 3202.7478, "encoder_q-layer.10": 3043.5723, "encoder_q-layer.11": 6210.5107, "encoder_q-layer.2": 3540.1399, "encoder_q-layer.3": 3628.5027, "encoder_q-layer.4": 3802.1023, "encoder_q-layer.5": 3445.2239, "encoder_q-layer.6": 3620.7593, "encoder_q-layer.7": 3508.9602, "encoder_q-layer.8": 3579.2104, "encoder_q-layer.9": 2716.5156, "epoch": 0.84, "inbatch_neg_score": 0.293, "inbatch_pos_score": 0.9976, "learning_rate": 7.88888888888889e-06, "loss": 2.6909, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 66.9155, "num_token_overlap": 18.0031, "num_token_query": 52.1532, "num_token_union": 73.5162, "num_word_context": 202.3284, "num_word_doc": 49.931, "num_word_query": 39.7295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5529.3135, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2927, "query_norm": 1.3829, "queue_k_norm": 1.4606, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1532, "sent_len_1": 66.9155, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6062, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.705, "doc_norm": 1.4587, "encoder_q-embeddings": 1913.5057, "encoder_q-layer.0": 1194.9502, "encoder_q-layer.1": 1327.0974, "encoder_q-layer.10": 2563.5383, "encoder_q-layer.11": 5672.729, "encoder_q-layer.2": 1452.5367, "encoder_q-layer.3": 1513.9186, "encoder_q-layer.4": 1593.4478, "encoder_q-layer.5": 1653.2159, "encoder_q-layer.6": 1929.5037, "encoder_q-layer.7": 2224.2583, "encoder_q-layer.8": 2676.2861, "encoder_q-layer.9": 2371.791, "epoch": 0.84, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0322, "learning_rate": 7.833333333333333e-06, "loss": 2.705, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.9568, "num_token_overlap": 17.9922, "num_token_query": 52.2838, "num_token_union": 73.5996, "num_word_context": 202.5967, "num_word_doc": 49.9605, "num_word_query": 39.8474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3702.3546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.407, "queue_k_norm": 1.4599, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2838, "sent_len_1": 66.9568, "sent_len_max_0": 128.0, "sent_len_max_1": 190.54, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 2.679, "doc_norm": 1.4616, "encoder_q-embeddings": 3981.6384, "encoder_q-layer.0": 2571.6055, "encoder_q-layer.1": 3324.8384, "encoder_q-layer.10": 2505.7717, "encoder_q-layer.11": 5829.2754, "encoder_q-layer.2": 4341.7051, "encoder_q-layer.3": 4522.5034, "encoder_q-layer.4": 4487.8955, "encoder_q-layer.5": 4125.6484, "encoder_q-layer.6": 4232.9785, "encoder_q-layer.7": 3538.7869, "encoder_q-layer.8": 2973.0029, "encoder_q-layer.9": 2350.4446, "epoch": 0.84, "inbatch_neg_score": 0.2984, "inbatch_pos_score": 1.0127, "learning_rate": 7.777777777777777e-06, "loss": 2.679, "norm_diff": 0.0555, "norm_loss": 0.0, "num_token_doc": 66.9471, "num_token_overlap": 18.0668, "num_token_query": 52.283, "num_token_union": 73.5219, "num_word_context": 202.4864, "num_word_doc": 50.0016, "num_word_query": 39.8109, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5773.3269, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2966, "query_norm": 1.4061, "queue_k_norm": 1.4613, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.283, "sent_len_1": 66.9471, "sent_len_max_0": 128.0, "sent_len_max_1": 187.2937, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.7008, "doc_norm": 1.4555, "encoder_q-embeddings": 1961.2803, "encoder_q-layer.0": 1283.4581, "encoder_q-layer.1": 1433.2354, "encoder_q-layer.10": 2550.4766, "encoder_q-layer.11": 5709.063, "encoder_q-layer.2": 1682.1318, "encoder_q-layer.3": 1683.7805, "encoder_q-layer.4": 1841.0205, "encoder_q-layer.5": 1942.8301, "encoder_q-layer.6": 2116.6257, "encoder_q-layer.7": 2205.8916, "encoder_q-layer.8": 2669.6648, "encoder_q-layer.9": 2423.3979, "epoch": 0.84, "inbatch_neg_score": 0.2941, "inbatch_pos_score": 1.0146, "learning_rate": 7.722222222222223e-06, "loss": 2.7008, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.7332, "num_token_overlap": 18.0434, "num_token_query": 52.2854, "num_token_union": 73.4469, "num_word_context": 202.1222, "num_word_doc": 49.8033, "num_word_query": 39.8569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3852.9438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3957, "queue_k_norm": 1.4611, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2854, "sent_len_1": 66.7332, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1725, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7018, "doc_norm": 1.4612, "encoder_q-embeddings": 2437.6252, "encoder_q-layer.0": 1567.7052, "encoder_q-layer.1": 1872.6545, "encoder_q-layer.10": 2494.29, "encoder_q-layer.11": 5703.7886, "encoder_q-layer.2": 2193.7417, "encoder_q-layer.3": 2426.3518, "encoder_q-layer.4": 2448.6167, "encoder_q-layer.5": 2374.2244, "encoder_q-layer.6": 2438.6831, "encoder_q-layer.7": 2457.8965, "encoder_q-layer.8": 2785.905, "encoder_q-layer.9": 2400.4958, "epoch": 0.84, "inbatch_neg_score": 0.2934, "inbatch_pos_score": 1.0391, "learning_rate": 7.666666666666667e-06, "loss": 2.7018, "norm_diff": 0.0661, "norm_loss": 0.0, "num_token_doc": 66.9742, "num_token_overlap": 17.9397, "num_token_query": 51.9933, "num_token_union": 73.4944, "num_word_context": 202.265, "num_word_doc": 49.9528, "num_word_query": 39.6284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4122.2399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3952, "queue_k_norm": 1.4601, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 51.9933, "sent_len_1": 66.9742, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.7675, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.7066, "doc_norm": 1.4582, "encoder_q-embeddings": 2479.0671, "encoder_q-layer.0": 1581.0037, "encoder_q-layer.1": 1819.3938, "encoder_q-layer.10": 2676.137, "encoder_q-layer.11": 5543.0376, "encoder_q-layer.2": 2261.7517, "encoder_q-layer.3": 2347.2007, "encoder_q-layer.4": 2613.2673, "encoder_q-layer.5": 2610.053, "encoder_q-layer.6": 2849.6091, "encoder_q-layer.7": 2841.3738, "encoder_q-layer.8": 3016.3406, "encoder_q-layer.9": 2384.0173, "epoch": 0.84, "inbatch_neg_score": 0.2884, "inbatch_pos_score": 1.0186, "learning_rate": 7.611111111111112e-06, "loss": 2.7066, "norm_diff": 0.0564, "norm_loss": 0.0, "num_token_doc": 66.833, "num_token_overlap": 18.0376, "num_token_query": 52.2708, "num_token_union": 73.5161, "num_word_context": 202.3201, "num_word_doc": 49.8979, "num_word_query": 39.8688, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4274.5016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2891, "query_norm": 1.4018, "queue_k_norm": 1.4595, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2708, "sent_len_1": 66.833, "sent_len_max_0": 128.0, "sent_len_max_1": 187.3225, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7042, "doc_norm": 1.4595, "encoder_q-embeddings": 1887.3296, "encoder_q-layer.0": 1185.2134, "encoder_q-layer.1": 1305.0255, "encoder_q-layer.10": 2488.6968, "encoder_q-layer.11": 5512.459, "encoder_q-layer.2": 1494.1638, "encoder_q-layer.3": 1598.1084, "encoder_q-layer.4": 1675.3601, "encoder_q-layer.5": 1740.6519, "encoder_q-layer.6": 2009.5951, "encoder_q-layer.7": 2153.8293, "encoder_q-layer.8": 2593.7302, "encoder_q-layer.9": 2330.8997, "epoch": 0.84, "inbatch_neg_score": 0.2889, "inbatch_pos_score": 1.0283, "learning_rate": 7.555555555555556e-06, "loss": 2.7042, "norm_diff": 0.0608, "norm_loss": 0.0, "num_token_doc": 66.5689, "num_token_overlap": 17.9857, "num_token_query": 52.2141, "num_token_union": 73.347, "num_word_context": 201.9112, "num_word_doc": 49.6378, "num_word_query": 39.7909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3670.2019, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2893, "query_norm": 1.3987, "queue_k_norm": 1.4618, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2141, "sent_len_1": 66.5689, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.3025, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6837, "doc_norm": 1.464, "encoder_q-embeddings": 2248.4148, "encoder_q-layer.0": 1509.1385, "encoder_q-layer.1": 1627.5787, "encoder_q-layer.10": 2471.647, "encoder_q-layer.11": 5531.5327, "encoder_q-layer.2": 1900.3274, "encoder_q-layer.3": 1914.6471, "encoder_q-layer.4": 1899.7678, "encoder_q-layer.5": 1941.641, "encoder_q-layer.6": 2108.8022, "encoder_q-layer.7": 2247.5049, "encoder_q-layer.8": 2584.7029, "encoder_q-layer.9": 2331.0654, "epoch": 0.84, "inbatch_neg_score": 0.2886, "inbatch_pos_score": 1.0254, "learning_rate": 7.5e-06, "loss": 2.6837, "norm_diff": 0.0648, "norm_loss": 0.0, "num_token_doc": 66.7444, "num_token_overlap": 18.0722, "num_token_query": 52.3339, "num_token_union": 73.4379, "num_word_context": 202.3857, "num_word_doc": 49.8098, "num_word_query": 39.8957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3799.4599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.3992, "queue_k_norm": 1.4615, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3339, "sent_len_1": 66.7444, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4875, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.6954, "doc_norm": 1.4571, "encoder_q-embeddings": 3107.0356, "encoder_q-layer.0": 1984.0913, "encoder_q-layer.1": 2255.1716, "encoder_q-layer.10": 2703.3921, "encoder_q-layer.11": 5529.895, "encoder_q-layer.2": 2949.2874, "encoder_q-layer.3": 3071.9578, "encoder_q-layer.4": 2812.6965, "encoder_q-layer.5": 2511.7129, "encoder_q-layer.6": 2669.447, "encoder_q-layer.7": 2792.7437, "encoder_q-layer.8": 2931.1011, "encoder_q-layer.9": 2506.8564, "epoch": 0.85, "inbatch_neg_score": 0.2884, "inbatch_pos_score": 1.0205, "learning_rate": 7.444444444444444e-06, "loss": 2.6954, "norm_diff": 0.0746, "norm_loss": 0.0, "num_token_doc": 66.533, "num_token_overlap": 18.0185, "num_token_query": 52.3923, "num_token_union": 73.4153, "num_word_context": 202.2068, "num_word_doc": 49.6426, "num_word_query": 39.9204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4555.1493, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2866, "query_norm": 1.3824, "queue_k_norm": 1.4595, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3923, "sent_len_1": 66.533, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.41, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.688, "doc_norm": 1.4596, "encoder_q-embeddings": 2373.095, "encoder_q-layer.0": 1615.6569, "encoder_q-layer.1": 1770.9301, "encoder_q-layer.10": 3034.7761, "encoder_q-layer.11": 6120.6226, "encoder_q-layer.2": 2088.2285, "encoder_q-layer.3": 2235.8359, "encoder_q-layer.4": 2212.4995, "encoder_q-layer.5": 2202.9041, "encoder_q-layer.6": 2409.6973, "encoder_q-layer.7": 2478.7515, "encoder_q-layer.8": 3132.7209, "encoder_q-layer.9": 2860.6147, "epoch": 0.85, "inbatch_neg_score": 0.2863, "inbatch_pos_score": 1.0068, "learning_rate": 7.38888888888889e-06, "loss": 2.688, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.8665, "num_token_overlap": 18.0322, "num_token_query": 52.2767, "num_token_union": 73.5203, "num_word_context": 202.3062, "num_word_doc": 49.9252, "num_word_query": 39.86, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4228.411, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2859, "query_norm": 1.4008, "queue_k_norm": 1.4609, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2767, "sent_len_1": 66.8665, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2763, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.6936, "doc_norm": 1.4547, "encoder_q-embeddings": 4182.8813, "encoder_q-layer.0": 2943.936, "encoder_q-layer.1": 3295.3481, "encoder_q-layer.10": 2516.1162, "encoder_q-layer.11": 5409.5552, "encoder_q-layer.2": 4152.0708, "encoder_q-layer.3": 4709.792, "encoder_q-layer.4": 5279.5513, "encoder_q-layer.5": 4917.4604, "encoder_q-layer.6": 4451.7993, "encoder_q-layer.7": 3707.0671, "encoder_q-layer.8": 3267.8616, "encoder_q-layer.9": 2396.9763, "epoch": 0.85, "inbatch_neg_score": 0.2822, "inbatch_pos_score": 1.0156, "learning_rate": 7.333333333333334e-06, "loss": 2.6936, "norm_diff": 0.0522, "norm_loss": 0.0, "num_token_doc": 66.8298, "num_token_overlap": 18.0158, "num_token_query": 52.2495, "num_token_union": 73.4917, "num_word_context": 202.4689, "num_word_doc": 49.8427, "num_word_query": 39.8311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6041.7774, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2839, "query_norm": 1.4025, "queue_k_norm": 1.4605, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2495, "sent_len_1": 66.8298, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5987, "stdk": 0.0487, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.696, "doc_norm": 1.4609, "encoder_q-embeddings": 2360.5457, "encoder_q-layer.0": 1600.2148, "encoder_q-layer.1": 1824.5198, "encoder_q-layer.10": 2556.4329, "encoder_q-layer.11": 5455.6343, "encoder_q-layer.2": 2114.2668, "encoder_q-layer.3": 2324.3608, "encoder_q-layer.4": 2615.9895, "encoder_q-layer.5": 2565.2168, "encoder_q-layer.6": 2527.3276, "encoder_q-layer.7": 2518.5244, "encoder_q-layer.8": 2846.5315, "encoder_q-layer.9": 2291.0952, "epoch": 0.85, "inbatch_neg_score": 0.2857, "inbatch_pos_score": 1.0361, "learning_rate": 7.277777777777778e-06, "loss": 2.696, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.6853, "num_token_overlap": 18.0145, "num_token_query": 52.2301, "num_token_union": 73.4074, "num_word_context": 201.8359, "num_word_doc": 49.7379, "num_word_query": 39.8036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4139.386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2864, "query_norm": 1.3989, "queue_k_norm": 1.4599, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2301, "sent_len_1": 66.6853, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9712, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.6727, "doc_norm": 1.4577, "encoder_q-embeddings": 2231.4824, "encoder_q-layer.0": 1472.6217, "encoder_q-layer.1": 1884.7959, "encoder_q-layer.10": 2858.2085, "encoder_q-layer.11": 5933.0879, "encoder_q-layer.2": 2048.1738, "encoder_q-layer.3": 2015.0603, "encoder_q-layer.4": 2101.927, "encoder_q-layer.5": 2046.3348, "encoder_q-layer.6": 2287.5359, "encoder_q-layer.7": 2462.2529, "encoder_q-layer.8": 2728.6865, "encoder_q-layer.9": 2479.9102, "epoch": 0.85, "inbatch_neg_score": 0.2839, "inbatch_pos_score": 1.0068, "learning_rate": 7.222222222222222e-06, "loss": 2.6727, "norm_diff": 0.0607, "norm_loss": 0.0, "num_token_doc": 66.9283, "num_token_overlap": 18.0254, "num_token_query": 52.2303, "num_token_union": 73.4746, "num_word_context": 202.3463, "num_word_doc": 49.9075, "num_word_query": 39.7975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4142.2192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.397, "queue_k_norm": 1.4593, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2303, "sent_len_1": 66.9283, "sent_len_max_0": 127.9862, "sent_len_max_1": 192.2937, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.6976, "doc_norm": 1.4671, "encoder_q-embeddings": 2547.1584, "encoder_q-layer.0": 1587.1407, "encoder_q-layer.1": 1730.5765, "encoder_q-layer.10": 2566.8608, "encoder_q-layer.11": 5686.0918, "encoder_q-layer.2": 2070.238, "encoder_q-layer.3": 2244.3044, "encoder_q-layer.4": 2384.5693, "encoder_q-layer.5": 2328.2732, "encoder_q-layer.6": 2660.8386, "encoder_q-layer.7": 2761.4299, "encoder_q-layer.8": 2827.521, "encoder_q-layer.9": 2425.4312, "epoch": 0.85, "inbatch_neg_score": 0.2847, "inbatch_pos_score": 1.0127, "learning_rate": 7.166666666666667e-06, "loss": 2.6976, "norm_diff": 0.0753, "norm_loss": 0.0, "num_token_doc": 66.7494, "num_token_overlap": 17.9737, "num_token_query": 52.1676, "num_token_union": 73.4692, "num_word_context": 202.1576, "num_word_doc": 49.8021, "num_word_query": 39.7597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4224.919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2847, "query_norm": 1.3917, "queue_k_norm": 1.4587, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1676, "sent_len_1": 66.7494, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.65, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.6892, "doc_norm": 1.4609, "encoder_q-embeddings": 1831.4335, "encoder_q-layer.0": 1151.8541, "encoder_q-layer.1": 1250.7279, "encoder_q-layer.10": 2452.5405, "encoder_q-layer.11": 5465.4199, "encoder_q-layer.2": 1400.3531, "encoder_q-layer.3": 1442.5966, "encoder_q-layer.4": 1520.203, "encoder_q-layer.5": 1600.5754, "encoder_q-layer.6": 1770.9694, "encoder_q-layer.7": 2168.1731, "encoder_q-layer.8": 2506.9558, "encoder_q-layer.9": 2324.1943, "epoch": 0.85, "inbatch_neg_score": 0.2776, "inbatch_pos_score": 1.0303, "learning_rate": 7.111111111111112e-06, "loss": 2.6892, "norm_diff": 0.0682, "norm_loss": 0.0, "num_token_doc": 66.6498, "num_token_overlap": 17.9962, "num_token_query": 52.0757, "num_token_union": 73.3311, "num_word_context": 201.8994, "num_word_doc": 49.7536, "num_word_query": 39.7083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3581.3828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.3927, "queue_k_norm": 1.4596, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0757, "sent_len_1": 66.6498, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0238, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6837, "doc_norm": 1.4541, "encoder_q-embeddings": 4928.501, "encoder_q-layer.0": 3505.884, "encoder_q-layer.1": 3867.3704, "encoder_q-layer.10": 2851.4158, "encoder_q-layer.11": 5995.7969, "encoder_q-layer.2": 4935.0566, "encoder_q-layer.3": 4842.8433, "encoder_q-layer.4": 5035.876, "encoder_q-layer.5": 4405.606, "encoder_q-layer.6": 4541.2446, "encoder_q-layer.7": 4846.5967, "encoder_q-layer.8": 4507.3525, "encoder_q-layer.9": 2693.811, "epoch": 0.85, "inbatch_neg_score": 0.2811, "inbatch_pos_score": 1.0146, "learning_rate": 7.055555555555556e-06, "loss": 2.6837, "norm_diff": 0.058, "norm_loss": 0.0, "num_token_doc": 66.638, "num_token_overlap": 17.9487, "num_token_query": 52.1913, "num_token_union": 73.3749, "num_word_context": 202.1244, "num_word_doc": 49.7113, "num_word_query": 39.7934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6788.873, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.281, "query_norm": 1.3961, "queue_k_norm": 1.4593, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1913, "sent_len_1": 66.638, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3762, "stdk": 0.0487, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6848, "doc_norm": 1.4541, "encoder_q-embeddings": 3974.1604, "encoder_q-layer.0": 2653.4355, "encoder_q-layer.1": 3156.1414, "encoder_q-layer.10": 2604.8503, "encoder_q-layer.11": 5862.6274, "encoder_q-layer.2": 3776.6909, "encoder_q-layer.3": 3842.8328, "encoder_q-layer.4": 3823.9678, "encoder_q-layer.5": 3599.834, "encoder_q-layer.6": 3400.2898, "encoder_q-layer.7": 2739.7656, "encoder_q-layer.8": 2771.0894, "encoder_q-layer.9": 2497.5054, "epoch": 0.85, "inbatch_neg_score": 0.2841, "inbatch_pos_score": 1.0312, "learning_rate": 7.000000000000001e-06, "loss": 2.6848, "norm_diff": 0.0407, "norm_loss": 0.0, "num_token_doc": 66.805, "num_token_overlap": 18.0491, "num_token_query": 52.2517, "num_token_union": 73.4971, "num_word_context": 202.2835, "num_word_doc": 49.9126, "num_word_query": 39.8549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5359.2392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2822, "query_norm": 1.4134, "queue_k_norm": 1.46, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2517, "sent_len_1": 66.805, "sent_len_max_0": 127.985, "sent_len_max_1": 187.8288, "stdk": 0.0487, "stdq": 0.0471, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.6834, "doc_norm": 1.4631, "encoder_q-embeddings": 2063.8977, "encoder_q-layer.0": 1310.4922, "encoder_q-layer.1": 1458.7378, "encoder_q-layer.10": 3149.9888, "encoder_q-layer.11": 5814.1382, "encoder_q-layer.2": 1671.9021, "encoder_q-layer.3": 1731.9056, "encoder_q-layer.4": 1854.9739, "encoder_q-layer.5": 1865.5502, "encoder_q-layer.6": 2169.7595, "encoder_q-layer.7": 2500.5117, "encoder_q-layer.8": 2943.3823, "encoder_q-layer.9": 2622.5801, "epoch": 0.85, "inbatch_neg_score": 0.2834, "inbatch_pos_score": 1.0195, "learning_rate": 6.944444444444445e-06, "loss": 2.6834, "norm_diff": 0.0565, "norm_loss": 0.0, "num_token_doc": 66.7335, "num_token_overlap": 17.9917, "num_token_query": 52.1368, "num_token_union": 73.3831, "num_word_context": 202.117, "num_word_doc": 49.7856, "num_word_query": 39.7466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3991.0706, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.4066, "queue_k_norm": 1.459, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1368, "sent_len_1": 66.7335, "sent_len_max_0": 128.0, "sent_len_max_1": 189.975, "stdk": 0.0491, "stdq": 0.0468, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.6957, "doc_norm": 1.4538, "encoder_q-embeddings": 3049.1064, "encoder_q-layer.0": 1968.8971, "encoder_q-layer.1": 2234.8408, "encoder_q-layer.10": 2642.2473, "encoder_q-layer.11": 5371.6304, "encoder_q-layer.2": 2721.0671, "encoder_q-layer.3": 2963.5559, "encoder_q-layer.4": 3260.5715, "encoder_q-layer.5": 3495.8916, "encoder_q-layer.6": 4228.0303, "encoder_q-layer.7": 4412.8794, "encoder_q-layer.8": 3794.2375, "encoder_q-layer.9": 2354.3701, "epoch": 0.86, "inbatch_neg_score": 0.285, "inbatch_pos_score": 1.0254, "learning_rate": 6.888888888888889e-06, "loss": 2.6957, "norm_diff": 0.0661, "norm_loss": 0.0, "num_token_doc": 66.7469, "num_token_overlap": 18.02, "num_token_query": 52.1823, "num_token_union": 73.4001, "num_word_context": 202.1298, "num_word_doc": 49.8196, "num_word_query": 39.817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5051.1546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.3877, "queue_k_norm": 1.4582, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1823, "sent_len_1": 66.7469, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6538, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.7102, "doc_norm": 1.4595, "encoder_q-embeddings": 3751.7729, "encoder_q-layer.0": 2321.113, "encoder_q-layer.1": 2621.6069, "encoder_q-layer.10": 4727.4321, "encoder_q-layer.11": 10793.0068, "encoder_q-layer.2": 2895.314, "encoder_q-layer.3": 3000.1533, "encoder_q-layer.4": 3114.3677, "encoder_q-layer.5": 3304.7441, "encoder_q-layer.6": 3708.8677, "encoder_q-layer.7": 4133.7783, "encoder_q-layer.8": 4977.3408, "encoder_q-layer.9": 4490.5576, "epoch": 0.86, "inbatch_neg_score": 0.2849, "inbatch_pos_score": 1.0381, "learning_rate": 6.833333333333333e-06, "loss": 2.7102, "norm_diff": 0.0461, "norm_loss": 0.0, "num_token_doc": 66.9631, "num_token_overlap": 18.0375, "num_token_query": 52.1936, "num_token_union": 73.5052, "num_word_context": 202.355, "num_word_doc": 49.9502, "num_word_query": 39.795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7150.1418, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2847, "query_norm": 1.4133, "queue_k_norm": 1.4592, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1936, "sent_len_1": 66.9631, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.3137, "stdk": 0.049, "stdq": 0.0471, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6892, "doc_norm": 1.4641, "encoder_q-embeddings": 6126.0044, "encoder_q-layer.0": 3937.3997, "encoder_q-layer.1": 4598.2871, "encoder_q-layer.10": 5402.5732, "encoder_q-layer.11": 11074.4121, "encoder_q-layer.2": 5017.312, "encoder_q-layer.3": 5414.9077, "encoder_q-layer.4": 5978.2461, "encoder_q-layer.5": 5875.0664, "encoder_q-layer.6": 5762.8765, "encoder_q-layer.7": 5339.7891, "encoder_q-layer.8": 5654.8604, "encoder_q-layer.9": 4884.2144, "epoch": 0.86, "inbatch_neg_score": 0.2853, "inbatch_pos_score": 1.0205, "learning_rate": 6.777777777777779e-06, "loss": 2.6892, "norm_diff": 0.0772, "norm_loss": 0.0, "num_token_doc": 66.8072, "num_token_overlap": 18.0551, "num_token_query": 52.3122, "num_token_union": 73.4892, "num_word_context": 202.2597, "num_word_doc": 49.8525, "num_word_query": 39.8878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9054.0419, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2861, "query_norm": 1.3868, "queue_k_norm": 1.4572, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3122, "sent_len_1": 66.8072, "sent_len_max_0": 128.0, "sent_len_max_1": 189.085, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6947, "doc_norm": 1.4546, "encoder_q-embeddings": 3727.7573, "encoder_q-layer.0": 2282.949, "encoder_q-layer.1": 2473.0381, "encoder_q-layer.10": 5761.5029, "encoder_q-layer.11": 11013.9727, "encoder_q-layer.2": 2780.3674, "encoder_q-layer.3": 2790.5422, "encoder_q-layer.4": 3091.824, "encoder_q-layer.5": 3112.501, "encoder_q-layer.6": 3688.0562, "encoder_q-layer.7": 4023.6499, "encoder_q-layer.8": 4896.4092, "encoder_q-layer.9": 4655.1265, "epoch": 0.86, "inbatch_neg_score": 0.2836, "inbatch_pos_score": 1.0166, "learning_rate": 6.722222222222223e-06, "loss": 2.6947, "norm_diff": 0.0622, "norm_loss": 0.0, "num_token_doc": 66.6096, "num_token_overlap": 17.9728, "num_token_query": 52.1139, "num_token_union": 73.291, "num_word_context": 201.7114, "num_word_doc": 49.6857, "num_word_query": 39.7207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7213.3785, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2847, "query_norm": 1.3923, "queue_k_norm": 1.4586, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1139, "sent_len_1": 66.6096, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5975, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.7004, "doc_norm": 1.4662, "encoder_q-embeddings": 10726.3447, "encoder_q-layer.0": 7411.7451, "encoder_q-layer.1": 7932.3882, "encoder_q-layer.10": 5383.1079, "encoder_q-layer.11": 11310.1748, "encoder_q-layer.2": 9057.2051, "encoder_q-layer.3": 10214.7744, "encoder_q-layer.4": 10920.7246, "encoder_q-layer.5": 10737.5, "encoder_q-layer.6": 9166.043, "encoder_q-layer.7": 8092.0405, "encoder_q-layer.8": 6147.9404, "encoder_q-layer.9": 4805.1758, "epoch": 0.86, "inbatch_neg_score": 0.2809, "inbatch_pos_score": 1.0234, "learning_rate": 6.666666666666667e-06, "loss": 2.7004, "norm_diff": 0.0646, "norm_loss": 0.0, "num_token_doc": 66.5639, "num_token_overlap": 17.9626, "num_token_query": 52.1694, "num_token_union": 73.3945, "num_word_context": 202.0541, "num_word_doc": 49.6901, "num_word_query": 39.7545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13295.2346, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2808, "query_norm": 1.4017, "queue_k_norm": 1.4576, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1694, "sent_len_1": 66.5639, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8675, "stdk": 0.0492, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6796, "doc_norm": 1.456, "encoder_q-embeddings": 3521.3386, "encoder_q-layer.0": 2224.5156, "encoder_q-layer.1": 2385.8569, "encoder_q-layer.10": 5415.8921, "encoder_q-layer.11": 11272.5762, "encoder_q-layer.2": 2641.3779, "encoder_q-layer.3": 2762.2334, "encoder_q-layer.4": 2907.1826, "encoder_q-layer.5": 3102.5645, "encoder_q-layer.6": 3652.0728, "encoder_q-layer.7": 4366.8506, "encoder_q-layer.8": 5518.4521, "encoder_q-layer.9": 4824.0581, "epoch": 0.86, "inbatch_neg_score": 0.2831, "inbatch_pos_score": 1.0127, "learning_rate": 6.611111111111111e-06, "loss": 2.6796, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.845, "num_token_overlap": 18.0447, "num_token_query": 52.438, "num_token_union": 73.587, "num_word_context": 202.6061, "num_word_doc": 49.9016, "num_word_query": 39.9793, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7336.0556, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2847, "query_norm": 1.3948, "queue_k_norm": 1.4581, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.438, "sent_len_1": 66.845, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1662, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 62.5977, "active_queue_size": 16384.0, "cl_loss": 2.668, "doc_norm": 1.4624, "encoder_q-embeddings": 3792.1426, "encoder_q-layer.0": 2559.3645, "encoder_q-layer.1": 2677.6665, "encoder_q-layer.10": 4733.5493, "encoder_q-layer.11": 10473.4746, "encoder_q-layer.2": 3088.0613, "encoder_q-layer.3": 3141.6814, "encoder_q-layer.4": 3313.3894, "encoder_q-layer.5": 3295.6401, "encoder_q-layer.6": 3651.3467, "encoder_q-layer.7": 4092.25, "encoder_q-layer.8": 4774.3003, "encoder_q-layer.9": 4426.2578, "epoch": 0.86, "inbatch_neg_score": 0.2837, "inbatch_pos_score": 1.0391, "learning_rate": 6.555555555555556e-06, "loss": 2.668, "norm_diff": 0.0627, "norm_loss": 0.0, "num_token_doc": 66.7536, "num_token_overlap": 18.0635, "num_token_query": 52.3783, "num_token_union": 73.4907, "num_word_context": 202.0761, "num_word_doc": 49.7923, "num_word_query": 39.9121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7042.4268, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2852, "query_norm": 1.3997, "queue_k_norm": 1.4574, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3783, "sent_len_1": 66.7536, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5625, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 62.207, "active_queue_size": 16384.0, "cl_loss": 2.6945, "doc_norm": 1.4555, "encoder_q-embeddings": 3544.9109, "encoder_q-layer.0": 2350.1384, "encoder_q-layer.1": 2578.6777, "encoder_q-layer.10": 4598.5454, "encoder_q-layer.11": 10195.7627, "encoder_q-layer.2": 2929.249, "encoder_q-layer.3": 2996.574, "encoder_q-layer.4": 3179.0989, "encoder_q-layer.5": 3210.2974, "encoder_q-layer.6": 3559.7317, "encoder_q-layer.7": 4163.8418, "encoder_q-layer.8": 4669.3589, "encoder_q-layer.9": 4329.2764, "epoch": 0.86, "inbatch_neg_score": 0.2834, "inbatch_pos_score": 1.0342, "learning_rate": 6.5000000000000004e-06, "loss": 2.6945, "norm_diff": 0.0519, "norm_loss": 0.0, "num_token_doc": 66.8004, "num_token_overlap": 17.9937, "num_token_query": 52.1311, "num_token_union": 73.3972, "num_word_context": 202.5361, "num_word_doc": 49.8416, "num_word_query": 39.7684, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6789.7746, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2847, "query_norm": 1.4036, "queue_k_norm": 1.4571, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1311, "sent_len_1": 66.8004, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7812, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.7022, "doc_norm": 1.4597, "encoder_q-embeddings": 8366.6777, "encoder_q-layer.0": 5681.0469, "encoder_q-layer.1": 6988.2705, "encoder_q-layer.10": 5015.48, "encoder_q-layer.11": 10633.21, "encoder_q-layer.2": 8311.3223, "encoder_q-layer.3": 7924.0557, "encoder_q-layer.4": 8700.292, "encoder_q-layer.5": 7401.5298, "encoder_q-layer.6": 5603.5771, "encoder_q-layer.7": 5237.5488, "encoder_q-layer.8": 4993.2959, "encoder_q-layer.9": 4567.5776, "epoch": 0.86, "inbatch_neg_score": 0.2869, "inbatch_pos_score": 1.0322, "learning_rate": 6.4444444444444445e-06, "loss": 2.7022, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.7703, "num_token_overlap": 17.9256, "num_token_query": 52.1892, "num_token_union": 73.5136, "num_word_context": 202.1799, "num_word_doc": 49.7867, "num_word_query": 39.7914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10776.3966, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2852, "query_norm": 1.3994, "queue_k_norm": 1.4571, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1892, "sent_len_1": 66.7703, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2075, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.7006, "doc_norm": 1.4603, "encoder_q-embeddings": 15550.9912, "encoder_q-layer.0": 9456.9766, "encoder_q-layer.1": 10764.4678, "encoder_q-layer.10": 5080.4995, "encoder_q-layer.11": 11317.8057, "encoder_q-layer.2": 13208.8252, "encoder_q-layer.3": 14608.8936, "encoder_q-layer.4": 16155.126, "encoder_q-layer.5": 16307.0039, "encoder_q-layer.6": 20019.6094, "encoder_q-layer.7": 16727.752, "encoder_q-layer.8": 11118.918, "encoder_q-layer.9": 5068.6084, "epoch": 0.86, "inbatch_neg_score": 0.2858, "inbatch_pos_score": 1.0332, "learning_rate": 6.3888888888888885e-06, "loss": 2.7006, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.7032, "num_token_overlap": 17.964, "num_token_query": 52.1354, "num_token_union": 73.3493, "num_word_context": 202.2814, "num_word_doc": 49.764, "num_word_query": 39.7408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20246.5895, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2866, "query_norm": 1.3997, "queue_k_norm": 1.4578, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1354, "sent_len_1": 66.7032, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1975, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 64.2578, "active_queue_size": 16384.0, "cl_loss": 2.6914, "doc_norm": 1.4603, "encoder_q-embeddings": 4417.6221, "encoder_q-layer.0": 2914.9258, "encoder_q-layer.1": 3200.0447, "encoder_q-layer.10": 5644.1919, "encoder_q-layer.11": 11308.9297, "encoder_q-layer.2": 3810.6528, "encoder_q-layer.3": 3855.5518, "encoder_q-layer.4": 3932.751, "encoder_q-layer.5": 4055.1787, "encoder_q-layer.6": 4589.6963, "encoder_q-layer.7": 4295.7549, "encoder_q-layer.8": 5185.7314, "encoder_q-layer.9": 5101.3447, "epoch": 0.87, "inbatch_neg_score": 0.2891, "inbatch_pos_score": 1.0459, "learning_rate": 6.333333333333334e-06, "loss": 2.6914, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.9252, "num_token_overlap": 18.0484, "num_token_query": 52.2515, "num_token_union": 73.5029, "num_word_context": 202.2015, "num_word_doc": 49.9068, "num_word_query": 39.8079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7714.2677, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2898, "query_norm": 1.4019, "queue_k_norm": 1.4573, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2515, "sent_len_1": 66.9252, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.0, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.6802, "doc_norm": 1.4584, "encoder_q-embeddings": 3742.1765, "encoder_q-layer.0": 2339.9299, "encoder_q-layer.1": 2587.032, "encoder_q-layer.10": 4889.0786, "encoder_q-layer.11": 11677.7285, "encoder_q-layer.2": 2912.1704, "encoder_q-layer.3": 3087.9036, "encoder_q-layer.4": 3370.521, "encoder_q-layer.5": 3482.7764, "encoder_q-layer.6": 3773.4526, "encoder_q-layer.7": 4247.1616, "encoder_q-layer.8": 5025.3037, "encoder_q-layer.9": 4693.688, "epoch": 0.87, "inbatch_neg_score": 0.292, "inbatch_pos_score": 1.0283, "learning_rate": 6.277777777777778e-06, "loss": 2.6802, "norm_diff": 0.0632, "norm_loss": 0.0, "num_token_doc": 66.9077, "num_token_overlap": 18.071, "num_token_query": 52.207, "num_token_union": 73.4765, "num_word_context": 202.4772, "num_word_doc": 49.9535, "num_word_query": 39.816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7448.8503, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2905, "query_norm": 1.3952, "queue_k_norm": 1.4579, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.207, "sent_len_1": 66.9077, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9075, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.7026, "doc_norm": 1.4519, "encoder_q-embeddings": 7730.1431, "encoder_q-layer.0": 5436.209, "encoder_q-layer.1": 5469.1665, "encoder_q-layer.10": 5223.9692, "encoder_q-layer.11": 11511.9062, "encoder_q-layer.2": 6231.0264, "encoder_q-layer.3": 6243.1006, "encoder_q-layer.4": 6639.8408, "encoder_q-layer.5": 6745.7397, "encoder_q-layer.6": 8355.1572, "encoder_q-layer.7": 11149.6143, "encoder_q-layer.8": 16835.3633, "encoder_q-layer.9": 10302.2559, "epoch": 0.87, "inbatch_neg_score": 0.2878, "inbatch_pos_score": 1.0059, "learning_rate": 6.222222222222222e-06, "loss": 2.7026, "norm_diff": 0.0593, "norm_loss": 0.0, "num_token_doc": 66.6137, "num_token_overlap": 17.9589, "num_token_query": 52.0656, "num_token_union": 73.2824, "num_word_context": 202.0323, "num_word_doc": 49.7143, "num_word_query": 39.7057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14224.4424, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2886, "query_norm": 1.3925, "queue_k_norm": 1.4575, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0656, "sent_len_1": 66.6137, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5563, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 64.1602, "active_queue_size": 16384.0, "cl_loss": 2.6759, "doc_norm": 1.4623, "encoder_q-embeddings": 5237.3604, "encoder_q-layer.0": 3521.3945, "encoder_q-layer.1": 4244.2847, "encoder_q-layer.10": 2349.6587, "encoder_q-layer.11": 5452.5293, "encoder_q-layer.2": 4916.8682, "encoder_q-layer.3": 5235.8081, "encoder_q-layer.4": 5240.916, "encoder_q-layer.5": 4934.3086, "encoder_q-layer.6": 4864.0249, "encoder_q-layer.7": 4396.2417, "encoder_q-layer.8": 3839.9966, "encoder_q-layer.9": 2473.5073, "epoch": 0.87, "inbatch_neg_score": 0.2901, "inbatch_pos_score": 1.0566, "learning_rate": 6.166666666666667e-06, "loss": 2.6759, "norm_diff": 0.0614, "norm_loss": 0.0, "num_token_doc": 66.7659, "num_token_overlap": 18.072, "num_token_query": 52.2857, "num_token_union": 73.4405, "num_word_context": 201.9161, "num_word_doc": 49.8247, "num_word_query": 39.8421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6755.0698, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2903, "query_norm": 1.4009, "queue_k_norm": 1.4589, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2857, "sent_len_1": 66.7659, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3288, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 2.7092, "doc_norm": 1.4548, "encoder_q-embeddings": 1889.1781, "encoder_q-layer.0": 1179.1978, "encoder_q-layer.1": 1254.1542, "encoder_q-layer.10": 2489.5713, "encoder_q-layer.11": 5494.4131, "encoder_q-layer.2": 1432.2681, "encoder_q-layer.3": 1490.9296, "encoder_q-layer.4": 1562.2166, "encoder_q-layer.5": 1630.8234, "encoder_q-layer.6": 1905.3862, "encoder_q-layer.7": 2070.6604, "encoder_q-layer.8": 2465.5222, "encoder_q-layer.9": 2271.6914, "epoch": 0.87, "inbatch_neg_score": 0.2865, "inbatch_pos_score": 1.0234, "learning_rate": 6.111111111111111e-06, "loss": 2.7092, "norm_diff": 0.061, "norm_loss": 0.0, "num_token_doc": 66.8909, "num_token_overlap": 18.0499, "num_token_query": 52.3315, "num_token_union": 73.4886, "num_word_context": 202.35, "num_word_doc": 49.8869, "num_word_query": 39.9102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3612.6538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.3938, "queue_k_norm": 1.4586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3315, "sent_len_1": 66.8909, "sent_len_max_0": 127.985, "sent_len_max_1": 191.0675, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.6802, "doc_norm": 1.4561, "encoder_q-embeddings": 1242.6293, "encoder_q-layer.0": 820.8616, "encoder_q-layer.1": 940.1411, "encoder_q-layer.10": 1265.2882, "encoder_q-layer.11": 2815.8125, "encoder_q-layer.2": 1100.9565, "encoder_q-layer.3": 1119.1765, "encoder_q-layer.4": 1205.76, "encoder_q-layer.5": 1227.0958, "encoder_q-layer.6": 1175.1488, "encoder_q-layer.7": 1190.6222, "encoder_q-layer.8": 1291.9819, "encoder_q-layer.9": 1186.2361, "epoch": 0.87, "inbatch_neg_score": 0.2919, "inbatch_pos_score": 1.0293, "learning_rate": 6.055555555555556e-06, "loss": 2.6802, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 67.0584, "num_token_overlap": 17.9826, "num_token_query": 52.1092, "num_token_union": 73.5638, "num_word_context": 202.5102, "num_word_doc": 50.0307, "num_word_query": 39.709, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.3946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.3955, "queue_k_norm": 1.4573, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1092, "sent_len_1": 67.0584, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9563, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 60.9375, "active_queue_size": 16384.0, "cl_loss": 2.7104, "doc_norm": 1.4636, "encoder_q-embeddings": 1120.7676, "encoder_q-layer.0": 755.9847, "encoder_q-layer.1": 843.9695, "encoder_q-layer.10": 1250.7157, "encoder_q-layer.11": 2737.5447, "encoder_q-layer.2": 977.0201, "encoder_q-layer.3": 1051.171, "encoder_q-layer.4": 1089.5656, "encoder_q-layer.5": 1108.1416, "encoder_q-layer.6": 1131.5592, "encoder_q-layer.7": 1193.9001, "encoder_q-layer.8": 1325.7705, "encoder_q-layer.9": 1189.0513, "epoch": 0.87, "inbatch_neg_score": 0.2916, "inbatch_pos_score": 1.0303, "learning_rate": 6e-06, "loss": 2.7104, "norm_diff": 0.0658, "norm_loss": 0.0, "num_token_doc": 66.8138, "num_token_overlap": 17.9731, "num_token_query": 52.116, "num_token_union": 73.4154, "num_word_context": 202.2135, "num_word_doc": 49.8547, "num_word_query": 39.7087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1965.5659, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2922, "query_norm": 1.3979, "queue_k_norm": 1.4596, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.116, "sent_len_1": 66.8138, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1838, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.7026, "doc_norm": 1.4595, "encoder_q-embeddings": 1267.8774, "encoder_q-layer.0": 851.0789, "encoder_q-layer.1": 996.455, "encoder_q-layer.10": 1310.9038, "encoder_q-layer.11": 2846.8184, "encoder_q-layer.2": 1174.7384, "encoder_q-layer.3": 1217.9119, "encoder_q-layer.4": 1157.0503, "encoder_q-layer.5": 1258.8459, "encoder_q-layer.6": 1343.626, "encoder_q-layer.7": 1259.3107, "encoder_q-layer.8": 1380.6815, "encoder_q-layer.9": 1179.8625, "epoch": 0.87, "inbatch_neg_score": 0.2952, "inbatch_pos_score": 1.0264, "learning_rate": 5.944444444444445e-06, "loss": 2.7026, "norm_diff": 0.0564, "norm_loss": 0.0, "num_token_doc": 66.7968, "num_token_overlap": 17.9754, "num_token_query": 52.2349, "num_token_union": 73.4853, "num_word_context": 202.2702, "num_word_doc": 49.8298, "num_word_query": 39.8334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2107.1061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.4031, "queue_k_norm": 1.4581, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2349, "sent_len_1": 66.7968, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.435, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6701, "doc_norm": 1.462, "encoder_q-embeddings": 2645.8037, "encoder_q-layer.0": 1760.8805, "encoder_q-layer.1": 2216.5686, "encoder_q-layer.10": 1256.5374, "encoder_q-layer.11": 2923.5359, "encoder_q-layer.2": 2610.2134, "encoder_q-layer.3": 2702.5933, "encoder_q-layer.4": 2856.0701, "encoder_q-layer.5": 3047.198, "encoder_q-layer.6": 2838.3135, "encoder_q-layer.7": 1902.9543, "encoder_q-layer.8": 1706.1688, "encoder_q-layer.9": 1228.4126, "epoch": 0.87, "inbatch_neg_score": 0.2958, "inbatch_pos_score": 1.0342, "learning_rate": 5.888888888888889e-06, "loss": 2.6701, "norm_diff": 0.0554, "norm_loss": 0.0, "num_token_doc": 67.0544, "num_token_overlap": 18.0859, "num_token_query": 52.4486, "num_token_union": 73.7026, "num_word_context": 202.7093, "num_word_doc": 50.0926, "num_word_query": 39.9884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3529.1144, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.4067, "queue_k_norm": 1.4605, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4486, "sent_len_1": 67.0544, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.6425, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 2.6954, "doc_norm": 1.4537, "encoder_q-embeddings": 1727.5465, "encoder_q-layer.0": 1175.9946, "encoder_q-layer.1": 1398.4376, "encoder_q-layer.10": 1293.2067, "encoder_q-layer.11": 3026.813, "encoder_q-layer.2": 1639.9935, "encoder_q-layer.3": 1722.4801, "encoder_q-layer.4": 1797.9067, "encoder_q-layer.5": 1820.2007, "encoder_q-layer.6": 2110.8469, "encoder_q-layer.7": 2440.9558, "encoder_q-layer.8": 1998.3164, "encoder_q-layer.9": 1249.3298, "epoch": 0.87, "inbatch_neg_score": 0.2993, "inbatch_pos_score": 1.0088, "learning_rate": 5.833333333333334e-06, "loss": 2.6954, "norm_diff": 0.0579, "norm_loss": 0.0, "num_token_doc": 66.7412, "num_token_overlap": 17.9817, "num_token_query": 52.0336, "num_token_union": 73.3353, "num_word_context": 201.9721, "num_word_doc": 49.8058, "num_word_query": 39.6505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2803.4739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.3958, "queue_k_norm": 1.4569, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0336, "sent_len_1": 66.7412, "sent_len_max_0": 127.9825, "sent_len_max_1": 187.4175, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6848, "doc_norm": 1.4604, "encoder_q-embeddings": 1000.9634, "encoder_q-layer.0": 618.6737, "encoder_q-layer.1": 654.5208, "encoder_q-layer.10": 1361.235, "encoder_q-layer.11": 2911.4009, "encoder_q-layer.2": 749.3162, "encoder_q-layer.3": 760.472, "encoder_q-layer.4": 816.1378, "encoder_q-layer.5": 816.3851, "encoder_q-layer.6": 911.7336, "encoder_q-layer.7": 1013.3984, "encoder_q-layer.8": 1370.9136, "encoder_q-layer.9": 1232.897, "epoch": 0.87, "inbatch_neg_score": 0.2974, "inbatch_pos_score": 1.0332, "learning_rate": 5.777777777777778e-06, "loss": 2.6848, "norm_diff": 0.0679, "norm_loss": 0.0, "num_token_doc": 66.8173, "num_token_overlap": 18.0436, "num_token_query": 52.4275, "num_token_union": 73.5187, "num_word_context": 202.3449, "num_word_doc": 49.8254, "num_word_query": 39.9548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1871.7601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.3925, "queue_k_norm": 1.4616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4275, "sent_len_1": 66.8173, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7925, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.7027, "doc_norm": 1.4538, "encoder_q-embeddings": 1488.934, "encoder_q-layer.0": 1040.2828, "encoder_q-layer.1": 1175.1304, "encoder_q-layer.10": 1421.0007, "encoder_q-layer.11": 2923.3574, "encoder_q-layer.2": 1454.4958, "encoder_q-layer.3": 1523.7787, "encoder_q-layer.4": 1368.6115, "encoder_q-layer.5": 1381.0774, "encoder_q-layer.6": 1397.5172, "encoder_q-layer.7": 1360.0399, "encoder_q-layer.8": 1403.3171, "encoder_q-layer.9": 1262.7837, "epoch": 0.88, "inbatch_neg_score": 0.3002, "inbatch_pos_score": 1.0215, "learning_rate": 5.722222222222223e-06, "loss": 2.7027, "norm_diff": 0.0582, "norm_loss": 0.0, "num_token_doc": 66.675, "num_token_overlap": 17.8963, "num_token_query": 52.1154, "num_token_union": 73.4475, "num_word_context": 202.4534, "num_word_doc": 49.7744, "num_word_query": 39.7579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2336.5937, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.3957, "queue_k_norm": 1.4592, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1154, "sent_len_1": 66.675, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2063, "stdk": 0.0487, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.6727, "doc_norm": 1.4618, "encoder_q-embeddings": 954.9238, "encoder_q-layer.0": 629.4647, "encoder_q-layer.1": 688.7402, "encoder_q-layer.10": 1247.8448, "encoder_q-layer.11": 2869.3921, "encoder_q-layer.2": 797.8767, "encoder_q-layer.3": 824.8854, "encoder_q-layer.4": 868.4839, "encoder_q-layer.5": 957.3376, "encoder_q-layer.6": 1089.1661, "encoder_q-layer.7": 1237.3918, "encoder_q-layer.8": 1365.8756, "encoder_q-layer.9": 1182.51, "epoch": 0.88, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 1.0312, "learning_rate": 5.666666666666667e-06, "loss": 2.6727, "norm_diff": 0.0684, "norm_loss": 0.0, "num_token_doc": 66.7946, "num_token_overlap": 18.0235, "num_token_query": 52.2555, "num_token_union": 73.459, "num_word_context": 202.4627, "num_word_doc": 49.9036, "num_word_query": 39.831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1955.1761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.3934, "queue_k_norm": 1.4599, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2555, "sent_len_1": 66.7946, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9775, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.6903, "doc_norm": 1.4639, "encoder_q-embeddings": 2416.0156, "encoder_q-layer.0": 1694.4937, "encoder_q-layer.1": 2038.7994, "encoder_q-layer.10": 1313.1807, "encoder_q-layer.11": 2806.1069, "encoder_q-layer.2": 2321.8418, "encoder_q-layer.3": 2422.0293, "encoder_q-layer.4": 2248.0552, "encoder_q-layer.5": 2051.647, "encoder_q-layer.6": 1908.5088, "encoder_q-layer.7": 1560.0352, "encoder_q-layer.8": 1475.3075, "encoder_q-layer.9": 1222.1707, "epoch": 0.88, "inbatch_neg_score": 0.2974, "inbatch_pos_score": 1.0449, "learning_rate": 5.611111111111112e-06, "loss": 2.6903, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.8681, "num_token_overlap": 18.0759, "num_token_query": 52.3487, "num_token_union": 73.544, "num_word_context": 202.7158, "num_word_doc": 49.9377, "num_word_query": 39.93, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3084.9761, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.4013, "queue_k_norm": 1.4612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3487, "sent_len_1": 66.8681, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8525, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.6725, "doc_norm": 1.4572, "encoder_q-embeddings": 1168.8267, "encoder_q-layer.0": 760.893, "encoder_q-layer.1": 848.8978, "encoder_q-layer.10": 1300.4232, "encoder_q-layer.11": 2863.478, "encoder_q-layer.2": 929.4352, "encoder_q-layer.3": 985.2825, "encoder_q-layer.4": 1050.3707, "encoder_q-layer.5": 1048.0684, "encoder_q-layer.6": 1126.4752, "encoder_q-layer.7": 1244.7738, "encoder_q-layer.8": 1293.631, "encoder_q-layer.9": 1203.389, "epoch": 0.88, "inbatch_neg_score": 0.2996, "inbatch_pos_score": 1.0273, "learning_rate": 5.555555555555556e-06, "loss": 2.6725, "norm_diff": 0.0536, "norm_loss": 0.0, "num_token_doc": 66.8356, "num_token_overlap": 18.0086, "num_token_query": 52.1254, "num_token_union": 73.3984, "num_word_context": 201.842, "num_word_doc": 49.882, "num_word_query": 39.7532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2000.0096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4036, "queue_k_norm": 1.4613, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1254, "sent_len_1": 66.8356, "sent_len_max_0": 128.0, "sent_len_max_1": 189.795, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 28.0595, "dev_samples_per_second": 2.281, "dev_steps_per_second": 0.036, "epoch": 0.88, "step": 90000, "test_accuracy": 94.07958984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.34761226177215576, "test_doc_norm": 1.4264553785324097, "test_inbatch_neg_score": 0.6526392102241516, "test_inbatch_pos_score": 1.5948125123977661, "test_loss": 0.34761226177215576, "test_loss_align": 0.9625829458236694, "test_loss_unif": 3.816102981567383, "test_loss_unif_q@queue": 3.816102981567383, "test_norm_diff": 0.019416874274611473, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.29078614711761475, "test_query_norm": 1.4454649686813354, "test_queue_k_norm": 1.4610779285430908, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042385995388031006, "test_stdq": 0.04270516335964203, "test_stdqueue_k": 0.049058906733989716, "test_stdqueue_q": 0.0 }, { "dev_runtime": 28.0595, "dev_samples_per_second": 2.281, "dev_steps_per_second": 0.036, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.4064, "eval_beir-arguana_recall@10": 0.68634, "eval_beir-arguana_recall@100": 0.95021, "eval_beir-arguana_recall@20": 0.80797, "eval_beir-avg_ndcg@10": 0.3805160833333333, "eval_beir-avg_recall@10": 0.4549926666666667, "eval_beir-avg_recall@100": 0.6366818333333332, "eval_beir-avg_recall@20": 0.5141917500000001, "eval_beir-cqadupstack_ndcg@10": 0.26957083333333337, "eval_beir-cqadupstack_recall@10": 0.36600666666666665, "eval_beir-cqadupstack_recall@100": 0.5960383333333333, "eval_beir-cqadupstack_recall@20": 0.4325375000000001, "eval_beir-fiqa_ndcg@10": 0.25888, "eval_beir-fiqa_recall@10": 0.3149, "eval_beir-fiqa_recall@100": 0.58466, "eval_beir-fiqa_recall@20": 0.38717, "eval_beir-nfcorpus_ndcg@10": 0.29563, "eval_beir-nfcorpus_recall@10": 0.14506, "eval_beir-nfcorpus_recall@100": 0.26973, "eval_beir-nfcorpus_recall@20": 0.17398, "eval_beir-nq_ndcg@10": 0.27049, "eval_beir-nq_recall@10": 0.44725, "eval_beir-nq_recall@100": 0.79604, "eval_beir-nq_recall@20": 0.56994, "eval_beir-quora_ndcg@10": 0.78603, "eval_beir-quora_recall@10": 0.89135, "eval_beir-quora_recall@100": 0.97765, "eval_beir-quora_recall@20": 0.93052, "eval_beir-scidocs_ndcg@10": 0.15704, "eval_beir-scidocs_recall@10": 0.16583, "eval_beir-scidocs_recall@100": 0.37162, "eval_beir-scidocs_recall@20": 0.22033, "eval_beir-scifact_ndcg@10": 0.64585, "eval_beir-scifact_recall@10": 0.79967, "eval_beir-scifact_recall@100": 0.92822, "eval_beir-scifact_recall@20": 0.838, "eval_beir-trec-covid_ndcg@10": 0.55134, "eval_beir-trec-covid_recall@10": 0.594, "eval_beir-trec-covid_recall@100": 0.457, "eval_beir-trec-covid_recall@20": 0.588, "eval_beir-webis-touche2020_ndcg@10": 0.16393, "eval_beir-webis-touche2020_recall@10": 0.13952, "eval_beir-webis-touche2020_recall@100": 0.43565, "eval_beir-webis-touche2020_recall@20": 0.19347, "eval_senteval-avg_sts": 0.7526257918513035, "eval_senteval-sickr_spearman": 0.7252557026960841, "eval_senteval-stsb_spearman": 0.7799958810065228, "step": 90000, "test_accuracy": 94.07958984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.34761226177215576, "test_doc_norm": 1.4264553785324097, "test_inbatch_neg_score": 0.6526392102241516, "test_inbatch_pos_score": 1.5948125123977661, "test_loss": 0.34761226177215576, "test_loss_align": 0.9625829458236694, "test_loss_unif": 3.816102981567383, "test_loss_unif_q@queue": 3.816102981567383, "test_norm_diff": 0.019416874274611473, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.29078614711761475, "test_query_norm": 1.4454649686813354, "test_queue_k_norm": 1.4610779285430908, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042385995388031006, "test_stdq": 0.04270516335964203, "test_stdqueue_k": 0.049058906733989716, "test_stdqueue_q": 0.0 }, { "accuracy": 61.4258, "active_queue_size": 16384.0, "cl_loss": 2.6984, "doc_norm": 1.4602, "encoder_q-embeddings": 1796.33, "encoder_q-layer.0": 1245.5497, "encoder_q-layer.1": 1411.7375, "encoder_q-layer.10": 1362.9158, "encoder_q-layer.11": 2995.8401, "encoder_q-layer.2": 1776.5909, "encoder_q-layer.3": 1902.832, "encoder_q-layer.4": 2326.3123, "encoder_q-layer.5": 2446.5901, "encoder_q-layer.6": 2859.0984, "encoder_q-layer.7": 3022.9937, "encoder_q-layer.8": 2368.53, "encoder_q-layer.9": 1365.3961, "epoch": 0.88, "inbatch_neg_score": 0.3001, "inbatch_pos_score": 1.04, "learning_rate": 5.500000000000001e-06, "loss": 2.6984, "norm_diff": 0.0572, "norm_loss": 0.0, "num_token_doc": 66.6967, "num_token_overlap": 17.9897, "num_token_query": 52.2476, "num_token_union": 73.4289, "num_word_context": 202.3118, "num_word_doc": 49.7262, "num_word_query": 39.8433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3136.6241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2991, "query_norm": 1.403, "queue_k_norm": 1.4585, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2476, "sent_len_1": 66.6967, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0737, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.7077, "doc_norm": 1.4643, "encoder_q-embeddings": 1105.3914, "encoder_q-layer.0": 745.04, "encoder_q-layer.1": 864.9185, "encoder_q-layer.10": 1397.8187, "encoder_q-layer.11": 2846.1726, "encoder_q-layer.2": 1056.8346, "encoder_q-layer.3": 1048.2389, "encoder_q-layer.4": 1149.3752, "encoder_q-layer.5": 1157.8401, "encoder_q-layer.6": 1267.775, "encoder_q-layer.7": 1280.0858, "encoder_q-layer.8": 1505.6005, "encoder_q-layer.9": 1297.3685, "epoch": 0.88, "inbatch_neg_score": 0.2978, "inbatch_pos_score": 1.0371, "learning_rate": 5.444444444444445e-06, "loss": 2.7077, "norm_diff": 0.068, "norm_loss": 0.0, "num_token_doc": 66.8431, "num_token_overlap": 18.0147, "num_token_query": 52.1698, "num_token_union": 73.4146, "num_word_context": 202.4232, "num_word_doc": 49.8506, "num_word_query": 39.7856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2054.5704, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.3963, "queue_k_norm": 1.4608, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1698, "sent_len_1": 66.8431, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1738, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 2.6703, "doc_norm": 1.4591, "encoder_q-embeddings": 2503.5188, "encoder_q-layer.0": 1655.9581, "encoder_q-layer.1": 1700.1619, "encoder_q-layer.10": 1322.3467, "encoder_q-layer.11": 2861.5098, "encoder_q-layer.2": 2060.5813, "encoder_q-layer.3": 2177.5952, "encoder_q-layer.4": 2429.3782, "encoder_q-layer.5": 2744.8062, "encoder_q-layer.6": 2518.1172, "encoder_q-layer.7": 2241.0344, "encoder_q-layer.8": 1684.2645, "encoder_q-layer.9": 1278.3691, "epoch": 0.88, "inbatch_neg_score": 0.2971, "inbatch_pos_score": 1.0195, "learning_rate": 5.388888888888889e-06, "loss": 2.6703, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.9406, "num_token_overlap": 18.0074, "num_token_query": 52.0341, "num_token_union": 73.4615, "num_word_context": 202.3955, "num_word_doc": 49.962, "num_word_query": 39.6555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3271.2553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.4052, "queue_k_norm": 1.4599, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.0341, "sent_len_1": 66.9406, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9963, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.679, "doc_norm": 1.4582, "encoder_q-embeddings": 1020.2768, "encoder_q-layer.0": 673.4888, "encoder_q-layer.1": 757.8648, "encoder_q-layer.10": 1283.0941, "encoder_q-layer.11": 2982.9844, "encoder_q-layer.2": 874.9388, "encoder_q-layer.3": 891.0382, "encoder_q-layer.4": 913.1162, "encoder_q-layer.5": 935.9516, "encoder_q-layer.6": 1088.776, "encoder_q-layer.7": 1159.949, "encoder_q-layer.8": 1357.7405, "encoder_q-layer.9": 1211.9846, "epoch": 0.88, "inbatch_neg_score": 0.2992, "inbatch_pos_score": 1.0361, "learning_rate": 5.333333333333334e-06, "loss": 2.679, "norm_diff": 0.0631, "norm_loss": 0.0, "num_token_doc": 66.8023, "num_token_overlap": 18.0359, "num_token_query": 52.2575, "num_token_union": 73.3852, "num_word_context": 202.055, "num_word_doc": 49.8174, "num_word_query": 39.8371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1953.1729, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2986, "query_norm": 1.395, "queue_k_norm": 1.4609, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2575, "sent_len_1": 66.8023, "sent_len_max_0": 127.9975, "sent_len_max_1": 192.4112, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 2.6711, "doc_norm": 1.4599, "encoder_q-embeddings": 965.949, "encoder_q-layer.0": 607.3842, "encoder_q-layer.1": 665.4, "encoder_q-layer.10": 1313.3635, "encoder_q-layer.11": 2895.9504, "encoder_q-layer.2": 744.0903, "encoder_q-layer.3": 792.2404, "encoder_q-layer.4": 822.2435, "encoder_q-layer.5": 831.9801, "encoder_q-layer.6": 973.0751, "encoder_q-layer.7": 1054.7122, "encoder_q-layer.8": 1277.6565, "encoder_q-layer.9": 1192.7839, "epoch": 0.88, "inbatch_neg_score": 0.299, "inbatch_pos_score": 1.0127, "learning_rate": 5.277777777777778e-06, "loss": 2.6711, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.6633, "num_token_overlap": 18.0609, "num_token_query": 52.2959, "num_token_union": 73.4259, "num_word_context": 202.142, "num_word_doc": 49.7905, "num_word_query": 39.8798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1862.7836, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4004, "queue_k_norm": 1.4619, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2959, "sent_len_1": 66.6633, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8575, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.6676, "doc_norm": 1.4521, "encoder_q-embeddings": 1353.1967, "encoder_q-layer.0": 868.9963, "encoder_q-layer.1": 969.4456, "encoder_q-layer.10": 1272.9736, "encoder_q-layer.11": 2826.6226, "encoder_q-layer.2": 1109.6899, "encoder_q-layer.3": 1143.8591, "encoder_q-layer.4": 1168.7418, "encoder_q-layer.5": 1222.0461, "encoder_q-layer.6": 1534.1887, "encoder_q-layer.7": 1583.5437, "encoder_q-layer.8": 1565.8525, "encoder_q-layer.9": 1206.2938, "epoch": 0.88, "inbatch_neg_score": 0.2978, "inbatch_pos_score": 1.0293, "learning_rate": 5.2222222222222226e-06, "loss": 2.6676, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.6686, "num_token_overlap": 17.957, "num_token_query": 52.1696, "num_token_union": 73.3567, "num_word_context": 201.8384, "num_word_doc": 49.733, "num_word_query": 39.7634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2220.4537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.4024, "queue_k_norm": 1.4612, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1696, "sent_len_1": 66.6686, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.2125, "stdk": 0.0486, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6826, "doc_norm": 1.4603, "encoder_q-embeddings": 1083.7233, "encoder_q-layer.0": 677.4987, "encoder_q-layer.1": 751.1269, "encoder_q-layer.10": 1201.6449, "encoder_q-layer.11": 2766.9863, "encoder_q-layer.2": 863.7918, "encoder_q-layer.3": 905.9494, "encoder_q-layer.4": 941.105, "encoder_q-layer.5": 963.6823, "encoder_q-layer.6": 1082.251, "encoder_q-layer.7": 1154.3473, "encoder_q-layer.8": 1303.8677, "encoder_q-layer.9": 1122.8867, "epoch": 0.89, "inbatch_neg_score": 0.3019, "inbatch_pos_score": 1.0537, "learning_rate": 5.166666666666667e-06, "loss": 2.6826, "norm_diff": 0.0592, "norm_loss": 0.0, "num_token_doc": 66.8523, "num_token_overlap": 18.0699, "num_token_query": 52.3634, "num_token_union": 73.5557, "num_word_context": 202.343, "num_word_doc": 49.8784, "num_word_query": 39.9152, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1897.7921, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2998, "query_norm": 1.4011, "queue_k_norm": 1.4612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3634, "sent_len_1": 66.8523, "sent_len_max_0": 128.0, "sent_len_max_1": 189.16, "stdk": 0.0488, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 59.375, "active_queue_size": 16384.0, "cl_loss": 2.691, "doc_norm": 1.4578, "encoder_q-embeddings": 951.5821, "encoder_q-layer.0": 609.2993, "encoder_q-layer.1": 666.4062, "encoder_q-layer.10": 1263.2882, "encoder_q-layer.11": 2930.322, "encoder_q-layer.2": 753.7037, "encoder_q-layer.3": 772.3336, "encoder_q-layer.4": 821.8734, "encoder_q-layer.5": 846.4714, "encoder_q-layer.6": 1001.73, "encoder_q-layer.7": 1062.0261, "encoder_q-layer.8": 1291.4445, "encoder_q-layer.9": 1151.0297, "epoch": 0.89, "inbatch_neg_score": 0.2971, "inbatch_pos_score": 1.0264, "learning_rate": 5.1111111111111115e-06, "loss": 2.691, "norm_diff": 0.0727, "norm_loss": 0.0, "num_token_doc": 66.7264, "num_token_overlap": 18.0262, "num_token_query": 52.3205, "num_token_union": 73.4805, "num_word_context": 202.4602, "num_word_doc": 49.7949, "num_word_query": 39.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1865.8785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.3852, "queue_k_norm": 1.4621, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3205, "sent_len_1": 66.7264, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.4025, "stdk": 0.0488, "stdq": 0.0459, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.6813, "doc_norm": 1.4667, "encoder_q-embeddings": 2394.4272, "encoder_q-layer.0": 1603.4186, "encoder_q-layer.1": 1989.233, "encoder_q-layer.10": 1370.7125, "encoder_q-layer.11": 2933.0618, "encoder_q-layer.2": 2589.4121, "encoder_q-layer.3": 3095.4946, "encoder_q-layer.4": 3315.4094, "encoder_q-layer.5": 2819.8857, "encoder_q-layer.6": 3103.095, "encoder_q-layer.7": 2371.9487, "encoder_q-layer.8": 1573.6831, "encoder_q-layer.9": 1233.8116, "epoch": 0.89, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 1.0273, "learning_rate": 5.0555555555555555e-06, "loss": 2.6813, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 67.0982, "num_token_overlap": 18.0514, "num_token_query": 52.2227, "num_token_union": 73.5929, "num_word_context": 202.4225, "num_word_doc": 50.0389, "num_word_query": 39.7944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3675.1831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.3995, "queue_k_norm": 1.461, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2227, "sent_len_1": 67.0982, "sent_len_max_0": 128.0, "sent_len_max_1": 190.46, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 62.5977, "active_queue_size": 16384.0, "cl_loss": 2.6866, "doc_norm": 1.4685, "encoder_q-embeddings": 995.7265, "encoder_q-layer.0": 640.7269, "encoder_q-layer.1": 701.814, "encoder_q-layer.10": 1383.9232, "encoder_q-layer.11": 2861.4868, "encoder_q-layer.2": 810.6072, "encoder_q-layer.3": 823.354, "encoder_q-layer.4": 883.1723, "encoder_q-layer.5": 909.5749, "encoder_q-layer.6": 1025.6671, "encoder_q-layer.7": 1149.99, "encoder_q-layer.8": 1384.7388, "encoder_q-layer.9": 1232.2765, "epoch": 0.89, "inbatch_neg_score": 0.3004, "inbatch_pos_score": 1.0264, "learning_rate": 5e-06, "loss": 2.6866, "norm_diff": 0.072, "norm_loss": 0.0, "num_token_doc": 66.7766, "num_token_overlap": 18.0174, "num_token_query": 52.3983, "num_token_union": 73.5854, "num_word_context": 202.6209, "num_word_doc": 49.8688, "num_word_query": 39.9619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1915.037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2993, "query_norm": 1.3965, "queue_k_norm": 1.4615, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3983, "sent_len_1": 66.7766, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7562, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6827, "doc_norm": 1.4663, "encoder_q-embeddings": 2013.4121, "encoder_q-layer.0": 1221.6742, "encoder_q-layer.1": 1313.5005, "encoder_q-layer.10": 2531.3601, "encoder_q-layer.11": 5906.1792, "encoder_q-layer.2": 1573.6442, "encoder_q-layer.3": 1635.9165, "encoder_q-layer.4": 1803.4515, "encoder_q-layer.5": 1982.2938, "encoder_q-layer.6": 2070.5273, "encoder_q-layer.7": 2232.0144, "encoder_q-layer.8": 2546.1428, "encoder_q-layer.9": 2483.0623, "epoch": 0.89, "inbatch_neg_score": 0.2957, "inbatch_pos_score": 1.041, "learning_rate": 4.9444444444444444e-06, "loss": 2.6827, "norm_diff": 0.076, "norm_loss": 0.0, "num_token_doc": 66.8937, "num_token_overlap": 18.0163, "num_token_query": 52.2878, "num_token_union": 73.5502, "num_word_context": 202.5729, "num_word_doc": 49.9351, "num_word_query": 39.8228, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3824.2376, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.3903, "queue_k_norm": 1.4617, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2878, "sent_len_1": 66.8937, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.8688, "stdk": 0.0491, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6849, "doc_norm": 1.4582, "encoder_q-embeddings": 3148.8159, "encoder_q-layer.0": 2181.1936, "encoder_q-layer.1": 2404.3569, "encoder_q-layer.10": 2498.0601, "encoder_q-layer.11": 5633.9189, "encoder_q-layer.2": 2769.9946, "encoder_q-layer.3": 2874.7607, "encoder_q-layer.4": 2948.7214, "encoder_q-layer.5": 2892.4475, "encoder_q-layer.6": 3045.3757, "encoder_q-layer.7": 2970.1729, "encoder_q-layer.8": 3294.4282, "encoder_q-layer.9": 2570.5491, "epoch": 0.89, "inbatch_neg_score": 0.2977, "inbatch_pos_score": 1.0166, "learning_rate": 4.888888888888889e-06, "loss": 2.6849, "norm_diff": 0.066, "norm_loss": 0.0, "num_token_doc": 66.7602, "num_token_overlap": 17.9252, "num_token_query": 52.0786, "num_token_union": 73.4021, "num_word_context": 202.1957, "num_word_doc": 49.7811, "num_word_query": 39.7295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4776.3997, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.3922, "queue_k_norm": 1.4631, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0786, "sent_len_1": 66.7602, "sent_len_max_0": 127.99, "sent_len_max_1": 190.9162, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 2.6988, "doc_norm": 1.4627, "encoder_q-embeddings": 3139.8459, "encoder_q-layer.0": 2035.7168, "encoder_q-layer.1": 2284.2417, "encoder_q-layer.10": 2838.8794, "encoder_q-layer.11": 6054.3047, "encoder_q-layer.2": 2765.9661, "encoder_q-layer.3": 2756.7012, "encoder_q-layer.4": 3017.0044, "encoder_q-layer.5": 3035.9773, "encoder_q-layer.6": 3010.032, "encoder_q-layer.7": 3018.9746, "encoder_q-layer.8": 2956.8132, "encoder_q-layer.9": 2501.9561, "epoch": 0.89, "inbatch_neg_score": 0.2956, "inbatch_pos_score": 1.0127, "learning_rate": 4.833333333333333e-06, "loss": 2.6988, "norm_diff": 0.0678, "norm_loss": 0.0, "num_token_doc": 66.8622, "num_token_overlap": 17.9232, "num_token_query": 52.1678, "num_token_union": 73.533, "num_word_context": 202.5993, "num_word_doc": 49.8806, "num_word_query": 39.7614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4841.5389, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.3949, "queue_k_norm": 1.4622, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1678, "sent_len_1": 66.8622, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.67, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6786, "doc_norm": 1.461, "encoder_q-embeddings": 1964.3251, "encoder_q-layer.0": 1214.3553, "encoder_q-layer.1": 1297.1873, "encoder_q-layer.10": 2640.1704, "encoder_q-layer.11": 5895.6904, "encoder_q-layer.2": 1523.7251, "encoder_q-layer.3": 1606.3469, "encoder_q-layer.4": 1669.3612, "encoder_q-layer.5": 1725.7305, "encoder_q-layer.6": 2022.2998, "encoder_q-layer.7": 2168.46, "encoder_q-layer.8": 2618.0037, "encoder_q-layer.9": 2450.4905, "epoch": 0.89, "inbatch_neg_score": 0.2967, "inbatch_pos_score": 1.0469, "learning_rate": 4.777777777777778e-06, "loss": 2.6786, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.9113, "num_token_overlap": 18.0435, "num_token_query": 52.2033, "num_token_union": 73.5112, "num_word_context": 202.2355, "num_word_doc": 49.904, "num_word_query": 39.789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3850.8916, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.4015, "queue_k_norm": 1.4627, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2033, "sent_len_1": 66.9113, "sent_len_max_0": 127.985, "sent_len_max_1": 189.69, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.6975, "doc_norm": 1.4645, "encoder_q-embeddings": 2055.5688, "encoder_q-layer.0": 1282.9966, "encoder_q-layer.1": 1437.8596, "encoder_q-layer.10": 2631.3196, "encoder_q-layer.11": 5807.1699, "encoder_q-layer.2": 1628.3354, "encoder_q-layer.3": 1738.1489, "encoder_q-layer.4": 1907.8101, "encoder_q-layer.5": 1975.3972, "encoder_q-layer.6": 2218.1995, "encoder_q-layer.7": 2285.5166, "encoder_q-layer.8": 2612.0967, "encoder_q-layer.9": 2350.8809, "epoch": 0.89, "inbatch_neg_score": 0.2954, "inbatch_pos_score": 1.0293, "learning_rate": 4.722222222222222e-06, "loss": 2.6975, "norm_diff": 0.0692, "norm_loss": 0.0, "num_token_doc": 66.6779, "num_token_overlap": 17.9767, "num_token_query": 52.1148, "num_token_union": 73.3696, "num_word_context": 202.4994, "num_word_doc": 49.7558, "num_word_query": 39.7318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3909.3132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.3953, "queue_k_norm": 1.4636, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1148, "sent_len_1": 66.6779, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.1587, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.6684, "doc_norm": 1.4604, "encoder_q-embeddings": 1923.0593, "encoder_q-layer.0": 1222.7848, "encoder_q-layer.1": 1349.6755, "encoder_q-layer.10": 2616.4763, "encoder_q-layer.11": 5676.0605, "encoder_q-layer.2": 1609.1733, "encoder_q-layer.3": 1634.4617, "encoder_q-layer.4": 1687.6069, "encoder_q-layer.5": 1683.9945, "encoder_q-layer.6": 1926.3118, "encoder_q-layer.7": 2095.1797, "encoder_q-layer.8": 2522.4055, "encoder_q-layer.9": 2404.6626, "epoch": 0.89, "inbatch_neg_score": 0.2973, "inbatch_pos_score": 1.0156, "learning_rate": 4.666666666666667e-06, "loss": 2.6684, "norm_diff": 0.0672, "norm_loss": 0.0, "num_token_doc": 66.6999, "num_token_overlap": 18.0759, "num_token_query": 52.4251, "num_token_union": 73.4899, "num_word_context": 202.442, "num_word_doc": 49.7692, "num_word_query": 39.9986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3731.5916, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.3932, "queue_k_norm": 1.4612, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4251, "sent_len_1": 66.6999, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.8438, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.6877, "doc_norm": 1.4664, "encoder_q-embeddings": 1907.3287, "encoder_q-layer.0": 1148.8838, "encoder_q-layer.1": 1260.3517, "encoder_q-layer.10": 2856.1533, "encoder_q-layer.11": 6235.021, "encoder_q-layer.2": 1450.1307, "encoder_q-layer.3": 1509.3359, "encoder_q-layer.4": 1615.4398, "encoder_q-layer.5": 1753.3386, "encoder_q-layer.6": 2047.5693, "encoder_q-layer.7": 2389.5654, "encoder_q-layer.8": 2780.165, "encoder_q-layer.9": 2590.3433, "epoch": 0.9, "inbatch_neg_score": 0.2997, "inbatch_pos_score": 1.0303, "learning_rate": 4.611111111111111e-06, "loss": 2.6877, "norm_diff": 0.0699, "norm_loss": 0.0, "num_token_doc": 66.6355, "num_token_overlap": 17.89, "num_token_query": 52.1288, "num_token_union": 73.4094, "num_word_context": 202.3434, "num_word_doc": 49.7242, "num_word_query": 39.7187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4011.5364, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2976, "query_norm": 1.3965, "queue_k_norm": 1.4627, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1288, "sent_len_1": 66.6355, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.5462, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6939, "doc_norm": 1.462, "encoder_q-embeddings": 2822.6384, "encoder_q-layer.0": 1925.5182, "encoder_q-layer.1": 2271.541, "encoder_q-layer.10": 2638.9092, "encoder_q-layer.11": 5533.6836, "encoder_q-layer.2": 2613.5386, "encoder_q-layer.3": 2720.7944, "encoder_q-layer.4": 2972.6919, "encoder_q-layer.5": 3082.6958, "encoder_q-layer.6": 3125.4507, "encoder_q-layer.7": 3230.3447, "encoder_q-layer.8": 3099.8545, "encoder_q-layer.9": 2481.1167, "epoch": 0.9, "inbatch_neg_score": 0.2999, "inbatch_pos_score": 1.042, "learning_rate": 4.555555555555556e-06, "loss": 2.6939, "norm_diff": 0.0574, "norm_loss": 0.0, "num_token_doc": 66.7954, "num_token_overlap": 17.9347, "num_token_query": 52.1089, "num_token_union": 73.4218, "num_word_context": 202.2976, "num_word_doc": 49.8454, "num_word_query": 39.718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4582.6942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3003, "query_norm": 1.4046, "queue_k_norm": 1.4633, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1089, "sent_len_1": 66.7954, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.6475, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.6733, "doc_norm": 1.4612, "encoder_q-embeddings": 946.5847, "encoder_q-layer.0": 608.4721, "encoder_q-layer.1": 651.7028, "encoder_q-layer.10": 1257.2725, "encoder_q-layer.11": 2892.8567, "encoder_q-layer.2": 759.8366, "encoder_q-layer.3": 770.0349, "encoder_q-layer.4": 783.3853, "encoder_q-layer.5": 827.3237, "encoder_q-layer.6": 918.5265, "encoder_q-layer.7": 1003.3991, "encoder_q-layer.8": 1193.4791, "encoder_q-layer.9": 1130.3335, "epoch": 0.9, "inbatch_neg_score": 0.3006, "inbatch_pos_score": 1.0557, "learning_rate": 4.5e-06, "loss": 2.6733, "norm_diff": 0.0585, "norm_loss": 0.0, "num_token_doc": 66.4772, "num_token_overlap": 18.021, "num_token_query": 52.4032, "num_token_union": 73.3698, "num_word_context": 202.0528, "num_word_doc": 49.5928, "num_word_query": 39.9782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1830.7798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4027, "queue_k_norm": 1.4615, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4032, "sent_len_1": 66.4772, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8725, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6837, "doc_norm": 1.4642, "encoder_q-embeddings": 1006.6044, "encoder_q-layer.0": 646.7365, "encoder_q-layer.1": 726.5934, "encoder_q-layer.10": 1412.3289, "encoder_q-layer.11": 3068.3071, "encoder_q-layer.2": 847.7441, "encoder_q-layer.3": 871.6035, "encoder_q-layer.4": 965.6418, "encoder_q-layer.5": 953.8779, "encoder_q-layer.6": 1095.631, "encoder_q-layer.7": 1160.1637, "encoder_q-layer.8": 1420.3042, "encoder_q-layer.9": 1264.722, "epoch": 0.9, "inbatch_neg_score": 0.2973, "inbatch_pos_score": 1.0332, "learning_rate": 4.444444444444445e-06, "loss": 2.6837, "norm_diff": 0.0683, "norm_loss": 0.0, "num_token_doc": 66.7423, "num_token_overlap": 17.9923, "num_token_query": 52.2382, "num_token_union": 73.4173, "num_word_context": 202.087, "num_word_doc": 49.7856, "num_word_query": 39.8157, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1986.2331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2981, "query_norm": 1.3959, "queue_k_norm": 1.4623, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2382, "sent_len_1": 66.7423, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.3038, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 61.7188, "active_queue_size": 16384.0, "cl_loss": 2.706, "doc_norm": 1.4671, "encoder_q-embeddings": 498.6049, "encoder_q-layer.0": 332.6006, "encoder_q-layer.1": 370.6447, "encoder_q-layer.10": 695.0558, "encoder_q-layer.11": 1389.6597, "encoder_q-layer.2": 409.3798, "encoder_q-layer.3": 430.4943, "encoder_q-layer.4": 440.6762, "encoder_q-layer.5": 471.6153, "encoder_q-layer.6": 520.5039, "encoder_q-layer.7": 556.0726, "encoder_q-layer.8": 638.6329, "encoder_q-layer.9": 585.0756, "epoch": 0.9, "inbatch_neg_score": 0.2946, "inbatch_pos_score": 1.0566, "learning_rate": 4.388888888888889e-06, "loss": 2.706, "norm_diff": 0.0703, "norm_loss": 0.0, "num_token_doc": 66.7331, "num_token_overlap": 17.9358, "num_token_query": 52.0869, "num_token_union": 73.3971, "num_word_context": 202.2379, "num_word_doc": 49.7453, "num_word_query": 39.6678, "postclip_grad_norm": 1.0, "preclip_grad_norm": 936.9205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.3968, "queue_k_norm": 1.4627, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0869, "sent_len_1": 66.7331, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.2088, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 62.793, "active_queue_size": 16384.0, "cl_loss": 2.6917, "doc_norm": 1.4571, "encoder_q-embeddings": 527.3842, "encoder_q-layer.0": 338.2496, "encoder_q-layer.1": 375.9722, "encoder_q-layer.10": 662.5986, "encoder_q-layer.11": 1339.261, "encoder_q-layer.2": 439.4574, "encoder_q-layer.3": 453.7116, "encoder_q-layer.4": 486.4167, "encoder_q-layer.5": 498.8152, "encoder_q-layer.6": 552.6912, "encoder_q-layer.7": 558.8635, "encoder_q-layer.8": 720.7628, "encoder_q-layer.9": 607.2452, "epoch": 0.9, "inbatch_neg_score": 0.2991, "inbatch_pos_score": 1.0547, "learning_rate": 4.333333333333334e-06, "loss": 2.6917, "norm_diff": 0.0537, "norm_loss": 0.0, "num_token_doc": 66.7043, "num_token_overlap": 17.9562, "num_token_query": 52.0304, "num_token_union": 73.3354, "num_word_context": 201.8944, "num_word_doc": 49.7333, "num_word_query": 39.6659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 954.9522, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2988, "query_norm": 1.4034, "queue_k_norm": 1.4619, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0304, "sent_len_1": 66.7043, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0762, "stdk": 0.0487, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.6967, "doc_norm": 1.4637, "encoder_q-embeddings": 562.3187, "encoder_q-layer.0": 360.3296, "encoder_q-layer.1": 409.3228, "encoder_q-layer.10": 692.488, "encoder_q-layer.11": 1503.9419, "encoder_q-layer.2": 471.5563, "encoder_q-layer.3": 507.2382, "encoder_q-layer.4": 518.2711, "encoder_q-layer.5": 531.8378, "encoder_q-layer.6": 625.8975, "encoder_q-layer.7": 645.1902, "encoder_q-layer.8": 715.7422, "encoder_q-layer.9": 630.2759, "epoch": 0.9, "inbatch_neg_score": 0.2984, "inbatch_pos_score": 1.0107, "learning_rate": 4.277777777777778e-06, "loss": 2.6967, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.7441, "num_token_overlap": 17.9208, "num_token_query": 52.1233, "num_token_union": 73.4031, "num_word_context": 202.1729, "num_word_doc": 49.8148, "num_word_query": 39.7318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1034.448, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2986, "query_norm": 1.3952, "queue_k_norm": 1.4609, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1233, "sent_len_1": 66.7441, "sent_len_max_0": 128.0, "sent_len_max_1": 189.975, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.6969, "doc_norm": 1.4587, "encoder_q-embeddings": 6099.4253, "encoder_q-layer.0": 4229.8218, "encoder_q-layer.1": 5279.2803, "encoder_q-layer.10": 679.5453, "encoder_q-layer.11": 1544.8771, "encoder_q-layer.2": 6341.3828, "encoder_q-layer.3": 6448.7954, "encoder_q-layer.4": 6113.167, "encoder_q-layer.5": 4860.188, "encoder_q-layer.6": 3490.8025, "encoder_q-layer.7": 2756.0391, "encoder_q-layer.8": 1714.9849, "encoder_q-layer.9": 722.5194, "epoch": 0.9, "inbatch_neg_score": 0.2999, "inbatch_pos_score": 1.0, "learning_rate": 4.222222222222223e-06, "loss": 2.6969, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.9962, "num_token_overlap": 17.9076, "num_token_query": 51.9483, "num_token_union": 73.5019, "num_word_context": 202.4177, "num_word_doc": 49.9554, "num_word_query": 39.5981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6837.6132, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2988, "query_norm": 1.3864, "queue_k_norm": 1.4635, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 51.9483, "sent_len_1": 66.9962, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.5563, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6895, "doc_norm": 1.4703, "encoder_q-embeddings": 589.3185, "encoder_q-layer.0": 385.6688, "encoder_q-layer.1": 446.7025, "encoder_q-layer.10": 629.3568, "encoder_q-layer.11": 1383.8684, "encoder_q-layer.2": 541.5959, "encoder_q-layer.3": 551.9266, "encoder_q-layer.4": 590.3376, "encoder_q-layer.5": 572.4713, "encoder_q-layer.6": 572.5166, "encoder_q-layer.7": 564.7131, "encoder_q-layer.8": 632.6285, "encoder_q-layer.9": 574.5616, "epoch": 0.9, "inbatch_neg_score": 0.2974, "inbatch_pos_score": 1.0264, "learning_rate": 4.166666666666667e-06, "loss": 2.6895, "norm_diff": 0.0805, "norm_loss": 0.0, "num_token_doc": 66.6553, "num_token_overlap": 17.9593, "num_token_query": 52.1997, "num_token_union": 73.4066, "num_word_context": 201.9686, "num_word_doc": 49.7366, "num_word_query": 39.7804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 986.7435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2979, "query_norm": 1.3898, "queue_k_norm": 1.4607, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1997, "sent_len_1": 66.6553, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9712, "stdk": 0.0493, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6778, "doc_norm": 1.4634, "encoder_q-embeddings": 542.4648, "encoder_q-layer.0": 364.8212, "encoder_q-layer.1": 393.0308, "encoder_q-layer.10": 664.6454, "encoder_q-layer.11": 1499.2914, "encoder_q-layer.2": 465.2858, "encoder_q-layer.3": 500.3476, "encoder_q-layer.4": 521.3937, "encoder_q-layer.5": 526.2469, "encoder_q-layer.6": 612.6599, "encoder_q-layer.7": 632.5939, "encoder_q-layer.8": 694.3786, "encoder_q-layer.9": 623.8557, "epoch": 0.9, "inbatch_neg_score": 0.2964, "inbatch_pos_score": 1.0303, "learning_rate": 4.111111111111112e-06, "loss": 2.6778, "norm_diff": 0.0665, "norm_loss": 0.0, "num_token_doc": 66.7337, "num_token_overlap": 17.9573, "num_token_query": 52.246, "num_token_union": 73.4954, "num_word_context": 201.9717, "num_word_doc": 49.793, "num_word_query": 39.819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1016.0651, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.3969, "queue_k_norm": 1.4629, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.246, "sent_len_1": 66.7337, "sent_len_max_0": 128.0, "sent_len_max_1": 190.135, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 63.9648, "active_queue_size": 16384.0, "cl_loss": 2.6723, "doc_norm": 1.4635, "encoder_q-embeddings": 429.8658, "encoder_q-layer.0": 267.1426, "encoder_q-layer.1": 291.1758, "encoder_q-layer.10": 618.5582, "encoder_q-layer.11": 1424.1118, "encoder_q-layer.2": 326.4517, "encoder_q-layer.3": 341.8995, "encoder_q-layer.4": 377.1483, "encoder_q-layer.5": 375.8171, "encoder_q-layer.6": 444.9409, "encoder_q-layer.7": 531.1757, "encoder_q-layer.8": 645.09, "encoder_q-layer.9": 585.0289, "epoch": 0.91, "inbatch_neg_score": 0.2975, "inbatch_pos_score": 1.0488, "learning_rate": 4.055555555555556e-06, "loss": 2.6723, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.7706, "num_token_overlap": 18.0579, "num_token_query": 52.3868, "num_token_union": 73.5411, "num_word_context": 202.0248, "num_word_doc": 49.8489, "num_word_query": 39.9366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 904.2814, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2966, "query_norm": 1.4009, "queue_k_norm": 1.4626, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3868, "sent_len_1": 66.7706, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7725, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6941, "doc_norm": 1.4614, "encoder_q-embeddings": 479.3031, "encoder_q-layer.0": 315.6202, "encoder_q-layer.1": 342.1584, "encoder_q-layer.10": 652.1685, "encoder_q-layer.11": 1433.903, "encoder_q-layer.2": 400.7509, "encoder_q-layer.3": 402.1357, "encoder_q-layer.4": 409.5775, "encoder_q-layer.5": 428.6736, "encoder_q-layer.6": 485.7873, "encoder_q-layer.7": 549.6085, "encoder_q-layer.8": 646.4675, "encoder_q-layer.9": 594.4556, "epoch": 0.91, "inbatch_neg_score": 0.3005, "inbatch_pos_score": 1.0312, "learning_rate": 4.000000000000001e-06, "loss": 2.6941, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.6332, "num_token_overlap": 17.9391, "num_token_query": 52.1803, "num_token_union": 73.4265, "num_word_context": 202.3513, "num_word_doc": 49.7097, "num_word_query": 39.789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 936.2939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3003, "query_norm": 1.3988, "queue_k_norm": 1.4611, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1803, "sent_len_1": 66.6332, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.25, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.6766, "doc_norm": 1.4647, "encoder_q-embeddings": 627.8555, "encoder_q-layer.0": 418.7037, "encoder_q-layer.1": 450.3427, "encoder_q-layer.10": 654.1379, "encoder_q-layer.11": 1388.1819, "encoder_q-layer.2": 529.4196, "encoder_q-layer.3": 555.716, "encoder_q-layer.4": 586.3263, "encoder_q-layer.5": 606.3682, "encoder_q-layer.6": 628.4451, "encoder_q-layer.7": 645.4348, "encoder_q-layer.8": 631.9298, "encoder_q-layer.9": 565.499, "epoch": 0.91, "inbatch_neg_score": 0.294, "inbatch_pos_score": 1.0391, "learning_rate": 3.944444444444445e-06, "loss": 2.6766, "norm_diff": 0.0662, "norm_loss": 0.0, "num_token_doc": 66.7942, "num_token_overlap": 18.0291, "num_token_query": 52.2718, "num_token_union": 73.4781, "num_word_context": 202.2639, "num_word_doc": 49.834, "num_word_query": 39.8516, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1012.3132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2957, "query_norm": 1.3985, "queue_k_norm": 1.4629, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2718, "sent_len_1": 66.7942, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4638, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 61.8164, "active_queue_size": 16384.0, "cl_loss": 2.6896, "doc_norm": 1.4682, "encoder_q-embeddings": 699.933, "encoder_q-layer.0": 473.2171, "encoder_q-layer.1": 510.239, "encoder_q-layer.10": 627.818, "encoder_q-layer.11": 1400.2271, "encoder_q-layer.2": 588.1845, "encoder_q-layer.3": 672.1273, "encoder_q-layer.4": 735.7301, "encoder_q-layer.5": 845.8722, "encoder_q-layer.6": 777.7631, "encoder_q-layer.7": 678.8604, "encoder_q-layer.8": 668.8727, "encoder_q-layer.9": 626.6864, "epoch": 0.91, "inbatch_neg_score": 0.2959, "inbatch_pos_score": 1.0547, "learning_rate": 3.888888888888889e-06, "loss": 2.6896, "norm_diff": 0.0704, "norm_loss": 0.0, "num_token_doc": 66.8198, "num_token_overlap": 17.9608, "num_token_query": 52.0742, "num_token_union": 73.4187, "num_word_context": 202.2251, "num_word_doc": 49.8516, "num_word_query": 39.7143, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1121.1006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2971, "query_norm": 1.3978, "queue_k_norm": 1.4622, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0742, "sent_len_1": 66.8198, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0188, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.6912, "doc_norm": 1.4619, "encoder_q-embeddings": 619.6366, "encoder_q-layer.0": 380.6206, "encoder_q-layer.1": 422.5034, "encoder_q-layer.10": 678.7277, "encoder_q-layer.11": 1490.2832, "encoder_q-layer.2": 482.823, "encoder_q-layer.3": 511.8636, "encoder_q-layer.4": 521.9879, "encoder_q-layer.5": 494.108, "encoder_q-layer.6": 570.1505, "encoder_q-layer.7": 653.3654, "encoder_q-layer.8": 708.9666, "encoder_q-layer.9": 637.129, "epoch": 0.91, "inbatch_neg_score": 0.2972, "inbatch_pos_score": 1.0283, "learning_rate": 3.833333333333334e-06, "loss": 2.6912, "norm_diff": 0.0717, "norm_loss": 0.0, "num_token_doc": 66.8097, "num_token_overlap": 17.9724, "num_token_query": 52.2166, "num_token_union": 73.5207, "num_word_context": 202.1903, "num_word_doc": 49.863, "num_word_query": 39.8033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1048.415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2959, "query_norm": 1.3902, "queue_k_norm": 1.4636, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2166, "sent_len_1": 66.8097, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.945, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6914, "doc_norm": 1.4618, "encoder_q-embeddings": 482.9348, "encoder_q-layer.0": 319.7353, "encoder_q-layer.1": 342.031, "encoder_q-layer.10": 654.5519, "encoder_q-layer.11": 1347.1117, "encoder_q-layer.2": 380.6853, "encoder_q-layer.3": 392.0463, "encoder_q-layer.4": 415.4238, "encoder_q-layer.5": 430.6717, "encoder_q-layer.6": 510.328, "encoder_q-layer.7": 568.5675, "encoder_q-layer.8": 672.598, "encoder_q-layer.9": 587.1897, "epoch": 0.91, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0391, "learning_rate": 3.777777777777778e-06, "loss": 2.6914, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.8242, "num_token_overlap": 17.9714, "num_token_query": 52.0512, "num_token_union": 73.3967, "num_word_context": 202.105, "num_word_doc": 49.8195, "num_word_query": 39.6561, "postclip_grad_norm": 1.0, "preclip_grad_norm": 910.7387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3974, "queue_k_norm": 1.4638, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.0512, "sent_len_1": 66.8242, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.2262, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6773, "doc_norm": 1.4643, "encoder_q-embeddings": 490.1202, "encoder_q-layer.0": 296.2318, "encoder_q-layer.1": 315.3452, "encoder_q-layer.10": 781.0696, "encoder_q-layer.11": 1538.1779, "encoder_q-layer.2": 353.0749, "encoder_q-layer.3": 378.0027, "encoder_q-layer.4": 417.5166, "encoder_q-layer.5": 432.3629, "encoder_q-layer.6": 517.7657, "encoder_q-layer.7": 574.1985, "encoder_q-layer.8": 674.1197, "encoder_q-layer.9": 614.9086, "epoch": 0.91, "inbatch_neg_score": 0.2935, "inbatch_pos_score": 1.043, "learning_rate": 3.722222222222222e-06, "loss": 2.6773, "norm_diff": 0.0651, "norm_loss": 0.0, "num_token_doc": 66.621, "num_token_overlap": 18.0112, "num_token_query": 52.185, "num_token_union": 73.3367, "num_word_context": 202.1444, "num_word_doc": 49.6752, "num_word_query": 39.7847, "postclip_grad_norm": 1.0, "preclip_grad_norm": 981.962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.3992, "queue_k_norm": 1.4643, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.185, "sent_len_1": 66.621, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8175, "stdk": 0.049, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 62.9883, "active_queue_size": 16384.0, "cl_loss": 2.6692, "doc_norm": 1.4658, "encoder_q-embeddings": 553.9524, "encoder_q-layer.0": 370.8161, "encoder_q-layer.1": 422.4052, "encoder_q-layer.10": 779.9997, "encoder_q-layer.11": 1478.9136, "encoder_q-layer.2": 507.5558, "encoder_q-layer.3": 525.517, "encoder_q-layer.4": 559.1332, "encoder_q-layer.5": 540.7095, "encoder_q-layer.6": 622.8672, "encoder_q-layer.7": 657.0191, "encoder_q-layer.8": 684.8749, "encoder_q-layer.9": 596.254, "epoch": 0.91, "inbatch_neg_score": 0.2946, "inbatch_pos_score": 1.0352, "learning_rate": 3.666666666666667e-06, "loss": 2.6692, "norm_diff": 0.075, "norm_loss": 0.0, "num_token_doc": 66.9385, "num_token_overlap": 18.0666, "num_token_query": 52.4334, "num_token_union": 73.6241, "num_word_context": 202.357, "num_word_doc": 49.9342, "num_word_query": 39.9786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1040.5908, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3909, "queue_k_norm": 1.4646, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.4334, "sent_len_1": 66.9385, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8162, "stdk": 0.0491, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.673, "doc_norm": 1.4606, "encoder_q-embeddings": 1362.3245, "encoder_q-layer.0": 1042.3618, "encoder_q-layer.1": 1118.016, "encoder_q-layer.10": 685.6836, "encoder_q-layer.11": 1455.5093, "encoder_q-layer.2": 1377.1221, "encoder_q-layer.3": 1373.3888, "encoder_q-layer.4": 1386.1036, "encoder_q-layer.5": 1393.9696, "encoder_q-layer.6": 1421.9052, "encoder_q-layer.7": 1089.9181, "encoder_q-layer.8": 878.9377, "encoder_q-layer.9": 632.3008, "epoch": 0.91, "inbatch_neg_score": 0.2927, "inbatch_pos_score": 1.0322, "learning_rate": 3.611111111111111e-06, "loss": 2.673, "norm_diff": 0.0716, "norm_loss": 0.0, "num_token_doc": 66.9833, "num_token_overlap": 18.0155, "num_token_query": 52.2395, "num_token_union": 73.6247, "num_word_context": 202.6405, "num_word_doc": 49.9621, "num_word_query": 39.8209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1809.2595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.389, "queue_k_norm": 1.4629, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2395, "sent_len_1": 66.9833, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.3812, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6665, "doc_norm": 1.4662, "encoder_q-embeddings": 496.1821, "encoder_q-layer.0": 332.2993, "encoder_q-layer.1": 356.366, "encoder_q-layer.10": 652.9759, "encoder_q-layer.11": 1461.6624, "encoder_q-layer.2": 408.378, "encoder_q-layer.3": 418.0899, "encoder_q-layer.4": 435.683, "encoder_q-layer.5": 480.8091, "encoder_q-layer.6": 572.9403, "encoder_q-layer.7": 614.6708, "encoder_q-layer.8": 732.9105, "encoder_q-layer.9": 639.8162, "epoch": 0.91, "inbatch_neg_score": 0.2993, "inbatch_pos_score": 1.0264, "learning_rate": 3.555555555555556e-06, "loss": 2.6665, "norm_diff": 0.0705, "norm_loss": 0.0, "num_token_doc": 66.6914, "num_token_overlap": 17.9917, "num_token_query": 52.1162, "num_token_union": 73.3644, "num_word_context": 202.3884, "num_word_doc": 49.7822, "num_word_query": 39.7231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 983.7831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2983, "query_norm": 1.3957, "queue_k_norm": 1.4616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1162, "sent_len_1": 66.6914, "sent_len_max_0": 127.995, "sent_len_max_1": 188.15, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6776, "doc_norm": 1.4685, "encoder_q-embeddings": 576.6719, "encoder_q-layer.0": 380.3852, "encoder_q-layer.1": 433.4356, "encoder_q-layer.10": 747.4516, "encoder_q-layer.11": 1427.6846, "encoder_q-layer.2": 491.6473, "encoder_q-layer.3": 492.9781, "encoder_q-layer.4": 536.9276, "encoder_q-layer.5": 538.1404, "encoder_q-layer.6": 599.3411, "encoder_q-layer.7": 666.0906, "encoder_q-layer.8": 737.2059, "encoder_q-layer.9": 622.7219, "epoch": 0.91, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0273, "learning_rate": 3.5000000000000004e-06, "loss": 2.6776, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.7888, "num_token_overlap": 18.0224, "num_token_query": 52.2791, "num_token_union": 73.4877, "num_word_context": 202.5695, "num_word_doc": 49.8387, "num_word_query": 39.879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1030.8426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.3929, "queue_k_norm": 1.462, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2791, "sent_len_1": 66.7888, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3787, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 2.6622, "doc_norm": 1.4649, "encoder_q-embeddings": 488.214, "encoder_q-layer.0": 313.2536, "encoder_q-layer.1": 336.6125, "encoder_q-layer.10": 668.5866, "encoder_q-layer.11": 1473.2589, "encoder_q-layer.2": 381.8775, "encoder_q-layer.3": 398.9984, "encoder_q-layer.4": 414.8359, "encoder_q-layer.5": 419.4102, "encoder_q-layer.6": 488.8484, "encoder_q-layer.7": 533.9321, "encoder_q-layer.8": 640.6928, "encoder_q-layer.9": 594.7028, "epoch": 0.92, "inbatch_neg_score": 0.2962, "inbatch_pos_score": 1.0352, "learning_rate": 3.4444444444444444e-06, "loss": 2.6622, "norm_diff": 0.0707, "norm_loss": 0.0, "num_token_doc": 66.901, "num_token_overlap": 18.0418, "num_token_query": 52.307, "num_token_union": 73.5728, "num_word_context": 202.8166, "num_word_doc": 49.9111, "num_word_query": 39.8879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 961.942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2959, "query_norm": 1.3942, "queue_k_norm": 1.4635, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.307, "sent_len_1": 66.901, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2225, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6689, "doc_norm": 1.4611, "encoder_q-embeddings": 826.858, "encoder_q-layer.0": 534.1141, "encoder_q-layer.1": 643.6742, "encoder_q-layer.10": 689.2502, "encoder_q-layer.11": 1449.3531, "encoder_q-layer.2": 722.1978, "encoder_q-layer.3": 679.5774, "encoder_q-layer.4": 728.8922, "encoder_q-layer.5": 700.3072, "encoder_q-layer.6": 679.8802, "encoder_q-layer.7": 723.7787, "encoder_q-layer.8": 739.0092, "encoder_q-layer.9": 612.2043, "epoch": 0.92, "inbatch_neg_score": 0.2904, "inbatch_pos_score": 1.0449, "learning_rate": 3.3888888888888893e-06, "loss": 2.6689, "norm_diff": 0.0706, "norm_loss": 0.0, "num_token_doc": 66.9052, "num_token_overlap": 18.0785, "num_token_query": 52.4047, "num_token_union": 73.57, "num_word_context": 202.4307, "num_word_doc": 49.9004, "num_word_query": 39.9496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1206.5092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.3904, "queue_k_norm": 1.4637, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.4047, "sent_len_1": 66.9052, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7612, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6751, "doc_norm": 1.4606, "encoder_q-embeddings": 602.6819, "encoder_q-layer.0": 409.4219, "encoder_q-layer.1": 455.5891, "encoder_q-layer.10": 709.0029, "encoder_q-layer.11": 1422.7045, "encoder_q-layer.2": 554.8695, "encoder_q-layer.3": 574.2726, "encoder_q-layer.4": 601.0023, "encoder_q-layer.5": 587.822, "encoder_q-layer.6": 652.7373, "encoder_q-layer.7": 686.0252, "encoder_q-layer.8": 713.5817, "encoder_q-layer.9": 627.7542, "epoch": 0.92, "inbatch_neg_score": 0.2946, "inbatch_pos_score": 1.0254, "learning_rate": 3.3333333333333333e-06, "loss": 2.6751, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.6754, "num_token_overlap": 18.0216, "num_token_query": 52.2001, "num_token_union": 73.3431, "num_word_context": 202.0954, "num_word_doc": 49.7499, "num_word_query": 39.7608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1057.7571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.3883, "queue_k_norm": 1.4618, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2001, "sent_len_1": 66.6754, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8537, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6769, "doc_norm": 1.4593, "encoder_q-embeddings": 1686.235, "encoder_q-layer.0": 1177.9202, "encoder_q-layer.1": 1598.3168, "encoder_q-layer.10": 1264.0852, "encoder_q-layer.11": 2900.2659, "encoder_q-layer.2": 1891.7181, "encoder_q-layer.3": 1612.7711, "encoder_q-layer.4": 1313.9791, "encoder_q-layer.5": 1267.6096, "encoder_q-layer.6": 1345.0385, "encoder_q-layer.7": 1297.0703, "encoder_q-layer.8": 1293.3759, "encoder_q-layer.9": 1156.0466, "epoch": 0.92, "inbatch_neg_score": 0.2965, "inbatch_pos_score": 1.0439, "learning_rate": 3.277777777777778e-06, "loss": 2.6769, "norm_diff": 0.0715, "norm_loss": 0.0, "num_token_doc": 66.9165, "num_token_overlap": 18.0501, "num_token_query": 52.3947, "num_token_union": 73.6064, "num_word_context": 202.5263, "num_word_doc": 49.8931, "num_word_query": 39.9387, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2382.9955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3878, "queue_k_norm": 1.4614, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3947, "sent_len_1": 66.9165, "sent_len_max_0": 128.0, "sent_len_max_1": 190.69, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 2.6641, "doc_norm": 1.4688, "encoder_q-embeddings": 959.007, "encoder_q-layer.0": 600.8491, "encoder_q-layer.1": 646.015, "encoder_q-layer.10": 1587.6748, "encoder_q-layer.11": 3065.9221, "encoder_q-layer.2": 746.1539, "encoder_q-layer.3": 758.5635, "encoder_q-layer.4": 834.1676, "encoder_q-layer.5": 870.9464, "encoder_q-layer.6": 975.0374, "encoder_q-layer.7": 1209.3855, "encoder_q-layer.8": 1388.8806, "encoder_q-layer.9": 1247.12, "epoch": 0.92, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 1.0264, "learning_rate": 3.2222222222222222e-06, "loss": 2.6641, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.7604, "num_token_overlap": 18.007, "num_token_query": 52.3234, "num_token_union": 73.5162, "num_word_context": 202.3926, "num_word_doc": 49.8088, "num_word_query": 39.8906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1968.4601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3932, "queue_k_norm": 1.4611, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3234, "sent_len_1": 66.7604, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.3162, "stdk": 0.0492, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 60.4492, "active_queue_size": 16384.0, "cl_loss": 2.6643, "doc_norm": 1.4632, "encoder_q-embeddings": 1266.7771, "encoder_q-layer.0": 850.7219, "encoder_q-layer.1": 1016.9469, "encoder_q-layer.10": 1271.5718, "encoder_q-layer.11": 2893.2112, "encoder_q-layer.2": 1079.8442, "encoder_q-layer.3": 1059.6207, "encoder_q-layer.4": 1069.5022, "encoder_q-layer.5": 972.1599, "encoder_q-layer.6": 1085.1705, "encoder_q-layer.7": 1141.745, "encoder_q-layer.8": 1276.7455, "encoder_q-layer.9": 1253.0526, "epoch": 0.92, "inbatch_neg_score": 0.2967, "inbatch_pos_score": 1.0391, "learning_rate": 3.166666666666667e-06, "loss": 2.6643, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 67.0376, "num_token_overlap": 18.0441, "num_token_query": 52.2262, "num_token_union": 73.5801, "num_word_context": 202.6353, "num_word_doc": 50.0145, "num_word_query": 39.7912, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2060.2758, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2966, "query_norm": 1.3988, "queue_k_norm": 1.4639, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2262, "sent_len_1": 67.0376, "sent_len_max_0": 128.0, "sent_len_max_1": 189.255, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.6886, "doc_norm": 1.4609, "encoder_q-embeddings": 1032.0841, "encoder_q-layer.0": 648.4321, "encoder_q-layer.1": 764.0793, "encoder_q-layer.10": 1350.1396, "encoder_q-layer.11": 2902.5938, "encoder_q-layer.2": 899.3076, "encoder_q-layer.3": 911.0847, "encoder_q-layer.4": 945.8455, "encoder_q-layer.5": 959.5635, "encoder_q-layer.6": 1067.8807, "encoder_q-layer.7": 1131.3478, "encoder_q-layer.8": 1348.2411, "encoder_q-layer.9": 1178.7701, "epoch": 0.92, "inbatch_neg_score": 0.2921, "inbatch_pos_score": 1.0195, "learning_rate": 3.111111111111111e-06, "loss": 2.6886, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.7178, "num_token_overlap": 18.0091, "num_token_query": 52.1708, "num_token_union": 73.3493, "num_word_context": 202.1314, "num_word_doc": 49.7611, "num_word_query": 39.7749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1955.1295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.3785, "queue_k_norm": 1.4627, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1708, "sent_len_1": 66.7178, "sent_len_max_0": 128.0, "sent_len_max_1": 190.77, "stdk": 0.0489, "stdq": 0.0458, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 59.082, "active_queue_size": 16384.0, "cl_loss": 2.6708, "doc_norm": 1.4599, "encoder_q-embeddings": 1011.0517, "encoder_q-layer.0": 654.3282, "encoder_q-layer.1": 725.186, "encoder_q-layer.10": 1401.3835, "encoder_q-layer.11": 2858.0347, "encoder_q-layer.2": 831.7761, "encoder_q-layer.3": 854.848, "encoder_q-layer.4": 937.3118, "encoder_q-layer.5": 999.2668, "encoder_q-layer.6": 1138.0625, "encoder_q-layer.7": 1266.0327, "encoder_q-layer.8": 1473.8713, "encoder_q-layer.9": 1316.895, "epoch": 0.92, "inbatch_neg_score": 0.2948, "inbatch_pos_score": 1.0205, "learning_rate": 3.0555555555555556e-06, "loss": 2.6708, "norm_diff": 0.0654, "norm_loss": 0.0, "num_token_doc": 66.9758, "num_token_overlap": 18.083, "num_token_query": 52.3218, "num_token_union": 73.5671, "num_word_context": 202.6404, "num_word_doc": 49.9637, "num_word_query": 39.8734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1981.4514, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.3945, "queue_k_norm": 1.4614, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3218, "sent_len_1": 66.9758, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8975, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.6829, "doc_norm": 1.4676, "encoder_q-embeddings": 967.1808, "encoder_q-layer.0": 629.9915, "encoder_q-layer.1": 721.5654, "encoder_q-layer.10": 1298.1057, "encoder_q-layer.11": 2769.3401, "encoder_q-layer.2": 819.0142, "encoder_q-layer.3": 846.5984, "encoder_q-layer.4": 904.1425, "encoder_q-layer.5": 957.1199, "encoder_q-layer.6": 1078.2084, "encoder_q-layer.7": 1176.2693, "encoder_q-layer.8": 1369.6631, "encoder_q-layer.9": 1159.7954, "epoch": 0.92, "inbatch_neg_score": 0.2935, "inbatch_pos_score": 1.042, "learning_rate": 3e-06, "loss": 2.6829, "norm_diff": 0.079, "norm_loss": 0.0, "num_token_doc": 67.0031, "num_token_overlap": 18.0378, "num_token_query": 52.2276, "num_token_union": 73.5077, "num_word_context": 202.5549, "num_word_doc": 49.9959, "num_word_query": 39.7922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1882.8644, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.3886, "queue_k_norm": 1.4623, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2276, "sent_len_1": 67.0031, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2163, "stdk": 0.0492, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6784, "doc_norm": 1.4671, "encoder_q-embeddings": 613.5475, "encoder_q-layer.0": 392.1582, "encoder_q-layer.1": 413.1991, "encoder_q-layer.10": 638.2197, "encoder_q-layer.11": 1425.1383, "encoder_q-layer.2": 491.8859, "encoder_q-layer.3": 502.1255, "encoder_q-layer.4": 558.4203, "encoder_q-layer.5": 568.2591, "encoder_q-layer.6": 582.8582, "encoder_q-layer.7": 636.1725, "encoder_q-layer.8": 660.8021, "encoder_q-layer.9": 604.7634, "epoch": 0.92, "inbatch_neg_score": 0.2939, "inbatch_pos_score": 1.0322, "learning_rate": 2.9444444444444445e-06, "loss": 2.6784, "norm_diff": 0.0717, "norm_loss": 0.0, "num_token_doc": 66.5879, "num_token_overlap": 17.9441, "num_token_query": 52.2129, "num_token_union": 73.403, "num_word_context": 202.2519, "num_word_doc": 49.7054, "num_word_query": 39.7762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1031.1003, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.3954, "queue_k_norm": 1.4621, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2129, "sent_len_1": 66.5879, "sent_len_max_0": 127.985, "sent_len_max_1": 187.77, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.6766, "doc_norm": 1.4656, "encoder_q-embeddings": 540.0573, "encoder_q-layer.0": 367.9136, "encoder_q-layer.1": 389.3563, "encoder_q-layer.10": 641.2745, "encoder_q-layer.11": 1441.7144, "encoder_q-layer.2": 460.6532, "encoder_q-layer.3": 478.0671, "encoder_q-layer.4": 528.9778, "encoder_q-layer.5": 536.6047, "encoder_q-layer.6": 549.021, "encoder_q-layer.7": 585.3755, "encoder_q-layer.8": 668.2262, "encoder_q-layer.9": 613.9857, "epoch": 0.93, "inbatch_neg_score": 0.294, "inbatch_pos_score": 1.0244, "learning_rate": 2.888888888888889e-06, "loss": 2.6766, "norm_diff": 0.0734, "norm_loss": 0.0, "num_token_doc": 66.8777, "num_token_overlap": 17.9979, "num_token_query": 52.1662, "num_token_union": 73.4968, "num_word_context": 202.2698, "num_word_doc": 49.9404, "num_word_query": 39.7626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1002.6485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.3922, "queue_k_norm": 1.4625, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1662, "sent_len_1": 66.8777, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3887, "stdk": 0.0491, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 2.6853, "doc_norm": 1.461, "encoder_q-embeddings": 460.5786, "encoder_q-layer.0": 292.5373, "encoder_q-layer.1": 323.0, "encoder_q-layer.10": 629.2478, "encoder_q-layer.11": 1418.399, "encoder_q-layer.2": 357.746, "encoder_q-layer.3": 363.351, "encoder_q-layer.4": 401.8438, "encoder_q-layer.5": 415.9611, "encoder_q-layer.6": 476.5589, "encoder_q-layer.7": 534.6715, "encoder_q-layer.8": 611.6918, "encoder_q-layer.9": 574.4408, "epoch": 0.93, "inbatch_neg_score": 0.2933, "inbatch_pos_score": 1.0195, "learning_rate": 2.8333333333333335e-06, "loss": 2.6853, "norm_diff": 0.0673, "norm_loss": 0.0, "num_token_doc": 66.7563, "num_token_overlap": 17.9596, "num_token_query": 52.0991, "num_token_union": 73.4272, "num_word_context": 202.3672, "num_word_doc": 49.8055, "num_word_query": 39.7188, "postclip_grad_norm": 1.0, "preclip_grad_norm": 909.5418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3938, "queue_k_norm": 1.463, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.0991, "sent_len_1": 66.7563, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5112, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.6745, "doc_norm": 1.4651, "encoder_q-embeddings": 908.658, "encoder_q-layer.0": 623.9807, "encoder_q-layer.1": 711.4066, "encoder_q-layer.10": 693.9034, "encoder_q-layer.11": 1439.7197, "encoder_q-layer.2": 899.2953, "encoder_q-layer.3": 962.2601, "encoder_q-layer.4": 1008.447, "encoder_q-layer.5": 984.9819, "encoder_q-layer.6": 1017.2994, "encoder_q-layer.7": 1013.7802, "encoder_q-layer.8": 926.7043, "encoder_q-layer.9": 680.3246, "epoch": 0.93, "inbatch_neg_score": 0.2923, "inbatch_pos_score": 1.0664, "learning_rate": 2.777777777777778e-06, "loss": 2.6745, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.9432, "num_token_overlap": 18.0561, "num_token_query": 52.2924, "num_token_union": 73.5194, "num_word_context": 202.5911, "num_word_doc": 49.933, "num_word_query": 39.8804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1418.571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.4048, "queue_k_norm": 1.4628, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2924, "sent_len_1": 66.9432, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9812, "stdk": 0.0491, "stdq": 0.0468, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 60.8398, "active_queue_size": 16384.0, "cl_loss": 2.6837, "doc_norm": 1.4582, "encoder_q-embeddings": 596.3871, "encoder_q-layer.0": 388.6646, "encoder_q-layer.1": 427.4365, "encoder_q-layer.10": 653.6951, "encoder_q-layer.11": 1436.6426, "encoder_q-layer.2": 503.824, "encoder_q-layer.3": 524.5083, "encoder_q-layer.4": 576.3913, "encoder_q-layer.5": 529.8224, "encoder_q-layer.6": 539.7457, "encoder_q-layer.7": 573.5679, "encoder_q-layer.8": 673.1915, "encoder_q-layer.9": 614.5276, "epoch": 0.93, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0283, "learning_rate": 2.7222222222222224e-06, "loss": 2.6837, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 67.0156, "num_token_overlap": 18.1038, "num_token_query": 52.3623, "num_token_union": 73.5808, "num_word_context": 202.8286, "num_word_doc": 49.9976, "num_word_query": 39.9244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1033.5197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3993, "queue_k_norm": 1.4616, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3623, "sent_len_1": 67.0156, "sent_len_max_0": 128.0, "sent_len_max_1": 190.03, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6722, "doc_norm": 1.4624, "encoder_q-embeddings": 571.0725, "encoder_q-layer.0": 363.895, "encoder_q-layer.1": 390.3107, "encoder_q-layer.10": 631.2098, "encoder_q-layer.11": 1436.9169, "encoder_q-layer.2": 453.7603, "encoder_q-layer.3": 498.2148, "encoder_q-layer.4": 520.5739, "encoder_q-layer.5": 583.0646, "encoder_q-layer.6": 653.6924, "encoder_q-layer.7": 683.1267, "encoder_q-layer.8": 671.3766, "encoder_q-layer.9": 579.0128, "epoch": 0.93, "inbatch_neg_score": 0.2966, "inbatch_pos_score": 1.0391, "learning_rate": 2.666666666666667e-06, "loss": 2.6722, "norm_diff": 0.0687, "norm_loss": 0.0, "num_token_doc": 66.8107, "num_token_overlap": 17.9982, "num_token_query": 52.2613, "num_token_union": 73.4935, "num_word_context": 202.2082, "num_word_doc": 49.8814, "num_word_query": 39.8388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1027.2044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3937, "queue_k_norm": 1.4617, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2613, "sent_len_1": 66.8107, "sent_len_max_0": 127.9925, "sent_len_max_1": 187.5662, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6668, "doc_norm": 1.4621, "encoder_q-embeddings": 1444.9857, "encoder_q-layer.0": 1116.5685, "encoder_q-layer.1": 1233.1566, "encoder_q-layer.10": 612.9226, "encoder_q-layer.11": 1371.3774, "encoder_q-layer.2": 1364.8676, "encoder_q-layer.3": 1379.1936, "encoder_q-layer.4": 1580.4288, "encoder_q-layer.5": 1547.7434, "encoder_q-layer.6": 1568.9104, "encoder_q-layer.7": 955.5046, "encoder_q-layer.8": 727.822, "encoder_q-layer.9": 603.3644, "epoch": 0.93, "inbatch_neg_score": 0.2911, "inbatch_pos_score": 1.0537, "learning_rate": 2.6111111111111113e-06, "loss": 2.6668, "norm_diff": 0.0568, "norm_loss": 0.0, "num_token_doc": 66.8844, "num_token_overlap": 18.0469, "num_token_query": 52.2128, "num_token_union": 73.4637, "num_word_context": 202.3179, "num_word_doc": 49.9295, "num_word_query": 39.7753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1886.8969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.4053, "queue_k_norm": 1.4647, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2128, "sent_len_1": 66.8844, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1312, "stdk": 0.049, "stdq": 0.0469, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6756, "doc_norm": 1.4672, "encoder_q-embeddings": 532.2432, "encoder_q-layer.0": 346.9761, "encoder_q-layer.1": 377.6632, "encoder_q-layer.10": 681.8433, "encoder_q-layer.11": 1399.772, "encoder_q-layer.2": 430.2957, "encoder_q-layer.3": 436.2282, "encoder_q-layer.4": 467.24, "encoder_q-layer.5": 489.0155, "encoder_q-layer.6": 545.4773, "encoder_q-layer.7": 646.5544, "encoder_q-layer.8": 720.8448, "encoder_q-layer.9": 608.5768, "epoch": 0.93, "inbatch_neg_score": 0.2938, "inbatch_pos_score": 1.0508, "learning_rate": 2.5555555555555557e-06, "loss": 2.6756, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.9574, "num_token_overlap": 17.9664, "num_token_query": 52.2048, "num_token_union": 73.6109, "num_word_context": 202.3577, "num_word_doc": 49.9623, "num_word_query": 39.78, "postclip_grad_norm": 1.0, "preclip_grad_norm": 988.7575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.4137, "queue_k_norm": 1.4648, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2048, "sent_len_1": 66.9574, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3462, "stdk": 0.0492, "stdq": 0.0472, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.682, "doc_norm": 1.4614, "encoder_q-embeddings": 468.7096, "encoder_q-layer.0": 293.7401, "encoder_q-layer.1": 316.3052, "encoder_q-layer.10": 694.9209, "encoder_q-layer.11": 1425.2737, "encoder_q-layer.2": 360.4814, "encoder_q-layer.3": 375.7078, "encoder_q-layer.4": 389.6968, "encoder_q-layer.5": 405.6172, "encoder_q-layer.6": 485.0743, "encoder_q-layer.7": 536.5557, "encoder_q-layer.8": 624.9043, "encoder_q-layer.9": 583.6807, "epoch": 0.93, "inbatch_neg_score": 0.2928, "inbatch_pos_score": 1.041, "learning_rate": 2.5e-06, "loss": 2.682, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.9849, "num_token_overlap": 18.0039, "num_token_query": 52.2323, "num_token_union": 73.5753, "num_word_context": 202.2562, "num_word_doc": 49.9551, "num_word_query": 39.8356, "postclip_grad_norm": 1.0, "preclip_grad_norm": 919.6956, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.3929, "queue_k_norm": 1.4625, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2323, "sent_len_1": 66.9849, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4925, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 2.671, "doc_norm": 1.4604, "encoder_q-embeddings": 464.3222, "encoder_q-layer.0": 298.9139, "encoder_q-layer.1": 326.445, "encoder_q-layer.10": 661.7235, "encoder_q-layer.11": 1447.3573, "encoder_q-layer.2": 379.8297, "encoder_q-layer.3": 381.8566, "encoder_q-layer.4": 400.6309, "encoder_q-layer.5": 401.6145, "encoder_q-layer.6": 468.9849, "encoder_q-layer.7": 543.8972, "encoder_q-layer.8": 649.1401, "encoder_q-layer.9": 586.3066, "epoch": 0.93, "inbatch_neg_score": 0.2936, "inbatch_pos_score": 1.002, "learning_rate": 2.4444444444444447e-06, "loss": 2.671, "norm_diff": 0.0852, "norm_loss": 0.0, "num_token_doc": 67.0185, "num_token_overlap": 18.0653, "num_token_query": 52.3726, "num_token_union": 73.6034, "num_word_context": 202.4015, "num_word_doc": 50.0725, "num_word_query": 39.9562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 937.2386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3752, "queue_k_norm": 1.4621, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.3726, "sent_len_1": 67.0185, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7088, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.6785, "doc_norm": 1.4587, "encoder_q-embeddings": 802.0989, "encoder_q-layer.0": 577.9037, "encoder_q-layer.1": 680.3068, "encoder_q-layer.10": 668.6329, "encoder_q-layer.11": 1421.0566, "encoder_q-layer.2": 783.4004, "encoder_q-layer.3": 812.9296, "encoder_q-layer.4": 793.3793, "encoder_q-layer.5": 653.5498, "encoder_q-layer.6": 640.3107, "encoder_q-layer.7": 593.0281, "encoder_q-layer.8": 654.1261, "encoder_q-layer.9": 573.0065, "epoch": 0.93, "inbatch_neg_score": 0.2905, "inbatch_pos_score": 1.0332, "learning_rate": 2.388888888888889e-06, "loss": 2.6785, "norm_diff": 0.0594, "norm_loss": 0.0, "num_token_doc": 67.0398, "num_token_overlap": 18.0482, "num_token_query": 52.2431, "num_token_union": 73.5766, "num_word_context": 202.6225, "num_word_doc": 49.9773, "num_word_query": 39.8183, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1176.2686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2915, "query_norm": 1.3993, "queue_k_norm": 1.4635, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2431, "sent_len_1": 67.0398, "sent_len_max_0": 128.0, "sent_len_max_1": 192.1712, "stdk": 0.0489, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 62.3047, "active_queue_size": 16384.0, "cl_loss": 2.6789, "doc_norm": 1.4599, "encoder_q-embeddings": 521.1093, "encoder_q-layer.0": 341.974, "encoder_q-layer.1": 372.9912, "encoder_q-layer.10": 660.7462, "encoder_q-layer.11": 1427.0691, "encoder_q-layer.2": 412.3977, "encoder_q-layer.3": 426.2364, "encoder_q-layer.4": 466.6061, "encoder_q-layer.5": 468.4937, "encoder_q-layer.6": 484.6801, "encoder_q-layer.7": 564.9056, "encoder_q-layer.8": 665.0741, "encoder_q-layer.9": 630.34, "epoch": 0.94, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 1.0322, "learning_rate": 2.3333333333333336e-06, "loss": 2.6789, "norm_diff": 0.0735, "norm_loss": 0.0, "num_token_doc": 66.9867, "num_token_overlap": 17.968, "num_token_query": 52.2466, "num_token_union": 73.627, "num_word_context": 202.6797, "num_word_doc": 49.9487, "num_word_query": 39.804, "postclip_grad_norm": 1.0, "preclip_grad_norm": 954.9955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2922, "query_norm": 1.3864, "queue_k_norm": 1.4614, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2466, "sent_len_1": 66.9867, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3313, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.6808, "doc_norm": 1.4602, "encoder_q-embeddings": 639.1579, "encoder_q-layer.0": 417.0497, "encoder_q-layer.1": 467.3226, "encoder_q-layer.10": 638.5448, "encoder_q-layer.11": 1387.4457, "encoder_q-layer.2": 556.4135, "encoder_q-layer.3": 559.4019, "encoder_q-layer.4": 585.1255, "encoder_q-layer.5": 579.4646, "encoder_q-layer.6": 631.9701, "encoder_q-layer.7": 617.0291, "encoder_q-layer.8": 642.0582, "encoder_q-layer.9": 580.0287, "epoch": 0.94, "inbatch_neg_score": 0.2911, "inbatch_pos_score": 1.0068, "learning_rate": 2.277777777777778e-06, "loss": 2.6808, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.6579, "num_token_overlap": 18.0022, "num_token_query": 52.1455, "num_token_union": 73.3251, "num_word_context": 202.0095, "num_word_doc": 49.7378, "num_word_query": 39.7434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1027.2276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.3846, "queue_k_norm": 1.4617, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1455, "sent_len_1": 66.6579, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.0462, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6884, "doc_norm": 1.4671, "encoder_q-embeddings": 623.4911, "encoder_q-layer.0": 394.6723, "encoder_q-layer.1": 438.4126, "encoder_q-layer.10": 647.499, "encoder_q-layer.11": 1462.6302, "encoder_q-layer.2": 520.6369, "encoder_q-layer.3": 550.2752, "encoder_q-layer.4": 579.918, "encoder_q-layer.5": 619.2946, "encoder_q-layer.6": 588.7919, "encoder_q-layer.7": 600.7115, "encoder_q-layer.8": 699.1736, "encoder_q-layer.9": 593.5333, "epoch": 0.94, "inbatch_neg_score": 0.2932, "inbatch_pos_score": 1.0098, "learning_rate": 2.2222222222222225e-06, "loss": 2.6884, "norm_diff": 0.0836, "norm_loss": 0.0, "num_token_doc": 66.9431, "num_token_overlap": 18.0274, "num_token_query": 52.1736, "num_token_union": 73.5046, "num_word_context": 202.2939, "num_word_doc": 49.957, "num_word_query": 39.7805, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1054.9293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.3834, "queue_k_norm": 1.4613, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1736, "sent_len_1": 66.9431, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5112, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.6779, "doc_norm": 1.4597, "encoder_q-embeddings": 630.3865, "encoder_q-layer.0": 412.2393, "encoder_q-layer.1": 472.7343, "encoder_q-layer.10": 723.2349, "encoder_q-layer.11": 1559.9878, "encoder_q-layer.2": 554.6567, "encoder_q-layer.3": 623.4203, "encoder_q-layer.4": 649.7925, "encoder_q-layer.5": 615.9805, "encoder_q-layer.6": 640.0336, "encoder_q-layer.7": 678.3284, "encoder_q-layer.8": 765.8105, "encoder_q-layer.9": 674.881, "epoch": 0.94, "inbatch_neg_score": 0.2916, "inbatch_pos_score": 1.0098, "learning_rate": 2.166666666666667e-06, "loss": 2.6779, "norm_diff": 0.0778, "norm_loss": 0.0, "num_token_doc": 67.0537, "num_token_overlap": 18.043, "num_token_query": 52.2527, "num_token_union": 73.6217, "num_word_context": 202.6943, "num_word_doc": 50.0608, "num_word_query": 39.8532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1106.42, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.3819, "queue_k_norm": 1.4611, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2527, "sent_len_1": 67.0537, "sent_len_max_0": 127.995, "sent_len_max_1": 191.4462, "stdk": 0.0488, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 64.3555, "active_queue_size": 16384.0, "cl_loss": 2.682, "doc_norm": 1.4585, "encoder_q-embeddings": 850.213, "encoder_q-layer.0": 632.8358, "encoder_q-layer.1": 716.3987, "encoder_q-layer.10": 615.7058, "encoder_q-layer.11": 1350.9751, "encoder_q-layer.2": 831.6014, "encoder_q-layer.3": 858.6735, "encoder_q-layer.4": 943.5815, "encoder_q-layer.5": 951.4318, "encoder_q-layer.6": 926.5687, "encoder_q-layer.7": 625.5762, "encoder_q-layer.8": 637.3669, "encoder_q-layer.9": 582.1006, "epoch": 0.94, "inbatch_neg_score": 0.2929, "inbatch_pos_score": 1.0518, "learning_rate": 2.1111111111111114e-06, "loss": 2.682, "norm_diff": 0.0616, "norm_loss": 0.0, "num_token_doc": 66.666, "num_token_overlap": 17.9944, "num_token_query": 52.3205, "num_token_union": 73.4722, "num_word_context": 202.1405, "num_word_doc": 49.7497, "num_word_query": 39.8856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1248.9218, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.3969, "queue_k_norm": 1.46, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3205, "sent_len_1": 66.666, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3088, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 2.6791, "doc_norm": 1.4586, "encoder_q-embeddings": 555.4592, "encoder_q-layer.0": 351.7331, "encoder_q-layer.1": 383.1423, "encoder_q-layer.10": 680.6677, "encoder_q-layer.11": 1417.162, "encoder_q-layer.2": 445.865, "encoder_q-layer.3": 459.0539, "encoder_q-layer.4": 492.6378, "encoder_q-layer.5": 512.402, "encoder_q-layer.6": 543.0223, "encoder_q-layer.7": 572.2935, "encoder_q-layer.8": 649.4377, "encoder_q-layer.9": 621.996, "epoch": 0.94, "inbatch_neg_score": 0.2951, "inbatch_pos_score": 1.0254, "learning_rate": 2.055555555555556e-06, "loss": 2.6791, "norm_diff": 0.0578, "norm_loss": 0.0, "num_token_doc": 67.1126, "num_token_overlap": 18.0751, "num_token_query": 52.2499, "num_token_union": 73.6121, "num_word_context": 202.5757, "num_word_doc": 50.0678, "num_word_query": 39.8536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 976.2583, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.4008, "queue_k_norm": 1.4621, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2499, "sent_len_1": 67.1126, "sent_len_max_0": 127.9925, "sent_len_max_1": 192.2725, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 2.6954, "doc_norm": 1.4621, "encoder_q-embeddings": 480.9131, "encoder_q-layer.0": 307.7339, "encoder_q-layer.1": 332.7288, "encoder_q-layer.10": 608.834, "encoder_q-layer.11": 1347.3162, "encoder_q-layer.2": 386.7944, "encoder_q-layer.3": 397.6008, "encoder_q-layer.4": 432.4493, "encoder_q-layer.5": 446.6678, "encoder_q-layer.6": 502.3412, "encoder_q-layer.7": 540.0268, "encoder_q-layer.8": 657.7051, "encoder_q-layer.9": 579.7376, "epoch": 0.94, "inbatch_neg_score": 0.2919, "inbatch_pos_score": 1.0186, "learning_rate": 2.0000000000000003e-06, "loss": 2.6954, "norm_diff": 0.0771, "norm_loss": 0.0, "num_token_doc": 66.7503, "num_token_overlap": 18.0079, "num_token_query": 52.2502, "num_token_union": 73.4862, "num_word_context": 202.3093, "num_word_doc": 49.7815, "num_word_query": 39.8348, "postclip_grad_norm": 1.0, "preclip_grad_norm": 906.4895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2915, "query_norm": 1.385, "queue_k_norm": 1.4605, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2502, "sent_len_1": 66.7503, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5213, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 2.681, "doc_norm": 1.4638, "encoder_q-embeddings": 600.5544, "encoder_q-layer.0": 387.7497, "encoder_q-layer.1": 438.3459, "encoder_q-layer.10": 705.8203, "encoder_q-layer.11": 1483.2112, "encoder_q-layer.2": 532.9064, "encoder_q-layer.3": 499.4534, "encoder_q-layer.4": 514.3624, "encoder_q-layer.5": 509.5029, "encoder_q-layer.6": 559.4295, "encoder_q-layer.7": 601.2238, "encoder_q-layer.8": 749.2823, "encoder_q-layer.9": 659.8158, "epoch": 0.94, "inbatch_neg_score": 0.2939, "inbatch_pos_score": 1.0039, "learning_rate": 1.9444444444444444e-06, "loss": 2.681, "norm_diff": 0.0736, "norm_loss": 0.0, "num_token_doc": 66.86, "num_token_overlap": 18.0217, "num_token_query": 52.1767, "num_token_union": 73.4853, "num_word_context": 202.3131, "num_word_doc": 49.8533, "num_word_query": 39.7623, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1043.134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.3902, "queue_k_norm": 1.4603, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1767, "sent_len_1": 66.86, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0163, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6733, "doc_norm": 1.4699, "encoder_q-embeddings": 516.9339, "encoder_q-layer.0": 319.8056, "encoder_q-layer.1": 347.6133, "encoder_q-layer.10": 666.5712, "encoder_q-layer.11": 1454.1637, "encoder_q-layer.2": 403.2359, "encoder_q-layer.3": 438.2432, "encoder_q-layer.4": 473.025, "encoder_q-layer.5": 508.7362, "encoder_q-layer.6": 536.7388, "encoder_q-layer.7": 610.1105, "encoder_q-layer.8": 712.9571, "encoder_q-layer.9": 616.7828, "epoch": 0.94, "inbatch_neg_score": 0.292, "inbatch_pos_score": 1.04, "learning_rate": 1.888888888888889e-06, "loss": 2.6733, "norm_diff": 0.0705, "norm_loss": 0.0, "num_token_doc": 66.9328, "num_token_overlap": 18.0042, "num_token_query": 52.2841, "num_token_union": 73.5868, "num_word_context": 202.3997, "num_word_doc": 49.9206, "num_word_query": 39.8423, "postclip_grad_norm": 1.0, "preclip_grad_norm": 994.0148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.293, "query_norm": 1.3994, "queue_k_norm": 1.4621, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2841, "sent_len_1": 66.9328, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2663, "stdk": 0.0493, "stdq": 0.0467, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6851, "doc_norm": 1.4551, "encoder_q-embeddings": 3949.8794, "encoder_q-layer.0": 2850.3716, "encoder_q-layer.1": 3244.033, "encoder_q-layer.10": 1219.7542, "encoder_q-layer.11": 2811.7634, "encoder_q-layer.2": 4130.0913, "encoder_q-layer.3": 5187.7832, "encoder_q-layer.4": 5507.4673, "encoder_q-layer.5": 5467.2456, "encoder_q-layer.6": 5725.8335, "encoder_q-layer.7": 4669.7993, "encoder_q-layer.8": 2552.0725, "encoder_q-layer.9": 1333.1132, "epoch": 0.94, "inbatch_neg_score": 0.2914, "inbatch_pos_score": 1.0264, "learning_rate": 1.8333333333333335e-06, "loss": 2.6851, "norm_diff": 0.0728, "norm_loss": 0.0, "num_token_doc": 66.7983, "num_token_overlap": 18.0211, "num_token_query": 52.3251, "num_token_union": 73.4958, "num_word_context": 202.4025, "num_word_doc": 49.8747, "num_word_query": 39.9124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5986.9925, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2917, "query_norm": 1.3823, "queue_k_norm": 1.4618, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3251, "sent_len_1": 66.7983, "sent_len_max_0": 127.9975, "sent_len_max_1": 192.86, "stdk": 0.0487, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 63.1836, "active_queue_size": 16384.0, "cl_loss": 2.6729, "doc_norm": 1.4572, "encoder_q-embeddings": 923.7771, "encoder_q-layer.0": 587.7529, "encoder_q-layer.1": 622.5061, "encoder_q-layer.10": 1183.7393, "encoder_q-layer.11": 2661.5049, "encoder_q-layer.2": 711.059, "encoder_q-layer.3": 745.665, "encoder_q-layer.4": 803.7825, "encoder_q-layer.5": 835.9411, "encoder_q-layer.6": 937.1639, "encoder_q-layer.7": 1057.63, "encoder_q-layer.8": 1295.1226, "encoder_q-layer.9": 1140.9545, "epoch": 0.95, "inbatch_neg_score": 0.2918, "inbatch_pos_score": 1.0322, "learning_rate": 1.777777777777778e-06, "loss": 2.6729, "norm_diff": 0.0614, "norm_loss": 0.0, "num_token_doc": 66.8443, "num_token_overlap": 18.0454, "num_token_query": 52.1885, "num_token_union": 73.449, "num_word_context": 202.0967, "num_word_doc": 49.8073, "num_word_query": 39.7963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1785.1358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.3959, "queue_k_norm": 1.4617, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1885, "sent_len_1": 66.8443, "sent_len_max_0": 127.995, "sent_len_max_1": 191.0613, "stdk": 0.0488, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 2.676, "doc_norm": 1.4677, "encoder_q-embeddings": 936.5209, "encoder_q-layer.0": 598.4082, "encoder_q-layer.1": 658.4313, "encoder_q-layer.10": 1199.5917, "encoder_q-layer.11": 2811.3608, "encoder_q-layer.2": 754.4239, "encoder_q-layer.3": 799.9045, "encoder_q-layer.4": 864.2109, "encoder_q-layer.5": 868.531, "encoder_q-layer.6": 988.1109, "encoder_q-layer.7": 1104.6787, "encoder_q-layer.8": 1323.5828, "encoder_q-layer.9": 1150.3564, "epoch": 0.95, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 1.0342, "learning_rate": 1.7222222222222222e-06, "loss": 2.676, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.9437, "num_token_overlap": 18.0093, "num_token_query": 52.1752, "num_token_union": 73.4994, "num_word_context": 202.4702, "num_word_doc": 49.9408, "num_word_query": 39.7098, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1855.0011, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.3965, "queue_k_norm": 1.4626, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1752, "sent_len_1": 66.9437, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5525, "stdk": 0.0492, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 59.9609, "active_queue_size": 16384.0, "cl_loss": 2.6855, "doc_norm": 1.4586, "encoder_q-embeddings": 965.9597, "encoder_q-layer.0": 609.7569, "encoder_q-layer.1": 666.0319, "encoder_q-layer.10": 1383.9749, "encoder_q-layer.11": 2946.0127, "encoder_q-layer.2": 757.6208, "encoder_q-layer.3": 773.3979, "encoder_q-layer.4": 844.0496, "encoder_q-layer.5": 891.2953, "encoder_q-layer.6": 1037.4091, "encoder_q-layer.7": 1167.813, "encoder_q-layer.8": 1332.624, "encoder_q-layer.9": 1200.3655, "epoch": 0.95, "inbatch_neg_score": 0.2948, "inbatch_pos_score": 1.0146, "learning_rate": 1.6666666666666667e-06, "loss": 2.6855, "norm_diff": 0.0657, "norm_loss": 0.0, "num_token_doc": 66.5693, "num_token_overlap": 17.9781, "num_token_query": 52.1875, "num_token_union": 73.3136, "num_word_context": 202.0206, "num_word_doc": 49.6669, "num_word_query": 39.7577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1941.49, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.3928, "queue_k_norm": 1.4613, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1875, "sent_len_1": 66.5693, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.7837, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 62.5, "active_queue_size": 16384.0, "cl_loss": 2.6637, "doc_norm": 1.4665, "encoder_q-embeddings": 1011.6594, "encoder_q-layer.0": 632.8521, "encoder_q-layer.1": 682.1323, "encoder_q-layer.10": 1288.8623, "encoder_q-layer.11": 2862.3379, "encoder_q-layer.2": 755.9846, "encoder_q-layer.3": 797.2198, "encoder_q-layer.4": 847.601, "encoder_q-layer.5": 873.95, "encoder_q-layer.6": 1036.2417, "encoder_q-layer.7": 1185.0997, "encoder_q-layer.8": 1417.9203, "encoder_q-layer.9": 1268.1422, "epoch": 0.95, "inbatch_neg_score": 0.2968, "inbatch_pos_score": 1.0547, "learning_rate": 1.6111111111111111e-06, "loss": 2.6637, "norm_diff": 0.064, "norm_loss": 0.0, "num_token_doc": 66.9438, "num_token_overlap": 18.0963, "num_token_query": 52.5096, "num_token_union": 73.5755, "num_word_context": 202.2752, "num_word_doc": 49.9183, "num_word_query": 40.06, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1938.0998, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.4025, "queue_k_norm": 1.4622, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.5096, "sent_len_1": 66.9438, "sent_len_max_0": 128.0, "sent_len_max_1": 191.6738, "stdk": 0.0491, "stdq": 0.0468, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 61.6211, "active_queue_size": 16384.0, "cl_loss": 2.6525, "doc_norm": 1.4684, "encoder_q-embeddings": 1002.626, "encoder_q-layer.0": 677.6898, "encoder_q-layer.1": 755.6426, "encoder_q-layer.10": 1287.1093, "encoder_q-layer.11": 2840.1724, "encoder_q-layer.2": 859.8462, "encoder_q-layer.3": 879.7204, "encoder_q-layer.4": 918.7773, "encoder_q-layer.5": 932.1126, "encoder_q-layer.6": 1012.8873, "encoder_q-layer.7": 1138.7125, "encoder_q-layer.8": 1277.8575, "encoder_q-layer.9": 1216.3419, "epoch": 0.95, "inbatch_neg_score": 0.2909, "inbatch_pos_score": 1.0508, "learning_rate": 1.5555555555555556e-06, "loss": 2.6525, "norm_diff": 0.0743, "norm_loss": 0.0, "num_token_doc": 66.9368, "num_token_overlap": 18.1028, "num_token_query": 52.421, "num_token_union": 73.5665, "num_word_context": 202.5007, "num_word_doc": 49.9107, "num_word_query": 39.9139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1905.7966, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.3941, "queue_k_norm": 1.4616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.421, "sent_len_1": 66.9368, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2788, "stdk": 0.0492, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6845, "doc_norm": 1.4581, "encoder_q-embeddings": 1103.5294, "encoder_q-layer.0": 733.147, "encoder_q-layer.1": 795.9472, "encoder_q-layer.10": 1303.5966, "encoder_q-layer.11": 2713.9336, "encoder_q-layer.2": 936.0349, "encoder_q-layer.3": 964.2778, "encoder_q-layer.4": 1031.2415, "encoder_q-layer.5": 1081.399, "encoder_q-layer.6": 1166.3643, "encoder_q-layer.7": 1234.3837, "encoder_q-layer.8": 1378.8202, "encoder_q-layer.9": 1212.6909, "epoch": 0.95, "inbatch_neg_score": 0.2953, "inbatch_pos_score": 1.0342, "learning_rate": 1.5e-06, "loss": 2.6845, "norm_diff": 0.0688, "norm_loss": 0.0, "num_token_doc": 66.6701, "num_token_overlap": 17.9919, "num_token_query": 52.1998, "num_token_union": 73.4299, "num_word_context": 202.5137, "num_word_doc": 49.7491, "num_word_query": 39.7844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1967.5416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3893, "queue_k_norm": 1.4626, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.1998, "sent_len_1": 66.6701, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9375, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 62.5977, "active_queue_size": 16384.0, "cl_loss": 2.6779, "doc_norm": 1.4612, "encoder_q-embeddings": 1049.9813, "encoder_q-layer.0": 703.6017, "encoder_q-layer.1": 776.4891, "encoder_q-layer.10": 1227.8019, "encoder_q-layer.11": 2681.6306, "encoder_q-layer.2": 926.743, "encoder_q-layer.3": 915.428, "encoder_q-layer.4": 990.9521, "encoder_q-layer.5": 1036.8453, "encoder_q-layer.6": 1074.2599, "encoder_q-layer.7": 1077.5067, "encoder_q-layer.8": 1291.7933, "encoder_q-layer.9": 1156.4749, "epoch": 0.95, "inbatch_neg_score": 0.2918, "inbatch_pos_score": 1.043, "learning_rate": 1.4444444444444445e-06, "loss": 2.6779, "norm_diff": 0.0649, "norm_loss": 0.0, "num_token_doc": 66.7742, "num_token_overlap": 18.0516, "num_token_query": 52.3288, "num_token_union": 73.4968, "num_word_context": 202.3066, "num_word_doc": 49.819, "num_word_query": 39.8821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1863.3406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2937, "query_norm": 1.3963, "queue_k_norm": 1.462, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.3288, "sent_len_1": 66.7742, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.8787, "stdk": 0.049, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 61.5234, "active_queue_size": 16384.0, "cl_loss": 2.6858, "doc_norm": 1.4708, "encoder_q-embeddings": 1049.5504, "encoder_q-layer.0": 649.3961, "encoder_q-layer.1": 715.4242, "encoder_q-layer.10": 1255.0999, "encoder_q-layer.11": 2801.0857, "encoder_q-layer.2": 824.8116, "encoder_q-layer.3": 868.7065, "encoder_q-layer.4": 919.0185, "encoder_q-layer.5": 965.1603, "encoder_q-layer.6": 1085.486, "encoder_q-layer.7": 1213.4689, "encoder_q-layer.8": 1385.6146, "encoder_q-layer.9": 1202.1973, "epoch": 0.95, "inbatch_neg_score": 0.2958, "inbatch_pos_score": 1.0459, "learning_rate": 1.388888888888889e-06, "loss": 2.6858, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.8169, "num_token_overlap": 17.9897, "num_token_query": 52.3352, "num_token_union": 73.563, "num_word_context": 202.4579, "num_word_doc": 49.8635, "num_word_query": 39.9045, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1929.4653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.3994, "queue_k_norm": 1.4621, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3352, "sent_len_1": 66.8169, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4275, "stdk": 0.0493, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 61.1328, "active_queue_size": 16384.0, "cl_loss": 2.6795, "doc_norm": 1.4582, "encoder_q-embeddings": 1171.1537, "encoder_q-layer.0": 750.4028, "encoder_q-layer.1": 824.4533, "encoder_q-layer.10": 1297.3347, "encoder_q-layer.11": 2966.5076, "encoder_q-layer.2": 992.1733, "encoder_q-layer.3": 993.0275, "encoder_q-layer.4": 1031.4407, "encoder_q-layer.5": 1047.8762, "encoder_q-layer.6": 1131.6654, "encoder_q-layer.7": 1375.7531, "encoder_q-layer.8": 1449.9515, "encoder_q-layer.9": 1238.5323, "epoch": 0.95, "inbatch_neg_score": 0.2963, "inbatch_pos_score": 1.0332, "learning_rate": 1.3333333333333334e-06, "loss": 2.6795, "norm_diff": 0.0681, "norm_loss": 0.0, "num_token_doc": 66.7902, "num_token_overlap": 18.0181, "num_token_query": 52.1292, "num_token_union": 73.398, "num_word_context": 202.1225, "num_word_doc": 49.7978, "num_word_query": 39.7415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.1768, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2961, "query_norm": 1.3901, "queue_k_norm": 1.4617, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.1292, "sent_len_1": 66.7902, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.0613, "stdk": 0.0488, "stdq": 0.0462, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 62.6953, "active_queue_size": 16384.0, "cl_loss": 2.6778, "doc_norm": 1.461, "encoder_q-embeddings": 927.608, "encoder_q-layer.0": 588.5096, "encoder_q-layer.1": 638.0604, "encoder_q-layer.10": 1233.6921, "encoder_q-layer.11": 2915.3154, "encoder_q-layer.2": 737.2285, "encoder_q-layer.3": 754.0791, "encoder_q-layer.4": 781.3814, "encoder_q-layer.5": 832.8671, "encoder_q-layer.6": 955.9956, "encoder_q-layer.7": 1099.1421, "encoder_q-layer.8": 1366.774, "encoder_q-layer.9": 1196.0138, "epoch": 0.95, "inbatch_neg_score": 0.2962, "inbatch_pos_score": 1.042, "learning_rate": 1.2777777777777779e-06, "loss": 2.6778, "norm_diff": 0.0647, "norm_loss": 0.0, "num_token_doc": 66.8721, "num_token_overlap": 18.0269, "num_token_query": 52.3522, "num_token_union": 73.5755, "num_word_context": 202.5495, "num_word_doc": 49.8907, "num_word_query": 39.8945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1875.2657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3963, "queue_k_norm": 1.4622, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3522, "sent_len_1": 66.8721, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9288, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 60.3516, "active_queue_size": 16384.0, "cl_loss": 2.6929, "doc_norm": 1.4604, "encoder_q-embeddings": 1538.6426, "encoder_q-layer.0": 1079.2717, "encoder_q-layer.1": 1190.5282, "encoder_q-layer.10": 1463.894, "encoder_q-layer.11": 2987.4407, "encoder_q-layer.2": 1472.6211, "encoder_q-layer.3": 1599.5447, "encoder_q-layer.4": 1590.3436, "encoder_q-layer.5": 1643.4802, "encoder_q-layer.6": 1622.3077, "encoder_q-layer.7": 1585.8706, "encoder_q-layer.8": 1535.7805, "encoder_q-layer.9": 1254.4553, "epoch": 0.95, "inbatch_neg_score": 0.292, "inbatch_pos_score": 1.0264, "learning_rate": 1.2222222222222223e-06, "loss": 2.6929, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.737, "num_token_overlap": 17.9704, "num_token_query": 52.2093, "num_token_union": 73.4573, "num_word_context": 202.576, "num_word_doc": 49.7819, "num_word_query": 39.7977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2470.4879, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.389, "queue_k_norm": 1.4614, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2093, "sent_len_1": 66.737, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2425, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 2.6883, "doc_norm": 1.4627, "encoder_q-embeddings": 859.6669, "encoder_q-layer.0": 565.8984, "encoder_q-layer.1": 607.9249, "encoder_q-layer.10": 1383.9852, "encoder_q-layer.11": 2864.1484, "encoder_q-layer.2": 688.7138, "encoder_q-layer.3": 721.3328, "encoder_q-layer.4": 759.8884, "encoder_q-layer.5": 763.0024, "encoder_q-layer.6": 883.5308, "encoder_q-layer.7": 998.4572, "encoder_q-layer.8": 1208.7911, "encoder_q-layer.9": 1163.2056, "epoch": 0.96, "inbatch_neg_score": 0.2964, "inbatch_pos_score": 1.0137, "learning_rate": 1.1666666666666668e-06, "loss": 2.6883, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.7018, "num_token_overlap": 18.0158, "num_token_query": 52.3389, "num_token_union": 73.5037, "num_word_context": 202.5333, "num_word_doc": 49.8246, "num_word_query": 39.9389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1839.843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.3857, "queue_k_norm": 1.4624, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3389, "sent_len_1": 66.7018, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.7012, "stdk": 0.0489, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 2.6856, "doc_norm": 1.4577, "encoder_q-embeddings": 1269.9639, "encoder_q-layer.0": 861.9039, "encoder_q-layer.1": 942.7253, "encoder_q-layer.10": 1341.0189, "encoder_q-layer.11": 2982.832, "encoder_q-layer.2": 1052.4314, "encoder_q-layer.3": 1109.9012, "encoder_q-layer.4": 1143.6825, "encoder_q-layer.5": 1305.3571, "encoder_q-layer.6": 1366.4393, "encoder_q-layer.7": 1326.8436, "encoder_q-layer.8": 1458.6464, "encoder_q-layer.9": 1310.7872, "epoch": 0.96, "inbatch_neg_score": 0.2934, "inbatch_pos_score": 1.0117, "learning_rate": 1.1111111111111112e-06, "loss": 2.6856, "norm_diff": 0.0692, "norm_loss": 0.0, "num_token_doc": 66.7672, "num_token_overlap": 18.0027, "num_token_query": 52.222, "num_token_union": 73.4594, "num_word_context": 202.433, "num_word_doc": 49.8393, "num_word_query": 39.8009, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2202.7452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.3884, "queue_k_norm": 1.46, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.222, "sent_len_1": 66.7672, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3675, "stdk": 0.0488, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6781, "doc_norm": 1.4679, "encoder_q-embeddings": 1486.4669, "encoder_q-layer.0": 957.5457, "encoder_q-layer.1": 1083.9326, "encoder_q-layer.10": 1255.6917, "encoder_q-layer.11": 2848.0234, "encoder_q-layer.2": 1266.2321, "encoder_q-layer.3": 1408.1512, "encoder_q-layer.4": 1554.3032, "encoder_q-layer.5": 1720.2617, "encoder_q-layer.6": 1882.8718, "encoder_q-layer.7": 2081.0112, "encoder_q-layer.8": 2035.9309, "encoder_q-layer.9": 1261.0061, "epoch": 0.96, "inbatch_neg_score": 0.2982, "inbatch_pos_score": 1.0234, "learning_rate": 1.0555555555555557e-06, "loss": 2.6781, "norm_diff": 0.0834, "norm_loss": 0.0, "num_token_doc": 66.9186, "num_token_overlap": 18.0356, "num_token_query": 52.3294, "num_token_union": 73.5836, "num_word_context": 201.9823, "num_word_doc": 49.9353, "num_word_query": 39.8899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2533.451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2966, "query_norm": 1.3845, "queue_k_norm": 1.4626, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3294, "sent_len_1": 66.9186, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3325, "stdk": 0.0492, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 61.9141, "active_queue_size": 16384.0, "cl_loss": 2.6815, "doc_norm": 1.456, "encoder_q-embeddings": 1207.677, "encoder_q-layer.0": 819.6846, "encoder_q-layer.1": 921.458, "encoder_q-layer.10": 1255.0205, "encoder_q-layer.11": 2932.6377, "encoder_q-layer.2": 995.0671, "encoder_q-layer.3": 1039.0304, "encoder_q-layer.4": 1041.5909, "encoder_q-layer.5": 1065.6669, "encoder_q-layer.6": 1159.2971, "encoder_q-layer.7": 1178.0768, "encoder_q-layer.8": 1358.0962, "encoder_q-layer.9": 1157.9856, "epoch": 0.96, "inbatch_neg_score": 0.2957, "inbatch_pos_score": 1.0332, "learning_rate": 1.0000000000000002e-06, "loss": 2.6815, "norm_diff": 0.0632, "norm_loss": 0.0, "num_token_doc": 66.8468, "num_token_overlap": 18.0017, "num_token_query": 52.0943, "num_token_union": 73.4295, "num_word_context": 202.417, "num_word_doc": 49.8812, "num_word_query": 39.7197, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2082.4941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.3927, "queue_k_norm": 1.4627, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.0943, "sent_len_1": 66.8468, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3975, "stdk": 0.0487, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 59.8633, "active_queue_size": 16384.0, "cl_loss": 2.6847, "doc_norm": 1.4636, "encoder_q-embeddings": 1661.2124, "encoder_q-layer.0": 1124.6371, "encoder_q-layer.1": 1158.4272, "encoder_q-layer.10": 1374.9117, "encoder_q-layer.11": 2847.1309, "encoder_q-layer.2": 1353.8784, "encoder_q-layer.3": 1435.8766, "encoder_q-layer.4": 1636.5507, "encoder_q-layer.5": 1907.6758, "encoder_q-layer.6": 2120.9397, "encoder_q-layer.7": 2102.606, "encoder_q-layer.8": 1771.0388, "encoder_q-layer.9": 1287.787, "epoch": 0.96, "inbatch_neg_score": 0.2953, "inbatch_pos_score": 1.0273, "learning_rate": 9.444444444444445e-07, "loss": 2.6847, "norm_diff": 0.0745, "norm_loss": 0.0, "num_token_doc": 66.7229, "num_token_overlap": 18.0142, "num_token_query": 52.3653, "num_token_union": 73.4898, "num_word_context": 202.2555, "num_word_doc": 49.8022, "num_word_query": 39.9226, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2642.8743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3891, "queue_k_norm": 1.4612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3653, "sent_len_1": 66.7229, "sent_len_max_0": 128.0, "sent_len_max_1": 187.1337, "stdk": 0.049, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 2.677, "doc_norm": 1.4629, "encoder_q-embeddings": 1060.0715, "encoder_q-layer.0": 656.322, "encoder_q-layer.1": 743.4681, "encoder_q-layer.10": 1315.8636, "encoder_q-layer.11": 2963.0977, "encoder_q-layer.2": 891.7231, "encoder_q-layer.3": 935.2521, "encoder_q-layer.4": 1017.118, "encoder_q-layer.5": 989.0609, "encoder_q-layer.6": 1084.0712, "encoder_q-layer.7": 1223.2036, "encoder_q-layer.8": 1431.1256, "encoder_q-layer.9": 1239.4144, "epoch": 0.96, "inbatch_neg_score": 0.2945, "inbatch_pos_score": 1.0244, "learning_rate": 8.88888888888889e-07, "loss": 2.677, "norm_diff": 0.0776, "norm_loss": 0.0, "num_token_doc": 66.825, "num_token_overlap": 18.0319, "num_token_query": 52.2585, "num_token_union": 73.4369, "num_word_context": 202.3546, "num_word_doc": 49.8779, "num_word_query": 39.8175, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1999.8952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3852, "queue_k_norm": 1.463, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2585, "sent_len_1": 66.825, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6138, "stdk": 0.049, "stdq": 0.0461, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6647, "doc_norm": 1.4636, "encoder_q-embeddings": 974.9509, "encoder_q-layer.0": 628.6409, "encoder_q-layer.1": 677.9316, "encoder_q-layer.10": 1318.1404, "encoder_q-layer.11": 2760.9478, "encoder_q-layer.2": 758.6028, "encoder_q-layer.3": 781.6783, "encoder_q-layer.4": 853.4146, "encoder_q-layer.5": 848.3521, "encoder_q-layer.6": 982.8146, "encoder_q-layer.7": 1104.0541, "encoder_q-layer.8": 1254.4526, "encoder_q-layer.9": 1156.7949, "epoch": 0.96, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 1.0283, "learning_rate": 8.333333333333333e-07, "loss": 2.6647, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.8756, "num_token_overlap": 18.071, "num_token_query": 52.2961, "num_token_union": 73.4665, "num_word_context": 202.4429, "num_word_doc": 49.8762, "num_word_query": 39.8995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1865.8271, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3878, "queue_k_norm": 1.4612, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2961, "sent_len_1": 66.8756, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7975, "stdk": 0.049, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 60.5469, "active_queue_size": 16384.0, "cl_loss": 2.6901, "doc_norm": 1.4623, "encoder_q-embeddings": 1613.3468, "encoder_q-layer.0": 1050.8174, "encoder_q-layer.1": 1197.4072, "encoder_q-layer.10": 1347.2515, "encoder_q-layer.11": 2915.7305, "encoder_q-layer.2": 1392.6155, "encoder_q-layer.3": 1500.869, "encoder_q-layer.4": 1561.7657, "encoder_q-layer.5": 1723.8582, "encoder_q-layer.6": 1614.2223, "encoder_q-layer.7": 1571.3334, "encoder_q-layer.8": 1757.2233, "encoder_q-layer.9": 1365.981, "epoch": 0.96, "inbatch_neg_score": 0.295, "inbatch_pos_score": 1.0215, "learning_rate": 7.777777777777778e-07, "loss": 2.6901, "norm_diff": 0.0701, "norm_loss": 0.0, "num_token_doc": 66.7299, "num_token_overlap": 17.9612, "num_token_query": 52.1561, "num_token_union": 73.4469, "num_word_context": 202.3171, "num_word_doc": 49.7787, "num_word_query": 39.7511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2501.4355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3922, "queue_k_norm": 1.4623, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1561, "sent_len_1": 66.7299, "sent_len_max_0": 128.0, "sent_len_max_1": 189.835, "stdk": 0.049, "stdq": 0.0464, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 2.6752, "doc_norm": 1.4581, "encoder_q-embeddings": 2756.8181, "encoder_q-layer.0": 1808.3282, "encoder_q-layer.1": 2119.1521, "encoder_q-layer.10": 2919.2817, "encoder_q-layer.11": 5737.8125, "encoder_q-layer.2": 2495.5078, "encoder_q-layer.3": 2697.2148, "encoder_q-layer.4": 2516.3066, "encoder_q-layer.5": 2495.8542, "encoder_q-layer.6": 2354.9907, "encoder_q-layer.7": 2598.2722, "encoder_q-layer.8": 2841.6082, "encoder_q-layer.9": 2585.0972, "epoch": 0.96, "inbatch_neg_score": 0.2977, "inbatch_pos_score": 0.9976, "learning_rate": 7.222222222222222e-07, "loss": 2.6752, "norm_diff": 0.0744, "norm_loss": 0.0, "num_token_doc": 66.825, "num_token_overlap": 18.0284, "num_token_query": 52.2628, "num_token_union": 73.4592, "num_word_context": 202.3264, "num_word_doc": 49.9003, "num_word_query": 39.8683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4395.1422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.3837, "queue_k_norm": 1.462, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.2628, "sent_len_1": 66.825, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.0375, "stdk": 0.0488, "stdq": 0.046, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6728, "doc_norm": 1.4653, "encoder_q-embeddings": 1876.2205, "encoder_q-layer.0": 1170.6183, "encoder_q-layer.1": 1263.0692, "encoder_q-layer.10": 2381.198, "encoder_q-layer.11": 5414.397, "encoder_q-layer.2": 1486.808, "encoder_q-layer.3": 1532.2457, "encoder_q-layer.4": 1642.8685, "encoder_q-layer.5": 1605.9504, "encoder_q-layer.6": 1833.0302, "encoder_q-layer.7": 2033.8065, "encoder_q-layer.8": 2463.6152, "encoder_q-layer.9": 2236.0815, "epoch": 0.96, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 1.0391, "learning_rate": 6.666666666666667e-07, "loss": 2.6728, "norm_diff": 0.0673, "norm_loss": 0.0, "num_token_doc": 66.8956, "num_token_overlap": 18.076, "num_token_query": 52.4025, "num_token_union": 73.5575, "num_word_context": 202.5469, "num_word_doc": 49.9008, "num_word_query": 39.9651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3579.2189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2957, "query_norm": 1.398, "queue_k_norm": 1.4626, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.4025, "sent_len_1": 66.8956, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5362, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6734, "doc_norm": 1.4599, "encoder_q-embeddings": 2277.259, "encoder_q-layer.0": 1455.7904, "encoder_q-layer.1": 1681.4995, "encoder_q-layer.10": 2600.4761, "encoder_q-layer.11": 5698.0728, "encoder_q-layer.2": 1944.5812, "encoder_q-layer.3": 1985.3777, "encoder_q-layer.4": 2075.3521, "encoder_q-layer.5": 2207.7747, "encoder_q-layer.6": 2243.0979, "encoder_q-layer.7": 2309.0269, "encoder_q-layer.8": 2640.3865, "encoder_q-layer.9": 2402.2424, "epoch": 0.97, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 1.0293, "learning_rate": 6.111111111111112e-07, "loss": 2.6734, "norm_diff": 0.073, "norm_loss": 0.0, "num_token_doc": 66.7388, "num_token_overlap": 18.0136, "num_token_query": 52.3225, "num_token_union": 73.5104, "num_word_context": 202.5171, "num_word_doc": 49.8454, "num_word_query": 39.9301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4025.529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2925, "query_norm": 1.3869, "queue_k_norm": 1.4605, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.3225, "sent_len_1": 66.7388, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9038, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 2.6744, "doc_norm": 1.4675, "encoder_q-embeddings": 3219.7881, "encoder_q-layer.0": 2315.7107, "encoder_q-layer.1": 2597.1807, "encoder_q-layer.10": 2427.2573, "encoder_q-layer.11": 5819.9482, "encoder_q-layer.2": 2461.946, "encoder_q-layer.3": 2388.7053, "encoder_q-layer.4": 2309.2217, "encoder_q-layer.5": 1984.7925, "encoder_q-layer.6": 2083.6914, "encoder_q-layer.7": 2245.1465, "encoder_q-layer.8": 2639.2725, "encoder_q-layer.9": 2385.9854, "epoch": 0.97, "inbatch_neg_score": 0.2941, "inbatch_pos_score": 1.0254, "learning_rate": 5.555555555555556e-07, "loss": 2.6744, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.9229, "num_token_overlap": 18.0455, "num_token_query": 52.1176, "num_token_union": 73.3873, "num_word_context": 202.22, "num_word_doc": 49.8971, "num_word_query": 39.7072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4406.3389, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3884, "queue_k_norm": 1.4616, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1176, "sent_len_1": 66.9229, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0488, "stdk": 0.0492, "stdq": 0.0463, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 60.7422, "active_queue_size": 16384.0, "cl_loss": 2.6713, "doc_norm": 1.4598, "encoder_q-embeddings": 2381.7603, "encoder_q-layer.0": 1486.8774, "encoder_q-layer.1": 1687.7699, "encoder_q-layer.10": 2669.4985, "encoder_q-layer.11": 5986.438, "encoder_q-layer.2": 2068.5266, "encoder_q-layer.3": 2169.0437, "encoder_q-layer.4": 2331.8337, "encoder_q-layer.5": 2309.6362, "encoder_q-layer.6": 2241.104, "encoder_q-layer.7": 2531.9309, "encoder_q-layer.8": 2721.7512, "encoder_q-layer.9": 2438.8254, "epoch": 0.97, "inbatch_neg_score": 0.2937, "inbatch_pos_score": 1.0352, "learning_rate": 5.000000000000001e-07, "loss": 2.6713, "norm_diff": 0.0661, "norm_loss": 0.0, "num_token_doc": 66.7157, "num_token_overlap": 18.0172, "num_token_query": 52.3455, "num_token_union": 73.4525, "num_word_context": 202.141, "num_word_doc": 49.7761, "num_word_query": 39.8885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4187.9473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2935, "query_norm": 1.3937, "queue_k_norm": 1.462, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3455, "sent_len_1": 66.7157, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8325, "stdk": 0.0489, "stdq": 0.0465, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 2.6856, "doc_norm": 1.4595, "encoder_q-embeddings": 2651.5564, "encoder_q-layer.0": 1743.9659, "encoder_q-layer.1": 1854.8763, "encoder_q-layer.10": 2987.2346, "encoder_q-layer.11": 5793.3721, "encoder_q-layer.2": 2189.2134, "encoder_q-layer.3": 2280.886, "encoder_q-layer.4": 2480.7952, "encoder_q-layer.5": 2694.6021, "encoder_q-layer.6": 2972.3423, "encoder_q-layer.7": 3079.2798, "encoder_q-layer.8": 3638.2251, "encoder_q-layer.9": 2777.7881, "epoch": 0.97, "inbatch_neg_score": 0.2963, "inbatch_pos_score": 1.0098, "learning_rate": 4.444444444444445e-07, "loss": 2.6856, "norm_diff": 0.0679, "norm_loss": 0.0, "num_token_doc": 66.7135, "num_token_overlap": 17.9532, "num_token_query": 52.2, "num_token_union": 73.4189, "num_word_context": 202.523, "num_word_doc": 49.7373, "num_word_query": 39.7789, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4509.7245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2961, "query_norm": 1.3916, "queue_k_norm": 1.4616, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2, "sent_len_1": 66.7135, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5525, "stdk": 0.0488, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 62.1094, "active_queue_size": 16384.0, "cl_loss": 2.6717, "doc_norm": 1.46, "encoder_q-embeddings": 1970.4388, "encoder_q-layer.0": 1316.094, "encoder_q-layer.1": 1413.7151, "encoder_q-layer.10": 2588.2698, "encoder_q-layer.11": 5682.0537, "encoder_q-layer.2": 1608.1836, "encoder_q-layer.3": 1717.2452, "encoder_q-layer.4": 1833.1411, "encoder_q-layer.5": 1927.9911, "encoder_q-layer.6": 2212.0923, "encoder_q-layer.7": 2327.8108, "encoder_q-layer.8": 2728.8616, "encoder_q-layer.9": 2469.9971, "epoch": 0.97, "inbatch_neg_score": 0.2951, "inbatch_pos_score": 1.0449, "learning_rate": 3.888888888888889e-07, "loss": 2.6717, "norm_diff": 0.0615, "norm_loss": 0.0, "num_token_doc": 67.0863, "num_token_overlap": 18.0932, "num_token_query": 52.2709, "num_token_union": 73.5753, "num_word_context": 202.4955, "num_word_doc": 50.0764, "num_word_query": 39.8461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3790.1794, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2957, "query_norm": 1.3985, "queue_k_norm": 1.4612, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2709, "sent_len_1": 67.0863, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.685, "stdk": 0.0489, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 62.4023, "active_queue_size": 16384.0, "cl_loss": 2.647, "doc_norm": 1.4655, "encoder_q-embeddings": 2922.4058, "encoder_q-layer.0": 1920.8474, "encoder_q-layer.1": 2224.606, "encoder_q-layer.10": 2748.4277, "encoder_q-layer.11": 5866.7202, "encoder_q-layer.2": 2725.6284, "encoder_q-layer.3": 2870.9456, "encoder_q-layer.4": 3184.3777, "encoder_q-layer.5": 3307.1528, "encoder_q-layer.6": 3269.2629, "encoder_q-layer.7": 3023.5359, "encoder_q-layer.8": 2729.3049, "encoder_q-layer.9": 2316.2083, "epoch": 0.97, "inbatch_neg_score": 0.2947, "inbatch_pos_score": 1.04, "learning_rate": 3.3333333333333335e-07, "loss": 2.647, "norm_diff": 0.0672, "norm_loss": 0.0, "num_token_doc": 66.8504, "num_token_overlap": 18.058, "num_token_query": 52.2375, "num_token_union": 73.4119, "num_word_context": 202.1661, "num_word_doc": 49.8783, "num_word_query": 39.8257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4769.0259, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3983, "queue_k_norm": 1.4601, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.2375, "sent_len_1": 66.8504, "sent_len_max_0": 128.0, "sent_len_max_1": 190.06, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 60.1562, "active_queue_size": 16384.0, "cl_loss": 2.6885, "doc_norm": 1.4648, "encoder_q-embeddings": 1818.1107, "encoder_q-layer.0": 1209.5885, "encoder_q-layer.1": 1354.1805, "encoder_q-layer.10": 2569.4583, "encoder_q-layer.11": 5854.0273, "encoder_q-layer.2": 1468.8706, "encoder_q-layer.3": 1513.5256, "encoder_q-layer.4": 1646.8119, "encoder_q-layer.5": 1685.5806, "encoder_q-layer.6": 1898.3831, "encoder_q-layer.7": 2134.9285, "encoder_q-layer.8": 2647.6792, "encoder_q-layer.9": 2407.6143, "epoch": 0.97, "inbatch_neg_score": 0.2929, "inbatch_pos_score": 1.0342, "learning_rate": 2.777777777777778e-07, "loss": 2.6885, "norm_diff": 0.071, "norm_loss": 0.0, "num_token_doc": 66.4468, "num_token_overlap": 17.9054, "num_token_query": 52.1046, "num_token_union": 73.2389, "num_word_context": 202.0036, "num_word_doc": 49.619, "num_word_query": 39.7094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3817.4874, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3938, "queue_k_norm": 1.4594, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.1046, "sent_len_1": 66.4468, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8038, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 60.0586, "active_queue_size": 16384.0, "cl_loss": 2.6886, "doc_norm": 1.4619, "encoder_q-embeddings": 1811.0876, "encoder_q-layer.0": 1177.682, "encoder_q-layer.1": 1251.3629, "encoder_q-layer.10": 2785.0708, "encoder_q-layer.11": 5698.5688, "encoder_q-layer.2": 1439.7271, "encoder_q-layer.3": 1478.9949, "encoder_q-layer.4": 1548.2252, "encoder_q-layer.5": 1600.7783, "encoder_q-layer.6": 1878.2175, "encoder_q-layer.7": 2114.1343, "encoder_q-layer.8": 2503.4775, "encoder_q-layer.9": 2374.2129, "epoch": 0.97, "inbatch_neg_score": 0.299, "inbatch_pos_score": 1.0225, "learning_rate": 2.2222222222222224e-07, "loss": 2.6886, "norm_diff": 0.0684, "norm_loss": 0.0, "num_token_doc": 66.7064, "num_token_overlap": 17.9837, "num_token_query": 52.061, "num_token_union": 73.3183, "num_word_context": 202.1875, "num_word_doc": 49.7704, "num_word_query": 39.7104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3668.6682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.3936, "queue_k_norm": 1.4597, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.061, "sent_len_1": 66.7064, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0062, "stdk": 0.0489, "stdq": 0.0464, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 2.6877, "doc_norm": 1.4591, "encoder_q-embeddings": 2005.5229, "encoder_q-layer.0": 1288.9147, "encoder_q-layer.1": 1367.5684, "encoder_q-layer.10": 2722.3865, "encoder_q-layer.11": 5915.751, "encoder_q-layer.2": 1502.1016, "encoder_q-layer.3": 1573.3237, "encoder_q-layer.4": 1644.6637, "encoder_q-layer.5": 1709.4886, "encoder_q-layer.6": 1952.5869, "encoder_q-layer.7": 2357.179, "encoder_q-layer.8": 2666.2881, "encoder_q-layer.9": 2412.2664, "epoch": 0.97, "inbatch_neg_score": 0.2957, "inbatch_pos_score": 1.0273, "learning_rate": 1.6666666666666668e-07, "loss": 2.6877, "norm_diff": 0.0696, "norm_loss": 0.0, "num_token_doc": 66.7712, "num_token_overlap": 18.0144, "num_token_query": 52.2001, "num_token_union": 73.406, "num_word_context": 202.2297, "num_word_doc": 49.8379, "num_word_query": 39.8183, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3869.2123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2949, "query_norm": 1.3895, "queue_k_norm": 1.4624, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 52.2001, "sent_len_1": 66.7712, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.8175, "stdk": 0.0489, "stdq": 0.0463, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 63.5742, "active_queue_size": 16384.0, "cl_loss": 2.6672, "doc_norm": 1.4666, "encoder_q-embeddings": 3924.9434, "encoder_q-layer.0": 2637.2827, "encoder_q-layer.1": 3300.3877, "encoder_q-layer.10": 2363.5247, "encoder_q-layer.11": 5173.833, "encoder_q-layer.2": 3988.5256, "encoder_q-layer.3": 4148.4517, "encoder_q-layer.4": 4165.041, "encoder_q-layer.5": 4062.4414, "encoder_q-layer.6": 4320.8486, "encoder_q-layer.7": 4132.0142, "encoder_q-layer.8": 3576.9561, "encoder_q-layer.9": 2314.0146, "epoch": 0.97, "inbatch_neg_score": 0.2936, "inbatch_pos_score": 1.0449, "learning_rate": 1.1111111111111112e-07, "loss": 2.6672, "norm_diff": 0.0725, "norm_loss": 0.0, "num_token_doc": 66.7009, "num_token_overlap": 17.9863, "num_token_query": 52.1732, "num_token_union": 73.3884, "num_word_context": 202.2879, "num_word_doc": 49.8019, "num_word_query": 39.7511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5765.5155, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2944, "query_norm": 1.3941, "queue_k_norm": 1.4605, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 52.1732, "sent_len_1": 66.7009, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3638, "stdk": 0.0491, "stdq": 0.0465, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 61.2305, "active_queue_size": 16384.0, "cl_loss": 2.6726, "doc_norm": 1.4594, "encoder_q-embeddings": 2183.9194, "encoder_q-layer.0": 1357.6902, "encoder_q-layer.1": 1459.9917, "encoder_q-layer.10": 2503.105, "encoder_q-layer.11": 5641.9043, "encoder_q-layer.2": 1739.2413, "encoder_q-layer.3": 1743.38, "encoder_q-layer.4": 1832.1229, "encoder_q-layer.5": 1974.6479, "encoder_q-layer.6": 2103.1924, "encoder_q-layer.7": 2305.0063, "encoder_q-layer.8": 2554.0847, "encoder_q-layer.9": 2345.2981, "epoch": 0.98, "inbatch_neg_score": 0.293, "inbatch_pos_score": 1.0312, "learning_rate": 5.555555555555556e-08, "loss": 2.6726, "norm_diff": 0.0726, "norm_loss": 0.0, "num_token_doc": 67.0153, "num_token_overlap": 18.0684, "num_token_query": 52.3512, "num_token_union": 73.5671, "num_word_context": 202.3372, "num_word_doc": 49.9407, "num_word_query": 39.8899, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3886.35, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3868, "queue_k_norm": 1.4611, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 52.3512, "sent_len_1": 67.0153, "sent_len_max_0": 128.0, "sent_len_max_1": 192.275, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 2.6917, "doc_norm": 1.4596, "encoder_q-embeddings": 3653.2268, "encoder_q-layer.0": 2487.4209, "encoder_q-layer.1": 3015.4875, "encoder_q-layer.10": 2868.6902, "encoder_q-layer.11": 5810.4912, "encoder_q-layer.2": 3591.9124, "encoder_q-layer.3": 3726.51, "encoder_q-layer.4": 4108.4668, "encoder_q-layer.5": 4259.333, "encoder_q-layer.6": 3824.4319, "encoder_q-layer.7": 3295.531, "encoder_q-layer.8": 3265.0325, "encoder_q-layer.9": 2675.6938, "epoch": 0.98, "inbatch_neg_score": 0.2947, "inbatch_pos_score": 1.0156, "learning_rate": 0.0, "loss": 2.6917, "norm_diff": 0.0742, "norm_loss": 0.0, "num_token_doc": 66.702, "num_token_overlap": 18.025, "num_token_query": 52.2783, "num_token_union": 73.4118, "num_word_context": 202.4196, "num_word_doc": 49.7922, "num_word_query": 39.8398, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5589.5371, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2957, "query_norm": 1.3854, "queue_k_norm": 1.4605, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 52.2783, "sent_len_1": 66.702, "sent_len_max_0": 128.0, "sent_len_max_1": 187.695, "stdk": 0.0489, "stdq": 0.0462, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 29.0626, "dev_samples_per_second": 2.202, "dev_steps_per_second": 0.034, "epoch": 0.98, "step": 100000, "test_accuracy": 93.95751953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3522314131259918, "test_doc_norm": 1.43003511428833, "test_inbatch_neg_score": 0.6531069278717041, "test_inbatch_pos_score": 1.5934662818908691, "test_loss": 0.3522314131259918, "test_loss_align": 0.9377579689025879, "test_loss_unif": 3.8197107315063477, "test_loss_unif_q@queue": 3.8197109699249268, "test_norm_diff": 0.008587727323174477, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2876378893852234, "test_query_norm": 1.4319117069244385, "test_queue_k_norm": 1.4607069492340088, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04245629906654358, "test_stdq": 0.0424661710858345, "test_stdqueue_k": 0.04901013895869255, "test_stdqueue_q": 0.0 }, { "dev_runtime": 29.0626, "dev_samples_per_second": 2.202, "dev_steps_per_second": 0.034, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.40399, "eval_beir-arguana_recall@10": 0.68065, "eval_beir-arguana_recall@100": 0.94666, "eval_beir-arguana_recall@20": 0.80868, "eval_beir-avg_ndcg@10": 0.37931175, "eval_beir-avg_recall@10": 0.45246475, "eval_beir-avg_recall@100": 0.6363600833333335, "eval_beir-avg_recall@20": 0.51314125, "eval_beir-cqadupstack_ndcg@10": 0.27109750000000005, "eval_beir-cqadupstack_recall@10": 0.3678975, "eval_beir-cqadupstack_recall@100": 0.5988908333333334, "eval_beir-cqadupstack_recall@20": 0.43421249999999995, "eval_beir-fiqa_ndcg@10": 0.25834, "eval_beir-fiqa_recall@10": 0.31154, "eval_beir-fiqa_recall@100": 0.58671, "eval_beir-fiqa_recall@20": 0.39001, "eval_beir-nfcorpus_ndcg@10": 0.29872, "eval_beir-nfcorpus_recall@10": 0.1448, "eval_beir-nfcorpus_recall@100": 0.27142, "eval_beir-nfcorpus_recall@20": 0.17552, "eval_beir-nq_ndcg@10": 0.27193, "eval_beir-nq_recall@10": 0.4465, "eval_beir-nq_recall@100": 0.79637, "eval_beir-nq_recall@20": 0.57305, "eval_beir-quora_ndcg@10": 0.78168, "eval_beir-quora_recall@10": 0.887, "eval_beir-quora_recall@100": 0.97651, "eval_beir-quora_recall@20": 0.92894, "eval_beir-scidocs_ndcg@10": 0.15698, "eval_beir-scidocs_recall@10": 0.16563, "eval_beir-scidocs_recall@100": 0.37647, "eval_beir-scidocs_recall@20": 0.2229, "eval_beir-scifact_ndcg@10": 0.6428, "eval_beir-scifact_recall@10": 0.80067, "eval_beir-scifact_recall@100": 0.93156, "eval_beir-scifact_recall@20": 0.84244, "eval_beir-trec-covid_ndcg@10": 0.54138, "eval_beir-trec-covid_recall@10": 0.586, "eval_beir-trec-covid_recall@100": 0.447, "eval_beir-trec-covid_recall@20": 0.558, "eval_beir-webis-touche2020_ndcg@10": 0.1662, "eval_beir-webis-touche2020_recall@10": 0.13396, "eval_beir-webis-touche2020_recall@100": 0.43201, "eval_beir-webis-touche2020_recall@20": 0.19766, "eval_senteval-avg_sts": 0.7520594707791572, "eval_senteval-sickr_spearman": 0.724076491124677, "eval_senteval-stsb_spearman": 0.7800424504336374, "step": 100000, "test_accuracy": 93.95751953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3522314131259918, "test_doc_norm": 1.43003511428833, "test_inbatch_neg_score": 0.6531069278717041, "test_inbatch_pos_score": 1.5934662818908691, "test_loss": 0.3522314131259918, "test_loss_align": 0.9377579689025879, "test_loss_unif": 3.8197107315063477, "test_loss_unif_q@queue": 3.8197109699249268, "test_norm_diff": 0.008587727323174477, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2876378893852234, "test_query_norm": 1.4319117069244385, "test_queue_k_norm": 1.4607069492340088, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04245629906654358, "test_stdq": 0.0424661710858345, "test_stdqueue_k": 0.04901013895869255, "test_stdqueue_q": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 76223.6887, "train_samples_per_second": 1.312 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }