{ "best_metric": 0.05301735922694206, "best_model_checkpoint": "bge_finetune_dsv2/checkpoint-8000", "epoch": 36.36363636363637, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.2727272727272725, "grad_norm": 2.923750162124634, "learning_rate": 5.681818181818183e-06, "loss": 0.2767, "step": 500 }, { "epoch": 2.2727272727272725, "eval_loss": 0.09305377304553986, "eval_runtime": 16.1531, "eval_samples_per_second": 48.412, "eval_steps_per_second": 6.067, "eval_val_evaluator_cosine_accuracy@1": 0.46675191815856776, "eval_val_evaluator_cosine_accuracy@10": 0.9411764705882353, "eval_val_evaluator_cosine_accuracy@5": 0.8913043478260869, "eval_val_evaluator_cosine_map@100": 0.6449144076353992, "eval_val_evaluator_cosine_mrr@10": 0.6419812852677294, "eval_val_evaluator_cosine_mrr@100": 0.6449144076353982, "eval_val_evaluator_cosine_mrr@5": 0.6351449275362308, "eval_val_evaluator_cosine_ndcg@10": 0.715922596095415, "eval_val_evaluator_cosine_ndcg@100": 0.7286582483109194, "eval_val_evaluator_cosine_ndcg@5": 0.6996180454943595, "eval_val_evaluator_cosine_precision@1": 0.46675191815856776, "eval_val_evaluator_cosine_precision@10": 0.09411764705882353, "eval_val_evaluator_cosine_precision@5": 0.17826086956521736, "eval_val_evaluator_cosine_recall@1": 0.46675191815856776, "eval_val_evaluator_cosine_recall@10": 0.9411764705882353, "eval_val_evaluator_cosine_recall@5": 0.8913043478260869, "eval_val_evaluator_dot_accuracy@1": 0.46675191815856776, "eval_val_evaluator_dot_accuracy@10": 0.9411764705882353, "eval_val_evaluator_dot_accuracy@5": 0.8913043478260869, "eval_val_evaluator_dot_map@100": 0.6449144076353992, "eval_val_evaluator_dot_mrr@10": 0.6419812852677294, "eval_val_evaluator_dot_mrr@100": 0.6449144076353982, "eval_val_evaluator_dot_mrr@5": 0.6351449275362308, "eval_val_evaluator_dot_ndcg@10": 0.715922596095415, "eval_val_evaluator_dot_ndcg@100": 0.7286582483109194, "eval_val_evaluator_dot_ndcg@5": 0.6996180454943595, "eval_val_evaluator_dot_precision@1": 0.46675191815856776, "eval_val_evaluator_dot_precision@10": 0.09411764705882353, "eval_val_evaluator_dot_precision@5": 0.17826086956521736, "eval_val_evaluator_dot_recall@1": 0.46675191815856776, "eval_val_evaluator_dot_recall@10": 0.9411764705882353, "eval_val_evaluator_dot_recall@5": 0.8913043478260869, "step": 500 }, { "epoch": 4.545454545454545, "grad_norm": 1.0960533618927002, "learning_rate": 9.84848484848485e-06, "loss": 0.067, "step": 1000 }, { "epoch": 4.545454545454545, "eval_loss": 0.07767628133296967, "eval_runtime": 16.162, "eval_samples_per_second": 48.385, "eval_steps_per_second": 6.064, "eval_val_evaluator_cosine_accuracy@1": 0.4680306905370844, "eval_val_evaluator_cosine_accuracy@10": 0.948849104859335, "eval_val_evaluator_cosine_accuracy@5": 0.8951406649616368, "eval_val_evaluator_cosine_map@100": 0.6501350929872222, "eval_val_evaluator_cosine_mrr@10": 0.6475830187147313, "eval_val_evaluator_cosine_mrr@100": 0.6501350929872212, "eval_val_evaluator_cosine_mrr@5": 0.6400468883205445, "eval_val_evaluator_cosine_ndcg@10": 0.7221983441504519, "eval_val_evaluator_cosine_ndcg@100": 0.7332422183636672, "eval_val_evaluator_cosine_ndcg@5": 0.7044596320615103, "eval_val_evaluator_cosine_precision@1": 0.4680306905370844, "eval_val_evaluator_cosine_precision@10": 0.09488491048593349, "eval_val_evaluator_cosine_precision@5": 0.17902813299232737, "eval_val_evaluator_cosine_recall@1": 0.4680306905370844, "eval_val_evaluator_cosine_recall@10": 0.948849104859335, "eval_val_evaluator_cosine_recall@5": 0.8951406649616368, "eval_val_evaluator_dot_accuracy@1": 0.4680306905370844, "eval_val_evaluator_dot_accuracy@10": 0.948849104859335, "eval_val_evaluator_dot_accuracy@5": 0.8951406649616368, "eval_val_evaluator_dot_map@100": 0.6501350929872222, "eval_val_evaluator_dot_mrr@10": 0.6475830187147313, "eval_val_evaluator_dot_mrr@100": 0.6501350929872212, "eval_val_evaluator_dot_mrr@5": 0.6400468883205445, "eval_val_evaluator_dot_ndcg@10": 0.7221983441504519, "eval_val_evaluator_dot_ndcg@100": 0.7332422183636672, "eval_val_evaluator_dot_ndcg@5": 0.7044596320615103, "eval_val_evaluator_dot_precision@1": 0.4680306905370844, "eval_val_evaluator_dot_precision@10": 0.09488491048593349, "eval_val_evaluator_dot_precision@5": 0.17902813299232737, "eval_val_evaluator_dot_recall@1": 0.4680306905370844, "eval_val_evaluator_dot_recall@10": 0.948849104859335, "eval_val_evaluator_dot_recall@5": 0.8951406649616368, "step": 1000 }, { "epoch": 6.818181818181818, "grad_norm": 1.7721449136734009, "learning_rate": 9.217171717171718e-06, "loss": 0.0485, "step": 1500 }, { "epoch": 6.818181818181818, "eval_loss": 0.06208512559533119, "eval_runtime": 16.1354, "eval_samples_per_second": 48.465, "eval_steps_per_second": 6.074, "eval_val_evaluator_cosine_accuracy@1": 0.4846547314578005, "eval_val_evaluator_cosine_accuracy@10": 0.9616368286445013, "eval_val_evaluator_cosine_accuracy@5": 0.921994884910486, "eval_val_evaluator_cosine_map@100": 0.6677839316557986, "eval_val_evaluator_cosine_mrr@10": 0.6659811025859614, "eval_val_evaluator_cosine_mrr@100": 0.6677839316557982, "eval_val_evaluator_cosine_mrr@5": 0.6605924978687119, "eval_val_evaluator_cosine_ndcg@10": 0.739448593920066, "eval_val_evaluator_cosine_ndcg@100": 0.7474718746893738, "eval_val_evaluator_cosine_ndcg@5": 0.7265352392347777, "eval_val_evaluator_cosine_precision@1": 0.4846547314578005, "eval_val_evaluator_cosine_precision@10": 0.09616368286445011, "eval_val_evaluator_cosine_precision@5": 0.18439897698209717, "eval_val_evaluator_cosine_recall@1": 0.4846547314578005, "eval_val_evaluator_cosine_recall@10": 0.9616368286445013, "eval_val_evaluator_cosine_recall@5": 0.921994884910486, "eval_val_evaluator_dot_accuracy@1": 0.4846547314578005, "eval_val_evaluator_dot_accuracy@10": 0.9616368286445013, "eval_val_evaluator_dot_accuracy@5": 0.921994884910486, "eval_val_evaluator_dot_map@100": 0.6677839316557986, "eval_val_evaluator_dot_mrr@10": 0.6659811025859614, "eval_val_evaluator_dot_mrr@100": 0.6677839316557982, "eval_val_evaluator_dot_mrr@5": 0.6605924978687119, "eval_val_evaluator_dot_ndcg@10": 0.739448593920066, "eval_val_evaluator_dot_ndcg@100": 0.7474718746893738, "eval_val_evaluator_dot_ndcg@5": 0.7265352392347777, "eval_val_evaluator_dot_precision@1": 0.4846547314578005, "eval_val_evaluator_dot_precision@10": 0.09616368286445011, "eval_val_evaluator_dot_precision@5": 0.18439897698209717, "eval_val_evaluator_dot_recall@1": 0.4846547314578005, "eval_val_evaluator_dot_recall@10": 0.9616368286445013, "eval_val_evaluator_dot_recall@5": 0.921994884910486, "step": 1500 }, { "epoch": 9.090909090909092, "grad_norm": 4.445181846618652, "learning_rate": 8.585858585858587e-06, "loss": 0.0361, "step": 2000 }, { "epoch": 9.090909090909092, "eval_loss": 0.06154213100671768, "eval_runtime": 16.1225, "eval_samples_per_second": 48.504, "eval_steps_per_second": 6.078, "eval_val_evaluator_cosine_accuracy@1": 0.4859335038363171, "eval_val_evaluator_cosine_accuracy@10": 0.9654731457800512, "eval_val_evaluator_cosine_accuracy@5": 0.9271099744245525, "eval_val_evaluator_cosine_map@100": 0.6706887408572961, "eval_val_evaluator_cosine_mrr@10": 0.6692648073722237, "eval_val_evaluator_cosine_mrr@100": 0.6706887408572959, "eval_val_evaluator_cosine_mrr@5": 0.6639812446717808, "eval_val_evaluator_cosine_ndcg@10": 0.7429450115476419, "eval_val_evaluator_cosine_ndcg@100": 0.749838617287201, "eval_val_evaluator_cosine_ndcg@5": 0.7303784552891699, "eval_val_evaluator_cosine_precision@1": 0.4859335038363171, "eval_val_evaluator_cosine_precision@10": 0.09654731457800511, "eval_val_evaluator_cosine_precision@5": 0.18542199488491048, "eval_val_evaluator_cosine_recall@1": 0.4859335038363171, "eval_val_evaluator_cosine_recall@10": 0.9654731457800512, "eval_val_evaluator_cosine_recall@5": 0.9271099744245525, "eval_val_evaluator_dot_accuracy@1": 0.4859335038363171, "eval_val_evaluator_dot_accuracy@10": 0.9654731457800512, "eval_val_evaluator_dot_accuracy@5": 0.9271099744245525, "eval_val_evaluator_dot_map@100": 0.6706887408572961, "eval_val_evaluator_dot_mrr@10": 0.6692648073722237, "eval_val_evaluator_dot_mrr@100": 0.6706887408572959, "eval_val_evaluator_dot_mrr@5": 0.6639812446717808, "eval_val_evaluator_dot_ndcg@10": 0.7429450115476419, "eval_val_evaluator_dot_ndcg@100": 0.749838617287201, "eval_val_evaluator_dot_ndcg@5": 0.7303784552891699, "eval_val_evaluator_dot_precision@1": 0.4859335038363171, "eval_val_evaluator_dot_precision@10": 0.09654731457800511, "eval_val_evaluator_dot_precision@5": 0.18542199488491048, "eval_val_evaluator_dot_recall@1": 0.4859335038363171, "eval_val_evaluator_dot_recall@10": 0.9654731457800512, "eval_val_evaluator_dot_recall@5": 0.9271099744245525, "step": 2000 }, { "epoch": 11.363636363636363, "grad_norm": 7.720225811004639, "learning_rate": 7.954545454545455e-06, "loss": 0.0301, "step": 2500 }, { "epoch": 11.363636363636363, "eval_loss": 0.06867850571870804, "eval_runtime": 16.1559, "eval_samples_per_second": 48.403, "eval_steps_per_second": 6.066, "eval_val_evaluator_cosine_accuracy@1": 0.49232736572890023, "eval_val_evaluator_cosine_accuracy@10": 0.9603580562659847, "eval_val_evaluator_cosine_accuracy@5": 0.928388746803069, "eval_val_evaluator_cosine_map@100": 0.6764517431036522, "eval_val_evaluator_cosine_mrr@10": 0.6745762797872769, "eval_val_evaluator_cosine_mrr@100": 0.6764517431036515, "eval_val_evaluator_cosine_mrr@5": 0.6698635976129574, "eval_val_evaluator_cosine_ndcg@10": 0.7459669546464727, "eval_val_evaluator_cosine_ndcg@100": 0.7541025758659824, "eval_val_evaluator_cosine_ndcg@5": 0.7351837372969686, "eval_val_evaluator_cosine_precision@1": 0.49232736572890023, "eval_val_evaluator_cosine_precision@10": 0.09603580562659846, "eval_val_evaluator_cosine_precision@5": 0.1856777493606138, "eval_val_evaluator_cosine_recall@1": 0.49232736572890023, "eval_val_evaluator_cosine_recall@10": 0.9603580562659847, "eval_val_evaluator_cosine_recall@5": 0.928388746803069, "eval_val_evaluator_dot_accuracy@1": 0.49232736572890023, "eval_val_evaluator_dot_accuracy@10": 0.9603580562659847, "eval_val_evaluator_dot_accuracy@5": 0.928388746803069, "eval_val_evaluator_dot_map@100": 0.6764517431036522, "eval_val_evaluator_dot_mrr@10": 0.6745762797872769, "eval_val_evaluator_dot_mrr@100": 0.6764517431036515, "eval_val_evaluator_dot_mrr@5": 0.6698635976129574, "eval_val_evaluator_dot_ndcg@10": 0.7459669546464727, "eval_val_evaluator_dot_ndcg@100": 0.7541025758659824, "eval_val_evaluator_dot_ndcg@5": 0.7351837372969686, "eval_val_evaluator_dot_precision@1": 0.49232736572890023, "eval_val_evaluator_dot_precision@10": 0.09603580562659846, "eval_val_evaluator_dot_precision@5": 0.1856777493606138, "eval_val_evaluator_dot_recall@1": 0.49232736572890023, "eval_val_evaluator_dot_recall@10": 0.9603580562659847, "eval_val_evaluator_dot_recall@5": 0.928388746803069, "step": 2500 }, { "epoch": 13.636363636363637, "grad_norm": 3.5265557765960693, "learning_rate": 7.323232323232324e-06, "loss": 0.0274, "step": 3000 }, { "epoch": 13.636363636363637, "eval_loss": 0.06610127538442612, "eval_runtime": 16.1447, "eval_samples_per_second": 48.437, "eval_steps_per_second": 6.07, "eval_val_evaluator_cosine_accuracy@1": 0.48721227621483376, "eval_val_evaluator_cosine_accuracy@10": 0.9603580562659847, "eval_val_evaluator_cosine_accuracy@5": 0.9245524296675192, "eval_val_evaluator_cosine_map@100": 0.6732921649074852, "eval_val_evaluator_cosine_mrr@10": 0.671490987699427, "eval_val_evaluator_cosine_mrr@100": 0.6732921649074848, "eval_val_evaluator_cosine_mrr@5": 0.6664748508098882, "eval_val_evaluator_cosine_ndcg@10": 0.7435664719550824, "eval_val_evaluator_cosine_ndcg@100": 0.7519530408382193, "eval_val_evaluator_cosine_ndcg@5": 0.7317557792940828, "eval_val_evaluator_cosine_precision@1": 0.48721227621483376, "eval_val_evaluator_cosine_precision@10": 0.09603580562659846, "eval_val_evaluator_cosine_precision@5": 0.18491048593350382, "eval_val_evaluator_cosine_recall@1": 0.48721227621483376, "eval_val_evaluator_cosine_recall@10": 0.9603580562659847, "eval_val_evaluator_cosine_recall@5": 0.9245524296675192, "eval_val_evaluator_dot_accuracy@1": 0.48721227621483376, "eval_val_evaluator_dot_accuracy@10": 0.9603580562659847, "eval_val_evaluator_dot_accuracy@5": 0.9245524296675192, "eval_val_evaluator_dot_map@100": 0.6732921649074852, "eval_val_evaluator_dot_mrr@10": 0.671490987699427, "eval_val_evaluator_dot_mrr@100": 0.6732921649074848, "eval_val_evaluator_dot_mrr@5": 0.6664748508098882, "eval_val_evaluator_dot_ndcg@10": 0.7435664719550824, "eval_val_evaluator_dot_ndcg@100": 0.7519530408382193, "eval_val_evaluator_dot_ndcg@5": 0.7317557792940828, "eval_val_evaluator_dot_precision@1": 0.48721227621483376, "eval_val_evaluator_dot_precision@10": 0.09603580562659846, "eval_val_evaluator_dot_precision@5": 0.18491048593350382, "eval_val_evaluator_dot_recall@1": 0.48721227621483376, "eval_val_evaluator_dot_recall@10": 0.9603580562659847, "eval_val_evaluator_dot_recall@5": 0.9245524296675192, "step": 3000 }, { "epoch": 15.909090909090908, "grad_norm": 2.537912130355835, "learning_rate": 6.691919191919193e-06, "loss": 0.0223, "step": 3500 }, { "epoch": 15.909090909090908, "eval_loss": 0.06057285517454147, "eval_runtime": 16.2937, "eval_samples_per_second": 47.994, "eval_steps_per_second": 6.015, "eval_val_evaluator_cosine_accuracy@1": 0.5051150895140665, "eval_val_evaluator_cosine_accuracy@10": 0.959079283887468, "eval_val_evaluator_cosine_accuracy@5": 0.928388746803069, "eval_val_evaluator_cosine_map@100": 0.6822101368749947, "eval_val_evaluator_cosine_mrr@10": 0.680220131530873, "eval_val_evaluator_cosine_mrr@100": 0.6822101368749943, "eval_val_evaluator_cosine_mrr@5": 0.6759164535379364, "eval_val_evaluator_cosine_ndcg@10": 0.749745056809794, "eval_val_evaluator_cosine_ndcg@100": 0.7586065473558236, "eval_val_evaluator_cosine_ndcg@5": 0.7396139266559898, "eval_val_evaluator_cosine_precision@1": 0.5051150895140665, "eval_val_evaluator_cosine_precision@10": 0.0959079283887468, "eval_val_evaluator_cosine_precision@5": 0.1856777493606138, "eval_val_evaluator_cosine_recall@1": 0.5051150895140665, "eval_val_evaluator_cosine_recall@10": 0.959079283887468, "eval_val_evaluator_cosine_recall@5": 0.928388746803069, "eval_val_evaluator_dot_accuracy@1": 0.5051150895140665, "eval_val_evaluator_dot_accuracy@10": 0.959079283887468, "eval_val_evaluator_dot_accuracy@5": 0.928388746803069, "eval_val_evaluator_dot_map@100": 0.6822101368749947, "eval_val_evaluator_dot_mrr@10": 0.680220131530873, "eval_val_evaluator_dot_mrr@100": 0.6822101368749943, "eval_val_evaluator_dot_mrr@5": 0.6759164535379364, "eval_val_evaluator_dot_ndcg@10": 0.749745056809794, "eval_val_evaluator_dot_ndcg@100": 0.7586065473558236, "eval_val_evaluator_dot_ndcg@5": 0.7396139266559898, "eval_val_evaluator_dot_precision@1": 0.5051150895140665, "eval_val_evaluator_dot_precision@10": 0.0959079283887468, "eval_val_evaluator_dot_precision@5": 0.1856777493606138, "eval_val_evaluator_dot_recall@1": 0.5051150895140665, "eval_val_evaluator_dot_recall@10": 0.959079283887468, "eval_val_evaluator_dot_recall@5": 0.928388746803069, "step": 3500 }, { "epoch": 18.181818181818183, "grad_norm": 6.390940189361572, "learning_rate": 6.060606060606061e-06, "loss": 0.021, "step": 4000 }, { "epoch": 18.181818181818183, "eval_loss": 0.05628308653831482, "eval_runtime": 16.1509, "eval_samples_per_second": 48.418, "eval_steps_per_second": 6.068, "eval_val_evaluator_cosine_accuracy@1": 0.5063938618925832, "eval_val_evaluator_cosine_accuracy@10": 0.9552429667519181, "eval_val_evaluator_cosine_accuracy@5": 0.9245524296675192, "eval_val_evaluator_cosine_map@100": 0.6833838272532422, "eval_val_evaluator_cosine_mrr@10": 0.6811883449031781, "eval_val_evaluator_cosine_mrr@100": 0.6833838272532414, "eval_val_evaluator_cosine_mrr@5": 0.6767689684569473, "eval_val_evaluator_cosine_ndcg@10": 0.7496397009089436, "eval_val_evaluator_cosine_ndcg@100": 0.7591798763800799, "eval_val_evaluator_cosine_ndcg@5": 0.7393934605063354, "eval_val_evaluator_cosine_precision@1": 0.5063938618925832, "eval_val_evaluator_cosine_precision@10": 0.0955242966751918, "eval_val_evaluator_cosine_precision@5": 0.1849104859335038, "eval_val_evaluator_cosine_recall@1": 0.5063938618925832, "eval_val_evaluator_cosine_recall@10": 0.9552429667519181, "eval_val_evaluator_cosine_recall@5": 0.9245524296675192, "eval_val_evaluator_dot_accuracy@1": 0.5063938618925832, "eval_val_evaluator_dot_accuracy@10": 0.9552429667519181, "eval_val_evaluator_dot_accuracy@5": 0.9245524296675192, "eval_val_evaluator_dot_map@100": 0.6833838272532422, "eval_val_evaluator_dot_mrr@10": 0.6811883449031781, "eval_val_evaluator_dot_mrr@100": 0.6833838272532414, "eval_val_evaluator_dot_mrr@5": 0.6767689684569473, "eval_val_evaluator_dot_ndcg@10": 0.7496397009089436, "eval_val_evaluator_dot_ndcg@100": 0.7591798763800799, "eval_val_evaluator_dot_ndcg@5": 0.7393934605063354, "eval_val_evaluator_dot_precision@1": 0.5063938618925832, "eval_val_evaluator_dot_precision@10": 0.0955242966751918, "eval_val_evaluator_dot_precision@5": 0.1849104859335038, "eval_val_evaluator_dot_recall@1": 0.5063938618925832, "eval_val_evaluator_dot_recall@10": 0.9552429667519181, "eval_val_evaluator_dot_recall@5": 0.9245524296675192, "step": 4000 }, { "epoch": 20.454545454545453, "grad_norm": 4.392763614654541, "learning_rate": 5.429292929292929e-06, "loss": 0.0203, "step": 4500 }, { "epoch": 20.454545454545453, "eval_loss": 0.057314660400152206, "eval_runtime": 16.1911, "eval_samples_per_second": 48.298, "eval_steps_per_second": 6.053, "eval_val_evaluator_cosine_accuracy@1": 0.4782608695652174, "eval_val_evaluator_cosine_accuracy@10": 0.9578005115089514, "eval_val_evaluator_cosine_accuracy@5": 0.928388746803069, "eval_val_evaluator_cosine_map@100": 0.6680531566193644, "eval_val_evaluator_cosine_mrr@10": 0.6661059351276735, "eval_val_evaluator_cosine_mrr@100": 0.6680531566193635, "eval_val_evaluator_cosine_mrr@5": 0.6618925831202036, "eval_val_evaluator_cosine_ndcg@10": 0.7390313134229293, "eval_val_evaluator_cosine_ndcg@100": 0.7478157002187452, "eval_val_evaluator_cosine_ndcg@5": 0.7292375411745403, "eval_val_evaluator_cosine_precision@1": 0.4782608695652174, "eval_val_evaluator_cosine_precision@10": 0.09578005115089515, "eval_val_evaluator_cosine_precision@5": 0.1856777493606138, "eval_val_evaluator_cosine_recall@1": 0.4782608695652174, "eval_val_evaluator_cosine_recall@10": 0.9578005115089514, "eval_val_evaluator_cosine_recall@5": 0.928388746803069, "eval_val_evaluator_dot_accuracy@1": 0.4782608695652174, "eval_val_evaluator_dot_accuracy@10": 0.9578005115089514, "eval_val_evaluator_dot_accuracy@5": 0.928388746803069, "eval_val_evaluator_dot_map@100": 0.6680531566193644, "eval_val_evaluator_dot_mrr@10": 0.6661059351276735, "eval_val_evaluator_dot_mrr@100": 0.6680531566193635, "eval_val_evaluator_dot_mrr@5": 0.6618925831202036, "eval_val_evaluator_dot_ndcg@10": 0.7390313134229293, "eval_val_evaluator_dot_ndcg@100": 0.7478157002187452, "eval_val_evaluator_dot_ndcg@5": 0.7292375411745403, "eval_val_evaluator_dot_precision@1": 0.4782608695652174, "eval_val_evaluator_dot_precision@10": 0.09578005115089515, "eval_val_evaluator_dot_precision@5": 0.1856777493606138, "eval_val_evaluator_dot_recall@1": 0.4782608695652174, "eval_val_evaluator_dot_recall@10": 0.9578005115089514, "eval_val_evaluator_dot_recall@5": 0.928388746803069, "step": 4500 }, { "epoch": 22.727272727272727, "grad_norm": 4.935870170593262, "learning_rate": 4.7979797979797985e-06, "loss": 0.0212, "step": 5000 }, { "epoch": 22.727272727272727, "eval_loss": 0.06370092183351517, "eval_runtime": 16.1455, "eval_samples_per_second": 48.435, "eval_steps_per_second": 6.07, "eval_val_evaluator_cosine_accuracy@1": 0.49872122762148335, "eval_val_evaluator_cosine_accuracy@10": 0.9514066496163683, "eval_val_evaluator_cosine_accuracy@5": 0.9258312020460358, "eval_val_evaluator_cosine_map@100": 0.6769910008307545, "eval_val_evaluator_cosine_mrr@10": 0.6745772946859899, "eval_val_evaluator_cosine_mrr@100": 0.6769910008307539, "eval_val_evaluator_cosine_mrr@5": 0.6707587382779191, "eval_val_evaluator_cosine_ndcg@10": 0.743803897516351, "eval_val_evaluator_cosine_ndcg@100": 0.7540671552876991, "eval_val_evaluator_cosine_ndcg@5": 0.7351253519366493, "eval_val_evaluator_cosine_precision@1": 0.49872122762148335, "eval_val_evaluator_cosine_precision@10": 0.0951406649616368, "eval_val_evaluator_cosine_precision@5": 0.18516624040920715, "eval_val_evaluator_cosine_recall@1": 0.49872122762148335, "eval_val_evaluator_cosine_recall@10": 0.9514066496163683, "eval_val_evaluator_cosine_recall@5": 0.9258312020460358, "eval_val_evaluator_dot_accuracy@1": 0.49872122762148335, "eval_val_evaluator_dot_accuracy@10": 0.9514066496163683, "eval_val_evaluator_dot_accuracy@5": 0.9258312020460358, "eval_val_evaluator_dot_map@100": 0.6769910008307545, "eval_val_evaluator_dot_mrr@10": 0.6745772946859899, "eval_val_evaluator_dot_mrr@100": 0.6769910008307539, "eval_val_evaluator_dot_mrr@5": 0.6707587382779191, "eval_val_evaluator_dot_ndcg@10": 0.743803897516351, "eval_val_evaluator_dot_ndcg@100": 0.7540671552876991, "eval_val_evaluator_dot_ndcg@5": 0.7351253519366493, "eval_val_evaluator_dot_precision@1": 0.49872122762148335, "eval_val_evaluator_dot_precision@10": 0.0951406649616368, "eval_val_evaluator_dot_precision@5": 0.18516624040920715, "eval_val_evaluator_dot_recall@1": 0.49872122762148335, "eval_val_evaluator_dot_recall@10": 0.9514066496163683, "eval_val_evaluator_dot_recall@5": 0.9258312020460358, "step": 5000 }, { "epoch": 25.0, "grad_norm": 8.184098243713379, "learning_rate": 4.166666666666667e-06, "loss": 0.018, "step": 5500 }, { "epoch": 25.0, "eval_loss": 0.057994794100522995, "eval_runtime": 16.1282, "eval_samples_per_second": 48.487, "eval_steps_per_second": 6.076, "eval_val_evaluator_cosine_accuracy@1": 0.49744245524296676, "eval_val_evaluator_cosine_accuracy@10": 0.9539641943734015, "eval_val_evaluator_cosine_accuracy@5": 0.9245524296675192, "eval_val_evaluator_cosine_map@100": 0.6780504881130015, "eval_val_evaluator_cosine_mrr@10": 0.6757890837494416, "eval_val_evaluator_cosine_mrr@100": 0.6780504881130013, "eval_val_evaluator_cosine_mrr@5": 0.6714407502131281, "eval_val_evaluator_cosine_ndcg@10": 0.7453518958660976, "eval_val_evaluator_cosine_ndcg@100": 0.7550072905243007, "eval_val_evaluator_cosine_ndcg@5": 0.7354200452387065, "eval_val_evaluator_cosine_precision@1": 0.49744245524296676, "eval_val_evaluator_cosine_precision@10": 0.09539641943734015, "eval_val_evaluator_cosine_precision@5": 0.18491048593350382, "eval_val_evaluator_cosine_recall@1": 0.49744245524296676, "eval_val_evaluator_cosine_recall@10": 0.9539641943734015, "eval_val_evaluator_cosine_recall@5": 0.9245524296675192, "eval_val_evaluator_dot_accuracy@1": 0.49744245524296676, "eval_val_evaluator_dot_accuracy@10": 0.9539641943734015, "eval_val_evaluator_dot_accuracy@5": 0.9245524296675192, "eval_val_evaluator_dot_map@100": 0.6780504881130015, "eval_val_evaluator_dot_mrr@10": 0.6757890837494416, "eval_val_evaluator_dot_mrr@100": 0.6780504881130013, "eval_val_evaluator_dot_mrr@5": 0.6714407502131281, "eval_val_evaluator_dot_ndcg@10": 0.7453518958660976, "eval_val_evaluator_dot_ndcg@100": 0.7550072905243007, "eval_val_evaluator_dot_ndcg@5": 0.7354200452387065, "eval_val_evaluator_dot_precision@1": 0.49744245524296676, "eval_val_evaluator_dot_precision@10": 0.09539641943734015, "eval_val_evaluator_dot_precision@5": 0.18491048593350382, "eval_val_evaluator_dot_recall@1": 0.49744245524296676, "eval_val_evaluator_dot_recall@10": 0.9539641943734015, "eval_val_evaluator_dot_recall@5": 0.9245524296675192, "step": 5500 }, { "epoch": 27.272727272727273, "grad_norm": 0.004099330864846706, "learning_rate": 3.5353535353535356e-06, "loss": 0.0166, "step": 6000 }, { "epoch": 27.272727272727273, "eval_loss": 0.056713227182626724, "eval_runtime": 16.1679, "eval_samples_per_second": 48.367, "eval_steps_per_second": 6.061, "eval_val_evaluator_cosine_accuracy@1": 0.4948849104859335, "eval_val_evaluator_cosine_accuracy@10": 0.9552429667519181, "eval_val_evaluator_cosine_accuracy@5": 0.9232736572890026, "eval_val_evaluator_cosine_map@100": 0.6780550124960388, "eval_val_evaluator_cosine_mrr@10": 0.675847440425445, "eval_val_evaluator_cosine_mrr@100": 0.6780550124960384, "eval_val_evaluator_cosine_mrr@5": 0.6710358056265976, "eval_val_evaluator_cosine_ndcg@10": 0.7457753760186268, "eval_val_evaluator_cosine_ndcg@100": 0.7551473066673557, "eval_val_evaluator_cosine_ndcg@5": 0.7348943591506862, "eval_val_evaluator_cosine_precision@1": 0.4948849104859335, "eval_val_evaluator_cosine_precision@10": 0.0955242966751918, "eval_val_evaluator_cosine_precision@5": 0.18465473145780048, "eval_val_evaluator_cosine_recall@1": 0.4948849104859335, "eval_val_evaluator_cosine_recall@10": 0.9552429667519181, "eval_val_evaluator_cosine_recall@5": 0.9232736572890026, "eval_val_evaluator_dot_accuracy@1": 0.4948849104859335, "eval_val_evaluator_dot_accuracy@10": 0.9552429667519181, "eval_val_evaluator_dot_accuracy@5": 0.9232736572890026, "eval_val_evaluator_dot_map@100": 0.6780550124960388, "eval_val_evaluator_dot_mrr@10": 0.675847440425445, "eval_val_evaluator_dot_mrr@100": 0.6780550124960384, "eval_val_evaluator_dot_mrr@5": 0.6710358056265976, "eval_val_evaluator_dot_ndcg@10": 0.7457753760186268, "eval_val_evaluator_dot_ndcg@100": 0.7551473066673557, "eval_val_evaluator_dot_ndcg@5": 0.7348943591506862, "eval_val_evaluator_dot_precision@1": 0.4948849104859335, "eval_val_evaluator_dot_precision@10": 0.0955242966751918, "eval_val_evaluator_dot_precision@5": 0.18465473145780048, "eval_val_evaluator_dot_recall@1": 0.4948849104859335, "eval_val_evaluator_dot_recall@10": 0.9552429667519181, "eval_val_evaluator_dot_recall@5": 0.9232736572890026, "step": 6000 }, { "epoch": 29.545454545454547, "grad_norm": 0.02236221544444561, "learning_rate": 2.904040404040404e-06, "loss": 0.0194, "step": 6500 }, { "epoch": 29.545454545454547, "eval_loss": 0.05420228838920593, "eval_runtime": 16.1682, "eval_samples_per_second": 48.366, "eval_steps_per_second": 6.061, "eval_val_evaluator_cosine_accuracy@1": 0.5038363171355499, "eval_val_evaluator_cosine_accuracy@10": 0.9565217391304348, "eval_val_evaluator_cosine_accuracy@5": 0.9271099744245525, "eval_val_evaluator_cosine_map@100": 0.6834975753216492, "eval_val_evaluator_cosine_mrr@10": 0.6813415946088577, "eval_val_evaluator_cosine_mrr@100": 0.6834975753216488, "eval_val_evaluator_cosine_mrr@5": 0.6769181585677743, "eval_val_evaluator_cosine_ndcg@10": 0.7501703504734094, "eval_val_evaluator_cosine_ndcg@100": 0.759281214123215, "eval_val_evaluator_cosine_ndcg@5": 0.7401646982131597, "eval_val_evaluator_cosine_precision@1": 0.5038363171355499, "eval_val_evaluator_cosine_precision@10": 0.09565217391304348, "eval_val_evaluator_cosine_precision@5": 0.18542199488491046, "eval_val_evaluator_cosine_recall@1": 0.5038363171355499, "eval_val_evaluator_cosine_recall@10": 0.9565217391304348, "eval_val_evaluator_cosine_recall@5": 0.9271099744245525, "eval_val_evaluator_dot_accuracy@1": 0.5038363171355499, "eval_val_evaluator_dot_accuracy@10": 0.9565217391304348, "eval_val_evaluator_dot_accuracy@5": 0.9271099744245525, "eval_val_evaluator_dot_map@100": 0.6834975753216492, "eval_val_evaluator_dot_mrr@10": 0.6813415946088577, "eval_val_evaluator_dot_mrr@100": 0.6834975753216488, "eval_val_evaluator_dot_mrr@5": 0.6769181585677743, "eval_val_evaluator_dot_ndcg@10": 0.7501703504734094, "eval_val_evaluator_dot_ndcg@100": 0.759281214123215, "eval_val_evaluator_dot_ndcg@5": 0.7401646982131597, "eval_val_evaluator_dot_precision@1": 0.5038363171355499, "eval_val_evaluator_dot_precision@10": 0.09565217391304348, "eval_val_evaluator_dot_precision@5": 0.18542199488491046, "eval_val_evaluator_dot_recall@1": 0.5038363171355499, "eval_val_evaluator_dot_recall@10": 0.9565217391304348, "eval_val_evaluator_dot_recall@5": 0.9271099744245525, "step": 6500 }, { "epoch": 31.818181818181817, "grad_norm": 0.03244137763977051, "learning_rate": 2.2727272727272728e-06, "loss": 0.0182, "step": 7000 }, { "epoch": 31.818181818181817, "eval_loss": 0.05468086898326874, "eval_runtime": 23.5877, "eval_samples_per_second": 33.153, "eval_steps_per_second": 4.155, "eval_val_evaluator_cosine_accuracy@1": 0.5140664961636828, "eval_val_evaluator_cosine_accuracy@10": 0.9578005115089514, "eval_val_evaluator_cosine_accuracy@5": 0.9296675191815856, "eval_val_evaluator_cosine_map@100": 0.6896503713996865, "eval_val_evaluator_cosine_mrr@10": 0.6875806844476918, "eval_val_evaluator_cosine_mrr@100": 0.6896503713996863, "eval_val_evaluator_cosine_mrr@5": 0.6833333333333328, "eval_val_evaluator_cosine_ndcg@10": 0.7551953164170838, "eval_val_evaluator_cosine_ndcg@100": 0.7640059344945531, "eval_val_evaluator_cosine_ndcg@5": 0.7456070216519929, "eval_val_evaluator_cosine_precision@1": 0.5140664961636828, "eval_val_evaluator_cosine_precision@10": 0.09578005115089515, "eval_val_evaluator_cosine_precision@5": 0.1859335038363171, "eval_val_evaluator_cosine_recall@1": 0.5140664961636828, "eval_val_evaluator_cosine_recall@10": 0.9578005115089514, "eval_val_evaluator_cosine_recall@5": 0.9296675191815856, "eval_val_evaluator_dot_accuracy@1": 0.5140664961636828, "eval_val_evaluator_dot_accuracy@10": 0.9578005115089514, "eval_val_evaluator_dot_accuracy@5": 0.9296675191815856, "eval_val_evaluator_dot_map@100": 0.6896503713996865, "eval_val_evaluator_dot_mrr@10": 0.6875806844476918, "eval_val_evaluator_dot_mrr@100": 0.6896503713996863, "eval_val_evaluator_dot_mrr@5": 0.6833333333333328, "eval_val_evaluator_dot_ndcg@10": 0.7551953164170838, "eval_val_evaluator_dot_ndcg@100": 0.7640059344945531, "eval_val_evaluator_dot_ndcg@5": 0.7456070216519929, "eval_val_evaluator_dot_precision@1": 0.5140664961636828, "eval_val_evaluator_dot_precision@10": 0.09578005115089515, "eval_val_evaluator_dot_precision@5": 0.1859335038363171, "eval_val_evaluator_dot_recall@1": 0.5140664961636828, "eval_val_evaluator_dot_recall@10": 0.9578005115089514, "eval_val_evaluator_dot_recall@5": 0.9296675191815856, "step": 7000 }, { "epoch": 34.09090909090909, "grad_norm": 0.001623653806746006, "learning_rate": 1.6414141414141415e-06, "loss": 0.0157, "step": 7500 }, { "epoch": 34.09090909090909, "eval_loss": 0.05486880987882614, "eval_runtime": 16.1005, "eval_samples_per_second": 48.57, "eval_steps_per_second": 6.087, "eval_val_evaluator_cosine_accuracy@1": 0.5140664961636828, "eval_val_evaluator_cosine_accuracy@10": 0.9565217391304348, "eval_val_evaluator_cosine_accuracy@5": 0.9309462915601023, "eval_val_evaluator_cosine_map@100": 0.6898639676357106, "eval_val_evaluator_cosine_mrr@10": 0.6877618438679816, "eval_val_evaluator_cosine_mrr@100": 0.6898639676357101, "eval_val_evaluator_cosine_mrr@5": 0.6839727195225911, "eval_val_evaluator_cosine_ndcg@10": 0.7550036263650965, "eval_val_evaluator_cosine_ndcg@100": 0.764061133822081, "eval_val_evaluator_cosine_ndcg@5": 0.7463601070791411, "eval_val_evaluator_cosine_precision@1": 0.5140664961636828, "eval_val_evaluator_cosine_precision@10": 0.09565217391304345, "eval_val_evaluator_cosine_precision@5": 0.18618925831202046, "eval_val_evaluator_cosine_recall@1": 0.5140664961636828, "eval_val_evaluator_cosine_recall@10": 0.9565217391304348, "eval_val_evaluator_cosine_recall@5": 0.9309462915601023, "eval_val_evaluator_dot_accuracy@1": 0.5140664961636828, "eval_val_evaluator_dot_accuracy@10": 0.9565217391304348, "eval_val_evaluator_dot_accuracy@5": 0.9309462915601023, "eval_val_evaluator_dot_map@100": 0.6898639676357106, "eval_val_evaluator_dot_mrr@10": 0.6877618438679816, "eval_val_evaluator_dot_mrr@100": 0.6898639676357101, "eval_val_evaluator_dot_mrr@5": 0.6839727195225911, "eval_val_evaluator_dot_ndcg@10": 0.7550036263650965, "eval_val_evaluator_dot_ndcg@100": 0.764061133822081, "eval_val_evaluator_dot_ndcg@5": 0.7463601070791411, "eval_val_evaluator_dot_precision@1": 0.5140664961636828, "eval_val_evaluator_dot_precision@10": 0.09565217391304345, "eval_val_evaluator_dot_precision@5": 0.18618925831202046, "eval_val_evaluator_dot_recall@1": 0.5140664961636828, "eval_val_evaluator_dot_recall@10": 0.9565217391304348, "eval_val_evaluator_dot_recall@5": 0.9309462915601023, "step": 7500 }, { "epoch": 36.36363636363637, "grad_norm": 0.06726043671369553, "learning_rate": 1.01010101010101e-06, "loss": 0.016, "step": 8000 }, { "epoch": 36.36363636363637, "eval_loss": 0.05301735922694206, "eval_runtime": 16.2548, "eval_samples_per_second": 48.109, "eval_steps_per_second": 6.029, "eval_val_evaluator_cosine_accuracy@1": 0.5089514066496164, "eval_val_evaluator_cosine_accuracy@10": 0.9578005115089514, "eval_val_evaluator_cosine_accuracy@5": 0.9347826086956522, "eval_val_evaluator_cosine_map@100": 0.6859605033261141, "eval_val_evaluator_cosine_mrr@10": 0.6839808387122961, "eval_val_evaluator_cosine_mrr@100": 0.6859605033261138, "eval_val_evaluator_cosine_mrr@5": 0.6806052855924973, "eval_val_evaluator_cosine_ndcg@10": 0.752452574251246, "eval_val_evaluator_cosine_ndcg@100": 0.7611500008339412, "eval_val_evaluator_cosine_ndcg@5": 0.7447092792639648, "eval_val_evaluator_cosine_precision@1": 0.5089514066496164, "eval_val_evaluator_cosine_precision@10": 0.09578005115089515, "eval_val_evaluator_cosine_precision@5": 0.1869565217391304, "eval_val_evaluator_cosine_recall@1": 0.5089514066496164, "eval_val_evaluator_cosine_recall@10": 0.9578005115089514, "eval_val_evaluator_cosine_recall@5": 0.9347826086956522, "eval_val_evaluator_dot_accuracy@1": 0.5089514066496164, "eval_val_evaluator_dot_accuracy@10": 0.9578005115089514, "eval_val_evaluator_dot_accuracy@5": 0.9347826086956522, "eval_val_evaluator_dot_map@100": 0.6859605033261141, "eval_val_evaluator_dot_mrr@10": 0.6839808387122961, "eval_val_evaluator_dot_mrr@100": 0.6859605033261138, "eval_val_evaluator_dot_mrr@5": 0.6806052855924973, "eval_val_evaluator_dot_ndcg@10": 0.752452574251246, "eval_val_evaluator_dot_ndcg@100": 0.7611500008339412, "eval_val_evaluator_dot_ndcg@5": 0.7447092792639648, "eval_val_evaluator_dot_precision@1": 0.5089514066496164, "eval_val_evaluator_dot_precision@10": 0.09578005115089515, "eval_val_evaluator_dot_precision@5": 0.1869565217391304, "eval_val_evaluator_dot_recall@1": 0.5089514066496164, "eval_val_evaluator_dot_recall@10": 0.9578005115089514, "eval_val_evaluator_dot_recall@5": 0.9347826086956522, "step": 8000 } ], "logging_steps": 500, "max_steps": 8800, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }