{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2972972972972974,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.02, "learning_rate": 0.0001, "loss": 0.7874, "step": 10 },
    { "epoch": 0.04, "learning_rate": 0.0001, "loss": 0.7241, "step": 20 },
    { "epoch": 0.06, "learning_rate": 0.0001, "loss": 0.6801, "step": 30 },
    { "epoch": 0.09, "learning_rate": 0.0001, "loss": 0.7074, "step": 40 },
    { "epoch": 0.11, "learning_rate": 0.0001, "loss": 1.2215, "step": 50 },
    { "epoch": 0.13, "learning_rate": 0.0001, "loss": 0.6756, "step": 60 },
    { "epoch": 0.15, "learning_rate": 0.0001, "loss": 0.6333, "step": 70 },
    { "epoch": 0.17, "learning_rate": 0.0001, "loss": 0.6226, "step": 80 },
    { "epoch": 0.19, "learning_rate": 0.0001, "loss": 0.6498, "step": 90 },
    { "epoch": 0.22, "learning_rate": 0.0001, "loss": 1.0335, "step": 100 },
    { "epoch": 0.24, "learning_rate": 0.0001, "loss": 0.6274, "step": 110 },
    { "epoch": 0.26, "learning_rate": 0.0001, "loss": 0.5948, "step": 120 },
    { "epoch": 0.28, "learning_rate": 0.0001, "loss": 0.617, "step": 130 },
    { "epoch": 0.3, "learning_rate": 0.0001, "loss": 0.6596, "step": 140 },
    { "epoch": 0.32, "learning_rate": 0.0001, "loss": 1.1614, "step": 150 },
    { "epoch": 0.35, "learning_rate": 0.0001, "loss": 0.6195, "step": 160 },
    { "epoch": 0.37, "learning_rate": 0.0001, "loss": 0.5798, "step": 170 },
    { "epoch": 0.39, "learning_rate": 0.0001, "loss": 0.5892, "step": 180 },
    { "epoch": 0.4, "eval_loss": 0.6906093955039978, "eval_runtime": 1140.1455, "eval_samples_per_second": 0.877, "eval_steps_per_second": 0.877, "step": 187 },
    {
      "epoch": 0.4,
      "mmlu_eval_accuracy": 0.6468424709524904,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.5,
      "mmlu_eval_accuracy_astronomy": 0.8125,
      "mmlu_eval_accuracy_business_ethics": 0.8181818181818182,
      "mmlu_eval_accuracy_clinical_knowledge": 0.6551724137931034,
      "mmlu_eval_accuracy_college_biology": 0.6875,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
      "mmlu_eval_accuracy_college_medicine": 0.6818181818181818,
      "mmlu_eval_accuracy_college_physics": 0.36363636363636365,
      "mmlu_eval_accuracy_computer_security": 0.6363636363636364,
      "mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.6875,
      "mmlu_eval_accuracy_elementary_mathematics": 0.5121951219512195,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.8125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_european_history": 0.8333333333333334,
      "mmlu_eval_accuracy_high_school_geography": 0.9090909090909091,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.8571428571428571,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6976744186046512,
      "mmlu_eval_accuracy_high_school_mathematics": 0.3793103448275862,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.8076923076923077,
      "mmlu_eval_accuracy_high_school_physics": 0.17647058823529413,
      "mmlu_eval_accuracy_high_school_psychology": 0.9166666666666666,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091,
      "mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693,
      "mmlu_eval_accuracy_human_aging": 0.782608695652174,
      "mmlu_eval_accuracy_human_sexuality": 0.5833333333333334,
      "mmlu_eval_accuracy_international_law": 0.9230769230769231,
      "mmlu_eval_accuracy_jurisprudence": 0.5454545454545454,
      "mmlu_eval_accuracy_logical_fallacies": 0.6666666666666666,
      "mmlu_eval_accuracy_machine_learning": 0.2727272727272727,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.88,
      "mmlu_eval_accuracy_medical_genetics": 1.0,
      "mmlu_eval_accuracy_miscellaneous": 0.8023255813953488,
      "mmlu_eval_accuracy_moral_disputes": 0.6842105263157895,
      "mmlu_eval_accuracy_moral_scenarios": 0.47,
      "mmlu_eval_accuracy_nutrition": 0.7272727272727273,
      "mmlu_eval_accuracy_philosophy": 0.7647058823529411,
      "mmlu_eval_accuracy_prehistory": 0.6285714285714286,
      "mmlu_eval_accuracy_professional_accounting": 0.6129032258064516,
      "mmlu_eval_accuracy_professional_law": 0.5470588235294118,
      "mmlu_eval_accuracy_professional_medicine": 0.6451612903225806,
      "mmlu_eval_accuracy_professional_psychology": 0.7101449275362319,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.7037037037037037,
      "mmlu_eval_accuracy_sociology": 0.9545454545454546,
      "mmlu_eval_accuracy_us_foreign_policy": 1.0,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.8947368421052632,
      "mmlu_loss": 1.1459644647311495,
      "step": 187
    },
    { "epoch": 0.41, "learning_rate": 0.0001, "loss": 0.63, "step": 190 },
    { "epoch": 0.43, "learning_rate": 0.0001, "loss": 1.0713, "step": 200 },
    { "epoch": 0.45, "learning_rate": 0.0001, "loss": 0.5899, "step": 210 },
    { "epoch": 0.48, "learning_rate": 0.0001, "loss": 0.5853, "step": 220 },
    { "epoch": 0.5, "learning_rate": 0.0001, "loss": 0.5808, "step": 230 },
    { "epoch": 0.52, "learning_rate": 0.0001, "loss": 0.6217, "step": 240 },
    { "epoch": 0.54, "learning_rate": 0.0001, "loss": 0.9323, "step": 250 },
    { "epoch": 0.56, "learning_rate": 0.0001, "loss": 0.5872, "step": 260 },
    { "epoch": 0.58, "learning_rate": 0.0001, "loss": 0.547, "step": 270 },
    { "epoch": 0.61, "learning_rate": 0.0001, "loss": 0.5627, "step": 280 },
    { "epoch": 0.63, "learning_rate": 0.0001, "loss": 0.6126, "step": 290 },
    { "epoch": 0.65, "learning_rate": 0.0001, "loss": 0.9871, "step": 300 },
    { "epoch": 0.67, "learning_rate": 0.0001, "loss": 0.568, "step": 310 },
    { "epoch": 0.69, "learning_rate": 0.0001, "loss": 0.561, "step": 320 },
    { "epoch": 0.71, "learning_rate": 0.0001, "loss": 0.5675, "step": 330 },
    { "epoch": 0.74, "learning_rate": 0.0001, "loss": 0.5864, "step": 340 },
    { "epoch": 0.76, "learning_rate": 0.0001, "loss": 1.0407, "step": 350 },
    { "epoch": 0.78, "learning_rate": 0.0001, "loss": 0.5774, "step": 360 },
    { "epoch": 0.8, "learning_rate": 0.0001, "loss": 0.5696, "step": 370 },
    { "epoch": 0.81, "eval_loss": 0.6527115702629089, "eval_runtime": 1139.7605, "eval_samples_per_second": 0.877, "eval_steps_per_second": 0.877, "step": 374 },
    {
      "epoch": 0.81,
      "mmlu_eval_accuracy": 0.651057156408483,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.5,
      "mmlu_eval_accuracy_astronomy": 0.8125,
      "mmlu_eval_accuracy_business_ethics": 0.8181818181818182,
      "mmlu_eval_accuracy_clinical_knowledge": 0.6551724137931034,
      "mmlu_eval_accuracy_college_biology": 0.625,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.45454545454545453,
      "mmlu_eval_accuracy_college_medicine": 0.6818181818181818,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.6363636363636364,
      "mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.6875,
      "mmlu_eval_accuracy_elementary_mathematics": 0.5121951219512195,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.78125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_european_history": 0.8333333333333334,
      "mmlu_eval_accuracy_high_school_geography": 0.9090909090909091,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.8571428571428571,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6976744186046512,
      "mmlu_eval_accuracy_high_school_mathematics": 0.4482758620689655,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.8076923076923077,
      "mmlu_eval_accuracy_high_school_physics": 0.11764705882352941,
      "mmlu_eval_accuracy_high_school_psychology": 0.95,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091,
      "mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693,
      "mmlu_eval_accuracy_human_aging": 0.782608695652174,
      "mmlu_eval_accuracy_human_sexuality": 0.5833333333333334,
      "mmlu_eval_accuracy_international_law": 0.9230769230769231,
      "mmlu_eval_accuracy_jurisprudence": 0.5454545454545454,
      "mmlu_eval_accuracy_logical_fallacies": 0.7222222222222222,
      "mmlu_eval_accuracy_machine_learning": 0.36363636363636365,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.88,
      "mmlu_eval_accuracy_medical_genetics": 1.0,
      "mmlu_eval_accuracy_miscellaneous": 0.8023255813953488,
      "mmlu_eval_accuracy_moral_disputes": 0.6842105263157895,
      "mmlu_eval_accuracy_moral_scenarios": 0.49,
      "mmlu_eval_accuracy_nutrition": 0.696969696969697,
      "mmlu_eval_accuracy_philosophy": 0.7647058823529411,
      "mmlu_eval_accuracy_prehistory": 0.6,
      "mmlu_eval_accuracy_professional_accounting": 0.6451612903225806,
      "mmlu_eval_accuracy_professional_law": 0.5529411764705883,
      "mmlu_eval_accuracy_professional_medicine": 0.6451612903225806,
      "mmlu_eval_accuracy_professional_psychology": 0.7101449275362319,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.6666666666666666,
      "mmlu_eval_accuracy_sociology": 0.9545454545454546,
      "mmlu_eval_accuracy_us_foreign_policy": 1.0,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.8947368421052632,
      "mmlu_loss": 1.1366865825197574,
      "step": 374
    },
    { "epoch": 0.82, "learning_rate": 0.0001, "loss": 0.5626, "step": 380 },
    { "epoch": 0.84, "learning_rate": 0.0001, "loss": 0.597, "step": 390 },
    { "epoch": 0.86, "learning_rate": 0.0001, "loss": 0.9613, "step": 400 },
    { "epoch": 0.89, "learning_rate": 0.0001, "loss": 0.5486, "step": 410 },
    { "epoch": 0.91, "learning_rate": 0.0001, "loss": 0.5496, "step": 420 },
    { "epoch": 0.93, "learning_rate": 0.0001, "loss": 0.5468, "step": 430 },
    { "epoch": 0.95, "learning_rate": 0.0001, "loss": 0.5914, "step": 440 },
    { "epoch": 0.97, "learning_rate": 0.0001, "loss": 0.9408, "step": 450 },
    { "epoch": 0.99, "learning_rate": 0.0001, "loss": 0.5851, "step": 460 },
    { "epoch": 1.02, "learning_rate": 0.0001, "loss": 0.6053, "step": 470 },
    { "epoch": 1.04, "learning_rate": 0.0001, "loss": 0.4955, "step": 480 },
    { "epoch": 1.06, "learning_rate": 0.0001, "loss": 0.5032, "step": 490 },
    { "epoch": 1.08, "learning_rate": 0.0001, "loss": 0.5175, "step": 500 },
    { "epoch": 1.1, "learning_rate": 0.0001, "loss": 0.6447, "step": 510 },
    { "epoch": 1.12, "learning_rate": 0.0001, "loss": 0.6054, "step": 520 },
    { "epoch": 1.15, "learning_rate": 0.0001, "loss": 0.5034, "step": 530 },
    { "epoch": 1.17, "learning_rate": 0.0001, "loss": 0.5105, "step": 540 },
    { "epoch": 1.19, "learning_rate": 0.0001, "loss": 0.5272, "step": 550 },
    { "epoch": 1.21, "learning_rate": 0.0001, "loss": 0.6821, "step": 560 },
    { "epoch": 1.21, "eval_loss": 0.6524714827537537, "eval_runtime": 1139.5177, "eval_samples_per_second": 0.878, "eval_steps_per_second": 0.878, "step": 561 },
    {
      "epoch": 1.21,
      "mmlu_eval_accuracy": 0.657632739983987,
      "mmlu_eval_accuracy_abstract_algebra": 0.45454545454545453,
      "mmlu_eval_accuracy_anatomy": 0.5,
      "mmlu_eval_accuracy_astronomy": 0.8125,
      "mmlu_eval_accuracy_business_ethics": 0.8181818181818182,
      "mmlu_eval_accuracy_clinical_knowledge": 0.6551724137931034,
      "mmlu_eval_accuracy_college_biology": 0.6875,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.5454545454545454,
      "mmlu_eval_accuracy_college_mathematics": 0.45454545454545453,
      "mmlu_eval_accuracy_college_medicine": 0.6363636363636364,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.7272727272727273,
      "mmlu_eval_accuracy_conceptual_physics": 0.6153846153846154,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.6875,
      "mmlu_eval_accuracy_elementary_mathematics": 0.4878048780487805,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.8125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
      "mmlu_eval_accuracy_high_school_computer_science": 0.7777777777777778,
      "mmlu_eval_accuracy_high_school_european_history": 0.8333333333333334,
      "mmlu_eval_accuracy_high_school_geography": 0.9090909090909091,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.9047619047619048,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6976744186046512,
      "mmlu_eval_accuracy_high_school_mathematics": 0.3793103448275862,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.8076923076923077,
      "mmlu_eval_accuracy_high_school_physics": 0.11764705882352941,
      "mmlu_eval_accuracy_high_school_psychology": 0.9333333333333333,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.9090909090909091,
      "mmlu_eval_accuracy_high_school_world_history": 0.7692307692307693,
      "mmlu_eval_accuracy_human_aging": 0.782608695652174,
      "mmlu_eval_accuracy_human_sexuality": 0.5833333333333334,
      "mmlu_eval_accuracy_international_law": 0.9230769230769231,
      "mmlu_eval_accuracy_jurisprudence": 0.5454545454545454,
      "mmlu_eval_accuracy_logical_fallacies": 0.6666666666666666,
      "mmlu_eval_accuracy_machine_learning": 0.45454545454545453,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.88,
      "mmlu_eval_accuracy_medical_genetics": 1.0,
      "mmlu_eval_accuracy_miscellaneous": 0.7906976744186046,
      "mmlu_eval_accuracy_moral_disputes": 0.6842105263157895,
      "mmlu_eval_accuracy_moral_scenarios": 0.5,
      "mmlu_eval_accuracy_nutrition": 0.696969696969697,
      "mmlu_eval_accuracy_philosophy": 0.7647058823529411,
      "mmlu_eval_accuracy_prehistory": 0.6285714285714286,
      "mmlu_eval_accuracy_professional_accounting": 0.6451612903225806,
      "mmlu_eval_accuracy_professional_law": 0.5411764705882353,
      "mmlu_eval_accuracy_professional_medicine": 0.6451612903225806,
      "mmlu_eval_accuracy_professional_psychology": 0.7101449275362319,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.6666666666666666,
      "mmlu_eval_accuracy_sociology": 0.9545454545454546,
      "mmlu_eval_accuracy_us_foreign_policy": 0.9090909090909091,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.8947368421052632,
      "mmlu_loss": 1.0653009439443233,
      "step": 561
    },
    { "epoch": 1.23, "learning_rate": 0.0001, "loss": 0.6131, "step": 570 },
    { "epoch": 1.25, "learning_rate": 0.0001, "loss": 0.5156, "step": 580 },
    { "epoch": 1.28, "learning_rate": 0.0001, "loss": 0.5091, "step": 590 },
    { "epoch": 1.3, "learning_rate": 0.0001, "loss": 0.5251, "step": 600 }
  ],
  "max_steps": 1875,
  "num_train_epochs": 5,
  "total_flos": 8.191898542915584e+17,
  "trial_name": null,
  "trial_params": null
}