{ "best_metric": 0.31598106026649475, "best_model_checkpoint": "./opt_trained/checkpoint-43", "epoch": 1.0, "eval_steps": 500, "global_step": 43, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023391812865497075, "grad_norm": 1.4262398481369019, "learning_rate": 9.995238095238095e-05, "loss": 2.4722, "step": 1 }, { "epoch": 0.04678362573099415, "grad_norm": 12.206507682800293, "learning_rate": 9.990476190476191e-05, "loss": 3.176, "step": 2 }, { "epoch": 0.07017543859649122, "grad_norm": 11.578840255737305, "learning_rate": 9.985714285714287e-05, "loss": 2.8396, "step": 3 }, { "epoch": 0.0935672514619883, "grad_norm": 9.648161888122559, "learning_rate": 9.980952380952382e-05, "loss": 2.7336, "step": 4 }, { "epoch": 0.11695906432748537, "grad_norm": 3.8782691955566406, "learning_rate": 9.976190476190477e-05, "loss": 2.6955, "step": 5 }, { "epoch": 0.14035087719298245, "grad_norm": 12.644487380981445, "learning_rate": 9.971428571428571e-05, "loss": 2.6405, "step": 6 }, { "epoch": 0.16374269005847952, "grad_norm": 11.057122230529785, "learning_rate": 9.966666666666667e-05, "loss": 2.7113, "step": 7 }, { "epoch": 0.1871345029239766, "grad_norm": 4.860190391540527, "learning_rate": 9.961904761904762e-05, "loss": 2.7076, "step": 8 }, { "epoch": 0.21052631578947367, "grad_norm": 4.317215442657471, "learning_rate": 9.957142857142858e-05, "loss": 2.6377, "step": 9 }, { "epoch": 0.23391812865497075, "grad_norm": 3.3068416118621826, "learning_rate": 9.952380952380953e-05, "loss": 2.5995, "step": 10 }, { "epoch": 0.2573099415204678, "grad_norm": 1.2752724885940552, "learning_rate": 9.947619047619048e-05, "loss": 2.6285, "step": 11 }, { "epoch": 0.2807017543859649, "grad_norm": 8.849737167358398, "learning_rate": 9.942857142857144e-05, "loss": 2.6217, "step": 12 }, { "epoch": 0.30409356725146197, "grad_norm": 5.594025611877441, "learning_rate": 9.938095238095238e-05, "loss": 2.6265, "step": 13 }, { "epoch": 0.32748538011695905, "grad_norm": 3.581617593765259, "learning_rate": 9.933333333333334e-05, "loss": 2.5984, "step": 14 }, { "epoch": 0.3508771929824561, "grad_norm": 5.33600378036499, "learning_rate": 9.92857142857143e-05, "loss": 2.6071, "step": 15 }, { "epoch": 0.3742690058479532, "grad_norm": 4.4274983406066895, "learning_rate": 9.923809523809524e-05, "loss": 2.5908, "step": 16 }, { "epoch": 0.39766081871345027, "grad_norm": 4.5507307052612305, "learning_rate": 9.91904761904762e-05, "loss": 2.5616, "step": 17 }, { "epoch": 0.42105263157894735, "grad_norm": 3.189161777496338, "learning_rate": 9.914285714285715e-05, "loss": 2.5784, "step": 18 }, { "epoch": 0.4444444444444444, "grad_norm": 2.706615924835205, "learning_rate": 9.909523809523809e-05, "loss": 2.5413, "step": 19 }, { "epoch": 0.4678362573099415, "grad_norm": 2.146662712097168, "learning_rate": 9.904761904761905e-05, "loss": 2.6019, "step": 20 }, { "epoch": 0.49122807017543857, "grad_norm": 3.2252964973449707, "learning_rate": 9.900000000000001e-05, "loss": 2.5914, "step": 21 }, { "epoch": 0.5146198830409356, "grad_norm": 2.8859879970550537, "learning_rate": 9.895238095238095e-05, "loss": 2.6133, "step": 22 }, { "epoch": 0.5380116959064327, "grad_norm": 2.5647897720336914, "learning_rate": 9.890476190476191e-05, "loss": 2.5425, "step": 23 }, { "epoch": 0.5614035087719298, "grad_norm": 3.0347073078155518, "learning_rate": 9.885714285714286e-05, "loss": 2.5732, "step": 24 }, { "epoch": 0.5847953216374269, "grad_norm": 1.8412858247756958, "learning_rate": 
9.880952380952381e-05, "loss": 2.5776, "step": 25 }, { "epoch": 0.6081871345029239, "grad_norm": 3.1820366382598877, "learning_rate": 9.876190476190477e-05, "loss": 2.5566, "step": 26 }, { "epoch": 0.631578947368421, "grad_norm": 2.4613687992095947, "learning_rate": 9.871428571428572e-05, "loss": 2.5113, "step": 27 }, { "epoch": 0.6549707602339181, "grad_norm": 5.942374229431152, "learning_rate": 9.866666666666668e-05, "loss": 2.575, "step": 28 }, { "epoch": 0.6783625730994152, "grad_norm": 7.427689075469971, "learning_rate": 9.861904761904762e-05, "loss": 2.5692, "step": 29 }, { "epoch": 0.7017543859649122, "grad_norm": 3.163085699081421, "learning_rate": 9.857142857142858e-05, "loss": 2.5192, "step": 30 }, { "epoch": 0.7251461988304093, "grad_norm": 3.421778678894043, "learning_rate": 9.852380952380952e-05, "loss": 2.5533, "step": 31 }, { "epoch": 0.7485380116959064, "grad_norm": 2.486320734024048, "learning_rate": 9.847619047619048e-05, "loss": 2.515, "step": 32 }, { "epoch": 0.7719298245614035, "grad_norm": 3.2825722694396973, "learning_rate": 9.842857142857144e-05, "loss": 2.4994, "step": 33 }, { "epoch": 0.7953216374269005, "grad_norm": 3.5643672943115234, "learning_rate": 9.838095238095238e-05, "loss": 2.5327, "step": 34 }, { "epoch": 0.8187134502923976, "grad_norm": 3.3127200603485107, "learning_rate": 9.833333333333333e-05, "loss": 2.6251, "step": 35 }, { "epoch": 0.8421052631578947, "grad_norm": 3.187095880508423, "learning_rate": 9.828571428571429e-05, "loss": 2.5233, "step": 36 }, { "epoch": 0.8654970760233918, "grad_norm": 3.3743860721588135, "learning_rate": 9.823809523809525e-05, "loss": 2.4999, "step": 37 }, { "epoch": 0.8888888888888888, "grad_norm": 2.3684120178222656, "learning_rate": 9.81904761904762e-05, "loss": 2.5302, "step": 38 }, { "epoch": 0.9122807017543859, "grad_norm": 3.7091619968414307, "learning_rate": 9.814285714285715e-05, "loss": 2.5003, "step": 39 }, { "epoch": 0.935672514619883, "grad_norm": 4.230418682098389, "learning_rate": 9.80952380952381e-05, "loss": 2.5379, "step": 40 }, { "epoch": 0.9590643274853801, "grad_norm": 3.5879616737365723, "learning_rate": 9.804761904761905e-05, "loss": 2.5652, "step": 41 }, { "epoch": 0.9824561403508771, "grad_norm": 2.621013879776001, "learning_rate": 9.8e-05, "loss": 2.5704, "step": 42 }, { "epoch": 1.0, "grad_norm": 1.3536667823791504, "learning_rate": 9.795238095238097e-05, "loss": 1.9135, "step": 43 }, { "epoch": 1.0, "eval_loss": 0.31598106026649475, "eval_runtime": 3.4639, "eval_samples_per_second": 62.358, "eval_steps_per_second": 15.59, "step": 43 } ], "logging_steps": 1, "max_steps": 2100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2151327228493824.0, "train_batch_size": 12, "trial_name": null, "trial_params": null }
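For reference, this has the shape of the `trainer_state.json` that the Hugging Face `Trainer` writes into each checkpoint directory: one `log_history` entry per logged training step (loss, gradient norm, learning rate) plus an evaluation entry at the end of epoch 1, whose `eval_loss` (0.3160) is recorded as the `best_metric` for `checkpoint-43`. Below is a minimal sketch of how one might load the file and pull out the loss curve; the exact file path and the matplotlib plotting are assumptions for illustration, not part of the original run.

```python
import json

import matplotlib.pyplot as plt

# Assumed location: the Trainer saves trainer_state.json inside each checkpoint directory.
state_path = "./opt_trained/checkpoint-43/trainer_state.json"

with open(state_path) as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss" instead.
train_log = [entry for entry in state["log_history"] if "loss" in entry]
eval_log = [entry for entry in state["log_history"] if "eval_loss" in entry]

print("best metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])
print("eval steps logged:", [entry["step"] for entry in eval_log])

# Plot the per-step training loss (output file name is hypothetical).
plt.plot([entry["step"] for entry in train_log],
         [entry["loss"] for entry in train_log],
         label="training loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")
```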