{ "best_metric": 7.5260701179504395, "best_model_checkpoint": "/home/datta0/models/lora_final/Mistral-7B-v0.3_pct_reverse/checkpoint-48", "epoch": 0.2063185041908446, "eval_steps": 8, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025789813023855577, "grad_norm": 17.363887786865234, "learning_rate": 3.75e-05, "loss": 2.1239, "step": 1 }, { "epoch": 0.010315925209542231, "grad_norm": 14.140544891357422, "learning_rate": 0.00015, "loss": 2.053, "step": 4 }, { "epoch": 0.020631850419084462, "grad_norm": 471.962158203125, "learning_rate": 0.0003, "loss": 2.1192, "step": 8 }, { "epoch": 0.020631850419084462, "eval_loss": 11.598015785217285, "eval_runtime": 10.9566, "eval_samples_per_second": 22.361, "eval_steps_per_second": 2.829, "step": 8 }, { "epoch": 0.030947775628626693, "grad_norm": 4009.223388671875, "learning_rate": 0.00029991755529206284, "loss": 10.6315, "step": 12 }, { "epoch": 0.041263700838168924, "grad_norm": 42.12191390991211, "learning_rate": 0.0002996703117966496, "loss": 9.6637, "step": 16 }, { "epoch": 0.041263700838168924, "eval_loss": 8.410330772399902, "eval_runtime": 10.7056, "eval_samples_per_second": 22.885, "eval_steps_per_second": 2.896, "step": 16 }, { "epoch": 0.05157962604771115, "grad_norm": 24.113929748535156, "learning_rate": 0.00029925854129933066, "loss": 8.3281, "step": 20 }, { "epoch": 0.061895551257253385, "grad_norm": 6.116011619567871, "learning_rate": 0.0002986826964440844, "loss": 7.8718, "step": 24 }, { "epoch": 0.061895551257253385, "eval_loss": 8.008735656738281, "eval_runtime": 10.6891, "eval_samples_per_second": 22.921, "eval_steps_per_second": 2.9, "step": 24 }, { "epoch": 0.07221147646679561, "grad_norm": 11.01762580871582, "learning_rate": 0.00029794341023572295, "loss": 7.8313, "step": 28 }, { "epoch": 0.08252740167633785, "grad_norm": 3.9344899654388428, "learning_rate": 0.0002970414953440533, "loss": 7.6741, "step": 32 }, { "epoch": 0.08252740167633785, "eval_loss": 7.719517230987549, "eval_runtime": 10.5778, "eval_samples_per_second": 23.162, "eval_steps_per_second": 2.931, "step": 32 }, { "epoch": 0.09284332688588008, "grad_norm": 4.952977180480957, "learning_rate": 0.00029597794321054006, "loss": 7.6388, "step": 36 }, { "epoch": 0.1031592520954223, "grad_norm": 3.788853406906128, "learning_rate": 0.00029475392295845, "loss": 7.6499, "step": 40 }, { "epoch": 0.1031592520954223, "eval_loss": 7.619475364685059, "eval_runtime": 10.6008, "eval_samples_per_second": 23.111, "eval_steps_per_second": 2.924, "step": 40 }, { "epoch": 0.11347517730496454, "grad_norm": 5.81605863571167, "learning_rate": 0.0002933707801076791, "loss": 7.6638, "step": 44 }, { "epoch": 0.12379110251450677, "grad_norm": 2.517054319381714, "learning_rate": 0.00029183003509567217, "loss": 7.6391, "step": 48 }, { "epoch": 0.12379110251450677, "eval_loss": 7.5260701179504395, "eval_runtime": 10.5993, "eval_samples_per_second": 23.115, "eval_steps_per_second": 2.925, "step": 48 }, { "epoch": 0.134107027724049, "grad_norm": 3.91676926612854, "learning_rate": 0.000290133381606063, "loss": 7.6305, "step": 52 }, { "epoch": 0.14442295293359123, "grad_norm": 9.17829418182373, "learning_rate": 0.0002882826847068703, "loss": 7.5835, "step": 56 }, { "epoch": 0.14442295293359123, "eval_loss": 7.5467963218688965, "eval_runtime": 10.628, "eval_samples_per_second": 23.052, "eval_steps_per_second": 2.917, "step": 56 }, { "epoch": 0.15473887814313347, "grad_norm": 12.215532302856445, 
"learning_rate": 0.00028627997880029875, "loss": 7.563, "step": 60 }, { "epoch": 0.1650548033526757, "grad_norm": 11.112576484680176, "learning_rate": 0.0002841274653863955, "loss": 7.5515, "step": 64 }, { "epoch": 0.1650548033526757, "eval_loss": 7.549396514892578, "eval_runtime": 10.6501, "eval_samples_per_second": 23.005, "eval_steps_per_second": 2.911, "step": 64 }, { "epoch": 0.17537072856221791, "grad_norm": 13.383460998535156, "learning_rate": 0.00028182751064302397, "loss": 7.5315, "step": 68 }, { "epoch": 0.18568665377176016, "grad_norm": 11.468887329101562, "learning_rate": 0.0002793826428248118, "loss": 7.6827, "step": 72 }, { "epoch": 0.18568665377176016, "eval_loss": 7.556999206542969, "eval_runtime": 10.7096, "eval_samples_per_second": 22.877, "eval_steps_per_second": 2.895, "step": 72 }, { "epoch": 0.19600257898130238, "grad_norm": 9.75944995880127, "learning_rate": 0.0002767955494839353, "loss": 7.5396, "step": 76 }, { "epoch": 0.2063185041908446, "grad_norm": 10.063895225524902, "learning_rate": 0.00027406907451579294, "loss": 7.6842, "step": 80 }, { "epoch": 0.2063185041908446, "eval_loss": 7.593923568725586, "eval_runtime": 10.7319, "eval_samples_per_second": 22.829, "eval_steps_per_second": 2.889, "step": 80 } ], "logging_steps": 4, "max_steps": 387, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8997457703665664e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }