{
  "best_metric": 7.5260701179504395,
  "best_model_checkpoint": "/home/datta0/models/lora_final/Mistral-7B-v0.3_pct_reverse/checkpoint-48",
  "epoch": 0.2063185041908446,
  "eval_steps": 8,
  "global_step": 80,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0025789813023855577,
      "grad_norm": 17.363887786865234,
      "learning_rate": 3.75e-05,
      "loss": 2.1239,
      "step": 1
    },
    {
      "epoch": 0.010315925209542231,
      "grad_norm": 14.140544891357422,
      "learning_rate": 0.00015,
      "loss": 2.053,
      "step": 4
    },
    {
      "epoch": 0.020631850419084462,
      "grad_norm": 471.962158203125,
      "learning_rate": 0.0003,
      "loss": 2.1192,
      "step": 8
    },
    {
      "epoch": 0.020631850419084462,
      "eval_loss": 11.598015785217285,
      "eval_runtime": 10.9566,
      "eval_samples_per_second": 22.361,
      "eval_steps_per_second": 2.829,
      "step": 8
    },
    {
      "epoch": 0.030947775628626693,
      "grad_norm": 4009.223388671875,
      "learning_rate": 0.00029991755529206284,
      "loss": 10.6315,
      "step": 12
    },
    {
      "epoch": 0.041263700838168924,
      "grad_norm": 42.12191390991211,
      "learning_rate": 0.0002996703117966496,
      "loss": 9.6637,
      "step": 16
    },
    {
      "epoch": 0.041263700838168924,
      "eval_loss": 8.410330772399902,
      "eval_runtime": 10.7056,
      "eval_samples_per_second": 22.885,
      "eval_steps_per_second": 2.896,
      "step": 16
    },
    {
      "epoch": 0.05157962604771115,
      "grad_norm": 24.113929748535156,
      "learning_rate": 0.00029925854129933066,
      "loss": 8.3281,
      "step": 20
    },
    {
      "epoch": 0.061895551257253385,
      "grad_norm": 6.116011619567871,
      "learning_rate": 0.0002986826964440844,
      "loss": 7.8718,
      "step": 24
    },
    {
      "epoch": 0.061895551257253385,
      "eval_loss": 8.008735656738281,
      "eval_runtime": 10.6891,
      "eval_samples_per_second": 22.921,
      "eval_steps_per_second": 2.9,
      "step": 24
    },
    {
      "epoch": 0.07221147646679561,
      "grad_norm": 11.01762580871582,
      "learning_rate": 0.00029794341023572295,
      "loss": 7.8313,
      "step": 28
    },
    {
      "epoch": 0.08252740167633785,
      "grad_norm": 3.9344899654388428,
      "learning_rate": 0.0002970414953440533,
      "loss": 7.6741,
      "step": 32
    },
    {
      "epoch": 0.08252740167633785,
      "eval_loss": 7.719517230987549,
      "eval_runtime": 10.5778,
      "eval_samples_per_second": 23.162,
      "eval_steps_per_second": 2.931,
      "step": 32
    },
    {
      "epoch": 0.09284332688588008,
      "grad_norm": 4.952977180480957,
      "learning_rate": 0.00029597794321054006,
      "loss": 7.6388,
      "step": 36
    },
    {
      "epoch": 0.1031592520954223,
      "grad_norm": 3.788853406906128,
      "learning_rate": 0.00029475392295845,
      "loss": 7.6499,
      "step": 40
    },
    {
      "epoch": 0.1031592520954223,
      "eval_loss": 7.619475364685059,
      "eval_runtime": 10.6008,
      "eval_samples_per_second": 23.111,
      "eval_steps_per_second": 2.924,
      "step": 40
    },
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 5.81605863571167,
      "learning_rate": 0.0002933707801076791,
      "loss": 7.6638,
      "step": 44
    },
    {
      "epoch": 0.12379110251450677,
      "grad_norm": 2.517054319381714,
      "learning_rate": 0.00029183003509567217,
      "loss": 7.6391,
      "step": 48
    },
    {
      "epoch": 0.12379110251450677,
      "eval_loss": 7.5260701179504395,
      "eval_runtime": 10.5993,
      "eval_samples_per_second": 23.115,
      "eval_steps_per_second": 2.925,
      "step": 48
    },
    {
      "epoch": 0.134107027724049,
      "grad_norm": 3.91676926612854,
      "learning_rate": 0.000290133381606063,
      "loss": 7.6305,
      "step": 52
    },
    {
      "epoch": 0.14442295293359123,
      "grad_norm": 9.17829418182373,
      "learning_rate": 0.0002882826847068703,
      "loss": 7.5835,
      "step": 56
    },
    {
      "epoch": 0.14442295293359123,
      "eval_loss": 7.5467963218688965,
      "eval_runtime": 10.628,
      "eval_samples_per_second": 23.052,
      "eval_steps_per_second": 2.917,
      "step": 56
    },
    {
      "epoch": 0.15473887814313347,
      "grad_norm": 12.215532302856445,
      "learning_rate": 0.00028627997880029875,
      "loss": 7.563,
      "step": 60
    },
    {
      "epoch": 0.1650548033526757,
      "grad_norm": 11.112576484680176,
      "learning_rate": 0.0002841274653863955,
      "loss": 7.5515,
      "step": 64
    },
    {
      "epoch": 0.1650548033526757,
      "eval_loss": 7.549396514892578,
      "eval_runtime": 10.6501,
      "eval_samples_per_second": 23.005,
      "eval_steps_per_second": 2.911,
      "step": 64
    },
    {
      "epoch": 0.17537072856221791,
      "grad_norm": 13.383460998535156,
      "learning_rate": 0.00028182751064302397,
      "loss": 7.5315,
      "step": 68
    },
    {
      "epoch": 0.18568665377176016,
      "grad_norm": 11.468887329101562,
      "learning_rate": 0.0002793826428248118,
      "loss": 7.6827,
      "step": 72
    },
    {
      "epoch": 0.18568665377176016,
      "eval_loss": 7.556999206542969,
      "eval_runtime": 10.7096,
      "eval_samples_per_second": 22.877,
      "eval_steps_per_second": 2.895,
      "step": 72
    },
    {
      "epoch": 0.19600257898130238,
      "grad_norm": 9.75944995880127,
      "learning_rate": 0.0002767955494839353,
      "loss": 7.5396,
      "step": 76
    },
    {
      "epoch": 0.2063185041908446,
      "grad_norm": 10.063895225524902,
      "learning_rate": 0.00027406907451579294,
      "loss": 7.6842,
      "step": 80
    },
    {
      "epoch": 0.2063185041908446,
      "eval_loss": 7.593923568725586,
      "eval_runtime": 10.7319,
      "eval_samples_per_second": 22.829,
      "eval_steps_per_second": 2.889,
      "step": 80
    }
  ],
  "logging_steps": 4,
  "max_steps": 387,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 8,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8997457703665664e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}