ben81828's picture
Training in progress, step 250, checkpoint
264d194 verified
raw
history blame
12.7 kB
{
"best_metric": 0.055875860154628754,
"best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-250",
"epoch": 0.12876641771825909,
"eval_steps": 50,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0025753283543651817,
"grad_norm": 14.230540018977528,
"learning_rate": 2.9411764705882355e-06,
"loss": 1.0954,
"num_input_tokens_seen": 49920,
"step": 5
},
{
"epoch": 0.0051506567087303634,
"grad_norm": 12.725175128208063,
"learning_rate": 5.882352941176471e-06,
"loss": 0.9793,
"num_input_tokens_seen": 99840,
"step": 10
},
{
"epoch": 0.007725985063095545,
"grad_norm": 11.908760913870685,
"learning_rate": 8.823529411764707e-06,
"loss": 1.0964,
"num_input_tokens_seen": 149760,
"step": 15
},
{
"epoch": 0.010301313417460727,
"grad_norm": 7.5714179545602835,
"learning_rate": 1.1764705882352942e-05,
"loss": 0.7079,
"num_input_tokens_seen": 199680,
"step": 20
},
{
"epoch": 0.012876641771825908,
"grad_norm": 2.273110551179123,
"learning_rate": 1.4705882352941177e-05,
"loss": 0.4213,
"num_input_tokens_seen": 249600,
"step": 25
},
{
"epoch": 0.01545197012619109,
"grad_norm": 1.4511084067844011,
"learning_rate": 1.7647058823529414e-05,
"loss": 0.3359,
"num_input_tokens_seen": 299520,
"step": 30
},
{
"epoch": 0.018027298480556272,
"grad_norm": 1.7462649262033438,
"learning_rate": 2.058823529411765e-05,
"loss": 0.3463,
"num_input_tokens_seen": 349440,
"step": 35
},
{
"epoch": 0.020602626834921454,
"grad_norm": 1.2143595820366577,
"learning_rate": 2.3529411764705884e-05,
"loss": 0.28,
"num_input_tokens_seen": 399360,
"step": 40
},
{
"epoch": 0.023177955189286635,
"grad_norm": 0.8468719125045373,
"learning_rate": 2.647058823529412e-05,
"loss": 0.3442,
"num_input_tokens_seen": 449280,
"step": 45
},
{
"epoch": 0.025753283543651816,
"grad_norm": 0.9726867932660042,
"learning_rate": 2.9411764705882354e-05,
"loss": 0.3441,
"num_input_tokens_seen": 499200,
"step": 50
},
{
"epoch": 0.025753283543651816,
"eval_loss": 0.33834776282310486,
"eval_runtime": 47.4467,
"eval_samples_per_second": 1.265,
"eval_steps_per_second": 0.316,
"num_input_tokens_seen": 499200,
"step": 50
},
{
"epoch": 0.028328611898016998,
"grad_norm": 1.4016556961184263,
"learning_rate": 3.235294117647059e-05,
"loss": 0.3182,
"num_input_tokens_seen": 549120,
"step": 55
},
{
"epoch": 0.03090394025238218,
"grad_norm": 0.6437613769459606,
"learning_rate": 3.529411764705883e-05,
"loss": 0.3294,
"num_input_tokens_seen": 599040,
"step": 60
},
{
"epoch": 0.03347926860674736,
"grad_norm": 0.7389008951321312,
"learning_rate": 3.8235294117647055e-05,
"loss": 0.3097,
"num_input_tokens_seen": 648960,
"step": 65
},
{
"epoch": 0.036054596961112545,
"grad_norm": 0.771553860801019,
"learning_rate": 4.11764705882353e-05,
"loss": 0.3008,
"num_input_tokens_seen": 698880,
"step": 70
},
{
"epoch": 0.03862992531547772,
"grad_norm": 0.6965369148334918,
"learning_rate": 4.411764705882353e-05,
"loss": 0.3278,
"num_input_tokens_seen": 748800,
"step": 75
},
{
"epoch": 0.04120525366984291,
"grad_norm": 0.912943461315541,
"learning_rate": 4.705882352941177e-05,
"loss": 0.3074,
"num_input_tokens_seen": 798720,
"step": 80
},
{
"epoch": 0.043780582024208085,
"grad_norm": 0.8407481737577445,
"learning_rate": 5e-05,
"loss": 0.3423,
"num_input_tokens_seen": 848640,
"step": 85
},
{
"epoch": 0.04635591037857327,
"grad_norm": 0.9112879058417015,
"learning_rate": 5.294117647058824e-05,
"loss": 0.3008,
"num_input_tokens_seen": 898560,
"step": 90
},
{
"epoch": 0.04893123873293845,
"grad_norm": 2.391489040464162,
"learning_rate": 5.588235294117647e-05,
"loss": 0.2815,
"num_input_tokens_seen": 948480,
"step": 95
},
{
"epoch": 0.05150656708730363,
"grad_norm": 2.155211791607199,
"learning_rate": 5.882352941176471e-05,
"loss": 0.2274,
"num_input_tokens_seen": 998400,
"step": 100
},
{
"epoch": 0.05150656708730363,
"eval_loss": 0.18663176894187927,
"eval_runtime": 18.9199,
"eval_samples_per_second": 3.171,
"eval_steps_per_second": 0.793,
"num_input_tokens_seen": 998400,
"step": 100
},
{
"epoch": 0.05408189544166881,
"grad_norm": 2.2181531422996716,
"learning_rate": 6.176470588235295e-05,
"loss": 0.168,
"num_input_tokens_seen": 1048320,
"step": 105
},
{
"epoch": 0.056657223796033995,
"grad_norm": 3.1829920225573236,
"learning_rate": 6.470588235294118e-05,
"loss": 0.0709,
"num_input_tokens_seen": 1098240,
"step": 110
},
{
"epoch": 0.05923255215039917,
"grad_norm": 4.337350477588576,
"learning_rate": 6.764705882352942e-05,
"loss": 0.1609,
"num_input_tokens_seen": 1148160,
"step": 115
},
{
"epoch": 0.06180788050476436,
"grad_norm": 2.1010046045637365,
"learning_rate": 7.058823529411765e-05,
"loss": 0.0354,
"num_input_tokens_seen": 1198080,
"step": 120
},
{
"epoch": 0.06438320885912954,
"grad_norm": 2.232308844812103,
"learning_rate": 7.352941176470589e-05,
"loss": 0.1133,
"num_input_tokens_seen": 1248000,
"step": 125
},
{
"epoch": 0.06695853721349472,
"grad_norm": 5.641631090993415,
"learning_rate": 7.647058823529411e-05,
"loss": 0.0867,
"num_input_tokens_seen": 1297920,
"step": 130
},
{
"epoch": 0.0695338655678599,
"grad_norm": 1.5031437609685787,
"learning_rate": 7.941176470588235e-05,
"loss": 0.1352,
"num_input_tokens_seen": 1347840,
"step": 135
},
{
"epoch": 0.07210919392222509,
"grad_norm": 3.2992644431188465,
"learning_rate": 8.23529411764706e-05,
"loss": 0.101,
"num_input_tokens_seen": 1397760,
"step": 140
},
{
"epoch": 0.07468452227659027,
"grad_norm": 3.494236832758233,
"learning_rate": 8.529411764705883e-05,
"loss": 0.0334,
"num_input_tokens_seen": 1447680,
"step": 145
},
{
"epoch": 0.07725985063095545,
"grad_norm": 0.0602113869322109,
"learning_rate": 8.823529411764706e-05,
"loss": 0.0667,
"num_input_tokens_seen": 1497600,
"step": 150
},
{
"epoch": 0.07725985063095545,
"eval_loss": 0.09665286540985107,
"eval_runtime": 19.2745,
"eval_samples_per_second": 3.113,
"eval_steps_per_second": 0.778,
"num_input_tokens_seen": 1497600,
"step": 150
},
{
"epoch": 0.07983517898532062,
"grad_norm": 6.096163706748617,
"learning_rate": 9.11764705882353e-05,
"loss": 0.06,
"num_input_tokens_seen": 1547520,
"step": 155
},
{
"epoch": 0.08241050733968582,
"grad_norm": 4.278069142242893,
"learning_rate": 9.411764705882353e-05,
"loss": 0.0265,
"num_input_tokens_seen": 1597440,
"step": 160
},
{
"epoch": 0.08498583569405099,
"grad_norm": 1.7183097652953412,
"learning_rate": 9.705882352941177e-05,
"loss": 0.1743,
"num_input_tokens_seen": 1647360,
"step": 165
},
{
"epoch": 0.08756116404841617,
"grad_norm": 4.95401899568707,
"learning_rate": 0.0001,
"loss": 0.051,
"num_input_tokens_seen": 1697280,
"step": 170
},
{
"epoch": 0.09013649240278135,
"grad_norm": 0.34558354886099124,
"learning_rate": 9.999940874631277e-05,
"loss": 0.0584,
"num_input_tokens_seen": 1747200,
"step": 175
},
{
"epoch": 0.09271182075714654,
"grad_norm": 3.6326401692458146,
"learning_rate": 9.999763499923432e-05,
"loss": 0.0704,
"num_input_tokens_seen": 1797120,
"step": 180
},
{
"epoch": 0.09528714911151172,
"grad_norm": 0.4999720825488852,
"learning_rate": 9.999467880071402e-05,
"loss": 0.0278,
"num_input_tokens_seen": 1847040,
"step": 185
},
{
"epoch": 0.0978624774658769,
"grad_norm": 0.5211964505880501,
"learning_rate": 9.999054022066641e-05,
"loss": 0.0862,
"num_input_tokens_seen": 1896960,
"step": 190
},
{
"epoch": 0.10043780582024209,
"grad_norm": 0.8767038751832389,
"learning_rate": 9.998521935696953e-05,
"loss": 0.0565,
"num_input_tokens_seen": 1946880,
"step": 195
},
{
"epoch": 0.10301313417460727,
"grad_norm": 0.1848827361202722,
"learning_rate": 9.997871633546257e-05,
"loss": 0.0459,
"num_input_tokens_seen": 1996800,
"step": 200
},
{
"epoch": 0.10301313417460727,
"eval_loss": 0.09957947582006454,
"eval_runtime": 19.2651,
"eval_samples_per_second": 3.114,
"eval_steps_per_second": 0.779,
"num_input_tokens_seen": 1996800,
"step": 200
},
{
"epoch": 0.10558846252897244,
"grad_norm": 1.0969393658164421,
"learning_rate": 9.997103130994296e-05,
"loss": 0.0539,
"num_input_tokens_seen": 2046720,
"step": 205
},
{
"epoch": 0.10816379088333762,
"grad_norm": 2.885869194934028,
"learning_rate": 9.996216446216267e-05,
"loss": 0.0654,
"num_input_tokens_seen": 2096640,
"step": 210
},
{
"epoch": 0.11073911923770281,
"grad_norm": 0.5225257245731217,
"learning_rate": 9.995211600182397e-05,
"loss": 0.0316,
"num_input_tokens_seen": 2146560,
"step": 215
},
{
"epoch": 0.11331444759206799,
"grad_norm": 2.1553510734212797,
"learning_rate": 9.994088616657444e-05,
"loss": 0.1169,
"num_input_tokens_seen": 2196480,
"step": 220
},
{
"epoch": 0.11588977594643317,
"grad_norm": 1.1133884703723633,
"learning_rate": 9.992847522200133e-05,
"loss": 0.0382,
"num_input_tokens_seen": 2246400,
"step": 225
},
{
"epoch": 0.11846510430079835,
"grad_norm": 0.8875243341616034,
"learning_rate": 9.99148834616253e-05,
"loss": 0.0406,
"num_input_tokens_seen": 2296320,
"step": 230
},
{
"epoch": 0.12104043265516354,
"grad_norm": 1.81283533812695,
"learning_rate": 9.990011120689351e-05,
"loss": 0.0182,
"num_input_tokens_seen": 2346240,
"step": 235
},
{
"epoch": 0.12361576100952872,
"grad_norm": 3.873083258671571,
"learning_rate": 9.988415880717194e-05,
"loss": 0.0881,
"num_input_tokens_seen": 2396160,
"step": 240
},
{
"epoch": 0.1261910893638939,
"grad_norm": 3.427761103620865,
"learning_rate": 9.986702663973722e-05,
"loss": 0.0565,
"num_input_tokens_seen": 2446080,
"step": 245
},
{
"epoch": 0.12876641771825909,
"grad_norm": 1.531943599765959,
"learning_rate": 9.98487151097676e-05,
"loss": 0.0805,
"num_input_tokens_seen": 2496000,
"step": 250
},
{
"epoch": 0.12876641771825909,
"eval_loss": 0.055875860154628754,
"eval_runtime": 19.5106,
"eval_samples_per_second": 3.075,
"eval_steps_per_second": 0.769,
"num_input_tokens_seen": 2496000,
"step": 250
}
],
"logging_steps": 5,
"max_steps": 3400,
"num_input_tokens_seen": 2496000,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 164644726243328.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}