terry69's picture
Model save
6028a84 verified
raw
history blame contribute delete
No virus
9.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.997867803837953,
"eval_steps": 500,
"global_step": 234,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0042643923240938165,
"grad_norm": 25.654364462455888,
"learning_rate": 4.1666666666666667e-07,
"loss": 1.5118,
"step": 1
},
{
"epoch": 0.021321961620469083,
"grad_norm": 9.217679826848354,
"learning_rate": 2.0833333333333334e-06,
"loss": 1.4588,
"step": 5
},
{
"epoch": 0.042643923240938165,
"grad_norm": 3.8364568135386343,
"learning_rate": 4.166666666666667e-06,
"loss": 1.1844,
"step": 10
},
{
"epoch": 0.06396588486140725,
"grad_norm": 2.7685426084723606,
"learning_rate": 6.25e-06,
"loss": 1.0266,
"step": 15
},
{
"epoch": 0.08528784648187633,
"grad_norm": 2.714167254334698,
"learning_rate": 8.333333333333334e-06,
"loss": 0.9764,
"step": 20
},
{
"epoch": 0.10660980810234541,
"grad_norm": 2.575043424767955,
"learning_rate": 9.999440509051367e-06,
"loss": 0.9474,
"step": 25
},
{
"epoch": 0.1279317697228145,
"grad_norm": 2.5473126644658635,
"learning_rate": 9.979871469976197e-06,
"loss": 0.9265,
"step": 30
},
{
"epoch": 0.14925373134328357,
"grad_norm": 2.7728522540985927,
"learning_rate": 9.932452969617607e-06,
"loss": 0.9103,
"step": 35
},
{
"epoch": 0.17057569296375266,
"grad_norm": 2.262656802643975,
"learning_rate": 9.857450191464337e-06,
"loss": 0.9089,
"step": 40
},
{
"epoch": 0.19189765458422176,
"grad_norm": 2.2929860595064353,
"learning_rate": 9.755282581475769e-06,
"loss": 0.8839,
"step": 45
},
{
"epoch": 0.21321961620469082,
"grad_norm": 2.9962187125117556,
"learning_rate": 9.626521502369984e-06,
"loss": 0.8779,
"step": 50
},
{
"epoch": 0.2345415778251599,
"grad_norm": 2.4461853196937744,
"learning_rate": 9.471887038331686e-06,
"loss": 0.8655,
"step": 55
},
{
"epoch": 0.255863539445629,
"grad_norm": 2.548713200713329,
"learning_rate": 9.292243968009332e-06,
"loss": 0.8452,
"step": 60
},
{
"epoch": 0.2771855010660981,
"grad_norm": 2.354080355646257,
"learning_rate": 9.088596928322158e-06,
"loss": 0.8453,
"step": 65
},
{
"epoch": 0.29850746268656714,
"grad_norm": 2.3350186621937494,
"learning_rate": 8.862084796122998e-06,
"loss": 0.8213,
"step": 70
},
{
"epoch": 0.31982942430703626,
"grad_norm": 2.352888208422696,
"learning_rate": 8.613974319136959e-06,
"loss": 0.8087,
"step": 75
},
{
"epoch": 0.3411513859275053,
"grad_norm": 2.626490865987853,
"learning_rate": 8.345653031794292e-06,
"loss": 0.8,
"step": 80
},
{
"epoch": 0.3624733475479744,
"grad_norm": 2.2564126156464934,
"learning_rate": 8.058621495575032e-06,
"loss": 0.7883,
"step": 85
},
{
"epoch": 0.3837953091684435,
"grad_norm": 2.536678489630529,
"learning_rate": 7.754484907260513e-06,
"loss": 0.7797,
"step": 90
},
{
"epoch": 0.4051172707889126,
"grad_norm": 2.330261835490306,
"learning_rate": 7.434944122021837e-06,
"loss": 0.7704,
"step": 95
},
{
"epoch": 0.42643923240938164,
"grad_norm": 2.375473887900136,
"learning_rate": 7.101786141547829e-06,
"loss": 0.7491,
"step": 100
},
{
"epoch": 0.44776119402985076,
"grad_norm": 2.31845485895562,
"learning_rate": 6.7568741204067145e-06,
"loss": 0.7426,
"step": 105
},
{
"epoch": 0.4690831556503198,
"grad_norm": 2.2326175780721513,
"learning_rate": 6.402136946530014e-06,
"loss": 0.7366,
"step": 110
},
{
"epoch": 0.4904051172707889,
"grad_norm": 2.444799836226394,
"learning_rate": 6.039558454088796e-06,
"loss": 0.7294,
"step": 115
},
{
"epoch": 0.511727078891258,
"grad_norm": 2.42023799653421,
"learning_rate": 5.671166329088278e-06,
"loss": 0.7346,
"step": 120
},
{
"epoch": 0.5330490405117271,
"grad_norm": 2.525769921790198,
"learning_rate": 5.299020769725172e-06,
"loss": 0.716,
"step": 125
},
{
"epoch": 0.5543710021321961,
"grad_norm": 2.210624855154462,
"learning_rate": 4.9252029649236835e-06,
"loss": 0.7087,
"step": 130
},
{
"epoch": 0.5756929637526652,
"grad_norm": 2.260417777455262,
"learning_rate": 4.551803455482833e-06,
"loss": 0.6979,
"step": 135
},
{
"epoch": 0.5970149253731343,
"grad_norm": 2.5410734519213847,
"learning_rate": 4.180910442924312e-06,
"loss": 0.6758,
"step": 140
},
{
"epoch": 0.6183368869936035,
"grad_norm": 2.2197214614990983,
"learning_rate": 3.8145981114225135e-06,
"loss": 0.6832,
"step": 145
},
{
"epoch": 0.6396588486140725,
"grad_norm": 2.417478197312417,
"learning_rate": 3.4549150281252635e-06,
"loss": 0.6705,
"step": 150
},
{
"epoch": 0.6609808102345416,
"grad_norm": 2.193206874567919,
"learning_rate": 3.1038726867353587e-06,
"loss": 0.6909,
"step": 155
},
{
"epoch": 0.6823027718550106,
"grad_norm": 2.3141978562259133,
"learning_rate": 2.7634342584218364e-06,
"loss": 0.678,
"step": 160
},
{
"epoch": 0.7036247334754797,
"grad_norm": 2.20282691421215,
"learning_rate": 2.43550361297047e-06,
"loss": 0.6646,
"step": 165
},
{
"epoch": 0.7249466950959488,
"grad_norm": 2.3241432733966962,
"learning_rate": 2.1219146715716332e-06,
"loss": 0.6633,
"step": 170
},
{
"epoch": 0.746268656716418,
"grad_norm": 2.3658483418520464,
"learning_rate": 1.8244211507891064e-06,
"loss": 0.6516,
"step": 175
},
{
"epoch": 0.767590618336887,
"grad_norm": 2.259696417637488,
"learning_rate": 1.544686755065677e-06,
"loss": 0.6418,
"step": 180
},
{
"epoch": 0.7889125799573561,
"grad_norm": 2.284368925546414,
"learning_rate": 1.2842758726130283e-06,
"loss": 0.6405,
"step": 185
},
{
"epoch": 0.8102345415778252,
"grad_norm": 2.2174015564488223,
"learning_rate": 1.044644826718295e-06,
"loss": 0.6359,
"step": 190
},
{
"epoch": 0.8315565031982942,
"grad_norm": 2.3098966859462076,
"learning_rate": 8.271337313934869e-07,
"loss": 0.6232,
"step": 195
},
{
"epoch": 0.8528784648187633,
"grad_norm": 2.240425165408693,
"learning_rate": 6.329589969143518e-07,
"loss": 0.6263,
"step": 200
},
{
"epoch": 0.8742004264392325,
"grad_norm": 2.203409177091297,
"learning_rate": 4.632065271606756e-07,
"loss": 0.6299,
"step": 205
},
{
"epoch": 0.8955223880597015,
"grad_norm": 2.1702011902470724,
"learning_rate": 3.18825646801314e-07,
"loss": 0.636,
"step": 210
},
{
"epoch": 0.9168443496801706,
"grad_norm": 2.202446820245564,
"learning_rate": 2.006237922855553e-07,
"loss": 0.6182,
"step": 215
},
{
"epoch": 0.9381663113006397,
"grad_norm": 2.118840248626809,
"learning_rate": 1.0926199633097156e-07,
"loss": 0.609,
"step": 220
},
{
"epoch": 0.9594882729211087,
"grad_norm": 2.168175873632397,
"learning_rate": 4.52511911603265e-08,
"loss": 0.6173,
"step": 225
},
{
"epoch": 0.9808102345415778,
"grad_norm": 2.2624619803066617,
"learning_rate": 8.949351161324227e-09,
"loss": 0.6207,
"step": 230
},
{
"epoch": 0.997867803837953,
"eval_loss": 0.7390011548995972,
"eval_runtime": 106.273,
"eval_samples_per_second": 3.67,
"eval_steps_per_second": 0.922,
"step": 234
},
{
"epoch": 0.997867803837953,
"step": 234,
"total_flos": 48942494515200.0,
"train_loss": 0.7695284368645432,
"train_runtime": 7306.9109,
"train_samples_per_second": 1.026,
"train_steps_per_second": 0.032
}
],
"logging_steps": 5,
"max_steps": 234,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 48942494515200.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}