{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9969945902624725,
"eval_steps": 16,
"global_step": 311,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05129232618713685,
"grad_norm": 0.5048500299453735,
"learning_rate": 0.0015,
"loss": 1.1504,
"step": 16
},
{
"epoch": 0.1025846523742737,
"grad_norm": 0.389893114566803,
"learning_rate": 0.0011094003924504584,
"loss": 3.0392,
"step": 32
},
{
"epoch": 0.15387697856141053,
"grad_norm": 0.2868100106716156,
"learning_rate": 0.0007427813527082075,
"loss": 0.8441,
"step": 48
},
{
"epoch": 0.2051693047485474,
"grad_norm": 0.2573494613170624,
"learning_rate": 0.0005962847939999439,
"loss": 0.783,
"step": 64
},
{
"epoch": 0.25646163093568425,
"grad_norm": 0.31569787859916687,
"learning_rate": 0.000512147519731584,
"loss": 0.7637,
"step": 80
},
{
"epoch": 0.30775395712282105,
"grad_norm": 0.2564464211463928,
"learning_rate": 0.0004558423058385518,
"loss": 0.719,
"step": 96
},
{
"epoch": 0.3590462833099579,
"grad_norm": 0.32603177428245544,
"learning_rate": 0.0004147806778921701,
"loss": 0.7037,
"step": 112
},
{
"epoch": 0.4103386094970948,
"grad_norm": 0.2662847340106964,
"learning_rate": 0.0003831305140884606,
"loss": 0.6794,
"step": 128
},
{
"epoch": 0.46163093568423164,
"grad_norm": 0.3353310525417328,
"learning_rate": 0.00035777087639996636,
"loss": 0.6737,
"step": 144
},
{
"epoch": 0.5129232618713685,
"grad_norm": 0.26287367939949036,
"learning_rate": 0.00033686076842660763,
"loss": 0.6561,
"step": 160
},
{
"epoch": 0.5642155880585054,
"grad_norm": 0.3288561999797821,
"learning_rate": 0.00031923475378704884,
"loss": 0.6359,
"step": 176
},
{
"epoch": 0.6155079142456421,
"grad_norm": 0.35450485348701477,
"learning_rate": 0.0003041143685078822,
"loss": 0.612,
"step": 192
},
{
"epoch": 0.666800240432779,
"grad_norm": 0.29516109824180603,
"learning_rate": 0.00029095718698132317,
"loss": 0.6057,
"step": 208
},
{
"epoch": 0.7180925666199158,
"grad_norm": 0.38677069544792175,
"learning_rate": 0.00027937211830783126,
"loss": 0.5943,
"step": 224
},
{
"epoch": 0.7693848928070527,
"grad_norm": 0.3125530481338501,
"learning_rate": 0.000269069117598525,
"loss": 0.5635,
"step": 240
},
{
"epoch": 0.8206772189941896,
"grad_norm": 0.3307824730873108,
"learning_rate": 0.00025982792098465233,
"loss": 0.5629,
"step": 256
},
{
"epoch": 0.8719695451813264,
"grad_norm": 0.31122493743896484,
"learning_rate": 0.0002514778453847726,
"loss": 0.5582,
"step": 272
},
{
"epoch": 0.9232618713684633,
"grad_norm": 0.3087589144706726,
"learning_rate": 0.00024388430433987693,
"loss": 0.5364,
"step": 288
},
{
"epoch": 0.9745541975556001,
"grad_norm": 0.3315638601779938,
"learning_rate": 0.00023693955110363693,
"loss": 0.5412,
"step": 304
},
{
"epoch": 0.9969945902624725,
"step": 311,
"total_flos": 3.158660236722569e+18,
"train_loss": 0.7963475859050199,
"train_runtime": 2372.2052,
"train_samples_per_second": 16.831,
"train_steps_per_second": 0.131
}
],
"logging_steps": 16,
"max_steps": 311,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 16,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.158660236722569e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}