CodeGen-MNTP / trainer_state.json
Denis641's picture
Upload 14 files
28cb3dc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0080645161290323,
"eval_steps": 100,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10080645161290322,
"eval_accuracy": 0.11547923931567391,
"eval_loss": 5.788015365600586,
"eval_runtime": 62.6523,
"eval_samples_per_second": 24.772,
"eval_steps_per_second": 0.782,
"step": 100
},
{
"epoch": 0.20161290322580644,
"eval_accuracy": 0.13370151290913876,
"eval_loss": 5.176546573638916,
"eval_runtime": 62.6565,
"eval_samples_per_second": 24.77,
"eval_steps_per_second": 0.782,
"step": 200
},
{
"epoch": 0.3024193548387097,
"eval_accuracy": 0.14643068888524466,
"eval_loss": 5.0183634757995605,
"eval_runtime": 62.7024,
"eval_samples_per_second": 24.752,
"eval_steps_per_second": 0.781,
"step": 300
},
{
"epoch": 0.4032258064516129,
"eval_accuracy": 0.15548253881621393,
"eval_loss": 4.92139196395874,
"eval_runtime": 62.7305,
"eval_samples_per_second": 24.741,
"eval_steps_per_second": 0.781,
"step": 400
},
{
"epoch": 0.5040322580645161,
"grad_norm": 1.78125,
"learning_rate": 4.1599462365591404e-05,
"loss": 5.3217,
"step": 500
},
{
"epoch": 0.5040322580645161,
"eval_accuracy": 0.15949499500603373,
"eval_loss": 4.852448463439941,
"eval_runtime": 62.7108,
"eval_samples_per_second": 24.749,
"eval_steps_per_second": 0.781,
"step": 500
},
{
"epoch": 0.6048387096774194,
"eval_accuracy": 0.16651773824214028,
"eval_loss": 4.782216548919678,
"eval_runtime": 62.7083,
"eval_samples_per_second": 24.75,
"eval_steps_per_second": 0.781,
"step": 600
},
{
"epoch": 0.7056451612903226,
"eval_accuracy": 0.17252706374876267,
"eval_loss": 4.736791133880615,
"eval_runtime": 62.7624,
"eval_samples_per_second": 24.728,
"eval_steps_per_second": 0.781,
"step": 700
},
{
"epoch": 0.8064516129032258,
"eval_accuracy": 0.17507433383625723,
"eval_loss": 4.693298816680908,
"eval_runtime": 62.7389,
"eval_samples_per_second": 24.737,
"eval_steps_per_second": 0.781,
"step": 800
},
{
"epoch": 0.907258064516129,
"eval_accuracy": 0.17554612653604237,
"eval_loss": 4.677834987640381,
"eval_runtime": 62.7903,
"eval_samples_per_second": 24.717,
"eval_steps_per_second": 0.78,
"step": 900
},
{
"epoch": 1.0080645161290323,
"grad_norm": 2.21875,
"learning_rate": 3.31989247311828e-05,
"loss": 4.7636,
"step": 1000
},
{
"epoch": 1.0080645161290323,
"eval_accuracy": 0.17953083885533383,
"eval_loss": 4.65625,
"eval_runtime": 62.7838,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 0.78,
"step": 1000
}
],
"logging_steps": 500,
"max_steps": 2976,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 3.010369166298317e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}