{"train/loss": 0.9941, "train/grad_norm": 19.487234115600586, "train/learning_rate": 4.942965779467681e-07, "train/epoch": 3.0, "train/global_step": 6312, "_timestamp": 1720074701.425931, "_runtime": 5895.5103459358215, "_step": 15, "eval/loss": 1.1401793956756592, "eval/accuracy": 0.6116688319357506, "eval/runtime": 132.8, "eval/samples_per_second": 63.758, "eval/steps_per_second": 3.991, "train_runtime": 5918.3988, "train_samples_per_second": 17.061, "train_steps_per_second": 1.067, "total_flos": 2981042653335552.0, "train_loss": 1.2064072061400903} |