{ "best_metric": 0.13331477344036102, "best_model_checkpoint": "/p/project/ccstdl/delbrouck1_juwelsbooster/justin-dev/reward_model-outputs/LLaMA-7b/checkpoint-24", "epoch": 11.430379746835444, "eval_steps": 1.0, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.41, "grad_norm": 11.561720309441663, "learning_rate": 5e-05, "loss": 0.5262, "step": 1 }, { "epoch": 0.41, "eval_loss": 0.5080071687698364, "eval_runtime": 272.1343, "eval_samples_per_second": 36.75, "eval_steps_per_second": 0.037, "step": 1 }, { "epoch": 0.81, "grad_norm": 12.714468399251325, "learning_rate": 0.0001, "loss": 0.5671, "step": 2 }, { "epoch": 0.81, "eval_loss": 0.6037806868553162, "eval_runtime": 269.5392, "eval_samples_per_second": 37.104, "eval_steps_per_second": 0.037, "step": 2 }, { "epoch": 1.22, "grad_norm": 7.445277476591295, "learning_rate": 9.949107209404665e-05, "loss": 0.6304, "step": 3 }, { "epoch": 1.22, "eval_loss": 0.664432168006897, "eval_runtime": 268.3392, "eval_samples_per_second": 37.27, "eval_steps_per_second": 0.037, "step": 3 }, { "epoch": 1.62, "grad_norm": 9.681232907680824, "learning_rate": 9.797464868072488e-05, "loss": 0.722, "step": 4 }, { "epoch": 1.62, "eval_loss": 4.592594623565674, "eval_runtime": 268.7951, "eval_samples_per_second": 37.207, "eval_steps_per_second": 0.037, "step": 4 }, { "epoch": 2.03, "grad_norm": 265.63778139735155, "learning_rate": 9.548159976772592e-05, "loss": 4.6325, "step": 5 }, { "epoch": 2.03, "eval_loss": 0.8545772433280945, "eval_runtime": 267.6697, "eval_samples_per_second": 37.363, "eval_steps_per_second": 0.037, "step": 5 }, { "epoch": 2.43, "grad_norm": 17.629056263131023, "learning_rate": 9.206267664155907e-05, "loss": 0.8679, "step": 6 }, { "epoch": 2.43, "eval_loss": 0.5568432211875916, "eval_runtime": 267.9588, "eval_samples_per_second": 37.323, "eval_steps_per_second": 0.037, "step": 6 }, { "epoch": 3.41, "grad_norm": 10.722615614677697, "learning_rate": 8.778747871771292e-05, "loss": 0.5668, "step": 7 }, { "epoch": 3.41, "eval_loss": 0.4076074957847595, "eval_runtime": 270.2572, "eval_samples_per_second": 37.005, "eval_steps_per_second": 0.037, "step": 7 }, { "epoch": 3.81, "grad_norm": 7.54646041681758, "learning_rate": 8.274303669726426e-05, "loss": 0.4408, "step": 8 }, { "epoch": 3.81, "eval_loss": 0.29421016573905945, "eval_runtime": 268.1328, "eval_samples_per_second": 37.299, "eval_steps_per_second": 0.037, "step": 8 }, { "epoch": 4.22, "grad_norm": 2.275016436716258, "learning_rate": 7.703204087277988e-05, "loss": 0.3079, "step": 9 }, { "epoch": 4.22, "eval_loss": 0.2726017236709595, "eval_runtime": 266.7713, "eval_samples_per_second": 37.489, "eval_steps_per_second": 0.037, "step": 9 }, { "epoch": 4.62, "grad_norm": 3.1894730422214623, "learning_rate": 7.077075065009433e-05, "loss": 0.2915, "step": 10 }, { "epoch": 4.62, "eval_loss": 0.26474782824516296, "eval_runtime": 267.0963, "eval_samples_per_second": 37.443, "eval_steps_per_second": 0.037, "step": 10 }, { "epoch": 5.41, "grad_norm": 1.8812596396037946, "learning_rate": 6.408662784207149e-05, "loss": 0.2712, "step": 11 }, { "epoch": 5.41, "eval_loss": 0.2617990970611572, "eval_runtime": 272.1521, "eval_samples_per_second": 36.748, "eval_steps_per_second": 0.037, "step": 11 }, { "epoch": 5.81, "grad_norm": 3.216908121233545, "learning_rate": 5.7115741913664264e-05, "loss": 0.2873, "step": 12 }, { "epoch": 5.81, "eval_loss": 0.20583461225032806, "eval_runtime": 267.1708, "eval_samples_per_second": 37.433, "eval_steps_per_second": 0.037, "step": 12 }, { "epoch": 6.22, "grad_norm": 0.8415945051002756, "learning_rate": 5e-05, "loss": 0.2149, "step": 13 }, { "epoch": 6.22, "eval_loss": 0.2021123170852661, "eval_runtime": 265.799, "eval_samples_per_second": 37.626, "eval_steps_per_second": 0.038, "step": 13 }, { "epoch": 6.62, "grad_norm": 0.9600900861869989, "learning_rate": 4.288425808633575e-05, "loss": 0.2144, "step": 14 }, { "epoch": 6.62, "eval_loss": 0.1712017059326172, "eval_runtime": 266.5647, "eval_samples_per_second": 37.518, "eval_steps_per_second": 0.038, "step": 14 }, { "epoch": 7.03, "grad_norm": 0.36483545991774, "learning_rate": 3.591337215792852e-05, "loss": 0.1821, "step": 15 }, { "epoch": 7.03, "eval_loss": 0.16192658245563507, "eval_runtime": 267.2595, "eval_samples_per_second": 37.421, "eval_steps_per_second": 0.037, "step": 15 }, { "epoch": 7.43, "grad_norm": 0.29636213689669083, "learning_rate": 2.9229249349905684e-05, "loss": 0.1643, "step": 16 }, { "epoch": 7.43, "eval_loss": 0.1550302356481552, "eval_runtime": 266.9095, "eval_samples_per_second": 37.47, "eval_steps_per_second": 0.037, "step": 16 }, { "epoch": 7.84, "grad_norm": 0.2892605555056782, "learning_rate": 2.296795912722014e-05, "loss": 0.1697, "step": 17 }, { "epoch": 7.84, "eval_loss": 0.14900293946266174, "eval_runtime": 267.048, "eval_samples_per_second": 37.45, "eval_steps_per_second": 0.037, "step": 17 }, { "epoch": 8.24, "grad_norm": 0.23554106047138487, "learning_rate": 1.725696330273575e-05, "loss": 0.1544, "step": 18 }, { "epoch": 8.24, "eval_loss": 0.14368365705013275, "eval_runtime": 265.8512, "eval_samples_per_second": 37.619, "eval_steps_per_second": 0.038, "step": 18 }, { "epoch": 8.65, "grad_norm": 0.1965957885878417, "learning_rate": 1.2212521282287092e-05, "loss": 0.1561, "step": 19 }, { "epoch": 8.65, "eval_loss": 0.13982133567333221, "eval_runtime": 267.5145, "eval_samples_per_second": 37.385, "eval_steps_per_second": 0.037, "step": 19 }, { "epoch": 9.81, "grad_norm": 0.171143476375839, "learning_rate": 7.937323358440935e-06, "loss": 0.1517, "step": 20 }, { "epoch": 9.81, "eval_loss": 0.13716678321361542, "eval_runtime": 271.4057, "eval_samples_per_second": 36.849, "eval_steps_per_second": 0.037, "step": 20 }, { "epoch": 10.22, "grad_norm": 0.16047367303067445, "learning_rate": 4.5184002322740785e-06, "loss": 0.1415, "step": 21 }, { "epoch": 10.22, "eval_loss": 0.13533532619476318, "eval_runtime": 267.6767, "eval_samples_per_second": 37.362, "eval_steps_per_second": 0.037, "step": 21 }, { "epoch": 10.62, "grad_norm": 0.1573994397149672, "learning_rate": 2.0253513192751373e-06, "loss": 0.1425, "step": 22 }, { "epoch": 10.62, "eval_loss": 0.13413330912590027, "eval_runtime": 266.2279, "eval_samples_per_second": 37.566, "eval_steps_per_second": 0.038, "step": 22 }, { "epoch": 11.03, "grad_norm": 0.1403662979613735, "learning_rate": 5.089279059533658e-07, "loss": 0.1418, "step": 23 }, { "epoch": 11.03, "eval_loss": 0.13351500034332275, "eval_runtime": 268.7606, "eval_samples_per_second": 37.212, "eval_steps_per_second": 0.037, "step": 23 }, { "epoch": 11.43, "grad_norm": 0.14318110214979157, "learning_rate": 0.0, "loss": 0.135, "step": 24 }, { "epoch": 11.43, "eval_loss": 0.13331477344036102, "eval_runtime": 267.9025, "eval_samples_per_second": 37.331, "eval_steps_per_second": 0.037, "step": 24 }, { "epoch": 11.43, "step": 24, "total_flos": 2559456911032320.0, "train_loss": 0.02968624420464039, "train_runtime": 11477.2759, "train_samples_per_second": 83.648, "train_steps_per_second": 0.002 } ], "logging_steps": 1.0, "max_steps": 24, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 1.0, "total_flos": 2559456911032320.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }