{ "model_name_or_path": "/home/juntao/Projects/roo-dev-cty/models/proxy_model/gpt2-774m", "max_length": 1024, "trust_remote_code": true, "train_datasets": [ [ "PrefOnlyRewardJSON01", { "proportion": 1.0, "path": "/home/juntao/Projects/roo-dev-cty/data/roo/gold-generate-dataset-ultrafeedback/30k/train.json" } ] ], "eval_datasets": [ [ "PrefOnlyRewardJSON02", { "proportion": 1.0, "path": "/home/juntao/Projects/roo-dev-cty/data/roo/gold-generate-dataset-ultrafeedback/30k/test.json" } ] ], "loss_type": "sequence-wise", "epochs": 2, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "gradient_accumulation_steps": 4, "gradient_checkpointing": true, "regularization": 0.001, "normalize_score_during_training": false, "normalizer_type": "ExponentialMovingAverage", "normalizer_momentum": 0.9, "lr": 2e-05, "lr_scheduler_type": "cosine", "lr_warmup_ratio": 0.03, "weight_decay": 0.1, "seed": 42, "fp16": false, "bf16": true, "tf32": true, "lm_coef": 0.01, "freeze_transformer_layers": false, "eval_strategy": "epoch", "eval_interval": 1000000, "need_eval": true, "eval_split_ratio": null, "output_dir": "/home/juntao/Projects/roo-dev-cty/experiments/outputs/score_lm/gpt2_774m_0910", "log_type": "wandb", "log_dir": "/home/juntao/Projects/roo-dev-cty/experiments/outputs/score_lm/gpt2_774m_0910", "log_project": "score_lm", "log_group": null, "log_run_name": "gpt2_774m_0910", "save_16bit": true, "save_interval": 1000000, "local_rank": 0, "zero_stage": 3, "offload": "none", "deepspeed": false, "deepspeed_config": null, "deepscale": false, "deepscale_config": null, "global_rank": 0, "device": { "type": "torch.device", "repr": "device(type='cuda', index=0)" }, "num_update_steps_per_epoch": 211, "total_training_steps": 422 }