logical-reasoning/train_qwen2_7b.json
{
"model_name_or_path": "Qwen/Qwen2-7B",
"stage": "sft",
"do_train": true,
"finetuning_type": "lora",
"lora_target": "all",
"loraplus_lr_ratio": 16.0,
"dataset": "mgtv_train",
"template": "qwen",
"cutoff_len": 1024,
"max_samples": 2000,
"overwrite_cache": "true",
"preprocessing_num_workers": 16,
"output_dir": "/content/qwen2-7b",
"logging_steps": 10,
"save_steps": 10,
"plot_loss": "true",
"overwrite_output_dir": "true",
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"learning_rate": 0.0001,
"num_train_epochs": 6.0,
"lr_scheduler_type": "cosine",
"warmup_ratio": 0.1,
"bf16": true,
"ddp_timeout": 180000000,
"val_size": 0.1,
"per_device_eval_batch_size": 1,
"eval_strategy": "steps",
"eval_steps": 10,
"report_to": "wandb"
}
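
This config performs LoRA supervised fine-tuning of Qwen/Qwen2-7B on the mgtv_train dataset, with LoRA+ (lr ratio 16), an effective batch size of 8 (1 per device × 8 accumulation steps), a cosine schedule with 10% warmup, bf16 precision, and a 10% validation split evaluated every 10 steps. A minimal sketch of launching it in Python, assuming LLaMA-Factory is installed and exposes the `llamafactory-cli` entry point (the entry-point name is an assumption based on recent LLaMA-Factory releases):

```python
# Sketch: validate this config and hand it to LLaMA-Factory's trainer.
# Assumes `llamafactory-cli` is on PATH and this JSON file sits alongside
# the script; adjust CONFIG_PATH for your layout.
import json
import subprocess

CONFIG_PATH = "train_qwen2_7b.json"

# Sanity-check that the file parses and targets the expected base model.
with open(CONFIG_PATH) as f:
    cfg = json.load(f)
assert cfg["model_name_or_path"] == "Qwen/Qwen2-7B"
assert cfg["finetuning_type"] == "lora"

# LLaMA-Factory accepts a JSON config path directly as the train argument.
subprocess.run(["llamafactory-cli", "train", CONFIG_PATH], check=True)
```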