{
  "model_type": "llama",
  "hidden_size": 4096,
  "num_attention_heads": 16,
  "num_hidden_layers": 32,
  "intermediate_size": 11008,
  "hidden_act": "gelu",
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-5,
  "max_position_embeddings": 2048,
  "vocab_size": 32000,
  "model_name": "LlamaForSequenceClassification",
  "pipeline_tag": "text-generation",
  "peft_config": {
    "r": 16,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj",
      "embed_tokens",
      "lm_head"
    ],
    "lora_alpha": 16,
    "lora_dropout": 0,
    "bias": "none",
    "use_gradient_checkpointing": "unsloth",
    "random_state": 3407,
    "use_rslora": false,
    "loftq_config": null
  },
  "training_args": {
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "warmup_steps": 5,
    "max_steps": 60,
    "learning_rate": 2e-4,
    "fp16": true,
    "bf16": false,
    "logging_steps": 1,
    "optim": "adamw_8bit",
    "weight_decay": 0.01,
    "lr_scheduler_type": "linear",
    "seed": 3407,
    "output_dir": "outputs"
  }
}