---
# Fine-tuning configuration: Llama-2-7b on a local processed QA dataset,
# trained with LoRA adapters on a single A10G instance.

# Cloud compute target for the training job.
compute:
  instance_type: A10G
  region: us-east-1

# Model and data identifiers.
base_model: meta-llama/Llama-2-7b-hf
model_id: Lukeam/llama-aa-fine-tuned
dataset: ./processed_qa.jsonl

# Trainer hyperparameters (HF Trainer-style names).
training:
  # NOTE: written as 2.0e-5 (not 2e-5) — YAML 1.1 loaders such as PyYAML
  # resolve 2e-5 as the string "2e-5"; the dotted form is a float everywhere.
  learning_rate: 2.0e-5
  num_train_epochs: 5
  per_device_train_batch_size: 2
  gradient_accumulation_steps: 8  # effective batch = 2 * 8 = 16 per device
  save_steps: 500
  eval_steps: 250
  max_length: 512
  warmup_steps: 100
  logging_steps: 50

# LoRA adapter settings.
lora:
  r: 16
  alpha: 32
  dropout: 0.05
  # Flow style kept for this short leaf-level list.
  target_modules: ["q_proj", "v_proj", "k_proj", "o_proj"]