base_model: codellama/CodeLlama-13b-Instruct-hf
model_type: LlamaForCausalLM
tokenizer_type: CodeLlamaTokenizer
is_llama_derived_model: true

load_in_8bit: false
bf16: true
strict: false

datasets:
  - path: data.jsonl
    ds_type: json
    type:
      # Each line of the JSONL file carries instruction, context, and output fields;
      # the field_* keys below map them onto axolotl's instruction / input / output slots
      # (the context -> input mapping is disabled here).
      field_instruction: instruction
      #field_input: context
      field_output: output
      # format is the template axolotl uses to build the prompt.
      format: |-
        Using the instruction context below, generate a typescript code that answers the question and explain it
        {instruction}

dataset_prepared_path:
val_set_size: 16  # must be at least micro_batch_size * N_GPUS, and larger if eval packing is used.
output_dir: ./lora-out

sequence_len: 4096
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true

adapter: lora
lora_model_dir:
lora_r: 16
lora_alpha: 32  # alpha = 2 x rank is a good starting point.
lora_dropout: 0.05
lora_target_linear: true  # target all linear layers
lora_fan_in_fan_out:

wandb_project:
wandb_entity:
wandb_watch:
wandb_run_id:

gradient_accumulation_steps: 1
micro_batch_size: 8
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
auto_resume_from_checkpoints: true
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
debug: true
deepspeed: /root/axolotl/deepspeed/zero3.json
weight_decay: 0.0
fsdp:
fsdp_config:

special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"
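
# For reference, a minimal sketch of one data.jsonl record this config expects.
# The values are hypothetical; with field_input commented out above, only the
# instruction and output fields are actually consumed by the prompt template.
# {"instruction": "Write a function that doubles every number in an array.",
#  "output": "const double = (xs: number[]): number[] => xs.map((x) => x * 2);"}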