PocketDoc committed
Commit e2d90b8 · 1 Parent(s): 88806a9

Delete 2-PKTDC-llama-30B-gptq-lora-24gb.yml

Files changed (1)
  1. 2-PKTDC-llama-30B-gptq-lora-24gb.yml +0 -155
2-PKTDC-llama-30B-gptq-lora-24gb.yml DELETED
@@ -1,155 +0,0 @@
- # accelerate launch ./scripts/finetune.py 2-PKTDC-llama-30B-gptq-lora-24gb.yml
- #
- # base model settings (local or huggingface repo)
- base_model: PocketDoc/llama-30b-gptq-4bit-128g
- base_model_config: PocketDoc/llama-30b-gptq-4bit-128g
- model_type: LlamaForCausalLM
- tokenizer_type: LlamaTokenizer
- trust_remote_code:
-
- # wandb configuration
- wandb_project: llama-30b-gptq-4bit-128g-lora
- wandb_watch:
- wandb_run_id:
- wandb_log_model:
-
- # where to save the finished model to
- output_dir: ./llama-30b-gptq-4bit-128g-lora
-
- # dataset settings (local or huggingface repo)
- datasets:
-   - path: dansmeth.json
-     type: pygmalion
-
- dataset_prepared_path: data/last_run_prepared
-
- # percentage of the dataset to set aside as evaluation.
- val_set_size: 0.02
-
- # max token length / prompt
- sequence_len: 2048
-
- # max sequence length to concatenate training samples together up to
- # inspired by StackLLaMA. see https://huggingface.co/blog/stackllama#supervised-fine-tuning
- max_packed_sequence_len: 2048
-
- # quantized model loading settings
- gptq: true
- gptq_groupsize: 128 # group size
- gptq_model_v1: false # v1 or v2
- strict: false
-
- # this will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer
- load_in_8bit: true
-
- load_in_4bit:
-
- # Use CUDA bf16
- bf16: false
- # Use CUDA fp16
- fp16: true
- # Use CUDA tf32
- tf32: true
-
- # training hyperparameters
- gradient_accumulation_steps: 32
- micro_batch_size: 1
- eval_batch_size: 1
- num_epochs: 3
- warmup_steps: 350
- learning_rate: 0.00003
-
- logging_steps: 1
- eval_steps: 25
- save_steps: 175
-
- # stop training after this many evaluation losses have increased in a row
- # https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
- early_stopping_patience:
- # specify a scheduler to use with the optimizer. only one_cycle is supported currently
- lr_scheduler: linear
- # specify optimizer
- optimizer: paged_adamw_8bit
- # specify weight decay
- weight_decay: 0.05
-
-
- # if you already have a lora model trained that you want to load, put that here
- lora_model_dir:
-
- # LoRA hyperparameters
- adapter: lora # blank for full finetune
- lora_r: 32
- lora_alpha: 64
- lora_dropout: 0.05
- lora_target_linear:
- lora_target_modules:
-   - q_proj
-   - v_proj
-   # - k_proj
-   # - o_proj
-   # - gate_proj
-   # - down_proj
-   # - up_proj
- lora_modules_to_save:
-   # - embed_tokens
-   # - lm_head
- lora_out_dir:
- lora_fan_in_fan_out: false
-
-
- # whether to mask out or include the human's prompt from the training labels
- train_on_inputs: false
- # don't use this, leads to wonky training (according to someone on the internet)
- group_by_length: true
-
-
- # does not work with current implementation of 4-bit LoRA
- gradient_checkpointing: true
-
-
- # whether to use xformers attention patch https://github.com/facebookresearch/xformers:
- xformers_attention: true
- # whether to use flash attention patch https://github.com/HazyResearch/flash-attention:
- flash_attention: # require a100 for llama
- # whether to use scaled-dot-product attention
- # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
- sdp_attention:
-
-
- # resume from a specific checkpoint dir
- resume_from_checkpoint:
- # if resume_from_checkpoint isn't set and you simply want it to start where it left off
- # be careful with this being turned on between different models
- auto_resume_from_checkpoints:
-
-
- # don't mess with this, it's here for accelerate and torchrun
- local_rank:
-
- # add or change special tokens
- special_tokens:
-   # sys_role_token: "<|system|>"
-   # user_role_token: "<|user|>"
-   # model_role_token: "<|model|>"
-   bos_token: "<s>"
-   eos_token: "</s>"
-   unk_token: "<unk>"
-
- # add extra tokens
- tokens:
-
-
- # FSDP
- fsdp:
-
- fsdp_config:
-
- # Deepspeed
- deepspeed:
-
- # TODO
- torchdistx_path:
-
- # Debug mode
- debug:
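
For context, the deleted config trained a LoRA adapter (adapter: lora, lora_r: 32, lora_alpha: 64) on top of the GPTQ-quantized 30B base model and wrote the result to ./llama-30b-gptq-4bit-128g-lora. The following is only a minimal sketch of how such an adapter could later be attached with PEFT; the base_model and output_dir values are taken from the YAML above, while the assumption that this pre-quantized GPTQ checkpoint loads directly through transformers is not from this repo and may instead require auto-gptq/optimum or the same GPTQ-for-LLaMa style loader used during training.

# Hedged sketch, not part of this repository: load the GPTQ base and attach the LoRA adapter.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "PocketDoc/llama-30b-gptq-4bit-128g"    # base_model from the deleted config
adapter_dir = "./llama-30b-gptq-4bit-128g-lora"   # output_dir from the deleted config

tokenizer = AutoTokenizer.from_pretrained(base_id)
# Assumption: the quantized checkpoint is loadable via transformers on this machine.
base = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
# Attach the trained LoRA weights on top of the frozen base model.
model = PeftModel.from_pretrained(base, adapter_dir)
model.eval()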