chtan committed on
Commit
a77eafc
1 Parent(s): 7344a41

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +31 -0
  2. adapter_config.json +19 -0
  3. adapter_model.bin +3 -0
README.md CHANGED
@@ -1,3 +1,34 @@
1
  ---
2
  license: apache-2.0
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ datasets:
4
+ - c-s-ale/alpaca-gpt4-data
5
  ---
6
+
7
+ This repo provides a checkpoint of LLaMA-65B fine-tuned on the alpaca_data_gpt4 dataset via LoRA applied to the MLP (feed-forward) layers, i.e. `gate_proj`, `down_proj`, and `up_proj`.
8
+
9
+ He et al. (2022) [1] observed that the FFN can better utilize modification at larger capacities.
10
+
11
+ The code is provided by [tloen/alpaca-lora: Instruct-tune LLaMA on consumer hardware (github.com)](https://github.com/tloen/alpaca-lora).
12
+
13
+ We modify the run script as follows:
14
+ ```bash
15
+ torchrun --nproc_per_node=8 finetune.py \
16
+ --base_model '/cache1/chtan/large_models/llama-hf/llama-65b' \
17
+ --data_path './alpaca_data_gpt4.json' \
18
+ --output_dir './gpt4-alpaca-lora_mlp-65b' \
19
+ --batch_size 128 \
20
+ --micro_batch_size 2 \
21
+ --num_epochs 3 \
22
+ --learning_rate 1e-4 \
23
+ --cutoff_len 512 \
24
+ --val_set_size 2000 \
25
+ --lora_r 8 \
26
+ --lora_alpha 16 \
27
+ --lora_dropout 0.05 \
28
+ --lora_target_modules '[gate_proj,down_proj,up_proj]' \
29
+ --train_on_inputs \
30
+ --group_by_length
31
+ ```
32
+
33
+ > [1] Junxian He, Chunting Zhou, Xuezhe Ma, Taylor Berg-Kirkpatrick, Graham Neubig: Towards a Unified View of Parameter-Efficient Transfer Learning. ICLR 2022
34
+
adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "/cache1/chtan/large_models/llama-hf/llama-65b",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "merge_weights": false,
10
+ "modules_to_save": null,
11
+ "peft_type": "LORA",
12
+ "r": 8,
13
+ "target_modules": [
14
+ "gate_proj",
15
+ "down_proj",
16
+ "up_proj"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a48054d036354e5fbe0c2420ee80a765b1ceb66187b7fde33d1e743220a5fdb
3
+ size 232169613