Upload 3 files
- README.md +31 -0
- adapter_config.json +19 -0
- adapter_model.bin +3 -0
README.md
CHANGED
@@ -1,3 +1,34 @@
 ---
 license: apache-2.0
+datasets:
+- c-s-ale/alpaca-gpt4-data
 ---
+
+This repo provides the training checkpoint of LLaMA on the alpaca_data_gpt4 dataset via LoRA [MLP].
+
+He et al. [1] gave the insight that the FFN can better utilize modification at larger capacities.
+
+The code is provided by [tloen/alpaca-lora: Instruct-tune LLaMA on consumer hardware (github.com)](https://github.com/tloen/alpaca-lora).
+
+We modify the running script to:
+```bash
+torchrun --nproc_per_node=8 finetune.py \
+    --base_model '/cache1/chtan/large_models/llama-hf/llama-65b' \
+    --data_path './alpaca_data_gpt4.json' \
+    --output_dir './gpt4-alpaca-lora_mlp-65b' \
+    --batch_size 128 \
+    --micro_batch_size 2 \
+    --num_epochs 3 \
+    --learning_rate 1e-4 \
+    --cutoff_len 512 \
+    --val_set_size 2000 \
+    --lora_r 8 \
+    --lora_alpha 16 \
+    --lora_dropout 0.05 \
+    --lora_target_modules '[gate_proj,down_proj,up_proj]' \
+    --train_on_inputs \
+    --group_by_length
+```
+
+> [1] Junxian He, Chunting Zhou, Xuezhe Ma, Taylor Berg-Kirkpatrick, Graham Neubig: Towards a Unified View of Parameter-Efficient Transfer Learning. ICLR 2022
+
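For reference, a minimal inference sketch using the resulting checkpoint with `peft` and `transformers`: the base-model path is a placeholder, and `./gpt4-alpaca-lora_mlp-65b` is simply the `--output_dir` from the command above.

```python
# Minimal sketch: load the base LLaMA model, then attach the LoRA [MLP] adapter.
# Paths are placeholders; adjust to wherever the weights live locally.
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

base_model = "/path/to/llama-65b"        # hypothetical local path to base weights
adapter = "./gpt4-alpaca-lora_mlp-65b"   # --output_dir from the training command

tokenizer = LlamaTokenizer.from_pretrained(base_model)
model = LlamaForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, adapter)  # injects the LoRA weights
model.eval()
```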
adapter_config.json
ADDED
@@ -0,0 +1,19 @@
+{
+  "base_model_name_or_path": "/cache1/chtan/large_models/llama-hf/llama-65b",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "gate_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
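The config mirrors the training flags (`r=8`, `lora_alpha=16`, `lora_dropout=0.05`, the three LLaMA MLP projections as targets). As a sketch, the equivalent `LoraConfig` in current `peft` would look as follows; note that `enable_lora` and `merge_weights` are fields of the older `peft` release used here and have no counterpart in recent versions.

```python
# Sketch of a LoraConfig matching adapter_config.json above.
# `enable_lora`/`merge_weights` come from an older peft release and are
# omitted; the remaining fields map one-to-one.
from peft import LoraConfig

config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["gate_proj", "down_proj", "up_proj"],  # LLaMA MLP projections
    task_type="CAUSAL_LM",
)
```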
adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a48054d036354e5fbe0c2420ee80a765b1ceb66187b7fde33d1e743220a5fdb
+size 232169613
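The `.bin` entry is a Git LFS pointer, so a plain `git clone` without LFS fetches only this stub. A sketch of downloading the actual weights with `huggingface_hub` and verifying them against the pointer's sha256; the repo id below is a placeholder, not part of this commit.

```python
# Sketch: download adapter_model.bin (~232 MB per the pointer) and check it
# against the sha256 recorded above. The repo id is hypothetical.
import hashlib
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="<owner>/gpt4-alpaca-lora_mlp-65b",  # placeholder repo id
    filename="adapter_model.bin",
)

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MB chunks
        sha256.update(chunk)
assert sha256.hexdigest() == (
    "6a48054d036354e5fbe0c2420ee80a765b1ceb66187b7fde33d1e743220a5fdb"
)
```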