chrislee973 committed

Commit 764c440
1 parent: cf1e82c

End of training

Files changed:
- README.md +201 -0
- adapter_model.bin +3 -0
README.md
ADDED
@@ -0,0 +1,201 @@
---
license: other
library_name: peft
tags:
- axolotl
- generated_from_trainer
base_model: NousResearch/Meta-Llama-3-8B
model-index:
- name: llama3-conciser
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.0`
```yaml
###
# Model Configuration: LLaMA-3 8B
###

# Copied from most recent modal llm-finetuning repo

base_model: NousResearch/Meta-Llama-3-8B
sequence_len: 4096

# base model weight quantization
load_in_8bit: true

# attention implementation
flash_attention: true

# finetuned adapter config
adapter: lora
lora_model_dir:
lora_r: 16
lora_alpha: 32
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
lora_modules_to_save: # required when adding new tokens to LLaMA/Mistral
  - embed_tokens
  - lm_head
# for details, see https://github.com/huggingface/peft/issues/334#issuecomment-1561727994

###
# Dataset Configuration: sqlqa
###

datasets:
  # This will be the path used for the data when it is saved to the Volume in the cloud.
  - path: conciser_dataset_50.jsonl
    ds_type: json
    type:
      # JSONL file contains question, context, answer fields per line.
      # This gets mapped to instruction, input, output axolotl tags.
      field_instruction: instruction
      field_input: text
      field_output: cleaned_text
      # Format is used by axolotl to generate the prompt.
      format: |-
        [INST] {instruction}
        {input}
        [/INST]

# dataset formatting config
tokens: # add new control tokens from the dataset to the model
  - "[INST]"
  - " [/INST]"
  - "[RES]"
  - " [/RES]"

# [INST] Given the below paragraph from the transcript of a podcast episode, lightly touch it up to make it more concise and readable. Remove filler words. (ie I think, I guess, yeah, you know, like). Remove repetitions and touch up phrases and sentences that look weird in writing. Edit sentences to make them more easy to read, and improve flow. Optimize for readability, but make sure to preserve the original meaning.
# I think I sort of deep down believed in what we were doing, and I did some analysis. I was like, okay, well, what would I go do if I wasn't doing this? It's like, well, I really like building things, and I like helping people communicate, and I like understanding what's going on with people and the dynamics between people. So I think if I sold this company, I'd just go build another company like this. And I kind of like the one I have.
# [/INST]

special_tokens:
  pad_token: <|end_of_text|>

val_set_size: 0.05

###
# Training Configuration
###

# random seed for better reproducibility
seed: 117

# optimizer config
optimizer: adamw_bnb_8bit
# optimizer: adamw_torch

learning_rate: 0.0001
lr_scheduler: cosine
num_epochs: 4
micro_batch_size: 2
gradient_accumulation_steps: 1
warmup_steps: 10

# axolotl saving config
dataset_prepared_path: last_run_prepared
output_dir: ./lora-out

# logging and eval config
logging_steps: 1
eval_steps: 0.05

# training performance optimization config
bf16: auto
tf32: false
gradient_checkpointing: true

###
# Miscellaneous Configuration
###

# when true, prevents over-writing the config from the CLI
strict: false

# "Don't mess with this, it's here for accelerate and torchrun" -- axolotl docs
local_rank:

# wandb logging config
wandb_project: llama3-conciser
wandb_name: llama3-4epochs-2batchsize-pushtohub

hub_model_id: chrislee973/llama3-conciser

```

</details><br>

# llama3-conciser

This model is a fine-tuned version of [NousResearch/Meta-Llama-3-8B](https://huggingface.co/NousResearch/Meta-Llama-3-8B) on the `conciser_dataset_50.jsonl` dataset described in the axolotl config above.
It achieves the following results on the evaluation set:
- Loss: 0.5174

## Model description

A LoRA adapter for [NousResearch/Meta-Llama-3-8B](https://huggingface.co/NousResearch/Meta-Llama-3-8B) that lightly edits paragraphs of podcast-transcript text to make them more concise and readable: it removes filler words and repetition and smooths phrasing while preserving the original meaning, following the instruction shown in the axolotl config above.
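The prompt format the adapter was trained on is defined in the config above. As a small illustration, here is a hypothetical sketch of assembling an inference prompt to match it; `INSTRUCTION` is copied from the example comment in the config, and `build_prompt` is a name introduced here, not part of any released code.

```python
# Mirrors the axolotl `format:` template from the config:
#   [INST] {instruction}
#   {input}
#   [/INST]
# INSTRUCTION is copied from the example comment in the config.
INSTRUCTION = (
    "Given the below paragraph from the transcript of a podcast episode, "
    "lightly touch it up to make it more concise and readable. Remove filler "
    "words. (ie I think, I guess, yeah, you know, like). Remove repetitions "
    "and touch up phrases and sentences that look weird in writing. Edit "
    "sentences to make them more easy to read, and improve flow. Optimize "
    "for readability, but make sure to preserve the original meaning."
)

def build_prompt(paragraph: str) -> str:
    """Hypothetical helper: wraps one paragraph in the training prompt format."""
    return f"[INST] {INSTRUCTION}\n{paragraph}\n[/INST]"
```

Note that training also added `[RES]` / ` [/RES]` control tokens; the config does not show how they wrap the target text, so completions may open with `[RES]`.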

## Intended uses & limitations

More information needed
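In the absence of usage notes, here is a minimal inference sketch under assumptions: it presumes the tokenizer with the added control tokens was pushed to this repo alongside the adapter, and it reuses the hypothetical `build_prompt` helper sketched above.

```python
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

repo = "chrislee973/llama3-conciser"

# Assumes the repo contains the tokenizer with the added [INST]/[RES] tokens;
# embed_tokens and lm_head were saved with the adapter, so the vocabulary
# sizes must line up for loading to succeed.
tokenizer = AutoTokenizer.from_pretrained(repo)

# AutoPeftModelForCausalLM reads the adapter config, loads the base model it
# names (NousResearch/Meta-Llama-3-8B), and applies this LoRA adapter on top.
model = AutoPeftModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt = build_prompt("I think I sort of deep down believed in what we were doing...")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```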

## Training and evaluation data

Trained on `conciser_dataset_50.jsonl` (see the axolotl config above), a JSONL file whose records carry `instruction`, `text`, and `cleaned_text` fields; `val_set_size: 0.05` holds out 5% of it as the evaluation set.

## Training procedure
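For readers not using axolotl, the adapter settings in the config above translate roughly to the peft setup below. This is a sketch under assumptions, not the exact objects axolotl builds: in particular, `lora_target_linear: true` asks axolotl to target every linear projection, and the hand-written `target_modules` list is a guess at how that resolves for LLaMA-3.

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B")
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Meta-Llama-3-8B", load_in_8bit=True  # load_in_8bit: true
)

# New control tokens from the dataset, per the `tokens:` block in the config;
# the embedding matrix must grow to make room for them.
tokenizer.add_tokens(["[INST]", " [/INST]", "[RES]", " [/RES]"])
tokenizer.pad_token = "<|end_of_text|>"  # special_tokens.pad_token
model.resize_token_embeddings(len(tokenizer))

lora_config = LoraConfig(
    r=16,                # lora_r
    lora_alpha=32,       # lora_alpha
    lora_dropout=0.05,   # lora_dropout
    target_modules=[     # assumed expansion of lora_target_linear: true
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # embed_tokens / lm_head are fully trained and saved because new tokens
    # were added (see the peft issue linked in the config).
    modules_to_save=["embed_tokens", "lm_head"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
```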

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0001
- train_batch_size: 2
- eval_batch_size: 2
- seed: 117
- distributed_type: multi-GPU
- num_devices: 2
- total_train_batch_size: 4 (see the sanity check below)
- total_eval_batch_size: 4
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_steps: 10
- num_epochs: 4
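The derived values above are consistent with the config and with the step counts in the results table below. A quick sanity check, treating the dataset size as an assumption inferred from the filename `conciser_dataset_50.jsonl`:

```python
# total_train_batch_size = micro_batch_size * num_devices * grad_accum_steps
micro_batch_size = 2
num_devices = 2
gradient_accumulation_steps = 1
total_train_batch_size = micro_batch_size * num_devices * gradient_accumulation_steps
print(total_train_batch_size)  # 4, as reported above

# Assumed: 50 examples in the dataset, ~2 held out by val_set_size: 0.05.
train_examples = 50 - 2
steps_per_epoch = train_examples // total_train_batch_size  # 12
print(steps_per_epoch * 4)  # 48 steps over 4 epochs, matching the table below
```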

### Training results

| Training Loss | Epoch  | Step | Validation Loss |
|:-------------:|:------:|:----:|:---------------:|
| 0.8738        | 0.0833 | 1    | 0.7897          |
| 1.2209        | 0.25   | 3    | 0.7878          |
| 0.8204        | 0.5    | 6    | 0.6336          |
| 0.6652        | 0.75   | 9    | 0.5303          |
| 0.4086        | 1.0    | 12   | 0.4836          |
| 0.3365        | 1.25   | 15   | 0.4733          |
| 0.3445        | 1.5    | 18   | 0.5132          |
| 0.3641        | 1.75   | 21   | 0.5146          |
| 0.1941        | 2.0    | 24   | 0.4939          |
| 0.1814        | 2.25   | 27   | 0.4863          |
| 0.1342        | 2.5    | 30   | 0.4969          |
| 0.1978        | 2.75   | 33   | 0.5141          |
| 0.1589        | 3.0    | 36   | 0.5222          |
| 0.1184        | 3.25   | 39   | 0.5258          |
| 0.1513        | 3.5    | 42   | 0.5182          |
| 0.1172        | 3.75   | 45   | 0.5155          |
| 0.0607        | 4.0    | 48   | 0.5174          |


### Framework versions

- PEFT 0.10.0
- Transformers 4.40.2
- Pytorch 2.2.2+cu121
- Datasets 2.19.1
- Tokenizers 0.19.1
adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c98b0a4f049a4865af84c597f4c5589a60d490f45500b6dc2371c30fe5d78f1e
size 4370759998