perlthoughts
committed on
Commit
•
ccdf90b
1
Parent(s):
ba75409
Update README.md
Browse files
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
tags:
|
3 |
-
- generated_from_trainer
|
4 |
- code
|
5 |
- coding
|
|
|
6 |
- llama-2
|
7 |
- gptq
|
8 |
model-index:
|
@@ -34,83 +34,20 @@ Model Architecture Llama 2 is an auto-regressive language model that uses an opt
|
|
34 |
|
35 |
The dataset contains problem descriptions and code in the Python language. This dataset is taken from sahil2801/code_instructions_120k and adds a prompt column in Alpaca style.
|
36 |
|
37 |
-
### Training hyperparameters
|
38 |
-
|
39 |
-
The following `bitsandbytes` quantization config was used during training:
|
40 |
-
- load_in_8bit: False
|
41 |
-
- load_in_4bit: True
|
42 |
-
- llm_int8_threshold: 6.0
|
43 |
-
- llm_int8_skip_modules: None
|
44 |
-
- llm_int8_enable_fp32_cpu_offload: False
|
45 |
-
- llm_int8_has_fp16_weight: False
|
46 |
-
- bnb_4bit_quant_type: nf4
|
47 |
-
- bnb_4bit_use_double_quant: False
|
48 |
-
- bnb_4bit_compute_dtype: float16
|
49 |
-
|
50 |
-
**SFTTrainer arguments**
|
51 |
-
```py
|
52 |
-
# Number of training epochs
|
53 |
-
num_train_epochs = 1
|
54 |
-
# Enable fp16/bf16 training (set bf16 to True with an A100)
|
55 |
-
fp16 = False
|
56 |
-
bf16 = True
|
57 |
-
# Batch size per GPU for training
|
58 |
-
per_device_train_batch_size = 4
|
59 |
-
# Number of update steps to accumulate the gradients for
|
60 |
-
gradient_accumulation_steps = 1
|
61 |
-
# Enable gradient checkpointing
|
62 |
-
gradient_checkpointing = True
|
63 |
-
# Maximum gradient norm (gradient clipping)
|
64 |
-
max_grad_norm = 0.3
|
65 |
-
# Initial learning rate (AdamW optimizer)
|
66 |
-
learning_rate = 2e-4
|
67 |
-
# Weight decay to apply to all layers except bias/LayerNorm weights
|
68 |
-
weight_decay = 0.001
|
69 |
-
# Optimizer to use
|
70 |
-
optim = "paged_adamw_32bit"
|
71 |
-
# Learning rate schedule
|
72 |
-
lr_scheduler_type = "cosine" #"constant"
|
73 |
-
# Ratio of steps for a linear warmup (from 0 to learning rate)
|
74 |
-
warmup_ratio = 0.03
|
75 |
-
```
|
76 |
### Framework versions
|
77 |
- PEFT 0.4.0
|
78 |
|
79 |
-
### Training metrics
|
80 |
-
```
|
81 |
-
{'loss': 1.044, 'learning_rate': 3.571428571428572e-05, 'epoch': 0.01}
|
82 |
-
{'loss': 0.8413, 'learning_rate': 7.142857142857143e-05, 'epoch': 0.01}
|
83 |
-
{'loss': 0.7299, 'learning_rate': 0.00010714285714285715, 'epoch': 0.02}
|
84 |
-
{'loss': 0.6593, 'learning_rate': 0.00014285714285714287, 'epoch': 0.02}
|
85 |
-
{'loss': 0.6309, 'learning_rate': 0.0001785714285714286, 'epoch': 0.03}
|
86 |
-
{'loss': 0.5916, 'learning_rate': 0.00019999757708974043, 'epoch': 0.03}
|
87 |
-
{'loss': 0.5861, 'learning_rate': 0.00019997032069768138, 'epoch': 0.04}
|
88 |
-
{'loss': 0.6118, 'learning_rate': 0.0001999127875580558, 'epoch': 0.04}
|
89 |
-
{'loss': 0.5928, 'learning_rate': 0.00019982499509519857, 'epoch': 0.05}
|
90 |
-
{'loss': 0.5978, 'learning_rate': 0.00019970696989770335, 'epoch': 0.05}
|
91 |
-
{'loss': 0.5791, 'learning_rate': 0.0001995587477103701, 'epoch': 0.06}
|
92 |
-
{'loss': 0.6054, 'learning_rate': 0.00019938037342337933, 'epoch': 0.06}
|
93 |
-
{'loss': 0.5864, 'learning_rate': 0.00019917190105869708, 'epoch': 0.07}
|
94 |
-
{'loss': 0.6159, 'learning_rate': 0.0001989333937537136, 'epoch': 0.08}
|
95 |
-
{'loss': 0.583, 'learning_rate': 0.00019866492374212205, 'epoch': 0.08}
|
96 |
-
{'loss': 0.6066, 'learning_rate': 0.00019836657233204182, 'epoch': 0.09}
|
97 |
-
{'loss': 0.5934, 'learning_rate': 0.00019803842988139374, 'epoch': 0.09}
|
98 |
-
{'loss': 0.5836, 'learning_rate': 0.00019768059577053473, 'epoch': 0.1}
|
99 |
-
{'loss': 0.6021, 'learning_rate': 0.00019729317837215943, 'epoch': 0.1}
|
100 |
-
{'loss': 0.5659, 'learning_rate': 0.00019687629501847898, 'epoch': 0.11}
|
101 |
-
{'loss': 0.5754, 'learning_rate': 0.00019643007196568606, 'epoch': 0.11}
|
102 |
-
{'loss': 0.5936, 'learning_rate': 0.000195954644355717, 'epoch': 0.12}
|
103 |
-
```
|
104 |
|
105 |
### Example of usage
|
106 |
|
107 |
```py
|
108 |
import torch
|
109 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
110 |
-
model_id = "
|
111 |
tokenizer = AutoTokenizer.from_pretrained(hf_model_repo)
|
112 |
-
model = AutoModelForCausalLM.from_pretrained(
|
113 |
-
|
|
|
114 |
instruction="Write a Python function to display the first and last elements of a list."
|
115 |
input=""
|
116 |
prompt = f"""### Instruction:
|
@@ -131,11 +68,11 @@ print(f"Generated instruction:\n{tokenizer.batch_decode(outputs.detach().cpu().n
|
|
131 |
### Citation
|
132 |
|
133 |
```
|
134 |
-
@misc {
|
135 |
-
author = { {
|
136 |
-
title = { llama-2-7b-int4-python
|
137 |
year = 2023,
|
138 |
-
url = { https://huggingface.co/
|
139 |
publisher = { Hugging Face }
|
140 |
}
|
141 |
```
|
|
|
1 |
---
|
2 |
tags:
|
|
|
3 |
- code
|
4 |
- coding
|
5 |
+
- python
|
6 |
- llama-2
|
7 |
- gptq
|
8 |
model-index:
|
|
|
34 |
|
35 |
The dataset contains problem descriptions and code in the Python language. This dataset is taken from sahil2801/code_instructions_120k and adds a prompt column in Alpaca style.
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
### Framework versions
|
38 |
- PEFT 0.4.0
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
### Example of usage
|
42 |
|
43 |
```py
|
44 |
import torch
|
45 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
46 |
+
model_id = "NurtureAI/llama-2-7b-int4-gptq-python"
|
47 |
tokenizer = AutoTokenizer.from_pretrained(hf_model_repo)
|
48 |
+
model = AutoModelForCausalLM.from_pretrained(
|
49 |
+
hf_model_repo, load_in_4bit=True,
|
50 |
+
torch_dtype=torch.float16, device_map=device_map)
|
51 |
instruction="Write a Python function to display the first and last elements of a list."
|
52 |
input=""
|
53 |
prompt = f"""### Instruction:
|
|
|
68 |
### Citation
|
69 |
|
70 |
```
|
71 |
+
@misc {NurtureAI,
|
72 |
+
author = { {Raymond Hernandez} },
|
73 |
+
title = { NurtureAI/llama-2-7b-int4-gptq-python },
|
74 |
year = 2023,
|
75 |
+
url = { https://huggingface.co/NurtureAI/llama-2-7b-int4-gptq-python },
|
76 |
publisher = { Hugging Face }
|
77 |
}
|
78 |
```
|