Update README.md
Browse files
README.md
CHANGED
@@ -42,20 +42,29 @@ dtype: bfloat16
|
|
42 |
<summary>Setup</summary>
|
43 |
|
44 |
```python
|
45 |
-
!pip install -qU transformers accelerate
|
46 |
|
47 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
|
|
|
|
|
|
48 |
import torch
|
49 |
|
50 |
model = "dfurman/HermesBagel-34B-v0.1"
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
tokenizer = AutoTokenizer.from_pretrained(model)
|
53 |
-
|
54 |
model = AutoModelForCausalLM.from_pretrained(
|
55 |
model,
|
56 |
torch_dtype=torch.bfloat16,
|
57 |
device_map="auto",
|
58 |
-
|
59 |
)
|
60 |
```
|
61 |
|
|
|
42 |
<summary>Setup</summary>
|
43 |
|
44 |
```python
|
45 |
+
!pip install -qU transformers accelerate bitsandbytes
|
46 |
|
47 |
+
from transformers import (
|
48 |
+
AutoTokenizer,
|
49 |
+
AutoModelForCausalLM,
|
50 |
+
BitsAndBytesConfig
|
51 |
+
)
|
52 |
import torch
|
53 |
|
54 |
model = "dfurman/HermesBagel-34B-v0.1"
|
55 |
+
nf4_config = BitsAndBytesConfig(
|
56 |
+
load_in_4bit=True,
|
57 |
+
bnb_4bit_quant_type="nf4",
|
58 |
+
bnb_4bit_use_double_quant=True,
|
59 |
+
bnb_4bit_compute_dtype=torch.bfloat16
|
60 |
+
)
|
61 |
|
62 |
tokenizer = AutoTokenizer.from_pretrained(model)
|
|
|
63 |
model = AutoModelForCausalLM.from_pretrained(
|
64 |
model,
|
65 |
torch_dtype=torch.bfloat16,
|
66 |
device_map="auto",
|
67 |
+
quantization_config=nf4_config,
|
68 |
)
|
69 |
```
|
70 |
|