Update README.md
README.md CHANGED

````diff
@@ -172,7 +172,7 @@ In LM-Studio, simply select the ChatML Prefix on the settings side pane:
 
 # Inference Code
 
-Here is example code using HuggingFace Transformers to inference the model (note:
+Here is example code using HuggingFace Transformers to inference the model (note: in 4bit, it will require around 5GB of VRAM)
 
 ```python
 # Code to inference Hermes with HF Transformers
@@ -183,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import LlamaTokenizer, MixtralForCausalLM
 import bitsandbytes, flash_attn
 
-tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-
+tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mistral-7B-DPO', trust_remote_code=True)
 model = MixtralForCausalLM.from_pretrained(
-    "NousResearch/Nous-Hermes-2-
+    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
     torch_dtype=torch.float16,
     device_map="auto",
     load_in_8bit=False,
````
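For reference, here is a sketch of how the full inference snippet might read after this change. Only the lines shown in the hunks above come from the commit; the `torch` import, the 4-bit and flash-attention flags, the ChatML prompt, and the `generate` call are assumptions modeled on other Nous Hermes 2 model cards, not part of this diff.

```python
# Sketch of the post-commit snippet; lines outside the diff hunks are assumed.
import torch
from transformers import LlamaTokenizer, MixtralForCausalLM
import bitsandbytes, flash_attn

tokenizer = LlamaTokenizer.from_pretrained(
    'NousResearch/Nous-Hermes-2-Mistral-7B-DPO', trust_remote_code=True
)
model = MixtralForCausalLM.from_pretrained(
    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_8bit=False,
    load_in_4bit=True,           # assumed: matches the "~5GB of VRAM in 4bit" note
    use_flash_attention_2=True,  # assumed: pairs with the flash_attn import above
)

# Assumed ChatML prompt, per the "ChatML Prefix" context line in the first hunk.
prompt = (
    "<|im_start|>system\n"
    "You are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\n"
    "Hello, who are you?<|im_end|>\n"
    "<|im_start|>assistant\n"
)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
generated_ids = model.generate(
    input_ids,
    max_new_tokens=256,
    temperature=0.8,
    do_sample=True,
    eos_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```

One thing worth flagging: the card imports `MixtralForCausalLM` even though the checkpoint is a Mistral-7B model; loading via `AutoModelForCausalLM` would dispatch to the correct architecture class from the checkpoint's config.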