teknium committed
Commit
488062b
1 Parent(s): 868f0aa

Update README.md

Files changed (1)
  1. README.md +3 -3
README.md CHANGED
@@ -172,7 +172,7 @@ In LM-Studio, simply select the ChatML Prefix on the settings side pane:
 
 # Inference Code
 
-Here is example code using HuggingFace Transformers to inference the model (note: even in 4bit, it will require more than 24GB of VRAM)
+Here is example code using HuggingFace Transformers to inference the model (note: in 4bit, it will require around 5GB of VRAM)
 
 ```python
 # Code to inference Hermes with HF Transformers
@@ -183,9 +183,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import LlamaTokenizer, MixtralForCausalLM
 import bitsandbytes, flash_attn
 
-tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', trust_remote_code=True)
+tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mistral-7B-DPO', trust_remote_code=True)
 model = MixtralForCausalLM.from_pretrained(
-"NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
+"NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
 torch_dtype=torch.float16,
 device_map="auto",
 load_in_8bit=False,
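
For reference, below is a minimal, self-contained sketch of the full inference snippet this hunk truncates, adjusted for the Mistral-7B repo the commit now points at. Assumptions beyond the diff: `AutoTokenizer`/`AutoModelForCausalLM` are used in place of the `LlamaTokenizer`/`MixtralForCausalLM` classes the committed README still names, `load_in_4bit=True` matches the updated ~5GB VRAM note, and the prompt follows the ChatML format referenced in the hunk context; the sampling parameters are illustrative defaults, not values from the commit.

```python
# Hypothetical, fuller version of the truncated snippet above; not part of the commit.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True,  # requires bitsandbytes; ~5GB of VRAM per the updated note
)

# Hermes 2 models use the ChatML conversation format.
prompt = (
    "<|im_start|>system\n"
    "You are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\n"
    "Write a haiku about open-source AI.<|im_end|>\n"
    "<|im_start|>assistant\n"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(
    **inputs,
    max_new_tokens=256,
    temperature=0.8,
    do_sample=True,
)

# Decode only the newly generated tokens, skipping the echoed prompt.
print(tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```

On recent transformers releases the bare `load_in_4bit` flag is superseded by `quantization_config=BitsAndBytesConfig(load_in_4bit=True)`; either route assumes `bitsandbytes` is installed, as the README's own imports suggest.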