mobicham committed on
Commit
af29244
1 Parent(s): 70e587f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -14
README.md CHANGED
@@ -11,28 +11,23 @@ This is a version of the LLama-2-70B-chat-hf model quantized to 2-bit via Half-Q
11
  ### Basic Usage
12
  To run the model, install the HQQ library from https://github.com/mobiusml/hqq and use it as follows:
13
  ``` Python
14
- from hqq.models.llama_hf import LlamaHQQ
15
- import transformers
16
-
17
  model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
18
- #Load the tokenizer
19
- tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
20
- #Load the model
21
- model = LlamaHQQ.from_quantized(model_id)
22
  ```
23
 
24
  ### Basic Chat Example
25
  ``` Python
26
- import transformers
27
- from hqq.models.llama_hf import LlamaHQQ
28
 
29
- model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
30
- #Load the tokenizer
31
- tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
32
- #Load the model
33
- model = LlamaHQQ.from_quantized(model_id)
34
 
35
  ##########################################################################################################
 
36
  from threading import Thread
37
 
38
  from sys import stdout
 
11
  ### Basic Usage
12
  To run the model, install the HQQ library from https://github.com/mobiusml/hqq and use it as follows:
13
  ``` Python
 
 
 
14
  model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
15
+
16
+ from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
17
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
18
+ model = HQQModelForCausalLM.from_quantized(model_id)
19
  ```
20
 
21
  ### Basic Chat Example
22
  ``` Python
23
+ model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
 
24
 
25
+ from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
26
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
27
+ model = HQQModelForCausalLM.from_quantized(model_id)
 
 
28
 
29
  ##########################################################################################################
30
+ import transformers
31
  from threading import Thread
32
 
33
  from sys import stdout