Anhforth committed on
Commit
f4c8499
1 Parent(s): 7f2a151

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -2
README.md CHANGED
@@ -23,12 +23,22 @@ The additional details of the Aquila model will be presented in the official tec
23
  ### 1. Inference
24
 
25
  ```python
26
- from transformers import AutoTokenizer, AutoModelForCausalLM
27
  import torch
 
 
 
28
  device = torch.device("cuda:0")
29
  model_info = "BAAI/AquilaChat2-7B-16K"
30
  tokenizer = AutoTokenizer.from_pretrained(model_info, trust_remote_code=True)
31
- model = AutoModelForCausalLM.from_pretrained(model_info, trust_remote_code=True, torch_dtype=torch.bfloat16)
 
 
 
 
 
 
 
 
32
  model.eval()
33
  model.to(device)
34
  text = "请给出10个要到北京旅游的理由。"
 
23
  ### 1. Inference
24
 
25
  ```python
 
26
  import torch
27
+ from transformers import AutoTokenizer, AutoModelForCausalLM
28
+ from transformers import BitsAndBytesConfig
29
+
30
  device = torch.device("cuda:0")
31
  model_info = "BAAI/AquilaChat2-7B-16K"
32
  tokenizer = AutoTokenizer.from_pretrained(model_info, trust_remote_code=True)
33
+ quantization_config=BitsAndBytesConfig(
34
+ load_in_4bit=True,
35
+ bnb_4bit_use_double_quant=True,
36
+ bnb_4bit_quant_type="nf4",
37
+ bnb_4bit_compute_dtype=torch.bfloat16,
38
+ )
39
+ model = AutoModelForCausalLM.from_pretrained(model_info, trust_remote_code=True, torch_dtype=torch.float16,
40
+ # quantization_config=quantization_config, # Uncomment this line for 4bit quantization
41
+ )
42
  model.eval()
43
  model.to(device)
44
  text = "请给出10个要到北京旅游的理由。"