KnutJaegersberg committed on
Commit
42c45e1
1 Parent(s): 7dcb561

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -0
README.md CHANGED
@@ -1,3 +1,23 @@
1
  ---
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-4.0
3
  ---
4
+
5
+ Experimental quantization.
6
+
7
+ Working inference code (regular inference with AutoGPTQ does not work without `return_token_type_ids=False`; I did not get it to work with text-generation-webui):
8
+
9
+ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+ from transformers import AutoTokenizer
10
+
11
+ tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
12
+
13
+ model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device="cuda:0", use_triton=False)
14
+
15
+ input_ids = tokenizer("Question: What is the purpose of life?\n\nAnswer:", return_tensors="pt").input_ids.to("cuda:0")
16
+
17
+ out = model.generate(input_ids=input_ids, max_length=300)
18
+
19
+ print(tokenizer.decode(out[0]))
20
+
21
+ or
22
+
23
+ print(tokenizer.decode(model.generate(**tokenizer("test is", return_tensors="pt", return_token_type_ids=False).to("cuda:0"))[0]))