rodrigomasini committed on
Commit
112a0e5
·
1 Parent(s): ce8ce72

Update app_v2.py

Browse files
Files changed (1) hide show
  1. app_v2.py +17 -3
app_v2.py CHANGED
@@ -13,11 +13,25 @@ os.makedirs(quantized_model_dir, exist_ok=True)
13
  # Quantization configuration
14
  quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Load the tokenizer
17
- tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
18
 
19
- # Load the model using Option 1
20
- model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
21
 
22
  # Starting Streamlit app
23
  st.title("AutoGPTQ Streamlit App")
 
13
  # Quantization configuration
14
  quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
15
 
16
+ # Load the model using from_quantized
17
+ model = AutoGPTQForCausalLM.from_quantized(
18
+ pretrained_model_dir,
19
+ use_safetensors=True,
20
+ strict=False,
21
+ #model_basename=quantized_model_dir,
22
+ device="cuda:0",
23
+ trust_remote_code=True,
24
+ use_triton=False,
25
+ quantize_config=quantize_config
26
+ )
27
+
28
+ model.save_quantized(quantized_model_dir)
29
+
30
+
31
  # Load the tokenizer
32
+ tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
33
 
34
+ model_for_inference = AutoGPTQForCausalLM.from_pretrained(quantized_model_dir)
 
35
 
36
  # Starting Streamlit app
37
  st.title("AutoGPTQ Streamlit App")