Spaces:
Paused
Paused
Commit
·
112a0e5
1
Parent(s):
ce8ce72
Update app_v2.py
Browse files
app_v2.py
CHANGED
@@ -13,11 +13,25 @@ os.makedirs(quantized_model_dir, exist_ok=True)
|
|
13 |
# Quantization configuration
|
14 |
quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Load the tokenizer
|
17 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
18 |
|
19 |
-
|
20 |
-
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)
|
21 |
|
22 |
# Starting Streamlit app
|
23 |
st.title("AutoGPTQ Streamlit App")
|
|
|
13 |
# Quantization configuration
|
14 |
quantize_config = BaseQuantizeConfig(bits=4, group_size=128, desc_act=False)
|
15 |
|
16 |
+
# Load the model using from_quantized
|
17 |
+
model = AutoGPTQForCausalLM.from_quantized(
|
18 |
+
pretrained_model_dir,
|
19 |
+
use_safetensors=True,
|
20 |
+
strict=False,
|
21 |
+
#model_basename=quantized_model_dir,
|
22 |
+
device="cuda:0",
|
23 |
+
trust_remote_code=True,
|
24 |
+
use_triton=False,
|
25 |
+
quantize_config=quantize_config
|
26 |
+
)
|
27 |
+
|
28 |
+
model.save_quantized(quantized_model_dir)
|
29 |
+
|
30 |
+
|
31 |
# Load the tokenizer
|
32 |
+
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, use_fast=True)
|
33 |
|
34 |
+
model_for_inference = AutoGPTQForCausalLM.from_pretrained(quantized_model_dir)
|
|
|
35 |
|
36 |
# Starting Streamlit app
|
37 |
st.title("AutoGPTQ Streamlit App")
|