Spaces:
Paused
Paused
Commit
·
5d04704
1
Parent(s):
4b8d66c
Update app_v4.py
Browse files
app_v4.py
CHANGED
@@ -35,7 +35,7 @@ tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=False)
|
|
35 |
# Attempt to load the model, catch any OOM errors
|
36 |
@st.cache_data
|
37 |
def load_gptq_model():
|
38 |
-
AutoGPTQForCausalLM.from_quantized(
|
39 |
pretrained_model_dir,
|
40 |
model_basename="Jackson2-4bit-128g-GPTQ",
|
41 |
use_safetensors=True,
|
@@ -44,6 +44,7 @@ def load_gptq_model():
|
|
44 |
model.eval() # Set the model to inference mode
|
45 |
return model
|
46 |
|
|
|
47 |
model_loaded = False
|
48 |
try:
|
49 |
model = load_gptq_model()
|
|
|
35 |
# Attempt to load the model, catch any OOM errors
|
36 |
@st.cache_data
|
37 |
def load_gptq_model():
|
38 |
+
model = AutoGPTQForCausalLM.from_quantized(
|
39 |
pretrained_model_dir,
|
40 |
model_basename="Jackson2-4bit-128g-GPTQ",
|
41 |
use_safetensors=True,
|
|
|
44 |
model.eval() # Set the model to inference mode
|
45 |
return model
|
46 |
|
47 |
+
# Attempt to load the model, catch any OOM errors
|
48 |
model_loaded = False
|
49 |
try:
|
50 |
model = load_gptq_model()
|