Spaces:
Paused
Paused
Change quantization
Browse files
app.py
CHANGED
@@ -100,8 +100,7 @@ def predict(
|
|
100 |
# Load model
|
101 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
102 |
quantization_config = BitsAndBytesConfig(
|
103 |
-
|
104 |
-
bnb_4bit_compute_dtype=torch.bfloat16
|
105 |
)
|
106 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
107 |
model = AutoModelForCausalLM.from_pretrained(
|
|
|
100 |
# Load model
|
101 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
102 |
quantization_config = BitsAndBytesConfig(
|
103 |
+
load_in_8bit=False, bnb_4bit_compute_dtype=torch.bfloat16
|
|
|
104 |
)
|
105 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
106 |
model = AutoModelForCausalLM.from_pretrained(
|