oleksandrfluxon
committed on
Commit
•
fb80f65
1
Parent(s):
d1aa94f
Update pipeline.py
Browse files- pipeline.py +1 -1
pipeline.py
CHANGED
@@ -26,7 +26,7 @@ class PreTrainedPipeline():
|
|
26 |
torch_dtype=torch.float16,
|
27 |
trust_remote_code=True,
|
28 |
device_map="auto",
|
29 |
-
|
30 |
)
|
31 |
model.to('cuda')
|
32 |
print("===> model loaded")
|
|
|
26 |
torch_dtype=torch.float16,
|
27 |
trust_remote_code=True,
|
28 |
device_map="auto",
|
29 |
+
load_in_8bit=True # Load model with 8-bit precision quantization
|
30 |
)
|
31 |
model.to('cuda')
|
32 |
print("===> model loaded")
|