rphrp1985 committed on
Commit
7b32bf2
1 Parent(s): 9b0d3b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -21,6 +21,9 @@ subprocess.run(
21
  shell=True,
22
  )
23
 
 
 
 
24
  subprocess.run(
25
  "pip install pynvml gpustat",
26
 
@@ -74,11 +77,12 @@ accelerator = Accelerator()
74
 
75
  model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
76
  # torch_dtype= torch.uint8,
77
- torch_dtype=torch.float16,
 
78
  # torch_dtype=torch.fl,
79
- attn_implementation="flash_attention_2",
80
- low_cpu_mem_usage=True,
81
-
82
  # device_map='cuda',
83
  # device_map=accelerator.device_map,
84
 
 
21
  shell=True,
22
  )
23
 
24
+ import bitsandbytes as bnb # Import bitsandbytes for 8-bit quantization
25
+
26
+
27
  subprocess.run(
28
  "pip install pynvml gpustat",
29
 
 
77
 
78
  model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
79
  # torch_dtype= torch.uint8,
80
+ # torch_dtype=torch.float16,
81
+ # load_in_8bit=True,
82
  # torch_dtype=torch.fl,
83
+ # attn_implementation="flash_attention_2",
84
+ # low_cpu_mem_usage=True,
85
+ # quantization_config=bnb.QuantizationConfig(bits=8),
86
  # device_map='cuda',
87
  # device_map=accelerator.device_map,
88