Daniel Marques committed on
Commit
f18cc47
1 Parent(s): 9f4aea3

feat: update model

Browse files
Files changed (1) hide show
  1. load_models.py +1 -1
load_models.py CHANGED
@@ -64,7 +64,7 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
64
  kwargs["n_gpu_layers"] = 1
65
  if device_type.lower() == "cuda":
66
  kwargs["n_gpu_layers"] = N_GPU_LAYERS
67
- kwargs["n_batch"] = MAX_NEW_TOKENS # set this based on your GPU
68
 
69
  # kwargs["stream"] = stream
70
 
 
64
  kwargs["n_gpu_layers"] = 1
65
  if device_type.lower() == "cuda":
66
  kwargs["n_gpu_layers"] = N_GPU_LAYERS
67
+ kwargs["n_batch"] = N_BATCH # set this based on your GPU
68
 
69
  # kwargs["stream"] = stream
70