FantasiaFoundry
committed on
Commit
•
14c10f2
1
Parent(s):
6be6a27
Update Windows executable naming as per llama.cpp changes
Browse files
- gguf-imat-for-FP16.py: +3 −3
gguf-imat-for-FP16.py
CHANGED
@@ -121,7 +121,7 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir,
|
|
121 |
create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name)
|
122 |
|
123 |
def create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name):
|
124 |
-
imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
|
125 |
imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
|
126 |
imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
|
127 |
if not os.path.exists(imatrix_output_dst):
|
@@ -142,7 +142,7 @@ def quantize_models(base_dir, model_name):
|
|
142 |
|
143 |
quantization_options = [
|
144 |
"IQ3_M", "IQ3_XXS",
|
145 |
-
"Q4_K_M", "Q4_K_S",
|
146 |
"Q5_K_M", "Q5_K_S",
|
147 |
"Q6_K",
|
148 |
"Q8_0"
|
@@ -151,7 +151,7 @@ def quantize_models(base_dir, model_name):
|
|
151 |
for quant_option in quantization_options:
|
152 |
quantized_gguf_name = f"{model_name}-{quant_option}-imat.gguf"
|
153 |
quantized_gguf_path = os.path.join(gguf_dir, quantized_gguf_name)
|
154 |
-
quantize_command = os.path.join(base_dir, "bin", "quantize.exe")
|
155 |
imatrix_path = os.path.join(gguf_dir, "imatrix.dat")
|
156 |
|
157 |
subprocess.run([quantize_command, "--imatrix", imatrix_path,
|
|
|
121 |
create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name)
|
122 |
|
123 |
def create_imatrix(base_dir, gguf_dir, gguf_model_path, model_name, imatrix_file_name):
|
124 |
+
imatrix_exe = os.path.join(base_dir, "bin", "llama-imatrix.exe")
|
125 |
imatrix_output_src = os.path.join(gguf_dir, "imatrix.dat")
|
126 |
imatrix_output_dst = os.path.join(gguf_dir, "imatrix.dat")
|
127 |
if not os.path.exists(imatrix_output_dst):
|
|
|
142 |
|
143 |
quantization_options = [
|
144 |
"IQ3_M", "IQ3_XXS",
|
145 |
+
"Q4_K_M", "Q4_K_S", "IQ4_XS",
|
146 |
"Q5_K_M", "Q5_K_S",
|
147 |
"Q6_K",
|
148 |
"Q8_0"
|
|
|
151 |
for quant_option in quantization_options:
|
152 |
quantized_gguf_name = f"{model_name}-{quant_option}-imat.gguf"
|
153 |
quantized_gguf_path = os.path.join(gguf_dir, quantized_gguf_name)
|
154 |
+
quantize_command = os.path.join(base_dir, "bin", "llama-quantize.exe")
|
155 |
imatrix_path = os.path.join(gguf_dir, "imatrix.dat")
|
156 |
|
157 |
subprocess.run([quantize_command, "--imatrix", imatrix_path,
|