https://github.com/ggerganov/llama.cpp/pull/6920
#26
by
Lewdiculous
- opened
- gguf-imat-llama-3.py +2 -2
gguf-imat-llama-3.py
CHANGED
@@ -101,7 +101,7 @@ def download_model_repo():
|
|
101 |
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
102 |
|
103 |
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
|
104 |
-
convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
|
105 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
106 |
gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
|
107 |
|
@@ -109,7 +109,7 @@ def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir,
|
|
109 |
os.makedirs(gguf_dir)
|
110 |
|
111 |
if not os.path.exists(gguf_model_path):
|
112 |
-
subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"])
|
113 |
|
114 |
if delete_model_dir == 'yes' or delete_model_dir == 'y':
|
115 |
shutil.rmtree(model_dir)
|
|
|
101 |
convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
|
102 |
|
103 |
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
|
104 |
+
convert_script = os.path.join(base_dir, "llama.cpp", "convert-hf-to-gguf.py")
|
105 |
gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
|
106 |
gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
|
107 |
|
|
|
109 |
os.makedirs(gguf_dir)
|
110 |
|
111 |
if not os.path.exists(gguf_model_path):
|
112 |
+
subprocess.run(["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"])
|
113 |
|
114 |
if delete_model_dir == 'yes' or delete_model_dir == 'y':
|
115 |
shutil.rmtree(model_dir)
|