FantasiaFoundry committed on
Commit
1f5a68d
1 Parent(s): c41c9ab
Files changed (1) hide show
  1. gguf-imat.py +157 -0
gguf-imat.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import zipfile
4
+ import subprocess
5
+ import shutil
6
+ from huggingface_hub import snapshot_download
7
+
8
+ # Clone or update the llama.cpp repository with shallow cloning
9
def clone_or_update_llama_cpp():
    """Clone llama.cpp next to this script, or update the existing clone.

    A missing ``llama.cpp`` directory is created with a shallow
    (``--depth 1``) ``git clone``; an existing one is refreshed with
    ``git pull``.

    Side effect: the process working directory is changed to the directory
    that contains this script.

    A non-zero git exit status is reported as a warning instead of being
    silently ignored, and the "ready" message is only printed when the
    repository directory actually exists afterwards.
    """
    print("Preparing...")
    base_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(base_dir)
    repo_dir = os.path.join(base_dir, "llama.cpp")

    # Run git with an explicit cwd rather than chdir-ing into the repository
    # and back, so an exception from git cannot leave the process stranded
    # inside 'llama.cpp'.
    if not os.path.exists(repo_dir):
        command = ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"]
        result = subprocess.run(command, cwd=base_dir)
    else:
        command = ["git", "pull"]
        result = subprocess.run(command, cwd=repo_dir)

    if result.returncode != 0:
        command_text = " ".join(command)
        print(f"Warning: '{command_text}' exited with code {result.returncode}.")

    if os.path.isdir(repo_dir):
        print("The 'llama.cpp' repository is ready.")
    else:
        print("The 'llama.cpp' repository is not available.")
20
+
21
+ # Download and extract the latest release of llama.cpp
22
def download_llama_release():
    """Download and extract the latest prebuilt Windows cuBLAS build of llama.cpp.

    The release tag is read from the final URL reached after requesting the
    GitHub ``releases/latest`` page. The matching
    ``llama-<tag>-bin-win-cublas-cu12.2.0-x64.zip`` archive is saved under
    ``bin/dl`` next to this script and extracted into ``bin``.

    Side effect: the process working directory is changed to ``bin/dl``.

    Returns:
        The release tag string on success, or ``None`` when the release page
        or the archive could not be fetched (a message is printed).
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(dl_dir, exist_ok=True)

    os.chdir(dl_dir)

    # Without a timeout a stalled connection would block the script forever.
    timeout_seconds = 60
    latest_release_url = "https://github.com/ggerganov/llama.cpp/releases/latest"
    try:
        response = requests.get(latest_release_url, timeout=timeout_seconds)
    except requests.RequestException as error:
        print(f"Failed to fetch the latest release information. ({error})")
        return None
    if response.status_code != 200:
        print("Failed to fetch the latest release information.")
        return None

    # The tag is the last path segment of the URL the request ended up at;
    # rstrip guards against a trailing slash yielding an empty tag.
    latest_release_tag = response.url.rstrip("/").split("/")[-1]
    zip_name = f"llama-{latest_release_tag}-bin-win-cublas-cu12.2.0-x64.zip"
    zip_path = os.path.join(dl_dir, zip_name)
    download_url = f"https://github.com/ggerganov/llama.cpp/releases/download/{latest_release_tag}/{zip_name}"

    # Announce the download before it starts, not after it has finished.
    print("Downloading latest 'llama.cpp' prebuilt Windows binaries...")
    try:
        response = requests.get(download_url, timeout=timeout_seconds)
    except requests.RequestException as error:
        print(f"Failed to download the release file. ({error})")
        return None
    if response.status_code != 200:
        print("Failed to download the release file.")
        return None

    with open(zip_path, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(bin_dir)
    print("Download and extraction completed successfully.")
    return latest_release_tag
47
+
48
+ # Download and extract cudart if necessary
49
def download_cudart_if_necessary(latest_release_tag):
    """Download and extract the CUDA runtime DLLs unless they are already present.

    The DLLs are looked for in ``bin`` next to this script. When any of them
    is missing, ``cudart-llama-bin-win-cu12.2.0-x64.zip`` is fetched from the
    llama.cpp release identified by ``latest_release_tag``, saved under
    ``bin/dl`` and extracted into ``bin``.

    Args:
        latest_release_tag: Release tag used to build the download URL. A
            falsy value (``None`` or ``""``) means the tag is unknown; the
            download is then reported as failed without touching the network.

    Returns:
        None. Progress and failures are reported with printed messages.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    cudart_dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(cudart_dl_dir, exist_ok=True)

    cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
    cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]

    # Check if all required files exist
    if all(os.path.exists(os.path.join(bin_dir, name)) for name in cudart_extracted_files):
        print("Cuda resources already exist. Skipping download.")
        return

    # An unknown tag would otherwise be formatted into the URL as the literal
    # text "None" and produce a pointless request.
    if not latest_release_tag:
        print("Failed to download the cudart release file.")
        return

    print("Preparing 'cuda' resources...")
    cudart_download_url = f"https://github.com/ggerganov/llama.cpp/releases/download/{latest_release_tag}/cudart-llama-bin-win-cu12.2.0-x64.zip"
    try:
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(cudart_download_url, timeout=60)
    except requests.RequestException as error:
        print(f"Failed to download the cudart release file. ({error})")
        return
    if response.status_code != 200:
        print("Failed to download the cudart release file.")
        return

    with open(cudart_zip_file, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(cudart_zip_file, "r") as zip_ref:
        zip_ref.extractall(bin_dir)
    print("Download and extraction of cudart completed successfully.")
73
+
74
+ # Collect user input and download the specified model repository
75
def download_model_repo():
    """Prompt for a Hugging Face model ID, download it, and start conversion.

    The repository is downloaded with ``snapshot_download`` into
    ``models/<model name>`` next to this script unless that directory already
    exists, where ``<model name>`` is the last ``/``-separated part of the
    model ID. ``convert_model_to_gguf_f16`` is then called for that directory.

    The model ID is validated before use: an empty ID, or one whose last
    component is ``.`` or ``..``, would make the model directory resolve to
    ``models`` itself (or its parent), which the conversion step deletes once
    it is done. Such input is rejected and the prompt is repeated.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.join(base_dir, "models")
    os.makedirs(models_dir, exist_ok=True)

    while True:
        raw_id = input("Enter the model ID to download (e.g., huggingface/transformers): ")
        # Drop surrounding whitespace and stray slashes so "org/model/" still
        # yields "model" as the directory name.
        model_id = raw_id.strip().strip("/")
        model_name = model_id.split("/")[-1]
        if model_name and model_name not in (".", ".."):
            break
        print("Please enter a valid model ID (e.g., huggingface/transformers).")

    model_dir = os.path.join(models_dir, model_name)

    # Download the model repository if it doesn't exist
    if not os.path.exists(model_dir):
        revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ").strip() or "main"

        print("Downloading model repository...")
        snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
        print("Model repository downloaded successfully.")
    else:
        print("Model already exists.")

    # Convert the downloaded model to GGUF F16 format and generate imatrix.dat
    convert_model_to_gguf_f16(base_dir, model_dir, model_name)
97
+
98
+ # Convert the downloaded model to GGUF F16 format
99
def convert_model_to_gguf_f16(base_dir, model_dir, model_name):
    """Convert a downloaded model to GGUF F16, build its imatrix, then quantize.

    Steps, each skipped when its output already exists:

    1. Run ``llama.cpp/convert.py`` to write
       ``models/<model_name>-GGUF/<model_name>-F16.gguf`` and, once that has
       succeeded, delete ``model_dir``.
    2. Run ``bin/imatrix.exe`` with ``imatrix/imatrix.txt`` as input and move
       the resulting ``imatrix.dat`` from the current working directory into
       the GGUF directory.

    Finally ``quantize_models`` is called.

    Args:
        base_dir: Directory containing ``llama.cpp``, ``bin``, ``imatrix``
            and ``models``.
        model_dir: Directory holding the downloaded model; removed after a
            successful conversion.
        model_name: Name used to build the output directory and file names.

    Raises:
        subprocess.CalledProcessError: If the conversion script or
            ``imatrix.exe`` exits with a non-zero status.
        FileNotFoundError: If a tool reports success but its expected output
            file is missing.
    """
    convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")

    os.makedirs(gguf_dir, exist_ok=True)

    # Execute the conversion command if F16 file doesn't exist
    if not os.path.exists(gguf_model_path):
        # check=True stops here on failure, so the downloaded model is never
        # deleted unless the conversion actually produced the F16 file.
        subprocess.run(
            ["python", convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16"],
            check=True,
        )
        if not os.path.exists(gguf_model_path):
            raise FileNotFoundError(f"Conversion did not produce '{gguf_model_path}'.")

        # Delete the original model directory
        shutil.rmtree(model_dir)
        print(f"Original model directory '{model_dir}' deleted.")

    # Execute the imatrix command if imatrix.dat doesn't exist
    imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
    imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
    imatrix_txt = os.path.join(base_dir, "imatrix", "imatrix.txt")
    if not os.path.exists(imatrix_output):
        # The tool is run without an output option, so its result is picked
        # up from the working directory. Resolve that path once, up front.
        generated_imatrix = os.path.abspath("imatrix.dat")
        subprocess.run(
            [imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "13"],
            check=True,
        )
        if not os.path.exists(generated_imatrix):
            raise FileNotFoundError(f"imatrix.exe did not produce '{generated_imatrix}'.")
        # Move the imatrix.dat file to the GGUF folder only after a
        # successful run, so an incomplete file is never treated as finished.
        shutil.move(generated_imatrix, imatrix_output)
        print("imatrix.dat generated successfully.")

    # Quantize the models
    quantize_models(base_dir, model_name)
127
+
128
+ # Quantize models with different options
129
def quantize_models(base_dir, model_name):
    """Quantize the F16 GGUF model into each configured quantization type.

    For every option, ``bin/quantize.exe`` is run with
    ``models/<model_name>-GGUF/imatrix.dat`` as the importance matrix,
    ``<model_name>-F16.gguf`` as input and
    ``<model_name>-<option>-imat.gguf`` as output, using the GGUF directory
    as the working directory.

    A failing option is reported and the remaining options are still
    attempted.

    Args:
        base_dir: Directory containing ``bin`` and ``models``.
        model_name: Name used to build the GGUF directory and file names.
    """
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
    # These paths do not depend on the option, so build them once.
    quantize_command = os.path.join(base_dir, "bin", "quantize.exe")
    imatrix_path = os.path.join(gguf_dir, "imatrix.dat")

    quantization_options = [
        "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M",
        "Q5_K_S", "Q6_K", "Q8_0", "IQ3_M", "IQ3_S", "IQ3_XS", "IQ3_XXS",
    ]

    for quant_option in quantization_options:
        quantized_gguf_name = f"{model_name}-{quant_option}-imat.gguf"
        quantized_gguf_path = os.path.join(gguf_dir, quantized_gguf_name)

        result = subprocess.run(
            [quantize_command, "--imatrix", imatrix_path,
             f16_gguf_path, quantized_gguf_path, quant_option],
            cwd=gguf_dir,
        )
        # Only claim success when the quantizer actually exited cleanly.
        if result.returncode == 0:
            print(f"Model quantized with {quant_option} option.")
        else:
            print(f"Quantization with {quant_option} option failed (exit code {result.returncode}).")
147
+
148
+ # Main function - Steps
149
def main():
    """Run the pipeline: fetch llama.cpp and its binaries, then process a model.

    The CUDA runtime download needs the release tag returned by
    ``download_llama_release``. When that step returned ``None`` the tag is
    unknown, so the CUDA step is skipped instead of being called with
    ``None``.
    """
    clone_or_update_llama_cpp()
    latest_release_tag = download_llama_release()
    if latest_release_tag is None:
        print("Skipping 'cuda' resources: the latest release tag is unknown.")
    else:
        download_cudart_if_necessary(latest_release_tag)
    download_model_repo()
    print("Finished preparing resources.")


if __name__ == "__main__":
    main()