extra-files/llama-3-config-files/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": 128009,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 4096,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 14336,
13
+ "max_position_embeddings": 8192,
14
+ "model_type": "llama",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 32,
17
+ "num_key_value_heads": 8,
18
+ "pretraining_tp": 1,
19
+ "rms_norm_eps": 1e-05,
20
+ "rope_scaling": null,
21
+ "rope_theta": 500000.0,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.40.0.dev0",
25
+ "use_cache": true,
26
+ "vocab_size": 128256,
27
+ "quantization_config": {
28
+ "quant_method": "exl2",
29
+ "version": "0.0.18",
30
+ "bits": 5.0,
31
+ "head_bits": 6,
32
+ "calibration": {
33
+ "rows": 100,
34
+ "length": 2048,
35
+ "dataset": "(default)"
36
+ }
37
+ }
38
+ }
extra-files/llama-3-config-files/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "eos_token_id": 128009,
5
+ "transformers_version": "4.40.0.dev0"
6
+ }
gguf-imat-llama-3.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import zipfile
4
+ import subprocess
5
+ import shutil
6
+ from huggingface_hub import snapshot_download
7
+
8
+ # Clone or update the llama.cpp repository with --depth 1
9
def clone_or_update_llama_cpp():
    """Clone the llama.cpp repository next to this script, or update it.

    When no ``llama.cpp`` folder exists in the script directory a shallow
    clone (``--depth 1``) is made; otherwise ``git pull`` is run inside the
    existing checkout. The process working directory is left at the script
    directory.

    Raises:
        subprocess.CalledProcessError: If the initial clone fails. The
            conversion script inside the repository is needed later, so
            there is nothing useful to continue with.
    """
    print("Preparing...")
    base_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(base_dir)
    repo_dir = os.path.join(base_dir, "llama.cpp")

    if not os.path.exists(repo_dir):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            cwd=base_dir,
            check=True,
        )
    else:
        # Run git inside the checkout through cwd= rather than chdir-ing in
        # and back out, so a failure cannot leave the process in the repo.
        result = subprocess.run(["git", "pull"], cwd=repo_dir)
        if result.returncode != 0:
            # An outdated checkout is still usable, so only warn.
            print("Warning: 'git pull' failed; continuing with the existing 'llama.cpp' checkout.")

    print("The 'llama.cpp' repository is ready.")
20
+
21
+ # Download and extract the latest release of llama.cpp Windows binaries
22
def download_llama_release():
    """Download and unpack the latest llama.cpp Windows CUDA 12.2 binaries.

    The release tag is the last path segment of the URL that the GitHub
    ``releases/latest`` page resolves to. The archive is stored in
    ``bin/dl`` and extracted into ``bin``, both next to this script.

    Returns:
        The release tag string on success, or ``None`` when the tag lookup
        or the download fails (a message is printed in that case).
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(dl_dir, exist_ok=True)

    # (connect, read) timeouts in seconds so a stalled connection cannot
    # block the script forever.
    timeout = (10, 300)

    latest_release_url = "https://github.com/ggerganov/llama.cpp/releases/latest"
    try:
        response = requests.get(latest_release_url, timeout=timeout)
    except requests.RequestException as err:
        print("Failed to fetch the latest release information.")
        print(f"Reason: {err}")
        return None
    if response.status_code != 200:
        print("Failed to fetch the latest release information.")
        return None

    latest_release_tag = response.url.rstrip("/").split("/")[-1]
    zip_name = f"llama-{latest_release_tag}-bin-win-cuda-cu12.2.0-x64.zip"
    # Absolute path: the archive lands in bin/dl without changing the
    # process working directory.
    zip_path = os.path.join(dl_dir, zip_name)
    download_url = f"https://github.com/ggerganov/llama.cpp/releases/download/{latest_release_tag}/{zip_name}"

    # Announce the download before it starts, not after it has finished.
    print("Downloading latest 'llama.cpp' prebuilt Windows binaries...")
    try:
        response = requests.get(download_url, timeout=timeout)
    except requests.RequestException as err:
        print("Failed to download the release file.")
        print(f"Reason: {err}")
        return None
    if response.status_code != 200:
        print("Failed to download the release file.")
        return None

    with open(zip_path, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(bin_dir)

    print("Download and extraction completed successfully.")
    return latest_release_tag
47
+
48
+ # Download and extract the Cuda .dll resources if they aren't present in the bin folder
49
def download_cudart_if_necessary(latest_release_tag):
    """Fetch the CUDA runtime DLLs into ``bin`` unless they are already there.

    Args:
        latest_release_tag: llama.cpp release tag whose ``cudart`` archive
            should be downloaded. May be ``None`` or empty when the release
            lookup failed; nothing is downloaded in that case.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]

    # Check if all required files exist
    if all(os.path.exists(os.path.join(bin_dir, file)) for file in cudart_extracted_files):
        print("Cuda resources already exist. Skipping download.")
        return

    # Without a tag the download URL cannot be built; do not request a
    # bogus ".../download/None/..." address.
    if not latest_release_tag:
        print("Cannot download the cudart release file: no release tag is available.")
        return

    cudart_dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(cudart_dl_dir, exist_ok=True)
    cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
    cudart_download_url = f"https://github.com/ggerganov/llama.cpp/releases/download/{latest_release_tag}/cudart-llama-bin-win-cu12.2.0-x64.zip"

    # Announce the work before doing it.
    print("Preparing 'cuda' resources...")
    try:
        # (connect, read) timeouts in seconds so a stalled connection
        # cannot block the script forever.
        response = requests.get(cudart_download_url, timeout=(10, 300))
    except requests.RequestException as err:
        print("Failed to download the cudart release file.")
        print(f"Reason: {err}")
        return
    if response.status_code != 200:
        print("Failed to download the cudart release file.")
        return

    with open(cudart_zip_file, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(cudart_zip_file, "r") as zip_ref:
        zip_ref.extractall(bin_dir)
    print("Download and extraction of cudart completed successfully.")
73
+
74
+ # Ask for user input to download or fetch from cache the specified model repository if it doesn't exist
75
def download_model_repo():
    """Prompt for a Hugging Face model and hand it to the conversion step.

    The model is stored in ``models/<model name>`` next to this script,
    where the model name is the last path segment of the entered ID. If that
    folder already exists it is reused; otherwise the user is asked for a
    revision and the repository is fetched with ``snapshot_download``. The
    user is also asked whether the folder should be removed after conversion
    and which imatrix text file to use, then
    ``convert_model_to_gguf_f16`` is called.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.join(base_dir, "models")
    os.makedirs(models_dir, exist_ok=True)

    # Re-prompt until the ID yields a usable folder name. An empty name
    # would make model_dir equal to the shared models folder, which always
    # exists and could then be removed wholesale by the cleanup step.
    while True:
        model_id = input("Enter the model ID to download (e.g., huggingface/transformers): ").strip().strip("/")
        model_name = model_id.split("/")[-1]
        if model_name and model_name not in (".", ".."):
            break
        print("Invalid model ID. Please enter something like 'organization/model-name'.")

    model_dir = os.path.join(models_dir, model_name)
    already_present = os.path.exists(model_dir)

    revision = None
    if already_present:
        print("Model repository already exists. Using existing repository.")
    else:
        revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ").strip() or "main"

    # Ask the user if they want to remove the HF model folder after conversion
    delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()

    if not already_present:
        print("Downloading model repository...")
        snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
        print("Model repository downloaded successfully.")

    # Ask for the name of the imatrix.txt file
    imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"

    # Convert the model to GGUF F16 format and generate imatrix.dat
    convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
113
+
114
+ # Convert the downloaded model to GGUF F16 format
115
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
    """Convert a model to F16 GGUF, generate its imatrix, then quantize.

    Args:
        base_dir: Directory holding the ``llama.cpp``, ``bin``, ``imatrix``
            and ``models`` folders.
        model_dir: Path of the Hugging Face model folder to convert.
        model_name: Name used for the ``<model_name>-GGUF`` output folder
            and for the generated file names.
        delete_model_dir: The user's answer; ``'yes'`` or ``'y'`` removes
            ``model_dir`` once the conversion has succeeded.
        imatrix_file_name: Name of the text file inside
            ``<base_dir>/imatrix`` that is passed to ``imatrix.exe``.

    Returns:
        None. Stops early, with a printed message, if the F16 conversion or
        the imatrix generation fails, because quantization needs both files.
    """
    import sys  # local import: only needed to locate the running interpreter

    convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
    os.makedirs(gguf_dir, exist_ok=True)

    # Convert only when the F16 file does not exist yet
    if not os.path.exists(gguf_model_path):
        # Use the interpreter running this script so the converter sees the
        # same environment; fall back to "python" if it is unknown.
        python_exe = sys.executable or "python"
        result = subprocess.run([python_exe, convert_script, model_dir, "--outfile", gguf_model_path, "--outtype", "f16", "--vocab-type", "bpe"])

        # Never delete the source model unless the conversion really worked.
        if result.returncode != 0 or not os.path.exists(gguf_model_path):
            print("Failed to convert the model to GGUF F16. The original model directory was kept.")
            return

        # Delete the original model directory only if the user asked for it
        if delete_model_dir in ("yes", "y"):
            shutil.rmtree(model_dir)
            print(f"Original model directory '{model_dir}' deleted.")
        else:
            print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")

    # Generate imatrix.dat if it doesn't exist
    imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
    imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
    imatrix_txt = os.path.join(base_dir, "imatrix", imatrix_file_name)
    if os.path.exists(imatrix_output):
        print("Skipping imatrix generation as imatrix.dat already exists.")
    else:
        # The tool runs with cwd=gguf_dir and the result is looked up at
        # gguf_dir/imatrix.dat, so no move is needed afterwards. Moving the
        # file into the directory it already lives in makes shutil.move
        # raise "Destination path ... already exists".
        result = subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "8"], cwd=gguf_dir)
        if result.returncode != 0 or not os.path.exists(imatrix_output):
            print("Failed to generate imatrix.dat file.")
            return
        print("imatrix.dat generated successfully.")

    # Quantize the models
    quantize_models(base_dir, model_name)
153
+
154
+ # Quantize models with different options
155
def quantize_models(base_dir, model_name):
    """Quantize the F16 GGUF model into each supported format.

    For every quantization type in the list below, ``bin/quantize.exe`` is
    run on ``<model_name>-F16.gguf`` with the ``imatrix.dat`` found in the
    same ``<model_name>-GGUF`` folder, writing
    ``<model_name>-<type>-imat.gguf`` next to it.

    Args:
        base_dir: Directory holding the ``bin`` and ``models`` folders.
        model_name: Model name used in the GGUF folder and file names.

    Returns:
        None. Success or failure of each type is reported with ``print``.
    """
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
    # These paths do not depend on the quantization type, so build them once.
    quantize_command = os.path.join(base_dir, "bin", "quantize.exe")
    imatrix_path = os.path.join(gguf_dir, "imatrix.dat")

    quantization_options = [
        "IQ3_M", "IQ3_XXS",
        "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS",
        "Q5_K_M", "Q5_K_S",
        "Q6_K",
        "Q8_0",
    ]

    for quant_option in quantization_options:
        quantized_gguf_path = os.path.join(gguf_dir, f"{model_name}-{quant_option}-imat.gguf")
        result = subprocess.run(
            [quantize_command, "--imatrix", imatrix_path,
             f16_gguf_path, quantized_gguf_path, quant_option],
            cwd=gguf_dir,
        )
        # Report success only when the quantizer actually exited cleanly;
        # a failure for one type does not stop the remaining ones.
        if result.returncode == 0:
            print(f"Model quantized with {quant_option} option.")
        else:
            print(f"Failed to quantize model with {quant_option} option (exit code {result.returncode}).")
176
+
177
+ # Main function - Steps
178
def main():
    """Run the whole pipeline: tools, binaries, model download and conversion."""
    clone_or_update_llama_cpp()
    latest_release_tag = download_llama_release()
    if latest_release_tag is None:
        # The cudart archive URL is built from the release tag, so there is
        # nothing valid to request without one. Keep going: binaries from an
        # earlier run may already be present in 'bin'.
        print("Latest 'llama.cpp' release is unknown; skipping the Cuda resources download and using any binaries already present in 'bin'.")
    else:
        download_cudart_if_necessary(latest_release_tag)
    download_model_repo()
    print("Finished preparing resources.")


if __name__ == "__main__":
    main()