File size: 8,781 Bytes
8c81971 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import os
import shutil
import subprocess
import sys
import zipfile

import requests
from huggingface_hub import snapshot_download
# Clone or update the llama.cpp repository with --depth 1
def clone_or_update_llama_cpp():
    """Clone llama.cpp next to this script, or update an existing checkout.

    A missing ``llama.cpp`` folder is created with a shallow
    (``--depth 1``) clone; an existing one is updated with ``git pull``.
    The process working directory is left at the script's directory, as
    in the original implementation.

    The outcome is reported on stdout. A non-zero git exit status is
    reported as a warning instead of being announced as success.
    """
    print("Preparing...")
    base_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(base_dir)
    repo_dir = os.path.join(base_dir, "llama.cpp")

    if not os.path.exists(repo_dir):
        result = subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            cwd=base_dir,
        )
    else:
        # Run the pull inside the checkout via cwd= instead of changing the
        # process-wide working directory back and forth.
        result = subprocess.run(["git", "pull"], cwd=repo_dir)

    if result.returncode == 0:
        print("The 'llama.cpp' repository is ready.")
    else:
        print(
            f"Warning: git exited with status {result.returncode}; "
            "the 'llama.cpp' repository may be missing or out of date."
        )
# Download and extract the latest release of llama.cpp Windows binaries
def download_llama_release():
    """Download and unpack the latest llama.cpp Windows CUDA build into ``bin``.

    The latest tag is taken from the final URL that
    ``/releases/latest`` redirects to. The matching
    ``llama-<tag>-bin-win-cuda-cu12.2.0-x64.zip`` archive is streamed to
    ``bin/dl`` and extracted into ``bin``.

    Returns:
        The release tag on success, or ``None`` when the release lookup,
        the download or the extraction failed (a message is printed).
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(dl_dir, exist_ok=True)
    # Kept for backward compatibility: the original implementation left the
    # process working directory at the download folder.
    os.chdir(dl_dir)

    latest_release_url = "https://github.com/ggerganov/llama.cpp/releases/latest"
    try:
        response = requests.get(latest_release_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch the latest release information. ({exc})")
        return None
    if response.status_code != 200:
        print("Failed to fetch the latest release information.")
        return None

    # The tag is the last path segment of the URL the request ended up at.
    latest_release_tag = response.url.rstrip("/").split("/")[-1]
    archive_name = f"llama-{latest_release_tag}-bin-win-cuda-cu12.2.0-x64.zip"
    archive_path = os.path.join(dl_dir, archive_name)
    download_url = (
        "https://github.com/ggerganov/llama.cpp/releases/download/"
        f"{latest_release_tag}/{archive_name}"
    )

    print("Downloading latest 'llama.cpp' prebuilt Windows binaries...")
    try:
        # Stream to disk so the whole archive is never held in memory.
        with requests.get(download_url, stream=True, timeout=60) as download:
            if download.status_code != 200:
                print("Failed to download the release file.")
                return None
            with open(archive_path, "wb") as archive_file:
                for chunk in download.iter_content(chunk_size=1024 * 1024):
                    archive_file.write(chunk)
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            zip_ref.extractall(bin_dir)
    except (requests.RequestException, zipfile.BadZipFile) as exc:
        print(f"Failed to download the release file. ({exc})")
        return None

    print("Download and extraction completed successfully.")
    return latest_release_tag
# Download and extract the Cuda .dll resources if they aren't present in the bin folder
def download_cudart_if_necessary(latest_release_tag):
    """Fetch the CUDA runtime DLLs for a llama.cpp release unless already present.

    Nothing is downloaded when ``cublas64_12.dll``, ``cublasLt64_12.dll``
    and ``cudart64_12.dll`` all exist in ``bin``. Otherwise
    ``cudart-llama-bin-win-cu12.2.0-x64.zip`` for the given release is
    streamed to ``bin/dl`` and extracted into ``bin``.

    Args:
        latest_release_tag: Release tag used to build the download URL.
            A falsy value (for example ``None`` when the release step
            failed) is reported as a failure and no request is made.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(base_dir, "bin")
    cudart_extracted_files = ["cublas64_12.dll", "cublasLt64_12.dll", "cudart64_12.dll"]

    if all(os.path.exists(os.path.join(bin_dir, name)) for name in cudart_extracted_files):
        print("Cuda resources already exist. Skipping download.")
        return

    if not latest_release_tag:
        # Without a tag the URL would contain the literal text "None".
        print("Failed to download the cudart release file: no release tag available.")
        return

    cudart_dl_dir = os.path.join(bin_dir, "dl")
    os.makedirs(cudart_dl_dir, exist_ok=True)
    cudart_zip_file = os.path.join(cudart_dl_dir, "cudart-llama-bin-win-cu12.2.0-x64.zip")
    cudart_download_url = (
        "https://github.com/ggerganov/llama.cpp/releases/download/"
        f"{latest_release_tag}/cudart-llama-bin-win-cu12.2.0-x64.zip"
    )

    print("Preparing 'cuda' resources...")
    try:
        # Stream to disk so the whole archive is never held in memory.
        with requests.get(cudart_download_url, stream=True, timeout=60) as response:
            if response.status_code != 200:
                print("Failed to download the cudart release file.")
                return
            with open(cudart_zip_file, "wb") as archive_file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    archive_file.write(chunk)
        with zipfile.ZipFile(cudart_zip_file, "r") as zip_ref:
            zip_ref.extractall(bin_dir)
    except (requests.RequestException, zipfile.BadZipFile) as exc:
        print(f"Failed to download the cudart release file. ({exc})")
        return

    print("Download and extraction of cudart completed successfully.")
# Prompt for a model ID, download its repository unless it already exists locally, then start the conversion
def download_model_repo():
    """Prompt for a model, make sure its repository is local, then convert it.

    The model folder is ``models/<last path segment of the model ID>``.
    When it already exists it is reused; otherwise the user is asked for
    a revision and the repository is fetched with ``snapshot_download``.
    In both cases the user is asked whether to remove the folder after
    conversion and which imatrix text file to use, and the result is
    handed to ``convert_model_to_gguf_f16``.

    The model ID is re-prompted until it yields a usable folder name. An
    empty name, ``.`` or ``..`` would make the "model folder" resolve to
    the models directory or its parent, which could then be deleted.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    models_dir = os.path.join(base_dir, "models")
    os.makedirs(models_dir, exist_ok=True)

    while True:
        model_id = input("Enter the model ID to download (e.g., huggingface/transformers): ")
        model_id = model_id.strip().strip("/")
        model_name = model_id.split("/")[-1]
        if model_name and model_name not in (".", ".."):
            break
        print("Invalid model ID. Please enter an ID such as 'owner/model-name'.")

    model_dir = os.path.join(models_dir, model_name)
    already_present = os.path.exists(model_dir)

    revision = None
    if already_present:
        print("Model repository already exists. Using existing repository.")
    else:
        revision = input("Enter the revision (branch, tag, or commit) to download (default: main): ").strip() or "main"

    # Asked before the download so the user answers the quick questions first.
    delete_model_dir = input("Remove HF model folder after converting original model to GGUF? (yes/no) (default: no): ").strip().lower()

    if not already_present:
        print("Downloading model repository...")
        snapshot_download(repo_id=model_id, local_dir=model_dir, revision=revision)
        print("Model repository downloaded successfully.")

    imatrix_file_name = input("Enter the name of the imatrix.txt file (default: imatrix.txt): ").strip() or "imatrix.txt"

    # Convert to GGUF F16, generate imatrix.dat and quantize.
    convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name)
# Convert the model to GGUF F16 format, generate imatrix.dat, and start quantization
def convert_model_to_gguf_f16(base_dir, model_dir, model_name, delete_model_dir, imatrix_file_name):
    """Convert a model folder to an F16 GGUF, build imatrix.dat, then quantize.

    Steps, each skipped when its output already exists:

    1. Run ``llama.cpp/convert.py`` to write
       ``models/<model_name>-GGUF/<model_name>-F16.gguf``.
    2. Run ``bin/imatrix.exe`` with ``models/<model_name>-GGUF`` as its
       working directory, then look for ``imatrix.dat`` in that folder.
    3. Call ``quantize_models``.

    Args:
        base_dir: Folder that contains ``llama.cpp``, ``bin``, ``models``
            and ``imatrix``.
        model_dir: Folder holding the original model files.
        model_name: Name used for the output folder and file names.
        delete_model_dir: ``'yes'`` or ``'y'`` removes ``model_dir`` after
            a successful conversion; any other value keeps it.
        imatrix_file_name: File inside ``<base_dir>/imatrix`` passed to
            ``imatrix.exe`` with ``-f``.

    The function returns early, after printing a message, when the
    conversion or the imatrix generation did not produce its output file.
    The original model folder is never deleted after a failed conversion.
    """
    convert_script = os.path.join(base_dir, "llama.cpp", "convert.py")
    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    gguf_model_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
    os.makedirs(gguf_dir, exist_ok=True)

    # Convert only when the F16 file is not there yet.
    if not os.path.exists(gguf_model_path):
        # Use the interpreter running this script so the conversion sees the
        # same installed packages; fall back to "python" if it is unknown.
        conversion = subprocess.run([
            sys.executable or "python", convert_script, model_dir,
            "--outfile", gguf_model_path,
            "--outtype", "f16",
            "--vocab-type", "bpe",
        ])
        if conversion.returncode != 0 or not os.path.exists(gguf_model_path):
            print(f"Failed to convert the model to GGUF (exit code {conversion.returncode}).")
            print(f"Original model directory '{model_dir}' was not deleted.")
            return

        # Delete the original model directory only now that the GGUF exists.
        if delete_model_dir in ("yes", "y"):
            shutil.rmtree(model_dir)
            print(f"Original model directory '{model_dir}' deleted.")
        else:
            print(f"Original model directory '{model_dir}' was not deleted. You can remove it manually.")

    # Generate imatrix.dat if it doesn't exist.
    imatrix_exe = os.path.join(base_dir, "bin", "imatrix.exe")
    imatrix_output = os.path.join(gguf_dir, "imatrix.dat")
    imatrix_txt = os.path.join(base_dir, "imatrix", imatrix_file_name)
    if os.path.exists(imatrix_output):
        print("Skipping imatrix generation as imatrix.dat already exists.")
    else:
        subprocess.run([imatrix_exe, "-m", gguf_model_path, "-f", imatrix_txt, "-ngl", "8"], cwd=gguf_dir)
        # The output is looked for in gguf_dir itself, so no move is needed.
        # Moving the file into the folder it is already in makes shutil.move
        # raise "Destination path ... already exists".
        if os.path.exists(imatrix_output):
            print("imatrix.dat generated successfully.")
        else:
            print("Failed to generate imatrix.dat file.")
            # Quantization always passes --imatrix with this file.
            print("Skipping quantization because imatrix.dat is missing.")
            return

    # Quantize the models.
    quantize_models(base_dir, model_name)
# Quantize models with different options
def quantize_models(base_dir, model_name, quantization_options=None):
    """Quantize the F16 GGUF of a model into several formats using imatrix.dat.

    For every option, ``bin/quantize.exe`` is run with
    ``--imatrix <gguf_dir>/imatrix.dat`` to produce
    ``<model_name>-<option>-imat.gguf`` inside
    ``models/<model_name>-GGUF``.

    Args:
        base_dir: Folder that contains ``bin`` and ``models``.
        model_name: Name used for the GGUF folder and file names.
        quantization_options: Optional iterable of quantization type
            names. Defaults to the built-in list below.

    Success or failure is printed per option based on the tool's exit
    status. Nothing is run when the executable or the F16 file is missing.
    """
    if quantization_options is None:
        quantization_options = [
            "IQ3_M", "IQ3_XXS",
            "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS",
            "Q5_K_M", "Q5_K_S",
            "Q6_K",
            "Q8_0",
        ]

    gguf_dir = os.path.join(base_dir, "models", f"{model_name}-GGUF")
    f16_gguf_path = os.path.join(gguf_dir, f"{model_name}-F16.gguf")
    # These paths are the same for every option, so build them once.
    quantize_command = os.path.join(base_dir, "bin", "quantize.exe")
    imatrix_path = os.path.join(gguf_dir, "imatrix.dat")

    if not os.path.isfile(quantize_command):
        print(f"Cannot quantize: '{quantize_command}' was not found.")
        return
    if not os.path.isfile(f16_gguf_path):
        print(f"Cannot quantize: '{f16_gguf_path}' was not found.")
        return

    for quant_option in quantization_options:
        quantized_gguf_path = os.path.join(gguf_dir, f"{model_name}-{quant_option}-imat.gguf")
        result = subprocess.run(
            [quantize_command, "--imatrix", imatrix_path,
             f16_gguf_path, quantized_gguf_path, quant_option],
            cwd=gguf_dir,
        )
        if result.returncode == 0:
            print(f"Model quantized with {quant_option} option.")
        else:
            print(f"Failed to quantize model with {quant_option} option (exit code {result.returncode}).")
# Main function - Steps
def main():
    """Run the preparation steps in order.

    Clones or updates llama.cpp, downloads the prebuilt binaries, fetches
    the CUDA runtime files for that same release, and finally runs the
    interactive model download step.
    """
    clone_or_update_llama_cpp()
    # The tag returned by the release download selects the cudart archive.
    download_cudart_if_necessary(download_llama_release())
    download_model_repo()
    print("Finished preparing resources.")
# Run the steps only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|