# bloomz.cpp-converter / convert.py
# testbot
# might work
# 713b2b5
# raw
# history blame
# 1.12 kB
import sys
from pathlib import Path
from subprocess import run
# The "bloomz.cpp" folder located in the same directory as this module.
BLOOMZ_FOLDER = Path(__file__).parent.joinpath("bloomz.cpp")
def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Path:
    """Convert a model to a GGML file and optionally quantize it.

    Runs ``convert-hf-to-ggml.py`` from ``BLOOMZ_FOLDER`` with ``model_id``
    and ``cache_folder`` as arguments, then checks that the expected output
    file exists in ``cache_folder``.  When ``quantization`` is true, the
    ``quantize`` executable from ``BLOOMZ_FOLDER`` is run on the converted
    file to produce a second, ``-q4_0`` file.

    Args:
        cache_folder: Directory handed to the conversion script and in which
            the resulting model files are looked up.
        model_id: Model identifier of the form ``"<owner>/<name>"``; the
            ``<name>`` part is used to build the expected file names.
        precision: ``"FP32"`` adds ``--use-fp32`` to the conversion command
            and selects the ``f32`` file suffix; any other value selects
            the ``f16`` suffix.
        quantization: Whether to run the quantization step after conversion.

    Returns:
        Path of the quantized model when ``quantization`` is true, otherwise
        path of the converted model.

    Raises:
        ValueError: If ``model_id`` does not contain exactly one ``"/"``.
        subprocess.CalledProcessError: If an external command exits with a
            non-zero status.
        FileNotFoundError: If an expected output file is missing after its
            command completed.
    """
    # Validate the identifier before launching any external command, so a
    # malformed id fails immediately instead of after the conversion ran.
    id_parts = model_id.split("/")
    if len(id_parts) != 2:
        raise ValueError(
            f"model_id must look like '<owner>/<name>', got {model_id!r}"
        )
    model_name = id_parts[1]
    use_fp32 = precision == "FP32"

    # Conversion: use the running interpreter rather than whatever "python"
    # happens to resolve to on PATH.
    cmd = [
        sys.executable,
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if use_fp32:
        cmd.append("--use-fp32")
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if use_fp32 else "f16"
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    if not model_path.is_file():
        raise FileNotFoundError(
            f"conversion did not produce the expected file: {model_path}"
        )

    if not quantization:
        return model_path

    # Quantization: the output path is kept separate from the input path so
    # the quantizer reads the converted model and writes a new file.
    q_model_path = (
        cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
    )
    cmd = [
        # Resolved through BLOOMZ_FOLDER, like the conversion script, so the
        # call does not depend on the current working directory.
        str(BLOOMZ_FOLDER / "quantize"),
        str(model_path),
        str(q_model_path),
        # NOTE(review): presumably the quantizer's type code matching the
        # "q4_0" output name -- confirm against the quantize tool's usage.
        "2",
    ]
    run(cmd, check=True)
    if not q_model_path.is_file():
        raise FileNotFoundError(
            f"quantization did not produce the expected file: {q_model_path}"
        )

    # Return
    return q_model_path