Spaces:
Runtime error
Runtime error
from pathlib import Path | |
from subprocess import run | |
from typing import Generator | |
# Location of the bloomz.cpp directory, resolved relative to this module's own
# file so that lookups do not depend on the current working directory.
BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"
def convert(
    cache_folder: Path, model_id: str, precision: str, quantization: bool
) -> Generator[str, None, Path]:
    """Convert a model to ggml and optionally quantize it.

    This is a generator: it yields human-readable progress messages while the
    work runs and *returns* the path of the final model file. Callers obtain
    that path from ``StopIteration.value`` or through ``yield from``.

    Args:
        cache_folder: Directory handed to the conversion script; the produced
            ``.bin`` files are expected to appear in it.
        model_id: Model identifier such as ``"org/name"``. Only the part after
            the last ``/`` is used to build the expected file names.
        precision: ``"FP32"`` adds ``--use-fp32`` and selects the ``f32`` file
            suffix; any other value selects ``f16``.
        quantization: When true, run the ``quantize`` binary on the converted
            file and delete the non-quantized file afterwards.

    Yields:
        Progress messages describing each command and its outcome.

    Returns:
        Path of the quantized model if ``quantization`` is true, otherwise
        path of the converted model.

    Raises:
        subprocess.CalledProcessError: If a command exits with a non-zero
            status (``check=True``).
        AssertionError: If an expected output file is missing after its
            command finished.
    """
    # Conversion
    cmd = [
        "python",
        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
        model_id,
        str(cache_folder),
    ]
    if precision == "FP32":
        cmd.append("--use-fp32")
    yield f"Running command: `{' '.join(cmd)}`"
    run(cmd, check=True)

    # Model file should exist
    f_suffix = "f32" if precision == "FP32" else "f16"
    # Take the last path component so ids without a namespace (no "/") do not
    # crash on tuple unpacking.
    # NOTE(review): assumes the conversion script names its output after the
    # last component of the id -- confirm against convert-hf-to-ggml.py.
    model_name = model_id.rsplit("/", 1)[-1]
    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
    if not model_path.is_file():
        # Explicit raise rather than `assert` so the check survives
        # `python -O`; AssertionError is kept for existing callers.
        raise AssertionError(f"Converted model file not found: {model_path}")
    yield f"Model successfully converted to ggml: {model_path}"

    # Quantization
    if quantization:
        q_model_path = (
            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
        )
        cmd = [
            # Resolved through BLOOMZ_FOLDER, like the conversion script, so
            # the call does not depend on the current working directory.
            str(BLOOMZ_FOLDER / "quantize"),
            str(model_path),
            str(q_model_path),
            # NOTE(review): presumably the quantization type id matching the
            # "q4_0" file suffix -- confirm against the quantize tool's usage.
            "2",
        ]
        yield f"Running command: `{' '.join(cmd)}`"
        run(cmd, check=True)
        if not q_model_path.is_file():
            raise AssertionError(
                f"Quantized model file not found: {q_model_path}"
            )

        # Delete non-quantized file
        model_path.unlink(missing_ok=True)
        model_path = q_model_path
        yield f"Model successfully quantized: {model_path}"

    # Return
    return model_path