Spaces:

Wauplin
/

bloomz.cpp-converter

Runtime error

testbot

Requested changes

8ec55c7 over 1 year ago

1.38 kB

	from pathlib import Path
	from subprocess import run
	from typing import Generator

	# Local bloomz.cpp checkout, expected to sit next to this file. Both the
	# conversion script and the `quantize` executable are resolved through it.
	BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"


	def convert(
	    cache_folder: Path, model_id: str, precision: str, quantization: bool
	) -> Generator[str, None, Path]:
	    """Convert a model to ggml with bloomz.cpp, optionally quantizing it.

	    This is a generator: it yields human-readable progress messages while
	    the external commands run, and hands back the final model path as the
	    generator's return value (``StopIteration.value``, or the result of a
	    ``yield from`` expression). Nothing is executed until the generator is
	    advanced.

	    Args:
	        cache_folder: Directory given to the conversion script as its
	            second positional argument; the produced ``.bin`` files are
	            looked up in it.
	        model_id: Model identifier given to the conversion script. Only the
	            part after the last ``/`` is used to build the expected output
	            file name, so ids with or without a namespace are accepted.
	        precision: ``"FP32"`` adds ``--use-fp32`` to the conversion command
	            and an ``f32`` output file is expected; any other value expects
	            an ``f16`` output file.
	        quantization: When true, run the ``quantize`` executable on the
	            converted model and return the quantized file instead.

	    Yields:
	        Progress messages describing each command and its outcome.

	    Returns:
	        Path to the quantized model when ``quantization`` is true,
	        otherwise path to the converted model.

	    Raises:
	        subprocess.CalledProcessError: If a command exits with a non-zero
	            status (``check=True``).
	        AssertionError: If a command succeeded but its expected output file
	            does not exist.
	    """
	    # Conversion
	    cmd = [
	        "python",
	        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
	        model_id,
	        str(cache_folder),
	    ]
	    if precision == "FP32":
	        cmd.append("--use-fp32")
	    yield f"Running command: `{' '.join(cmd)}`"
	    run(cmd, check=True)

	    # Model file should exist
	    f_suffix = "f32" if precision == "FP32" else "f16"
	    # Keep only the last path component so that ids without a namespace (or
	    # with extra separators) do not blow up on tuple unpacking.
	    model_name = model_id.rpartition("/")[2]
	    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
	    if not model_path.is_file():
	        # Explicit raise instead of `assert` so the check survives `python -O`;
	        # AssertionError is kept so callers see the same exception type.
	        raise AssertionError(f"Converted model not found: {model_path}")
	    yield f"Model successfully converted to ggml: {model_path}"

	    # Quantization
	    if quantization:
	        q_model_path = (
	            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
	        )
	        cmd = [
	            # Resolved relative to this file, like the conversion script,
	            # rather than relative to the current working directory.
	            str(BLOOMZ_FOLDER / "quantize"),
	            str(model_path),
	            str(q_model_path),
	            # NOTE(review): presumably selects the q4_0 scheme, matching the
	            # output file name above; confirm against bloomz.cpp.
	            "2",
	        ]
	        yield f"Running command: `{' '.join(cmd)}`"
	        run(cmd, check=True)
	        if not q_model_path.is_file():
	            raise AssertionError(f"Quantized model not found: {q_model_path}")
	        model_path = q_model_path
	        yield f"Model successfully quantized: {model_path}"

	    # Return
	    return model_path