vLLM Deploy

#10
by shacotustra - opened

How do I deploy this model in vLLM? Loading it with `quantization='bitsandbytes'` fails with the traceback below:

Traceback (most recent call last):
[rank0]: File "/llms/com_r_trial.py", line 12, in
[rank0]: llm = LLM(model="/llms/c4ai-command-r-v01-4bit", quantization = 'bitsandbytes', load_format = 'bitsandbytes')
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/llm.py", line 155, in init
[rank0]: self.llm_engine = LLMEngine.from_engine_args(
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 441, in from_engine_args
[rank0]: engine = cls(
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 251, in init
[rank0]: self.model_executor = executor_class(
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 47, in init
[rank0]: self._init_executor()
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/executor/gpu_executor.py", line 36, in _init_executor
[rank0]: self.driver_worker.load_model()
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker.py", line 139, in load_model
[rank0]: self.model_runner.load_model()
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 682, in load_model
[rank0]: self.model = get_model(model_config=self.model_config,
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/init.py", line 21, in get_model
[rank0]: return loader.load_model(model_config=model_config,
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 828, in load_model
[rank0]: model = _initialize_model(model_config, self.load_config,
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 109, in _initialize_model
[rank0]: quant_config = _get_quantization_config(model_config, load_config)
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 50, in _get_quantization_config
[rank0]: quant_config = get_quant_config(model_config, load_config)
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/weight_utils.py", line 130, in get_quant_config
[rank0]: return quant_cls.from_config(hf_quant_config)
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/quantization/bitsandbytes.py", line 52, in from_config
[rank0]: adapter_name = cls.get_from_keys(config, ["adapter_name_or_path"])
[rank0]: File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/layers/quantization/base_config.py", line 87, in get_from_keys
[rank0]: raise ValueError(f"Cannot find any of {keys} in the model's "
[rank0]: ValueError: Cannot find any of ['adapter_name_or_path'] in the model's quantization config.

Sign up or log in to comment