Imran1
/

Qwen2.5-72B-Instruct-FP8

Model card Files Files and versions Community

FINGU-AI committed on Oct 9

Commit

c08633e

•

1 Parent(s): df9d248

Update inference.py

Files changed (1) hide show

inference.py +9 -2

inference.py CHANGED Viewed

@@ -5,12 +5,19 @@ import sys
 import torch
 from typing import List, Dict
-# Ensure vllm is installed
 try:
     import vllm
 except ImportError:
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "vllm"])
 # Import the necessary modules after installation
 from vllm import LLM, SamplingParams
 from vllm.utils import random_uuid

 import torch
 from typing import List, Dict
+# Ensure vllm is installed. When it is missing, install the prebuilt wheel that
+# matches the CUDA version torch reports, so the vllm binaries and torch agree.
 try:
     import vllm
 except ImportError:
+    # Pick the vllm wheel from the CUDA version reported by torch.
+    cuda_version = torch.version.cuda
+    if cuda_version == "11.8":
+        # The GitHub release tag carries a leading "v" but the wheel filename
+        # does not, so the bare version is stored and "v" is added for the tag only.
+        vllm_version = "0.6.1.post1"
+        # NOTE(review): the wheel is tagged cp310, i.e. built for CPython 3.10;
+        # confirm the runtime interpreter matches.
+        wheel_url = (
+            "https://github.com/vllm-project/vllm/releases/download/"
+            f"v{vllm_version}/vllm-{vllm_version}+cu118-cp310-cp310-manylinux1_x86_64.whl"
+        )
+        # Every pip argument must be its own argv entry: check_call runs without a
+        # shell, so a single "pip install <url> --extra-index-url ..." string would
+        # reach pip as one malformed requirement.
+        pip_args = [
+            wheel_url,
+            "--extra-index-url",
+            "https://download.pytorch.org/whl/cu118",
+        ]
+    else:
+        # Only the CUDA 11.8 wheel is wired up; fail loudly on anything else.
+        raise RuntimeError(f"Unsupported CUDA version: {cuda_version}")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", *pip_args])
 # Import the necessary modules after installation
 from vllm import LLM, SamplingParams
 from vllm.utils import random_uuid