Update README.md
Browse files
README.md
CHANGED
@@ -13,4 +13,11 @@ conda create -n vllm8 python=3.10 -y && conda activate vllm8
|
|
13 |
pip install -U git+https://github.com/vllm-project/vllm.git@a134ef6
|
14 |
|
15 |
python -m vllm.entrypoints.openai.api_server --model cat-llama-3-8b-awq-q128-w4-gemm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
```
|
|
|
13 |
pip install -U git+https://github.com/vllm-project/vllm.git@a134ef6
|
14 |
|
15 |
python -m vllm.entrypoints.openai.api_server --model cat-llama-3-8b-awq-q128-w4-gemm
|
16 |
+
|
17 |
+
```
|
18 |
+
|
19 |
+
To use 2 GPUs, add `--tensor-parallel-size 2 --gpu-memory-utilization 0.95` to the command:
|
20 |
+
|
21 |
+
```
|
22 |
+
python -m vllm.entrypoints.openai.api_server --model cat-llama-3-8b-awq-q128-w4-gemm --tensor-parallel-size 2 --gpu-memory-utilization 0.95
|
23 |
```
|