Update README.md
Browse files
README.md
CHANGED
@@ -233,7 +233,7 @@ except Exception as e:
|
|
233 |
```python
|
234 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
235 |
|
236 |
-
model_name = "openthaigpt/openthaigpt1.5-
|
237 |
|
238 |
model = AutoModelForCausalLM.from_pretrained(
|
239 |
model_name,
|
@@ -271,13 +271,13 @@ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
271 |
|
272 |
2. Run server
|
273 |
```bash
|
274 |
-
vllm serve openthaigpt/openthaigpt1.5-
|
275 |
```
|
276 |
* Note: change ``--tensor-parallel-size 4`` to the number of available GPU cards.
|
277 |
|
278 |
If you wish to enable the tool calling feature, add ``--enable-auto-tool-choice --tool-call-parser hermes`` to the command. e.g.,
|
279 |
```bash
|
280 |
-
vllm serve openthaigpt/openthaigpt1.5-
|
281 |
```
|
282 |
|
283 |
3. Run inference (CURL example)
|
|
|
233 |
```python
|
234 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
235 |
|
236 |
+
model_name = "openthaigpt/openthaigpt1.5-7b-instruct"
|
237 |
|
238 |
model = AutoModelForCausalLM.from_pretrained(
|
239 |
model_name,
|
|
|
271 |
|
272 |
2. Run server
|
273 |
```bash
|
274 |
+
vllm serve openthaigpt/openthaigpt1.5-7b-instruct --tensor-parallel-size 4
|
275 |
```
|
276 |
* Note: change ``--tensor-parallel-size 4`` to the number of available GPU cards.
|
277 |
|
278 |
If you wish to enable the tool calling feature, add ``--enable-auto-tool-choice --tool-call-parser hermes`` to the command. e.g.,
|
279 |
```bash
|
280 |
+
vllm serve openthaigpt/openthaigpt1.5-7b-instruct --tensor-parallel-size 4 --enable-auto-tool-choice --tool-call-parser hermes
|
281 |
```
|
282 |
|
283 |
3. Run inference (CURL example)
|