Update README.md
Browse files
README.md
CHANGED
@@ -54,4 +54,73 @@ generated_ids = [
|
|
54 |
]
|
55 |
|
56 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
```
|
|
|
54 |
]
|
55 |
|
56 |
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
57 |
+
```
|
58 |
+
|
59 |
+
## VLLM
|
60 |
+
|
61 |
+
We recommend deploying our model using 4 A100 GPUs. You can start the vLLM server with the following command in the terminal:
|
62 |
+
|
63 |
+
```bash
|
64 |
+
python -m vllm.entrypoints.openai.api_server --served-model-name chemgpt --model path/to/chemgpt --gpu-memory-utilization 0.98 --tensor-parallel-size 4 --port 6000
|
65 |
+
```
|
66 |
+
|
67 |
+
Then, you can use the following code to deploy client-side:
|
68 |
+
|
69 |
+
```python
|
70 |
+
import requests
|
71 |
+
import json
|
72 |
+
|
73 |
+
def general_chemgpt_stream(inputs, history):
    """Stream a chat completion for *inputs* from the local vLLM server.

    Appends the user turn to *history* in place, streams the assistant's
    reply chunk by chunk, and finally appends the full assistant turn to
    *history* as well.

    Args:
        inputs: The user's message text.
        history: Mutable list of OpenAI-style message dicts ({"role", "content"});
            updated in place with both the user and assistant turns.

    Yields:
        str: Incremental chunks of the assistant's reply content.
    """
    # Fixed typo from the original ('loaclhost'), which made every request fail.
    url = 'http://localhost:6000/v1/chat/completions'

    history += [{"role": "user", "content": inputs}]

    headers = {"User-Agent": "vLLM Client"}

    # Must be initialised before the loop: the original referenced it with '+='
    # before any assignment, raising NameError on the first streamed chunk.
    assistant_reply = ""

    # The original also issued a second, non-streaming POST of the same
    # conversation whose response was never read; that dead request is removed.
    pload = {
        "model": "chemgpt",
        "stream": True,
        "messages": history,
    }
    response = requests.post(url,
                             headers=headers,
                             json=pload,
                             stream=True)

    for chunk in response.iter_lines(chunk_size=1,
                                     decode_unicode=False,
                                     delimiter=b"\n"):
        if chunk:
            string_data = chunk.decode("utf-8")
            try:
                # SSE lines look like 'data: {...}' — strip the 6-char prefix.
                json_data = json.loads(string_data[6:])
                delta_content = json_data["choices"][0]["delta"]["content"]
                assistant_reply += delta_content
                yield delta_content
            except KeyError:
                # The first event carries only the role, no "content" key.
                delta_content = json_data["choices"][0]["delta"]["role"]
            except json.JSONDecodeError:
                # Final 'data: [DONE]' sentinel: record the completed reply.
                history += [{
                    "role": "assistant",
                    "content": assistant_reply,
                    "tool_calls": [],
                }]
                delta_content = '[DONE]'
                assert '[DONE]' == chunk.decode("utf-8")[6:]
|
121 |
+
|
122 |
+
# Example: ask about sodium hydroxide and print the reply as it streams.
# Fixed the chemical-formula typo in the prompt ('NaoH' -> 'NaOH').
inputs = '介绍一下NaOH'
history_chem = []
for response_text in general_chemgpt_stream(inputs, history_chem):
    print(response_text, end='')
|
126 |
```
|