ALmonster committed on
Commit
2c257e4
·
verified ·
1 Parent(s): 032c691

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +69 -0
README.md CHANGED
@@ -54,4 +54,73 @@ generated_ids = [
54
  ]
55
 
56
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ```
 
54
  ]
55
 
56
  response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
57
+ ```
58
+
59
+ ## vLLM
60
+
61
+ We recommend deploying our model on 4 A100 GPUs. You can start the vLLM server by running the following command in a terminal:
62
+
63
+ ```bash
64
+ python -m vllm.entrypoints.openai.api_server --served-model-name chemgpt --model path/to/chemgpt --gpu-memory-utilization 0.98 --tensor-parallel-size 4 --port 6000
65
+ ```
66
+
67
+ Then, you can query the server from the client side with the following code:
68
+
69
+ ```python
70
+ import requests
71
+ import json
72
+
73
+ def general_chemgpt_stream(inputs,history):
74
+ url = 'http://localhost:6000/v1/chat/completions'
75
+
76
+ history+=[{"role": "user", "content": inputs},]
77
+
78
+ data = {
79
+ "model": "chemgpt",
80
+ "messages": history,
81
+ }
82
+
83
+ headers = {
84
+ 'Content-Type': 'application/json'
85
+ }
86
+
87
+ response = requests.post(url, headers=headers, data=json.dumps(data))
88
+
89
+ headers = {"User-Agent": "vLLM Client"}
90
+
91
+ pload = {
92
+ "model": "chemgpt",
93
+ "stream": True,
94
+ "messages": history
95
+ }
96
+ response = requests.post(url,
97
+ headers=headers,
98
+ json=pload,
99
+ stream=True)
100
+
101
+ for chunk in response.iter_lines(chunk_size=1,
102
+ decode_unicode=False,
103
+ delimiter=b"\n"):
104
+ if chunk:
105
+ string_data = chunk.decode("utf-8")
106
+ try:
107
+ json_data = json.loads(string_data[6:])
108
+ delta_content = json_data["choices"][0]["delta"]["content"]
109
+ assistant_reply+=delta_content
110
+ yield delta_content
111
+ except KeyError as e:
112
+ delta_content = json_data["choices"][0]["delta"]["role"]
113
+ except json.JSONDecodeError as e:
114
+ history+=[{
115
+ "role": "assistant",
116
+ "content": assistant_reply,
117
+ "tool_calls": []
118
+ },]
119
+ delta_content='[DONE]'
120
+ assert '[DONE]'==chunk.decode("utf-8")[6:]
121
+
122
+ inputs='介绍一下NaoH'
123
+ history_chem=[]
124
+ for response_text in general_chemgpt_stream(inputs,history_chem):
125
+ print(response_text,end='')
126
  ```