zRzRzRzRzRzRzR committed
Commit 79175b4 (parent: 715ddbe)

Update README.md

Files changed (1): README.md (+8 −9)
README.md CHANGED
@@ -78,12 +78,12 @@ from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
 # GLM-4-9B-Chat-1M
-# max_model_len, tp_size = 1048576, 4
 
-# GLM-4-9B-Chat
-max_model_len, tp_size = 131072, 1
-model_name = "THUDM/glm-4-9b-chat"
-prompt = '你好'
+# If you run into OOM, try reducing max_model_len or increasing tp_size
+max_model_len, tp_size = 1048576, 4
+
+model_name = "THUDM/glm-4-9b-chat-1m"
+prompt = [{"role": "user", "content": "你好"}]
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 llm = LLM(
@@ -99,11 +99,10 @@ llm = LLM(
 stop_token_ids = [151329, 151336, 151338]
 sampling_params = SamplingParams(temperature=0.95, max_tokens=1024, stop_token_ids=stop_token_ids)
 
-inputs = tokenizer.build_chat_input(prompt, history=None, role='user')['input_ids'].tolist()
-outputs = llm.generate(prompt_token_ids=inputs, sampling_params=sampling_params)
+inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
+outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)
 
-generated_text = [output.outputs[0].text for output in outputs]
-print(generated_text)
+print(outputs[0].outputs[0].text)
 ```
 
 ## License
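
For reference, the updated snippet assembled into a single runnable script might look like the sketch below. The `llm = LLM(` constructor arguments fall outside the diff hunks, so the keyword arguments shown here (`model`, `tensor_parallel_size`, `max_model_len`, `trust_remote_code`) are assumed from the variable names above and standard vLLM parameters, not taken from this commit.

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# If you run into OOM, try reducing max_model_len or increasing tp_size
max_model_len, tp_size = 1048576, 4

model_name = "THUDM/glm-4-9b-chat-1m"
prompt = [{"role": "user", "content": "你好"}]

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Constructor arguments assumed: the diff elides this call's body.
llm = LLM(
    model=model_name,
    tensor_parallel_size=tp_size,
    max_model_len=max_model_len,
    trust_remote_code=True,
)

stop_token_ids = [151329, 151336, 151338]
sampling_params = SamplingParams(temperature=0.95, max_tokens=1024, stop_token_ids=stop_token_ids)

# Render the chat messages into a prompt string; vLLM tokenizes it internally.
inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)

print(outputs[0].outputs[0].text)
```

The substantive change in this commit is the move from the model-specific `tokenizer.build_chat_input(...)` plus pre-tokenized `prompt_token_ids=` to the standard Hugging Face `apply_chat_template` API, passing the rendered prompt string to `llm.generate(prompts=...)` and letting vLLM handle tokenization.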