Update README.md
README.md

````diff
@@ -56,17 +56,20 @@ docker run --gpus=1 --rm --net=host -v ${PWD}:/workdir yibolu96/lyra-chatglm-env
 
 ```python
 from transformers import AutoTokenizer
-from
+from lyraChatGLM import GLM6B, FasterChatGLM
+import os
 
+current_workdir = os.path.dirname(__file__)
 
 MAX_OUT_LEN = 100
-
+chatglm6b_dir = os.path.join(current_workdir, "models")
+tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)
 input_str = ["为什么我们需要对深度学习模型加速?", ]
 inputs = tokenizer(input_str, return_tensors="pt", padding=True)
 input_ids = inputs.input_ids.to('cuda:0')
 
-
-plan_path = './models/glm6b-bs8.ftm'
+plan_path = os.path.join(current_workdir, "models/glm6b-bs8.ftm")
+
 # kernel for chat model.
 kernel = GLM6B(plan_path=plan_path,
                batch_size=1,
@@ -78,7 +81,7 @@ kernel = GLM6B(plan_path=plan_path,
                vocab_size=150528,
                max_seq_len=MAX_OUT_LEN)
 
-chat = FasterChatGLM(model_dir=
+chat = FasterChatGLM(model_dir=chatglm6b_dir, kernel=kernel).half().cuda()
 
 # generate
 sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)
````
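After this change, the snippet resolves both the model directory and the `.ftm` engine plan relative to the script's own location via `os.path.dirname(__file__)`, so it no longer depends on the caller's working directory. The README example stops at `generate`; as a minimal follow-up sketch (not part of the README itself), the returned ids can be turned back into text with the same tokenizer using the standard `transformers` `batch_decode` API. The example prompt means "Why do we need to accelerate deep learning models?":

```python
# Illustrative addition, not from the README: decode the generated ids
# back to strings with the same AutoTokenizer used for encoding.
res = tokenizer.batch_decode(sample_output, skip_special_tokens=True)
print(res)
```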