Update README.md
README.md

````diff
@@ -56,17 +56,20 @@ docker run --gpus=1 --rm --net=host -v ${PWD}:/workdir yibolu96/lyra-chatglm-env
 
 ```python
 from transformers import AutoTokenizer
-from
+from lyraChatGLM import GLM6B, FasterChatGLM
+import os
 
+current_workdir = os.path.dirname(__file__)
 
 MAX_OUT_LEN = 100
-
+chatglm6b_dir = os.path.join(current_workdir, "models")
+tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)
 input_str = ["为什么我们需要对深度学习模型加速?", ]
 inputs = tokenizer(input_str, return_tensors="pt", padding=True)
 input_ids = inputs.input_ids.to('cuda:0')
 
-
-plan_path = './models/glm6b-bs8.ftm'
+plan_path = os.path.join(current_workdir, "models/glm6b-bs8.ftm")
+
 # kernel for chat model.
 kernel = GLM6B(plan_path=plan_path,
                batch_size=1,
@@ -78,7 +81,7 @@ kernel = GLM6B(plan_path=plan_path,
                vocab_size=150528,
                max_seq_len=MAX_OUT_LEN)
 
-chat = FasterChatGLM(model_dir=
+chat = FasterChatGLM(model_dir=chatglm6b_dir, kernel=kernel).half().cuda()
 
 # generate
 sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)
````
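After this change, the snippet resolves both the model directory and the `.ftm` engine plan relative to the script's own location via `os.path.dirname(__file__)`, so it no longer depends on the caller's working directory. The README example stops at `generate`; as a minimal follow-up sketch (not part of the README itself), the returned ids can be turned back into text with the same tokenizer using the standard `transformers` `batch_decode` API. The example prompt means "Why do we need to accelerate deep learning models?":

```python
# Illustrative addition, not from the README: decode the generated ids
# back to strings with the same AutoTokenizer used for encoding.
res = tokenizer.batch_decode(sample_output, skip_special_tokens=True)
print(res)
```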