yibolu committed · Commit 308345c · Parent(s): 34d9abf

add missing files, fix readme and add docker file
Files changed:
- .gitattributes +1 -0
- Dockerfile +11 -0
- README.md +12 -1
- demo.py +6 -4
- faster_chat_glm/glm.cpython-38-x86_64-linux-gnu.so +0 -0
- {faster_chat_glm → lyraChatGLM}/__init__.py +3 -0
- {faster_chat_glm → lyraChatGLM}/__init__.py~ +0 -0
- lyraChatGLM/glm.cpython-38-x86_64-linux-gnu.so +3 -0
- lyraChatGLM/libnvinfer_plugin.so +3 -0
- {faster_chat_glm → lyraChatGLM}/model.py +0 -0
- requirements.txt +4 -0
.gitattributes CHANGED

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 models/glm6b-kv-cache-dy-bs8.ftm filter=lfs diff=lfs merge=lfs -text
 models/glm6b-bs8.ftm filter=lfs diff=lfs merge=lfs -text
+*.so filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED

@@ -0,0 +1,11 @@
+FROM nvcr.io/nvidia/pytorch:23.02-py3
+
+WORKDIR /workdir
+
+COPY requirements.txt /workdir/
+
+# since installing icetk will install protobuf 3.18.3, and we need protobuf==3.20.3
+RUN pip install -r requirements.txt && \
+    pip install protobuf==3.20.3
+
+
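The comment in the RUN step is the key detail: installing icetk drags in protobuf 3.18.3, so the Dockerfile pins protobuf==3.20.3 in a second pip call that runs after requirements.txt. A minimal sanity check for the pin, run inside the built image (a hypothetical snippet, not part of this commit):

```python
# Hypothetical check that the two-step install left the intended protobuf.
# Run inside a container built from this Dockerfile; not code from the commit.
from google.protobuf import __version__ as pb_version

assert pb_version == "3.20.3", f"unexpected protobuf version: {pb_version}"
print("protobuf", pb_version)
```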
README.md CHANGED

@@ -35,6 +35,17 @@ Among its main features are:
 
 - **Repository:** [https://huggingface.co/THUDM/chatglm-6b]
 
+## Try Demo in 2 fast steps
+
+``` bash
+#step 1
+git clone https://huggingface.co/TMElyralab/lyraChatGLM
+cd lyraChatGLM
+
+#step 2
+docker run --gpus=1 --rm --net=host -v ${PWD}:/workdir yibolu96/lyra-chatglm-env:0.0.1 python3 /workdir/demo.py
+```
+
 ## Uses
 
 ```python

@@ -86,7 +97,7 @@ print(res)
 ## Citation
 ``` bibtex
 @Misc{lyraChatGLM2023,
-  author = {Kangjian Wu, Zhengtao Wang, Bin Wu},
+  author = {Kangjian Wu, Zhengtao Wang, Yibo Lu, Bin Wu},
   title = {lyraChatGLM: Accelerating ChatGLM by 10x+},
   howpublished = {\url{https://huggingface.co/TMElyralab/lyraChatGLM}},
   year = {2023}
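A note on the flags in the demo's docker run line: `--gpus=1` exposes one GPU to the container, `--rm` discards the container on exit, `-v ${PWD}:/workdir` mounts the freshly cloned repository (models included) where demo.py expects it, and `--net=host` reuses the host network. The `yibolu96/lyra-chatglm-env:0.0.1` image is presumably built from the Dockerfile added above.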
demo.py CHANGED

@@ -1,18 +1,20 @@
 # coding=utf-8
 
 from transformers import AutoTokenizer
-from faster_chat_glm import GLM6B, FasterChatGLM
+from lyraChatGLM import GLM6B, FasterChatGLM
+import os
 
+current_workdir = os.path.dirname(__file__)
 
 MAX_OUT_LEN = 100
-chatglm6b_dir = …
+chatglm6b_dir = os.path.join(current_workdir, "models")
 tokenizer = AutoTokenizer.from_pretrained(chatglm6b_dir, trust_remote_code=True)
 input_str = ["为什么我们需要对深度学习模型加速?", ]
 inputs = tokenizer(input_str, return_tensors="pt", padding=True)
 input_ids = inputs.input_ids.to('cuda:0')
 
-plan_path = './models/glm6b-bs8.ftm'
+plan_path = os.path.join(current_workdir, "models/glm6b-bs8.ftm")
 
 # kernel for chat model.
 kernel = GLM6B(plan_path=plan_path,
                batch_size=1,

@@ -24,7 +26,7 @@ kernel = GLM6B(plan_path=plan_path,
                vocab_size=150528,
                max_seq_len=MAX_OUT_LEN)
 
-chat = FasterChatGLM(model_dir=…
+chat = FasterChatGLM(model_dir=chatglm6b_dir, kernel=kernel).half().cuda()
 
 # generate
 sample_output = chat.generate(inputs=input_ids, max_length=MAX_OUT_LEN)
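The excerpt stops at `generate`; the README's second hunk anchors at `print(res)`, which suggests the demo decodes and prints the output next. A sketch of that decoding step, assuming the standard `transformers` tokenizer API:

```python
# Decode generated token ids back to strings (sketch; mirrors the
# `print(res)` context visible in the README hunk header above).
res = tokenizer.batch_decode(sample_output, skip_special_tokens=True)
print(res)
```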
faster_chat_glm/glm.cpython-38-x86_64-linux-gnu.so DELETED

Binary file (188 kB)
{faster_chat_glm → lyraChatGLM}/__init__.py RENAMED

@@ -1,5 +1,8 @@
 import os
+import ctypes
 
+current_workdir = os.path.dirname(__file__)
+ctypes.cdll.LoadLibrary(os.path.join(current_workdir, "libnvinfer_plugin.so"))
 os.environ["TORCH_USE_RTLD_GLOBAL"]="YES"
 
 import torch
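Besides the rename, this change adjusts load order: `libnvinfer_plugin.so` is opened with `ctypes` before `torch` is imported, so the TensorRT plugin's symbols are already in the process, and `TORCH_USE_RTLD_GLOBAL=YES` makes torch load its own libraries with global symbol visibility. A standalone sketch of the same preload pattern (paths hypothetical, not code from this commit):

```python
import ctypes
import os

# Preload a shared library with globally visible symbols so that extension
# modules imported later (torch, the glm .so) can resolve against it.
lib = os.path.join(os.path.dirname(__file__), "libnvinfer_plugin.so")
ctypes.CDLL(lib, mode=ctypes.RTLD_GLOBAL)

# Ask torch to use RTLD_GLOBAL for its own libraries as well.
os.environ["TORCH_USE_RTLD_GLOBAL"] = "YES"

import torch  # noqa: E402 -- deliberately imported after the preload
```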
{faster_chat_glm → lyraChatGLM}/__init__.py~ RENAMED

File without changes
lyraChatGLM/glm.cpython-38-x86_64-linux-gnu.so ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feaeb19a7b780cdb669066bb096726d23f0c3ed401fe2f71adf12c66960c0d07
+size 188432
lyraChatGLM/libnvinfer_plugin.so ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a87eb31795009c545422ef978f607d97be5454c68f09cb829352c0529d1ba8b
+size 235256088
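Both new `.so` files are stored as Git LFS pointers (hence the `*.so` rule added to `.gitattributes`); a clone without LFS installed leaves the three-line pointer text in place of the ~235 MB plugin. A hypothetical guard against that failure mode:

```python
import os

# Hypothetical guard: an unfetched LFS file is a ~130-byte text pointer,
# while the real plugin is 235256088 bytes (see the `size` line above).
path = "lyraChatGLM/libnvinfer_plugin.so"
if os.path.getsize(path) < 1024:
    raise RuntimeError(f"{path} looks like an LFS pointer; run `git lfs pull`")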
{faster_chat_glm → lyraChatGLM}/model.py RENAMED

File without changes
requirements.txt ADDED

@@ -0,0 +1,4 @@
+icetk
+torch
+transformers
+