YingxuHe
/

chatglm3-6b-chitchat

Inference Endpoints

Model card Files Files and versions Community

Yingxu He committed on Oct 29, 2024

Commit

4f9c767

·

verified ·

1 Parent(s): b3fa950

Create handler.py

Files changed (1) hide show

handler.py +30 -0

handler.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import torch
+import chatglm_cpp
+from typing import Dict, List, Any
+# get dtype
+# dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
+class EndpointHandler:
+    def __init__(self, path=""):
+        # load the model
+        self.pipeline = chatglm_cpp.Pipeline(f"{path}/q5_1.bin")
+        self.system_message = chatglm_cpp.ChatMessage(role="system", content="请你现在扮演一个软件工程师，名字叫做贺英旭。你需要以这个身份和朋友们对话。")
+    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        messages = [
+            self.system_message,
+            chatglm_cpp.ChatMessage(role="user", content=inputs)
+        ]
+        # pass inputs with all kwargs in data
+        if parameters is not None:
+            prediction = self.pipeline.chat(messages, **parameters)
+        else:
+            prediction = self.pipeline.chat(messages)
+        # postprocess the prediction
+        return prediction