Yingxu He committed (verified)
Commit 4f9c767 · 1 Parent(s): b3fa950

Create handler.py

Files changed (1)
  1. handler.py +30 -0
handler.py ADDED
@@ -0,0 +1,30 @@
+ import torch
+ import chatglm_cpp
+ from typing import Dict, List, Any
+
+ # get dtype
+ # dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # load the model
+         self.pipeline = chatglm_cpp.Pipeline(f"{path}/q5_1.bin")
+         # system prompt (Chinese): "Please now act as a software engineer named He Yingxu, and talk with friends in this persona."
+         self.system_message = chatglm_cpp.ChatMessage(role="system", content="请你现在扮演一个软件工程师,名字叫做贺英旭。你需要以这个身份和朋友们对话。")
+
+     def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", None)
+
+         messages = [
+             self.system_message,
+             chatglm_cpp.ChatMessage(role="user", content=inputs)
+         ]
+
+         # pass inputs with all kwargs in data
+         if parameters is not None:
+             prediction = self.pipeline.chat(messages, **parameters)
+         else:
+             prediction = self.pipeline.chat(messages)
+         # postprocess the prediction
+         return prediction
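
For reference, a minimal local smoke test of the handler above (a sketch, not part of the commit): it assumes the model repository root contains the q5_1.bin weights, that the chatglm-cpp Python package is installed, and that requests follow the usual Inference Endpoints payload shape of {"inputs": ..., "parameters": {...}}; the max_length value is only an example generation setting.

from handler import EndpointHandler

# point the handler at the local snapshot of this model repository
handler = EndpointHandler(path=".")

# "parameters" is forwarded as keyword arguments to pipeline.chat()
payload = {"inputs": "Hi, what are you working on these days?", "parameters": {"max_length": 256}}
prediction = handler(payload)
print(prediction)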