smangrul committed on
Commit
a56b530
·
1 Parent(s): f1a416c

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +46 -0
handler.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict
2
+
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
+ from peft import PeftConfig, PeftModel
7
+ from transformers import pipeline
8
+
9
+
10
class EndpointHandler:
    """Inference-endpoint handler that serves a PEFT/LoRA-adapted causal LM
    through a transformers text-generation pipeline.
    """

    def __init__(self, path=""):
        """Load the base model plus the PEFT adapter stored at ``path`` and
        build the generation pipeline.

        Args:
            path: Local directory or hub id of the PEFT adapter repository
                (its config points at the base model to load).
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        config = PeftConfig.from_pretrained(path)
        # Load the base model in fp16; trust_remote_code allows architectures
        # whose modeling code ships inside the model repo.
        model = AutoModelForCausalLM.from_pretrained(
            config.base_model_name_or_path,
            return_dict=True,
            torch_dtype=torch.float16,
            trust_remote_code=True,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            config.base_model_name_or_path, trust_remote_code=True
        )
        # Attach the adapter, then merge its weights into the base model so
        # inference runs without the PEFT wrapper indirection.
        model = PeftModel.from_pretrained(model, path)
        model.to(torch.float16)
        model.to(self.device)
        self.model = model.merge_and_unload()
        self.model.eval()
        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            device=self.device,
            torch_dtype=torch.float16,
        )

    def __call__(self, data: Dict[str, Any]) -> Any:
        """Run generation for one request.

        Args:
            data: Request payload; expected keys are ``"inputs"`` (a prompt
                string or a dict of pipeline kwargs) and optionally
                ``"parameters"`` (generation kwargs). If ``"inputs"`` is
                missing, the remaining payload itself is used as the kwargs.

        Returns:
            The pipeline output (for text-generation, a list of dicts with
            a ``"generated_text"`` key).
        """
        # Pop "parameters" BEFORE resolving inputs: the original order made
        # `inputs` alias `data` when the "inputs" key was absent, so the
        # later pop silently mutated `inputs` as well.
        parameters = data.pop("parameters", None) or {}
        inputs = data.pop("inputs", data)

        # A plain prompt string cannot be **-unpacked; pass it positionally.
        if isinstance(inputs, dict):
            return self.pipeline(**inputs, **parameters)
        return self.pipeline(inputs, **parameters)