usamakenway committed
Commit
06e5302
1 Parent(s): 406b0c3

Adding custom Inference Handler for HF endpoint

Files changed (2)
  1. handler.py +26 -0
  2. requirements.txt +1 -0
handler.py ADDED
@@ -0,0 +1,26 @@
+
+ from transformers import AutoTokenizer, pipeline
+ from auto_gptq import AutoGPTQForCausalLM
+ from typing import Dict, List, Any
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # load the tokenizer and the GPTQ-quantized model from the repository path
+         tokenizer = AutoTokenizer.from_pretrained(path)
+         model = AutoGPTQForCausalLM.from_quantized(path, device="cuda:0", use_safetensors=True)
+
+         # create the text-generation inference pipeline
+         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", None)
+
+         # pass any generation parameters through to the pipeline
+         if parameters is not None:
+             prediction = self.pipeline(inputs, **parameters)
+         else:
+             prediction = self.pipeline(inputs)
+         # return the raw pipeline output
+         return prediction
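For reference, a minimal local smoke test of the handler (a sketch, not part of this commit): it assumes the repository files have been downloaded to a hypothetical "./model" directory, that auto-gptq is installed, and that a CUDA device is available.

    from handler import EndpointHandler

    # hypothetical local check; "./model" stands in for a local copy of this repository
    handler = EndpointHandler(path="./model")
    result = handler({"inputs": "Hello, world!", "parameters": {"max_new_tokens": 32}})
    print(result)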
requirements.txt ADDED
@@ -0,0 +1 @@
+ auto-gptq
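Once deployed, the Inference Endpoint accepts the same payload shape the handler consumes. A minimal sketch using the requests library; the endpoint URL and access token below are placeholders, not values from this commit.

    import requests

    # placeholders: substitute the deployed endpoint URL and a valid Hugging Face access token
    API_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
    HEADERS = {"Authorization": "Bearer <hf_token>", "Content-Type": "application/json"}

    payload = {"inputs": "Hello, world!", "parameters": {"max_new_tokens": 32}}
    response = requests.post(API_URL, headers=HEADERS, json=payload)
    print(response.json())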