shaheerzk committed on
Commit 2e53d8a
1 Parent(s): af69f7f

Update handler.py

Files changed (1): handler.py +19 -30
handler.py CHANGED
@@ -1,33 +1,22 @@
 import torch
-
-from typing import Any, Dict
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-
-class EndpointHandler:
-    def __init__(self, path=""):
-        # load model and tokenizer from path
-        self.tokenizer = AutoTokenizer.from_pretrained(path)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
-        )
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
-        # process input
-        inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", None)
-
-        # preprocess
-        inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
-
-        # pass inputs with all kwargs in data
-        if parameters is not None:
-            outputs = self.model.generate(**inputs, **parameters)
-        else:
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+class ModelHandler:
+    def __init__(self):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = AutoModelForSeq2SeqLM.from_pretrained("shaheerzk/text_to_sql")
+        self.tokenizer = AutoTokenizer.from_pretrained("shaheerzk/text_to_sql")
+        self.model.to(self.device)
+
+    def handle(self, inputs):
+        # Preprocess input
+        text = inputs.get("text", "")
+        inputs = self.tokenizer(text, return_tensors="pt").to(self.device)
+
+        # Inference
+        with torch.no_grad():
             outputs = self.model.generate(**inputs)
 
-        # postprocess the prediction
-        prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        return [{"generated_text": prediction}]
+        # Post-process output
+        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return {"generated_text": generated_text}
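
After this change the handler loads the shaheerzk/text_to_sql checkpoint itself (instead of taking a path), reads a {"text": ...} payload rather than the old {"inputs": ..., "parameters": ...} shape, and returns a dict instead of a single-element list. A minimal local smoke test might look like the sketch below; this is illustrative and not part of the commit. It assumes the checkpoint can be downloaded from the Hub, that the file is importable as handler, and the example question is made up.

# smoke_test.py -- exercise the new ModelHandler locally (illustrative sketch)
from handler import ModelHandler

handler = ModelHandler()  # downloads shaheerzk/text_to_sql on first use

# handle() reads the "text" key and returns {"generated_text": ...}
result = handler.handle({"text": "List all customers who signed up in 2023"})
print(result["generated_text"])

Because ModelHandler.__init__ takes no arguments, any caller that previously passed a model path to EndpointHandler(path) must now rely on the hard-coded shaheerzk/text_to_sql repo id, and generation parameters can no longer be supplied per request.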