nbroad HF staff committed on
Commit
f1ad91f
1 Parent(s): bfdd558

flexibility for CPU or CUDA execution provider (EP)

Browse files
Files changed (1) hide show
  1. handler.py +13 -2
handler.py CHANGED
@@ -11,15 +11,26 @@ if torch.backends.cudnn.is_available():
11
 
12
  class EndpointHandler():
13
  def __init__(self, path=""):
 
 
14
  # load the optimized model
 
 
 
 
 
15
  model = ORTModelForSequenceClassification.from_pretrained(
16
  path,
17
  export=False,
18
- provider="CUDAExecutionProvider",
19
  )
20
  tokenizer = AutoTokenizer.from_pretrained(path)
 
 
 
 
21
  # create inference pipeline
22
- self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, device=0)
23
 
24
 
25
  def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
 
11
 
12
  class EndpointHandler():
13
  def __init__(self, path=""):
14
+
15
+ on_cuda = torch.cuda.is_available()
16
  # load the optimized model
17
+
18
+ provider = "CPUExecutionProvider"
19
+ if on_cuda:
20
+ provider = "CUDAExecutionProvider"
21
+
22
  model = ORTModelForSequenceClassification.from_pretrained(
23
  path,
24
  export=False,
25
+ provider=provider,
26
  )
27
  tokenizer = AutoTokenizer.from_pretrained(path)
28
+
29
+ device = -1
30
+ if on_cuda:
31
+ device = 0
32
  # create inference pipeline
33
+ self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
34
 
35
 
36
  def __call__(self, data: Any) -> List[List[Dict[str, float]]]: