jordiclive committed on
Commit
2f6df11
1 Parent(s): da58751

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +13 -19
handler.py CHANGED
@@ -36,28 +36,22 @@ class EndpointHandler:
36
  def __call__(self, data: Any):
37
  inputs = data.pop("inputs", data)
38
  parameters = data.pop("parameters", None)
39
- if parameters.get("no_generation", False):
40
- input_tokens = self.tokenizer.batch_encode_plus(
41
  [inputs], return_tensors="pt", padding=False
42
  )
43
- for t in input_tokens:
44
- if torch.is_tensor(input_tokens[t]):
45
- input_tokens[t] = input_tokens[t].to(torch.cuda.current_device())
46
-
47
- logits = self.model(
48
- input_ids=input_tokens["input_ids"],
49
- attention_mask=input_tokens["attention_mask"],
50
- )[0]
51
- log_likelihood = self.compute_log_likelihood(
52
- logits, input_tokens["input_ids"]
53
- )
54
- return (logits, log_likelihood)
55
- if parameters is not None:
56
- prediction = self.pipeline(inputs, **parameters)
57
- else:
58
- prediction = self.pipeline(inputs)
59
- return prediction
60
 
 
 
 
 
 
 
 
 
 
61
 
62
  # if __name__ == "__main__":
63
  # model = EndpointHandler("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 
36
  def __call__(self, data: Any):
37
  inputs = data.pop("inputs", data)
38
  parameters = data.pop("parameters", None)
39
+ input_tokens = self.tokenizer.batch_encode_plus(
 
40
  [inputs], return_tensors="pt", padding=False
41
  )
42
+ for t in input_tokens:
43
+ if torch.is_tensor(input_tokens[t]):
44
+ input_tokens[t] = input_tokens[t].to(torch.cuda.current_device())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ logits = self.model(
47
+ input_ids=input_tokens["input_ids"],
48
+ attention_mask=input_tokens["attention_mask"],
49
+ )[0]
50
+ log_likelihood = self.compute_log_likelihood(
51
+ logits, input_tokens["input_ids"]
52
+ )
53
+ return (logits, log_likelihood)
54
+
55
 
56
  # if __name__ == "__main__":
57
  # model = EndpointHandler("TinyLlama/TinyLlama-1.1B-Chat-v1.0")