from typing import Dict, List, Any
from transformers import AutoModel, AutoTokenizer
class EndpointHandler:
    def __init__(self, path: str = "."):
        # Load the tokenizer and model from the given path (the repository root by default).
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModel.from_pretrained(
            path,
            trust_remote_code=True,
            # Optional task flags understood by the model's remote code can be passed here, e.g.:
            # do_syntax=True, do_prefix=False, do_morph=False, do_ner=True, do_lex=True
        )
        self.model.eval()
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            inputs (:obj:`str` | :obj:`List[str]`): text(s) to analyze
            kwargs
        Return:
            A :obj:`list` of :obj:`dict`: will be serialized and returned
        """
        # return self.pipeline(data['inputs'])
        # Run the model's predict() (provided by its remote code) on the input text.
        outputs = self.model.predict(data['inputs'], self.tokenizer, output_style='json')
        for i, output in enumerate(outputs):
            # Join the per-token lexemes into a single lemmatized string.
            lem = ' '.join(token['lex'] for token in output['tokens'])
            # Regroup each predicted entity with the lexemes of the tokens it spans.
            ner = [
                {
                    'word': ' '.join(
                        token['lex']
                        for token in output['tokens'][entity['token_start']:entity['token_end'] + 1]
                    ),
                    'entity_group': entity['label'],
                    'token_start': entity['token_start'],
                    'token_end': entity['token_end'],
                }
                for entity in output['ner_entities']
            ]
            # Replace the raw model output with the slimmed-down response payload.
            outputs[i] = {
                'lex': lem,
                'ner': ner,
            }
        return outputs
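

# ---------------------------------------------------------------------------
# Minimal local usage sketch (not part of the deployed handler): shows how the
# handler above might be exercised outside the Inference Endpoints runtime.
# The path "." and the sample payload are placeholders/assumptions; substitute
# the actual model repository path and real input text for your deployment.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    handler = EndpointHandler(path=".")
    result = handler({"inputs": ["example sentence to analyze"]})
    print(result)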