# NOTE: header residue from the web file viewer this file was copied from
# (author avatar: nateraw; commit "init" ee74779; file size 997 bytes).
import os

import torch
# ⚠️ You need this to access the state key
from flash.core.data.data_source import LabelsState
from flash.text import TextClassifier
class PreTrainedPipeline():
    """Inference wrapper around a Lightning Flash ``TextClassifier`` checkpoint.

    The checkpoint is loaded once at construction time; calling the instance
    runs the model on one input and returns the best-scoring labels.
    """

    def __init__(self, path="", top_k=5):
        """Load the checkpoint and cache everything inference needs.

        Args:
            path: Directory that contains ``pytorch_model.bin``.
            top_k: Maximum number of ``{'score', 'label'}`` entries returned
                by each call.
        """
        self.device = 'cpu'
        self.model = TextClassifier.load_from_checkpoint(os.path.join(path, "pytorch_model.bin"))
        # This object is only ever used for prediction, so put the module in
        # evaluation mode; otherwise train-time layers such as dropout would
        # stay active and make the returned scores non-deterministic.
        self.model.eval()
        self.data_pipeline = self.model.build_data_pipeline()
        # Label names are read from the pipeline state stored on the model,
        # keyed by the LabelsState class.
        self.labels = self.model._data_pipeline_state._state[LabelsState].labels
        self.top_k = top_k

    def __call__(self, inputs):
        """Classify ``inputs`` and return the highest-scoring labels.

        Args:
            inputs: Raw input handed to the data pipeline's deserializer
                (presumably a single text string -- confirm against caller).

        Returns:
            A list of at most ``self.top_k`` dicts of the form
            ``{'score': float, 'label': <label>}`` ordered by descending
            score. Labels with equal scores keep their order in
            ``self.labels``.
        """
        # No gradients are needed for prediction; skip building the graph.
        with torch.no_grad():
            x = self.data_pipeline._deserializer(inputs)
            x = self.data_pipeline.worker_preprocessor('predict')(x)
            x = self.model.transfer_batch_to_device(x, self.device, 0)
            x = self.data_pipeline.device_preprocessor('predict')(x)
            out = self.model.predict_step(x, 0)
            # Softmax over dim 1, then keep only the first row of the result.
            proba = out['logits'].softmax(1)[0].tolist()

        # Sort on the score alone: comparing whole (score, label) tuples would
        # fall back to comparing labels on ties, which raises TypeError for
        # labels that are not mutually orderable.
        ranked = sorted(zip(proba, self.labels), key=lambda pair: pair[0], reverse=True)
        return [{'score': score, 'label': label} for score, label in ranked[:self.top_k]]