class Pipeline(object): """Defines a pipeline for transforming sequence data. The input is assumed to be utf-8 encoded `str` (Python 3) or `unicode` (Python 2). Attributes: convert_token: The function to apply to input sequence data. pipes: The Pipelines that will be applied to input sequence data in order. """ def __init__(self, convert_token=None): """Create a pipeline. Arguments: convert_token: The function to apply to input sequence data. If None, the identity function is used. Default: None """ if convert_token is None: self.convert_token = Pipeline.identity elif callable(convert_token): self.convert_token = convert_token else: raise ValueError("Pipeline input convert_token {} is not None " "or callable".format(convert_token)) self.pipes = [self] def __call__(self, x, *args): """Apply the the current Pipeline(s) to an input. Arguments: x: The input to process with the Pipeline(s). Positional arguments: Forwarded to the `call` function of the Pipeline(s). """ for pipe in self.pipes: x = pipe.call(x, *args) return x def call(self, x, *args): """Apply _only_ the convert_token function of the current pipeline to the input. If the input is a list, a list with the results of applying the `convert_token` function to all input elements is returned. Arguments: x: The input to apply the convert_token function to. Positional arguments: Forwarded to the `convert_token` function of the current Pipeline. """ if isinstance(x, list): return [self.convert_token(tok, *args) for tok in x] return self.convert_token(x, *args) def add_before(self, pipeline): """Add a Pipeline to be applied before this processing pipeline. Arguments: pipeline: The Pipeline or callable to apply before this Pipeline. """ if not isinstance(pipeline, Pipeline): pipeline = Pipeline(pipeline) self.pipes = pipeline.pipes[:] + self.pipes[:] return self def add_after(self, pipeline): """Add a Pipeline to be applied after this processing pipeline. Arguments: pipeline: The Pipeline or callable to apply after this Pipeline. """ if not isinstance(pipeline, Pipeline): pipeline = Pipeline(pipeline) self.pipes = self.pipes[:] + pipeline.pipes[:] return self @staticmethod def identity(x): """Return a copy of the input. This is here for serialization compatibility with pickle. """ return x