# {% include 'template/license_header' %}

import mlflow
import pandas as pd
from sklearn.base import ClassifierMixin

from zenml import get_step_context, log_artifact_metadata, step
from zenml.client import Client
from zenml.logger import get_logger

logger = get_logger(__name__)

experiment_tracker = Client().active_stack.experiment_tracker


@step(enable_cache=False, experiment_tracker=experiment_tracker.name)
def model_evaluator(
    model: ClassifierMixin,
    dataset_trn: pd.DataFrame,
    dataset_tst: pd.DataFrame,
    min_train_accuracy: float = 0.0,
    min_test_accuracy: float = 0.0,
) -> float:
    """Evaluate a trained model.

    This is an example of a model evaluation step that takes in a model artifact
    previously trained by another step in your pipeline, and a train and test
    dataset pair which it uses to evaluate the model's performance. The model
    metrics are then returned as step output artifacts (in this case, the model
    accuracy on the train and test set).

    The suggested step implementation also outputs some warnings if the model
    performance does not meet some minimum criteria. This is just an example of
    how you can use steps to monitor your model performance and alert you if
    something goes wrong. As an alternative, you can raise an exception in the
    step to force the pipeline run to fail early and all subsequent steps to be
    skipped.

    This step is parameterized so that it can be configured independently of the
    step code, before running it in a pipeline. In this example, the step can be
    configured to use different values for the acceptable model performance
    thresholds. See the documentation for more information:

        https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines

    Args:
        model: The pre-trained model artifact.
        dataset_trn: The train dataset.
        dataset_tst: The test dataset.
        min_train_accuracy: Minimal acceptable training accuracy value.
        min_test_accuracy: Minimal acceptable testing accuracy value.

    Returns:
        The model accuracy on the test set.
    """
    # The target column name could also be read from the dataset's run metadata:
    # context = get_step_context()
    # target = context.inputs["dataset_trn"].run_metadata['target'].value
    target = "target"

    # Calculate the model accuracy on the train and test set
    trn_acc = model.score(
        dataset_trn.drop(columns=[target]),
        dataset_trn[target],
    )
    logger.info(f"Train accuracy={trn_acc*100:.2f}%")

    tst_acc = model.score(
        dataset_tst.drop(columns=[target]),
        dataset_tst[target],
    )
    logger.info(f"Test accuracy={tst_acc*100:.2f}%")

    # Warn if either accuracy falls below its configured threshold
    messages = []
    if trn_acc < min_train_accuracy:
        messages.append(
            f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !"
        )
    if tst_acc < min_test_accuracy:
        messages.append(
            f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !"
        )
    for message in messages:
        logger.warning(message)

    # Attach the evaluation metrics as metadata to the trained model artifact
    artifact = get_step_context().model_version.get_artifact("model")
    log_artifact_metadata(
        metadata={
            "train_accuracy": float(trn_acc),
            "test_accuracy": float(tst_acc),
        },
        artifact_name=artifact.name,
        artifact_version=artifact.version,
    )

    # Also log the metrics to the active MLflow run via the experiment tracker
    mlflow.log_metric("train_accuracy", float(trn_acc))
    mlflow.log_metric("test_accuracy", float(tst_acc))

    return float(tst_acc)
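
# Example usage (a minimal sketch, not part of this module): the step above is
# typically invoked from a pipeline, with the accuracy thresholds configured
# outside the step code via a YAML config passed to `with_options`. The names
# `training_pipeline`, `data_loader`, and `model_trainer` below are assumed
# placeholders for other steps/pipelines in your project, not definitions from
# this file.
#
#     from zenml import pipeline
#
#     @pipeline
#     def training_pipeline():
#         dataset_trn, dataset_tst = data_loader()
#         model = model_trainer(dataset_trn=dataset_trn)
#         model_evaluator(
#             model=model,
#             dataset_trn=dataset_trn,
#             dataset_tst=dataset_tst,
#         )
#
#     # config.yaml
#     # steps:
#     #   model_evaluator:
#     #     parameters:
#     #       min_train_accuracy: 0.8
#     #       min_test_accuracy: 0.7
#
#     training_pipeline.with_options(config_path="config.yaml")()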