htahir1's picture
Upload folder using huggingface_hub
c73381c
raw
history blame
1.85 kB
# {% include 'template/license_header' %}
from typing import Optional
from uuid import UUID
from steps import model_evaluator, model_trainer, model_promoter
from zenml import ExternalArtifact, pipeline
from zenml.logger import get_logger
from pipelines import (
feature_engineering,
)
logger = get_logger(__name__)
@pipeline(enable_cache=True)
def breast_cancer_training(
    train_dataset_id: Optional[UUID] = None,
    test_dataset_id: Optional[UUID] = None,
    min_train_accuracy: float = 0.0,
    min_test_accuracy: float = 0.0,
):
    """Model training pipeline.

    Obtains a train and a test dataset, trains a model on the train
    dataset, evaluates it on both datasets and hands the resulting
    accuracy to the promotion step.

    The datasets are loaded as external artifacts when BOTH IDs are
    given; otherwise they are produced by running the
    `feature_engineering` pipeline.

    Args:
        train_dataset_id: ID of an existing train dataset artifact.
            Only used when `test_dataset_id` is also provided.
        test_dataset_id: ID of an existing test dataset artifact.
            Only used when `train_dataset_id` is also provided.
        min_train_accuracy: Train accuracy threshold forwarded to the
            `model_evaluator` step.
        min_test_accuracy: Test accuracy threshold forwarded to the
            `model_evaluator` step.
    """
    ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
    # Link all the steps together by calling them and passing the output
    # of one step as the input of the next step.

    train_id_missing = train_dataset_id is None
    test_id_missing = test_dataset_id is None

    if train_id_missing != test_id_missing:
        # Exactly one ID was supplied. It cannot be used on its own, so
        # tell the user instead of silently discarding it.
        logger.warning(
            "Only one of `train_dataset_id` and `test_dataset_id` was "
            "provided; ignoring it and running feature engineering to "
            "produce both datasets."
        )

    if train_id_missing or test_id_missing:
        # Execute Feature Engineering Pipeline
        dataset_trn, dataset_tst = feature_engineering()
    else:
        # Reuse previously produced datasets by their artifact IDs.
        dataset_trn = ExternalArtifact(id=train_dataset_id)
        dataset_tst = ExternalArtifact(id=test_dataset_id)

    model = model_trainer(
        dataset_trn=dataset_trn,
    )

    acc = model_evaluator(
        model=model,
        dataset_trn=dataset_trn,
        dataset_tst=dataset_tst,
        min_train_accuracy=min_train_accuracy,
        min_test_accuracy=min_test_accuracy,
    )

    model_promoter(accuracy=acc)
    ### END CODE HERE ###