import shutil
import sys
from pathlib import Path

from concrete.ml.deployment import FHEModelDev
from concrete.ml.deployment import FHEModelClient


def compile_and_make_it_deployable(model_dev, X_train):
    """Compile a fitted Concrete ML model to FHE and save deployment artifacts.

    Compiles ``model_dev`` on ``X_train`` (p_error=0.01), writes the
    deployable model to ``compiled_model/`` (any previous contents are
    removed), generates client keys to report the evaluation-key size, and
    prints the simulated-FHE average precision on the test set.

    Args:
        model_dev: A fitted Concrete ML model exposing ``compile`` and
            ``predict_proba``.
        X_train: Training inputs used as the compilation input-set.

    NOTE(review): this function reads the module-level globals ``x_test``,
    ``y_test`` and ``average_precision_score``, which the script below
    defines before calling it — confirm if reusing this function elsewhere.
    """
    path_to_model = Path("compiled_model")

    # Compile into FHE
    model_dev.compile(X_train, p_error=0.01)

    # Saving the model (wipe any stale artifacts first)
    shutil.rmtree(path_to_model, ignore_errors=True)
    fhemodel_dev = FHEModelDev(path_to_model, model_dev)
    fhemodel_dev.save(via_mlir=True)

    # To see the size of the key
    fhemodel_client = FHEModelClient(path_to_model)

    # Generate the keys
    fhemodel_client.generate_private_and_evaluation_keys()
    evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()

    # FIX: len() is the serialized key payload size; sys.getsizeof() would
    # also count the Python bytes-object header, inflating the figure.
    print(f"Your keys will be {len(evaluation_keys) / 1024 / 1024}-megabytes long")

    # Check accuracy with p_error, using FHE simulation (no real encryption)
    y_pred_concrete = model_dev.predict_proba(x_test, fhe="simulate")[:, 1]
    concrete_average_precision = average_precision_score(y_test, y_pred_concrete)
    print(f"Concrete average precision score (simulate): {concrete_average_precision:0.2f}")


# This is the spam classifier.
# Taken from https://github.com/zama-ai/concrete-ml/blob/main/docs/advanced_examples/DecisionTreeClassifier.ipynb

# Imports consolidated at the top of the script (PEP 8) instead of being
# scattered next to their first use.
import numpy
from sklearn.datasets import fetch_openml
from sklearn.metrics import average_precision_score  # pylint: disable=no-member
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier

# Fetch the OpenML spambase data set (data_id=44) and hold out 15% for testing
features, classes = fetch_openml(data_id=44, as_frame=False, cache=True, return_X_y=True)
classes = classes.astype(numpy.int64)

x_train, x_test, y_train, y_test = train_test_split(
    features,
    classes,
    test_size=0.15,
    random_state=42,
)

# Find best hyper parameters with cross validation
# List of hyper parameters to tune
param_grid = {
    "max_features": [None],
    "min_samples_leaf": [10],
    "min_samples_split": [100],
    "max_depth": [None],
}

grid_search = GridSearchCV(
    ConcreteDecisionTreeClassifier(),
    param_grid,
    cv=10,
    scoring="average_precision",
    error_score="raise",
    n_jobs=1,
)

gs_results = grid_search.fit(x_train, y_train)
print("Best hyper parameters:", gs_results.best_params_)
print("Best score:", gs_results.best_score_)

# Build the model with best hyper parameters
model_dev = ConcreteDecisionTreeClassifier(
    max_features=gs_results.best_params_["max_features"],
    min_samples_leaf=gs_results.best_params_["min_samples_leaf"],
    min_samples_split=gs_results.best_params_["min_samples_split"],
    max_depth=gs_results.best_params_["max_depth"],
    n_bits=6,
)
model_dev = model_dev.fit(x_train, y_train)

# Compute average precision on test (clear-text inference, before FHE)
y_pred_concrete = model_dev.predict_proba(x_test)[:, 1]
concrete_average_precision = average_precision_score(y_test, y_pred_concrete)
print(f"Concrete average precision score: {concrete_average_precision:0.2f}")

compile_and_make_it_deployable(model_dev, x_train)
print("Your model is ready to be deployable.")