"""Client-side demo: encrypted inference against a Hugging Face endpoint.

The script runs three steps in order:

1. Build an ``FHEModelClient`` from ``compiled_model`` and generate the
   private and evaluation keys.
2. Upload the serialized evaluation keys to the endpoint in pieces
   (``save_key`` for the first piece, ``append_key`` for the following ones).
3. For every sample of the Iris test split: quantize + encrypt it, send it
   with the ``inference`` method, decrypt the answer, and compare the
   predicted class with the expected label.

The ``HF_TOKEN`` environment variable must hold the bearer token used to
authenticate against the endpoint.
"""

import json
import os
import sys
import time
from pathlib import Path

import numpy as np
import requests
from concrete.ml.deployment import FHEModelClient
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


def to_json(python_object):
    """Wrap a ``bytes`` value into a JSON-serializable dict.

    Args:
        python_object: the value to wrap; only ``bytes`` is supported.

    Returns:
        A dict ``{"__class__": "bytes", "__value__": [int, ...]}``.

    Raises:
        TypeError: if ``python_object`` is not ``bytes``.
    """
    if isinstance(python_object, bytes):
        return {"__class__": "bytes", "__value__": list(python_object)}
    raise TypeError(repr(python_object) + " is not JSON serializable")


def from_json(python_object):
    """Inverse of :func:`to_json`.

    Args:
        python_object: a decoded JSON container.

    Returns:
        ``bytes`` rebuilt from ``python_object["__value__"]`` when the
        ``"__class__"`` marker is present; otherwise ``python_object``
        unchanged (previously ``None`` was returned implicitly, which hid
        the actual server answer).
    """
    if "__class__" in python_object:
        return bytes(python_object["__value__"])
    return python_object


API_URL = "https://zkmlo9jbfzj9ep1j.eu-west-1.aws.endpoints.huggingface.cloud"

# Fail early with a readable message instead of a TypeError on `str + None`.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token is None:
    raise RuntimeError("The HF_TOKEN environment variable must be set to query the endpoint")

headers = {
    "Authorization": "Bearer " + _hf_token,
    "Content-Type": "application/json",
}


def query(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded answer.

    Args:
        payload: JSON-serializable dict describing the request.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: if the HTTP status is not a success, or if the decoded
            body is a dict carrying an ``"error"`` key.
    """
    response = requests.post(API_URL, headers=headers, json=payload)

    # NOTE: `"error" in response` would iterate over raw body chunks of the
    # Response object and never match, so inspect the status and the decoded
    # body instead.
    if not response.ok:
        raise RuntimeError(
            f"Got an error: HTTP {response.status_code} from the endpoint: {response.text}"
        )

    result = response.json()
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(f"Got an error: {result['error']}")

    return result


path_to_model = Path("compiled_model")

# Iris and NeuralNetClassifier in FHE
X, y = load_iris(return_X_y=True)

_, X_test, _, Y_test = train_test_split(X, y, test_size=0.25, random_state=42)
X_test = X_test.astype("float32")

# Recover parameters for client side
fhemodel_client = FHEModelClient(path_to_model)

# Generate the keys
fhemodel_client.generate_private_and_evaluation_keys()
evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()

# Save the key in the database
evaluation_keys_remaining = evaluation_keys
uid = None
is_finished = False
i = 0
packet_size = 1024 * 1024 * 100

while not is_finished:

    # Send by packets of 100M. Sizes are measured with len(): sys.getsizeof
    # also counts the object header, which could trigger an extra empty piece.
    if len(evaluation_keys_remaining) > packet_size:
        evaluation_keys_piece = evaluation_keys_remaining[:packet_size]
        evaluation_keys_remaining = evaluation_keys_remaining[packet_size:]
    else:
        evaluation_keys_piece = evaluation_keys_remaining
        evaluation_keys_remaining = b""
        is_finished = True

    print(
        f"Sending {i}-th piece of the key (remaining size is {len(evaluation_keys_remaining) / 1024:.2f} kbytes)"
    )

    if i == 0:
        # The first piece creates the entry; the endpoint answers with its uid
        payload = {
            "inputs": "fake",
            "evaluation_keys": to_json(evaluation_keys_piece),
            "method": "save_key",
        }

        uid = query(payload)["uid"]
        print(f"Storing the key in the database under {uid=}")

    else:
        # Following pieces are appended to the entry identified by uid
        payload = {
            "inputs": "fake",
            "evaluation_keys": to_json(evaluation_keys_piece),
            "method": "append_key",
            "uid": uid,
        }

        query(payload)

    i += 1

# Test the handler
nb_good = 0
nb_samples = len(X_test)
verbose = True
time_start = time.time()
duration = 0

for i, (sample, expected) in enumerate(zip(X_test, Y_test)):

    # Quantize the input and encrypt it
    encrypted_inputs = fhemodel_client.quantize_encrypt_serialize([sample])

    # Prepare the payload
    payload = {
        "inputs": "fake",
        "encrypted_inputs": to_json(encrypted_inputs),
        "method": "inference",
        "uid": uid,
    }

    if i == 0:
        # Report the JSON-encoded size; sys.getsizeof(payload) would only be
        # the shallow size of the dict object, not of its content.
        payload_size = len(json.dumps(payload).encode("utf-8"))
        print(f"Size of the payload: {payload_size / 1024:.2f} kilobytes")

    # Run the inference on HF servers
    inference_start = time.time()
    encrypted_prediction = query(payload)
    duration_inference = time.time() - inference_start
    duration += duration_inference

    encrypted_prediction = from_json(encrypted_prediction)

    # Decrypt the result and dequantize
    prediction_proba = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0]
    prediction = np.argmax(prediction_proba)

    if verbose:
        print(
            f"for {i}-th input, {prediction=} with expected {expected} in {duration_inference:.3f} seconds"
        )

    # Measure accuracy
    nb_good += int(expected == prediction)

print(f"Accuracy on {nb_samples} samples is {nb_good / nb_samples}")
print(f"Total time: {time.time() - time_start:.3f} seconds")
print(f"Duration per inference: {duration / nb_samples:.3f} seconds")