"""Client-side check of an FHE model served behind a Hugging Face endpoint.

The script builds a synthetic classification dataset, encrypts each test
sample with the client-side FHE model, posts it to ``API_URL`` together with
the evaluation keys, decrypts the answer and reports accuracy and timings.

The ``HF_TOKEN`` environment variable must hold the endpoint access token.
"""
import os
import sys
import time
from pathlib import Path

import numpy as np
import requests
from concrete.ml.deployment import FHEModelClient
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

API_URL = "https://puqif7goarh132kl.us-east-1.aws.endpoints.huggingface.cloud"

# Fail early with a readable message instead of the TypeError that
# `"Bearer " + None` would raise when the variable is not set.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None:
    sys.exit("HF_TOKEN environment variable must be set to query the endpoint")

headers = {
    "Authorization": "Bearer " + hf_token,
    "Content-Type": "application/octet-stream",
}


def query(payload):
    """POST ``payload`` to ``API_URL`` and return the JSON-decoded response.

    Args:
        payload: Request body, forwarded to ``requests.post`` as ``data``.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    # NOTE(review): `payload` is a dict handed to `data=` while the header
    # announces application/octet-stream, and the JSON-decoded result is later
    # passed straight to `deserialize_decrypt_dequantize` -- confirm this
    # matches what the endpoint handler expects and returns.
    response = requests.post(API_URL, headers=headers, data=payload)
    # Surface HTTP failures here rather than feeding an error body to the
    # decryption step.
    response.raise_for_status()
    return response.json()


path_to_model = Path("compiled_model")

x, y = make_classification(n_samples=1000, class_sep=2, n_features=30, random_state=42)
_, X_test, _, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Recover parameters for client side
fhemodel_client = FHEModelClient(path_to_model)

# Generate the keys
fhemodel_client.generate_private_and_evaluation_keys()
evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()

# Test the handler
nb_good = 0
nb_samples = len(X_test)
# The per-sample report was previously forced on by `if verbose or True`;
# keep it on by default, but let the flag actually control it.
verbose = True
time_start = time.perf_counter()
duration = 0.0

for i, (sample, expected) in enumerate(zip(X_test, y_test)):

    # Quantize the input and encrypt it
    encrypted_inputs = fhemodel_client.quantize_encrypt_serialize([sample])

    # Prepare the payload, including the evaluation keys which are needed server side
    payload = {
        "inputs": "fake",
        "encrypted_inputs": encrypted_inputs,
        "evaluation_keys": evaluation_keys,
    }

    # Run the inference on HF servers; only the round trip is timed
    query_start = time.perf_counter()
    encrypted_prediction = query(payload)
    duration += time.perf_counter() - query_start

    # Decrypt the result and dequantize
    prediction_proba = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0]
    prediction = np.argmax(prediction_proba)

    if verbose:
        print(f"for {i}-th input, {prediction=} with expected {expected}")

    # Measure accuracy
    nb_good += int(expected == prediction)

print(f"Accuracy on {nb_samples} samples is {nb_good / nb_samples}")
print(f"Total time: {time.perf_counter() - time_start} seconds")
print(f"Duration in inferences: {duration} seconds")
print(f"Duration per inference: {duration / nb_samples} seconds")