concrete-ml-encrypted-qnn / play_with_endpoint.py

chore: fixing an issue for circuits with keys.

26de996 5 months ago

No virus

4.42 kB

	import numpy as np
	import time
	import os, sys

	from pathlib import Path

	from concrete.ml.deployment import FHEModelClient

	import requests


	def to_json(python_object):
	    """Encode binary data as a JSON-serializable dictionary.

	    JSON has no binary type, so the data is wrapped in a tagged dictionary
	    holding the list of its byte values.

	    Args:
	        python_object: the object to encode; only `bytes` and `bytearray`
	            are supported.

	    Returns:
	        A dict of the form ``{"__class__": "bytes", "__value__": [int, ...]}``.

	    Raises:
	        TypeError: if `python_object` is neither `bytes` nor `bytearray`.
	    """
	    if isinstance(python_object, (bytes, bytearray)):
	        return {"__class__": "bytes", "__value__": list(python_object)}

	    raise TypeError(repr(python_object) + " is not JSON serializable")


	def from_json(python_object):
	    """Decode a tagged dictionary holding byte values back into `bytes`.

	    Args:
	        python_object: a decoded JSON value.

	    Returns:
	        The `bytes` rebuilt from the "__value__" entry when `python_object`
	        is a dict tagged with ``"__class__": "bytes"``; otherwise
	        `python_object` itself, unchanged.
	    """
	    if isinstance(python_object, dict) and python_object.get("__class__") == "bytes":
	        return bytes(python_object["__value__"])

	    # Anything that is not a tagged bytes dictionary is passed through
	    # rather than silently turned into None.
	    return python_object


	# URL the requests are posted to
	API_URL = "https://zkmlo9jbfzj9ep1j.eu-west-1.aws.endpoints.huggingface.cloud"

	# The access token is read from the environment. Fail right away with an
	# explicit message when it is missing, instead of the opaque TypeError that
	# concatenating "Bearer " with None would produce.
	_hf_token = os.environ.get("HF_TOKEN")
	if _hf_token is None:
	    raise RuntimeError("The HF_TOKEN environment variable must be set to query the endpoint")

	# HTTP headers sent with every request
	headers = {
	    "Authorization": "Bearer " + _hf_token,
	    "Content-Type": "application/json",
	}


	def query(payload, allowed_retries=2):
	    """Post `payload` to the endpoint and return the decoded JSON answer.

	    An answer containing an "error" entry is treated as a failed attempt and
	    the same request is sent again, at most `allowed_retries` more times.

	    Args:
	        payload: JSON-serializable object sent as the body of the request.
	        allowed_retries: number of extra attempts allowed after a failure.

	    Returns:
	        The decoded JSON body of the first answer without an "error" entry.

	    Raises:
	        AssertionError: if the answer still contains an error once all the
	            retries have been used.
	    """
	    while True:
	        response = requests.post(API_URL, headers=headers, json=payload)

	        # Decode the body once and reuse it for the checks and the messages
	        answer = response.json()

	        if answer is None or "error" not in answer:
	            return answer

	        if allowed_retries <= 0:
	            # Raised explicitly: an `assert` statement is removed by `python -O`,
	            # which would let the error answer be returned as a valid one.
	            raise AssertionError(f"Got an error: {response=} response.json()={answer!r}")

	        # Sometimes we have "Bad gateway" error
	        print(f"Warning, error {response=} response.json()={answer!r} in the query, relaunching")
	        allowed_retries -= 1


	# Directory given to the FHE client to recover its parameters
	path_to_model = Path("compiled_model")

	# Iris and NeuralNetClassifier in FHE
	from sklearn.datasets import load_iris
	from sklearn.model_selection import train_test_split

	# Keep only the test part (25% of Iris, fixed random state) of the split;
	# the inputs are converted to float32.
	iris_inputs, iris_targets = load_iris(return_X_y=True)
	iris_split = train_test_split(iris_inputs, iris_targets, test_size=0.25, random_state=42)
	X_test = iris_split[1].astype("float32")
	Y_test = iris_split[3]

	# Client side: recover the parameters, then generate the keys and get the
	# serialized evaluation keys.
	fhemodel_client = FHEModelClient(path_to_model)
	fhemodel_client.generate_private_and_evaluation_keys()
	evaluation_keys = fhemodel_client.get_serialized_evaluation_keys()

	# Save the key in the database, sent by packets of at most 100M.
	# The first packet uses the "save_key" method, whose answer gives the uid
	# under which the key is stored; each following packet is added to that uid
	# with the "append_key" method.
	packet_size = 1024 * 1024 * 100
	total_key_size = len(evaluation_keys)
	uid = None

	# The packets are cut on the real length of the key: sys.getsizeof also
	# counts the overhead of the Python object, which could lead to an extra
	# empty packet. max(..., 1) keeps one request for an empty key, so that a
	# uid is always obtained.
	for i, start in enumerate(range(0, max(total_key_size, 1), packet_size)):
	    evaluation_keys_piece = evaluation_keys[start : start + packet_size]
	    remaining_size = total_key_size - start - len(evaluation_keys_piece)

	    print(
	        f"Sending {i}-th piece of the key (remaining size is {remaining_size / 1024:.2f} kbytes)"
	    )

	    payload = {
	        "inputs": "fake",
	        "evaluation_keys": to_json(evaluation_keys_piece),
	    }

	    if i == 0:
	        payload["method"] = "save_key"

	        uid = query(payload)["uid"]
	        print(f"Storing the key in the database under {uid=}")
	    else:
	        payload["method"] = "append_key"
	        payload["uid"] = uid

	        query(payload)

	# Test the handler: every test sample is encrypted, sent to the endpoint for
	# inference, and the decrypted prediction is compared with the expected class.
	import json  # only used to measure the size of the serialized payload

	nb_good = 0
	nb_samples = len(X_test)
	verbose = True
	time_start = time.time()
	duration = 0

	for i, (sample, expected) in enumerate(zip(X_test, Y_test)):

	    # Quantize the input and encrypt it
	    encrypted_inputs = fhemodel_client.quantize_encrypt_serialize([sample])

	    # Prepare the payload
	    payload = {
	        "inputs": "fake",
	        "encrypted_inputs": to_json(encrypted_inputs),
	        "method": "inference",
	        "uid": uid,
	    }

	    if i == 0:
	        # sys.getsizeof is shallow and would only measure the dict itself;
	        # measure the JSON text instead (ASCII by default with json.dumps,
	        # so its length is a number of bytes).
	        payload_size = len(json.dumps(payload))
	        print(f"Size of the payload: {payload_size / 1024:.2f} kilobytes")

	    # Run the inference on HF servers; only the query itself is timed
	    inference_start = time.time()
	    encrypted_prediction = query(payload)
	    duration_inference = time.time() - inference_start
	    duration += duration_inference

	    encrypted_prediction = from_json(encrypted_prediction)

	    # Decrypt the result and dequantize
	    prediction_proba = fhemodel_client.deserialize_decrypt_dequantize(encrypted_prediction)[0]
	    prediction = np.argmax(prediction_proba)

	    if verbose:
	        print(
	            f"for {i}-th input, {prediction=} with expected {expected} in {duration_inference:.3f} seconds"
	        )

	    # Measure accuracy
	    nb_good += int(expected == prediction)

	print(f"Accuracy on {nb_samples} samples is {nb_good / nb_samples}")
	print(f"Total time: {time.time() - time_start:.3f} seconds")
	print(f"Duration per inference: {duration / nb_samples:.3f} seconds")