Spaces:

czl
/

generative-data-augmentation-demo

Running on Zero

App Files Files Community

generative-data-augmentation-demo / tools /synth.py

czl

update xformers param

3da643f verified 8 months ago

raw

history blame

35.6 kB

	"""
	Helper scripts for generating synthetic images using a diffusion model.

	Functions:
	- get_top_misclassified
	- get_class_list
	- generateClassPairs
	- outputDirectory
	- pipe_img
	- createPrompts
	- interpolatePrompts
	- slerp
	- get_middle_elements
	- remove_middle
	- genClassImg
	- getMetadata
	- groupbyInterpolation
	- ungroupInterpolation
	- groupAllbyInterpolation
	- getPairIndices
	- generateImagesFromDataset
	- generateTrace
	"""

	import json
	import os

	import numpy as np
	import pandas as pd
	import torch
	from DeepCache import DeepCacheSDHelper
	from diffusers import (
	LMSDiscreteScheduler,
	StableDiffusionImg2ImgPipeline,
	)
	from torch import nn
	from torchmetrics.functional.image import structural_similarity_index_measure as ssim
	from torchvision import transforms


	def get_top_misclassified(val_classifier_json):
	    """
	    Load the per-class "top_n_classes" entries from a validation classifier report.

	    Args:
	        val_classifier_json (str): Path to the validation classifier JSON file. The file
	            must hold a "val_metrics_details" mapping whose values each provide a
	            "top_n_classes" entry.

	    Returns:
	        dict: Maps every key of "val_metrics_details" to the value stored under its
	            "top_n_classes" entry.
	    """
	    with open(val_classifier_json) as report_file:
	        report = json.load(report_file)

	    # One row per class, one column per metric recorded for that class.
	    details_frame = pd.DataFrame.from_dict(
	        report["val_metrics_details"], orient="index"
	    )
	    return dict(details_frame["top_n_classes"].items())


	def get_class_list(val_classifier_json):
	    """
	    Return the class names found in a validation classifier report, in sorted order.

	    Args:
	        val_classifier_json (str): Path to the validation classifier JSON file.

	    Returns:
	        list: The keys of the report's "val_metrics_details" mapping, sorted ascending.
	    """
	    with open(val_classifier_json, "r") as report_file:
	        report = json.load(report_file)

	    class_names = list(report["val_metrics_details"])
	    class_names.sort()
	    return class_names


	def generateClassPairs(val_classifier_json):
	    """
	    Build the unique, order-independent class pairs that the classifier confuses.

	    Args:
	        val_classifier_json (str): Path to the validation classifier JSON file.

	    Returns:
	        list: Sorted list of 2-tuples. Each tuple holds a class and one of its
	            "top_n_classes" entries, with the two names sorted inside the tuple so
	            that (a, b) and (b, a) collapse into a single pair.
	    """
	    confusions = get_top_misclassified(val_classifier_json)
	    unique_pairs = {
	        tuple(sorted([class_name, other]))
	        for class_name, others in confusions.items()
	        for other in others
	    }
	    return sorted(unique_pairs)


	def outputDirectory(class_pairs, synth_path, metadata_path):
	    """
	    Creates the output directory structure for the synthesized data.

	    One sub-folder per entry of `class_pairs` is created under `synth_path`, plus the
	    metadata directory. Directories that already exist are left untouched.

	    Args:
	        class_pairs (list): Identifiers used as sub-folder names under `synth_path`.
	        synth_path (str): The path to the directory where the synthesized data will be stored.
	        metadata_path (str): The path to the directory where the metadata will be stored.

	    Returns:
	        None

	    Raises:
	        FileExistsError: If one of the target paths exists but is not a directory.
	    """
	    for class_id in class_pairs:
	        # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
	        os.makedirs(f"{synth_path}/{class_id}", exist_ok=True)
	    os.makedirs(metadata_path, exist_ok=True)
	    print("Info: Output directory ready.")


	def pipe_img(
	    model_path,
	    device="cuda",
	    apply_optimization=True,
	    use_torchcompile=False,
	    ci_cb=(5, 1),
	    use_safetensors=None,
	    cpu_offload=False,
	    scheduler=None,
	):
	    """
	    Creates and returns an image-to-image pipeline for stable diffusion.

	    The pipeline is loaded in float32 with the safety checker disabled and moved to
	    `device`. Optional steps are applied in this order: CPU offload, DeepCache
	    (plus xformers attention when CUDA is available), then torch.compile on the U-Net.

	    Args:
	        model_path (str): The path to the pretrained model.
	        device (str, optional): The device to use for computation. Defaults to "cuda".
	        apply_optimization (bool, optional): Whether to enable DeepCache (and xformers
	            attention on CUDA). Defaults to True.
	        use_torchcompile (bool, optional): Whether to compile the U-Net with torch.compile.
	            Defaults to False.
	        ci_cb (tuple, optional): A tuple containing the cache interval and cache branch ID
	            passed to DeepCache. Defaults to (5, 1).
	        use_safetensors (bool, optional): Forwarded to `from_pretrained`. Defaults to None.
	        cpu_offload (bool, optional): Whether to enable CPU offloading. Defaults to False.
	        scheduler (LMSDiscreteScheduler, optional): The scheduler for the pipeline. When
	            None, an LMSDiscreteScheduler with the settings below is created.

	    Returns:
	        StableDiffusionImg2ImgPipeline: The image-to-image pipeline for stable diffusion.
	    """
	    ###############################
	    # Reference:
	    # Akimov, R. (2024) Images Interpolation with Stable Diffusion - Hugging Face Open-Source AI Cookbook. Available at: https://huggingface.co/learn/cookbook/en/stable_diffusion_interpolation (Accessed: 4 June 2024).
	    ###############################
	    if scheduler is None:
	        # Default scheduler used when the caller does not supply one.
	        scheduler = LMSDiscreteScheduler(
	            beta_start=0.00085,
	            beta_end=0.012,
	            beta_schedule="scaled_linear",
	            num_train_timesteps=1000,
	            steps_offset=1,
	        )
	    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
	        model_path,
	        scheduler=scheduler,
	        torch_dtype=torch.float32,
	        use_safetensors=use_safetensors,
	        safety_checker=None,
	    ).to(device)
	    if cpu_offload:
	        pipe.enable_model_cpu_offload()
	    if apply_optimization:
	        # tomesd.apply_patch(pipe, ratio=0.5)
	        helper = DeepCacheSDHelper(pipe=pipe)
	        cache_interval, cache_branch_id = ci_cb
	        helper.set_params(
	            cache_interval=cache_interval, cache_branch_id=cache_branch_id
	        )  # lower is faster but lower quality
	        helper.enable()
	        # NOTE(review): xformers is treated as part of the optimization bundle here;
	        # confirm it should not also run when apply_optimization is False.
	        if torch.cuda.is_available():
	            pipe.enable_xformers_memory_efficient_attention()
	    if use_torchcompile:
	        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
	    return pipe


	def createPrompts(
	    class_name_pairs,
	    prompt_structure=None,
	    use_default_negative_prompt=False,
	    negative_prompt=None,
	):
	    """
	    Build the positive (and optionally negative) text prompts for a pair of classes.

	    Args:
	        class_name_pairs (list): The two class names; only the first two entries are used.
	        prompt_structure (str, optional): Template containing the "<class_name>"
	            placeholder. Defaults to "a photo of a <class_name>".
	        use_default_negative_prompt (bool, optional): When True, the built-in negative
	            prompt replaces any `negative_prompt` passed in. Defaults to False.
	        negative_prompt (str, optional): Text describing features to steer away from.

	    Returns:
	        tuple: `(prompts, negative_prompts)`.
	            prompts (list): One prompt per class, in the order given.
	            negative_prompts (list or None): The negative prompt repeated once per
	                prompt, or None when no negative prompt is in effect.

	    Raises:
	        ValueError: If `prompt_structure` is given without the "<class_name>" placeholder.
	    """
	    placeholder = "<class_name>"
	    template = "a photo of a <class_name>" if prompt_structure is None else prompt_structure
	    if placeholder not in template:
	        raise ValueError(
	            "The prompt structure must contain the <class_name> placeholder."
	        )

	    # The default negative prompt takes precedence over a caller-supplied one.
	    if use_default_negative_prompt:
	        negative_prompt = "blurry image, disfigured, deformed, distorted, cartoon, drawings"

	    first_class, second_class = class_name_pairs[0], class_name_pairs[1]
	    prompts = [
	        template.replace(placeholder, class_name)
	        for class_name in (first_class, second_class)
	    ]

	    if negative_prompt is not None:
	        return prompts, [negative_prompt for _ in prompts]

	    print("Info: Negative prompt not provided, returning as None.")
	    return prompts, None


	def interpolatePrompts(
	    prompts,
	    pipeline,
	    num_interpolation_steps,
	    sample_mid_interpolation,
	    remove_n_middle=0,
	    device="cuda",
	):
	    """
	    Interpolates prompts by generating intermediate embeddings between pairs of prompts.

	    Args:
	        prompts (List[str]): A list of prompts to be interpolated (at least two).
	        pipeline: The pipeline object containing the tokenizer and text encoder.
	        num_interpolation_steps (int): The number of interpolation steps between each pair of prompts.
	        sample_mid_interpolation (int): The number of intermediate embeddings to sample from the
	            middle of the interpolation between the first two prompts.
	        remove_n_middle (int, optional): The number of middle embeddings to remove from the
	            sampled embeddings. Defaults to 0.
	        device (str, optional): The device to run the interpolation on. Defaults to "cuda".

	    Returns:
	        interpolated_prompt_embeds (torch.Tensor): The interpolated prompt embeddings.
	        prompt_metadata (dict): Per-step metadata for the first prompt pair: whether the step
	            falls in the sampled window, cosine similarities to both end points, and the
	            nearest class (0 or 1).

	    Raises:
	        ValueError: If fewer than two prompts are given, or if `sample_mid_interpolation` or
	            `remove_n_middle` is negative or larger than the sequence it applies to.

	    e.g. if num_interpolation_steps = 10, sample_mid_interpolation = 6, remove_n_middle = 2
	    Interpolated: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
	    Sampled:            [2, 3, 4, 5, 6, 7]
	    Removed:                  x  x
	    Returns:            [2, 3, 6, 7]
	    """

	    ###############################
	    # Reference:
	    # Akimov, R. (2024) Images Interpolation with Stable Diffusion - Hugging Face Open-Source AI Cookbook. Available at: https://huggingface.co/learn/cookbook/en/stable_diffusion_interpolation (Accessed: 4 June 2024).
	    ###############################

	    def slerp(v0, v1, num, t0=0, t1=1):
	        """
	        Performs spherical linear interpolation between two vectors.

	        Args:
	            v0 (torch.Tensor): The starting vector.
	            v1 (torch.Tensor): The ending vector.
	            num (int): The number of interpolation points.
	            t0 (float, optional): The starting time. Defaults to 0.
	            t1 (float, optional): The ending time. Defaults to 1.

	        Returns:
	            torch.Tensor: The `num` interpolated vectors stacked along a new first
	                dimension, in the dtype of `v0`.
	        """
	        ###############################
	        # Reference:
	        # Karpathy, A. (2022) hacky stablediffusion code for generating videos, Gist. Available at: https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355 (Accessed: 4 June 2024).
	        ###############################
	        out_dtype = v0.dtype
	        v0 = v0.detach().cpu().numpy()
	        v1 = v1.detach().cpu().numpy()

	        def interpolation(t, v0, v1, DOT_THRESHOLD=0.9995):
	            """helper function to spherically interpolate two arrays v0 v1"""
	            dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
	            if np.abs(dot) > DOT_THRESHOLD:
	                # Nearly parallel vectors: plain linear interpolation is stable here.
	                return (1 - t) * v0 + t * v1
	            theta_0 = np.arccos(dot)
	            sin_theta_0 = np.sin(theta_0)
	            theta_t = theta_0 * t
	            s0 = np.sin(theta_0 - theta_t) / sin_theta_0
	            s1 = np.sin(theta_t) / sin_theta_0
	            return s0 * v0 + s1 * v1

	        frames = np.array([interpolation(t, v0, v1) for t in np.linspace(t0, t1, num)])
	        # np.linspace yields float64 scalars, which can promote the result to float64
	        # depending on the NumPy version; keep the embeddings in their original dtype.
	        return torch.tensor(frames, dtype=out_dtype)

	    def get_middle_elements(lst, n):
	        """
	        Returns the `n` middle elements of `lst` together with the range of their indices.

	        Args:
	            lst (list or torch.Tensor): The sequence from which to extract the middle elements.
	            n (int): The number of middle elements to extract.

	        Returns:
	            tuple: The sub-sequence of middle elements and the range of their indices.

	        Raises:
	            ValueError: If n is negative or greater than the length of the sequence.

	        Examples:
	            lst = [1, 2, 3, 4, 5]
	            get_middle_elements(lst, 3)
	            ([2, 3, 4], range(1, 4))
	        """
	        if n < 0 or n > len(lst):
	            # Without this guard a too-large n yields a negative start index and a
	            # silently wrong slice.
	            raise ValueError(
	                "Invalid value for n. It should be non-negative and no greater than the list length"
	            )
	        if n % 2 == 0:  # Even number of elements
	            middle_index = len(lst) // 2 - 1
	            start = middle_index - n // 2 + 1
	        else:  # Odd number of elements
	            middle_index = len(lst) // 2
	            start = middle_index - n // 2
	        end = middle_index + n // 2 + 1
	        return lst[start:end], range(start, end)

	    def remove_middle(data, n):
	        """
	        Remove the middle n elements from a sequence.

	        Args:
	            data (list or torch.Tensor): The input sequence.
	            n (int): The number of elements to remove from the middle of the sequence.

	        Returns:
	            list or torch.Tensor: The sequence with the middle n elements removed.

	        Raises:
	            ValueError: If n is negative or greater than the length of the sequence.
	        """
	        if n < 0 or n > len(data):
	            raise ValueError(
	                "Invalid value for n. It should be non-negative and no greater than the list length"
	            )

	        middle = len(data) // 2
	        head = data[: middle - n // 2]
	        # (n + 1) // 2 equals n // 2 for even n and n // 2 + 1 for odd n.
	        tail = data[middle + (n + 1) // 2 :]
	        if torch.is_tensor(data):
	            # "+" on tensors adds element-wise instead of concatenating.
	            return torch.cat((head, tail), dim=0)
	        return head + tail

	    batch_size = len(prompts)
	    if batch_size < 2:
	        raise ValueError("At least two prompts are required for interpolation.")

	    # Tokenizing and encoding prompts into embeddings.
	    prompts_tokens = pipeline.tokenizer(
	        prompts,
	        padding="max_length",
	        max_length=pipeline.tokenizer.model_max_length,
	        truncation=True,
	        return_tensors="pt",
	    )
	    # The embeddings are detached inside slerp, so no autograd graph is needed.
	    with torch.no_grad():
	        prompts_embeds = pipeline.text_encoder(prompts_tokens.input_ids.to(device))[0]

	    # Interpolating between embeddings pairs for the given number of interpolation steps.
	    interpolated_prompt_embeds = [
	        slerp(prompts_embeds[i], prompts_embeds[i + 1], num_interpolation_steps)
	        for i in range(batch_size - 1)
	    ]

	    # Keep the untouched interpolation of the first pair for the similarity metadata.
	    full_path = interpolated_prompt_embeds[0]
	    interpolated_prompt_embeds[0], sample_range = get_middle_elements(
	        full_path, sample_mid_interpolation
	    )

	    if remove_n_middle > 0:
	        interpolated_prompt_embeds[0] = remove_middle(
	            interpolated_prompt_embeds[0], remove_n_middle
	        )

	    prompt_metadata = dict()
	    similarity = nn.CosineSimilarity(dim=-1, eps=1e-6)
	    first_embed = full_path[0]
	    last_embed = full_path[num_interpolation_steps - 1]
	    for i in range(num_interpolation_steps):
	        class1_sim = similarity(first_embed, full_path[i]).mean().item()
	        class2_sim = similarity(last_embed, full_path[i]).mean().item()
	        relative_distance = class1_sim / (class1_sim + class2_sim)

	        prompt_metadata[i] = {
	            # "selected" marks the sampled window before remove_n_middle is applied.
	            "selected": i in sample_range,
	            "similarity": {
	                "class1": class1_sim,
	                "class2": class2_sim,
	                "class1_relative_distance": relative_distance,
	                "class2_relative_distance": 1 - relative_distance,
	            },
	            "nearest_class": int(relative_distance < 0.5),
	        }

	    interpolated_prompt_embeds = torch.cat(interpolated_prompt_embeds, dim=0).to(device)
	    return interpolated_prompt_embeds, prompt_metadata


	def genClassImg(
	    pipeline,
	    pos_embed,
	    neg_embed,
	    input_image,
	    generator,
	    latents,
	    num_imgs=1,
	    height=512,
	    width=512,
	    num_inference_steps=25,
	    guidance_scale=7.5,
	):
	    """
	    Run the pipeline once for a single prompt embedding and return the first image.

	    Args:
	        pipeline: Callable pipeline; invoked with keyword arguments only and expected to
	            return an object exposing an `images` sequence.
	        pos_embed: Positive prompt embedding; a leading batch axis is added before the call.
	        neg_embed: Negative prompt embedding or None; batched the same way when given.
	        input_image: Forwarded to the pipeline as `image`.
	        generator: Forwarded to the pipeline as `generator`.
	        latents: Forwarded to the pipeline as `latents`.
	        num_imgs: Forwarded as `num_images_per_prompt` (default is 1).
	        height: Requested image height (default is 512).
	        width: Requested image width (default is 512).
	        num_inference_steps: Number of inference steps (default is 25).
	        guidance_scale: Guidance scale (default is 7.5).

	    Returns:
	        The first entry of the pipeline result's `images`.
	    """
	    negative_batch = None if neg_embed is None else neg_embed[None, ...]
	    positive_batch = pos_embed[None, ...]

	    result = pipeline(
	        height=height,
	        width=width,
	        num_images_per_prompt=num_imgs,
	        prompt_embeds=positive_batch,
	        negative_prompt_embeds=negative_batch,
	        num_inference_steps=num_inference_steps,
	        guidance_scale=guidance_scale,
	        generator=generator,
	        latents=latents,
	        image=input_image,
	    )
	    return result.images[0]


	def getMetadata(
	    class_pairs,
	    path,
	    seed,
	    guidance_scale,
	    num_inference_steps,
	    num_interpolation_steps,
	    sample_mid_interpolation,
	    height,
	    width,
	    prompts,
	    negative_prompts,
	    pipeline,
	    prompt_metadata,
	    negative_prompt_metadata,
	    ssim_metadata=None,
	    save_json=True,
	    save_path=".",
	):
	    """
	    Collect the generation settings into one dictionary and optionally write it as JSON.

	    Args:
	        class_pairs (list): Class names of the pair; also joined with "_" for the file name.
	        path (str): Path to the data.
	        seed (int): Seed value; also part of the file name.
	        guidance_scale (float): Stored under params["CFG"].
	        num_inference_steps (int): Stored under params["inferenceSteps"].
	        num_interpolation_steps (int): Stored under params["interpolationSteps"].
	        sample_mid_interpolation (int): Stored under params["sampleMidInterpolation"].
	        height (int): Height of the image.
	        width (int): Width of the image.
	        prompts (list): Prompt texts, stored as "prompt_text_<i>".
	        negative_prompts (list or None): Negative prompt texts, stored as
	            "negative_prompt_text_<i>" when not None.
	        pipeline (object): Object whose `config` is stored as a dict under "pipe_config".
	        prompt_metadata (dict): Stored under "prompt_embed_similarity".
	        negative_prompt_metadata (dict): Stored under "negative_prompt_embed_similarity".
	        ssim_metadata (dict, optional): Stored under "ssim_scores" when given. Defaults to None.
	        save_json (bool, optional): Whether to write the metadata to disk. Defaults to True.
	        save_path (str, optional): Directory for the JSON file. Defaults to ".".

	    Returns:
	        dict: Generated metadata.
	    """
	    metadata = {
	        "class_pairs": class_pairs,
	        "path": path,
	        "seed": seed,
	        "params": {
	            "CFG": guidance_scale,
	            "inferenceSteps": num_inference_steps,
	            "interpolationSteps": num_interpolation_steps,
	            "sampleMidInterpolation": sample_mid_interpolation,
	            "height": height,
	            "width": width,
	        },
	    }

	    # Positive and negative texts are interleaved per prompt index.
	    for index, prompt_text in enumerate(prompts):
	        metadata[f"prompt_text_{index}"] = prompt_text
	        if negative_prompts is not None:
	            metadata[f"negative_prompt_text_{index}"] = negative_prompts[index]

	    metadata["pipe_config"] = dict(pipeline.config)
	    metadata["prompt_embed_similarity"] = prompt_metadata
	    metadata["negative_prompt_embed_similarity"] = negative_prompt_metadata

	    if ssim_metadata is not None:
	        print("Info: SSIM scores are available.")
	        metadata["ssim_scores"] = ssim_metadata

	    if save_json:
	        file_name = f"{'_'.join(class_pairs)}_{seed}.json"
	        with open(os.path.join(save_path, file_name), "w") as json_file:
	            json.dump(metadata, json_file, indent=4)

	    return metadata


	def groupbyInterpolation(dir_to_classfolder):
	    """
	    Group files in a directory by interpolation step.

	    Each regular file is expected to be named `<prefix>_<step>.<ext>`; it is moved into
	    a sub-folder named `<step>` inside `dir_to_classfolder`. Entries that are not
	    regular files (for example step sub-folders left by an earlier run) are skipped,
	    so calling the function again on an already grouped folder is safe.

	    Args:
	        dir_to_classfolder (str): The path to the directory containing the files.

	    Returns:
	        None

	    Raises:
	        IndexError: If a regular file name contains no "_" separator. All names are
	            parsed before any file is moved, so nothing is moved in that case.
	    """
	    planned_moves = []
	    for entry in os.listdir(dir_to_classfolder):
	        source = os.path.join(dir_to_classfolder, entry)
	        if not os.path.isfile(source):
	            continue
	        interpolation_step = entry.split(sep="_")[1].split(sep=".")[0]
	        planned_moves.append((interpolation_step, source))

	    # create a subfolder for each step of the interpolation
	    for interpolation_step, source in planned_moves:
	        step_dir = os.path.join(dir_to_classfolder, interpolation_step)
	        os.makedirs(step_dir, exist_ok=True)
	        os.rename(source, os.path.join(step_dir, os.path.basename(source)))


	def ungroupInterpolation(dir_to_classfolder):
	    """
	    Flatten a grouped class folder: move every file out of its sub-directory into
	    `dir_to_classfolder` and delete the emptied sub-directory.

	    Args:
	        dir_to_classfolder (str): The path to the directory containing the subdirectories.

	    Returns:
	        None
	    """
	    for entry in os.listdir(dir_to_classfolder):
	        step_dir = os.path.join(dir_to_classfolder, entry)
	        if not os.path.isdir(step_dir):
	            # Files already at the top level stay where they are.
	            continue
	        for file_name in os.listdir(step_dir):
	            os.rename(
	                os.path.join(step_dir, file_name),
	                os.path.join(dir_to_classfolder, file_name),
	            )
	        os.rmdir(step_dir)


	def groupAllbyInterpolation(
	    data_path,
	    group=True,
	    fn_group=groupbyInterpolation,
	    fn_ungroup=ungroupInterpolation,
	):
	    """
	    Apply the grouping (or ungrouping) function to every class folder under `data_path`.

	    Args:
	        data_path (str): Directory whose sub-directories are the class folders.
	        group (bool, optional): True to group, False to ungroup. Defaults to True.
	        fn_group (function, optional): Called with each class folder path when grouping.
	            Defaults to groupbyInterpolation.
	        fn_ungroup (function, optional): Called with each class folder path when ungrouping.
	            Defaults to ungroupInterpolation.
	    """
	    handler = fn_group if group else fn_ungroup
	    # Class folders are visited in sorted name order; plain files are ignored.
	    for class_name in sorted(os.listdir(data_path)):
	        class_dir = os.path.join(data_path, class_name)
	        if not os.path.isdir(class_dir):
	            continue
	        handler(class_dir)
	        print(f"Processed {class_name}")


	def getPairIndices(subset_len, total_pair_count=1, seed=None):
	    """
	    Split the indices 0..subset_len-1 into `total_pair_count` equally sized random groups.

	    The indices are shuffled with a seeded generator. When `subset_len` is not a multiple
	    of the group size, the tail is padded with indices taken in ascending order
	    (0, 1, 2, ..., wrapping around if necessary), so some indices appear twice.

	    Args:
	        subset_len (int): The length of the subset.
	        total_pair_count (int, optional): The number of groups to generate. Defaults to 1.
	        seed (int, optional): The seed value for the random number generator. Defaults to None.

	    Returns:
	        list: `total_pair_count` lists, each holding ceil(subset_len / total_pair_count)
	            indices. For an empty subset every group is empty.

	    Raises:
	        ValueError: If `total_pair_count` is smaller than 1 or `subset_len` is negative.
	    """
	    if total_pair_count < 1:
	        raise ValueError("total_pair_count must be at least 1")
	    if subset_len < 0:
	        raise ValueError("subset_len must be non-negative")
	    if subset_len == 0:
	        # Nothing to distribute; avoids a modulo/reshape on a zero group size.
	        return [[] for _ in range(total_pair_count)]

	    rng = np.random.default_rng(seed)
	    group_size = (subset_len + total_pair_count - 1) // total_pair_count
	    numbers = list(range(subset_len))
	    rng.shuffle(numbers)

	    # Pad up to exactly group_size * total_pair_count entries so that the reshape
	    # always produces the requested number of groups.
	    padding = group_size * total_pair_count - subset_len
	    numbers.extend(i % subset_len for i in range(padding))

	    groups = np.array(numbers).reshape(total_pair_count, group_size)
	    return groups.tolist()


	def generateImagesFromDataset(
	    img_subsets,
	    class_iterables,
	    pipeline,
	    interpolated_prompt_embeds,
	    interpolated_negative_prompts_embeds,
	    num_inference_steps,
	    guidance_scale,
	    height=512,
	    width=512,
	    seed=None,
	    save_path=".",
	    class_pairs=("0", "1"),
	    save_image=True,
	    image_type="jpg",
	    interpolate_range="full",
	    device="cuda",
	    return_images=False,
	):
	    """
	    Generates images from a dataset using the given parameters.

	    For every class in `class_pairs`, one batch of image indices is popped from
	    `class_iterables[class_id]`; each image is paired with a randomly assigned
	    interpolation step and passed through `genClassImg`. The SSIM between each
	    generated image and its input is accumulated per class and step.

	    Args:
	        img_subsets (dict): A dictionary containing image subsets for each class. Indexing a
	            subset must yield an (image, target) pair.
	        class_iterables (dict): Per class, a list of index batches; the last batch is popped.
	        pipeline (object): The pipeline object used for image generation.
	        interpolated_prompt_embeds (list): Interpolated prompt embeddings.
	        interpolated_negative_prompts_embeds (list or None): Interpolated negative prompt
	            embeddings, or None to generate without negative prompts.
	        num_inference_steps (int): The number of inference steps for image generation.
	        guidance_scale (float): The guidance scale used during image generation.
	        height (int, optional): The height of the generated images. Defaults to 512.
	        width (int, optional): The width of the generated images. Defaults to 512.
	        seed (int, optional): The seed value for random number generation. Defaults to None.
	        save_path (str, optional): The path to save the generated images. Defaults to ".".
	        class_pairs (tuple, optional): The two class identifiers. Defaults to ("0", "1").
	        save_image (bool, optional): Whether to save the generated images. Defaults to True.
	        image_type (str, optional): The file format of the saved images. Defaults to "jpg".
	        interpolate_range (str, optional): The range of interpolation for prompt embeddings.
	            Possible values are "full", "nearest", or "furthest". Defaults to "full".
	        device (str, optional): The device to use for image generation. Defaults to "cuda".
	        return_images (bool, optional): Whether to return the generated images. Defaults to False.

	    Returns:
	        dict or tuple: If return_images is True, returns a dictionary containing the generated
	            images for each class and a dictionary containing the SSIM scores for each class
	            and interpolation step. If return_images is False, returns only the SSIM dictionary.
	    """
	    if seed is None:
	        seed = torch.Generator().seed()
	    generator = torch.manual_seed(seed)
	    rng = np.random.default_rng(seed)
	    # Generating initial U-Net latent vectors from a random normal distribution.
	    latents = torch.randn(
	        (1, pipeline.unet.config.in_channels, height // 8, width // 8),
	        generator=generator,
	    ).to(device)

	    embed_len = len(interpolated_prompt_embeds)
	    if interpolated_negative_prompts_embeds is None:
	        # No negative prompts: pair every positive embedding with None, which
	        # genClassImg accepts, instead of failing inside zip().
	        negative_embeds = [None] * embed_len
	    else:
	        negative_embeds = interpolated_negative_prompts_embeds
	    embed_pairs_list = list(zip(interpolated_prompt_embeds, negative_embeds))

	    class_images = dict()
	    class_ssim = dict()

	    half = embed_len // 2
	    if interpolate_range == "nearest":
	        steps_range = (range(0, half), range(half, embed_len))
	        multiplier = 2
	    elif interpolate_range == "furthest":
	        # uses opposite class of images of the text interpolation
	        steps_range = (range(half, embed_len), range(0, half))
	        multiplier = 2
	    else:
	        steps_range = (range(embed_len), range(embed_len))
	        multiplier = 1

	    to_tensor = transforms.ToTensor()

	    for class_iter, class_id in enumerate(class_pairs):
	        if return_images:
	            class_images[class_id] = list()
	        class_ssim[class_id] = {
	            i: {"ssim_sum": 0, "ssim_count": 0, "ssim_avg": 0} for i in range(embed_len)
	        }
	        subset_len = len(img_subsets[class_id])
	        # to efficiently randomize the steps to interpolate for each image in the class, group_map is used
	        # group_map: index is the image id, element is the interpolation step
	        # steps_range[class_iter] determines the range of steps to interpolate for the class,
	        # so with "nearest"/"furthest" each class only uses one half of the steps;
	        # the list is repeated so that it covers the whole subset + remainder
	        group_map = (
	            list(steps_range[class_iter]) * multiplier * (subset_len // embed_len + 1)
	        )
	        # shuffle the steps; the position in group_map is mapped to the image id
	        rng.shuffle(group_map)

	        iter_indices = class_iterables[class_id].pop()
	        # generate images for each image in the class, using its randomly assigned step
	        for image_id in iter_indices:
	            img, trg = img_subsets[class_id][image_id]
	            input_image = img.unsqueeze(0)
	            interpolate_step = group_map[image_id]
	            prompt_embeds, negative_prompt_embeds = embed_pairs_list[interpolate_step]
	            generated_image = genClassImg(
	                pipeline,
	                prompt_embeds,
	                negative_prompt_embeds,
	                input_image,
	                generator,
	                latents,
	                num_imgs=1,
	                height=height,
	                width=width,
	                num_inference_steps=num_inference_steps,
	                guidance_scale=guidance_scale,
	            )
	            pred_image = to_tensor(generated_image).unsqueeze(0)
	            ssim_score = ssim(pred_image, input_image).item()
	            step_stats = class_ssim[class_id][interpolate_step]
	            step_stats["ssim_sum"] += ssim_score
	            step_stats["ssim_count"] += 1
	            if return_images:
	                class_images[class_id].append(generated_image)
	            if save_image:
	                output_file = f"{save_path}/{class_id}/{seed}-{image_id}_{interpolate_step}.{image_type}"
	                if image_type == "jpg":
	                    generated_image.save(output_file, format="JPEG", quality=95)
	                elif image_type == "png":
	                    generated_image.save(output_file, format="PNG")
	                else:
	                    # Let the image library infer the format from the extension.
	                    generated_image.save(output_file)

	        # calculate ssim avg for the class
	        for step_stats in class_ssim[class_id].values():
	            if step_stats["ssim_count"] > 0:
	                step_stats["ssim_avg"] = step_stats["ssim_sum"] / step_stats["ssim_count"]

	    if return_images:
	        return class_images, class_ssim
	    return class_ssim


	def generateTrace(
	    prompts,
	    img_subsets,
	    class_iterables,
	    interpolated_prompt_embeds,
	    interpolated_negative_prompts_embeds,
	    subset_indices,
	    seed=None,
	    save_path=".",
	    class_pairs=("0", "1"),
	    image_type="jpg",
	    interpolate_range="full",
	    save_prompt_embeds=False,
	):
	    """
	    Generate a trace dictionary containing information about the generated images.

	    The function repeats the seeded step assignment used for image generation without
	    running the pipeline, and records for every image its input file, output file and
	    interpolation step.

	    Args:
	        prompts (list): List of prompt texts; entries 0 and 1 are recorded as
	            "pos_prompt_text" and "neg_prompt_text".
	        img_subsets (dict): Dictionary containing image subsets for each class. Each subset
	            must expose `dataset.samples`.
	        class_iterables (dict): Per class, a list of index batches; the last batch is popped.
	        interpolated_prompt_embeds (torch.Tensor): Tensor containing interpolated prompt embeddings.
	        interpolated_negative_prompts_embeds (torch.Tensor or None): Tensor containing
	            interpolated negative prompt embeddings, or None when no negative prompt is used.
	        subset_indices (dict): Per class, a sequence whose first entry is the offset of the
	            subset inside `dataset.samples`.
	        seed (int, optional): Seed value for random number generation. Defaults to None.
	        save_path (str, optional): Path to save the generated images. Defaults to ".".
	        class_pairs (tuple, optional): Tuple containing class pairs. Defaults to ("0", "1").
	        image_type (str, optional): Type of the generated images. Defaults to "jpg".
	        interpolate_range (str, optional): Range of interpolation. Defaults to "full".
	        save_prompt_embeds (bool, optional): Flag to save prompt embeddings. Defaults to False.

	    Returns:
	        dict: Trace dictionary containing information about the generated images. Every value
	            is a list with one entry per traced image.
	    """
	    trace_dict = {
	        "class_pairs": list(),
	        "class_id": list(),
	        "image_id": list(),
	        "interpolation_step": list(),
	        "embed_len": list(),
	        "pos_prompt_text": list(),
	        "neg_prompt_text": list(),
	        "input_file_path": list(),
	        "output_file_path": list(),
	        "input_prompts_embed": list(),
	    }

	    if seed is None:
	        seed = torch.Generator().seed()
	    rng = np.random.default_rng(seed)

	    embed_len = len(interpolated_prompt_embeds)

	    embed_pairs_list = None
	    if save_prompt_embeds:
	        # Only materialise numpy copies when they are actually stored in the trace.
	        positive_embeds = interpolated_prompt_embeds.cpu().numpy()
	        if interpolated_negative_prompts_embeds is None:
	            negative_embeds = [None] * embed_len
	        else:
	            negative_embeds = interpolated_negative_prompts_embeds.cpu().numpy()
	        embed_pairs_list = list(zip(positive_embeds, negative_embeds))

	    half = embed_len // 2
	    if interpolate_range == "nearest":
	        steps_range = (range(0, half), range(half, embed_len))
	        multiplier = 2
	    elif interpolate_range == "furthest":
	        # uses opposite class of images of the text interpolation
	        steps_range = (range(half, embed_len), range(0, half))
	        multiplier = 2
	    else:
	        steps_range = (range(embed_len), range(embed_len))
	        multiplier = 1

	    pos_prompt = prompts[0]
	    neg_prompt = prompts[1]

	    for class_iter, class_id in enumerate(class_pairs):
	        class_ds = img_subsets[class_id]
	        subset_len = len(class_ds)
	        # to efficiently randomize the steps to interpolate for each image in the class, group_map is used
	        # group_map: index is the image id, element is the interpolation step
	        # steps_range[class_iter] determines the range of steps to interpolate for the class,
	        # so with "nearest"/"furthest" each class only uses one half of the steps;
	        # the list is repeated so that it covers the whole subset + remainder
	        group_map = (
	            list(steps_range[class_iter]) * multiplier * (subset_len // embed_len + 1)
	        )
	        # shuffle the steps; the position in group_map is mapped to the image id
	        rng.shuffle(group_map)

	        iter_indices = class_iterables[class_id].pop()
	        for image_id in iter_indices:
	            interpolate_step = group_map[image_id]
	            # image ids are relative to the subset; shift by the subset's first index
	            sample_count = subset_indices[class_id][0] + image_id
	            input_file = os.path.normpath(class_ds.dataset.samples[sample_count][0])
	            output_file = f"{save_path}/{class_id}/{seed}-{image_id}_{interpolate_step}.{image_type}"
	            if save_prompt_embeds:
	                input_prompts_embed = embed_pairs_list[interpolate_step]
	            else:
	                input_prompts_embed = None

	            trace_dict["class_pairs"].append(class_pairs)
	            trace_dict["class_id"].append(class_id)
	            trace_dict["image_id"].append(image_id)
	            trace_dict["interpolation_step"].append(interpolate_step)
	            trace_dict["embed_len"].append(embed_len)
	            trace_dict["pos_prompt_text"].append(pos_prompt)
	            trace_dict["neg_prompt_text"].append(neg_prompt)
	            trace_dict["input_file_path"].append(input_file)
	            trace_dict["output_file_path"].append(output_file)
	            trace_dict["input_prompts_embed"].append(input_prompts_embed)

	    return trace_dict