|
import torch
from bayes_opt import BayesianOptimization, SequentialDomainReductionTransformer
from lpips import LPIPS
from scipy.stats import beta as beta_distribution

from utils import compute_lpips, compute_smoothness_and_consistency


def bayesian_prior_selection(
    interpolation_pipe,
    latent1: torch.FloatTensor,
    latent2: torch.FloatTensor,
    prompt1: str,
    prompt2: str,
    lpips_model: LPIPS,
    guide_prompt: str | None = None,
    negative_prompt: str = "",
    size: int = 3,
    num_inference_steps: int = 25,
    warmup_ratio: float = 1,
    early: str = "vfused",
    late: str = "self",
    target_score: float = 0.9,
    n_iter: int = 15,
    p_min: float | None = None,
    p_max: float | None = None,
) -> tuple:
    """
    Select the alpha and beta parameters for the interpolation using Bayesian optimization.

    Args:
        interpolation_pipe (any): The interpolation pipeline.
        latent1 (torch.FloatTensor): The first source latent vector.
        latent2 (torch.FloatTensor): The second source latent vector.
        prompt1 (str): The first source prompt.
        prompt2 (str): The second source prompt.
        lpips_model (LPIPS): The LPIPS model used to compute perceptual distances.
        guide_prompt (str | None, optional): The guide prompt for the interpolation, if any. Defaults to None.
        negative_prompt (str, optional): The negative prompt for the interpolation. Defaults to "".
        size (int, optional): The number of images in the interpolation sequence. Defaults to 3.
        num_inference_steps (int, optional): The number of inference steps. Defaults to 25.
        warmup_ratio (float, optional): The fraction of inference steps used as warmup. Defaults to 1.
        early (str, optional): The early fusion method. Defaults to "vfused".
        late (str, optional): The late fusion method. Defaults to "self".
        target_score (float, optional): The smoothness score at which the search stops early. Defaults to 0.9.
        n_iter (int, optional): The maximum number of optimization iterations. Defaults to 15.
        p_min (float | None, optional): The lower bound for alpha and beta. Defaults to None, resolved to 1.
        p_max (float | None, optional): The upper bound for alpha and beta. Defaults to None, resolved to the number of warmup steps.

    Returns:
        tuple: A tuple containing the selected alpha and beta parameters.
""" |
|
|
|
def get_smoothness(alpha, beta): |
|
""" |
|
Black-box objective function of Bayesian Optimization. |
|
Get the smoothness of the interpolated sequence with the given alpha and beta. |
|
""" |
|
if alpha < beta and large_alpha_prior: |
|
return 0 |
|
if alpha > beta and not large_alpha_prior: |
|
return 0 |
|
if alpha == beta: |
|
return init_smoothness |
|
interpolation_sequence = interpolation_pipe.interpolate_save_gpu( |
|
latent1, |
|
latent2, |
|
prompt1, |
|
prompt2, |
|
guide_prompt=guide_prompt, |
|
negative_prompt=negative_prompt, |
|
size=size, |
|
num_inference_steps=num_inference_steps, |
|
warmup_ratio=warmup_ratio, |
|
early=early, |
|
late=late, |
|
alpha=alpha, |
|
beta=beta, |
|
) |
|
smoothness, _, _ = compute_smoothness_and_consistency( |
|
interpolation_sequence, lpips_model |
|
) |
|
return smoothness |
|
|
|
|
|
|
|
    # Probe the midpoint (t = 0.5) once to establish the baseline smoothness
    # and the prior on the alpha/beta ordering.
    images = interpolation_pipe.interpolate_single(
        0.5,
        latent1,
        latent2,
        prompt1,
        prompt2,
        guide_prompt=guide_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        warmup_ratio=warmup_ratio,
        early=early,
        late=late,
    )
    distances = compute_lpips(images, lpips_model)
    init_smoothness, _, _ = compute_smoothness_and_consistency(images, lpips_model)
    # Prior direction, inferred from the LPIPS gaps on either side of the probe.
    large_alpha_prior = distances[0] < distances[1]

    num_warmup_steps = warmup_ratio * num_inference_steps
|
    # Search alpha and beta over [p_min, p_max], defaulting to [1, num_warmup_steps].
    if p_min is None:
        p_min = 1
    if p_max is None:
        p_max = num_warmup_steps
    pbounds = {"alpha": (p_min, p_max), "beta": (p_min, p_max)}
    bounds_transformer = SequentialDomainReductionTransformer(minimum_window=0.1)
    optimizer = BayesianOptimization(
        f=get_smoothness,
        pbounds=pbounds,
        random_state=1,
        bounds_transformer=bounds_transformer,
        allow_duplicate_points=True,
    )
    alpha_init = [p_min, (p_min + p_max) / 2, p_max]
    beta_init = [p_min, (p_min + p_max) / 2, p_max]
|
    # Probe a 3x3 grid of (alpha, beta) seeds; return immediately if any probe
    # already meets the target smoothness.
    for alpha in alpha_init:
        for beta in beta_init:
            optimizer.probe(params={"alpha": alpha, "beta": beta}, lazy=False)
            latest_result = optimizer.res[-1]
            latest_score = latest_result["target"]
            if latest_score >= target_score:
                return alpha, beta
|
    # Refine with Bayesian optimization one step at a time so the search can
    # stop as soon as the target score is reached.
    for _ in range(n_iter):
        optimizer.maximize(init_points=0, n_iter=1)
        max_score = optimizer.max["target"]
        if max_score >= target_score:
            print(f"Stopping early, target of {target_score} reached.")
            break

    results = optimizer.max
    alpha = results["params"]["alpha"]
    beta = results["params"]["beta"]
    return alpha, beta
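
# Usage sketch (illustrative only): how this selector might be called. `pipe`,
# `latent1`, and `latent2` are placeholders for an interpolation pipeline that
# exposes interpolate_single / interpolate_save_gpu and its source latents;
# they are not defined in this module.
#
#   lpips_model = LPIPS(net="vgg")
#   alpha, beta = bayesian_prior_selection(
#       pipe,
#       latent1,
#       latent2,
#       "a photo of a cat",
#       "a photo of a dog",
#       lpips_model,
#       size=5,
#       n_iter=10,
#   )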


def generate_beta_tensor(
    size: int, alpha: float = 3, beta: float = 3
) -> torch.FloatTensor:
    """
    Let n = size. Generates a PyTorch tensor of values [x_0, x_1, ..., x_{n-1}]
    where each x_i satisfies F(x_i) = i / (n - 1) for the CDF F of the
    Beta(alpha, beta) distribution.

    Args:
        size (int): The number of values to generate.
        alpha (float): The alpha parameter of the Beta distribution.
        beta (float): The beta parameter of the Beta distribution.

    Returns:
        torch.FloatTensor: A tensor of the inverse CDF (quantile) values of the Beta distribution.
    """
    # Evenly spaced probabilities in [0, 1], mapped through the Beta quantile
    # function (inverse CDF).
    prob_values = [i / (size - 1) for i in range(size)]
    inverse_cdf_values = beta_distribution.ppf(prob_values, alpha, beta)
    return torch.tensor(inverse_cdf_values, dtype=torch.float32)
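

# Minimal runnable check (a sketch, not part of the original module): prints
# the Beta(3, 3) schedule for a 5-frame sequence. The endpoints are exactly
# 0 and 1; the schedule is symmetric around 0.5 and clusters frames near the
# midpoint. Inner values below are approximate.
if __name__ == "__main__":
    print(generate_beta_tensor(5))
    # ~ tensor([0.0000, 0.3594, 0.5000, 0.6406, 1.0000])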
|
|