import torch
from tqdm.auto import tqdm

from utils.manifolds import Sphere


def riemannian_flow_sampler(
    net,
    batch,
    manifold=Sphere(),
    conditioning_keys=None,
    scheduler=None,
    num_steps=250,
    cfg_rate=0,
    generator=None,
    return_trajectories=False,
    callback=None,
):
    """Sample from a learned flow model by Euler integration on a manifold.

    Starting from ``batch["y"]``, takes ``num_steps`` Euler steps along the
    velocity predicted by ``net`` and re-projects onto ``manifold`` after each
    step (a retraction approximating the exponential map). Optionally applies
    classifier-free guidance (CFG).

    Args:
        net: model called as ``net(batch_dict, current_step=step)``; it reads
            ``"y"`` (current state) and ``"gamma"`` (current time) from the dict.
        batch: input dict; ``batch["y"]`` is the initial state.
            NOTE: when CFG is disabled, this dict is mutated in place (its
            ``"y"`` and ``"gamma"`` keys are overwritten each step).
        manifold: object providing ``projx`` (default: unit ``Sphere``).
        conditioning_keys: a single key into ``batch`` holding the conditioning
            tensor. Despite the plural name, exactly one key is used —
            TODO(review): confirm with callers.
        scheduler: callable mapping times in [0, 1] to integration times
            (gammas). Required.
        num_steps: number of Euler steps.
        cfg_rate: CFG strength; 0 disables guidance.
        generator: accepted but unused here; kept for interface compatibility.
        return_trajectories: if True, also return the intermediate states.
        callback: NOTE(review): accepted but never invoked in this function —
            confirm intended signature before wiring it up.

    Returns:
        Final state tensor (float32); if ``return_trajectories`` is True, a
        tuple ``(final_state, trajectory_list)`` instead.

    Raises:
        ValueError: if ``scheduler`` is None.
    """
    if scheduler is None:
        raise ValueError("Scheduler must be provided")

    x_cur = batch["y"].to(torch.float32)
    if return_trajectories:
        traj = [x_cur.detach()]

    # Time runs 1 -> 0; the scheduler turns this grid into the gamma values
    # actually used for the Euler updates.
    step_indices = torch.arange(
        num_steps + 1, dtype=torch.float32, device=x_cur.device
    )
    steps = 1 - step_indices / num_steps
    gammas = scheduler(steps)

    dtype = torch.float32

    use_cfg = cfg_rate > 0 and conditioning_keys is not None
    if use_cfg:
        # Duplicate along dim 0: first half conditioned, second half with
        # zeroed conditioning (the "unconditional" branch of CFG).
        stacked_batch = {
            conditioning_keys: torch.cat(
                [
                    batch[conditioning_keys],
                    torch.zeros_like(batch[conditioning_keys]),
                ],
                dim=0,
            )
        }

    for step, (gamma_now, gamma_next) in enumerate(zip(gammas[:-1], gammas[1:])):
        with torch.cuda.amp.autocast(dtype=dtype):
            if use_cfg:
                stacked_batch["y"] = torch.cat([x_cur, x_cur], dim=0)
                stacked_batch["gamma"] = gamma_now.expand(x_cur.shape[0] * 2)
                denoised_all = net(stacked_batch, current_step=step)
                denoised_cond, denoised_uncond = denoised_all.chunk(2, dim=0)
                # Standard CFG extrapolation: cond + rate * (cond - uncond).
                denoised = (
                    denoised_cond * (1 + cfg_rate) - denoised_uncond * cfg_rate
                )
            else:
                batch["y"] = x_cur
                batch["gamma"] = gamma_now.expand(x_cur.shape[0])
                denoised = net(batch, current_step=step)

        # Euler step in ambient space, then project back onto the manifold
        # (cheap retraction in place of manifold.expmap(x_cur, dt * denoised)).
        dt = gamma_next - gamma_now
        x_cur = manifold.projx(x_cur + dt * denoised)

        if return_trajectories:
            traj.append(x_cur.detach().to(torch.float32))

    if return_trajectories:
        return x_cur.to(torch.float32), traj
    return x_cur.to(torch.float32)


def ode_riemannian_flow_sampler(
    odefunc,
    x_1,
    manifold=Sphere(),
    scheduler=None,
    num_steps=1000,
):
    """Euler-integrate an ODE velocity field on a manifold from t=0 to t=1.

    Args:
        odefunc: callable ``odefunc(t, x)`` returning the velocity at time ``t``.
        x_1: initial state tensor.
        manifold: object providing ``projx`` (default: unit ``Sphere``).
        scheduler: callable mapping times in [0, 1] to gamma values; step sizes
            are differences of consecutive gammas. Required.
        num_steps: number of Euler steps.

    Returns:
        Final state tensor (float32).

    Raises:
        ValueError: if ``scheduler`` is None.
    """
    if scheduler is None:
        raise ValueError("Scheduler must be provided")

    x_cur = x_1.to(torch.float32)
    steps = (
        torch.arange(num_steps + 1, dtype=torch.float32, device=x_cur.device)
        / num_steps
    )
    dtype = torch.float32

    # BUG FIX: the original wrote enumerate(zip(...), total=num_steps), which
    # raises TypeError — enumerate() takes no keyword arguments. `total` is a
    # tqdm keyword, and tqdm is imported above for exactly this purpose; the
    # enumerate index was unused in this loop anyway.
    for t_now, t_next in tqdm(zip(steps[:-1], steps[1:]), total=num_steps):
        with torch.cuda.amp.autocast(dtype=dtype):
            denoised = odefunc(t_now, x_cur)

        # Step size comes from the scheduler's gamma grid, not raw time.
        dt = scheduler(t_next) - scheduler(t_now)
        # Euler step + projection (retraction approximating expmap).
        x_cur = manifold.projx(x_cur + dt * denoised)

    return x_cur.to(torch.float32)