jyoung105 committed on
Commit 10f9783
Parent: 999ec64

Finish setup for initial pipeline

Files changed (1):
  pipeline.py (+16 -62)
pipeline.py CHANGED
@@ -71,7 +71,7 @@ if is_torch_xla_available():
     XLA_AVAILABLE = True
 else:
     XLA_AVAILABLE = False
-
+
 
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
@@ -79,13 +79,18 @@ EXAMPLE_DOC_STRING = """
     Examples:
         ```py
         >>> import torch
-        >>> from diffusers import StableDiffusionXLPipeline
-        >>> pipe = StableDiffusionXLPipeline.from_pretrained(
-        ...     "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
+        >>> from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
+        >>> from diffusers.utils import load_image
+        >>> vae = AutoencoderKL.from_pretrained(
+        ...     "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+        ... )
+        >>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+        ...     "stabilityai/stable-diffusion-xl-base-1.0", custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i", vae=vae, torch_dtype=torch.float16
         ... )
         >>> pipe = pipe.to("cuda")
         >>> prompt = "a photo of an astronaut riding a horse on mars"
-        >>> image = pipe(prompt).images[0]
+        >>> init_image = load_image("")
+        >>> image = pipe(prompt, image=init_image, strength=0.9, pag_scale=3.0, pag_applied_layers=['mid']).images[0]
         ```
     """
 
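For reference, a self-contained sketch of the usage the new example docstring describes. The model IDs, the custom_pipeline name, and the call arguments (strength, pag_scale, pag_applied_layers) are taken from the docstring above; the input image URL is only a stand-in borrowed from the img2img docstring removed further down in this diff, and the output filename is arbitrary. The fp16-fix VAE is optional and only matters when decoding in float16.

```py
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
from diffusers.utils import load_image

# fp16-safe SDXL VAE; avoids NaN/black outputs when decoding in float16
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# load the SDXL base weights, but run them through this repo's pipeline.py via custom_pipeline
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i",
    vae=vae,
    torch_dtype=torch.float16,
).to("cuda")

# any RGB init image works; this URL is a stand-in
url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
init_image = load_image(url).convert("RGB")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(
    prompt,
    image=init_image,
    strength=0.9,                 # how strongly the init image is re-noised
    pag_scale=3.0,                # perturbed-attention guidance scale
    pag_applied_layers=["mid"],   # apply PAG to the mid-block self-attention
).images[0]
image.save("astronaut_i2i.png")
```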
@@ -315,37 +320,6 @@ class PAGCFGIdentitySelfAttnProcessor:
 
         return hidden_states
 
-if is_invisible_watermark_available():
-    from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
-
-if is_torch_xla_available():
-    import torch_xla.core.xla_model as xm
-
-    XLA_AVAILABLE = True
-else:
-    XLA_AVAILABLE = False
-
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
-
-EXAMPLE_DOC_STRING = """
-    Examples:
-        ```py
-        >>> import torch
-        >>> from diffusers import StableDiffusionXLImg2ImgPipeline
-        >>> from diffusers.utils import load_image
-
-        >>> pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-        ...     "stabilityai/stable-diffusion-xl-base-1.0", custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i", torch_dtype=torch.float16,
-        ... )
-        >>> pipe = pipe.to("cuda")
-        >>> url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
-
-        >>> init_image = load_image(url).convert("RGB")
-        >>> prompt = "a photo of an astronaut riding a horse on mars"
-        >>> image = pipe(prompt, image=init_image, pag_scale=3.0, pag_applied_layers=['mid']).images[0]
-        ```
-    """
-
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
@@ -767,7 +741,7 @@ class StableDiffusionXLImg2ImgPipeline(
                 unscale_lora_layers(self.text_encoder_2, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
-
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
@@ -923,26 +897,6 @@ class StableDiffusionXLImg2ImgPipeline(
             return timesteps, num_inference_steps
 
         return timesteps, num_inference_steps - t_start
-
-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
-    # def prepare_latents(
-    #     self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None
-    # ):
-    #     shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
-    #     if isinstance(generator, list) and len(generator) != batch_size:
-    #         raise ValueError(
-    #             f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
-    #             f" size of {batch_size}. Make sure the batch size matches the length of the generators."
-    #         )
-
-    #     if latents is None:
-    #         latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
-    #     else:
-    #         latents = latents.to(device)
-
-    #     # scale the initial noise by the standard deviation required by the scheduler
-    #     latents = latents * self.scheduler.init_noise_sigma
-    #     return latents
 
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
@@ -1259,14 +1213,14 @@ class StableDiffusionXLImg2ImgPipeline(
     def cross_attention_kwargs(self):
         return self._cross_attention_kwargs
 
-    @property
-    def denoising_end(self):
-        return self._denoising_end
-
     @property
     def denoising_start(self):
         return self._denoising_start
 
+    @property
+    def denoising_end(self):
+        return self._denoising_end
+
     @property
     def num_timesteps(self):
         return self._num_timesteps
@@ -1547,8 +1501,8 @@ class StableDiffusionXLImg2ImgPipeline(
         self._guidance_rescale = guidance_rescale
         self._clip_skip = clip_skip
         self._cross_attention_kwargs = cross_attention_kwargs
-        self._denoising_end = denoising_end
         self._denoising_start = denoising_start
+        self._denoising_end = denoising_end
         self._interrupt = False
 
         self._pag_scale = pag_scale
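The denoising_end plumbing restored in the two hunks above is what the standard SDXL base-plus-refiner handoff relies on: the base pipeline stops at a fraction of the noise schedule and returns latents, and the refiner resumes from that same fraction via denoising_start. Below is a hedged sketch of that pattern, assuming this pipeline's __call__ mirrors the upstream StableDiffusionXLImg2ImgPipeline it is adapted from; the refiner checkpoint and the 0.8 split point are illustrative and not part of this commit.

```py
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, AutoencoderKL
from diffusers.utils import load_image

vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# base stage: this repo's PAG img2img pipeline (same setup as the earlier sketch)
base = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="jyoung105/sdxl_perturbed_attention_guidance_i2i",
    vae=vae,
    torch_dtype=torch.float16,
).to("cuda")

# refiner stage: the stock SDXL refiner, sharing the base's VAE and second text encoder
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=torch.float16,
).to("cuda")

init_image = load_image(
    "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
).convert("RGB")
prompt = "a photo of an astronaut riding a horse on mars"

# the base covers the first 80% of the schedule and hands off latents instead of decoding
latents = base(
    prompt,
    image=init_image,
    strength=0.9,
    pag_scale=3.0,
    pag_applied_layers=["mid"],
    denoising_end=0.8,
    output_type="latent",
).images

# the refiner picks up at the same point via denoising_start and decodes the final image
image = refiner(prompt, image=latents, denoising_start=0.8).images[0]
image.save("astronaut_i2i_refined.png")
```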