diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..9c562dd96e445205bb2bf6b43c5c1b95a8713576 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_211621/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_211904/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_212001/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_212022/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_212052/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_212110/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_212155/input.png filter=lfs diff=lfs merge=lfs -text +saved_results/20241129_212220/input.png filter=lfs diff=lfs merge=lfs -text diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..0efcdede32830bf45cc66d7a2b27e1f8ae03590a --- /dev/null +++ b/app.py @@ -0,0 +1,317 @@ +import spaces +import gradio as gr +import torch +from PIL import Image +import random +import numpy as np +import torch +import os +import json +from datetime import datetime + +from pipeline_rf import RectifiedFlowPipeline + +# Load the Stable Diffusion Inpainting model +pipe = RectifiedFlowPipeline.from_pretrained("XCLIU/2_rectified_flow_from_sd_1_5", torch_dtype=torch.float32) +pipe.to("cuda") # Comment this line if GPU is not available + +# Function to process the image +@spaces.GPU(duration=20) +def process_image( + image_layers, prompt, seed, randomize_seed, num_inference_steps, + max_steps, learning_rate, optimization_steps, inverseproblem, mask_input +): + image_with_mask = { + "image": image_layers["background"], + "mask": image_layers["layers"][0] if mask_input is None else mask_input + } + + # Set seed + if randomize_seed or seed is None: + seed = random.randint(0, 2**32 - 1) + generator = torch.Generator("cuda").manual_seed(int(seed)) + + # Unpack image and mask + if image_with_mask is None: + return None, f"❌ Please upload an image and create a mask." + image = image_with_mask["image"] + mask = image_with_mask["mask"] + + if image is None or mask is None: + return None, f"❌ Please ensure both image and mask are provided." + + # Convert images to RGB + image = image.convert("RGB") + mask = mask.split()[-1] # Convert mask to grayscale + + if not prompt: + return None, f"❌ Please provide a prompt for inpainting." + with torch.autocast("cuda"): + # Placeholder for using advanced parameters in the future + # Adjust parameters according to advanced settings if applicable + result = pipe( + prompt=prompt, + negative_prompt="", + input_image=image.resize((512, 512)), + mask_image=mask.resize((512, 512)), + num_inference_steps=num_inference_steps, + guidance_scale=0.0, + generator=generator, + save_masked_image=True, + output_path="test.png", + learning_rate=learning_rate, + max_steps=max_steps, + optimization_steps=optimization_steps, + inverseproblem=inverseproblem + ).images[0] + return result, f"✅ Inpainting completed with seed {seed}." + +# Design the Gradio interface +with gr.Blocks() as demo: + gr.Markdown( + """ + + """ + ) + gr.Markdown("

🍲 FlowChef 🍲

") + gr.Markdown("

Inversion-, Gradient-, and Training-Free Steering of InstaFlow (SD v1.5) for Inpainting (Inverse Problem)

") + gr.Markdown("

Project Page | Paper

(Steering Rectified Flow Models in the Vector Field for Controlled Image Generation)

") + gr.Markdown("

💡 We recommend going through our introductory tutorial before getting started!

") + gr.Markdown("

⚡ For better performance, check out our demo on Flux!

") + + # Store current state + current_input_image = None + current_mask = None + current_output_image = None + current_params = {} + + # Images at the top + with gr.Row(): + with gr.Column(): + image_input = gr.ImageMask( + # source="upload", + # tool="sketch", + type="pil", + label="Input Image and Mask", + image_mode="RGBA", + height=512, + width=512, + ) + with gr.Column(): + output_image = gr.Image(label="Output Image") + + # All options below + with gr.Column(): + prompt = gr.Textbox( + label="Prompt", + placeholder="Describe what should appear in the masked area..." + ) + with gr.Row(): + seed = gr.Number(label="Seed (Optional)", value=None) + randomize_seed = gr.Checkbox(label="Randomize Seed", value=True) + num_inference_steps = gr.Slider( + label="Inference Steps", minimum=50, maximum=200, value=100 + ) + # Advanced settings in an accordion + with gr.Accordion("Advanced Settings", open=False): + max_steps = gr.Slider(label="Max Steps", minimum=50, maximum=200, value=200) + learning_rate = gr.Slider(label="Learning Rate", minimum=0.01, maximum=0.5, value=0.02) + optimization_steps = gr.Slider(label="Optimization Steps", minimum=1, maximum=10, value=1) + inverseproblem = gr.Checkbox(label="Apply mask on pixel space", value=False, info="Enables inverse problem formulation for inpainting by masking the RGB image itself. Hence, to avoid artifacts we increase the mask size manually during inference.") + mask_input = gr.Image( + type="pil", + label="Optional Mask", + image_mode="RGBA", + ) + with gr.Row(): + run_button = gr.Button("Run", variant="primary") + # save_button = gr.Button("Save Data", variant="secondary") + + # def update_visibility(selected_mode): + # if selected_mode == "Inpainting": + # return gr.update(visible=True), gr.update(visible=False) + # else: + # return gr.update(visible=True), gr.update(visible=True) + + # mode.change( + # update_visibility, + # inputs=mode, + # outputs=[prompt, edit_prompt], + # ) + + def run_and_update_status( + image_with_mask, prompt, seed, randomize_seed, num_inference_steps, + max_steps, learning_rate, optimization_steps, inverseproblem, mask_input + ): + result_image, result_status = process_image( + image_with_mask, prompt, seed, randomize_seed, num_inference_steps, + max_steps, learning_rate, optimization_steps, inverseproblem, mask_input + ) + + # Store current state + global current_input_image, current_mask, current_output_image, current_params + + current_input_image = image_with_mask["background"] if image_with_mask else None + current_mask = mask_input if mask_input is not None else (image_with_mask["layers"][0] if image_with_mask else None) + current_output_image = result_image + current_params = { + "prompt": prompt, + "seed": seed, + "randomize_seed": randomize_seed, + "num_inference_steps": num_inference_steps, + "max_steps": max_steps, + "learning_rate": learning_rate, + "optimization_steps": optimization_steps, + "inverseproblem": inverseproblem, + } + + return result_image + + def save_data(): + if not os.path.exists("saved_results"): + os.makedirs("saved_results") + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + save_dir = os.path.join("saved_results", timestamp) + os.makedirs(save_dir) + + # Save images + if current_input_image: + current_input_image.save(os.path.join(save_dir, "input.png")) + if current_mask: + current_mask.save(os.path.join(save_dir, "mask.png")) + if current_output_image: + current_output_image.save(os.path.join(save_dir, "output.png")) + + # Save parameters + with 
open(os.path.join(save_dir, "parameters.json"), "w") as f: + json.dump(current_params, f, indent=4) + + return f"✅ Data saved in {save_dir}" + + run_button.click( + fn=run_and_update_status, + inputs=[ + image_input, + prompt, + seed, + randomize_seed, + num_inference_steps, + max_steps, + learning_rate, + optimization_steps, + inverseproblem, + mask_input + ], + outputs=output_image, + ) + + # save_button.click(fn=save_data) + + gr.Markdown( + "" + ) + + def load_example_image_with_mask(image_path): + # Load the image + image = Image.open(image_path) + # Create an empty mask of the same size + mask = Image.new('L', image.size, 0) + return {"background": image, "layers": [mask], "composite": image} + + examples_dir = "assets" + volcano_dict = load_example_image_with_mask(os.path.join(examples_dir, "vulcano.jpg")) + dog_dict = load_example_image_with_mask(os.path.join(examples_dir, "dog.webp")) + + gr.Examples( + examples=[ + [ + "./saved_results/20241129_210517/input.png", # image with mask + "./saved_results/20241129_210517/mask.png", + "./saved_results/20241129_210517/output.png", + "a cat", # prompt + 0, # seed + True, # randomize_seed + 200, # num_inference_steps + 200, # max_steps + 0.1, # learning_rate + 1, # optimization_steps + False, + ], + [ + "./saved_results/20241129_211124/input.png", # image with mask + "./saved_results/20241129_211124/mask.png", + "./saved_results/20241129_211124/output.png", + " ", # prompt + 0, # seed + True, # randomize_seed + 200, # num_inference_steps + 200, # max_steps + 0.1, # learning_rate + 5, # optimization_steps + False, + ], + [ + "./saved_results/20241129_212001/input.png", # image with mask + "./saved_results/20241129_212001/mask.png", + "./saved_results/20241129_212001/output.png", + " ", # prompt + 52, # seed + False, # randomize_seed + 200, # num_inference_steps + 200, # max_steps + 0.02, # learning_rate + 10, # optimization_steps + True, + ], + [ + "./saved_results/20241129_212052/input.png", # image with mask + "./saved_results/20241129_212052/mask.png", + "./saved_results/20241129_212052/output.png", + " ", # prompt + 52, # seed + False, # randomize_seed + 200, # num_inference_steps + 200, # max_steps + 0.02, # learning_rate + 10, # optimization_steps + True, + ], + [ + "./saved_results/20241129_212155/input.png", # image with mask + "./saved_results/20241129_212155/mask.png", + "./saved_results/20241129_212155/output.png", + " ", # prompt + 52, # seed + False, # randomize_seed + 200, # num_inference_steps + 200, # max_steps + 0.02, # learning_rate + 10, # optimization_steps + True, + ], + ], + inputs=[ + image_input, + mask_input, + output_image, + prompt, + seed, + randomize_seed, + num_inference_steps, + max_steps, + learning_rate, + optimization_steps, + inverseproblem + ], + # outputs=[output_image], + # fn=run_and_update_status, + # cache_examples=True, + ) +demo.launch() diff --git a/assets/dog.webp b/assets/dog.webp new file mode 100644 index 0000000000000000000000000000000000000000..a6a06d82b3005294c420d83f59f3a8c1bf9ec13a Binary files /dev/null and b/assets/dog.webp differ diff --git a/assets/vulcano.jpg b/assets/vulcano.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2433ee61e0a507a77cf61595f6c4d15bffc9f543 Binary files /dev/null and b/assets/vulcano.jpg differ diff --git a/assets/vulcano_mask.webp b/assets/vulcano_mask.webp new file mode 100644 index 0000000000000000000000000000000000000000..3293b6c76c6a3fbe05fd0b70031d19a2059f9079 Binary files /dev/null and b/assets/vulcano_mask.webp differ diff --git 
a/pipeline_rf.py b/pipeline_rf.py new file mode 100644 index 0000000000000000000000000000000000000000..ec7bc422b2e446069956b92fa8e4ea187b3000df --- /dev/null +++ b/pipeline_rf.py @@ -0,0 +1,732 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from typing import Any, Callable, Dict, List, Optional, Union + +import torch +from packaging import version +from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer + +from diffusers.configuration_utils import FrozenDict +from diffusers.image_processor import VaeImageProcessor +from diffusers.loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin +from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.models.lora import adjust_lora_scale_text_encoder +from diffusers.schedulers import KarrasDiffusionSchedulers +from diffusers.utils import ( + deprecate, + logging, + replace_example_docstring, +) +from diffusers.utils.torch_utils import randn_tensor +from diffusers.pipelines.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker + +import os +import torch + +from torchvision import transforms as TF + +import sys +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + +def retrieve_latents( + encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample" +): + if hasattr(encoder_output, "latent_dist") and sample_mode == "sample": + return encoder_output.latent_dist.sample(generator) + elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax": + return encoder_output.latent_dist.mode() + elif hasattr(encoder_output, "latents"): + return encoder_output.latents + else: + raise AttributeError("Could not access latents of provided encoder_output") + + +def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): + """ + Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and + Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). 
See Section 3.4 + """ + std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) + std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) + # rescale the results from guidance (fixes overexposure) + noise_pred_rescaled = noise_cfg * (std_text / std_cfg) + # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images + noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg + return noise_cfg + + +class RectifiedFlowPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin): + r""" + Pipeline for text-to-image generation using Rectified Flow and Euler discretization. + This customized pipeline is based on StableDiffusionPipeline from the official Diffusers library (0.21.4) + + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods + implemented for all pipelines (downloading, saving, running on a particular device, etc.). + + The pipeline also inherits the following loading methods: + - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings + - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights + - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights + - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations. + text_encoder ([`~transformers.CLIPTextModel`]): + Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)). + tokenizer ([`~transformers.CLIPTokenizer`]): + A `CLIPTokenizer` to tokenize text. + unet ([`UNet2DConditionModel`]): + A `UNet2DConditionModel` to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. + safety_checker ([`StableDiffusionSafetyChecker`]): + Classification module that estimates whether generated images could be considered offensive or harmful. + Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details + about a model's potential harms. + feature_extractor ([`~transformers.CLIPImageProcessor`]): + A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. + """ + model_cpu_offload_seq = "text_encoder->unet->vae" + _optional_components = ["safety_checker", "feature_extractor"] + _exclude_from_cpu_offload = ["safety_checker"] + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + tokenizer: CLIPTokenizer, + unet: UNet2DConditionModel, + scheduler: KarrasDiffusionSchedulers, + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPImageProcessor, + requires_safety_checker: bool = True, + ): + super().__init__() + + if hasattr(scheduler.config, "steps_offset") and scheduler.config.steps_offset != 1: + deprecation_message = ( + f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`" + f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure " + "to update the config accordingly as leaving `steps_offset` might led to incorrect results" + " in future versions. 
If you have downloaded this checkpoint from the Hugging Face Hub," + " it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`" + " file" + ) + deprecate("steps_offset!=1", "1.0.0", deprecation_message, standard_warn=False) + new_config = dict(scheduler.config) + new_config["steps_offset"] = 1 + scheduler._internal_dict = FrozenDict(new_config) + + if hasattr(scheduler.config, "clip_sample") and scheduler.config.clip_sample is True: + deprecation_message = ( + f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`." + " `clip_sample` should be set to False in the configuration file. Please make sure to update the" + " config accordingly as not setting `clip_sample` in the config might lead to incorrect results in" + " future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it would be very" + " nice if you could open a Pull request for the `scheduler/scheduler_config.json` file" + ) + deprecate("clip_sample not set", "1.0.0", deprecation_message, standard_warn=False) + new_config = dict(scheduler.config) + new_config["clip_sample"] = False + scheduler._internal_dict = FrozenDict(new_config) + + if safety_checker is None and requires_safety_checker: + logger.warning( + f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. Ensure" + " that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered" + " results in services or applications open to the public. Both the diffusers team and Hugging Face" + " strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling" + " it only for use-cases that involve analyzing network behavior or auditing its results. For more" + " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ." + ) + + if safety_checker is not None and feature_extractor is None: + raise ValueError( + "Make sure to define a feature extractor when loading {self.__class__} if you want to use the safety" + " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead." + ) + + is_unet_version_less_0_9_0 = hasattr(unet.config, "_diffusers_version") and version.parse( + version.parse(unet.config._diffusers_version).base_version + ) < version.parse("0.9.0.dev0") + is_unet_sample_size_less_64 = hasattr(unet.config, "sample_size") and unet.config.sample_size < 64 + if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64: + deprecation_message = ( + "The configuration file of the unet has set the default `sample_size` to smaller than" + " 64 which seems highly unlikely. If your checkpoint is a fine-tuned version of any of the" + " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-" + " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- runwayml/stable-diffusion-v1-5" + " \n- runwayml/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the" + " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`" + " in the config might lead to incorrect results in future versions. 
If you have downloaded this" + " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for" + " the `unet/config.json` file" + ) + deprecate("sample_size<64", "1.0.0", deprecation_message, standard_warn=False) + new_config = dict(unet.config) + new_config["sample_size"] = 64 + unet._internal_dict = FrozenDict(new_config) + + self.register_modules( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=safety_checker, + feature_extractor=feature_extractor, + ) + self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) + self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) + self.register_to_config(requires_safety_checker=requires_safety_checker) + + def enable_vae_slicing(self): + r""" + Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to + compute decoding in several steps. This is useful to save some memory and allow larger batch sizes. + """ + self.vae.enable_slicing() + + def disable_vae_slicing(self): + r""" + Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_slicing() + + def enable_vae_tiling(self): + r""" + Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to + compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow + processing larger images. + """ + self.vae.enable_tiling() + + def disable_vae_tiling(self): + r""" + Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to + computing decoding in one step. + """ + self.vae.disable_tiling() + + def _encode_prompt( + self, + prompt, + device, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt=None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + lora_scale: Optional[float] = None, + ): + deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple." + deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False) + + prompt_embeds_tuple = self.encode_prompt( + prompt=prompt, + device=device, + num_images_per_prompt=num_images_per_prompt, + do_classifier_free_guidance=do_classifier_free_guidance, + negative_prompt=negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + lora_scale=lora_scale, + ) + + # concatenate for backwards comp + prompt_embeds = torch.cat([prompt_embeds_tuple[1], prompt_embeds_tuple[0]]) + + return prompt_embeds + + def encode_prompt( + self, + prompt, + device, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt=None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + lora_scale: Optional[float] = None, + ): + r""" + Encodes the prompt into text encoder hidden states. 
+ + Args: + prompt (`str` or `List[str]`, *optional*): + prompt to be encoded + device: (`torch.device`): + torch device + num_images_per_prompt (`int`): + number of images that should be generated per prompt + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. If not defined, one has to pass + `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is + less than `1`). + prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not + provided, text embeddings will be generated from `prompt` input argument. + negative_prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt + weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input + argument. + lora_scale (`float`, *optional*): + A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded. + """ + # set lora scale so that monkey patched LoRA + # function of text encoder can correctly access it + if lora_scale is not None and isinstance(self, LoraLoaderMixin): + self._lora_scale = lora_scale + + # dynamically adjust the LoRA scale + adjust_lora_scale_text_encoder(self.text_encoder, lora_scale) + + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + if prompt_embeds is None: + # textual inversion: procecss multi-vector tokens if necessary + if isinstance(self, TextualInversionLoaderMixin): + prompt = self.maybe_convert_prompt(prompt, self.tokenizer) + + text_inputs = self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids + + if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal( + text_input_ids, untruncated_ids + ): + removed_text = self.tokenizer.batch_decode( + untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1] + ) + logger.warning( + "The following part of your input was truncated because CLIP can only handle sequences up to" + f" {self.tokenizer.model_max_length} tokens: {removed_text}" + ) + + if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask: + attention_mask = text_inputs.attention_mask.to(device) + else: + attention_mask = None + + prompt_embeds = self.text_encoder( + text_input_ids.to(device), + attention_mask=attention_mask, + ) + prompt_embeds = prompt_embeds[0] + + if self.text_encoder is not None: + prompt_embeds_dtype = self.text_encoder.dtype + elif self.unet is not None: + prompt_embeds_dtype = self.unet.dtype + else: + prompt_embeds_dtype = prompt_embeds.dtype + + prompt_embeds = prompt_embeds.to(dtype=prompt_embeds_dtype, device=device) + + bs_embed, seq_len, _ = prompt_embeds.shape + # duplicate text embeddings for each generation per prompt, using mps friendly method + prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1) + prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1) + + # 
get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance and negative_prompt_embeds is None: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [""] * batch_size + elif prompt is not None and type(prompt) is not type(negative_prompt): + raise TypeError( + f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !=" + f" {type(prompt)}." + ) + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:" + f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches" + " the batch size of `prompt`." + ) + else: + uncond_tokens = negative_prompt + + # textual inversion: procecss multi-vector tokens if necessary + if isinstance(self, TextualInversionLoaderMixin): + uncond_tokens = self.maybe_convert_prompt(uncond_tokens, self.tokenizer) + + max_length = prompt_embeds.shape[1] + uncond_input = self.tokenizer( + uncond_tokens, + padding="max_length", + max_length=max_length, + truncation=True, + return_tensors="pt", + ) + + if hasattr(self.text_encoder.config, "use_attention_mask") and self.text_encoder.config.use_attention_mask: + attention_mask = uncond_input.attention_mask.to(device) + else: + attention_mask = None + + negative_prompt_embeds = self.text_encoder( + uncond_input.input_ids.to(device), + attention_mask=attention_mask, + ) + negative_prompt_embeds = negative_prompt_embeds[0] + + if do_classifier_free_guidance: + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method + seq_len = negative_prompt_embeds.shape[1] + + negative_prompt_embeds = negative_prompt_embeds.to(dtype=prompt_embeds_dtype, device=device) + + negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1) + negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1) + + return prompt_embeds, negative_prompt_embeds + + def run_safety_checker(self, image, device, dtype): + if self.safety_checker is None: + has_nsfw_concept = None + else: + if torch.is_tensor(image): + feature_extractor_input = self.image_processor.postprocess(image, output_type="pil") + else: + feature_extractor_input = self.image_processor.numpy_to_pil(image) + safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, clip_input=safety_checker_input.pixel_values.to(dtype) + ) + return image, has_nsfw_concept + + def decode_latents(self, latents): + deprecation_message = "The decode_latents method is deprecated and will be removed in 1.0.0. Please use VaeImageProcessor.postprocess(...) instead" + deprecate("decode_latents", "1.0.0", deprecation_message, standard_warn=False) + + latents = 1 / self.vae.config.scaling_factor * latents + image = self.vae.decode(latents, return_dict=False)[0] + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 + image = image.cpu().permute(0, 2, 3, 1).float().numpy() + return image + + def prepare_extra_step_kwargs(self, generator, eta): + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature + # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. 
+ # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 + # and should be between [0, 1] + + accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs["eta"] = eta + + # check if the scheduler accepts generator + accepts_generator = "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs["generator"] = generator + return extra_step_kwargs + + def check_inputs( + self, + prompt, + height, + width, + callback_steps, + negative_prompt=None, + prompt_embeds=None, + negative_prompt_embeds=None, + ): + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + if (callback_steps is None) or ( + callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0) + ): + raise ValueError( + f"`callback_steps` has to be a positive integer but is {callback_steps} of type" + f" {type(callback_steps)}." + ) + + if prompt is not None and prompt_embeds is not None: + raise ValueError( + f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to" + " only forward one of the two." + ) + elif prompt is None and prompt_embeds is None: + raise ValueError( + "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined." + ) + elif prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)): + raise ValueError(f"`prompt` has to be of type `str` or `list` but is {type(prompt)}") + + if negative_prompt is not None and negative_prompt_embeds is not None: + raise ValueError( + f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:" + f" {negative_prompt_embeds}. Please make sure to only forward one of the two." + ) + + if prompt_embeds is not None and negative_prompt_embeds is not None: + if prompt_embeds.shape != negative_prompt_embeds.shape: + raise ValueError( + "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but" + f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`" + f" {negative_prompt_embeds.shape}." + ) + + def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None): + shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor) + if isinstance(generator, list) and len(generator) != batch_size: + raise ValueError( + f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" + f" size of {batch_size}. Make sure the batch size matches the length of the generators." 
+ ) + + if latents is None: + latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype) + else: + latents = latents.to(device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]] = None, + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, + callback_steps: int = 1, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + guidance_rescale: float = 0.0, + optimization_steps: int = 1, + learning_rate: float = 0.05, + max_steps: int = 50, + input_image = None, + mask_image = None, + save_masked_image = False, + output_path : str = "", + inverseproblem: bool = False, + ): + assert input_image is not None, "Please provide an input image for the inpainting task." + + # 0. Default height and width to unet + height = height or self.unet.config.sample_size * self.vae_scale_factor + width = width or self.unet.config.sample_size * self.vae_scale_factor + + # 1. Check inputs + self.check_inputs(prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds) + + # 2. Define call parameters + batch_size = 1 if prompt is None else (1 if isinstance(prompt, str) else len(prompt)) + device = self._execution_device + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + prompt_embeds, negative_prompt_embeds = self.encode_prompt( + prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt, + prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, + lora_scale=cross_attention_kwargs.get("scale", None) if cross_attention_kwargs is not None else None, + ) + if do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) + + # 4. Prepare timesteps + timesteps = [(1. - i/num_inference_steps) * 1000. for i in range(num_inference_steps)] + + # Convert PIL image to tensor + mask_image = mask_image.convert("L") + mask = TF.ToTensor()(mask_image).to(device=device, dtype=self.unet.dtype) + mask = TF.Resize(input_image.size, interpolation=TF.InterpolationMode.NEAREST)(mask) + mask = (mask > 0.5) + mask = ~mask + + # 4. Preprocess image + image = self.image_processor.preprocess(input_image).to(device=device, dtype=self.unet.dtype) + if inverseproblem: + image = image*mask + image = image.to(device=device, dtype=self.unet.dtype) + noisy_image = image.detach().clone() + + latents = retrieve_latents(self.vae.encode(noisy_image), generator=generator) * self.vae.config.scaling_factor + + # 5. Prepare latent variables + num_channels_latents = self.unet.config.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, num_channels_latents, height, width, + prompt_embeds.dtype, device, generator, latents, + ) + + # 6. 
Prepare extra step kwargs + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + print(mask.shape) + h, w = latents.shape[2], latents.shape[3] + mask = TF.Resize((h, w))(mask.to(device)) + mask = (~(mask > 0.1)).float() + + # Slightly dilate the mask to increase coverage + # We do this to ensure that the VAE model does not have the adverse effect due to the compression + if inverseproblem: + print("Dilating the masks.") + kernel_size = 3 # Decreased from 3 to 2 + kernel = torch.ones((1, 1, kernel_size, kernel_size), device=device) + mask = torch.nn.functional.conv2d( + mask.unsqueeze(0), + kernel, + padding=kernel_size//2 + ).squeeze(0) + mask = torch.clamp(mask, 0, 1) + + mask = (mask > 0.1).float() + + # Apply the mask to latents_copy + random_latents = self.prepare_latents( + batch_size * num_images_per_prompt, num_channels_latents, height, width, + prompt_embeds.dtype, device, generator + ) + + bool_mask = mask.bool().unsqueeze(0).expand_as(latents) + mask = ~bool_mask + + masked_latents = (latents * mask).clone().detach() + if save_masked_image: + masked_image = self.vae.decode(masked_latents / self.vae.config.scaling_factor, return_dict=False)[0] + masked_image = self.image_processor.postprocess(masked_image, output_type="pil")[0] + masked_image_path = output_path.replace(".", "_ip_degraded.") + masked_image.save(masked_image_path) + print(f"Masked image saved to: {masked_image_path}") + + latents = random_latents.clone().detach() + + self.unet.eval() + self.vae.eval() + + # Initialize timing and memory tracking if not already done + if not hasattr(self, 'avg_total_time'): + self.avg_total_time = 0 + self.num_calls = 0 + if not hasattr(self, 'max_memory'): + self.max_memory = 0 + + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + latents = self.perform_denoising_step( + latents, t, prompt_embeds, do_classifier_free_guidance, guidance_scale, + device, i, optimization_steps, learning_rate, + max_steps, timesteps, mask, masked_latents, noisy_image + ) + + if callback is not None and i % callback_steps == 0: + callback(i // getattr(self.scheduler, "order", 1), t, latents) + + progress_bar.update() + + # 10. Post-processing + image = self.post_process_image(latents, output_type) + + # 11. 
Offload all models + self.maybe_free_model_hooks() + + if not return_dict: + return (image, None) + + return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=None) + + def load_and_preprocess_image(self, image_path, custom_image_processor, device): + from diffusers.utils import load_image + img = load_image(image_path) + img = img.resize((512, 512)) + return custom_image_processor(img).unsqueeze(0).to(device) + + def perform_denoising_step(self, latents, t, prompt_embeds, do_classifier_free_guidance, guidance_scale, + device, step, optimization_steps, learning_rate, + max_steps, timesteps, mask, masked_latents, noisy_image): + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + vec_t = torch.ones((latent_model_input.shape[0],), device=latents.device) * t + v_pred = self.unet(latent_model_input, vec_t, encoder_hidden_states=prompt_embeds).sample + + if do_classifier_free_guidance: + v_pred_neg, v_pred_text = v_pred.chunk(2) + v_pred = v_pred_neg + guidance_scale * (v_pred_text - v_pred_neg) + + if step <= max_steps: + latents = self.optimize_latents(latents, v_pred, t, + device, optimization_steps, learning_rate, mask, masked_latents, noisy_image) + + + return latents + (1.0 / len(timesteps)) * v_pred + + def optimize_latents(self, latents, v_pred, t, device, optimization_steps, learning_rate, + mask, masked_latents, noisy_image): + with torch.enable_grad(): + latents = torch.autograd.Variable(latents, requires_grad=True) + optimizer = torch.optim.Adam([latents], lr=learning_rate) + + for _ in range(optimization_steps): + latents_p = latents + t/1000 * v_pred + loss = (0.001*torch.nn.functional.mse_loss(latents_p, masked_latents, reduction='none')*mask).mean() + + loss.backward() + optimizer.step() + optimizer.zero_grad() + + return latents + + def decode_latents(self, latents): + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] + return self.image_processor.postprocess(image, output_type="pt")[0] + + def post_process_image(self, latents, output_type): + if output_type == "latent": + return latents + + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] + do_denormalize = [True] * image.shape[0] + return self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..77a338c9b8cd6339b4ddc7b394607d5d5414c587 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,9 @@ +spaces +diffusers==0.31.0 +gradio==5.6.0 +numpy==2.1.3 +Pillow==11.0.0 +torch==2.1.2 +torch_xla==2.5.1 +torchvision==0.16.2 +transformers==4.45.2 diff --git a/saved_results/20241129_210517/input.png b/saved_results/20241129_210517/input.png new file mode 100644 index 0000000000000000000000000000000000000000..be95fe96bbaa270dd63caf011576ff05074f4184 Binary files /dev/null and b/saved_results/20241129_210517/input.png differ diff --git a/saved_results/20241129_210517/mask.png b/saved_results/20241129_210517/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..4a3971b3a230521f47464d02c4ff0464fffc0ed4 Binary files /dev/null and b/saved_results/20241129_210517/mask.png differ diff --git a/saved_results/20241129_210517/output.png b/saved_results/20241129_210517/output.png new file mode 100644 index 0000000000000000000000000000000000000000..9ac68c047d69567e575717f2041e6cfc1703a4e6 Binary files /dev/null and 
b/saved_results/20241129_210517/output.png differ diff --git a/saved_results/20241129_210517/parameters.json b/saved_results/20241129_210517/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..a1336539baf47adff2e803a59ffe19aa319533dd --- /dev/null +++ b/saved_results/20241129_210517/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": "a cat", + "seed": 0, + "randomize_seed": true, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.1, + "optimization_steps": 1, + "inverseproblem": false +} \ No newline at end of file diff --git a/saved_results/20241129_211124/input.png b/saved_results/20241129_211124/input.png new file mode 100644 index 0000000000000000000000000000000000000000..bb63c2d883c8f7c23b357c224b8714cf4c00428e Binary files /dev/null and b/saved_results/20241129_211124/input.png differ diff --git a/saved_results/20241129_211124/mask.png b/saved_results/20241129_211124/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..c0b3cae14032f922b3823769338a9b3ce0dbd68e Binary files /dev/null and b/saved_results/20241129_211124/mask.png differ diff --git a/saved_results/20241129_211124/output.png b/saved_results/20241129_211124/output.png new file mode 100644 index 0000000000000000000000000000000000000000..bd8a6fc327cbfd4ac3b4425aebfd4a45569b1e9f Binary files /dev/null and b/saved_results/20241129_211124/output.png differ diff --git a/saved_results/20241129_211124/parameters.json b/saved_results/20241129_211124/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..34cd68cf297c3783fe7911eaa80dce682fc41e30 --- /dev/null +++ b/saved_results/20241129_211124/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 0, + "randomize_seed": true, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.1, + "optimization_steps": 5, + "inverseproblem": false +} \ No newline at end of file diff --git a/saved_results/20241129_211142/input.png b/saved_results/20241129_211142/input.png new file mode 100644 index 0000000000000000000000000000000000000000..bb63c2d883c8f7c23b357c224b8714cf4c00428e Binary files /dev/null and b/saved_results/20241129_211142/input.png differ diff --git a/saved_results/20241129_211142/mask.png b/saved_results/20241129_211142/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..c0b3cae14032f922b3823769338a9b3ce0dbd68e Binary files /dev/null and b/saved_results/20241129_211142/mask.png differ diff --git a/saved_results/20241129_211142/output.png b/saved_results/20241129_211142/output.png new file mode 100644 index 0000000000000000000000000000000000000000..c80f5bdae41cb2ce655764d494f7f286f5e17c8c Binary files /dev/null and b/saved_results/20241129_211142/output.png differ diff --git a/saved_results/20241129_211142/parameters.json b/saved_results/20241129_211142/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..fb569007fc7bfe8f66c7616512b6568c1e4ab132 --- /dev/null +++ b/saved_results/20241129_211142/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 0, + "randomize_seed": true, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.1, + "optimization_steps": 5, + "inverseproblem": true +} \ No newline at end of file diff --git a/saved_results/20241129_211621/input.png b/saved_results/20241129_211621/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_211621/input.png 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_211621/mask.png b/saved_results/20241129_211621/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..16fd1b0723c8b16a1a04c50937a51f233d5b35ad Binary files /dev/null and b/saved_results/20241129_211621/mask.png differ diff --git a/saved_results/20241129_211621/output.png b/saved_results/20241129_211621/output.png new file mode 100644 index 0000000000000000000000000000000000000000..733712bdfde39d9fb5ed60c56b80610cb400b641 Binary files /dev/null and b/saved_results/20241129_211621/output.png differ diff --git a/saved_results/20241129_211621/parameters.json b/saved_results/20241129_211621/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..fb569007fc7bfe8f66c7616512b6568c1e4ab132 --- /dev/null +++ b/saved_results/20241129_211621/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 0, + "randomize_seed": true, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.1, + "optimization_steps": 5, + "inverseproblem": true +} \ No newline at end of file diff --git a/saved_results/20241129_211904/input.png b/saved_results/20241129_211904/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_211904/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_211904/mask.png b/saved_results/20241129_211904/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..a89afefe5da7e69fcec4b23232888867a1cf77dd Binary files /dev/null and b/saved_results/20241129_211904/mask.png differ diff --git a/saved_results/20241129_211904/output.png b/saved_results/20241129_211904/output.png new file mode 100644 index 0000000000000000000000000000000000000000..4de0cfb4ceda9d6fb42a7fb0735c93c66f2abcfe Binary files /dev/null and b/saved_results/20241129_211904/output.png differ diff --git a/saved_results/20241129_211904/parameters.json b/saved_results/20241129_211904/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..7056e796e3d79c662df53a618eef507a2381036c --- /dev/null +++ b/saved_results/20241129_211904/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 0, + "randomize_seed": true, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 5, + "inverseproblem": true +} \ No newline at end of file diff --git a/saved_results/20241129_212001/input.png b/saved_results/20241129_212001/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_212001/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_212001/mask.png b/saved_results/20241129_212001/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..a89afefe5da7e69fcec4b23232888867a1cf77dd Binary files /dev/null and b/saved_results/20241129_212001/mask.png differ diff --git a/saved_results/20241129_212001/output.png b/saved_results/20241129_212001/output.png new file mode 100644 index 
0000000000000000000000000000000000000000..9c06272cc69491ea960b45bceb402d0e7e899257 Binary files /dev/null and b/saved_results/20241129_212001/output.png differ diff --git a/saved_results/20241129_212001/parameters.json b/saved_results/20241129_212001/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..dd730f6cdbe432700efdbc1db0457b8c7f93e8e4 --- /dev/null +++ b/saved_results/20241129_212001/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 52, + "randomize_seed": false, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 10, + "inverseproblem": true +} \ No newline at end of file diff --git a/saved_results/20241129_212022/input.png b/saved_results/20241129_212022/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_212022/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_212022/mask.png b/saved_results/20241129_212022/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..a89afefe5da7e69fcec4b23232888867a1cf77dd Binary files /dev/null and b/saved_results/20241129_212022/mask.png differ diff --git a/saved_results/20241129_212022/output.png b/saved_results/20241129_212022/output.png new file mode 100644 index 0000000000000000000000000000000000000000..e484849cdf22ec064a650228820a900449d53819 Binary files /dev/null and b/saved_results/20241129_212022/output.png differ diff --git a/saved_results/20241129_212022/parameters.json b/saved_results/20241129_212022/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..6795e04282d980f85950ff6334b0f05e1ac7df8a --- /dev/null +++ b/saved_results/20241129_212022/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 52, + "randomize_seed": false, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 10, + "inverseproblem": false +} \ No newline at end of file diff --git a/saved_results/20241129_212052/input.png b/saved_results/20241129_212052/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_212052/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_212052/mask.png b/saved_results/20241129_212052/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..920756457cf5bf8db03a3cc24af1fb6570d4cfdf Binary files /dev/null and b/saved_results/20241129_212052/mask.png differ diff --git a/saved_results/20241129_212052/output.png b/saved_results/20241129_212052/output.png new file mode 100644 index 0000000000000000000000000000000000000000..5a953a0078b1c1d613f95cc409c5ee717c06bca0 Binary files /dev/null and b/saved_results/20241129_212052/output.png differ diff --git a/saved_results/20241129_212052/parameters.json b/saved_results/20241129_212052/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..dd730f6cdbe432700efdbc1db0457b8c7f93e8e4 --- /dev/null +++ b/saved_results/20241129_212052/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 52, + "randomize_seed": false, + "num_inference_steps": 200, + 
"max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 10, + "inverseproblem": true +} \ No newline at end of file diff --git a/saved_results/20241129_212110/input.png b/saved_results/20241129_212110/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_212110/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_212110/mask.png b/saved_results/20241129_212110/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..920756457cf5bf8db03a3cc24af1fb6570d4cfdf Binary files /dev/null and b/saved_results/20241129_212110/mask.png differ diff --git a/saved_results/20241129_212110/output.png b/saved_results/20241129_212110/output.png new file mode 100644 index 0000000000000000000000000000000000000000..2dbbb38fd44a8e9a6da2f1be7bc13abf6c8eb80c Binary files /dev/null and b/saved_results/20241129_212110/output.png differ diff --git a/saved_results/20241129_212110/parameters.json b/saved_results/20241129_212110/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..6795e04282d980f85950ff6334b0f05e1ac7df8a --- /dev/null +++ b/saved_results/20241129_212110/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 52, + "randomize_seed": false, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 10, + "inverseproblem": false +} \ No newline at end of file diff --git a/saved_results/20241129_212155/input.png b/saved_results/20241129_212155/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_212155/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git a/saved_results/20241129_212155/mask.png b/saved_results/20241129_212155/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..c2dabcb0e6b48afad8e54456e177e15b2f847f0e Binary files /dev/null and b/saved_results/20241129_212155/mask.png differ diff --git a/saved_results/20241129_212155/output.png b/saved_results/20241129_212155/output.png new file mode 100644 index 0000000000000000000000000000000000000000..20c6184351a40a9d7ea57da46b09a36bfc29a272 Binary files /dev/null and b/saved_results/20241129_212155/output.png differ diff --git a/saved_results/20241129_212155/parameters.json b/saved_results/20241129_212155/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..dd730f6cdbe432700efdbc1db0457b8c7f93e8e4 --- /dev/null +++ b/saved_results/20241129_212155/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 52, + "randomize_seed": false, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 10, + "inverseproblem": true +} \ No newline at end of file diff --git a/saved_results/20241129_212220/input.png b/saved_results/20241129_212220/input.png new file mode 100644 index 0000000000000000000000000000000000000000..a6e3e6984793a804fbaeaba1a86a5b1c06aca6ab --- /dev/null +++ b/saved_results/20241129_212220/input.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac27eecc91790ee401253a78d3aff8ca7fd1401b92a074cd81dc96f54449cc5 +size 1363292 diff --git 
a/saved_results/20241129_212220/mask.png b/saved_results/20241129_212220/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..c2dabcb0e6b48afad8e54456e177e15b2f847f0e Binary files /dev/null and b/saved_results/20241129_212220/mask.png differ diff --git a/saved_results/20241129_212220/output.png b/saved_results/20241129_212220/output.png new file mode 100644 index 0000000000000000000000000000000000000000..a4a25e8497ef08f969f84c1493c9394b3beeea2e Binary files /dev/null and b/saved_results/20241129_212220/output.png differ diff --git a/saved_results/20241129_212220/parameters.json b/saved_results/20241129_212220/parameters.json new file mode 100644 index 0000000000000000000000000000000000000000..6795e04282d980f85950ff6334b0f05e1ac7df8a --- /dev/null +++ b/saved_results/20241129_212220/parameters.json @@ -0,0 +1,10 @@ +{ + "prompt": " ", + "seed": 52, + "randomize_seed": false, + "num_inference_steps": 200, + "max_steps": 200, + "learning_rate": 0.02, + "optimization_steps": 10, + "inverseproblem": false +} \ No newline at end of file
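
Note: for local testing outside Gradio, the new `RectifiedFlowPipeline` can be called directly the same way `app.py` does. The sketch below is illustrative only: `input.png`/`mask.png` are placeholder paths (not files added by this diff), a CUDA device is assumed, and the hyperparameters mirror the bundled `saved_results/20241129_210517/parameters.json` example.

```python
# Minimal sketch of invoking pipeline_rf.RectifiedFlowPipeline directly,
# mirroring the call made in app.py. Placeholder paths are assumptions.
import torch
from PIL import Image

from pipeline_rf import RectifiedFlowPipeline

pipe = RectifiedFlowPipeline.from_pretrained(
    "XCLIU/2_rectified_flow_from_sd_1_5", torch_dtype=torch.float32
)
pipe.to("cuda")  # comment out if no GPU is available

image = Image.open("input.png").convert("RGB")  # image to inpaint (placeholder path)
mask = Image.open("mask.png").convert("L")      # white = region to regenerate (placeholder path)

result = pipe(
    prompt="a cat",                        # prompt used in the 20241129_210517 example
    negative_prompt="",
    input_image=image.resize((512, 512)),
    mask_image=mask.resize((512, 512)),
    num_inference_steps=200,
    guidance_scale=0.0,                    # the demo runs the 2-rectified-flow model without CFG
    generator=torch.Generator("cuda").manual_seed(0),
    learning_rate=0.1,                     # Adam step size used in optimize_latents
    max_steps=200,                         # steer only the first max_steps denoising steps
    optimization_steps=1,                  # Adam iterations per denoising step
    inverseproblem=False,                  # True also masks the RGB input and dilates the mask
).images[0]

result.save("result.png")
```

Internally, each denoising step first runs `optimize_latents` (a few Adam iterations pulling the predicted endpoint toward the reference latents in the unmasked region) and then applies the Euler update `latents + (1 / num_inference_steps) * v_pred`; the bundled `saved_results/*/parameters.json` files record the hyperparameter combinations used for the example gallery.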