Damian Stewart committed on
Commit 50b9662
Parent(s): 6067469

add tensorboard, validation, sample output

Files changed (5)
  1. StableDiffuser.py +32 -16
  2. app.py +111 -38
  3. memory_efficiency.py +1 -1
  4. requirements.txt +1 -0
  5. train.py +162 -57
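
For orientation before the per-file diffs: the new training entry point can be driven roughly like this. This is a minimal sketch against the signatures added in this commit; the prompts, paths, and module patterns are illustrative placeholders, not values from the repo.

    # sketch only: exercises the train() signature introduced below
    from train import train

    save_path = train(
        "CompVis/stable-diffusion-v1-4",  # repo_id_or_path
        512,                              # img_size
        "Van Gogh",                       # prompt: the concept to erase
        modules=[".attn2"],               # placeholder; app.py derives these from train_method
        freeze_modules=[],
        iterations=1000,
        negative_guidance=1.0,
        lr=1e-5,
        save_path="models/van_gogh.pt",
        seed=42,
        save_every_n_steps=200,
        validate_every_n_steps=20,
        validation_prompts=["Van Gogh"],
        sample_positive_prompts=["a field of sunflowers"],
        sample_negative_prompts=[""],
    )
    # scalar losses and sample images land under logs/<save_path stem>/ for TensorBoard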
StableDiffuser.py CHANGED
@@ -36,11 +36,13 @@ class StableDiffuser(torch.nn.Module):
     def __init__(self,
                  scheduler='LMS',
                  keep_pipeline=False,
+                 native_img_size=512,
                  repo_id_or_path="CompVis/stable-diffusion-v1-4"):

         super().__init__()

         self.pipeline = StableDiffusionPipeline.from_pretrained(repo_id_or_path)
+        self.native_image_size = native_img_size

         self.vae = self.pipeline.vae
         self.unet = self.pipeline.unet
@@ -60,8 +62,10 @@ class StableDiffuser(torch.nn.Module):
         if not keep_pipeline:
             del self.pipeline

-    def get_noise(self, batch_size, width, height, generator=None):
+    def get_noise(self, batch_size, width=None, height=None, generator=None):
         param = list(self.parameters())[0]
+        width = width or self.native_image_size
+        height = height or self.native_image_size
         return torch.randn(
             (batch_size, self.unet.config.in_channels, width // 8, height // 8),
             generator=generator).type(param.dtype).to(param.device)
@@ -95,16 +99,20 @@ class StableDiffuser(torch.nn.Module):
     def set_scheduler_timesteps(self, n_steps):
         self.scheduler.set_timesteps(n_steps, device=self.unet.device)

-    def get_initial_latents(self, n_imgs, height, width, n_prompts, generator=None):
+    def get_initial_latents(self, n_imgs, height=None, width=None, n_prompts=1, generator=None):
+        height = height or self.native_image_size
+        width = width or self.native_image_size
         noise = self.get_noise(n_imgs, height, width, generator=generator).repeat(n_prompts, 1, 1, 1)
         latents = noise * self.scheduler.init_noise_sigma
         return latents

-    def get_text_embeddings(self, prompts, negative_prompts=None, n_imgs=1):
+    def get_cond_and_uncond_embeddings(self, prompts, negative_prompts=None, n_imgs=1):
         text_tokens = self.text_tokenize(prompts)
         text_embeddings = self.text_encode(text_tokens)
         if negative_prompts is None:
-            negative_prompts = [""] * len(prompts)
+            negative_prompts = []
+        while len(negative_prompts) < len(prompts):
+            negative_prompts.append("")
         unconditional_tokens = self.text_tokenize(negative_prompts)
         unconditional_embeddings = self.text_encode(unconditional_tokens)
         text_embeddings = torch.cat([unconditional_embeddings, text_embeddings]).repeat_interleave(n_imgs, dim=0)
@@ -136,12 +144,12 @@ class StableDiffuser(torch.nn.Module):
     @torch.no_grad()
     def diffusion(self,
                   latents,
-                  text_embeddings,
+                  uncond_and_cond_embeddings,
                   end_iteration=1000,
                   start_iteration=0,
                   return_steps=False,
                   pred_x0=False,
                   trace_args=None,
                   show_progress=True,
                   use_amp=False,
                   **kwargs):
@@ -159,7 +167,7 @@ class StableDiffuser(torch.nn.Module):
             noise_pred = self.predict_noise(
                 iteration,
                 latents,
-                text_embeddings,
+                uncond_and_cond_embeddings,
                 **kwargs)

             # compute the previous noisy sample x_t -> x_t-1
@@ -182,30 +190,38 @@ class StableDiffuser(torch.nn.Module):

     @torch.no_grad()
     def __call__(self,
-                 prompts,
-                 negative_prompts,
-                 width=512,
-                 height=512,
+                 prompts=None,
+                 negative_prompts=None,
+                 combined_embeddings=None,  # uncond first, then cond
+                 width=None,
+                 height=None,
                  n_steps=50,
                  n_imgs=1,
                  end_iteration=None,
                  generator=None,
+                 use_amp=False,
                  **kwargs
                  ):

         assert 0 <= n_steps <= 1000

-        if not isinstance(prompts, list):
-            prompts = [prompts]
+        if combined_embeddings is None:
+            assert prompts is not None, "missing prompts or combined_embeddings"
+            combined_embeddings = self.get_cond_and_uncond_embeddings(prompts, negative_prompts, n_imgs=n_imgs)
+
+        width = width or self.native_image_size
+        height = height or self.native_image_size
+        num_prompts = combined_embeddings.shape[0] // 2

         self.set_scheduler_timesteps(n_steps)
-        latents = self.get_initial_latents(n_imgs, height, width, len(prompts), generator=generator)
-        text_embeddings = self.get_text_embeddings(prompts,negative_prompts,n_imgs=n_imgs)
+        latents = self.get_initial_latents(n_imgs, height, width, num_prompts, generator=generator)
+
         end_iteration = end_iteration or n_steps
         latents_steps, trace_steps = self.diffusion(
             latents,
-            text_embeddings,
+            combined_embeddings,
             end_iteration=end_iteration,
+            use_amp=use_amp,
             **kwargs
         )

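The reworked __call__ accepts either prompt strings or precomputed embeddings. A minimal sketch of both entry points, assuming a constructed StableDiffuser on GPU; the prompt text is illustrative, and decoding of the returned latents is elided.

    diffuser = StableDiffuser(scheduler='DDIM', native_img_size=512).to('cuda')

    # 1) prompt strings: embeddings are computed internally
    outputs = diffuser(prompts=["a watercolor landscape"], n_steps=50)

    # 2) precomputed embeddings: unconditional rows first, then conditional
    embeddings = diffuser.get_cond_and_uncond_embeddings(["a watercolor landscape"])
    outputs = diffuser(combined_embeddings=embeddings, n_steps=50)
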
app.py CHANGED
@@ -7,12 +7,13 @@ from diffusers.utils import is_xformers_available
 from finetuning import FineTunedModel
 from StableDiffuser import StableDiffuser
 from memory_efficiency import MemoryEfficiencyWrapper
-from train import train
+from train import train, training_should_cancel

 import os

+model_map = {}
 def populate_model_map():
-    model_map = {}
+    global model_map
     for model_file in os.listdir('models'):
         path = 'models/' + model_file
         if any([existing_path == path for existing_path in model_map.values()]):
@@ -28,6 +29,7 @@ SHARED_UI_WARNING = f'''## Attention - Training using the ESD-u method does not
 <center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="margin-top:0;margin-bottom:0" src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></center>
 '''

+# work around Gradio's weird threading

 class Demo:

@@ -70,24 +72,11 @@ class Demo:
                 self.negative_prompt_input_infr = gr.Text(
                     label="Negative prompt"
                 )
-
-                with gr.Row():
-
-                    self.model_dropdown = gr.Dropdown(
-                        label="ESD Model",
-                        choices= list(model_map.keys()),
-                        value='Van Gogh',
-                        interactive=True
-                    )
-                    self.model_reload_button = gr.Button(
-                        value="🔄",
-                        interactive=True
-                    )
-
-                self.seed_infr = gr.Number(
-                    label="Seed",
-                    value=42
-                )
+                self.seed_infr = gr.Number(
+                    label="Seed",
+                    value=42
+                )
+                with gr.Row(scale=1):
                 self.img_width_infr = gr.Slider(
                     label="Image width",
                     minimum=256,
@@ -95,7 +84,6 @@ class Demo:
                     value=512,
                     step=64
                 )
-
                 self.img_height_infr = gr.Slider(
                     label="Image height",
                     minimum=256,
@@ -104,6 +92,18 @@ class Demo:
                     step=64
                 )

+                with gr.Row(scale=1):
+                    self.model_dropdown = gr.Dropdown(
+                        label="ESD Model",
+                        choices= list(model_map.keys()),
+                        value='Van Gogh',
+                        interactive=True
+                    )
+                    self.model_reload_button = gr.Button(
+                        value="🔄",
+                        interactive=True
+                    )
+
                 self.base_repo_id_or_path_input_infr = gr.Text(
                     label="Base model",
                     value="CompVis/stable-diffusion-v1-4",
@@ -131,14 +131,12 @@ class Demo:
             with gr.Tab("Train") as training_column:

                 with gr.Row():
-
                     self.explain_train= gr.Markdown(interactive=False,
                         value='In this part you can erase any concept from Stable Diffusion. Enter a prompt for the concept or style you want to erase, and select ESD-x if you want to focus erasure on prompts that mention the concept explicitly. [NOTE: ESD-u is currently unavailable in this space. But you can duplicate the space and run it on GPU with VRAM >40GB for enabling ESD-u]. With default settings, it takes about 15 minutes to fine-tune the model; then you can try inference above or download the weights. The training code used here is slightly different than the code tested in the original paper. Code and details are at [github link](https://github.com/rohitgandikota/erasing).')

                 with gr.Row():

                     with gr.Column(scale=3):
-
                         self.train_model_input = gr.Text(
                             label="Model to Edit",
                             value="CompVis/stable-diffusion-v1-4",
@@ -196,7 +194,7 @@ class Demo:
                         )
                         self.train_save_every_input = gr.Number(
                             value=-1,
-                            label="Save every N steps",
+                            label="Save Every N Steps",
                             info="If >0, save the model throughout training at the given step interval."
                         )

@@ -210,6 +208,28 @@ class Demo:
                         self.train_use_gradient_checkpointing_input = gr.Checkbox(
                             label="Gradient checkpointing", value=False)

+                        self.train_validation_prompts = gr.TextArea(
+                            label="Validation Prompts",
+                            placeholder="Probably, you want to put the \"Prompt to Erase\" in here as the first entry...",
+                            value='',
+                            info="Prompts for producing validation graphs, one per line."
+                        )
+                        self.train_sample_positive_prompts = gr.TextArea(
+                            label="Sample Prompts",
+                            value='',
+                            info="Positive prompts for generating sample images, one per line."
+                        )
+                        self.train_sample_negative_prompts = gr.TextArea(
+                            label="Sample Negative Prompts",
+                            value='',
+                            info="Negative prompts for use when generating sample images. One for each positive prompt, or leave empty for none."
+                        )
+                        self.train_validate_every_n_steps = gr.Number(
+                            label="Validate Every N Steps",
+                            value=20,
+                            info="Validation and sample generation will be run at intervals of this many steps"
+                        )
+
                     with gr.Column(scale=1):

                         self.train_status = gr.Button(value='', variant='primary', label='Status', interactive=False)
@@ -219,7 +239,7 @@ class Demo:
                         )

                         self.train_cancel_button = gr.Button(
-                            value="Cancel training"
+                            value="Cancel Training"
                         )

                         self.download = gr.Files()
@@ -260,6 +280,7 @@ class Demo:
                     value='', variant='primary', label='Status', interactive=False)
                 self.export_button = gr.Button(
                     value="Export")
+                self.export_download = gr.Files()

         self.infr_button.click(self.inference, inputs = [
             self.prompt_input_infr,
@@ -292,10 +313,16 @@ class Demo:
             self.train_use_gradient_checkpointing_input,
             self.train_seed_input,
             self.train_save_every_input,
+            self.train_validation_prompts,
+            self.train_sample_positive_prompts,
+            self.train_sample_negative_prompts,
+            self.train_validate_every_n_steps
             ],
             outputs=[self.train_button, self.train_status, self.download, self.model_dropdown]
         )
-        self.train_cancel_button.click(lambda x: print("cancel pressed"), cancels=[train_event])
+        self.train_cancel_button.click(self.cancel_training,
+                                       inputs=[],
+                                       outputs=[self.train_cancel_button])

         self.export_button.click(self.export, inputs = [
             self.model_dropdown_export,
@@ -303,23 +330,51 @@ class Demo:
             self.save_path_input_export,
             self.save_half_export
             ],
-            outputs=[self.export_status]
+            outputs=[self.export_button, self.export_status, self.export_download]
         )

     def reload_models(self, model_dropdown):
         current_model_name = model_dropdown
         global model_map
-        model_map = populate_model_map()
-        return [gr.Dropdown.update(choices=list(model_map.keys()), value=current_model_name)]
+        populate_model_map()
+        return [self.model_dropdown.update(choices=list(model_map.keys()), value=current_model_name)]
+
+    def cancel_training(self):
+        train.training_should_cancel = True
+        return [gr.update(value="Cancelling...", interactive=False)]

     def train(self, repo_id_or_path, img_size, prompt, train_method, neg_guidance, iterations, lr,
               use_adamw8bit=True, use_xformers=False, use_amp=False, use_gradient_checkpointing=False,
               seed=-1, save_every=-1,
-              pbar = gr.Progress(track_tqdm=True)):
-
+              validation_prompts: str=None, sample_positive_prompts: str=None, sample_negative_prompts: str=None, validate_every_n_steps=-1,
+              pbar=gr.Progress(track_tqdm=True)):
+        """
+
+        :param repo_id_or_path:
+        :param img_size:
+        :param prompt:
+        :param train_method:
+        :param neg_guidance:
+        :param iterations:
+        :param lr:
+        :param use_adamw8bit:
+        :param use_xformers:
+        :param use_amp:
+        :param use_gradient_checkpointing:
+        :param seed:
+        :param save_every:
+        :param validation_prompts: split on \n
+        :param sample_positive_prompts: split on \n
+        :param sample_negative_prompts: split on \n
+        :param validate_every_n_steps: interval in steps between validation runs
+        :param pbar:
+        :return:
+        """
         if self.training:
             return [gr.update(interactive=True, value='Train'), gr.update(value='Someone else is training... Try again soon'), None, gr.update()]

+        train.training_should_cancel = False
+
         print(f"Training {repo_id_or_path} at {img_size} to remove '{prompt}'.")
         print(f" {train_method}, negative guidance {neg_guidance}, lr {lr}, {iterations} iterations.")
         print(f" {'✅' if use_gradient_checkpointing else '❌'} gradient checkpointing")
@@ -348,23 +403,38 @@ class Demo:
                 break
             # repeat until a not-in-use path is found

+        validation_prompts = [] if validation_prompts is None else validation_prompts.split('\n')
+        sample_positive_prompts = [] if sample_positive_prompts is None else sample_positive_prompts.split('\n')
+        sample_negative_prompts = [] if sample_negative_prompts is None else sample_negative_prompts.split('\n')
+        print(f"validation prompts: {validation_prompts}")
+        print(f"sample positive prompts: {sample_positive_prompts}")
+        print(f"sample negative prompts: {sample_negative_prompts}")
+
         try:
             self.training = True
             self.train_cancel_button.update(interactive=True)
-            train(repo_id_or_path, img_size, prompt, modules, frozen, iterations, neg_guidance, lr, save_path,
+            save_path = train(repo_id_or_path, img_size, prompt, modules, frozen, iterations, neg_guidance, lr, save_path,
                   use_adamw8bit, use_xformers, use_amp, use_gradient_checkpointing,
-                  seed=int(seed), save_every=int(save_every))
+                  seed=int(seed), save_every_n_steps=int(save_every),
+                  validate_every_n_steps=validate_every_n_steps, validation_prompts=validation_prompts,
+                  sample_positive_prompts=sample_positive_prompts, sample_negative_prompts=sample_negative_prompts)
+            if save_path is None:
+                new_model_name = None
+                finished_message = "Training cancelled."
+            else:
+                new_model_name = f'{os.path.basename(save_path)}'
+                finished_message = f'Done Training! Try your model ({new_model_name}) in the "Test" tab'
         finally:
             self.training = False
             self.train_cancel_button.update(interactive=False)

         torch.cuda.empty_cache()

-        new_model_name = f'{os.path.basename(save_path)}'
-        model_map[new_model_name] = save_path
+        if new_model_name is not None:
+            model_map[new_model_name] = save_path

         return [gr.update(interactive=True, value='Train'),
-                gr.update(value=f'Done Training! Try your model ({new_model_name}) in the "Test" tab'),
+                gr.update(value=finished_message),
                 save_path,
                 gr.Dropdown.update(choices=list(model_map.keys()), value=new_model_name)]

@@ -373,7 +443,7 @@ class Demo:
         checkpoint = torch.load(model_path)
         diffuser = StableDiffuser(scheduler='DDIM',
                                   keep_pipeline=True,
-                                  repo_id_or_path=base_repo_id_or_path
+                                  repo_id_or_path=base_repo_id_or_path,
                                   ).eval()
         finetuner = FineTunedModel.from_checkpoint(diffuser, checkpoint).eval()
         with finetuner:
@@ -381,7 +451,10 @@ class Demo:
             diffuser = diffuser.half()
         diffuser.pipeline.to('cpu', torch_dtype=torch.float16)
         diffuser.pipeline.save_pretrained(save_path)
-        return [gr.update(value=f'Done! Your model is at {save_path}.')]
+
+        return [gr.update(interactive=True, value='Export'),
+                gr.update(value=f'Done Exporting!'),
+                save_path]


     def inference(self, prompt, negative_prompt, seed, width, height, model_name, base_repo_id_or_path, pbar = gr.Progress(track_tqdm=True)):
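
One caveat on the cancellation wiring: cancel_training sets an attribute on the imported train function object, while train.py's loop reads its module-level training_should_cancel global, so the flag may be more reliably flipped through the module itself. A hedged sketch of that variant; the request_cancel helper is hypothetical, not part of this commit.

    import train as train_module  # the module, not the function

    def request_cancel():
        # observed by the "if training_should_cancel:" check inside train.train()
        train_module.training_should_cancel = True
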
memory_efficiency.py CHANGED
@@ -44,7 +44,7 @@ class MemoryEfficiencyWrapper:
             print("xformers disabled via arg, using attention slicing instead")
             self.diffuser.unet.set_attention_slice("auto")

-        self.diffuser.vae = self.diffuser.vae.to(self.diffuser.vae.device, dtype=torch.float16 if self.use_amp else torch.float32)
+        #self.diffuser.vae = self.diffuser.vae.to(self.diffuser.vae.device, dtype=torch.float16 if self.use_amp else torch.float32)
         self.diffuser.unet = self.diffuser.unet.to(self.diffuser.unet.device, dtype=torch.float32)

         try:
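
For context on the commented-out cast: this change moves toward leaving module weights in float32 and letting AMP autocast pick reduced-precision kernels per region, rather than hard-casting the VAE to fp16. A generic sketch of that pattern, not code from this repo; vae and images are placeholders.

    import torch
    from torch.cuda.amp import autocast

    with autocast(enabled=True):
        # weights stay fp32; autocast runs eligible ops in fp16 inside the region
        latents = vae.encode(images).latent_dist.sample()
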
requirements.txt CHANGED
@@ -9,3 +9,4 @@ git+https://github.com/davidbau/baukit.git
 xformers
 bitsandbytes==0.38.1
 safetensors
+tensorboard
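
With tensorboard installed, the scalars and sample images that train.py writes via SummaryWriter (under logs/<save_path stem>) can be viewed with the standard CLI:

    tensorboard --logdir logs
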
train.py CHANGED
@@ -1,7 +1,10 @@
+import os.path
 import random

 from accelerate.utils import set_seed
+from diffusers import StableDiffusionPipeline
 from torch.cuda.amp import autocast
+from torchvision import transforms

 from StableDiffuser import StableDiffuser
 from finetuning import FineTunedModel
@@ -10,13 +13,90 @@ from tqdm import tqdm

 from isolate_rng import isolate_rng
 from memory_efficiency import MemoryEfficiencyWrapper
+from torch.utils.tensorboard import SummaryWriter
+
+training_should_cancel = False
+
+def validate(diffuser: StableDiffuser, finetuner: FineTunedModel,
+             validation_embeddings: torch.FloatTensor,
+             neutral_embeddings: torch.FloatTensor,
+             sample_embeddings: torch.FloatTensor,
+             logger: SummaryWriter, use_amp: bool,
+             global_step: int,
+             validation_seed: int = 555,
+             ):
+    print("validating...")
+    with isolate_rng(include_cuda=True), torch.no_grad():
+        set_seed(validation_seed)
+        criteria = torch.nn.MSELoss()
+        negative_guidance = 1
+        val_count = 5
+
+        nsteps=50
+        num_validation_prompts = validation_embeddings.shape[0] // 2
+        for i in range(0, num_validation_prompts):
+            accumulated_loss = None
+            this_validation_embeddings = validation_embeddings[i*2:i*2+2]
+            for j in range(val_count):
+                iteration = random.randint(1, nsteps)
+                diffused_latents = get_diffused_latents(diffuser, nsteps, this_validation_embeddings, iteration, use_amp)
+
+                with autocast(enabled=use_amp):
+                    positive_latents = diffuser.predict_noise(iteration, diffused_latents, this_validation_embeddings, guidance_scale=1)
+                    neutral_latents = diffuser.predict_noise(iteration, diffused_latents, neutral_embeddings, guidance_scale=1)
+
+                with finetuner, autocast(enabled=use_amp):
+                    negative_latents = diffuser.predict_noise(iteration, diffused_latents, this_validation_embeddings, guidance_scale=1)
+
+                loss = criteria(negative_latents, neutral_latents - (negative_guidance*(positive_latents - neutral_latents)))
+                accumulated_loss = (accumulated_loss or 0) + loss.item()
+            logger.add_scalar(f"loss/val_{i}", accumulated_loss/val_count, global_step=global_step)
+
+        num_samples = sample_embeddings.shape[0] // 2
+        for i in range(0, num_samples):
+            print(f'making sample {i}...')
+            with finetuner:
+                pipeline = StableDiffusionPipeline(vae=diffuser.vae,
+                                                   text_encoder=diffuser.text_encoder,
+                                                   tokenizer=diffuser.tokenizer,
+                                                   unet=diffuser.unet,
+                                                   scheduler=diffuser.scheduler,
+                                                   safety_checker=None,
+                                                   feature_extractor=None,
+                                                   requires_safety_checker=False)
+                images = pipeline(prompt_embeds=sample_embeddings[i*2+1:i*2+2], negative_prompt_embeds=sample_embeddings[i*2:i*2+1],
+                                  num_inference_steps=50)
+                image_tensor = transforms.ToTensor()(images.images[0])
+                logger.add_image(f"samples/{i}", img_tensor=image_tensor, global_step=global_step)
+
+        """
+        with finetuner, torch.cuda.amp.autocast(enabled=use_amp):
+            images = diffuser(
+                combined_embeddings=sample_embeddings[i*2:i*2+2],
+                n_steps=50
+            )
+        logger.add_images(f"samples/{i}", images)
+        """
+
+    torch.cuda.empty_cache()

 def train(repo_id_or_path, img_size, prompt, modules, freeze_modules, iterations, negative_guidance, lr, save_path,
-          use_adamw8bit=True, use_xformers=True, use_amp=True, use_gradient_checkpointing=False, seed=-1, save_every=-1):
+          use_adamw8bit=True, use_xformers=True, use_amp=True, use_gradient_checkpointing=False, seed=-1,
+          save_every_n_steps=-1, validate_every_n_steps=-1,
+          validation_prompts=[], sample_positive_prompts=[], sample_negative_prompts=[]):
+
+    diffuser = None
+    loss = None
+    optimizer = None
+    finetuner = None
+    negative_latents = None
+    neutral_latents = None
+    positive_latents = None

     nsteps = 50
-    diffuser = StableDiffuser(scheduler='DDIM', repo_id_or_path=repo_id_or_path).to('cuda')
+    print(f"using img_size of {img_size}")
+    diffuser = StableDiffuser(scheduler='DDIM', repo_id_or_path=repo_id_or_path, native_img_size=img_size).to('cuda')
+    logger = SummaryWriter(log_dir=f"logs/{os.path.splitext(os.path.basename(save_path))[0]}")

     memory_efficiency_wrapper = MemoryEfficiencyWrapper(diffuser=diffuser, use_amp=use_amp, use_xformers=use_xformers,
                                                         use_gradient_checkpointing=use_gradient_checkpointing )
@@ -40,16 +120,18 @@ def train(repo_id_or_path, img_size, prompt, modules, freeze_modules, iterations
     pbar = tqdm(range(iterations))

     with torch.no_grad():
-        neutral_text_embeddings = diffuser.get_text_embeddings([''],n_imgs=1)
-        positive_text_embeddings = diffuser.get_text_embeddings([prompt],n_imgs=1)
+        neutral_text_embeddings = diffuser.get_cond_and_uncond_embeddings([''], n_imgs=1)
+        positive_text_embeddings = diffuser.get_cond_and_uncond_embeddings([prompt], n_imgs=1)
+        validation_embeddings = diffuser.get_cond_and_uncond_embeddings(validation_prompts, n_imgs=1)
+        sample_embeddings = diffuser.get_cond_and_uncond_embeddings(sample_positive_prompts, sample_negative_prompts, n_imgs=1)

-    del diffuser.vae
-    del diffuser.text_encoder
-    del diffuser.tokenizer
+    #if use_amp:
+    #    diffuser.vae = diffuser.vae.to(diffuser.vae.device, dtype=torch.float16)

-    torch.cuda.empty_cache()
+    #del diffuser.text_encoder
+    #del diffuser.tokenizer

-    print(f"using img_size of {img_size}")
+    torch.cuda.empty_cache()

     if seed == -1:
         seed = random.randint(0, 2 ** 30)
@@ -58,65 +140,88 @@
     prev_losses = []
     start_loss = None
     max_prev_loss_count = 10
-    for i in pbar:
-        with torch.no_grad():
-            diffuser.set_scheduler_timesteps(nsteps)
-            optimizer.zero_grad()
-
-            iteration = torch.randint(1, nsteps - 1, (1,)).item()
-            latents = diffuser.get_initial_latents(1, width=img_size, height=img_size, n_prompts=1)
-
-            with finetuner:
-                latents_steps, _ = diffuser.diffusion(
-                    latents,
-                    positive_text_embeddings,
-                    start_iteration=0,
-                    end_iteration=iteration,
-                    guidance_scale=3,
-                    show_progress=False,
-                    use_amp=use_amp
-                )
-
-            diffuser.set_scheduler_timesteps(1000)
-            iteration = int(iteration / nsteps * 1000)
-
-        with autocast(enabled=use_amp):
-            positive_latents = diffuser.predict_noise(iteration, latents_steps[0], positive_text_embeddings, guidance_scale=1)
-            neutral_latents = diffuser.predict_noise(iteration, latents_steps[0], neutral_text_embeddings, guidance_scale=1)
-
-        with finetuner:
-            with autocast(enabled=use_amp):
-                negative_latents = diffuser.predict_noise(iteration, latents_steps[0], positive_text_embeddings, guidance_scale=1)
-
-        positive_latents.requires_grad = False
-        neutral_latents.requires_grad = False
-
-        # loss = criteria(e_n, e_0) works the best try 5000 epochs
-        loss = criteria(negative_latents, neutral_latents - (negative_guidance*(positive_latents - neutral_latents)))
-        memory_efficiency_wrapper.step(optimizer, loss)
-        optimizer.zero_grad()
-
-        # print moving average loss
-        prev_losses.append(loss.detach().clone())
-        if len(prev_losses) > max_prev_loss_count:
-            prev_losses.pop(0)
-        if start_loss is None:
-            start_loss = prev_losses[-1]
-        if len(prev_losses) >= max_prev_loss_count:
-            moving_average_loss = sum(prev_losses) / len(prev_losses)
-            print(
-                f"step {i}: loss={loss.item()} (avg={moving_average_loss.item()}, start ∆={(moving_average_loss - start_loss).item()}")
-        else:
-            print(f"step {i}: loss={loss.item()}")
-
-        if save_every > 0 and ((i % save_every) == (save_every-1)):
-            torch.save(finetuner.state_dict(), save_path + f"__step_{i}.pt")
-
-    torch.save(finetuner.state_dict(), save_path)
-
-    del diffuser, loss, optimizer, finetuner, negative_latents, neutral_latents, positive_latents, latents_steps, latents
-
-    torch.cuda.empty_cache()
+    try:
+        for i in pbar:
+            if training_should_cancel:
+                print("received cancellation request")
+                return None
+
+            with torch.no_grad():
+                optimizer.zero_grad()
+
+                iteration = torch.randint(1, nsteps - 1, (1,)).item()
+
+                with finetuner:
+                    diffused_latents = get_diffused_latents(diffuser, nsteps, positive_text_embeddings, iteration, use_amp)
+
+                iteration = int(iteration / nsteps * 1000)
+
+            with autocast(enabled=use_amp):
+                positive_latents = diffuser.predict_noise(iteration, diffused_latents, positive_text_embeddings, guidance_scale=1)
+                neutral_latents = diffuser.predict_noise(iteration, diffused_latents, neutral_text_embeddings, guidance_scale=1)
+
+            with finetuner:
+                with autocast(enabled=use_amp):
+                    negative_latents = diffuser.predict_noise(iteration, diffused_latents, positive_text_embeddings, guidance_scale=1)
+
+            positive_latents.requires_grad = False
+            neutral_latents.requires_grad = False
+
+            # loss = criteria(e_n, e_0) works the best try 5000 epochs
+            loss = criteria(negative_latents, neutral_latents - (negative_guidance*(positive_latents - neutral_latents)))
+            memory_efficiency_wrapper.step(optimizer, loss)
+            optimizer.zero_grad()
+
+            logger.add_scalar("loss", loss.item(), global_step=i)
+
+            # print moving average loss
+            prev_losses.append(loss.detach().clone())
+            if len(prev_losses) > max_prev_loss_count:
+                prev_losses.pop(0)
+            if start_loss is None:
+                start_loss = prev_losses[-1]
+            if len(prev_losses) >= max_prev_loss_count:
+                moving_average_loss = sum(prev_losses) / len(prev_losses)
+                print(
+                    f"step {i}: loss={loss.item()} (avg={moving_average_loss.item()}, start ∆={(moving_average_loss - start_loss).item()}")
+            else:
+                print(f"step {i}: loss={loss.item()}")
+
+            if save_every_n_steps > 0 and ((i+1) % save_every_n_steps) == 0:
+                torch.save(finetuner.state_dict(), save_path + f"__step_{i+1}.pt")
+            if validate_every_n_steps > 0 and ((i+1) % validate_every_n_steps) == 0:
+                validate(diffuser, finetuner,
+                         validation_embeddings=validation_embeddings,
+                         sample_embeddings=sample_embeddings,
+                         neutral_embeddings=neutral_text_embeddings,
+                         logger=logger, use_amp=False, global_step=i)
+        torch.save(finetuner.state_dict(), save_path)
+        return save_path
+    finally:
+        del diffuser, loss, optimizer, finetuner, negative_latents, neutral_latents, positive_latents
+        torch.cuda.empty_cache()
+
+
+def get_diffused_latents(diffuser, nsteps, text_embeddings, end_iteration, use_amp):
+    diffuser.set_scheduler_timesteps(nsteps)
+    latents = diffuser.get_initial_latents(1, n_prompts=1)
+    latents_steps, _ = diffuser.diffusion(
+        latents,
+        text_embeddings,
+        start_iteration=0,
+        end_iteration=end_iteration,
+        guidance_scale=3,
+        show_progress=False,
+        use_amp=use_amp
+    )
+    # because return_latents is not passed to diffuser.diffusion(), latents_steps should have only 1 entry
+    # but we take the "last" (-1) entry because paranoia
+    diffused_latents = latents_steps[-1]
+    diffuser.set_scheduler_timesteps(1000)
+    del latents_steps, latents
+    return diffused_latents


 if __name__ == '__main__':

     import argparse
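
A note on the embedding layout the new code appears to assume: get_cond_and_uncond_embeddings concatenates all unconditional rows first and all conditional rows after, so the pairwise [i*2:i*2+2] slices in validate() only line up when a single prompt is encoded per call. A quick shape sketch with illustrative tensors:

    import torch

    n_prompts, tokens, dim = 2, 77, 768
    uncond = torch.zeros(n_prompts, tokens, dim)  # stands in for encoded ""
    cond = torch.ones(n_prompts, tokens, dim)     # stands in for encoded prompts

    combined = torch.cat([uncond, cond])  # rows: [u0, u1, c0, c1]
    pair_0 = combined[0:2]                # -> [u0, u1], not [u0, c0]
    # encoding each prompt in its own call yields the [u, c] pairs
    # that the validate() slicing expects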