custom-diffusion

Build error

App Files Community

Nupur Kumari committed on Dec 18, 2022

Commit

e417d7a

1 Parent(s): 7db1586

custom-diffusion-space

Browse files

Files changed (2) hide show

app.py +7 -3
trainer.py +3 -0

app.py CHANGED Viewed

@@ -76,8 +76,8 @@ def create_training_demo(trainer: Trainer,
                 class_prompt = gr.Textbox(label='Regularization set Prompt',
                                             max_lines=1, placeholder='Example: "cat"')
                 gr.Markdown('''
-                    - We use "\<new1\>" appended in front of the concept. E.g. "\<new1\> cat".
-                    - For a new concept, use "photo of a \<new1\> cat" for concept_prompt and "cat" for class_prompt.
                     - For a style concept, use "painting in the style of \<new1\> art" for concept_prompt and "art" for class_prompt.
                     ''')
             with gr.Box():
@@ -95,10 +95,13 @@ def create_training_demo(trainer: Trainer,
                     label='Number of Gradient Accumulation',
                     value=1,
                     precision=0)
-                use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
                 gr.Markdown('''
                     - Only enable one of "Train Text Encoder" or "modifier token" or None.
                     - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
                     - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
                     ''')
@@ -129,6 +132,7 @@ def create_training_demo(trainer: Trainer,
                              gradient_accumulation,
                              batch_size,
                              use_8bit_adam,
                          ],
                          outputs=[
                              training_status,

                 class_prompt = gr.Textbox(label='Regularization set Prompt',
                                             max_lines=1, placeholder='Example: "cat"')
                 gr.Markdown('''
+                    - Use "\<new1\>" appended in front of the concept. E.g. "\<new1\> cat" if modifier_token is enabled.
+                    - For a new concept e.g. concept_prompt is "photo of a \<new1\> cat" and "cat" for class_prompt.
                     - For a style concept, use "painting in the style of \<new1\> art" for concept_prompt and "art" for class_prompt.
                     ''')
             with gr.Box():
                     label='Number of Gradient Accumulation',
                     value=1,
                     precision=0)
+                with gr.Row():
+                    use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
+                    gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
                 gr.Markdown('''
                     - Only enable one of "Train Text Encoder" or "modifier token" or None.
                     - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
+                    - Enable gradient checkpointing to save memory (~14GB) at the expense of slower backward pass.
                     - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
                     ''')
                              gradient_accumulation,
                              batch_size,
                              use_8bit_adam,
+                             gradient_checkpointing
                          ],
                          outputs=[
                              training_status,

trainer.py CHANGED Viewed

@@ -69,6 +69,7 @@ class Trainer:
         gradient_accumulation: int,
         batch_size: int,
         use_8bit_adam: bool,
     ) -> tuple[dict, list[pathlib.Path]]:
         if not torch.cuda.is_available():
             raise gr.Error('CUDA is not available.')
@@ -111,6 +112,8 @@ class Trainer:
             command += ' --use_8bit_adam'
         if train_text_encoder:
             command += f' --train_text_encoder'
         with open(self.output_dir / 'train.sh', 'w') as f:
             command_s = ' '.join(command.split())

         gradient_accumulation: int,
         batch_size: int,
         use_8bit_adam: bool,
+        gradient_checkpointing: bool,
     ) -> tuple[dict, list[pathlib.Path]]:
         if not torch.cuda.is_available():
             raise gr.Error('CUDA is not available.')
             command += ' --use_8bit_adam'
         if train_text_encoder:
             command += f' --train_text_encoder'
+        if gradient_checkpointing:
+            command += f' --gradient_checkpointing'
         with open(self.output_dir / 'train.sh', 'w') as f:
             command_s = ' '.join(command.split())