Nupur Kumari committed
Commit 71da51f
1 Parent(s): 2ab48ae

custom-diffusion-space

Files changed (2)
  1. app.py +3 -2
  2. inference.py +22 -21
app.py CHANGED

```diff
@@ -99,9 +99,10 @@ def create_training_demo(trainer: Trainer,
     use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
     gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
     gr.Markdown('''
-    - Only enable one of "Train Text Encoder" or "modifier token" or None.
-    - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
+    - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
+    - Our results in the paper are with the above batch-size of 2 and 2 GPUs.
     - Enable gradient checkpointing for lower memory requirements (~14GB) at the expense of slower backward pass.
+    - If "Train Text Encoder", disable "modifier token".
    - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
     ''')
```
inference.py CHANGED

```diff
@@ -12,26 +12,27 @@ import torch
 from diffusers import StableDiffusionPipeline
 sys.path.insert(0, 'custom-diffusion')
 
-
-def load_model(text_encoder, tokenizer, unet, save_path, modifier_token, freeze_model='crossattn_kv'):
-    st = torch.load(save_path)
-    if 'text_encoder' in st:
-        text_encoder.load_state_dict(st['text_encoder'])
-    if modifier_token in st:
-        _ = tokenizer.add_tokens(modifier_token)
-        modifier_token_id = tokenizer.convert_tokens_to_ids(modifier_token)
-        # Resize the token embeddings as we are adding new special tokens to the tokenizer
-        text_encoder.resize_token_embeddings(len(tokenizer))
-        token_embeds = text_encoder.get_input_embeddings().weight.data
-        token_embeds[modifier_token_id] = st[modifier_token]
-    print(st.keys())
-    for name, params in unet.named_parameters():
-        if freeze_model == 'crossattn':
-            if 'attn2' in name:
-                params.data.copy_(st['unet'][f'{name}'])
-        else:
-            if 'attn2.to_k' in name or 'attn2.to_v' in name:
-                params.data.copy_(st['unet'][f'{name}'])
+from src import diffuser_training
+
+# def load_model(text_encoder, tokenizer, unet, save_path, modifier_token, freeze_model='crossattn_kv'):
+#     st = torch.load(save_path)
+#     if 'text_encoder' in st:
+#         text_encoder.load_state_dict(st['text_encoder'])
+#     if modifier_token in st:
+#         _ = tokenizer.add_tokens(modifier_token)
+#         modifier_token_id = tokenizer.convert_tokens_to_ids(modifier_token)
+#         # Resize the token embeddings as we are adding new special tokens to the tokenizer
+#         text_encoder.resize_token_embeddings(len(tokenizer))
+#         token_embeds = text_encoder.get_input_embeddings().weight.data
+#         token_embeds[modifier_token_id] = st[modifier_token]
+#     print(st.keys())
+#     for name, params in unet.named_parameters():
+#         if freeze_model == 'crossattn':
+#             if 'attn2' in name:
+#                 params.data.copy_(st['unet'][f'{name}'])
+#         else:
+#             if 'attn2.to_k' in name or 'attn2.to_v' in name:
+#                 params.data.copy_(st['unet'][f'{name}'])
 
 
 class InferencePipeline:
@@ -67,7 +68,7 @@ class InferencePipeline:
             model_id, torch_dtype=torch.float16)
         pipe = pipe.to(self.device)
 
-        load_model(pipe.text_encoder, pipe.tokenizer, pipe.unet, weight_path, '<new1>')
+        diffuser_training.load_model(pipe.text_encoder, pipe.tokenizer, pipe.unet, weight_path, '<new1>')
 
         self.pipe = pipe
```
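For context, a minimal usage sketch of the refactored entry point, end to end. The `diffuser_training.load_model` signature is taken from the commented-out reference implementation above; the model id, checkpoint path, and prompt are illustrative assumptions:

```python
# Minimal usage sketch, assuming the custom-diffusion repo is checked out
# next to this file; the checkpoint path and prompt are illustrative only.
import sys

import torch
from diffusers import StableDiffusionPipeline

sys.path.insert(0, 'custom-diffusion')
from src import diffuser_training

pipe = StableDiffusionPipeline.from_pretrained(
    'CompVis/stable-diffusion-v1-4', torch_dtype=torch.float16)
pipe = pipe.to('cuda')

# Patch the fine-tuned cross-attention K/V weights and the '<new1>'
# token embedding into the base pipeline.
diffuser_training.load_model(
    pipe.text_encoder, pipe.tokenizer, pipe.unet,
    'experiments/delta.bin', '<new1>')  # hypothetical checkpoint path

image = pipe('<new1> cat swimming in a pool',
             num_inference_steps=50, guidance_scale=7.5).images[0]
image.save('out.png')
```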