Nupur Kumari committed
Commit 7b6145e
1 Parent(s): c4a18be
Files changed (2):
  1. app.py +41 -17
  2. inference.py +2 -0
app.py CHANGED
@@ -22,6 +22,15 @@ DESCRIPTION = '''This is a demo for [https://github.com/adobe-research/custom-di
 It is recommended to upgrade to GPU in Settings after duplicating this space to use it.
 <a href="https://huggingface.co/spaces/nupurkmr9/custom-diffusion?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
 '''
+DETAILDESCRIPTION='''
+Custom Diffusion allows you to fine-tune text-to-image diffusion models, such as Stable Diffusion, given a few images of a new concept (~4-20).
+We fine-tune only a subset of model parameters, namely the key and value projection matrices in the cross-attention layers, along with the modifier token used to represent the object.
+This also reduces the extra storage for each additional concept to 75MB.
+Our method further allows you to use a combination of concepts. A demo for multiple concepts will be added soon.
+<center>
+<img src="https://huggingface.co/spaces/nupurkmr9/custom-diffusion/resolve/main/method.jpg" width="600" align="center">
+</center>
+'''

 ORIGINAL_SPACE_ID = 'nupurkmr9/custom-diffusion'
 SPACE_ID = os.getenv('SPACE_ID', ORIGINAL_SPACE_ID)
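
The key and value projections mentioned in DETAILDESCRIPTION correspond, in diffusers naming, to the "attn2.to_k" and "attn2.to_v" parameters of the UNet. A minimal sketch of that parameter selection, assuming the stock diffusers UNet layout (illustrative, not this repo's training code):

import torch
from diffusers import UNet2DConditionModel

# Load the UNet of the base model used throughout this Space.
unet = UNet2DConditionModel.from_pretrained(
    'CompVis/stable-diffusion-v1-4', subfolder='unet')

trainable_params = []
for name, param in unet.named_parameters():
    # 'attn2' marks cross-attention blocks in diffusers UNets;
    # 'to_k'/'to_v' are their key/value projection matrices.
    if 'attn2.to_k' in name or 'attn2.to_v' in name:
        param.requires_grad_(True)
        trainable_params.append(param)
    else:
        param.requires_grad_(False)

# Only these parameters (plus the modifier-token embedding) are optimized,
# which keeps the per-concept delta small (~75MB).
optimizer = torch.optim.AdamW(trainable_params, lr=1e-5)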
@@ -74,38 +83,44 @@ def create_training_demo(trainer: Trainer,
             with gr.Box():
                 gr.Markdown('Training Data')
                 concept_images = gr.Files(label='Images for your concept')
-                concept_prompt = gr.Textbox(label='Concept Prompt',
-                                            max_lines=1, placeholder='Example: "photo of a \<new1\> cat"')
-                class_prompt = gr.Textbox(label='Regularization set Prompt',
+                with gr.Row():
+                    class_prompt = gr.Textbox(label='Class Prompt',
                                             max_lines=1, placeholder='Example: "cat"')
+                    with gr.Column():
+                        modifier_token = gr.Checkbox(label='modifier token',
+                                                     value=True)
+                        train_text_encoder = gr.Checkbox(label='Train Text Encoder',
+                                                         value=False)
+                concept_prompt = gr.Textbox(label='Concept Prompt',
+                                            max_lines=1, placeholder='Example: "photo of a \<new1\> cat"')
                 gr.Markdown('''
-                - Use "\<new1\>" appended in front of the concept, e.g., "\<new1\> cat", if modifier_token is enabled.
-                - For a new concept an e.g. concept_prompt is "photo of a \<new1\> cat" and "cat" for class_prompt.
-                - For a style concept, use "painting in the style of \<new1\> art" for concept_prompt and "art" for class_prompt.
+                - We use the "\<new1\>" modifier token in front of the concept, e.g., "\<new1\> cat". By default, modifier_token is enabled.
+                - If "Train Text Encoder" is selected, disable "modifier token" and use any unique text to describe the concept, e.g., "ktn cat".
+                - For a new object concept, an example concept prompt is "photo of a \<new1\> cat" with "cat" as the class prompt.
+                - For a style concept, use "painting in the style of \<new1\> art" as the concept prompt and "art" as the class prompt.
+                - The class prompt should be the object category.
                 ''')
             with gr.Box():
                 gr.Markdown('Training Parameters')
                 num_training_steps = gr.Number(
                     label='Number of Training Steps', value=1000, precision=0)
                 learning_rate = gr.Number(label='Learning Rate', value=0.00001)
-                train_text_encoder = gr.Checkbox(label='Train Text Encoder',
-                                                 value=False)
-                modifier_token = gr.Checkbox(label='modifier token',
-                                             value=True)
                 batch_size = gr.Number(
                     label='batch_size', value=1, precision=0)
-                gradient_accumulation = gr.Number(
-                    label='Number of Gradient Accumulation',
-                    value=1,
-                    precision=0)
                 with gr.Row():
                     use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
                     gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
+                with gr.Accordion('Other Parameters', open=False):
+                    gradient_accumulation = gr.Number(
+                        label='Number of Gradient Accumulation',
+                        value=1,
+                        precision=0)
+                    gen_images = gr.Checkbox(label='Generated images as regularization',
+                                             value=False)
                 gr.Markdown('''
                 - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
-                - Our results in the paper are with the above batch-size of 2 and 2 GPUs.
+                - Our results in the paper are trained with batch-size 4 (8 including class regularization samples).
                 - Enable gradient checkpointing for lower memory requirements (~14GB) at the expense of slower backward pass.
-                - If "Train Text Encoder", disable "modifier token".
                 - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
                 ''')
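The "modifier token" checkbox implies registering "<new1>" with the tokenizer and text encoder. A hedged sketch of the standard transformers pattern for this (model id and variable names are assumptions, not taken from this repo):

from transformers import CLIPTextModel, CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained(
    'CompVis/stable-diffusion-v1-4', subfolder='tokenizer')
text_encoder = CLIPTextModel.from_pretrained(
    'CompVis/stable-diffusion-v1-4', subfolder='text_encoder')

# Register "<new1>" and grow the embedding table to match the new vocab.
num_added = tokenizer.add_tokens('<new1>')
assert num_added == 1, '"<new1>" already exists in the vocabulary'
text_encoder.resize_token_embeddings(len(tokenizer))

# When "modifier token" is enabled, this new embedding row is the only
# text-encoder parameter that receives gradients.
new_token_id = tokenizer.convert_tokens_to_ids('<new1>')
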
@@ -136,7 +151,8 @@ def create_training_demo(trainer: Trainer,
                 gradient_accumulation,
                 batch_size,
                 use_8bit_adam,
-                gradient_checkpointing
+                gradient_checkpointing,
+                gen_images
             ],
             outputs=[
                 training_status,
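
The new gen_images input toggles generated class-regularization images. A sketch of that idea following the common prior-preservation recipe (the image count and paths are assumed, not taken from this commit):

import os
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    'CompVis/stable-diffusion-v1-4', torch_dtype=torch.float16).to('cuda')

os.makedirs('class_images', exist_ok=True)
class_prompt = 'cat'  # same text as the "Class Prompt" box in the Train tab
for i in range(200):  # ~200 regularization images is a typical choice
    image = pipe(class_prompt, num_inference_steps=50).images[0]
    image.save(f'class_images/{i:04d}.png')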
@@ -174,6 +190,10 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
                     value='CompVis/stable-diffusion-v1-4',
                     label='Base Model',
                     visible=True)
+                resolution = gr.Dropdown(choices=[512, 768],
+                                         value=512,
+                                         label='Resolution',
+                                         visible=True)
                 reload_button = gr.Button('Reload Weight List')
                 weight_name = gr.Dropdown(choices=find_weight_files(),
                                           value='custom-diffusion-models/cat.bin',
@@ -214,6 +234,7 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
                 gr.Markdown('''
                 - Models with names starting with "custom-diffusion-models/" are the pretrained models provided in the [original repo](https://github.com/adobe-research/custom-diffusion), and the ones with names starting with "results/delta.bin" are your trained models.
                 - After training, you can press "Reload Weight List" button to load your trained model names.
+                - Lower the default batch size and number of steps for faster sampling.
                 ''')
             with gr.Column():
                 result = gr.Image(label='Result')
@@ -231,6 +252,7 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
                 guidance_scale,
                 eta,
                 batch_size,
+                resolution
             ],
             outputs=result,
             queue=False)
@@ -244,6 +266,7 @@ def create_inference_demo(pipe: InferencePipeline) -> gr.Blocks:
                 guidance_scale,
                 eta,
                 batch_size,
+                resolution
             ],
             outputs=result,
             queue=False)
@@ -282,6 +305,7 @@ with gr.Blocks(css='style.css') as demo:

     gr.Markdown(TITLE)
     gr.Markdown(DESCRIPTION)
+    gr.Markdown(DETAILDESCRIPTION)

     with gr.Tabs():
         with gr.TabItem('Train'):
 
22
  It is recommended to upgrade to GPU in Settings after duplicating this space to use it.
23
  <a href="https://huggingface.co/spaces/nupurkmr9/custom-diffusion?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
24
  '''
25
+ DETAILDESCRIPTION='''
26
+ Custom Diffusion allows you to fine-tune text-to-image diffusion models, such as Stable Diffusion, given a few images of a new concept (~4-20).
27
+ We fine-tune only a subset of model parameters, namely key and value projection matrices, in the cross-attention layers and the modifier token used to represent the object.
28
+ This also reduces the extra storage for each additional concept to 75MB.
29
+ Our method further allows you to use a combination of concepts. Demo for multiple concepts will be added soon.
30
+ <center>
31
+ <img src="https://huggingface.co/spaces/nupurkmr9/custom-diffusion/resolve/main/method.jpg" width="600" align="center" >
32
+ </center>
33
+ '''
34
 
35
  ORIGINAL_SPACE_ID = 'nupurkmr9/custom-diffusion'
36
  SPACE_ID = os.getenv('SPACE_ID', ORIGINAL_SPACE_ID)
 
83
  with gr.Box():
84
  gr.Markdown('Training Data')
85
  concept_images = gr.Files(label='Images for your concept')
86
+ with gr.Row():
87
+ class_prompt = gr.Textbox(label='Class Prompt',
 
88
  max_lines=1, placeholder='Example: "cat"')
89
+ with gr.Column():
90
+ modifier_token = gr.Checkbox(label='modifier token',
91
+ value=True)
92
+ train_text_encoder = gr.Checkbox(label='Train Text Encoder',
93
+ value=False)
94
+ concept_prompt = gr.Textbox(label='Concept Prompt',
95
+ max_lines=1, placeholder='Example: "photo of a \<new1\> cat"')
96
  gr.Markdown('''
97
+ - We use "\<new1\>" modifier token in front of the concept, e.g., "\<new1\> cat". By default modifier_token is enabled.
98
+ - If "Train Text Encoder", disable "modifier token" and use any unique text to describe the concept e.g. "ktn cat".
99
+ - For a new concept an e.g. concept prompt is "photo of a \<new1\> cat" and "cat" for class prompt.
100
+ - For a style concept, use "painting in the style of \<new1\> art" for concept prompt and "art" for class prompt.
101
+ - Class prompt should be the object category.
102
  ''')
103
  with gr.Box():
104
  gr.Markdown('Training Parameters')
105
  num_training_steps = gr.Number(
106
  label='Number of Training Steps', value=1000, precision=0)
107
  learning_rate = gr.Number(label='Learning Rate', value=0.00001)
 
 
 
 
108
  batch_size = gr.Number(
109
  label='batch_size', value=1, precision=0)
 
 
 
 
110
  with gr.Row():
111
  use_8bit_adam = gr.Checkbox(label='Use 8bit Adam', value=True)
112
  gradient_checkpointing = gr.Checkbox(label='Enable gradient checkpointing', value=False)
113
+ with gr.Accordion('Other Parameters', open=False):
114
+ gradient_accumulation = gr.Number(
115
+ label='Number of Gradient Accumulation',
116
+ value=1,
117
+ precision=0)
118
+ gen_images = gr.Checkbox(label='Generated images as regularization',
119
+ value=False)
120
  gr.Markdown('''
121
  - It will take about ~10 minutes to train for 1000 steps and ~21GB on a 3090 GPU.
122
+ - Our results in the paper are trained with batch-size 4 (8 including class regularization samples).
123
  - Enable gradient checkpointing for lower memory requirements (~14GB) at the expense of slower backward pass.
 
124
  - Note that your trained models will be deleted when the second training is started. You can upload your trained model in the "Upload" tab.
125
  ''')
126
 
 
151
  gradient_accumulation,
152
  batch_size,
153
  use_8bit_adam,
154
+ gradient_checkpointing,
155
+ gen_images
156
  ],
157
  outputs=[
158
  training_status,
 
190
  value='CompVis/stable-diffusion-v1-4',
191
  label='Base Model',
192
  visible=True)
193
+ resolution = gr.Dropdown(choices=[512, 768],
194
+ value=512,
195
+ label='Resolution',
196
+ visible=True)
197
  reload_button = gr.Button('Reload Weight List')
198
  weight_name = gr.Dropdown(choices=find_weight_files(),
199
  value='custom-diffusion-models/cat.bin',
 
234
  gr.Markdown('''
235
  - Models with names starting with "custom-diffusion-models/" are the pretrained models provided in the [original repo](https://github.com/adobe-research/custom-diffusion), and the ones with names starting with "results/delta.bin" are your trained models.
236
  - After training, you can press "Reload Weight List" button to load your trained model names.
237
+ - Change default batch-size and steps for faster sampling.
238
  ''')
239
  with gr.Column():
240
  result = gr.Image(label='Result')
 
252
  guidance_scale,
253
  eta,
254
  batch_size,
255
+ resolution
256
  ],
257
  outputs=result,
258
  queue=False)
 
266
  guidance_scale,
267
  eta,
268
  batch_size,
269
+ resolution
270
  ],
271
  outputs=result,
272
  queue=False)
 
305
 
306
  gr.Markdown(TITLE)
307
  gr.Markdown(DESCRIPTION)
308
+ gr.Markdown(DETAILDESCRIPTION)
309
 
310
  with gr.Tabs():
311
  with gr.TabItem('Train'):
inference.py CHANGED
@@ -61,6 +61,7 @@ class InferencePipeline:
             guidance_scale: float,
             eta: float,
             batch_size: int,
+            resolution: int,
     ) -> PIL.Image.Image:
         if not torch.cuda.is_available():
             raise gr.Error('CUDA is not available.')
@@ -71,6 +72,7 @@ class InferencePipeline:
         out = self.pipe([prompt]*batch_size,
                         num_inference_steps=n_steps,
                         guidance_scale=guidance_scale,
+                        height=resolution, width=resolution,
                         eta = eta,
                         generator=generator) # type: ignore
         out = out.images
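
The height/width plumbing above matches the standard diffusers pipeline call. A self-contained sketch of sampling at the new 768 dropdown setting (model id and prompt are illustrative, not this Space's weights):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    'CompVis/stable-diffusion-v1-4', torch_dtype=torch.float16).to('cuda')

resolution = 768  # height and width must be multiples of 8 for the VAE
out = pipe('photo of a cat',
           num_inference_steps=50,
           guidance_scale=7.5,
           height=resolution, width=resolution)
out.images[0].save('sample.png')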