zeyofu committed on
Commit
3af7a55
1 Parent(s): 2ca4b56

DreamBooth and Custom Diffusion upload

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. custom-diffusion/barn+dog-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  2. custom-diffusion/barn+dog-sdv4/configs/lightning.yaml +12 -0
  3. custom-diffusion/barn+dog-sdv4/configs/project.yaml +100 -0
  4. custom-diffusion/cat+chair-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  5. custom-diffusion/cat+chair-sdv4/configs/lightning.yaml +12 -0
  6. custom-diffusion/cat+chair-sdv4/configs/project.yaml +100 -0
  7. custom-diffusion/dog+car-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  8. custom-diffusion/dog+car-sdv4/configs/lightning.yaml +12 -0
  9. custom-diffusion/dog+car-sdv4/configs/project.yaml +100 -0
  10. custom-diffusion/dog+table-sdv4/configs/lightning.yaml +12 -0
  11. custom-diffusion/dog+table-sdv4/configs/project.yaml +100 -0
  12. custom-diffusion/flower+teddybear-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  13. custom-diffusion/flower+teddybear-sdv4/configs/lightning.yaml +12 -0
  14. custom-diffusion/flower+teddybear-sdv4/configs/project.yaml +100 -0
  15. custom-diffusion/sd-v1-4.ckpt +3 -0
  16. custom-diffusion/table+chair-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  17. custom-diffusion/table+chair-sdv4/configs/lightning.yaml +12 -0
  18. custom-diffusion/table+chair-sdv4/configs/project.yaml +100 -0
  19. custom-diffusion/teddybear+barn-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  20. custom-diffusion/teddybear+barn-sdv4/configs/lightning.yaml +12 -0
  21. custom-diffusion/teddybear+barn-sdv4/configs/project.yaml +100 -0
  22. custom-diffusion/teddybear+car-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  23. custom-diffusion/teddybear+car-sdv4/configs/lightning.yaml +12 -0
  24. custom-diffusion/teddybear+car-sdv4/configs/project.yaml +100 -0
  25. custom-diffusion/tortoise_plushy+cat-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  26. custom-diffusion/tortoise_plushy+cat-sdv4/configs/lightning.yaml +12 -0
  27. custom-diffusion/tortoise_plushy+cat-sdv4/configs/project.yaml +100 -0
  28. custom-diffusion/tortoise_plushy+table-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  29. custom-diffusion/tortoise_plushy+table-sdv4/configs/lightning.yaml +12 -0
  30. custom-diffusion/tortoise_plushy+table-sdv4/configs/project.yaml +100 -0
  31. custom-diffusion/wooden_pot+cat-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  32. custom-diffusion/wooden_pot+cat-sdv4/configs/lightning.yaml +12 -0
  33. custom-diffusion/wooden_pot+cat-sdv4/configs/project.yaml +100 -0
  34. custom-diffusion/wooden_pot+flower-sdv4/checkpoints/delta_epoch=000004.ckpt +3 -0
  35. custom-diffusion/wooden_pot+flower-sdv4/configs/lightning.yaml +12 -0
  36. custom-diffusion/wooden_pot+flower-sdv4/configs/project.yaml +100 -0
  37. dreambooth/cat+chair/feature_extractor/preprocessor_config.json +28 -0
  38. dreambooth/cat+chair/model_index.json +33 -0
  39. dreambooth/cat+chair/safety_checker/config.json +168 -0
  40. dreambooth/cat+chair/safety_checker/pytorch_model.bin +3 -0
  41. dreambooth/cat+chair/scheduler/scheduler_config.json +15 -0
  42. dreambooth/cat+chair/text_encoder/config.json +25 -0
  43. dreambooth/cat+chair/text_encoder/pytorch_model.bin +3 -0
  44. dreambooth/cat+chair/tokenizer/merges.txt +0 -0
  45. dreambooth/cat+chair/tokenizer/special_tokens_map.json +24 -0
  46. dreambooth/cat+chair/tokenizer/tokenizer_config.json +33 -0
  47. dreambooth/cat+chair/tokenizer/vocab.json +0 -0
  48. dreambooth/cat+chair/unet/config.json +65 -0
  49. dreambooth/cat+chair/unet/diffusion_pytorch_model.bin +3 -0
  50. dreambooth/cat+chair/vae/config.json +31 -0
custom-diffusion/barn+dog-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0372ff0308d045281cb19575ae382a6ba8f26ca0a466c55683d8a9f2bd76e22c
+ size 76694594
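(Note: the three `+` lines above — and the matching .ckpt/.bin entries throughout this commit — are Git LFS pointer files; only the SHA-256 and byte size are versioned here, while the ~77 MB weight file lives in LFS storage. A minimal sketch of fetching the real file via huggingface_hub; the repo ID is a placeholder, since this page does not show it:

from huggingface_hub import hf_hub_download

# Placeholder repo ID -- substitute the actual repo this commit belongs to.
local_path = hf_hub_download(
    repo_id="zeyofu/placeholder-repo",
    repo_type="model",  # or "dataset", depending on how the repo is typed
    filename="custom-diffusion/barn+dog-sdv4/checkpoints/delta_epoch=000004.ckpt",
)
print(local_path)  # resolved local cache path of the ~77 MB delta checkpoint
)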
custom-diffusion/barn+dog-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
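(The lightning.yaml above uses the PyTorch Lightning 1.x Trainer vocabulary, where `accelerator: ddp` selects the distributed strategy and `gpus: 0,1,2,3` selects devices; PL 2.x renamed these to strategy/devices. A hedged sketch of how the block maps onto a Trainer, assuming PL 1.x and OmegaConf:

import pytorch_lightning as pl
from omegaconf import OmegaConf

# Assumes PyTorch Lightning 1.x, where accelerator="ddp" and gpus="0,1,2,3" were valid arguments.
cfg = OmegaConf.load("custom-diffusion/barn+dog-sdv4/configs/lightning.yaml")
trainer = pl.Trainer(
    max_steps=cfg.lightning.trainer.max_steps,      # 550
    accelerator=cfg.lightning.trainer.accelerator,  # "ddp"
    gpus=str(cfg.lightning.trainer.gpus),           # "0,1,2,3"
)
)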
custom-diffusion/barn+dog-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> barn
+         reg_caption: real_reg/samples_barn/caption.txt
+         datapath: data/barn
+         reg_datapath: real_reg/samples_barn/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> dog
+         reg_caption: real_reg/samples_dog/caption.txt
+         datapath: data/dog
+         reg_datapath: real_reg/samples_dog/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> barn
+         reg_caption: real_reg/samples_barn/caption.txt
+         datapath: data/barn
+         reg_datapath: real_reg/samples_barn/images.txt
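(Given `freeze_model: crossattn-kv` and `modifier_token: <new1>+<new2>` above, each delta_epoch=000004.ckpt should hold only the cross-attention key/value weights plus the two new token embeddings — consistent with the ~77 MB deltas versus the ~4.3 GB sd-v1-4.ckpt base further down. A minimal inspection sketch; the checkpoint's key layout is an assumption to verify:

import torch

# Loads the small Custom Diffusion delta checkpoint on CPU and lists its tensors.
# Whether tensors sit at the top level or under "state_dict" is an assumption.
delta = torch.load(
    "custom-diffusion/barn+dog-sdv4/checkpoints/delta_epoch=000004.ckpt",
    map_location="cpu",
)
state = delta.get("state_dict", delta) if isinstance(delta, dict) else delta
for name, tensor in state.items():
    if torch.is_tensor(tensor):
        print(f"{name}: {tuple(tensor.shape)}")
)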
custom-diffusion/cat+chair-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20b869e0a6787671e7d6c6c37722f32d5c91b14a6f052cff10fa95418873b07b
+ size 76694594
custom-diffusion/cat+chair-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/cat+chair-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> cat
+         reg_caption: real_reg/samples_cat/caption.txt
+         datapath: data/cat
+         reg_datapath: real_reg/samples_cat/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> chair
+         reg_caption: real_reg/samples_chair/caption.txt
+         datapath: data/chair
+         reg_datapath: real_reg/samples_chair/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> cat
+         reg_caption: real_reg/samples_cat/caption.txt
+         datapath: data/cat
+         reg_datapath: real_reg/samples_cat/images.txt
custom-diffusion/dog+car-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9e7d7141ab88e13e64000d5beaf18987107983cc3ac492b0d48be76e2e38a3f
+ size 76694594
custom-diffusion/dog+car-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/dog+car-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> dog
+         reg_caption: real_reg/samples_dog/caption.txt
+         datapath: data/dog
+         reg_datapath: real_reg/samples_dog/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> car
+         reg_caption: real_reg/samples_car/caption.txt
+         datapath: data/car
+         reg_datapath: real_reg/samples_car/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> dog
+         reg_caption: real_reg/samples_dog/caption.txt
+         datapath: data/dog
+         reg_datapath: real_reg/samples_dog/images.txt
custom-diffusion/dog+table-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/dog+table-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> dog
+         reg_caption: real_reg/samples_dog/caption.txt
+         datapath: data/dog
+         reg_datapath: real_reg/samples_dog/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> table
+         reg_caption: real_reg/samples_table/caption.txt
+         datapath: data/table
+         reg_datapath: real_reg/samples_table/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> dog
+         reg_caption: real_reg/samples_dog/caption.txt
+         datapath: data/dog
+         reg_datapath: real_reg/samples_dog/images.txt
custom-diffusion/flower+teddybear-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d05af2d2ccda3d0999c9faa66bf6c3d02953e1d3905fddfce5be5607f024bb45
+ size 76694594
custom-diffusion/flower+teddybear-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/flower+teddybear-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> flower
+         reg_caption: real_reg/samples_flower/caption.txt
+         datapath: data/flower
+         reg_datapath: real_reg/samples_flower/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> teddybear
+         reg_caption: real_reg/samples_teddybear/caption.txt
+         datapath: data/teddybear
+         reg_datapath: real_reg/samples_teddybear/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> flower
+         reg_caption: real_reg/samples_flower/caption.txt
+         datapath: data/flower
+         reg_datapath: real_reg/samples_flower/images.txt
custom-diffusion/sd-v1-4.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe4efff1e174c627256e44ec2991ba279b3816e364b49f9be2abc0b3ff3f8556
+ size 4265380512
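(This entry is the full Stable Diffusion v1.4 base checkpoint, ~4.3 GB, which the ~77 MB per-pair deltas patch at sampling time; the upstream Custom Diffusion code does this via its own sampling script. A conceptual sketch of the merge, with the delta's key layout an assumption:

import torch

# The CompVis sd-v1-4.ckpt stores weights under "state_dict"; the delta layout is assumed.
base = torch.load("custom-diffusion/sd-v1-4.ckpt", map_location="cpu")["state_dict"]
delta = torch.load(
    "custom-diffusion/barn+dog-sdv4/checkpoints/delta_epoch=000004.ckpt",
    map_location="cpu",
)
delta_sd = delta.get("state_dict", delta) if isinstance(delta, dict) else delta
patched = {k: delta_sd.get(k, v) for k, v in base.items()}  # overwrite matching keys only
)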
custom-diffusion/table+chair-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1409a0f6601841298fd4b6da9db8190ee81575e19debf9f6ee36eca95f499d3
+ size 76694594
custom-diffusion/table+chair-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/table+chair-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> table
+         reg_caption: real_reg/samples_table/caption.txt
+         datapath: data/table
+         reg_datapath: real_reg/samples_table/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> chair
+         reg_caption: real_reg/samples_chair/caption.txt
+         datapath: data/chair
+         reg_datapath: real_reg/samples_chair/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> table
+         reg_caption: real_reg/samples_table/caption.txt
+         datapath: data/table
+         reg_datapath: real_reg/samples_table/images.txt
custom-diffusion/teddybear+barn-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d11b477068cbe5d6786e32ac89e4f1aafb9dc5e7a9e790af3fe1120865021fd4
+ size 76694594
custom-diffusion/teddybear+barn-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/teddybear+barn-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> teddybear
+         reg_caption: real_reg/samples_teddybear/caption.txt
+         datapath: data/teddybear
+         reg_datapath: real_reg/samples_teddybear/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> barn
+         reg_caption: real_reg/samples_barn/caption.txt
+         datapath: data/barn
+         reg_datapath: real_reg/samples_barn/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> teddybear
+         reg_caption: real_reg/samples_teddybear/caption.txt
+         datapath: data/teddybear
+         reg_datapath: real_reg/samples_teddybear/images.txt
custom-diffusion/teddybear+car-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e407963aabdabd7b849be4d590436d15ccb39104e0ce12b46157cdc5eda7776
+ size 76694594
custom-diffusion/teddybear+car-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/teddybear+car-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> teddybear
+         reg_caption: real_reg/samples_teddybear/caption.txt
+         datapath: data/teddybear
+         reg_datapath: real_reg/samples_teddybear/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> car
+         reg_caption: real_reg/samples_car/caption.txt
+         datapath: data/car
+         reg_datapath: real_reg/samples_car/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> teddybear
+         reg_caption: real_reg/samples_teddybear/caption.txt
+         datapath: data/teddybear
+         reg_datapath: real_reg/samples_teddybear/images.txt
custom-diffusion/tortoise_plushy+cat-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b42e3e6098721eab1de81c7a57988dae1e25b0e80a63e5a8aafc717f1a1469c
+ size 76694594
custom-diffusion/tortoise_plushy+cat-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/tortoise_plushy+cat-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> tortoise plushy
+         reg_caption: real_reg/samples_tortoise_plushy/caption.txt
+         datapath: data/tortoise_plushy
+         reg_datapath: real_reg/samples_tortoise_plushy/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> cat
+         reg_caption: real_reg/samples_cat/caption.txt
+         datapath: data/cat
+         reg_datapath: real_reg/samples_cat/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> tortoise plushy
+         reg_caption: real_reg/samples_tortoise_plushy/caption.txt
+         datapath: data/tortoise_plushy
+         reg_datapath: real_reg/samples_tortoise_plushy/images.txt
custom-diffusion/tortoise_plushy+table-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ec7d03e07edbb8373f3f1355a58479dfe1312ead51aecede686ea0446b282c9b
+ size 76694594
custom-diffusion/tortoise_plushy+table-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/tortoise_plushy+table-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> tortoise plushy
+         reg_caption: real_reg/samples_tortoise_plushy/caption.txt
+         datapath: data/tortoise_plushy
+         reg_datapath: real_reg/samples_tortoise_plushy/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> table
+         reg_caption: real_reg/samples_table/caption.txt
+         datapath: data/table
+         reg_datapath: real_reg/samples_table/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> tortoise plushy
+         reg_caption: real_reg/samples_tortoise_plushy/caption.txt
+         datapath: data/tortoise_plushy
+         reg_datapath: real_reg/samples_tortoise_plushy/images.txt
custom-diffusion/wooden_pot+cat-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53f2d8c94b22848c3feb695192b43fc7fa2ce7ded7f368097b4c06ed89c96e7d
+ size 76694594
custom-diffusion/wooden_pot+cat-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/wooden_pot+cat-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> wooden pot
+         reg_caption: real_reg/samples_wooden_pot/caption.txt
+         datapath: data/wooden_pot
+         reg_datapath: real_reg/samples_wooden_pot/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> cat
+         reg_caption: real_reg/samples_cat/caption.txt
+         datapath: data/cat
+         reg_datapath: real_reg/samples_cat/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> wooden pot
+         reg_caption: real_reg/samples_wooden_pot/caption.txt
+         datapath: data/wooden_pot
+         reg_datapath: real_reg/samples_wooden_pot/images.txt
custom-diffusion/wooden_pot+flower-sdv4/checkpoints/delta_epoch=000004.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d49f760131043be0d0aedcc1a6de61a87543a1cffee4951a09495ef3f0c4f88
+ size 76694594
custom-diffusion/wooden_pot+flower-sdv4/configs/lightning.yaml ADDED
@@ -0,0 +1,12 @@
+ lightning:
+   callbacks:
+     image_logger:
+       target: train.ImageLogger
+       params:
+         batch_frequency: 1000
+         max_images: 8
+         increase_log_steps: false
+   trainer:
+     max_steps: 550
+     accelerator: ddp
+     gpus: 0,1,2,3
custom-diffusion/wooden_pot+flower-sdv4/configs/project.yaml ADDED
@@ -0,0 +1,100 @@
+ model:
+   base_learning_rate: 1.0e-05
+   target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.model.CustomDiffusion
+   params:
+     linear_start: 0.00085
+     linear_end: 0.012
+     num_timesteps_cond: 1
+     log_every_t: 200
+     timesteps: 1000
+     first_stage_key: image
+     cond_stage_key: caption
+     image_size: 64
+     channels: 4
+     cond_stage_trainable: true
+     add_token: true
+     freeze_model: crossattn-kv
+     conditioning_key: crossattn
+     monitor: val/loss_simple_ema
+     scale_factor: 0.18215
+     use_ema: false
+     unet_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.modules.diffusionmodules.openaimodel.UNetModel
+       params:
+         image_size: 64
+         in_channels: 4
+         out_channels: 4
+         model_channels: 320
+         attention_resolutions:
+         - 4
+         - 2
+         - 1
+         num_res_blocks: 2
+         channel_mult:
+         - 1
+         - 2
+         - 4
+         - 4
+         num_heads: 8
+         use_spatial_transformer: true
+         transformer_depth: 1
+         context_dim: 768
+         use_checkpoint: false
+         legacy: false
+     first_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.stable-diffusion.ldm.models.autoencoder.AutoencoderKL
+       params:
+         embed_dim: 4
+         monitor: val/rec_loss
+         ddconfig:
+           double_z: true
+           z_channels: 4
+           resolution: 256
+           in_channels: 3
+           out_ch: 3
+           ch: 128
+           ch_mult:
+           - 1
+           - 2
+           - 4
+           - 4
+           num_res_blocks: 2
+           attn_resolutions: []
+           dropout: 0.0
+         lossconfig:
+           target: torch.nn.Identity
+     cond_stage_config:
+       target: src.imagen_hub.pipelines.custom_diffusion.custom_diffusion_src.src.custom_modules.FrozenCLIPEmbedderWrapper
+       params:
+         modifier_token: <new1>+<new2>
+     ckpt_path: null
+ data:
+   target: train.DataModuleFromConfig
+   params:
+     batch_size: 2
+     num_workers: 4
+     wrap: false
+     train:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> wooden pot
+         reg_caption: real_reg/samples_wooden_pot/caption.txt
+         datapath: data/wooden_pot
+         reg_datapath: real_reg/samples_wooden_pot/images.txt
+     train2:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new2> flower
+         reg_caption: real_reg/samples_flower/caption.txt
+         datapath: data/flower
+         reg_datapath: real_reg/samples_flower/images.txt
+     validation:
+       target: src.finetune_data.MaskBase
+       params:
+         size: 512
+         caption: <new1> wooden pot
+         reg_caption: real_reg/samples_wooden_pot/caption.txt
+         datapath: data/wooden_pot
+         reg_datapath: real_reg/samples_wooden_pot/images.txt
dreambooth/cat+chair/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "crop_size": {
+     "height": 224,
+     "width": 224
+   },
+   "do_center_crop": true,
+   "do_convert_rgb": true,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "feature_extractor_type": "CLIPFeatureExtractor",
+   "image_mean": [
+     0.48145466,
+     0.4578275,
+     0.40821073
+   ],
+   "image_processor_type": "CLIPImageProcessor",
+   "image_std": [
+     0.26862954,
+     0.26130258,
+     0.27577711
+   ],
+   "resample": 3,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "shortest_edge": 224
+   }
+ }
dreambooth/cat+chair/model_index.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.18.0",
+   "feature_extractor": [
+     "transformers",
+     "CLIPImageProcessor"
+   ],
+   "requires_safety_checker": true,
+   "safety_checker": [
+     "stable_diffusion",
+     "StableDiffusionSafetyChecker"
+   ],
+   "scheduler": [
+     "diffusers",
+     "PNDMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
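(model_index.json declares a standard StableDiffusionPipeline layout saved with diffusers 0.18.0, so the dreambooth/cat+chair folder should load directly with from_pretrained. A minimal sketch, assuming the folder and its LFS weights are available locally; the prompt token "sks" is only an illustrative placeholder, since the commit does not show the training prompt:

import torch
from diffusers import StableDiffusionPipeline

# Load the fine-tuned DreamBooth pipeline from the local folder uploaded in this commit.
pipe = StableDiffusionPipeline.from_pretrained(
    "dreambooth/cat+chair",
    torch_dtype=torch.float16,
).to("cuda")

image = pipe("a photo of a sks cat on a chair").images[0]
image.save("cat_on_chair.png")
)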
dreambooth/cat+chair/safety_checker/config.json ADDED
@@ -0,0 +1,168 @@
+ {
+   "_commit_hash": "133a221b8aa7292a167afc5127cb63fb5005638b",
+   "_name_or_path": "/home1/x/xingyuf2/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/safety_checker",
+   "architectures": [
+     "StableDiffusionSafetyChecker"
+   ],
+   "initializer_factor": 1.0,
+   "logit_scale_init_value": 2.6592,
+   "model_type": "clip",
+   "projection_dim": 768,
+   "text_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": 0,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": 2,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 768,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 3072,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "max_position_embeddings": 77,
+     "min_length": 0,
+     "model_type": "clip_text_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 12,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_hidden_layers": 12,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": 1,
+     "prefix": null,
+     "problem_type": null,
+     "projection_dim": 512,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.28.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false,
+     "vocab_size": 49408
+   },
+   "torch_dtype": "float32",
+   "transformers_version": null,
+   "vision_config": {
+     "_name_or_path": "",
+     "add_cross_attention": false,
+     "architectures": null,
+     "attention_dropout": 0.0,
+     "bad_words_ids": null,
+     "begin_suppress_tokens": null,
+     "bos_token_id": null,
+     "chunk_size_feed_forward": 0,
+     "cross_attention_hidden_size": null,
+     "decoder_start_token_id": null,
+     "diversity_penalty": 0.0,
+     "do_sample": false,
+     "dropout": 0.0,
+     "early_stopping": false,
+     "encoder_no_repeat_ngram_size": 0,
+     "eos_token_id": null,
+     "exponential_decay_length_penalty": null,
+     "finetuning_task": null,
+     "forced_bos_token_id": null,
+     "forced_eos_token_id": null,
+     "hidden_act": "quick_gelu",
+     "hidden_size": 1024,
+     "id2label": {
+       "0": "LABEL_0",
+       "1": "LABEL_1"
+     },
+     "image_size": 224,
+     "initializer_factor": 1.0,
+     "initializer_range": 0.02,
+     "intermediate_size": 4096,
+     "is_decoder": false,
+     "is_encoder_decoder": false,
+     "label2id": {
+       "LABEL_0": 0,
+       "LABEL_1": 1
+     },
+     "layer_norm_eps": 1e-05,
+     "length_penalty": 1.0,
+     "max_length": 20,
+     "min_length": 0,
+     "model_type": "clip_vision_model",
+     "no_repeat_ngram_size": 0,
+     "num_attention_heads": 16,
+     "num_beam_groups": 1,
+     "num_beams": 1,
+     "num_channels": 3,
+     "num_hidden_layers": 24,
+     "num_return_sequences": 1,
+     "output_attentions": false,
+     "output_hidden_states": false,
+     "output_scores": false,
+     "pad_token_id": null,
+     "patch_size": 14,
+     "prefix": null,
+     "problem_type": null,
+     "projection_dim": 512,
+     "pruned_heads": {},
+     "remove_invalid_values": false,
+     "repetition_penalty": 1.0,
+     "return_dict": true,
+     "return_dict_in_generate": false,
+     "sep_token_id": null,
+     "suppress_tokens": null,
+     "task_specific_params": null,
+     "temperature": 1.0,
+     "tf_legacy_loss": false,
+     "tie_encoder_decoder": false,
+     "tie_word_embeddings": true,
+     "tokenizer_class": null,
+     "top_k": 50,
+     "top_p": 1.0,
+     "torch_dtype": null,
+     "torchscript": false,
+     "transformers_version": "4.28.0",
+     "typical_p": 1.0,
+     "use_bfloat16": false
+   }
+ }
dreambooth/cat+chair/safety_checker/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75f5f098f12e0b765dd8f917182dccbea8c87d269265f3e616cf67282c426940
+ size 1216067303
dreambooth/cat+chair/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "_class_name": "PNDMScheduler",
+   "_diffusers_version": "0.18.0",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "set_alpha_to_one": false,
+   "skip_prk_steps": true,
+   "steps_offset": 1,
+   "timestep_spacing": "leading",
+   "trained_betas": null
+ }
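(The pipeline was saved with PNDMScheduler, but diffusers lets you rebuild a different scheduler from the same saved config at load time; a small sketch — the scheduler choice here is illustrative, not part of the commit:

from diffusers import DPMSolverMultistepScheduler, StableDiffusionPipeline

# Load the DreamBooth pipeline, then swap in another scheduler built from the saved config.
pipe = StableDiffusionPipeline.from_pretrained("dreambooth/cat+chair")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
)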
dreambooth/cat+chair/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "/home1/x/xingyuf2/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/text_encoder",
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 512,
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.0",
+   "vocab_size": 49408
+ }
dreambooth/cat+chair/text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:485096afb4482023ec75c414a5373ba300aa91a4cc2ab534074b5748d6b91ee8
+ size 492308087
dreambooth/cat+chair/tokenizer/merges.txt ADDED
The diff for this file is too large to render.
dreambooth/cat+chair/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
dreambooth/cat+chair/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
dreambooth/cat+chair/tokenizer/vocab.json ADDED
The diff for this file is too large to render.
dreambooth/cat+chair/unet/config.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.18.0",
+   "_name_or_path": "/home1/x/xingyuf2/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/unet",
+   "act_fn": "silu",
+   "addition_embed_type": null,
+   "addition_embed_type_num_heads": 64,
+   "addition_time_embed_dim": null,
+   "attention_head_dim": 8,
+   "block_out_channels": [
+     320,
+     640,
+     1280,
+     1280
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "class_embeddings_concat": false,
+   "conv_in_kernel": 3,
+   "conv_out_kernel": 3,
+   "cross_attention_dim": 768,
+   "cross_attention_norm": null,
+   "down_block_types": [
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "dual_cross_attention": false,
+   "encoder_hid_dim": null,
+   "encoder_hid_dim_type": null,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_only_cross_attention": null,
+   "mid_block_scale_factor": 1,
+   "mid_block_type": "UNetMidBlock2DCrossAttn",
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_attention_heads": null,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "out_channels": 4,
+   "projection_class_embeddings_input_dim": null,
+   "resnet_out_scale_factor": 1.0,
+   "resnet_skip_time_act": false,
+   "resnet_time_scale_shift": "default",
+   "sample_size": 64,
+   "time_cond_proj_dim": null,
+   "time_embedding_act_fn": null,
+   "time_embedding_dim": null,
+   "time_embedding_type": "positional",
+   "timestep_post_act": null,
+   "transformer_layers_per_block": 1,
+   "up_block_types": [
+     "UpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D"
+   ],
+   "upcast_attention": false,
+   "use_linear_projection": false
+ }
dreambooth/cat+chair/unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c16134d186c043f85eecdbc7d410de06f51d0cbfb668572325d4599c1c12372
+ size 3438364325
dreambooth/cat+chair/vae/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.18.0",
+   "_name_or_path": "/home1/x/xingyuf2/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/vae",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 512,
+   "scaling_factor": 0.18215,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ]
+ }