Prgckwb committed
Commit 7926358
1 Parent(s): a136e6a

:tada: init

app.py CHANGED
@@ -41,9 +41,5 @@ if __name__ == "__main__":
         outputs=[
             gr.Image(label="Image", type="pil"),
         ],
-        cache_examples=True,
-        examples=[
-            ['stabilityai/stable-diffusion-3-medium-diffusers', 'A cat holding a sign that says hello world', ''],
-        ]
     )
     demo.launch()
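
The removed `examples` / `cache_examples` pair is a standard gr.Interface option: `examples` pre-fills the inputs with sample values, and `cache_examples=True` runs the function on every example at startup so results render instantly; dropping them likely avoids a full Stable Diffusion 3 generation pass when the Space boots. A minimal sketch of how these options wire into an interface of this shape, assuming a hypothetical `generate` function and text inputs (the hunk does not show the real signature):

import gradio as gr
from PIL import Image

def generate(model_id: str, prompt: str, negative_prompt: str) -> Image.Image:
    # Placeholder: a real app would run the diffusion pipeline here.
    return Image.new("RGB", (512, 512))

demo = gr.Interface(
    fn=generate,
    inputs=[gr.Textbox(label="Model"), gr.Textbox(label="Prompt"), gr.Textbox(label="Negative Prompt")],
    outputs=[gr.Image(label="Image", type="pil")],
    # The removed lines sat here; caching calls generate() once per example at launch.
    examples=[["stabilityai/stable-diffusion-3-medium-diffusers", "A cat holding a sign that says hello world", ""]],
    cache_examples=True,
)
demo.launch()
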
scripts/convert_original_stable_diffusion_to_diffusers.py.py ADDED
@@ -0,0 +1,188 @@
+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Conversion script for the LDM checkpoints."""
+
+import argparse
+import importlib
+
+import torch
+
+from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--checkpoint_path", default=None, type=str, required=True, help="Path to the checkpoint to convert."
+    )
+    # !wget https://raw.githubusercontent.com/CompVis/stable-diffusion/main/configs/stable-diffusion/v1-inference.yaml
+    parser.add_argument(
+        "--original_config_file",
+        default=None,
+        type=str,
+        help="The YAML config file corresponding to the original architecture.",
+    )
+    parser.add_argument(
+        "--config_files",
+        default=None,
+        type=str,
+        help="The YAML config file corresponding to the architecture.",
+    )
+    parser.add_argument(
+        "--num_in_channels",
+        default=None,
+        type=int,
+        help="The number of input channels. If `None`, the number of input channels will be automatically inferred.",
+    )
+    parser.add_argument(
+        "--scheduler_type",
+        default="pndm",
+        type=str,
+        help="Type of scheduler to use. Should be one of ['pndm', 'lms', 'ddim', 'euler', 'euler-ancestral', 'dpm']",
+    )
+    parser.add_argument(
+        "--pipeline_type",
+        default=None,
+        type=str,
+        help=(
+            "The pipeline type. One of 'FrozenOpenCLIPEmbedder', 'FrozenCLIPEmbedder', 'PaintByExample'"
+            ". If `None`, the pipeline will be automatically inferred."
+        ),
+    )
+    parser.add_argument(
+        "--image_size",
+        default=None,
+        type=int,
+        help=(
+            "The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2"
+            " Base. Use 768 for Stable Diffusion v2."
+        ),
+    )
+    parser.add_argument(
+        "--prediction_type",
+        default=None,
+        type=str,
+        help=(
+            "The prediction type that the model was trained on. Use 'epsilon' for Stable Diffusion v1.X and Stable"
+            " Diffusion v2 Base. Use 'v_prediction' for Stable Diffusion v2."
+        ),
+    )
+    parser.add_argument(
+        "--extract_ema",
+        action="store_true",
+        help=(
+            "Only relevant for checkpoints that have both EMA and non-EMA weights. Whether to extract the EMA weights"
+            " or not. Defaults to `False`. Add `--extract_ema` to extract the EMA weights. EMA weights usually yield"
+            " higher quality images for inference. Non-EMA weights are usually better to continue fine-tuning."
+        ),
+    )
+    parser.add_argument(
+        "--upcast_attention",
+        action="store_true",
+        help=(
+            "Whether the attention computation should always be upcasted. This is necessary when running Stable"
+            " Diffusion 2.1."
+        ),
+    )
+    parser.add_argument(
+        "--from_safetensors",
+        action="store_true",
+        help="If `--checkpoint_path` is in `safetensors` format, load checkpoint with safetensors instead of PyTorch.",
+    )
+    parser.add_argument(
+        "--to_safetensors",
+        action="store_true",
+        help="Whether to store the pipeline in safetensors format or not.",
+    )
+    parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.")
+    parser.add_argument("--device", type=str, help="Device to use (e.g. cpu, cuda:0, cuda:1, etc.)")
+    parser.add_argument(
+        "--stable_unclip",
+        type=str,
+        default=None,
+        required=False,
+        help="Set if this is a stable unCLIP model. One of 'txt2img' or 'img2img'.",
+    )
+    parser.add_argument(
+        "--stable_unclip_prior",
+        type=str,
+        default=None,
+        required=False,
+        help="Set if this is a stable unCLIP txt2img model. Selects which prior to use. If `--stable_unclip` is set to `txt2img`, the karlo prior (https://huggingface.co/kakaobrain/karlo-v1-alpha/tree/main/prior) is selected by default.",
+    )
+    parser.add_argument(
+        "--clip_stats_path",
+        type=str,
+        help="Path to the clip stats file. Only required if the stable unclip model's config specifies `model.params.noise_aug_config.params.clip_stats_path`.",
+        required=False,
+    )
+    parser.add_argument(
+        "--controlnet", action="store_true", default=None, help="Set flag if this is a controlnet checkpoint."
+    )
+    parser.add_argument("--half", action="store_true", help="Save weights in half precision.")
+    parser.add_argument(
+        "--vae_path",
+        type=str,
+        default=None,
+        required=False,
+        help="Set to a path or hub id of an already converted VAE to avoid converting it again.",
+    )
+    parser.add_argument(
+        "--pipeline_class_name",
+        type=str,
+        default=None,
+        required=False,
+        help="Specify the pipeline class name.",
+    )
+
+    args = parser.parse_args()
+
+    if args.pipeline_class_name is not None:
+        library = importlib.import_module("diffusers")
+        class_obj = getattr(library, args.pipeline_class_name)
+        pipeline_class = class_obj
+    else:
+        pipeline_class = None
+
+    pipe = download_from_original_stable_diffusion_ckpt(
+        checkpoint_path_or_dict=args.checkpoint_path,
+        original_config_file=args.original_config_file,
+        config_files=args.config_files,
+        image_size=args.image_size,
+        prediction_type=args.prediction_type,
+        model_type=args.pipeline_type,
+        extract_ema=args.extract_ema,
+        scheduler_type=args.scheduler_type,
+        num_in_channels=args.num_in_channels,
+        upcast_attention=args.upcast_attention,
+        from_safetensors=args.from_safetensors,
+        device=args.device,
+        stable_unclip=args.stable_unclip,
+        stable_unclip_prior=args.stable_unclip_prior,
+        clip_stats_path=args.clip_stats_path,
+        controlnet=args.controlnet,
+        vae_path=args.vae_path,
+        pipeline_class=pipeline_class,
+    )
+
+    if args.half:
+        pipe.to(dtype=torch.float16)
+
+    if args.controlnet:
+        # only save the controlnet model
+        pipe.controlnet.save_pretrained(args.dump_path, safe_serialization=args.to_safetensors)
+    else:
+        pipe.save_pretrained(args.dump_path, safe_serialization=args.to_safetensors)
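
For reference, a minimal sketch (not part of the commit) of the same conversion done programmatically with the function the script imports; the checkpoint filename and output directory below are hypothetical placeholders:

import torch
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt

# Hypothetical .safetensors checkpoint; mirrors --checkpoint_path / --from_safetensors / --extract_ema.
pipe = download_from_original_stable_diffusion_ckpt(
    checkpoint_path_or_dict="v1-5-pruned-emaonly.safetensors",
    from_safetensors=True,
    extract_ema=True,  # EMA weights usually yield higher quality images for inference
)
pipe.to(dtype=torch.float16)  # equivalent of passing --half
pipe.save_pretrained("sd15-diffusers", safe_serialization=True)  # --dump_path plus --to_safetensors

The CLI form of the script passes the same values through the corresponding flags, e.g. --checkpoint_path, --from_safetensors, --extract_ema, --half, --dump_path, and --to_safetensors.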