Commit: Use "cpu" in merge, save model

Changed file: evosdxl_jp_v1.py (+9 lines, -4 lines)
@@ -15,7 +15,7 @@ from diffusers.loaders import LoraLoaderMixin
|
|
15 |
SDXL_REPO = "stabilityai/stable-diffusion-xl-base-1.0"
|
16 |
JSDXL_REPO = "stabilityai/japanese-stable-diffusion-xl"
|
17 |
L_REPO = "ByteDance/SDXL-Lightning"
|
18 |
-
|
19 |
|
20 |
def load_state_dict(checkpoint_file: Union[str, os.PathLike], device: str = "cpu"):
|
21 |
file_extension = os.path.basename(checkpoint_file).split(".")[-1]
|
@@ -104,7 +104,7 @@ def split_conv_attn(weights):
|
|
104 |
return {"conv": conv_tensors, "attn": attn_tensors}
|
105 |
|
106 |
|
107 |
-
def
|
108 |
sdxl_weights = split_conv_attn(load_from_pretrained(SDXL_REPO, device=device))
|
109 |
dpo_weights = split_conv_attn(
|
110 |
load_from_pretrained(
|
@@ -172,9 +172,12 @@ def load_evosdxl_jp(device="cuda") -> StableDiffusionXLPipeline:
|
|
172 |
[0.023119324530758375, 0.04924981616469831, 0.9276308593045434],
|
173 |
)
|
174 |
new_weights = {**new_conv, **new_attn}
|
175 |
-
|
176 |
-
unet.load_state_dict({**new_conv, **new_attn})
|
177 |
|
|
|
|
|
|
|
|
|
178 |
text_encoder = CLIPTextModelWithProjection.from_pretrained(
|
179 |
JSDXL_REPO, subfolder="text_encoder", torch_dtype=torch.float16, variant="fp16"
|
180 |
)
|
@@ -199,6 +202,8 @@ def load_evosdxl_jp(device="cuda") -> StableDiffusionXLPipeline:
|
|
199 |
|
200 |
|
201 |
if __name__ == "__main__":
|
|
|
|
|
202 |
pipe: StableDiffusionXLPipeline = load_evosdxl_jp()
|
203 |
images = pipe("犬", num_inference_steps=4, guidance_scale=0).images
|
204 |
images[0].save("out.png")
|
|
|
# Hugging Face Hub repository ids for the models merged into EvoSDXL-JP.
SDXL_REPO = "stabilityai/stable-diffusion-xl-base-1.0"  # base SDXL checkpoint
JSDXL_REPO = "stabilityai/japanese-stable-diffusion-xl"  # Japanese SDXL (text-encoder source)
L_REPO = "ByteDance/SDXL-Lightning"  # few-step SDXL-Lightning weights

# Local cache path for the merged UNet state dict (safetensors format);
# merge_evosdxl_jp writes it and load_evosdxl_jp reads it back.
MERGED_FILE = "evosdxl_jp_v1.safetensors"
|
19 |
|
20 |
def load_state_dict(checkpoint_file: Union[str, os.PathLike], device: str = "cpu"):
|
21 |
file_extension = os.path.basename(checkpoint_file).split(".")[-1]
|
|
|
104 |
return {"conv": conv_tensors, "attn": attn_tensors}
|
105 |
|
106 |
|
107 |
+
def merge_evosdxl_jp(device="cpu") -> StableDiffusionXLPipeline:
|
108 |
sdxl_weights = split_conv_attn(load_from_pretrained(SDXL_REPO, device=device))
|
109 |
dpo_weights = split_conv_attn(
|
110 |
load_from_pretrained(
|
|
|
172 |
[0.023119324530758375, 0.04924981616469831, 0.9276308593045434],
|
173 |
)
|
174 |
new_weights = {**new_conv, **new_attn}
|
175 |
+
safetensors.torch.save_file(new_weights, MERGED_FILE)
|
|
|
176 |
|
177 |
+
def load_evosdxl_jp(device="cuda"):
|
178 |
+
unet_config = UNet2DConditionModel.load_config(SDXL_REPO, subfolder="unet")
|
179 |
+
unet = UNet2DConditionModel.from_config(unet_config).to(device=device)
|
180 |
+
unet.load_state_dict(safetensors.torch.load_file(MERGED_FILE))
|
181 |
text_encoder = CLIPTextModelWithProjection.from_pretrained(
|
182 |
JSDXL_REPO, subfolder="text_encoder", torch_dtype=torch.float16, variant="fp16"
|
183 |
)
|
|
|
202 |
|
203 |
|
204 |
def _main() -> None:
    """Demo entry point: merge weights once, load the pipeline, render a sample.

    Side effects: may create MERGED_FILE on first run; always writes out.png.
    """
    if not os.path.exists(MERGED_FILE):
        # First run only — produce the merged safetensors checkpoint on disk
        # so subsequent runs can skip the (slow) merge.
        merge_evosdxl_jp()
    pipe: StableDiffusionXLPipeline = load_evosdxl_jp()
    # NOTE(review): 4 steps with guidance_scale=0 looks like SDXL-Lightning-style
    # few-step sampling — confirm against the merged scheduler configuration.
    result = pipe("犬", num_inference_steps=4, guidance_scale=0).images
    result[0].save("out.png")


if __name__ == "__main__":
    _main()
|