diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..803589a89223b8d072dd87372bca567d08ecd6dc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+
+More information on all the CLI arguments and the environment is available on your [`wandb` run page](https://wandb.ai/sayakpaul/diffusion-orpo-lora-sdxl/runs/ufwurjxt).
diff --git a/checkpoint-10000/random_states_0.pkl b/checkpoint-10000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..90d54a79ecd54c667ffddcae29289dbf80ad2a38
--- /dev/null
+++ b/checkpoint-10000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f940256ed22e93714f71f4470f0c87a5adbed1c0097a477b4068c159172c00af
+size 16036
diff --git a/checkpoint-10000/scaler.pt b/checkpoint-10000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..729c0229c4eba48d3474aae4964aef8cf49707f5
--- /dev/null
+++ b/checkpoint-10000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fb6f2892df879d4d1d0bbbfed1aecbef62ae82e4c38d251b916716d0fd071bc
+size 988
diff --git a/checkpoint-10000/scheduler.bin b/checkpoint-10000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5aaa470f2207f9d8976656a91358b09a2cd26cbf
--- /dev/null
+++ b/checkpoint-10000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:058b3567396721c95682f3f854f20f26834cd8a8331b207e94c1d1639a736213
+size 1000
diff --git a/checkpoint-10000/unet/config.json b/checkpoint-10000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-10000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-10000/unet/diffusion_pytorch_model.safetensors b/checkpoint-10000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..86a031c8befa3815507c1d09088af354f17d65cb
--- /dev/null
+++ b/checkpoint-10000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e09dd2e09996e45c12e528125e33997d9395233d714101add4377c536f173e92
+size 10270077736
diff --git a/checkpoint-10000/unet_ema/config.json b/checkpoint-10000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e7c8ccb7a544ce2c75bcdf9ee04e41352a595617
--- /dev/null
+++ b/checkpoint-10000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 10000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-10000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-10000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d992191c24b734cf89c3ffece194595ba9358fd2
--- /dev/null
+++ b/checkpoint-10000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d39caa45b29058c0abb740950add40a82681226aafcd1f80b24c325b1006d37
+size 10270077736
diff --git a/checkpoint-12000/random_states_0.pkl b/checkpoint-12000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1a5b9c20d39b331f23b22a3a1ca495cbe0f33139
--- /dev/null
+++ b/checkpoint-12000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7ce144472db32ad176f3d14c0ce8084d4894f8f201cccaf04311545562e9551
+size 16036
diff --git a/checkpoint-12000/scaler.pt b/checkpoint-12000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3c11797fd82a96976396da1c1d057874a08fc07
--- /dev/null
+++ b/checkpoint-12000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac3cbd3d6b5921bba1d83c6797932b2fd9b13925b772f70941ab48266594d3a1
+size 988
diff --git a/checkpoint-12000/scheduler.bin b/checkpoint-12000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d2819286cb79ca6b02d0371696f34dd012957b5f
--- /dev/null
+++ b/checkpoint-12000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de334306e1e3a73cdc29080b8d5f6c43f5e9ccc84b9bb04e31857a5d79f77124
+size 1000
diff --git a/checkpoint-12000/unet/config.json b/checkpoint-12000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-12000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-12000/unet/diffusion_pytorch_model.safetensors b/checkpoint-12000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f457bcc2d93ed5864b35c042f4f987d8e9121354
--- /dev/null
+++ b/checkpoint-12000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b59f1e4d9506b614d1ff3957c5e7a0c48d287cfcf434a517ca267aec78ea6d3
+size 10270077736
diff --git a/checkpoint-12000/unet_ema/config.json b/checkpoint-12000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..515a8816584153fed113fb27f3418d8dc90b53d8
--- /dev/null
+++ b/checkpoint-12000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 12000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-12000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-12000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c70b417fa9762ff12ff99be0f8327faea19f665d
--- /dev/null
+++ b/checkpoint-12000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ddea35102e7a38650d96f5c61b875280de719b1d885d548931ec53affac1386
+size 10270077736
diff --git a/checkpoint-14000/random_states_0.pkl b/checkpoint-14000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..2d54919e5c3d24da5bc225f071472f9d1a5ac5c1
--- /dev/null
+++ b/checkpoint-14000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28333a059f960aece967aea7e8f85708d39f4d35840ff89e3f48b4fad5eaf2a5
+size 16036
diff --git a/checkpoint-14000/scaler.pt b/checkpoint-14000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44b05b0e9f6baed7b36823ea8a8f00f03c11db9b
--- /dev/null
+++ b/checkpoint-14000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41f0b47b81dc991e8410a092173eefc65f4ea8154da3cbf86db639ed7c34a92b
+size 988
diff --git a/checkpoint-14000/scheduler.bin b/checkpoint-14000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bc54e70cd149733fe0d6e393066777bbd87c3e44
--- /dev/null
+++ b/checkpoint-14000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c6331f25ec85c983e8a8bea63d7dee57c667ddef968bad427b7c49bd3ce8f3
+size 1000
diff --git a/checkpoint-14000/unet/config.json b/checkpoint-14000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-14000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-14000/unet/diffusion_pytorch_model.safetensors b/checkpoint-14000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a2e4c81f17ad2f7b29e461e285de8a672cab6d4c
--- /dev/null
+++ b/checkpoint-14000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9126dc048de9e12f64c52b325664d15163e631b97546fef6a3851d3300eb6f5
+size 10270077736
diff --git a/checkpoint-14000/unet_ema/config.json b/checkpoint-14000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a18386ab7baec8440025856369c425eec89385ba
--- /dev/null
+++ b/checkpoint-14000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 14000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-14000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-14000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6c2703f6c5c12ac20fd220f3f92f0f36ef7b5979
--- /dev/null
+++ b/checkpoint-14000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35426dbe0fd0d7c35b44234bddd8ecf8a48b5c9a1a87825f0d86afec51851f88
+size 10270077736
diff --git a/checkpoint-16000/random_states_0.pkl b/checkpoint-16000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..675f7237c706ce7e54ca56e2cdfac6c90bd47f32
--- /dev/null
+++ b/checkpoint-16000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e70a601d707cf791e2a0f976b1a7030114b4bc6800c3b83035a239638944452
+size 16036
diff --git a/checkpoint-16000/scaler.pt b/checkpoint-16000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b451fe5a99d7db126166e8d680ced1ab0a87b2db
--- /dev/null
+++ b/checkpoint-16000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c971bb373e16e4ea77e1df5f0317d6816e80f243bad571d5be94e15a4165df0
+size 988
diff --git a/checkpoint-16000/scheduler.bin b/checkpoint-16000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2f246251af23b5489395c8fde5cdc7257b1206fd
--- /dev/null
+++ b/checkpoint-16000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fc3765769a1f03f5fb94d00529373577df8f6e56e6f9648c161b9a15c94d6a2
+size 1000
diff --git a/checkpoint-16000/unet/config.json b/checkpoint-16000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-16000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-16000/unet/diffusion_pytorch_model.safetensors b/checkpoint-16000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da1be671b68f50c1858d323e1418fbde661b0caa
--- /dev/null
+++ b/checkpoint-16000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f41b29b5f265e72522684aa19feb400da9c1155ad8e9513ac2fc769a6b0d537a
+size 10270077736
diff --git a/checkpoint-16000/unet_ema/config.json b/checkpoint-16000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf9b17719838863ff83805e1d9e9d77c93ff329d
--- /dev/null
+++ b/checkpoint-16000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 16000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-16000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-16000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4c1e7a855b3fcaa2038423919f10f3a3edadcc7
--- /dev/null
+++ b/checkpoint-16000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02757979bb21a56aceaca9a0fe35e6d5ca97d64c4dd71ac29b697a462756b637
+size 10270077736
diff --git a/checkpoint-18000/random_states_0.pkl b/checkpoint-18000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..34437584229e4e31add9d030815c9ff5026221e5
--- /dev/null
+++ b/checkpoint-18000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2af31962dbb7ee0c1d2be90ee02bec38fed03c1ce0ea5b5dd672231b1130fd34
+size 16036
diff --git a/checkpoint-18000/scaler.pt b/checkpoint-18000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d087eff9773014b7600d6017cf64ea51f71749b2
--- /dev/null
+++ b/checkpoint-18000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:502381426fee9e2c2df19dda15d6fdd2481884e155ff1a72e8bcfb0d04bed346
+size 988
diff --git a/checkpoint-18000/scheduler.bin b/checkpoint-18000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1b43e0f6b7fec4288e330611fb330fb901757922
--- /dev/null
+++ b/checkpoint-18000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67ae841aaf62a9cedc1561c8c8e1b78965b77f71e767e2feeae14a19cf3ead24
+size 1000
diff --git a/checkpoint-18000/unet/config.json b/checkpoint-18000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-18000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-18000/unet/diffusion_pytorch_model.safetensors b/checkpoint-18000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9ba77b3fd5b6bf8c00d6d51afe9d8c8c782235c5
--- /dev/null
+++ b/checkpoint-18000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9accedc18ebdde3fdd844dbee08f509d513ed873b6719b91cf333c9c734a4c98
+size 10270077736
diff --git a/checkpoint-18000/unet_ema/config.json b/checkpoint-18000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..edf9065fe068d61b1086298ce525d6c345c98a45
--- /dev/null
+++ b/checkpoint-18000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 18000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-18000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-18000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..55658759653676c1ca45e7d77d5f6e26f7469cf2
--- /dev/null
+++ b/checkpoint-18000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbddcee9b37cb49123c0e5bd3a9a951c6f5f3891bca4e52ef94a3cc748eb55b6
+size 10270077736
diff --git a/checkpoint-2000/random_states_0.pkl b/checkpoint-2000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..dc46dd21f69b00c23bd92feb86ae1bf5ad8c258b
--- /dev/null
+++ b/checkpoint-2000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27df19f9a5dfa359844ab31f1a2982dd4a349cbf9268b0b010f3cc0a9aebd034
+size 16036
diff --git a/checkpoint-2000/scaler.pt b/checkpoint-2000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99282b0e304ef68a38f5a2dd4a6ac123700f2f53
--- /dev/null
+++ b/checkpoint-2000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c50a9cebe5d66d453d25b140738bff479749ac03e0a43597d8776bc22f6ed0c
+size 988
diff --git a/checkpoint-2000/scheduler.bin b/checkpoint-2000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6ac110357fc7ce3ad0467e9b282506fe6b51929b
--- /dev/null
+++ b/checkpoint-2000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96db225432c9cbb18c45f9efe049c9bd8eaa10d14cc60d92268077c22253061a
+size 1000
diff --git a/checkpoint-2000/unet/config.json b/checkpoint-2000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-2000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-2000/unet/diffusion_pytorch_model.safetensors b/checkpoint-2000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f37f40b0f1739e716b8a0e9f0d8563400b4b2b8
--- /dev/null
+++ b/checkpoint-2000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39324d46adbb76110d854d6dfb6749039d9a5306d064cb9420a445649b12e722
+size 10270077736
diff --git a/checkpoint-2000/unet_ema/config.json b/checkpoint-2000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e8028fabcb37587b4ed70cc6edd481808361d5f6
--- /dev/null
+++ b/checkpoint-2000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 2000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-2000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-2000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..638d060bcc90cd859b66907320e0a85c4fa887f3
--- /dev/null
+++ b/checkpoint-2000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1df1a1a9a723f3a7d27ae7ea8dc1b5e752895019d7279988933fae796623020
+size 10270077736
diff --git a/checkpoint-20000/random_states_0.pkl b/checkpoint-20000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..02a957a3b818e49bab05d6328f1b66fad6f47968
--- /dev/null
+++ b/checkpoint-20000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:644d0e715f62d2924d8e24e3bd791e304232572917bb16edd2a0d574d022ebf5
+size 16036
diff --git a/checkpoint-20000/scaler.pt b/checkpoint-20000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f4fec1c6190b2639db7e2afcbff27bcae94a206
--- /dev/null
+++ b/checkpoint-20000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a785c590d613f6236be768bf483064a8efa62903cebd25912b277cff0f01f1de
+size 988
diff --git a/checkpoint-20000/scheduler.bin b/checkpoint-20000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a1d22678dc5cbeab4c32b4bd353d9b2eab74e12a
--- /dev/null
+++ b/checkpoint-20000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7a23387f2a388acc1a12a024990359fe75b09c636c5bc96db1314af1151677f
+size 1000
diff --git a/checkpoint-20000/unet/config.json b/checkpoint-20000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-20000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-20000/unet/diffusion_pytorch_model.safetensors b/checkpoint-20000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..892a80cef5a982beffb1fd71cd573ce34f965994
--- /dev/null
+++ b/checkpoint-20000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd303e1ae29db172c0ba5792a9faf394000c7ca5f559aa0050a89c199e42b97b
+size 10270077736
diff --git a/checkpoint-20000/unet_ema/config.json b/checkpoint-20000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a16f338841c75cb16e9494d30f488ea11dae97a7
--- /dev/null
+++ b/checkpoint-20000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 20000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-20000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-20000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fd1150d9e7ea3da33b7a19ded6bd54e1a624572f
--- /dev/null
+++ b/checkpoint-20000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ad68d84c62731e42e172e7503f48f6122f3e0e54e886110c53fd78210de301e
+size 10270077736
diff --git a/checkpoint-22000/random_states_0.pkl b/checkpoint-22000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..ef6bf04abbd2e571c49982dc6c7d1a3b0348ff48
--- /dev/null
+++ b/checkpoint-22000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c734ad51bd7f4dfc574cd1e4fc324d92d2cfc2688ef87e01b6d18347f1ec411
+size 16036
diff --git a/checkpoint-22000/scaler.pt b/checkpoint-22000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb537fb3a0170793d7342835f8f08d03bfffa87f
--- /dev/null
+++ b/checkpoint-22000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8245c650bb776fbc74ef9cc8fa6754ad29dc5d5603d6efeebd8e5f3709b55fb0
+size 988
diff --git a/checkpoint-22000/scheduler.bin b/checkpoint-22000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f25892ed42d54dc1d5116383f2d19aa8a0cfab03
--- /dev/null
+++ b/checkpoint-22000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f616ea71458fcf54d00f4e041d03f52e0b0d984a719816abc52c0143056d6a7
+size 1000
diff --git a/checkpoint-22000/unet/config.json b/checkpoint-22000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-22000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-22000/unet/diffusion_pytorch_model.safetensors b/checkpoint-22000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e2d0909eb000cb91d965364681955357945ba661
--- /dev/null
+++ b/checkpoint-22000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84cec271da65575b95cc81e85efcd4ae7016d1dece8165d2ae789cf158cda584
+size 10270077736
diff --git a/checkpoint-22000/unet_ema/config.json b/checkpoint-22000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb706d52bd961046e3c37b3b0af3cb87d0cb98da
--- /dev/null
+++ b/checkpoint-22000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 22000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-22000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-22000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f86ca3fc64d081287ad18943810781c6d850bfe7
--- /dev/null
+++ b/checkpoint-22000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c012a969e34b8c8f2b9ee70a2bf2b4bcfcb29917bf9699430abf4e7721b9091
+size 10270077736
diff --git a/checkpoint-24000/random_states_0.pkl b/checkpoint-24000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..3e2ea624db321bb06499c4e98215fd493819cd57
--- /dev/null
+++ b/checkpoint-24000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:908c59b6f2eec06c08ccd5bc999d370dac0327af0e7f97007b9ee76ccc4bf561
+size 16036
diff --git a/checkpoint-24000/scaler.pt b/checkpoint-24000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..761872fc9ab331307e9b6d1b9742b912e49c708e
--- /dev/null
+++ b/checkpoint-24000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4a6221269f078c30ab877c5029e7ce827921b676bffe52b66e49f3f3a9f54a4
+size 988
diff --git a/checkpoint-24000/scheduler.bin b/checkpoint-24000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5b1d5150eb74d0ccfbcae62ba45ebbd39a737c6b
--- /dev/null
+++ b/checkpoint-24000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd5401f3efd9ea177ee8bab4422bceb22498f3a8066d36ea8f14f883fa305ae
+size 1000
diff --git a/checkpoint-24000/unet/config.json b/checkpoint-24000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-24000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-24000/unet/diffusion_pytorch_model.safetensors b/checkpoint-24000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..16ade7e950455aa74068005d61e1b16d46ab767a
--- /dev/null
+++ b/checkpoint-24000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0747d13cdea92d2ca13bd1bd48cbc1751d476cd6596a15903cc0a71fa9a51d5b
+size 10270077736
diff --git a/checkpoint-24000/unet_ema/config.json b/checkpoint-24000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..cf6b15a650b2c8e8acd5cc8ceed44eeb956a4878
--- /dev/null
+++ b/checkpoint-24000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 24000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-24000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-24000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..632984cf3251db296d0a3f949ace26c311dd43ef
--- /dev/null
+++ b/checkpoint-24000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86927f963b4ba880968b13d0a77b3e10dfda51757d8f6ecd65f0e1fe69a7dd43
+size 10270077736
diff --git a/checkpoint-26000/random_states_0.pkl b/checkpoint-26000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..8771a4ea401d41c9ab3e600d2ee2c02ea7ebb105
--- /dev/null
+++ b/checkpoint-26000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6edc74d60122cb48897e695727404869034c87fbe77ea3058f6c2a6629bcedd
+size 16036
diff --git a/checkpoint-26000/scaler.pt b/checkpoint-26000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb9e4732f1c58eb4fe499ce762c47a99acc42753
--- /dev/null
+++ b/checkpoint-26000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40a8b99be8e06402b95405ab9ee26cb4265b661b88c1fc53e844980f9fb0a070
+size 988
diff --git a/checkpoint-26000/scheduler.bin b/checkpoint-26000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9a7b293860290c945b5da42c8cfbf07962b1a18e
--- /dev/null
+++ b/checkpoint-26000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:767865188e26758a7ebf8b0517fd4a563140230453a9d82270374e7a5e81a6fa
+size 1000
diff --git a/checkpoint-26000/unet/config.json b/checkpoint-26000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-26000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-26000/unet/diffusion_pytorch_model.safetensors b/checkpoint-26000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..965d0ab123b896cacd5d2423ebf5b4f34b9d0538
--- /dev/null
+++ b/checkpoint-26000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e0f3a0ae8d33fa94c0856748a1c4177b782489f064acce660d4e5bc6b286e59
+size 10270077736
diff --git a/checkpoint-26000/unet_ema/config.json b/checkpoint-26000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..91c8ef225c528d6ac9d5abdea9b28d18fdf78ae6
--- /dev/null
+++ b/checkpoint-26000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 26000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-26000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-26000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..11309e0b86feaf76559ccc0a7d45a7649676f1d9
--- /dev/null
+++ b/checkpoint-26000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bdaeae2251211b873f86982749776abfa79ca4646d48207986c25088199504f
+size 10270077736
diff --git a/checkpoint-28000/random_states_0.pkl b/checkpoint-28000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..25d344b3f2d113f667b8f00afe2b9fc77e20d707
--- /dev/null
+++ b/checkpoint-28000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d79ddb2b606123f9aa0e9242366609a30b9aceea80345c3ff277f5310bfbc5f4
+size 16036
diff --git a/checkpoint-28000/scaler.pt b/checkpoint-28000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c080cb9517d209eaadfa30558a1d71a67a23d54
--- /dev/null
+++ b/checkpoint-28000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:019221f3192f4927c921b123577a1666f9b9f4c37e86b8641ed6cf124537ad31
+size 988
diff --git a/checkpoint-28000/scheduler.bin b/checkpoint-28000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..72f43dbe0a33f03887dc39a48e0b4fe31612f58a
--- /dev/null
+++ b/checkpoint-28000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11cd26f68dfe01e8f1eb1308d268338b3e47cabeb8360cebbee6de64521a3970
+size 1000
diff --git a/checkpoint-28000/unet/config.json b/checkpoint-28000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-28000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-28000/unet/diffusion_pytorch_model.safetensors b/checkpoint-28000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d49d300d0b504b1cb55e19cee079e4e16f4efa51
--- /dev/null
+++ b/checkpoint-28000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1acec8d1639f9ae7d1c92275753e3dbac2d1be2e283f4133040d90cd6dfa397
+size 10270077736
diff --git a/checkpoint-28000/unet_ema/config.json b/checkpoint-28000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..62a85eb9437dea80048bee353c528de5634efd27
--- /dev/null
+++ b/checkpoint-28000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 28000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-28000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-28000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6db96eeaadfad7c5bf97ac7b5a314b88436a473
--- /dev/null
+++ b/checkpoint-28000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f279dfa1725e452331152a5755d103099bd7d8e8d76dbdbc382ab0eddcd76684
+size 10270077736
diff --git a/checkpoint-30000/random_states_0.pkl b/checkpoint-30000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e131bbf898990886ee192e0ab1b797b1f1c1f4d0
--- /dev/null
+++ b/checkpoint-30000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e5fedd3608bafae6449647868bbb3e15906862a1981420dcfb316f24a3d336
+size 16036
diff --git a/checkpoint-30000/scaler.pt b/checkpoint-30000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e92399446b8e88145a21999e0859706567b0be5
--- /dev/null
+++ b/checkpoint-30000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdbd1c391e8a1dd3ca4828a8376c46491833d0cbfa370000ed3c0af18a2d0305
+size 988
diff --git a/checkpoint-30000/scheduler.bin b/checkpoint-30000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dfe66564880248cf2e90b50caa79ecd87a97c776
--- /dev/null
+++ b/checkpoint-30000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbc3741e222076495fad54a8274e66f0ba809c85ba4d615464a699ae8e207117
+size 1000
diff --git a/checkpoint-30000/unet/config.json b/checkpoint-30000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-30000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-30000/unet/diffusion_pytorch_model.safetensors b/checkpoint-30000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..58d94f3c732603449e184ec7f3a08183533bdf9e
--- /dev/null
+++ b/checkpoint-30000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63994998f7c61bd792cd57453c7056ae5f55690aef5b772d777f74e9f5e91057
+size 10270077736
diff --git a/checkpoint-30000/unet_ema/config.json b/checkpoint-30000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7e6ca52f7ebc97631c071c0061cd064fd6e8415f
--- /dev/null
+++ b/checkpoint-30000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 30000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-30000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-30000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..87155d8f2a93c4171399a83f5af0aa0df5a9d13a
--- /dev/null
+++ b/checkpoint-30000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:237fff11c5f4a9e7dcb937aa6b1e7dc86e3abe2cdf157d42710d7b51c2b6910e
+size 10270077736
diff --git a/checkpoint-32000/random_states_0.pkl b/checkpoint-32000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..7782e66980a03a6dc1c4b12eb4f5357786f344b0
--- /dev/null
+++ b/checkpoint-32000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82ff6076637bea0361695ac0ec7264e5a94064c4642cdc8b86f68ce3ee5da630
+size 16036
diff --git a/checkpoint-32000/scaler.pt b/checkpoint-32000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..105f6aecf105d0ae6d14dffc9c5f277c23ad05b9
--- /dev/null
+++ b/checkpoint-32000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32fdf2b6d78355fc563b57e779e4539eb74b8cd7df95670e9830e42a79b93bcf
+size 988
diff --git a/checkpoint-32000/scheduler.bin b/checkpoint-32000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1aa7086452849c3f21b809657a377c4d81e7d279
--- /dev/null
+++ b/checkpoint-32000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6cdef7b5b55866ba6d2858045b96f9b6d2cdef99e073fecc35f1afb9acc8187
+size 1000
diff --git a/checkpoint-32000/unet/config.json b/checkpoint-32000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-32000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-32000/unet/diffusion_pytorch_model.safetensors b/checkpoint-32000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9689e8c7874e313d0e0a0042020b8cd6dbe78a51
--- /dev/null
+++ b/checkpoint-32000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:927489740f9c4148fbbee66c584ed4cfdf05bfe4e33bcba8d62768b6169f9be1
+size 10270077736
diff --git a/checkpoint-32000/unet_ema/config.json b/checkpoint-32000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f7ced868a5b42e46b6e43120114f8010421788f
--- /dev/null
+++ b/checkpoint-32000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 32000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-32000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-32000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b556fbc8b8b0d0cbf2542e5b823ea5a7ea49e146
--- /dev/null
+++ b/checkpoint-32000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a71b3999d50664f2a59e512c1d78a8686dc9b9a2790abba6064d7102d2a85cf
+size 10270077736
diff --git a/checkpoint-34000/random_states_0.pkl b/checkpoint-34000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d086b06dd8a81c865cfffb14dbb1734744443bf2
--- /dev/null
+++ b/checkpoint-34000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98514d38e980518bf7835c3c58c9235be8f5b05ef9cacf210bf32ddd27469cea
+size 16036
diff --git a/checkpoint-34000/scaler.pt b/checkpoint-34000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88d1816c83c006f35f1e8901e130ca7f856b4416
--- /dev/null
+++ b/checkpoint-34000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7fb6712d6c9950cf6be9cb7615eb03be50ae523a158eda40d9e873d5a31b608
+size 988
diff --git a/checkpoint-34000/scheduler.bin b/checkpoint-34000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3fd2e78b6c807c675b5459e14ccae845b6f6e790
--- /dev/null
+++ b/checkpoint-34000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d9275f860a196f6f117fb6e3340f48222501ddb0ac21072b72b53c2c232df8e
+size 1000
diff --git a/checkpoint-34000/unet/config.json b/checkpoint-34000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-34000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-34000/unet/diffusion_pytorch_model.safetensors b/checkpoint-34000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..292da4695ea5bc1ab83c25993ec3a9a373a2f83d
--- /dev/null
+++ b/checkpoint-34000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7ea9007e48c1f9c6f45d72ec78e8e2085e7aeaace2f8b256ac46b60e20e5510
+size 10270077736
diff --git a/checkpoint-34000/unet_ema/config.json b/checkpoint-34000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7fa371b2be154c17413c6f3c9486295e8793a528
--- /dev/null
+++ b/checkpoint-34000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 34000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-34000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-34000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0d9e9123e4ee444c83128a4ab21514b8f8ac9888
--- /dev/null
+++ b/checkpoint-34000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fea7d4ee2945752896ed6647a4583fc1d3b87bd8c64eca830b2747b48fc538b
+size 10270077736
diff --git a/checkpoint-36000/random_states_0.pkl b/checkpoint-36000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..0d421757c38c3d9dcd22d9a404ceb39a1b3872f0
--- /dev/null
+++ b/checkpoint-36000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e623edc140bb1bda3669906f211349a2de29a35117663b7ef8b73736b31327
+size 16036
diff --git a/checkpoint-36000/scaler.pt b/checkpoint-36000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3182c650d75969c1d2226208fd0734faddd84bd3
--- /dev/null
+++ b/checkpoint-36000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4ae9491585efd3b007ae5453f960cb0a835aab1da0bf378534bcfd4477fbd1c
+size 988
diff --git a/checkpoint-36000/scheduler.bin b/checkpoint-36000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fe5bfb804cad0eb6ac78a920433b6ad523cdcb17
--- /dev/null
+++ b/checkpoint-36000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d6812a820a82b8dcb74e88cd18cd50fe019b70290440404f61fac8ce69aaa8f
+size 1000
diff --git a/checkpoint-36000/unet/config.json b/checkpoint-36000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-36000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-36000/unet/diffusion_pytorch_model.safetensors b/checkpoint-36000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3d737544ed9869fa6c28e3e791105fe8b08fc466
--- /dev/null
+++ b/checkpoint-36000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efcb5cdf99202ec0d2ad2628630b40d108514b55e900069837d61e5559cc03d1
+size 10270077736
diff --git a/checkpoint-36000/unet_ema/config.json b/checkpoint-36000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..09987a25d42df76fb6b5be5635b111cf34ed4b67
--- /dev/null
+++ b/checkpoint-36000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 36000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-36000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-36000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c1f2d8476d09cf95cdab1afa6d1d1dd1e3e6f74d
--- /dev/null
+++ b/checkpoint-36000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f380e9f80e39ce2086d280b5b27ed7c490b3e897e06243defe4b87b09739da7d
+size 10270077736
diff --git a/checkpoint-38000/random_states_0.pkl b/checkpoint-38000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..1b58208b29e5416705e9a19c5ee73db734120704
--- /dev/null
+++ b/checkpoint-38000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0947d6cedce5020155446936272bdbbf1f4cc324086438af54ebd64b62110b18
+size 16036
diff --git a/checkpoint-38000/scaler.pt b/checkpoint-38000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb30a67d3b6cd1e499dd9e64eba4b7bc5a2aa8c4
--- /dev/null
+++ b/checkpoint-38000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68330b3f87778d3dd08aacbf39e5bb99640abf67c57c23c42446b59c6804dd85
+size 988
diff --git a/checkpoint-38000/scheduler.bin b/checkpoint-38000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65609e6ad4efb5446914ac9b1fe00a138eb5e330
--- /dev/null
+++ b/checkpoint-38000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8c7ee482335d3aa0965665b4709d61d13c2feb739356dea3c044d09a1e1ccef
+size 1000
diff --git a/checkpoint-38000/unet/config.json b/checkpoint-38000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-38000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-38000/unet/diffusion_pytorch_model.safetensors b/checkpoint-38000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..08b590ac77fc4714aa01e9d24964c33cc94ba842
--- /dev/null
+++ b/checkpoint-38000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b33509ce48db997837ee4d52d0f4fdf8d2af4e7b0fc881575bc56056d05e1c6
+size 10270077736
diff --git a/checkpoint-38000/unet_ema/config.json b/checkpoint-38000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b023f0e6d91a907699db34c0d812229a71ddc78
--- /dev/null
+++ b/checkpoint-38000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 38000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-38000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-38000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f06301db41e300824ed94fc51799ece253e240fa
--- /dev/null
+++ b/checkpoint-38000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3026dbb3d18edba9e2e9949c5aa0f7f2b761dc113a8e0f6edae2ef318a1e0c03
+size 10270077736
diff --git a/checkpoint-4000/random_states_0.pkl b/checkpoint-4000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..2f9eebadd69d28a3d8b85780a2ea5f88296bb09e
--- /dev/null
+++ b/checkpoint-4000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdc3fcc07f2662488e7a4637bc54f2ae40df4973e89d284a727737c892bdb976
+size 16036
diff --git a/checkpoint-4000/scaler.pt b/checkpoint-4000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e3c148798a27b525e5967dae1f764aa32ba855f
--- /dev/null
+++ b/checkpoint-4000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59a4acfb0da74c479080613978839dd3cbb4608fd2c07e764b4c844401d8dd5f
+size 988
diff --git a/checkpoint-4000/scheduler.bin b/checkpoint-4000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d2d3012ebc2074d1ec7a6651c34d5c6b8b07902f
--- /dev/null
+++ b/checkpoint-4000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb3e70bed318cb9719417f7a2fecd01f712e0697d58d6c3783b6cac535f25e03
+size 1000
diff --git a/checkpoint-4000/unet/config.json b/checkpoint-4000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-4000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-4000/unet/diffusion_pytorch_model.safetensors b/checkpoint-4000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e5d53638da0d87bc601f80d8d22f8f7ed86c7d2d
--- /dev/null
+++ b/checkpoint-4000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d583473c84fabf2cc56c4cdbb816135d006d22cf3b4d2b0b057e06cde6d4de8
+size 10270077736
diff --git a/checkpoint-4000/unet_ema/config.json b/checkpoint-4000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d67b881927f09959a46b3d1438c7df9360ffeb44
--- /dev/null
+++ b/checkpoint-4000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 4000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-4000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-4000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4692401002a03d65a380bea3e2b83ef59c91ee3c
--- /dev/null
+++ b/checkpoint-4000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abf69bddbfe5652e353483abfa3024af311246462d13a390ebd1dd825921301d
+size 10270077736
diff --git a/checkpoint-40000/random_states_0.pkl b/checkpoint-40000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..0457465d270bb134611c4e95b647e85c5a659e40
--- /dev/null
+++ b/checkpoint-40000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:191203c42cc10542c3b077151cc5900a5ab193874d34fc6e17c53f650122ca1a
+size 16036
diff --git a/checkpoint-40000/scaler.pt b/checkpoint-40000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a88f3eb9f452147503e97cb69c6e65e2df3561e9
--- /dev/null
+++ b/checkpoint-40000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:331f1345beea2ab43b35a3a70e3582a3effd33bebf3d570520a6302d61443872
+size 988
diff --git a/checkpoint-40000/scheduler.bin b/checkpoint-40000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fed2692aef17e2557dd25998fd8bb2e77ce918d5
--- /dev/null
+++ b/checkpoint-40000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:381d43144f82399a38bb167ebfc8a64dc6a998f756463ec4203134ee920fe054
+size 1000
diff --git a/checkpoint-40000/unet/config.json b/checkpoint-40000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-40000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-40000/unet/diffusion_pytorch_model.safetensors b/checkpoint-40000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..81116c96a45da1f35a89b80c34e2f403f20a13c3
--- /dev/null
+++ b/checkpoint-40000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1cb262356cfa2abbf0eb225d638ed77bcc556759263b9f3ffacf6c2faf650e4
+size 10270077736
diff --git a/checkpoint-40000/unet_ema/config.json b/checkpoint-40000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b0bec0111f7d87882065a25cce3b13638467455e
--- /dev/null
+++ b/checkpoint-40000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 40000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-40000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-40000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a20626d37f5a7fa3b4edcbd43ec47ae95c8dec09
--- /dev/null
+++ b/checkpoint-40000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67aca97f9d78df749bd6449405f78fc344d7346945d4f4e351e0f218af347ade
+size 10270077736
diff --git a/checkpoint-42000/random_states_0.pkl b/checkpoint-42000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..4c11abc642a2c44253cbcc4fb75eb062b4c640fd
--- /dev/null
+++ b/checkpoint-42000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b24230ed8dc5db3cb098c9ef6484f9d287e77057630e660129e77f7231374dd6
+size 16036
diff --git a/checkpoint-42000/scaler.pt b/checkpoint-42000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7832427b9389a9165d02ca1bd180cab034c6bc62
--- /dev/null
+++ b/checkpoint-42000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82e186dd47b00982dc455dabe6ebf2d76b6cf98b44233c83c7285a475eba70ee
+size 988
diff --git a/checkpoint-42000/scheduler.bin b/checkpoint-42000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ccff08631f392af41d7cd181a0f90c3088ea50a7
--- /dev/null
+++ b/checkpoint-42000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb7a71886e953cbf0c2e159df28f695ecb7066bf68a2d5a48474dbafdd6f0ba9
+size 1000
diff --git a/checkpoint-42000/unet/config.json b/checkpoint-42000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-42000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-42000/unet/diffusion_pytorch_model.safetensors b/checkpoint-42000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..04f8c728978b28f70124d969f270616feca92ca3
--- /dev/null
+++ b/checkpoint-42000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:920be2ff644f8f62108ded2b1c5bc7630d027492d855b6d3e8bfbde3a9b73981
+size 10270077736
diff --git a/checkpoint-42000/unet_ema/config.json b/checkpoint-42000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..dffcdb911d712fb1d4afe953be25111bbd9f1729
--- /dev/null
+++ b/checkpoint-42000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 42000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-42000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-42000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..646a0f90c0b9f29cda1d4241db86ed0fe553eda7
--- /dev/null
+++ b/checkpoint-42000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8dff930d39111362b3ab33e34fa2418bbe37fe86096b7f0e2f37476e737f5be
+size 10270077736
diff --git a/checkpoint-44000/random_states_0.pkl b/checkpoint-44000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..03770bc4b9d33970b7c68288be9b383ff008cd07
--- /dev/null
+++ b/checkpoint-44000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbff63362d384809bfbb2c4bfe1d8306dc2e502ef76bf59af3e3207ccf908dbc
+size 16036
diff --git a/checkpoint-44000/scaler.pt b/checkpoint-44000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bcfde9302917af15dcbd46d07a6ec09840e08a24
--- /dev/null
+++ b/checkpoint-44000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41ff1b5ae695be6f47b5825b4c8e25f0297a443eaf324e6a2384b28c6819f43a
+size 988
diff --git a/checkpoint-44000/scheduler.bin b/checkpoint-44000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eff8b498a1209c38b5038ab2ff5078c781a5efa6
--- /dev/null
+++ b/checkpoint-44000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3e7925b25019b7930d64021e137ce5b46ae06a386ed2a59533bf812cd742f97
+size 1000
diff --git a/checkpoint-44000/unet/config.json b/checkpoint-44000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-44000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-44000/unet/diffusion_pytorch_model.safetensors b/checkpoint-44000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f0ed3d11eff7ed0042c3d67807800b274393a39
--- /dev/null
+++ b/checkpoint-44000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dac064676945e481e96e8ed2cf35098b15db41a0e1c71c8585b03b707a2b4ec
+size 10270077736
diff --git a/checkpoint-44000/unet_ema/config.json b/checkpoint-44000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..520b4bd6d208d4aa07481c5a774e02b715d9da95
--- /dev/null
+++ b/checkpoint-44000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 44000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-44000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-44000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8d446799523c8ca1f3ab49173ecc21eb0bdd76d5
--- /dev/null
+++ b/checkpoint-44000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:687d8c5ee173dc1556b727307b8e806d119f9b38a9acb7cc928e822d9cb49bf1
+size 10270077736
diff --git a/checkpoint-46000/random_states_0.pkl b/checkpoint-46000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..d70d6c41ce94de85c92f8aadd257a52717144a0b
--- /dev/null
+++ b/checkpoint-46000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4dad874e137548ce1ff21927053c2260726bf3a1a2ec951f175ee6e10e144bb
+size 16036
diff --git a/checkpoint-46000/scaler.pt b/checkpoint-46000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e15b6523eaca492a4e39786967e81918c69c636
--- /dev/null
+++ b/checkpoint-46000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f60a1a3439e0709e5d6eaec6463217b6d608d972850d376a66b95d09b7d00ec
+size 988
diff --git a/checkpoint-46000/scheduler.bin b/checkpoint-46000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6ee3dd3e4122ed2c2c6482fc7b5681bbf005e5ef
--- /dev/null
+++ b/checkpoint-46000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56ccb349f8f4b2aba067efec2a3e1c9544e64393caa18af81ddef697a9d30328
+size 1000
diff --git a/checkpoint-46000/unet/config.json b/checkpoint-46000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-46000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-46000/unet/diffusion_pytorch_model.safetensors b/checkpoint-46000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a59945923fbb888661c7692cb940a548d8830c3c
--- /dev/null
+++ b/checkpoint-46000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45eca296eb73f74c624db283c0fd17d3f7e3bbb76db4cee67cd10c779a7361f9
+size 10270077736
diff --git a/checkpoint-46000/unet_ema/config.json b/checkpoint-46000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..004f05e46689c0f88276d141f00ff73349808ee5
--- /dev/null
+++ b/checkpoint-46000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 46000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-46000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-46000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b30ef3146d662297aa946a1b8baf96aa8a0deeed
--- /dev/null
+++ b/checkpoint-46000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfbeed31da4e9a4e1223df417650c0e4f714047c4d2666c998b81ffd5810532a
+size 10270077736
diff --git a/checkpoint-48000/random_states_0.pkl b/checkpoint-48000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..8062f290a6223960daa70fac071e63ef117f4a42
--- /dev/null
+++ b/checkpoint-48000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:001c65f40dcadf560162ea46e69bdbd4f8e4fdcb4a05b0fde2491129c45babfa
+size 16036
diff --git a/checkpoint-48000/scaler.pt b/checkpoint-48000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f87d6888cb92fc06309b9e8cf941d51a80f46fbf
--- /dev/null
+++ b/checkpoint-48000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b18b8adaf0d4c51e06e393813a6c306cf9a38e64c480950ad35be7843ca15a2
+size 988
diff --git a/checkpoint-48000/scheduler.bin b/checkpoint-48000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c6729df7b4ee8b7ed4a9344ee65283a335932000
--- /dev/null
+++ b/checkpoint-48000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fb4a1b13800a2715196edf5f49e4fce078bafad8616adc16fb5d9465411ff73
+size 1000
diff --git a/checkpoint-48000/unet/config.json b/checkpoint-48000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-48000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-48000/unet/diffusion_pytorch_model.safetensors b/checkpoint-48000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2c61ae46e88939f019e502b58399ef3ce2410e0d
--- /dev/null
+++ b/checkpoint-48000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c732136545f5da66c7a5f9d497c427c92842b47b1fda2c39c2746279e360f11
+size 10270077736
diff --git a/checkpoint-48000/unet_ema/config.json b/checkpoint-48000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f94c007a919cbf988d80e4db6152ea417d8cf4fb
--- /dev/null
+++ b/checkpoint-48000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 48000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-48000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-48000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..92168eb9642a24f685c67b37cf74ba34b81ac8a6
--- /dev/null
+++ b/checkpoint-48000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a7300a1a910dba10c9f5ba257ab609fd8902772e35807e297eeab030ba312af
+size 10270077736
diff --git a/checkpoint-50000/random_states_0.pkl b/checkpoint-50000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..c8bcdbc5c4d0b0810d3a2d4cc57799351ff13b29
--- /dev/null
+++ b/checkpoint-50000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72f56b7ea73e59f161ec51955a94dbd134229f04f5af057dc46e682d10a256a8
+size 16036
diff --git a/checkpoint-50000/scaler.pt b/checkpoint-50000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c28c787027c41b88e4fc6c0f0b1f057cae0895c3
--- /dev/null
+++ b/checkpoint-50000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bc0afdebdb6fe80e9857016730764ef53add995299874fa03e87b8c771f1632
+size 988
diff --git a/checkpoint-50000/scheduler.bin b/checkpoint-50000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c498dcf28f8e12ecee4a7cf82eedac644794f939
--- /dev/null
+++ b/checkpoint-50000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:655c9e0f92a95ea9c285a12bb30639eab13398126984b3baae8117b71e2ac20c
+size 1000
diff --git a/checkpoint-50000/unet/config.json b/checkpoint-50000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-50000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-50000/unet/diffusion_pytorch_model.safetensors b/checkpoint-50000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a91417b6813350c63298bc69caca3d5a3d182beb
--- /dev/null
+++ b/checkpoint-50000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2a0d5f8abb27e4650c418b9e89ca244452004555d357ac60d678ac3732d7095
+size 10270077736
diff --git a/checkpoint-50000/unet_ema/config.json b/checkpoint-50000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..16b81385eaa9904a6096f816f44298bfba823593
--- /dev/null
+++ b/checkpoint-50000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 50000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-50000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-50000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f2d3b70a523f7a75357847de26ddf1899ff83ae
--- /dev/null
+++ b/checkpoint-50000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af0e56d8f49d228a1068fb3d5da387e618bf467b8c0555dcb64185f82536145f
+size 10270077736
diff --git a/checkpoint-6000/random_states_0.pkl b/checkpoint-6000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..136ec5ca4e0e4d1e05a328aa817117e929a8bc3b
--- /dev/null
+++ b/checkpoint-6000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:830be119ec8b12f3750483828a8885d906eb8490d73a5d688c162d101052bc2a
+size 16036
diff --git a/checkpoint-6000/scaler.pt b/checkpoint-6000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a5b8fd205c430f8e1a85de831dc647e1ce01274
--- /dev/null
+++ b/checkpoint-6000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78bbcb8a64648fc396f2e9449404bed78b99c4d04c8efa56f03e8a62591f09b0
+size 988
diff --git a/checkpoint-6000/scheduler.bin b/checkpoint-6000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bea99e68ca1bee86a3874db9d41bff3d551a7678
--- /dev/null
+++ b/checkpoint-6000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44845d2e7cd6139b225fe064c0c265352d5d3fba81dc619e64e33c24a22b2a1d
+size 1000
diff --git a/checkpoint-6000/unet/config.json b/checkpoint-6000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-6000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-6000/unet/diffusion_pytorch_model.safetensors b/checkpoint-6000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89985bbd63c0feed8ea94f7a96af79bc15c4eca0
--- /dev/null
+++ b/checkpoint-6000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e119cab23ebe0b0d201765a3f9ac55a35f6e42192289a2a4e792c3e71d5e581
+size 10270077736
diff --git a/checkpoint-6000/unet_ema/config.json b/checkpoint-6000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..93ceca396cf0700917a2c8152df029cc2fe028dd
--- /dev/null
+++ b/checkpoint-6000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 6000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-6000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-6000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..20fa2793e6357f01818469caf7d2789cc2d7d9b1
--- /dev/null
+++ b/checkpoint-6000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:294a031e3d4401e5af2ce5b9688ae89457fc5e23e3db5ce199416eb3c016e2c0
+size 10270077736
diff --git a/checkpoint-8000/random_states_0.pkl b/checkpoint-8000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..fe0d1fe53e4860b03b90ff621c6be2b3e7c76f2e
--- /dev/null
+++ b/checkpoint-8000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93bfb39156c96eeeabbc67cfffa6216b00e5c165695a1115b0bff9d303861b66
+size 16036
diff --git a/checkpoint-8000/scaler.pt b/checkpoint-8000/scaler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15e0bd47adc2d90e12538b2df0b959dc7190a51e
--- /dev/null
+++ b/checkpoint-8000/scaler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b702db7b518249c01b8c41b26dce942fa0f27f05dc7df518faed7f7f5ff0218a
+size 988
diff --git a/checkpoint-8000/scheduler.bin b/checkpoint-8000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3cbbac52cc37c42d61943b8f340147b2e4a1434a
--- /dev/null
+++ b/checkpoint-8000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c25183d9bebadf472694fe7f9ca44d9a085807e81d62a076c7efea31e04897bd
+size 1000
diff --git a/checkpoint-8000/unet/config.json b/checkpoint-8000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/checkpoint-8000/unet/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-8000/unet/diffusion_pytorch_model.safetensors b/checkpoint-8000/unet/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..97db9864f633baa26068acaee2cb88d0bf0f6348
--- /dev/null
+++ b/checkpoint-8000/unet/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6fc00d3a4a63aedc9b2e7c994678bc760f2450feae335138dd827f824f4b28c
+size 10270077736
diff --git a/checkpoint-8000/unet_ema/config.json b/checkpoint-8000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a9dc29fa7fea18d4f852851ce9a2bb797f6aa40
--- /dev/null
+++ b/checkpoint-8000/unet_ema/config.json
@@ -0,0 +1,80 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "decay": 0.9999,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "inv_gamma": 1.0,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "min_decay": 0.0,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "optimization_step": 8000,
+  "out_channels": 4,
+  "power": 0.6666666666666666,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "update_after_step": 0,
+  "use_ema_warmup": false,
+  "use_linear_projection": true
+}
diff --git a/checkpoint-8000/unet_ema/diffusion_pytorch_model.safetensors b/checkpoint-8000/unet_ema/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d7cdea55dfeed283fed84d0e7bced594165ea537
--- /dev/null
+++ b/checkpoint-8000/unet_ema/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:addde04f31fc6542b580fa63de3bfc3bdd76981088bcbd30b908095d02aa1988
+size 10270077736
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..25bdef6f4c3ebef80c6b674e7878f989e76aaee9
--- /dev/null
+++ b/config.json
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.28.0.dev0",
+  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
diff --git a/diffusion_pytorch_model.safetensors b/diffusion_pytorch_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f2d3b70a523f7a75357847de26ddf1899ff83ae
--- /dev/null
+++ b/diffusion_pytorch_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af0e56d8f49d228a1068fb3d5da387e618bf467b8c0555dcb64185f82536145f
+size 10270077736