dataautogpt3 committed on
Commit
172be56
1 Parent(s): 792f7f1
ComfyUI-test10k.json DELETED
@@ -1,486 +0,0 @@
-{
-  "last_node_id": 373,
-  "last_link_id": 982,
-  "nodes": [
-    {
-      "id": 287, "type": "VAEDecode",
-      "pos": { "0": 166, "1": 663 }, "size": { "0": 210, "1": 46 },
-      "flags": { "collapsed": false }, "order": 7, "mode": 0,
-      "inputs": [
-        { "name": "samples", "type": "LATENT", "link": 943 },
-        { "name": "vae", "type": "VAE", "link": 980 }
-      ],
-      "outputs": [ { "name": "IMAGE", "type": "IMAGE", "links": [ 708 ], "slot_index": 0 } ],
-      "properties": { "Node name for S&R": "VAEDecode" },
-      "widgets_values": []
-    },
-    {
-      "id": 347, "type": "CLIPTextEncode",
-      "pos": { "0": -461, "1": 965 }, "size": { "0": 400, "1": 200 },
-      "flags": {}, "order": 4, "mode": 0,
-      "inputs": [ { "name": "clip", "type": "CLIP", "link": 978 } ],
-      "outputs": [ { "name": "CONDITIONING", "type": "CONDITIONING", "links": [ 938 ], "slot_index": 0 } ],
-      "properties": { "Node name for S&R": "CLIPTextEncode" },
-      "widgets_values": [ "" ]
-    },
-    {
-      "id": 358, "type": "KSamplerSelect",
-      "pos": { "0": -416, "1": 1228 }, "size": { "0": 315, "1": 58 },
-      "flags": {}, "order": 0, "mode": 0,
-      "inputs": [],
-      "outputs": [ { "name": "SAMPLER", "type": "SAMPLER", "links": [ 941 ] } ],
-      "properties": { "Node name for S&R": "KSamplerSelect" },
-      "widgets_values": [ "euler_ancestral" ]
-    },
-    {
-      "id": 365, "type": "BasicScheduler",
-      "pos": { "0": -411, "1": 1332 }, "size": { "0": 315, "1": 106 },
-      "flags": {}, "order": 3, "mode": 0,
-      "inputs": [ { "name": "model", "type": "MODEL", "link": 982 } ],
-      "outputs": [ { "name": "SIGMAS", "type": "SIGMAS", "links": [ 955 ] } ],
-      "properties": { "Node name for S&R": "BasicScheduler" },
-      "widgets_values": [ "normal", 28, 1 ]
-    },
-    {
-      "id": 371, "type": "EmptyLatentImage",
-      "pos": { "0": -873, "1": 1278 }, "size": { "0": 315, "1": 106 },
-      "flags": {}, "order": 1, "mode": 0,
-      "inputs": [],
-      "outputs": [ { "name": "LATENT", "type": "LATENT", "links": [ 974 ] } ],
-      "properties": { "Node name for S&R": "EmptyLatentImage" },
-      "widgets_values": [ 1024, 1024, 2 ]
-    },
-    {
-      "id": 346, "type": "CLIPTextEncode",
-      "pos": { "0": -449, "1": 696 }, "size": { "0": 400, "1": 200 },
-      "flags": {}, "order": 5, "mode": 0,
-      "inputs": [ { "name": "clip", "type": "CLIP", "link": 979 } ],
-      "outputs": [ { "name": "CONDITIONING", "type": "CONDITIONING", "links": [ 939 ], "slot_index": 0 } ],
-      "properties": { "Node name for S&R": "CLIPTextEncode" },
-      "widgets_values": [ "Space Marine Meditation | Warhammer 40k Inspired Ambient " ]
-    },
-    {
-      "id": 170, "type": "PreviewImage",
-      "pos": { "0": 681, "1": 724 }, "size": { "0": 1663.6668701171875, "1": 803.4244384765625 },
-      "flags": {}, "order": 8, "mode": 0,
-      "inputs": [ { "name": "images", "type": "IMAGE", "link": 708 } ],
-      "outputs": [],
-      "properties": { "Node name for S&R": "PreviewImage" },
-      "widgets_values": []
-    },
-    {
-      "id": 357, "type": "SamplerCustom",
-      "pos": { "0": 17, "1": 880 }, "size": { "0": 304.5533447265625, "1": 442 },
-      "flags": {}, "order": 6, "mode": 0,
-      "inputs": [
-        { "name": "model", "type": "MODEL", "link": 981 },
-        { "name": "positive", "type": "CONDITIONING", "link": 939 },
-        { "name": "negative", "type": "CONDITIONING", "link": 938 },
-        { "name": "sampler", "type": "SAMPLER", "link": 941 },
-        { "name": "sigmas", "type": "SIGMAS", "link": 955 },
-        { "name": "latent_image", "type": "LATENT", "link": 974 }
-      ],
-      "outputs": [
-        { "name": "output", "type": "LATENT", "links": [ 943 ], "slot_index": 0 },
-        { "name": "denoised_output", "type": "LATENT", "links": null }
-      ],
-      "properties": { "Node name for S&R": "SamplerCustom" },
-      "widgets_values": [ true, 1013633777621875, "randomize", 7.5 ]
-    },
-    {
-      "id": 373, "type": "CheckpointLoaderSimple",
-      "pos": { "0": -1037.7359619140625, "1": 827.5658569335938 }, "size": { "0": 315, "1": 98 },
-      "flags": {}, "order": 2, "mode": 0,
-      "inputs": [],
-      "outputs": [
-        { "name": "MODEL", "type": "MODEL", "links": [ 981, 982 ], "slot_index": 0 },
-        { "name": "CLIP", "type": "CLIP", "links": [ 978, 979 ], "slot_index": 1 },
-        { "name": "VAE", "type": "VAE", "links": [ 980 ], "slot_index": 2 }
-      ],
-      "properties": { "Node name for S&R": "CheckpointLoaderSimple" },
-      "widgets_values": [ "SDXL\\sdxl-ztsnr-sigma-10k.safetensors" ]
-    }
-  ],
-  "links": [
-    [ 708, 287, 0, 170, 0, "IMAGE" ],
-    [ 938, 347, 0, 357, 2, "CONDITIONING" ],
-    [ 939, 346, 0, 357, 1, "CONDITIONING" ],
-    [ 941, 358, 0, 357, 3, "SAMPLER" ],
-    [ 943, 357, 0, 287, 0, "LATENT" ],
-    [ 955, 365, 0, 357, 4, "SIGMAS" ],
-    [ 974, 371, 0, 357, 5, "LATENT" ],
-    [ 978, 373, 1, 347, 0, "CLIP" ],
-    [ 979, 373, 1, 346, 0, "CLIP" ],
-    [ 980, 373, 2, 287, 1, "VAE" ],
-    [ 981, 373, 0, 357, 0, "MODEL" ],
-    [ 982, 373, 0, 365, 0, "MODEL" ]
-  ],
-  "groups": [],
-  "config": {},
-  "extra": {
-    "ds": { "scale": 1.0610764609501095, "offset": [ 1717.2345868209466, -391.21652783223755 ] }
-  },
-  "version": 0.4
-}
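For reference, ComfyUI workflow files like the one removed above store each node under `nodes` (keyed by `id`) and each edge in `links` as `[link_id, from_node, from_slot, to_node, to_slot, type]`. A minimal inspection sketch, assuming a local copy of the removed `ComfyUI-test10k.json`:

```python
import json

# Assumption: a local copy of the workflow file removed in this commit.
with open("ComfyUI-test10k.json") as f:
    workflow = json.load(f)

# Map node ids to node types, then walk the edge list.
node_types = {node["id"]: node["type"] for node in workflow["nodes"]}
for link_id, src, src_slot, dst, dst_slot, dtype in workflow["links"]:
    print(f"link {link_id}: {node_types[src]}[{src_slot}] -> {node_types[dst]}[{dst_slot}] ({dtype})")
```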
README.md CHANGED
@@ -12,7 +12,7 @@ tags:
 
 - [x] 10k dataset proof of concept (completed)[link](https://huggingface.co/dataautogpt3/ProteusSigma)
 
-- [ ] 200k+ dataset finetune (in testing/training)
+- [x] 500k+ dataset finetune (completed)
 
 - [ ] 12M million dataset finetune (planned)
 
@@ -153,11 +153,9 @@ tags:
 <div class="gallery">
 <div class="gallery-item">
 <img src="https://huggingface.co/dataautogpt3/ProteusSigma/resolve/main/example.png" alt="Example Output 1" onclick="showImage(this.src)"/>
-<div class="caption">A digital illustration of a lich with long grey hair and beard, as a university professor wearing a formal suit and standing in front of a class, writing on a whiteboard. He holds a marker, writing complex equations or magical symbols on the whiteboard.</div>
 </div>
 <div class="gallery-item">
 <img src="https://huggingface.co/dataautogpt3/ProteusSigma/resolve/main/example2.png" alt="Example Output 2" onclick="showImage(this.src)"/>
-<div class="caption">A Candid Photo of a real short grey alien peering around a corner while trying to hide from the viewer in a living room, real photography, fujifilm superia, full HD, taken on a Canon EOS R5 F1.2 ISO100 35MM</div>
 </div>
 <div class="gallery-item">
 <img src="https://huggingface.co/dataautogpt3/ProteusSigma/resolve/main/example3.png" alt="Example Output 3" onclick="showImage(this.src)"/>
@@ -184,36 +182,11 @@ function showImage(src) {
 
 # Combined Proteus and Mobius datasets with ZTSNR and NovelAI V3 Improvements
 
-# Recommended Inference Parameters
-
-[Example ComfyUI workflow](https://github.com/DataCTE/SDXL-Training-Improvements/blob/main/src/inference/Comfyui-zsnrnode/ztsnr%2Bv-pred.json)
-
-## Installation
-
-1. Install the custom nodes:
-```bash
-cd /path/to/ComfyUI/custom_nodes
-git clone https://github.com/DataCTE/SDXL-Training-Improvements.git
-mv SDXL-Training-Improvements/src/inference/Comfyui-zsnrnode ./zsnrnode
-```
-Restart ComfyUI to load the new nodes
-Load the example workflow from the link above
-
-Recommended Settings
-
-Sampler: dpmpp_2m
-
-Scheduler: Karras (Normal noise schedule)
-
-Steps: 28 (Optimal step count)
-
-CFG: 3.0 to 5.5 (Classifier-free guidance scale)
-
 ## Model Details
 
 - **Model Type:** SDXL Fine-tuned with ZTSNR and NovelAI V3 Improvements
 - **Base Model:** stabilityai/stable-diffusion-xl-base-1.0
-- **Training Dataset:** 10,000 high-quality images
+- **Training Dataset:** 500,000 high-quality images
 - **License:** Apache 2.0
 
 ## Key Features
@@ -221,42 +194,16 @@ CFG: 3.0 to 5.5 (Classifier-free guidance scale)
 - Zero Terminal SNR (ZTSNR) implementation
 - Increased σ_max ≈ 20000.0 (NovelAI research)
 - High-resolution coherence enhancements
-- Tag-based CLIP weighting
-- VAE improvements
-
-### Technical Specifications
-
-- **Noise Schedule**: σ_max ≈ 20000.0 to σ_min ≈ 0.0292
-- **Progressive Steps**: [20000, 17.8, 12.4, 9.2, 7.2, 5.4, 3.9, 2.1, 0.9, 0.0292]
-- **Resolution Scaling**: √(H×W)/1024
 
 ## Training Details
 
 ### Training Configuration
-- **Learning Rate:** 1e-6
-- **Batch Size:** 1
-- **Gradient Accumulation Steps:** 1
+- **Learning Rate:** 4e-7
+- **Batch Size:** 8
+- **Gradient Accumulation Steps:** 8
+- **Epochs:** 80
 - **Optimizer:** AdamW
 - **Precision:** bfloat16
-- **VAE Finetuning:** Enabled
-- **VAE Learning Rate:** 1e-6
-
-### CLIP Weight Configuration
-- **Character Weight:** 1.5
-- **Style Weight:** 1.2
-- **Quality Weight:** 0.8
-- **Setting Weight:** 1.0
-- **Action Weight:** 1.1
-- **Object Weight:** 0.9
-
-
-## Performance Improvements
-
-- 47% fewer artifacts at σ < 5.0
-- Stable composition at σ > 12.4
-- 31% better detail consistency
-- Improved color accuracy
-- Enhanced dark tone reproduction
 
 ## Repository and Resources
 
example.png CHANGED

Git LFS Details

  • SHA256: 8f3de79ae9ca72c34741c5c810b681b01276119678e8c962fed477c48acf6bb7
  • Pointer size: 132 Bytes
  • Size of remote file: 1.87 MB

Git LFS Details

  • SHA256: 89ef3fb87fd2768959b3322fee028f283a8e056ae4268b5bc3092fd3fca955ea
  • Pointer size: 132 Bytes
  • Size of remote file: 1.8 MB
example2.png CHANGED

Git LFS Details

  • SHA256: 84160911e7475f5a332b9c08b1fa463216e6eb692615497009b67b8b80feb148
  • Pointer size: 132 Bytes
  • Size of remote file: 1.35 MB

Git LFS Details

  • SHA256: d2f7bfddbac7c270aff8aaa3b42655f27a90ad9dea95e6bd75056d5dc752a960
  • Pointer size: 132 Bytes
  • Size of remote file: 1.49 MB
example3.png CHANGED

Git LFS Details

  • SHA256: 299770f5c15e368f1ecf001a306bb4fd62394bdaa5c5b6f5c16cdeca5a5a087b
  • Pointer size: 132 Bytes
  • Size of remote file: 2.08 MB

Git LFS Details

  • SHA256: 982eba1fe13199aa965dd7b55fe2ba5f8e6270c41959fc2afb54db4906017ac1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.57 MB
example4.png CHANGED

Git LFS Details

  • SHA256: 45c02918c89b46e227f3a8453c49ddbb2ec93829d468d0ff7f360ecf9fe0445d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB

Git LFS Details

  • SHA256: 1d120a981ea36db61f38067702318b2464a70a4f33486484f80c7a60edd176b4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.65 MB
example5.png CHANGED

Git LFS Details

  • SHA256: 19309de34c2bf0da8517c50830b111a6acee78bea51ad57b7792fcaa1ffcd8a5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.96 MB

Git LFS Details

  • SHA256: 3b14a31acc38e15f4a40c5a159d39f1f0c3158bc7c27b8f426987894125889b2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.37 MB
model_index.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_class_name": "StableDiffusionXLPipeline",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "ProteusSigma-500k",
+  "feature_extractor": [
+    null,
+    null
+  ],
+  "force_zeros_for_empty_prompt": false,
+  "image_encoder": [
+    null,
+    null
+  ],
+  "scheduler": [
+    "diffusers",
+    "EulerDiscreteScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "CLIPTextModel"
+  ],
+  "text_encoder_2": [
+    "transformers",
+    "CLIPTextModelWithProjection"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "tokenizer_2": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}
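The index above is the standard diffusers pipeline layout, so the checkpoint should load directly with `StableDiffusionXLPipeline`. A rough sketch, assuming this commit lands in the `dataautogpt3/ProteusSigma` repository; the prompt, step count, and CFG value are borrowed from the removed `ComfyUI-test10k.json` workflow:

```python
import torch
from diffusers import StableDiffusionXLPipeline

# Assumption: repo id for this commit.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "dataautogpt3/ProteusSigma",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Settings mirror the removed ComfyUI test workflow (28 steps, CFG 7.5).
image = pipe(
    prompt="Space Marine Meditation | Warhammer 40k Inspired Ambient",
    num_inference_steps=28,
    guidance_scale=7.5,
).images[0]
image.save("proteus_sigma_sample.png")
```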
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,25 @@
+{
+  "_class_name": "EulerDiscreteScheduler",
+  "_diffusers_version": "0.31.0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "final_sigmas_type": "zero",
+  "interpolation_type": "linear",
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "rescale_betas_zero_snr": false,
+  "sample_max_value": 1.0,
+  "set_alpha_to_one": false,
+  "sigma_max": null,
+  "sigma_min": null,
+  "skip_prk_steps": true,
+  "steps_offset": 1,
+  "timestep_spacing": "leading",
+  "timestep_type": "discrete",
+  "trained_betas": null,
+  "use_beta_sigmas": false,
+  "use_exponential_sigmas": false,
+  "use_karras_sigmas": false
+}
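The committed scheduler is a stock `EulerDiscreteScheduler` (epsilon prediction, `use_karras_sigmas: false`). A small sketch of loading it from the `scheduler` subfolder and, if desired, re-instantiating it from the same config with an override; the Karras override is purely illustrative, not what the repo ships:

```python
from diffusers import EulerDiscreteScheduler

# Load the scheduler exactly as committed in scheduler/scheduler_config.json.
scheduler = EulerDiscreteScheduler.from_pretrained(
    "dataautogpt3/ProteusSigma",  # assumed repo id
    subfolder="scheduler",
)

# Illustrative override only: the committed config has use_karras_sigmas=false.
karras_scheduler = EulerDiscreteScheduler.from_config(
    scheduler.config, use_karras_sigmas=True
)
```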
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "projection_dim": 768,
+  "torch_dtype": "float16",
+  "transformers_version": "4.32.0.dev0",
+  "vocab_size": 49408
+}
sdxl-ztsnr-sigma-10k.safetensors → text_encoder/model.safetensors RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25396dbefe1753cf20f7dfe1692686d2cce0f53c69bff4f40ac6ab46434784fa
-size 6938049194
+oid sha256:5c3d6454dd2d23414b56aa1b5858a72487a656937847b6fea8d0606d7a42cdbc
+size 492265168
text_encoder_2/config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "architectures": [
+    "CLIPTextModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_size": 1280,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 20,
+  "num_hidden_layers": 32,
+  "pad_token_id": 1,
+  "projection_dim": 1280,
+  "torch_dtype": "float16",
+  "transformers_version": "4.32.0.dev0",
+  "vocab_size": 49408
+}
text_encoder_2/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a6032f63d37ae02bbc74ccd6a27440578cd71701f96532229d0154f55a8d3ff
+size 2778702264
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 77,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>"
+}
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "!",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer_2/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "!",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 77,
+  "pad_token": "!",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>",
+  "use_fast": false
+}
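The two text-encoder configs and two tokenizer configs added above are the usual SDXL pair: a 768-dim `CLIPTextModel` and a 1280-dim `CLIPTextModelWithProjection`, each with its own `CLIPTokenizer`. A sketch of loading them individually with transformers, under the same assumed `dataautogpt3/ProteusSigma` repo id:

```python
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

repo_id = "dataautogpt3/ProteusSigma"  # assumed repo id for this commit

# First (768-dim) and second (1280-dim, projected) SDXL text encoders with their tokenizers.
tokenizer = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer")
tokenizer_2 = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer_2")
text_encoder = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder")
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(repo_id, subfolder="text_encoder_2")

tokens = tokenizer(
    "Space Marine Meditation", padding="max_length", max_length=77, return_tensors="pt"
)
hidden = text_encoder(**tokens).last_hidden_state  # per-token hidden states, shape [1, 77, 768]
```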
tokenizer_2/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "/workspace/SimpleTuner/output/models/20epochs",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
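The UNet config above is the standard SDXL topology (`cross_attention_dim` 2048, `transformer_layers_per_block` [1, 2, 10], `sample_size` 128). If only the denoiser is needed, it can be loaded on its own; a sketch under the same assumed repo id:

```python
import torch
from diffusers import UNet2DConditionModel

# Assumption: same repo id as above; loads only the denoiser declared in unet/config.json.
unet = UNet2DConditionModel.from_pretrained(
    "dataautogpt3/ProteusSigma", subfolder="unet", torch_dtype=torch.bfloat16
)
print(unet.config.cross_attention_dim)  # 2048 -> concatenated dual text-encoder conditioning
print(unet.config.sample_size)          # 128  -> 1024 px output at the 8x VAE downscale
```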
unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98d8a302c0909ddf1b775f168baa6594012f2756722c4768cbd0327c7da7fb5b
+size 5135151440
vae/config.json ADDED
@@ -0,0 +1,38 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "madebyollin/sdxl-vae-fp16-fix",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": false,
+  "in_channels": 3,
+  "latent_channels": 4,
+  "latents_mean": null,
+  "latents_std": null,
+  "layers_per_block": 2,
+  "mid_block_add_attention": true,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "scaling_factor": 0.13025,
+  "shift_factor": null,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
+}
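The committed VAE config points at `madebyollin/sdxl-vae-fp16-fix` with `force_upcast: false`, so it can stay in reduced precision alongside the rest of the pipeline. A sketch of loading it explicitly and handing it to the pipeline, same repo-id assumption as above:

```python
import torch
from diffusers import AutoencoderKL, StableDiffusionXLPipeline

repo_id = "dataautogpt3/ProteusSigma"  # assumed repo id

# Load the committed VAE on its own, then pass it to the pipeline constructor.
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=torch.bfloat16)
pipe = StableDiffusionXLPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=torch.bfloat16)
```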
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39a9822aaa359fe37ca5d3cff8a06b9505f29a018b2331d126b36472e77665a6
+size 167335590