dataautogpt3 committed on
Commit
172be56
1 Parent(s): 792f7f1
ComfyUI-test10k.json DELETED
@@ -1,486 +0,0 @@
-{
-  "last_node_id": 373,
-  "last_link_id": 982,
-  "nodes": [
-    {
-      "id": 287, "type": "VAEDecode",
-      "pos": { "0": 166, "1": 663 }, "size": { "0": 210, "1": 46 },
-      "flags": { "collapsed": false }, "order": 7, "mode": 0,
-      "inputs": [
-        { "name": "samples", "type": "LATENT", "link": 943 },
-        { "name": "vae", "type": "VAE", "link": 980 }
-      ],
-      "outputs": [ { "name": "IMAGE", "type": "IMAGE", "links": [ 708 ], "slot_index": 0 } ],
-      "properties": { "Node name for S&R": "VAEDecode" },
-      "widgets_values": []
-    },
-    {
-      "id": 347, "type": "CLIPTextEncode",
-      "pos": { "0": -461, "1": 965 }, "size": { "0": 400, "1": 200 },
-      "flags": {}, "order": 4, "mode": 0,
-      "inputs": [ { "name": "clip", "type": "CLIP", "link": 978 } ],
-      "outputs": [ { "name": "CONDITIONING", "type": "CONDITIONING", "links": [ 938 ], "slot_index": 0 } ],
-      "properties": { "Node name for S&R": "CLIPTextEncode" },
-      "widgets_values": [ "" ]
-    },
-    {
-      "id": 358, "type": "KSamplerSelect",
-      "pos": { "0": -416, "1": 1228 }, "size": { "0": 315, "1": 58 },
-      "flags": {}, "order": 0, "mode": 0,
-      "inputs": [],
-      "outputs": [ { "name": "SAMPLER", "type": "SAMPLER", "links": [ 941 ] } ],
-      "properties": { "Node name for S&R": "KSamplerSelect" },
-      "widgets_values": [ "euler_ancestral" ]
-    },
-    {
-      "id": 365, "type": "BasicScheduler",
-      "pos": { "0": -411, "1": 1332 }, "size": { "0": 315, "1": 106 },
-      "flags": {}, "order": 3, "mode": 0,
-      "inputs": [ { "name": "model", "type": "MODEL", "link": 982 } ],
-      "outputs": [ { "name": "SIGMAS", "type": "SIGMAS", "links": [ 955 ] } ],
-      "properties": { "Node name for S&R": "BasicScheduler" },
-      "widgets_values": [ "normal", 28, 1 ]
-    },
-    {
-      "id": 371, "type": "EmptyLatentImage",
-      "pos": { "0": -873, "1": 1278 }, "size": { "0": 315, "1": 106 },
-      "flags": {}, "order": 1, "mode": 0,
-      "inputs": [],
-      "outputs": [ { "name": "LATENT", "type": "LATENT", "links": [ 974 ] } ],
-      "properties": { "Node name for S&R": "EmptyLatentImage" },
-      "widgets_values": [ 1024, 1024, 2 ]
-    },
-    {
-      "id": 346, "type": "CLIPTextEncode",
-      "pos": { "0": -449, "1": 696 }, "size": { "0": 400, "1": 200 },
-      "flags": {}, "order": 5, "mode": 0,
-      "inputs": [ { "name": "clip", "type": "CLIP", "link": 979 } ],
-      "outputs": [ { "name": "CONDITIONING", "type": "CONDITIONING", "links": [ 939 ], "slot_index": 0 } ],
-      "properties": { "Node name for S&R": "CLIPTextEncode" },
-      "widgets_values": [ "Space Marine Meditation | Warhammer 40k Inspired Ambient " ]
-    },
-    {
-      "id": 170, "type": "PreviewImage",
-      "pos": { "0": 681, "1": 724 }, "size": { "0": 1663.6668701171875, "1": 803.4244384765625 },
-      "flags": {}, "order": 8, "mode": 0,
-      "inputs": [ { "name": "images", "type": "IMAGE", "link": 708 } ],
-      "outputs": [],
-      "properties": { "Node name for S&R": "PreviewImage" },
-      "widgets_values": []
-    },
-    {
-      "id": 357, "type": "SamplerCustom",
-      "pos": { "0": 17, "1": 880 }, "size": { "0": 304.5533447265625, "1": 442 },
-      "flags": {}, "order": 6, "mode": 0,
-      "inputs": [
-        { "name": "model", "type": "MODEL", "link": 981 },
-        { "name": "positive", "type": "CONDITIONING", "link": 939 },
-        { "name": "negative", "type": "CONDITIONING", "link": 938 },
-        { "name": "sampler", "type": "SAMPLER", "link": 941 },
-        { "name": "sigmas", "type": "SIGMAS", "link": 955 },
-        { "name": "latent_image", "type": "LATENT", "link": 974 }
-      ],
-      "outputs": [
-        { "name": "output", "type": "LATENT", "links": [ 943 ], "slot_index": 0 },
-        { "name": "denoised_output", "type": "LATENT", "links": null }
-      ],
-      "properties": { "Node name for S&R": "SamplerCustom" },
-      "widgets_values": [ true, 1013633777621875, "randomize", 7.5 ]
-    },
-    {
-      "id": 373, "type": "CheckpointLoaderSimple",
-      "pos": { "0": -1037.7359619140625, "1": 827.5658569335938 }, "size": { "0": 315, "1": 98 },
-      "flags": {}, "order": 2, "mode": 0,
-      "inputs": [],
-      "outputs": [
-        { "name": "MODEL", "type": "MODEL", "links": [ 981, 982 ], "slot_index": 0 },
-        { "name": "CLIP", "type": "CLIP", "links": [ 978, 979 ], "slot_index": 1 },
-        { "name": "VAE", "type": "VAE", "links": [ 980 ], "slot_index": 2 }
-      ],
-      "properties": { "Node name for S&R": "CheckpointLoaderSimple" },
-      "widgets_values": [ "SDXL\\sdxl-ztsnr-sigma-10k.safetensors" ]
-    }
-  ],
-  "links": [
-    [ 708, 287, 0, 170, 0, "IMAGE" ],
-    [ 938, 347, 0, 357, 2, "CONDITIONING" ],
-    [ 939, 346, 0, 357, 1, "CONDITIONING" ],
-    [ 941, 358, 0, 357, 3, "SAMPLER" ],
-    [ 943, 357, 0, 287, 0, "LATENT" ],
-    [ 955, 365, 0, 357, 4, "SIGMAS" ],
-    [ 974, 371, 0, 357, 5, "LATENT" ],
-    [ 978, 373, 1, 347, 0, "CLIP" ],
-    [ 979, 373, 1, 346, 0, "CLIP" ],
-    [ 980, 373, 2, 287, 1, "VAE" ],
-    [ 981, 373, 0, 357, 0, "MODEL" ],
-    [ 982, 373, 0, 365, 0, "MODEL" ]
-  ],
-  "groups": [],
-  "config": {},
-  "extra": {
-    "ds": { "scale": 1.0610764609501095, "offset": [ 1717.2345868209466, -391.21652783223755 ] }
-  },
-  "version": 0.4
-}
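For reference, ComfyUI workflow files like the one removed above store each node under `nodes` (keyed by `id`) and each edge in `links` as `[link_id, from_node, from_slot, to_node, to_slot, type]`. A minimal inspection sketch, assuming a local copy of the removed `ComfyUI-test10k.json`:

```python
import json

# Assumption: a local copy of the workflow file removed in this commit.
with open("ComfyUI-test10k.json") as f:
    workflow = json.load(f)

# Map node ids to node types, then walk the edge list.
node_types = {node["id"]: node["type"] for node in workflow["nodes"]}
for link_id, src, src_slot, dst, dst_slot, dtype in workflow["links"]:
    print(f"link {link_id}: {node_types[src]}[{src_slot}] -> {node_types[dst]}[{dst_slot}] ({dtype})")
```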
README.md CHANGED
@@ -12,7 +12,7 @@ tags:
 
 - [x] 10k dataset proof of concept (completed)[link](https://huggingface.co/dataautogpt3/ProteusSigma)
 
-- [ ] 200k+ dataset finetune (in testing/training)
+- [x] 500k+ dataset finetune (completed)
 
 - [ ] 12M million dataset finetune (planned)
 
@@ -153,11 +153,9 @@ tags:
 <div class="gallery">
 <div class="gallery-item">
 <img src="https://huggingface.co/dataautogpt3/ProteusSigma/resolve/main/example.png" alt="Example Output 1" onclick="showImage(this.src)"/>
-<div class="caption">A digital illustration of a lich with long grey hair and beard, as a university professor wearing a formal suit and standing in front of a class, writing on a whiteboard. He holds a marker, writing complex equations or magical symbols on the whiteboard.</div>
 </div>
 <div class="gallery-item">
 <img src="https://huggingface.co/dataautogpt3/ProteusSigma/resolve/main/example2.png" alt="Example Output 2" onclick="showImage(this.src)"/>
-<div class="caption">A Candid Photo of a real short grey alien peering around a corner while trying to hide from the viewer in a living room, real photography, fujifilm superia, full HD, taken on a Canon EOS R5 F1.2 ISO100 35MM</div>
 </div>
 <div class="gallery-item">
 <img src="https://huggingface.co/dataautogpt3/ProteusSigma/resolve/main/example3.png" alt="Example Output 3" onclick="showImage(this.src)"/>
@@ -184,36 +182,11 @@ function showImage(src) {
 
 # Combined Proteus and Mobius datasets with ZTSNR and NovelAI V3 Improvements
 
-# Recommended Inference Parameters
-
-[Example ComfyUI workflow](https://github.com/DataCTE/SDXL-Training-Improvements/blob/main/src/inference/Comfyui-zsnrnode/ztsnr%2Bv-pred.json)
-
-## Installation
-
-1. Install the custom nodes:
-```bash
-cd /path/to/ComfyUI/custom_nodes
-git clone https://github.com/DataCTE/SDXL-Training-Improvements.git
-mv SDXL-Training-Improvements/src/inference/Comfyui-zsnrnode ./zsnrnode
-```
-Restart ComfyUI to load the new nodes
-Load the example workflow from the link above
-
-Recommended Settings
-
-Sampler: dpmpp_2m
-
-Scheduler: Karras (Normal noise schedule)
-
-Steps: 28 (Optimal step count)
-
-CFG: 3.0 to 5.5 (Classifier-free guidance scale)
-
 ## Model Details
 
 - **Model Type:** SDXL Fine-tuned with ZTSNR and NovelAI V3 Improvements
 - **Base Model:** stabilityai/stable-diffusion-xl-base-1.0
-- **Training Dataset:** 10,000 high-quality images
+- **Training Dataset:** 500,000 high-quality images
 - **License:** Apache 2.0
 
 ## Key Features
@@ -221,42 +194,16 @@ CFG: 3.0 to 5.5 (Classifier-free guidance scale)
 - Zero Terminal SNR (ZTSNR) implementation
 - Increased σ_max ≈ 20000.0 (NovelAI research)
 - High-resolution coherence enhancements
-- Tag-based CLIP weighting
-- VAE improvements
-
-### Technical Specifications
-
-- **Noise Schedule**: σ_max ≈ 20000.0 to σ_min ≈ 0.0292
-- **Progressive Steps**: [20000, 17.8, 12.4, 9.2, 7.2, 5.4, 3.9, 2.1, 0.9, 0.0292]
-- **Resolution Scaling**: √(H×W)/1024
 
 ## Training Details
 
 ### Training Configuration
-- **Learning Rate:** 1e-6
-- **Batch Size:** 1
-- **Gradient Accumulation Steps:** 1
+- **Learning Rate:** 4e-7
+- **Batch Size:** 8
+- **Gradient Accumulation Steps:** 8
+- **Epochs:** 80
 - **Optimizer:** AdamW
 - **Precision:** bfloat16
-- **VAE Finetuning:** Enabled
-- **VAE Learning Rate:** 1e-6
-
-### CLIP Weight Configuration
-- **Character Weight:** 1.5
-- **Style Weight:** 1.2
-- **Quality Weight:** 0.8
-- **Setting Weight:** 1.0
-- **Action Weight:** 1.1
-- **Object Weight:** 0.9
-
-
-## Performance Improvements
-
-- 47% fewer artifacts at σ < 5.0
-- Stable composition at σ > 12.4
-- 31% better detail consistency
-- Improved color accuracy
-- Enhanced dark tone reproduction
 
 ## Repository and Resources
 
example.png CHANGED

Git LFS Details

  • SHA256: 8f3de79ae9ca72c34741c5c810b681b01276119678e8c962fed477c48acf6bb7
  • Pointer size: 132 Bytes
  • Size of remote file: 1.87 MB

Git LFS Details

  • SHA256: 89ef3fb87fd2768959b3322fee028f283a8e056ae4268b5bc3092fd3fca955ea
  • Pointer size: 132 Bytes
  • Size of remote file: 1.8 MB
example2.png CHANGED

Git LFS Details

  • SHA256: 84160911e7475f5a332b9c08b1fa463216e6eb692615497009b67b8b80feb148
  • Pointer size: 132 Bytes
  • Size of remote file: 1.35 MB

Git LFS Details

  • SHA256: d2f7bfddbac7c270aff8aaa3b42655f27a90ad9dea95e6bd75056d5dc752a960
  • Pointer size: 132 Bytes
  • Size of remote file: 1.49 MB
example3.png CHANGED

Git LFS Details

  • SHA256: 299770f5c15e368f1ecf001a306bb4fd62394bdaa5c5b6f5c16cdeca5a5a087b
  • Pointer size: 132 Bytes
  • Size of remote file: 2.08 MB

Git LFS Details

  • SHA256: 982eba1fe13199aa965dd7b55fe2ba5f8e6270c41959fc2afb54db4906017ac1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.57 MB
example4.png CHANGED

Git LFS Details

  • SHA256: 45c02918c89b46e227f3a8453c49ddbb2ec93829d468d0ff7f360ecf9fe0445d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB

Git LFS Details

  • SHA256: 1d120a981ea36db61f38067702318b2464a70a4f33486484f80c7a60edd176b4
  • Pointer size: 132 Bytes
  • Size of remote file: 1.65 MB
example5.png CHANGED

Git LFS Details

  • SHA256: 19309de34c2bf0da8517c50830b111a6acee78bea51ad57b7792fcaa1ffcd8a5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.96 MB

Git LFS Details

  • SHA256: 3b14a31acc38e15f4a40c5a159d39f1f0c3158bc7c27b8f426987894125889b2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.37 MB
model_index.json ADDED
@@ -0,0 +1,42 @@
+{
+  "_class_name": "StableDiffusionXLPipeline",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "ProteusSigma-500k",
+  "feature_extractor": [
+    null,
+    null
+  ],
+  "force_zeros_for_empty_prompt": false,
+  "image_encoder": [
+    null,
+    null
+  ],
+  "scheduler": [
+    "diffusers",
+    "EulerDiscreteScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "CLIPTextModel"
+  ],
+  "text_encoder_2": [
+    "transformers",
+    "CLIPTextModelWithProjection"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "tokenizer_2": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}
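The index above is the standard diffusers pipeline layout, so the checkpoint should load directly with `StableDiffusionXLPipeline`. A rough sketch, assuming this commit lands in the `dataautogpt3/ProteusSigma` repository; the prompt, step count, and CFG value are borrowed from the removed `ComfyUI-test10k.json` workflow:

```python
import torch
from diffusers import StableDiffusionXLPipeline

# Assumption: repo id for this commit.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "dataautogpt3/ProteusSigma",
    torch_dtype=torch.bfloat16,
).to("cuda")

# Settings mirror the removed ComfyUI test workflow (28 steps, CFG 7.5).
image = pipe(
    prompt="Space Marine Meditation | Warhammer 40k Inspired Ambient",
    num_inference_steps=28,
    guidance_scale=7.5,
).images[0]
image.save("proteus_sigma_sample.png")
```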
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,25 @@
+{
+  "_class_name": "EulerDiscreteScheduler",
+  "_diffusers_version": "0.31.0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "final_sigmas_type": "zero",
+  "interpolation_type": "linear",
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "rescale_betas_zero_snr": false,
+  "sample_max_value": 1.0,
+  "set_alpha_to_one": false,
+  "sigma_max": null,
+  "sigma_min": null,
+  "skip_prk_steps": true,
+  "steps_offset": 1,
+  "timestep_spacing": "leading",
+  "timestep_type": "discrete",
+  "trained_betas": null,
+  "use_beta_sigmas": false,
+  "use_exponential_sigmas": false,
+  "use_karras_sigmas": false
+}
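The committed scheduler is a stock `EulerDiscreteScheduler` (epsilon prediction, `use_karras_sigmas: false`). A small sketch of loading it from the `scheduler` subfolder and, if desired, re-instantiating it from the same config with an override; the Karras override is purely illustrative, not what the repo ships:

```python
from diffusers import EulerDiscreteScheduler

# Load the scheduler exactly as committed in scheduler/scheduler_config.json.
scheduler = EulerDiscreteScheduler.from_pretrained(
    "dataautogpt3/ProteusSigma",  # assumed repo id
    subfolder="scheduler",
)

# Illustrative override only: the committed config has use_karras_sigmas=false.
karras_scheduler = EulerDiscreteScheduler.from_config(
    scheduler.config, use_karras_sigmas=True
)
```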
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "projection_dim": 768,
+  "torch_dtype": "float16",
+  "transformers_version": "4.32.0.dev0",
+  "vocab_size": 49408
+}
sdxl-ztsnr-sigma-10k.safetensors → text_encoder/model.safetensors RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25396dbefe1753cf20f7dfe1692686d2cce0f53c69bff4f40ac6ab46434784fa
-size 6938049194
+oid sha256:5c3d6454dd2d23414b56aa1b5858a72487a656937847b6fea8d0606d7a42cdbc
+size 492265168
text_encoder_2/config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "architectures": [
+    "CLIPTextModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_size": 1280,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 20,
+  "num_hidden_layers": 32,
+  "pad_token_id": 1,
+  "projection_dim": 1280,
+  "torch_dtype": "float16",
+  "transformers_version": "4.32.0.dev0",
+  "vocab_size": 49408
+}
text_encoder_2/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a6032f63d37ae02bbc74ccd6a27440578cd71701f96532229d0154f55a8d3ff
+size 2778702264
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 77,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>"
+}
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "!",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer_2/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "!",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 77,
+  "pad_token": "!",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": "<|endoftext|>",
+  "use_fast": false
+}
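The two text-encoder configs and two tokenizer configs added above are the usual SDXL pair: a 768-dim `CLIPTextModel` and a 1280-dim `CLIPTextModelWithProjection`, each with its own `CLIPTokenizer`. A sketch of loading them individually with transformers, under the same assumed `dataautogpt3/ProteusSigma` repo id:

```python
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

repo_id = "dataautogpt3/ProteusSigma"  # assumed repo id for this commit

# First (768-dim) and second (1280-dim, projected) SDXL text encoders with their tokenizers.
tokenizer = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer")
tokenizer_2 = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer_2")
text_encoder = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder")
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(repo_id, subfolder="text_encoder_2")

tokens = tokenizer(
    "Space Marine Meditation", padding="max_length", max_length=77, return_tensors="pt"
)
hidden = text_encoder(**tokens).last_hidden_state  # per-token hidden states, shape [1, 77, 768]
```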
tokenizer_2/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,73 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "/workspace/SimpleTuner/output/models/20epochs",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "reverse_transformer_layers_per_block": null,
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}
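The UNet config above is the standard SDXL topology (`cross_attention_dim` 2048, `transformer_layers_per_block` [1, 2, 10], `sample_size` 128). If only the denoiser is needed, it can be loaded on its own; a sketch under the same assumed repo id:

```python
import torch
from diffusers import UNet2DConditionModel

# Assumption: same repo id as above; loads only the denoiser declared in unet/config.json.
unet = UNet2DConditionModel.from_pretrained(
    "dataautogpt3/ProteusSigma", subfolder="unet", torch_dtype=torch.bfloat16
)
print(unet.config.cross_attention_dim)  # 2048 -> concatenated dual text-encoder conditioning
print(unet.config.sample_size)          # 128  -> 1024 px output at the 8x VAE downscale
```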
unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98d8a302c0909ddf1b775f168baa6594012f2756722c4768cbd0327c7da7fb5b
+size 5135151440
vae/config.json ADDED
@@ -0,0 +1,38 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.31.0",
+  "_name_or_path": "madebyollin/sdxl-vae-fp16-fix",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": false,
+  "in_channels": 3,
+  "latent_channels": 4,
+  "latents_mean": null,
+  "latents_std": null,
+  "layers_per_block": 2,
+  "mid_block_add_attention": true,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "scaling_factor": 0.13025,
+  "shift_factor": null,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ],
+  "use_post_quant_conv": true,
+  "use_quant_conv": true
+}
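The committed VAE config points at `madebyollin/sdxl-vae-fp16-fix` with `force_upcast: false`, so it can stay in reduced precision alongside the rest of the pipeline. A sketch of loading it explicitly and handing it to the pipeline, same repo-id assumption as above:

```python
import torch
from diffusers import AutoencoderKL, StableDiffusionXLPipeline

repo_id = "dataautogpt3/ProteusSigma"  # assumed repo id

# Load the committed VAE on its own, then pass it to the pipeline constructor.
vae = AutoencoderKL.from_pretrained(repo_id, subfolder="vae", torch_dtype=torch.bfloat16)
pipe = StableDiffusionXLPipeline.from_pretrained(repo_id, vae=vae, torch_dtype=torch.bfloat16)
```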
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39a9822aaa359fe37ca5d3cff8a06b9505f29a018b2331d126b36472e77665a6
+size 167335590