camenduru commited on
Commit
d2d50b3
1 Parent(s): e9e6699

thanks to TencentARC ❤

Browse files
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "feature_extractor_type": "CLIPFeatureExtractor",
12
+ "image_mean": [
13
+ 0.48145466,
14
+ 0.4578275,
15
+ 0.40821073
16
+ ],
17
+ "image_processor_type": "CLIPImageProcessor",
18
+ "image_std": [
19
+ 0.26862954,
20
+ 0.26130258,
21
+ 0.27577711
22
+ ],
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "shortest_edge": 224
27
+ }
28
+ }
model_index.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionPipeline",
3
+ "_diffusers_version": "0.19.3",
4
+ "_name_or_path": "experiments/pretrained_models/anything-v4.0",
5
+ "feature_extractor": [
6
+ "transformers",
7
+ "CLIPImageProcessor"
8
+ ],
9
+ "requires_safety_checker": false,
10
+ "safety_checker": [
11
+ null,
12
+ null
13
+ ],
14
+ "scheduler": [
15
+ "diffusers",
16
+ "DPMSolverMultistepScheduler"
17
+ ],
18
+ "text_encoder": [
19
+ "transformers",
20
+ "CLIPTextModel"
21
+ ],
22
+ "tokenizer": [
23
+ "transformers",
24
+ "CLIPTokenizer"
25
+ ],
26
+ "unet": [
27
+ "diffusers",
28
+ "UNet2DConditionModel"
29
+ ],
30
+ "vae": [
31
+ "diffusers",
32
+ "AutoencoderKL"
33
+ ]
34
+ }
new_concept_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<hina1>": {"concept_token_ids": [49408, 49409, 49410, 49411, 49412, 49413, 49414, 49415, 49416, 49417, 49418, 49419, 49420, 49421, 49422, 49423], "concept_token_names": ["<new0>", "<new1>", "<new2>", "<new3>", "<new4>", "<new5>", "<new6>", "<new7>", "<new8>", "<new9>", "<new10>", "<new11>", "<new12>", "<new13>", "<new14>", "<new15>"]}, "<hina2>": {"concept_token_ids": [49424, 49425, 49426, 49427, 49428, 49429, 49430, 49431, 49432, 49433, 49434, 49435, 49436, 49437, 49438, 49439], "concept_token_names": ["<new16>", "<new17>", "<new18>", "<new19>", "<new20>", "<new21>", "<new22>", "<new23>", "<new24>", "<new25>", "<new26>", "<new27>", "<new28>", "<new29>", "<new30>", "<new31>"]}, "<kaori1>": {"concept_token_ids": [49440, 49441, 49442, 49443, 49444, 49445, 49446, 49447, 49448, 49449, 49450, 49451, 49452, 49453, 49454, 49455], "concept_token_names": ["<new32>", "<new33>", "<new34>", "<new35>", "<new36>", "<new37>", "<new38>", "<new39>", "<new40>", "<new41>", "<new42>", "<new43>", "<new44>", "<new45>", "<new46>", "<new47>"]}, "<kaori2>": {"concept_token_ids": [49456, 49457, 49458, 49459, 49460, 49461, 49462, 49463, 49464, 49465, 49466, 49467, 49468, 49469, 49470, 49471], "concept_token_names": ["<new48>", "<new49>", "<new50>", "<new51>", "<new52>", "<new53>", "<new54>", "<new55>", "<new56>", "<new57>", "<new58>", "<new59>", "<new60>", "<new61>", "<new62>", "<new63>"]}, "<tezuka1>": {"concept_token_ids": [49472, 49473, 49474, 49475, 49476, 49477, 49478, 49479, 49480, 49481, 49482, 49483, 49484, 49485, 49486, 49487], "concept_token_names": ["<new64>", "<new65>", "<new66>", "<new67>", "<new68>", "<new69>", "<new70>", "<new71>", "<new72>", "<new73>", "<new74>", "<new75>", "<new76>", "<new77>", "<new78>", "<new79>"]}, "<tezuka2>": {"concept_token_ids": [49488, 49489, 49490, 49491, 49492, 49493, 49494, 49495, 49496, 49497, 49498, 49499, 49500, 49501, 49502, 49503], "concept_token_names": ["<new80>", "<new81>", "<new82>", "<new83>", "<new84>", "<new85>", "<new86>", "<new87>", "<new88>", "<new89>", "<new90>", "<new91>", "<new92>", "<new93>", "<new94>", "<new95>"]}}
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DPMSolverMultistepScheduler",
3
+ "_diffusers_version": "0.19.3",
4
+ "algorithm_type": "dpmsolver++",
5
+ "beta_end": 0.012,
6
+ "beta_schedule": "scaled_linear",
7
+ "beta_start": 0.00085,
8
+ "clip_sample": false,
9
+ "dynamic_thresholding_ratio": 0.995,
10
+ "lambda_min_clipped": -Infinity,
11
+ "lower_order_final": true,
12
+ "num_train_timesteps": 1000,
13
+ "prediction_type": "epsilon",
14
+ "sample_max_value": 1.0,
15
+ "set_alpha_to_one": false,
16
+ "skip_prk_steps": true,
17
+ "solver_order": 2,
18
+ "solver_type": "midpoint",
19
+ "steps_offset": 1,
20
+ "thresholding": false,
21
+ "timestep_spacing": "linspace",
22
+ "trained_betas": null,
23
+ "use_karras_sigmas": false,
24
+ "variance_type": null
25
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "experiments/pretrained_models/anything-v4.0/text_encoder",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "quick_gelu",
11
+ "hidden_size": 768,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 768,
22
+ "torch_dtype": "float16",
23
+ "transformers_version": "4.25.1",
24
+ "vocab_size": 49504
25
+ }
text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3739f68350fa9e5421045b38733f1e3146bec0cfd980756dc3e4174304cd54e
3
+ size 246334519
tokenizer/added_tokens.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<new0>": 49408,
3
+ "<new10>": 49418,
4
+ "<new11>": 49419,
5
+ "<new12>": 49420,
6
+ "<new13>": 49421,
7
+ "<new14>": 49422,
8
+ "<new15>": 49423,
9
+ "<new16>": 49424,
10
+ "<new17>": 49425,
11
+ "<new18>": 49426,
12
+ "<new19>": 49427,
13
+ "<new1>": 49409,
14
+ "<new20>": 49428,
15
+ "<new21>": 49429,
16
+ "<new22>": 49430,
17
+ "<new23>": 49431,
18
+ "<new24>": 49432,
19
+ "<new25>": 49433,
20
+ "<new26>": 49434,
21
+ "<new27>": 49435,
22
+ "<new28>": 49436,
23
+ "<new29>": 49437,
24
+ "<new2>": 49410,
25
+ "<new30>": 49438,
26
+ "<new31>": 49439,
27
+ "<new32>": 49440,
28
+ "<new33>": 49441,
29
+ "<new34>": 49442,
30
+ "<new35>": 49443,
31
+ "<new36>": 49444,
32
+ "<new37>": 49445,
33
+ "<new38>": 49446,
34
+ "<new39>": 49447,
35
+ "<new3>": 49411,
36
+ "<new40>": 49448,
37
+ "<new41>": 49449,
38
+ "<new42>": 49450,
39
+ "<new43>": 49451,
40
+ "<new44>": 49452,
41
+ "<new45>": 49453,
42
+ "<new46>": 49454,
43
+ "<new47>": 49455,
44
+ "<new48>": 49456,
45
+ "<new49>": 49457,
46
+ "<new4>": 49412,
47
+ "<new50>": 49458,
48
+ "<new51>": 49459,
49
+ "<new52>": 49460,
50
+ "<new53>": 49461,
51
+ "<new54>": 49462,
52
+ "<new55>": 49463,
53
+ "<new56>": 49464,
54
+ "<new57>": 49465,
55
+ "<new58>": 49466,
56
+ "<new59>": 49467,
57
+ "<new5>": 49413,
58
+ "<new60>": 49468,
59
+ "<new61>": 49469,
60
+ "<new62>": 49470,
61
+ "<new63>": 49471,
62
+ "<new64>": 49472,
63
+ "<new65>": 49473,
64
+ "<new66>": 49474,
65
+ "<new67>": 49475,
66
+ "<new68>": 49476,
67
+ "<new69>": 49477,
68
+ "<new6>": 49414,
69
+ "<new70>": 49478,
70
+ "<new71>": 49479,
71
+ "<new72>": 49480,
72
+ "<new73>": 49481,
73
+ "<new74>": 49482,
74
+ "<new75>": 49483,
75
+ "<new76>": 49484,
76
+ "<new77>": 49485,
77
+ "<new78>": 49486,
78
+ "<new79>": 49487,
79
+ "<new7>": 49415,
80
+ "<new80>": 49488,
81
+ "<new81>": 49489,
82
+ "<new82>": 49490,
83
+ "<new83>": 49491,
84
+ "<new84>": 49492,
85
+ "<new85>": 49493,
86
+ "<new86>": 49494,
87
+ "<new87>": 49495,
88
+ "<new88>": 49496,
89
+ "<new89>": 49497,
90
+ "<new8>": 49416,
91
+ "<new90>": 49498,
92
+ "<new91>": 49499,
93
+ "<new92>": 49500,
94
+ "<new93>": 49501,
95
+ "<new94>": 49502,
96
+ "<new95>": 49503,
97
+ "<new9>": 49417
98
+ }
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "do_lower_case": true,
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 77,
22
+ "name_or_path": "experiments/pretrained_models/anything-v4.0/tokenizer",
23
+ "pad_token": "<|endoftext|>",
24
+ "special_tokens_map_file": "./special_tokens_map.json",
25
+ "tokenizer_class": "CLIPTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.19.3",
4
+ "_name_or_path": "experiments/pretrained_models/anything-v4.0/unet",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": 8,
10
+ "block_out_channels": [
11
+ 320,
12
+ 640,
13
+ 1280,
14
+ 1280
15
+ ],
16
+ "center_input_sample": false,
17
+ "class_embed_type": null,
18
+ "class_embeddings_concat": false,
19
+ "conv_in_kernel": 3,
20
+ "conv_out_kernel": 3,
21
+ "cross_attention_dim": 768,
22
+ "cross_attention_norm": null,
23
+ "down_block_types": [
24
+ "CrossAttnDownBlock2D",
25
+ "CrossAttnDownBlock2D",
26
+ "CrossAttnDownBlock2D",
27
+ "DownBlock2D"
28
+ ],
29
+ "downsample_padding": 1,
30
+ "dual_cross_attention": false,
31
+ "encoder_hid_dim": null,
32
+ "encoder_hid_dim_type": null,
33
+ "flip_sin_to_cos": true,
34
+ "freq_shift": 0,
35
+ "in_channels": 4,
36
+ "layers_per_block": 2,
37
+ "mid_block_only_cross_attention": null,
38
+ "mid_block_scale_factor": 1,
39
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
40
+ "norm_eps": 1e-05,
41
+ "norm_num_groups": 32,
42
+ "num_attention_heads": null,
43
+ "num_class_embeds": null,
44
+ "only_cross_attention": false,
45
+ "out_channels": 4,
46
+ "projection_class_embeddings_input_dim": null,
47
+ "resnet_out_scale_factor": 1.0,
48
+ "resnet_skip_time_act": false,
49
+ "resnet_time_scale_shift": "default",
50
+ "sample_size": 64,
51
+ "time_cond_proj_dim": null,
52
+ "time_embedding_act_fn": null,
53
+ "time_embedding_dim": null,
54
+ "time_embedding_type": "positional",
55
+ "timestep_post_act": null,
56
+ "transformer_layers_per_block": 1,
57
+ "up_block_types": [
58
+ "UpBlock2D",
59
+ "CrossAttnUpBlock2D",
60
+ "CrossAttnUpBlock2D",
61
+ "CrossAttnUpBlock2D"
62
+ ],
63
+ "upcast_attention": false,
64
+ "use_linear_projection": false
65
+ }
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43625b82a0fec7006e96260939b7c4418edb070a8bf85502012d19e8fc185008
3
+ size 1719322405
vae/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.19.3",
4
+ "_name_or_path": "experiments/pretrained_models/anything-v4.0/vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "force_upcast": true,
19
+ "in_channels": 3,
20
+ "latent_channels": 4,
21
+ "layers_per_block": 2,
22
+ "norm_num_groups": 32,
23
+ "out_channels": 3,
24
+ "sample_size": 512,
25
+ "scaling_factor": 0.18215,
26
+ "up_block_types": [
27
+ "UpDecoderBlock2D",
28
+ "UpDecoderBlock2D",
29
+ "UpDecoderBlock2D",
30
+ "UpDecoderBlock2D"
31
+ ]
32
+ }
vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97f7d79fc422ffd14a4addce422b50a74ef86679367df658884c08a16392f867
3
+ size 167403217