franciszzj committed on
Commit
733dac3
1 Parent(s): 15c807e

add virtual tryon model

Browse files
ckpts/densepose/Base-DensePose-RCNN-FPN.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ VERSION: 2
2
+ MODEL:
3
+ META_ARCHITECTURE: "GeneralizedRCNN"
4
+ BACKBONE:
5
+ NAME: "build_resnet_fpn_backbone"
6
+ RESNETS:
7
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
8
+ FPN:
9
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
10
+ ANCHOR_GENERATOR:
11
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each input feature map
12
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all input feature maps)
13
+ RPN:
14
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
15
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
16
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
17
+ # Detectron1 uses 2000 proposals per-batch,
18
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
19
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
20
+ POST_NMS_TOPK_TRAIN: 1000
21
+ POST_NMS_TOPK_TEST: 1000
22
+
23
+ DENSEPOSE_ON: True
24
+ ROI_HEADS:
25
+ NAME: "DensePoseROIHeads"
26
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
27
+ NUM_CLASSES: 1
28
+ ROI_BOX_HEAD:
29
+ NAME: "FastRCNNConvFCHead"
30
+ NUM_FC: 2
31
+ POOLER_RESOLUTION: 7
32
+ POOLER_SAMPLING_RATIO: 2
33
+ POOLER_TYPE: "ROIAlign"
34
+ ROI_DENSEPOSE_HEAD:
35
+ NAME: "DensePoseV1ConvXHead"
36
+ POOLER_TYPE: "ROIAlign"
37
+ NUM_COARSE_SEGM_CHANNELS: 2
38
+ DATASETS:
39
+ TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
40
+ TEST: ("densepose_coco_2014_minival",)
41
+ SOLVER:
42
+ IMS_PER_BATCH: 16
43
+ BASE_LR: 0.01
44
+ STEPS: (60000, 80000)
45
+ MAX_ITER: 90000
46
+ WARMUP_FACTOR: 0.1
47
+ INPUT:
48
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
ckpts/densepose/densepose_rcnn_R_50_FPN_s1x.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "Base-DensePose-RCNN-FPN.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ RESNETS:
5
+ DEPTH: 50
6
+ SOLVER:
7
+ MAX_ITER: 130000
8
+ STEPS: (100000, 120000)
ckpts/densepose/model_final_162be9.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a7382001b16e453bad95ca9dbc68ae8f2b839b304cf90eaf5c27fbdb4dae91
3
+ size 255757821
ckpts/schp/exp-schp-201908261155-lip.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fa3254ceeb74c8435458994a64b522fb439a3635b7b86ff470457e0413da00
3
+ size 267449349
ckpts/schp/exp-schp-201908301523-atr.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d7c91ce3b4e7133df56b599fc817b533e3439c5e8d282a59126d2fda339a2a
3
+ size 267445237
ckpts/stable-diffusion-inpainting/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DDIMScheduler",
3
+ "_diffusers_version": "0.6.0.dev0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "num_train_timesteps": 1000,
9
+ "set_alpha_to_one": false,
10
+ "steps_offset": 1,
11
+ "trained_betas": null,
12
+ "skip_prk_steps": true
13
+ }
ckpts/stable-diffusion-inpainting/unet/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.6.0.dev0",
4
+ "act_fn": "silu",
5
+ "attention_head_dim": 8,
6
+ "block_out_channels": [
7
+ 320,
8
+ 640,
9
+ 1280,
10
+ 1280
11
+ ],
12
+ "center_input_sample": false,
13
+ "cross_attention_dim": 768,
14
+ "down_block_types": [
15
+ "CrossAttnDownBlock2D",
16
+ "CrossAttnDownBlock2D",
17
+ "CrossAttnDownBlock2D",
18
+ "DownBlock2D"
19
+ ],
20
+ "downsample_padding": 1,
21
+ "flip_sin_to_cos": true,
22
+ "freq_shift": 0,
23
+ "in_channels": 9,
24
+ "layers_per_block": 2,
25
+ "mid_block_scale_factor": 1,
26
+ "norm_eps": 1e-05,
27
+ "norm_num_groups": 32,
28
+ "out_channels": 4,
29
+ "sample_size": 64,
30
+ "up_block_types": [
31
+ "UpBlock2D",
32
+ "CrossAttnUpBlock2D",
33
+ "CrossAttnUpBlock2D",
34
+ "CrossAttnUpBlock2D"
35
+ ]
36
+ }
ckpts/stable-diffusion-inpainting/vae/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.6.0.dev0",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 128,
7
+ 256,
8
+ 512,
9
+ 512
10
+ ],
11
+ "down_block_types": [
12
+ "DownEncoderBlock2D",
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D"
16
+ ],
17
+ "in_channels": 3,
18
+ "latent_channels": 4,
19
+ "layers_per_block": 2,
20
+ "norm_num_groups": 32,
21
+ "out_channels": 3,
22
+ "sample_size": 256,
23
+ "up_block_types": [
24
+ "UpDecoderBlock2D",
25
+ "UpDecoderBlock2D",
26
+ "UpDecoderBlock2D",
27
+ "UpDecoderBlock2D"
28
+ ]
29
+ }
ckpts/stable-diffusion-xl-1.0-inpainting-0.1/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "EulerDiscreteScheduler",
3
+ "_diffusers_version": "0.21.0.dev0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "interpolation_type": "linear",
9
+ "num_train_timesteps": 1000,
10
+ "prediction_type": "epsilon",
11
+ "sample_max_value": 1.0,
12
+ "set_alpha_to_one": false,
13
+ "skip_prk_steps": true,
14
+ "steps_offset": 1,
15
+ "timestep_spacing": "leading",
16
+ "trained_betas": null,
17
+ "use_karras_sigmas": false
18
+ }
ckpts/stable-diffusion-xl-1.0-inpainting-0.1/unet/config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.21.0.dev0",
4
+ "_name_or_path": "valhalla/sdxl-inpaint-ema",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": "text_time",
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": 256,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20
13
+ ],
14
+ "attention_type": "default",
15
+ "block_out_channels": [
16
+ 320,
17
+ 640,
18
+ 1280
19
+ ],
20
+ "center_input_sample": false,
21
+ "class_embed_type": null,
22
+ "class_embeddings_concat": false,
23
+ "conv_in_kernel": 3,
24
+ "conv_out_kernel": 3,
25
+ "cross_attention_dim": 2048,
26
+ "cross_attention_norm": null,
27
+ "decay": 0.9999,
28
+ "down_block_types": [
29
+ "DownBlock2D",
30
+ "CrossAttnDownBlock2D",
31
+ "CrossAttnDownBlock2D"
32
+ ],
33
+ "downsample_padding": 1,
34
+ "dual_cross_attention": false,
35
+ "encoder_hid_dim": null,
36
+ "encoder_hid_dim_type": null,
37
+ "flip_sin_to_cos": true,
38
+ "freq_shift": 0,
39
+ "in_channels": 9,
40
+ "inv_gamma": 1.0,
41
+ "layers_per_block": 2,
42
+ "mid_block_only_cross_attention": null,
43
+ "mid_block_scale_factor": 1,
44
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
45
+ "min_decay": 0.0,
46
+ "norm_eps": 1e-05,
47
+ "norm_num_groups": 32,
48
+ "num_attention_heads": null,
49
+ "num_class_embeds": null,
50
+ "only_cross_attention": false,
51
+ "optimization_step": 37000,
52
+ "out_channels": 4,
53
+ "power": 0.6666666666666666,
54
+ "projection_class_embeddings_input_dim": 2816,
55
+ "resnet_out_scale_factor": 1.0,
56
+ "resnet_skip_time_act": false,
57
+ "resnet_time_scale_shift": "default",
58
+ "sample_size": 128,
59
+ "time_cond_proj_dim": null,
60
+ "time_embedding_act_fn": null,
61
+ "time_embedding_dim": null,
62
+ "time_embedding_type": "positional",
63
+ "timestep_post_act": null,
64
+ "transformer_layers_per_block": [
65
+ 1,
66
+ 2,
67
+ 10
68
+ ],
69
+ "up_block_types": [
70
+ "CrossAttnUpBlock2D",
71
+ "CrossAttnUpBlock2D",
72
+ "UpBlock2D"
73
+ ],
74
+ "upcast_attention": null,
75
+ "update_after_step": 0,
76
+ "use_ema_warmup": false,
77
+ "use_linear_projection": true
78
+ }
ckpts/stable-diffusion-xl-1.0-inpainting-0.1/vae/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.21.0.dev0",
4
+ "_name_or_path": "madebyollin/sdxl-vae-fp16-fix",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "force_upcast": false,
19
+ "in_channels": 3,
20
+ "latent_channels": 4,
21
+ "layers_per_block": 2,
22
+ "norm_num_groups": 32,
23
+ "out_channels": 3,
24
+ "sample_size": 512,
25
+ "scaling_factor": 0.13025,
26
+ "up_block_types": [
27
+ "UpDecoderBlock2D",
28
+ "UpDecoderBlock2D",
29
+ "UpDecoderBlock2D",
30
+ "UpDecoderBlock2D"
31
+ ]
32
+ }
ckpts/virtual_tryon.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d4b31dafac19ec5cc4ccbe307d4eb61b76a52f98aaef78fc02b4405c406351
3
+ size 7211553158