Commit 7fb23d7 (committed by ghunkins)
1 Parent(s): 56b8162

Upload HunyuanVideoPipeline

model_index.json CHANGED
@@ -1,7 +1,6 @@
 {
   "_class_name": "HunyuanVideoPipeline",
-  "_diffusers_version": "0.31.0",
-  "_name_or_path": "magespace/hyvideo-diffusers-dev",
+  "_diffusers_version": "0.32.0.dev0",
   "scheduler": [
     "diffusers",
     "FlowMatchEulerDiscreteScheduler"
scheduler/scheduler_config.json CHANGED
@@ -1,11 +1,16 @@
 {
   "_class_name": "FlowMatchEulerDiscreteScheduler",
-  "_diffusers_version": "0.31.0",
+  "_diffusers_version": "0.32.0.dev0",
   "base_image_seq_len": 256,
   "base_shift": 0.5,
+  "invert_sigmas": false,
   "max_image_seq_len": 4096,
   "max_shift": 1.15,
   "num_train_timesteps": 1000,
   "shift": 7.0,
-  "use_dynamic_shifting": false
+  "shift_terminal": null,
+  "use_beta_sigmas": false,
+  "use_dynamic_shifting": false,
+  "use_exponential_sigmas": false,
+  "use_karras_sigmas": false
 }
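The updated scheduler config adds the sigma-schedule switches that `FlowMatchEulerDiscreteScheduler` exposes in diffusers 0.32 (`invert_sigmas`, `shift_terminal`, `use_beta_sigmas`, `use_exponential_sigmas`, `use_karras_sigmas`), all left at their defaults, so the effective schedule (shift 7.0, no dynamic shifting) is unchanged. A hedged sketch of re-creating the scheduler from this subfolder, with a placeholder repo id:

```python
# Sketch: rebuild the scheduler from the committed config.
from diffusers import FlowMatchEulerDiscreteScheduler

scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    "your-namespace/hyvideo-diffusers",  # placeholder repo id
    subfolder="scheduler",
)
print(scheduler.config.shift)                 # 7.0
print(scheduler.config.use_dynamic_shifting)  # False
```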
text_encoder/config.json CHANGED
@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "/root/.cache/huggingface/hub/models--magespace--hyvideo-diffusers/snapshots/ae09a3d1e5306c922f86a6aaf8db5b691947d204/text_encoder",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -24,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.47.0",
+  "transformers_version": "4.46.3",
   "use_cache": true,
   "vocab_size": 128320
 }
text_encoder/generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 128000,
   "eos_token_id": 128001,
-  "transformers_version": "4.47.0"
+  "transformers_version": "4.46.3"
 }
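The Llama text-encoder changes are limited to dropping the local cache path and pinning transformers 4.46.3 instead of 4.47.0; the architecture itself (`LlamaForCausalLM`, vocab_size 128320, bfloat16) is untouched. A sketch of loading it standalone, again with a placeholder repo id:

```python
# Sketch: load the Llama-based text encoder on its own.
import torch
from transformers import AutoModel

text_encoder = AutoModel.from_pretrained(
    "your-namespace/hyvideo-diffusers",  # placeholder repo id
    subfolder="text_encoder",
    torch_dtype=torch.bfloat16,          # matches "torch_dtype" in the config
)
```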
text_encoder_2/config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/root/.cache/huggingface/hub/models--magespace--hyvideo-diffusers/snapshots/ae09a3d1e5306c922f86a6aaf8db5b691947d204/text_encoder_2",
+  "_name_or_path": "openai/clip-vit-large-patch14",
   "architectures": [
     "CLIPTextModel"
   ],
@@ -19,7 +19,7 @@
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "projection_dim": 768,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.47.0",
+  "torch_dtype": "float16",
+  "transformers_version": "4.46.3",
   "vocab_size": 49408
 }
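For the secondary CLIP text encoder, the cache path is replaced with its upstream source (`openai/clip-vit-large-patch14`) and the stored dtype changes from bfloat16 to float16. A sketch of loading it by itself (placeholder repo id; the dtype follows the new config):

```python
# Sketch: load the secondary CLIP text encoder.
import torch
from transformers import CLIPTextModel

text_encoder_2 = CLIPTextModel.from_pretrained(
    "your-namespace/hyvideo-diffusers",  # placeholder repo id
    subfolder="text_encoder_2",
    torch_dtype=torch.float16,           # new "torch_dtype" in the config
)
```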
text_encoder_2/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03532a1b8bb2f02b16b9b290273caf9c8793a0a268517c104eee30f7d1829120
-size 246144352
+oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
+size 246144152
tokenizer/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1cb63338521d084aba234e273e56a3a0bb0e41f6438e205a2cbc1db8f02fd1f
-size 17210368
+oid sha256:d2c593db4aa75b17a42c1f74d7cc38e257eaeed222e6a52674c65544165dcbaa
+size 17210098
tokenizer/tokenizer_config.json CHANGED
@@ -2080,7 +2080,6 @@
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|end_of_text|>",
-  "extra_special_tokens": {},
   "legacy": true,
   "model_input_names": [
     "input_ids",
transformer/config.json CHANGED
@@ -1,31 +1,23 @@
 {
-  "_class_name": "HYVideoDiffusionTransformer",
-  "_diffusers_version": "0.31.0",
-  "_name_or_path": "/root/.cache/huggingface/hub/models--magespace--hyvideo-diffusers/snapshots/ae09a3d1e5306c922f86a6aaf8db5b691947d204/transformer",
-  "guidance_embed": true,
-  "heads_num": 24,
-  "hidden_size": 3072,
+  "_class_name": "HunyuanVideoTransformer3DModel",
+  "_diffusers_version": "0.32.0.dev0",
+  "attention_head_dim": 128,
+  "guidance_embeds": true,
   "in_channels": 16,
-  "mlp_act_type": "gelu_tanh",
-  "mlp_width_ratio": 4,
-  "mm_double_blocks_depth": 20,
-  "mm_single_blocks_depth": 40,
+  "mlp_ratio": 4.0,
+  "num_attention_heads": 24,
+  "num_layers": 20,
+  "num_refiner_layers": 2,
+  "num_single_layers": 40,
   "out_channels": 16,
-  "patch_size": [
-    1,
-    2,
-    2
-  ],
-  "qk_norm": true,
-  "qk_norm_type": "rms",
-  "qkv_bias": true,
+  "patch_size": 2,
+  "patch_size_t": 1,
+  "qk_norm": "rms_norm",
   "rope_dim_list": [
     16,
     56,
     56
   ],
-  "text_projection": "single_refiner",
-  "text_states_dim": 4096,
-  "text_states_dim_2": 768,
-  "use_attention_mask": true
+  "text_embed_dim": 4096,
+  "text_embed_dim_2": 768
 }
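The transformer config is rewritten from the custom `HYVideoDiffusionTransformer` keys to the native `HunyuanVideoTransformer3DModel` naming: `heads_num` becomes `num_attention_heads`, `mm_double_blocks_depth`/`mm_single_blocks_depth` become `num_layers`/`num_single_layers`, `qk_norm_type: "rms"` becomes `qk_norm: "rms_norm"`, the `[1, 2, 2]` patch size is split into `patch_size_t: 1` and `patch_size: 2`, and the old `hidden_size: 3072` is expressed as 24 heads times `attention_head_dim: 128`. A sketch of loading only this component (placeholder repo id):

```python
# Sketch: load the converted transformer from its sharded safetensors.
import torch
from diffusers import HunyuanVideoTransformer3DModel

transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    "your-namespace/hyvideo-diffusers",  # placeholder repo id
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
)
print(transformer.config.num_attention_heads)  # 24
print(transformer.config.num_single_layers)    # 40
```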
transformer/diffusion_pytorch_model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf69a571f0a78e0f04ff429152525ff4082db7c9aa4496d345c53fb599b0f9c1
-size 9972044720
+oid sha256:260147e5816e920928beedf5bafddb308a3a9c943f560feffe41b6ae44380704
+size 9972080096
transformer/diffusion_pytorch_model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:895efacc0127c19f4d187d9a6f6c5d23d253231982487375eb8ecf64244ceaa2
-size 9968230568
+oid sha256:4c02025893773faf82d16904f6ef23cbfeb7c693079d0f0bd890e97948fce51a
+size 9968234824
transformer/diffusion_pytorch_model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10d0e43a39b9cb6b9ed411bad5c878aca15d14678a17586565fc2e84898b534c
-size 5701844424
+oid sha256:8d27b9eed23d493090216922836e48e4bf143b7e5e7a6cb000647523ee298020
+size 5701859992
transformer/diffusion_pytorch_model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
vae/config.json CHANGED
@@ -1,7 +1,6 @@
 {
-  "_class_name": "AutoencoderKLCausal3D",
-  "_diffusers_version": "0.31.0",
-  "_name_or_path": "/root/.cache/huggingface/hub/models--magespace--hyvideo-diffusers/snapshots/ae09a3d1e5306c922f86a6aaf8db5b691947d204/vae",
+  "_class_name": "AutoencoderKLHunyuanVideo",
+  "_diffusers_version": "0.32.0.dev0",
   "act_fn": "silu",
   "block_out_channels": [
     128,
@@ -10,27 +9,24 @@
     512
   ],
   "down_block_types": [
-    "DownEncoderBlockCausal3D",
-    "DownEncoderBlockCausal3D",
-    "DownEncoderBlockCausal3D",
-    "DownEncoderBlockCausal3D"
+    "HunyuanVideoDownBlock3D",
+    "HunyuanVideoDownBlock3D",
+    "HunyuanVideoDownBlock3D",
+    "HunyuanVideoDownBlock3D"
   ],
-  "force_upcast": true,
   "in_channels": 3,
   "latent_channels": 16,
   "layers_per_block": 2,
   "mid_block_add_attention": true,
   "norm_num_groups": 32,
   "out_channels": 3,
-  "sample_size": 256,
-  "sample_tsize": 64,
   "scaling_factor": 0.476986,
   "spatial_compression_ratio": 8,
-  "time_compression_ratio": 4,
+  "temporal_compression_ratio": 4,
   "up_block_types": [
-    "UpDecoderBlockCausal3D",
-    "UpDecoderBlockCausal3D",
-    "UpDecoderBlockCausal3D",
-    "UpDecoderBlockCausal3D"
+    "HunyuanVideoUpBlock3D",
+    "HunyuanVideoUpBlock3D",
+    "HunyuanVideoUpBlock3D",
+    "HunyuanVideoUpBlock3D"
   ]
 }
vae/diffusion_pytorch_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ffef191d47b661d48f356ed9ed7cf391509af5f4c000ba07a75dcdc4c03c501
-size 492986478
+oid sha256:7c68a6295f9034a88225fbafb1f3258291a08d57a1fdb938233fa57b1b8f4883
+size 985943868
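The VAE moves from the custom `AutoencoderKLCausal3D` to the native `AutoencoderKLHunyuanVideo` class, with the causal block types renamed and `time_compression_ratio` becoming `temporal_compression_ratio`; the weights file roughly doubles (493 MB to 986 MB), consistent with float32 storage. A sketch of loading it and estimating latent shapes from the 8x spatial / 4x temporal compression in the config; the frame mapping below is the usual causal-VAE convention, not something stated in this commit:

```python
# Sketch: load the converted causal 3D VAE and estimate latent sizes.
import torch
from diffusers import AutoencoderKLHunyuanVideo

vae = AutoencoderKLHunyuanVideo.from_pretrained(
    "your-namespace/hyvideo-diffusers",  # placeholder repo id
    subfolder="vae",
    torch_dtype=torch.float16,           # optional downcast for inference
)
vae.enable_tiling()                      # optional: tiled decode to bound memory use

# 61 frames at 720x1280 -> latent grid under 4x temporal / 8x spatial compression
latent_frames = (61 - 1) // 4 + 1        # 16 (assumed causal frame mapping)
latent_h, latent_w = 720 // 8, 1280 // 8 # 90, 160
```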