yevvonlim committed on
Commit
6ad7c1b
·
verified ·
1 Parent(s): c9a1ec7

End of training

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +62 -0
  2. checkpoint-14500/motion_encoder/config.json +69 -0
  3. checkpoint-14500/motion_encoder/model.safetensors +3 -0
  4. checkpoint-14500/optimizer.bin +3 -0
  5. checkpoint-14500/random_states_0.pkl +3 -0
  6. checkpoint-14500/scheduler.bin +3 -0
  7. checkpoint-14500/unet/config.json +38 -0
  8. checkpoint-14500/unet/diffusion_pytorch_model.safetensors +3 -0
  9. checkpoint-15000/motion_encoder/config.json +69 -0
  10. checkpoint-15000/motion_encoder/model.safetensors +3 -0
  11. checkpoint-15000/optimizer.bin +3 -0
  12. checkpoint-15000/random_states_0.pkl +3 -0
  13. checkpoint-15000/scheduler.bin +3 -0
  14. checkpoint-15000/unet/config.json +38 -0
  15. checkpoint-15000/unet/diffusion_pytorch_model.safetensors +3 -0
  16. config.json +69 -0
  17. diffusion_pytorch_model.safetensors +3 -0
  18. model.safetensors +3 -0
  19. validation_images/step_10000_val_recon_0.mp4 +3 -0
  20. validation_images/step_10000_val_source_0.mp4 +3 -0
  21. validation_images/step_1000_val_recon_0.mp4 +3 -0
  22. validation_images/step_1000_val_source_0.mp4 +3 -0
  23. validation_images/step_10500_val_recon_0.mp4 +3 -0
  24. validation_images/step_10500_val_source_0.mp4 +3 -0
  25. validation_images/step_11000_val_recon_0.mp4 +3 -0
  26. validation_images/step_11000_val_source_0.mp4 +3 -0
  27. validation_images/step_11500_val_recon_0.mp4 +3 -0
  28. validation_images/step_11500_val_source_0.mp4 +3 -0
  29. validation_images/step_12000_val_recon_0.mp4 +3 -0
  30. validation_images/step_12000_val_source_0.mp4 +3 -0
  31. validation_images/step_12500_val_recon_0.mp4 +3 -0
  32. validation_images/step_12500_val_source_0.mp4 +3 -0
  33. validation_images/step_13000_val_recon_0.mp4 +3 -0
  34. validation_images/step_13000_val_source_0.mp4 +3 -0
  35. validation_images/step_13500_val_recon_0.mp4 +3 -0
  36. validation_images/step_13500_val_source_0.mp4 +3 -0
  37. validation_images/step_14000_val_recon_0.mp4 +3 -0
  38. validation_images/step_14000_val_source_0.mp4 +3 -0
  39. validation_images/step_14500_val_recon_0.mp4 +3 -0
  40. validation_images/step_14500_val_source_0.mp4 +3 -0
  41. validation_images/step_15000_val_recon_0.mp4 +3 -0
  42. validation_images/step_15000_val_source_0.mp4 +3 -0
  43. validation_images/step_1500_val_recon_0.mp4 +3 -0
  44. validation_images/step_1500_val_source_0.mp4 +3 -0
  45. validation_images/step_1_val_recon_0.mp4 +3 -0
  46. validation_images/step_1_val_source_0.mp4 +3 -0
  47. validation_images/step_2000_val_recon_0.mp4 +3 -0
  48. validation_images/step_2000_val_source_0.mp4 +3 -0
  49. validation_images/step_2500_val_recon_0.mp4 +3 -0
  50. validation_images/step_2500_val_source_0.mp4 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,65 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ validation_images/step_10000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ validation_images/step_10000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ validation_images/step_1000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
39
+ validation_images/step_1000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
40
+ validation_images/step_10500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
41
+ validation_images/step_10500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
42
+ validation_images/step_11000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
43
+ validation_images/step_11000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
44
+ validation_images/step_11500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
45
+ validation_images/step_11500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
46
+ validation_images/step_12000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
47
+ validation_images/step_12000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
48
+ validation_images/step_12500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
49
+ validation_images/step_12500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
50
+ validation_images/step_13000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
51
+ validation_images/step_13000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
52
+ validation_images/step_13500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
53
+ validation_images/step_13500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
54
+ validation_images/step_14000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
55
+ validation_images/step_14000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
56
+ validation_images/step_14500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
57
+ validation_images/step_14500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
58
+ validation_images/step_15000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
59
+ validation_images/step_15000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
60
+ validation_images/step_1500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
61
+ validation_images/step_1500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
62
+ validation_images/step_1_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
63
+ validation_images/step_1_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
64
+ validation_images/step_2000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
65
+ validation_images/step_2000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
66
+ validation_images/step_2500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
67
+ validation_images/step_2500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
68
+ validation_images/step_3000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
69
+ validation_images/step_3000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
70
+ validation_images/step_3500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
71
+ validation_images/step_3500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
72
+ validation_images/step_4000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
73
+ validation_images/step_4000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
74
+ validation_images/step_4500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
75
+ validation_images/step_4500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
76
+ validation_images/step_5000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
77
+ validation_images/step_5000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
78
+ validation_images/step_500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
79
+ validation_images/step_500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
80
+ validation_images/step_5500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
81
+ validation_images/step_5500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
82
+ validation_images/step_6000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
83
+ validation_images/step_6000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
84
+ validation_images/step_6500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
85
+ validation_images/step_6500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
86
+ validation_images/step_7000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
87
+ validation_images/step_7000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
88
+ validation_images/step_7500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
89
+ validation_images/step_7500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
90
+ validation_images/step_8000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
91
+ validation_images/step_8000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
92
+ validation_images/step_8500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
93
+ validation_images/step_8500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
94
+ validation_images/step_9000_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
95
+ validation_images/step_9000_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
96
+ validation_images/step_9500_val_recon_0.mp4 filter=lfs diff=lfs merge=lfs -text
97
+ validation_images/step_9500_val_source_0.mp4 filter=lfs diff=lfs merge=lfs -text
checkpoint-14500/motion_encoder/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MotionAssociativeMemoryEncoder"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "hidden_act": "gelu_fast",
7
+ "hidden_dropout_prob": 0.0,
8
+ "hidden_size": 768,
9
+ "image_size": [
10
+ 72,
11
+ 128
12
+ ],
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-06,
16
+ "model_type": "vivit",
17
+ "num_attention_heads": 8,
18
+ "num_channels": 4,
19
+ "num_frames": 24,
20
+ "num_hidden_layers": 6,
21
+ "qkv_bias": true,
22
+ "query_dims": [
23
+ 160,
24
+ 320,
25
+ 640
26
+ ],
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.47.1",
29
+ "tubelet_size": [
30
+ 2,
31
+ 9,
32
+ 16
33
+ ],
34
+ "unet_attention_names": {
35
+ "down_blocks_0_attentions_0_temporal_transformer_blocks_0_attn1": 320,
36
+ "down_blocks_0_attentions_0_transformer_blocks_0_attn1": 320,
37
+ "down_blocks_0_attentions_1_temporal_transformer_blocks_0_attn1": 320,
38
+ "down_blocks_0_attentions_1_transformer_blocks_0_attn1": 320,
39
+ "down_blocks_1_attentions_0_temporal_transformer_blocks_0_attn1": 640,
40
+ "down_blocks_1_attentions_0_transformer_blocks_0_attn1": 640,
41
+ "down_blocks_1_attentions_1_temporal_transformer_blocks_0_attn1": 640,
42
+ "down_blocks_1_attentions_1_transformer_blocks_0_attn1": 640,
43
+ "down_blocks_2_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
44
+ "down_blocks_2_attentions_0_transformer_blocks_0_attn1": 1280,
45
+ "down_blocks_2_attentions_1_temporal_transformer_blocks_0_attn1": 1280,
46
+ "down_blocks_2_attentions_1_transformer_blocks_0_attn1": 1280,
47
+ "mid_block_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
48
+ "mid_block_attentions_0_transformer_blocks_0_attn1": 1280,
49
+ "up_blocks_1_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
50
+ "up_blocks_1_attentions_0_transformer_blocks_0_attn1": 1280,
51
+ "up_blocks_1_attentions_1_temporal_transformer_blocks_0_attn1": 1280,
52
+ "up_blocks_1_attentions_1_transformer_blocks_0_attn1": 1280,
53
+ "up_blocks_1_attentions_2_temporal_transformer_blocks_0_attn1": 1280,
54
+ "up_blocks_1_attentions_2_transformer_blocks_0_attn1": 1280,
55
+ "up_blocks_2_attentions_0_temporal_transformer_blocks_0_attn1": 640,
56
+ "up_blocks_2_attentions_0_transformer_blocks_0_attn1": 640,
57
+ "up_blocks_2_attentions_1_temporal_transformer_blocks_0_attn1": 640,
58
+ "up_blocks_2_attentions_1_transformer_blocks_0_attn1": 640,
59
+ "up_blocks_2_attentions_2_temporal_transformer_blocks_0_attn1": 640,
60
+ "up_blocks_2_attentions_2_transformer_blocks_0_attn1": 640,
61
+ "up_blocks_3_attentions_0_temporal_transformer_blocks_0_attn1": 320,
62
+ "up_blocks_3_attentions_0_transformer_blocks_0_attn1": 320,
63
+ "up_blocks_3_attentions_1_temporal_transformer_blocks_0_attn1": 320,
64
+ "up_blocks_3_attentions_1_transformer_blocks_0_attn1": 320,
65
+ "up_blocks_3_attentions_2_temporal_transformer_blocks_0_attn1": 320,
66
+ "up_blocks_3_attentions_2_transformer_blocks_0_attn1": 320
67
+ },
68
+ "zero_init_theta": false
69
+ }
checkpoint-14500/motion_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbe5df8acf8407642549636a7eb07993b1b33eede151a642c47f762bb3c79257
3
+ size 88022632
checkpoint-14500/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:679118bd6c1c6cb8ea87d290636096ee6264dbb9d26e1cdad906c2d5bed2f78a
3
+ size 486517948
checkpoint-14500/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed4b9d43acb279b734ffc695b741976f18ebe7c77d97372c3eed08acf6f6f8aa
3
+ size 14408
checkpoint-14500/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a689e735f57fd2077fcd0b978bf0fdd82476c3bfa187c6f6f29c2bf20fbc8377
3
+ size 1000
checkpoint-14500/unet/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "MAMUNetSpatioTemporalConditionModel",
3
+ "_diffusers_version": "0.32.1",
4
+ "_name_or_path": "/home/suraj_huggingface_co/.cache/huggingface/hub/models--diffusers--svd-xt/snapshots/9703ded20c957c340781ee710b75660826deb487/unet",
5
+ "addition_time_embed_dim": 256,
6
+ "block_out_channels": [
7
+ 320,
8
+ 640,
9
+ 1280,
10
+ 1280
11
+ ],
12
+ "cross_attention_dim": 1024,
13
+ "down_block_types": [
14
+ "MAMCrossAttnDownBlockSpatioTemporal",
15
+ "MAMCrossAttnDownBlockSpatioTemporal",
16
+ "MAMCrossAttnDownBlockSpatioTemporal",
17
+ "MAMDownBlockSpatioTemporal"
18
+ ],
19
+ "in_channels": 8,
20
+ "layers_per_block": 2,
21
+ "num_attention_heads": [
22
+ 5,
23
+ 10,
24
+ 20,
25
+ 20
26
+ ],
27
+ "num_frames": 25,
28
+ "out_channels": 4,
29
+ "projection_class_embeddings_input_dim": 768,
30
+ "sample_size": 96,
31
+ "transformer_layers_per_block": 1,
32
+ "up_block_types": [
33
+ "MAMUpBlockSpatioTemporal",
34
+ "MAMCrossAttnUpBlockSpatioTemporal",
35
+ "MAMCrossAttnUpBlockSpatioTemporal",
36
+ "MAMCrossAttnUpBlockSpatioTemporal"
37
+ ]
38
+ }
checkpoint-14500/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e06634d0972a340850437dae50c50d7a78bfa66a22d07d44459123fb742cc7b5
3
+ size 5332308028
checkpoint-15000/motion_encoder/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MotionAssociativeMemoryEncoder"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "hidden_act": "gelu_fast",
7
+ "hidden_dropout_prob": 0.0,
8
+ "hidden_size": 768,
9
+ "image_size": [
10
+ 72,
11
+ 128
12
+ ],
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-06,
16
+ "model_type": "vivit",
17
+ "num_attention_heads": 8,
18
+ "num_channels": 4,
19
+ "num_frames": 24,
20
+ "num_hidden_layers": 6,
21
+ "qkv_bias": true,
22
+ "query_dims": [
23
+ 160,
24
+ 320,
25
+ 640
26
+ ],
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.47.1",
29
+ "tubelet_size": [
30
+ 2,
31
+ 9,
32
+ 16
33
+ ],
34
+ "unet_attention_names": {
35
+ "down_blocks_0_attentions_0_temporal_transformer_blocks_0_attn1": 320,
36
+ "down_blocks_0_attentions_0_transformer_blocks_0_attn1": 320,
37
+ "down_blocks_0_attentions_1_temporal_transformer_blocks_0_attn1": 320,
38
+ "down_blocks_0_attentions_1_transformer_blocks_0_attn1": 320,
39
+ "down_blocks_1_attentions_0_temporal_transformer_blocks_0_attn1": 640,
40
+ "down_blocks_1_attentions_0_transformer_blocks_0_attn1": 640,
41
+ "down_blocks_1_attentions_1_temporal_transformer_blocks_0_attn1": 640,
42
+ "down_blocks_1_attentions_1_transformer_blocks_0_attn1": 640,
43
+ "down_blocks_2_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
44
+ "down_blocks_2_attentions_0_transformer_blocks_0_attn1": 1280,
45
+ "down_blocks_2_attentions_1_temporal_transformer_blocks_0_attn1": 1280,
46
+ "down_blocks_2_attentions_1_transformer_blocks_0_attn1": 1280,
47
+ "mid_block_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
48
+ "mid_block_attentions_0_transformer_blocks_0_attn1": 1280,
49
+ "up_blocks_1_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
50
+ "up_blocks_1_attentions_0_transformer_blocks_0_attn1": 1280,
51
+ "up_blocks_1_attentions_1_temporal_transformer_blocks_0_attn1": 1280,
52
+ "up_blocks_1_attentions_1_transformer_blocks_0_attn1": 1280,
53
+ "up_blocks_1_attentions_2_temporal_transformer_blocks_0_attn1": 1280,
54
+ "up_blocks_1_attentions_2_transformer_blocks_0_attn1": 1280,
55
+ "up_blocks_2_attentions_0_temporal_transformer_blocks_0_attn1": 640,
56
+ "up_blocks_2_attentions_0_transformer_blocks_0_attn1": 640,
57
+ "up_blocks_2_attentions_1_temporal_transformer_blocks_0_attn1": 640,
58
+ "up_blocks_2_attentions_1_transformer_blocks_0_attn1": 640,
59
+ "up_blocks_2_attentions_2_temporal_transformer_blocks_0_attn1": 640,
60
+ "up_blocks_2_attentions_2_transformer_blocks_0_attn1": 640,
61
+ "up_blocks_3_attentions_0_temporal_transformer_blocks_0_attn1": 320,
62
+ "up_blocks_3_attentions_0_transformer_blocks_0_attn1": 320,
63
+ "up_blocks_3_attentions_1_temporal_transformer_blocks_0_attn1": 320,
64
+ "up_blocks_3_attentions_1_transformer_blocks_0_attn1": 320,
65
+ "up_blocks_3_attentions_2_temporal_transformer_blocks_0_attn1": 320,
66
+ "up_blocks_3_attentions_2_transformer_blocks_0_attn1": 320
67
+ },
68
+ "zero_init_theta": false
69
+ }
checkpoint-15000/motion_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ba3d5b69587a2240b1f53fb04d0399571b2ead8fcb46b1579d7c69bcbdbc7d7
3
+ size 88022632
checkpoint-15000/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5046d542a277d6728f8db4494db4bb90357ca82e9e60c7435b747ff6f8c5799
3
+ size 486517948
checkpoint-15000/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff92fecc7170fa7d2430a0c32394577417f68e9ea65f9ea1145ffbf63d56653
3
+ size 14408
checkpoint-15000/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5488e48edf134f11b30bc97f9db87ef384ca8eea75a0471520f372e45b82a0
3
+ size 1000
checkpoint-15000/unet/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "MAMUNetSpatioTemporalConditionModel",
3
+ "_diffusers_version": "0.32.1",
4
+ "_name_or_path": "/home/suraj_huggingface_co/.cache/huggingface/hub/models--diffusers--svd-xt/snapshots/9703ded20c957c340781ee710b75660826deb487/unet",
5
+ "addition_time_embed_dim": 256,
6
+ "block_out_channels": [
7
+ 320,
8
+ 640,
9
+ 1280,
10
+ 1280
11
+ ],
12
+ "cross_attention_dim": 1024,
13
+ "down_block_types": [
14
+ "MAMCrossAttnDownBlockSpatioTemporal",
15
+ "MAMCrossAttnDownBlockSpatioTemporal",
16
+ "MAMCrossAttnDownBlockSpatioTemporal",
17
+ "MAMDownBlockSpatioTemporal"
18
+ ],
19
+ "in_channels": 8,
20
+ "layers_per_block": 2,
21
+ "num_attention_heads": [
22
+ 5,
23
+ 10,
24
+ 20,
25
+ 20
26
+ ],
27
+ "num_frames": 25,
28
+ "out_channels": 4,
29
+ "projection_class_embeddings_input_dim": 768,
30
+ "sample_size": 96,
31
+ "transformer_layers_per_block": 1,
32
+ "up_block_types": [
33
+ "MAMUpBlockSpatioTemporal",
34
+ "MAMCrossAttnUpBlockSpatioTemporal",
35
+ "MAMCrossAttnUpBlockSpatioTemporal",
36
+ "MAMCrossAttnUpBlockSpatioTemporal"
37
+ ]
38
+ }
checkpoint-15000/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00714c5e36a575d4252d3cc9573179a9e7f20b455795f766ba6f3d647232ffc
3
+ size 5332308028
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MotionAssociativeMemoryEncoder"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "hidden_act": "gelu_fast",
7
+ "hidden_dropout_prob": 0.0,
8
+ "hidden_size": 768,
9
+ "image_size": [
10
+ 72,
11
+ 128
12
+ ],
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-06,
16
+ "model_type": "vivit",
17
+ "num_attention_heads": 8,
18
+ "num_channels": 4,
19
+ "num_frames": 24,
20
+ "num_hidden_layers": 6,
21
+ "qkv_bias": true,
22
+ "query_dims": [
23
+ 160,
24
+ 320,
25
+ 640
26
+ ],
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.47.1",
29
+ "tubelet_size": [
30
+ 2,
31
+ 9,
32
+ 16
33
+ ],
34
+ "unet_attention_names": {
35
+ "down_blocks_0_attentions_0_temporal_transformer_blocks_0_attn1": 320,
36
+ "down_blocks_0_attentions_0_transformer_blocks_0_attn1": 320,
37
+ "down_blocks_0_attentions_1_temporal_transformer_blocks_0_attn1": 320,
38
+ "down_blocks_0_attentions_1_transformer_blocks_0_attn1": 320,
39
+ "down_blocks_1_attentions_0_temporal_transformer_blocks_0_attn1": 640,
40
+ "down_blocks_1_attentions_0_transformer_blocks_0_attn1": 640,
41
+ "down_blocks_1_attentions_1_temporal_transformer_blocks_0_attn1": 640,
42
+ "down_blocks_1_attentions_1_transformer_blocks_0_attn1": 640,
43
+ "down_blocks_2_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
44
+ "down_blocks_2_attentions_0_transformer_blocks_0_attn1": 1280,
45
+ "down_blocks_2_attentions_1_temporal_transformer_blocks_0_attn1": 1280,
46
+ "down_blocks_2_attentions_1_transformer_blocks_0_attn1": 1280,
47
+ "mid_block_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
48
+ "mid_block_attentions_0_transformer_blocks_0_attn1": 1280,
49
+ "up_blocks_1_attentions_0_temporal_transformer_blocks_0_attn1": 1280,
50
+ "up_blocks_1_attentions_0_transformer_blocks_0_attn1": 1280,
51
+ "up_blocks_1_attentions_1_temporal_transformer_blocks_0_attn1": 1280,
52
+ "up_blocks_1_attentions_1_transformer_blocks_0_attn1": 1280,
53
+ "up_blocks_1_attentions_2_temporal_transformer_blocks_0_attn1": 1280,
54
+ "up_blocks_1_attentions_2_transformer_blocks_0_attn1": 1280,
55
+ "up_blocks_2_attentions_0_temporal_transformer_blocks_0_attn1": 640,
56
+ "up_blocks_2_attentions_0_transformer_blocks_0_attn1": 640,
57
+ "up_blocks_2_attentions_1_temporal_transformer_blocks_0_attn1": 640,
58
+ "up_blocks_2_attentions_1_transformer_blocks_0_attn1": 640,
59
+ "up_blocks_2_attentions_2_temporal_transformer_blocks_0_attn1": 640,
60
+ "up_blocks_2_attentions_2_transformer_blocks_0_attn1": 640,
61
+ "up_blocks_3_attentions_0_temporal_transformer_blocks_0_attn1": 320,
62
+ "up_blocks_3_attentions_0_transformer_blocks_0_attn1": 320,
63
+ "up_blocks_3_attentions_1_temporal_transformer_blocks_0_attn1": 320,
64
+ "up_blocks_3_attentions_1_transformer_blocks_0_attn1": 320,
65
+ "up_blocks_3_attentions_2_temporal_transformer_blocks_0_attn1": 320,
66
+ "up_blocks_3_attentions_2_transformer_blocks_0_attn1": 320
67
+ },
68
+ "zero_init_theta": false
69
+ }
diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c00714c5e36a575d4252d3cc9573179a9e7f20b455795f766ba6f3d647232ffc
3
+ size 5332308028
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ba3d5b69587a2240b1f53fb04d0399571b2ead8fcb46b1579d7c69bcbdbc7d7
3
+ size 88022632
validation_images/step_10000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb8a65ac760b76fe0ba9bc017496875616d828667437d59a1038b0927d70726
3
+ size 221448
validation_images/step_10000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dc66945c7f0a78185c7e6ceade859330cf89f2e377b70f82a8acfb2377011b
3
+ size 326942
validation_images/step_1000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70fd0aaa7ae33ee346848e050baa520761f061209ef7f0e8ef8824e515effe39
3
+ size 225851
validation_images/step_1000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47e30a1a9d4391052f1c9735761c3fb9f2af48072f6104dd7207cf1ca36bd9d
3
+ size 388107
validation_images/step_10500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9abeb1bc1246d5b4fddc522b10c962ca9efe6e8477e3633bbcced32a2d7ef6b
3
+ size 225536
validation_images/step_10500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f39d61fd009da9d9b0b2fa8a3e9306183900fe6cc101d797de4d08d35aeca4
3
+ size 373698
validation_images/step_11000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2638db21dc56d66273c301f6978ad3de5b71cbfa50eb11468b9c55b6baf9fb6
3
+ size 291524
validation_images/step_11000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dc66945c7f0a78185c7e6ceade859330cf89f2e377b70f82a8acfb2377011b
3
+ size 326942
validation_images/step_11500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6bc4f18ca8b50c90ac27c613818bf8ca79bddc421a126c8b5b85672d600a760
3
+ size 244213
validation_images/step_11500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33633bbbe57f9bb50799ea41b671492355c9c2024f8539c111b19a889c2b269b
3
+ size 434816
validation_images/step_12000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cc25ea62b8f77f77daf5f92dc013556e134a11cb387e93458b4e416d7b771f3
3
+ size 655657
validation_images/step_12000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dc66945c7f0a78185c7e6ceade859330cf89f2e377b70f82a8acfb2377011b
3
+ size 326942
validation_images/step_12500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ad198a561643481a036e8533a45610b709c4a8d48f87d31ffe1fa625f7e2e8
3
+ size 277672
validation_images/step_12500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dc66945c7f0a78185c7e6ceade859330cf89f2e377b70f82a8acfb2377011b
3
+ size 326942
validation_images/step_13000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83180a5a6ae1046706c07f20be50b685a55874276d3b168b52269fc1d51d5e00
3
+ size 283763
validation_images/step_13000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473d6a231870302a69a14637357c97fb779f9c137513b7896dfb39cacbe820d9
3
+ size 306599
validation_images/step_13500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d688927d8b8226ff2a46191bd135ba423a9a10226ac5714dc344b9c63b8a958
3
+ size 251674
validation_images/step_13500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a647b759546969482e39732076f279e8cd959df340bfc18e025c98ee2f9b11
3
+ size 375376
validation_images/step_14000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d53aeea135d965fbc1f6f3c513eb06bea35fe94019a1bbf293c189fae8ae5c
3
+ size 269107
validation_images/step_14000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47e30a1a9d4391052f1c9735761c3fb9f2af48072f6104dd7207cf1ca36bd9d
3
+ size 388107
validation_images/step_14500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4339cdba7e518fc9babfc37565d38f83ce5db630e358463fcfc9cea0d8533410
3
+ size 239801
validation_images/step_14500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15f39d61fd009da9d9b0b2fa8a3e9306183900fe6cc101d797de4d08d35aeca4
3
+ size 373698
validation_images/step_15000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad68dd028150b329105d5b242cc13c566ccac3d3b43131766c2cf72f0f926dfe
3
+ size 242023
validation_images/step_15000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a647b759546969482e39732076f279e8cd959df340bfc18e025c98ee2f9b11
3
+ size 375376
validation_images/step_1500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa4a6119173e59ebc9824150e934eb026d884fcc9d7aee6863b3b6bf7dc48517
3
+ size 223194
validation_images/step_1500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473d6a231870302a69a14637357c97fb779f9c137513b7896dfb39cacbe820d9
3
+ size 306599
validation_images/step_1_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27466b31adfaf25a2c797374e942dcb441dceded556667592be04a3d2e65b1ff
3
+ size 686909
validation_images/step_1_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dc66945c7f0a78185c7e6ceade859330cf89f2e377b70f82a8acfb2377011b
3
+ size 326942
validation_images/step_2000_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d89e90b2cb96a5e4e6f5e628a151ab2d9faf4ce4e1f3f1d29aaee1b2aafb4b0
3
+ size 207004
validation_images/step_2000_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33633bbbe57f9bb50799ea41b671492355c9c2024f8539c111b19a889c2b269b
3
+ size 434816
validation_images/step_2500_val_recon_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81bf386c07bfefd22b38901f0781df55016dbec046a89a05df3ed163170bf421
3
+ size 235320
validation_images/step_2500_val_source_0.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473d6a231870302a69a14637357c97fb779f9c137513b7896dfb39cacbe820d9
3
+ size 306599