tuanio committed (verified)
Commit 7683c6a
1 Parent(s): aeceea5

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. README.md +56 -0
  2. added_tokens.json +6 -0
  3. checkpoint-5000/added_tokens.json +6 -0
  4. checkpoint-5000/config.json +93 -0
  5. checkpoint-5000/generation_config.json +6 -0
  6. checkpoint-5000/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  7. checkpoint-5000/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  8. checkpoint-5000/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  9. checkpoint-5000/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  10. checkpoint-5000/global_step5000/expp_rank_0_mp_rank_00_optim_states.pt +3 -0
  11. checkpoint-5000/global_step5000/layer_0_expert_0_mp_rank_00_model_states.pt +3 -0
  12. checkpoint-5000/global_step5000/layer_0_expert_1_mp_rank_00_model_states.pt +3 -0
  13. checkpoint-5000/global_step5000/layer_0_expert_2_mp_rank_00_model_states.pt +3 -0
  14. checkpoint-5000/global_step5000/layer_0_expert_3_mp_rank_00_model_states.pt +3 -0
  15. checkpoint-5000/global_step5000/layer_10_expert_0_mp_rank_00_model_states.pt +3 -0
  16. checkpoint-5000/global_step5000/layer_10_expert_1_mp_rank_00_model_states.pt +3 -0
  17. checkpoint-5000/global_step5000/layer_10_expert_2_mp_rank_00_model_states.pt +3 -0
  18. checkpoint-5000/global_step5000/layer_10_expert_3_mp_rank_00_model_states.pt +3 -0
  19. checkpoint-5000/global_step5000/layer_11_expert_0_mp_rank_00_model_states.pt +3 -0
  20. checkpoint-5000/global_step5000/layer_11_expert_1_mp_rank_00_model_states.pt +3 -0
  21. checkpoint-5000/global_step5000/layer_11_expert_2_mp_rank_00_model_states.pt +3 -0
  22. checkpoint-5000/global_step5000/layer_11_expert_3_mp_rank_00_model_states.pt +3 -0
  23. checkpoint-5000/global_step5000/layer_1_expert_0_mp_rank_00_model_states.pt +3 -0
  24. checkpoint-5000/global_step5000/layer_1_expert_1_mp_rank_00_model_states.pt +3 -0
  25. checkpoint-5000/global_step5000/layer_1_expert_2_mp_rank_00_model_states.pt +3 -0
  26. checkpoint-5000/global_step5000/layer_1_expert_3_mp_rank_00_model_states.pt +3 -0
  27. checkpoint-5000/global_step5000/layer_2_expert_0_mp_rank_00_model_states.pt +3 -0
  28. checkpoint-5000/global_step5000/layer_2_expert_1_mp_rank_00_model_states.pt +3 -0
  29. checkpoint-5000/global_step5000/layer_2_expert_2_mp_rank_00_model_states.pt +3 -0
  30. checkpoint-5000/global_step5000/layer_2_expert_3_mp_rank_00_model_states.pt +3 -0
  31. checkpoint-5000/global_step5000/layer_3_expert_0_mp_rank_00_model_states.pt +3 -0
  32. checkpoint-5000/global_step5000/layer_3_expert_1_mp_rank_00_model_states.pt +3 -0
  33. checkpoint-5000/global_step5000/layer_3_expert_2_mp_rank_00_model_states.pt +3 -0
  34. checkpoint-5000/global_step5000/layer_3_expert_3_mp_rank_00_model_states.pt +3 -0
  35. checkpoint-5000/global_step5000/layer_4_expert_0_mp_rank_00_model_states.pt +3 -0
  36. checkpoint-5000/global_step5000/layer_4_expert_1_mp_rank_00_model_states.pt +3 -0
  37. checkpoint-5000/global_step5000/layer_4_expert_2_mp_rank_00_model_states.pt +3 -0
  38. checkpoint-5000/global_step5000/layer_4_expert_3_mp_rank_00_model_states.pt +3 -0
  39. checkpoint-5000/global_step5000/layer_5_expert_0_mp_rank_00_model_states.pt +3 -0
  40. checkpoint-5000/global_step5000/layer_5_expert_1_mp_rank_00_model_states.pt +3 -0
  41. checkpoint-5000/global_step5000/layer_5_expert_2_mp_rank_00_model_states.pt +3 -0
  42. checkpoint-5000/global_step5000/layer_5_expert_3_mp_rank_00_model_states.pt +3 -0
  43. checkpoint-5000/global_step5000/layer_6_expert_0_mp_rank_00_model_states.pt +3 -0
  44. checkpoint-5000/global_step5000/layer_6_expert_1_mp_rank_00_model_states.pt +3 -0
  45. checkpoint-5000/global_step5000/layer_6_expert_2_mp_rank_00_model_states.pt +3 -0
  46. checkpoint-5000/global_step5000/layer_6_expert_3_mp_rank_00_model_states.pt +3 -0
  47. checkpoint-5000/global_step5000/layer_7_expert_0_mp_rank_00_model_states.pt +3 -0
  48. checkpoint-5000/global_step5000/layer_7_expert_1_mp_rank_00_model_states.pt +3 -0
  49. checkpoint-5000/global_step5000/layer_7_expert_2_mp_rank_00_model_states.pt +3 -0
  50. checkpoint-5000/global_step5000/layer_7_expert_3_mp_rank_00_model_states.pt +3 -0
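The commit message states the folder was pushed with `huggingface_hub`. A minimal sketch of such an upload (the local folder path and target repo id below are illustrative assumptions, not taken from this commit):

```python
from huggingface_hub import upload_folder

# Hypothetical local path and repo id, shown only to illustrate the call.
upload_folder(
    folder_path="./ft-moe-llava-qwen1.5-1.8b-vista-1ep",
    repo_id="tuanio/ft-moe-llava-qwen1.5-1.8b-vista-1ep",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```

The Hub stores the large `.pt` checkpoint shards from such an upload through Git LFS, which is why they appear later in this diff as small pointer files.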
README.md ADDED
@@ -0,0 +1,56 @@
+ ---
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: ft-moe-llava-qwen1.5-1.8b-vista-1ep
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # ft-moe-llava-qwen1.5-1.8b-vista-1ep
+
+ This model was trained from scratch on an unknown dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 4
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 128
+ - total_eval_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.03
+ - num_epochs: 1.0
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.37.0
+ - Pytorch 2.0.1+cu117
+ - Datasets 2.20.0
+ - Tokenizers 0.15.1
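The effective batch sizes in the card follow from the per-device settings; a quick check of the arithmetic (plain Python, values copied from the hyperparameters above):

```python
# Values from the training hyperparameters listed in the README.
train_batch_size = 4             # per device
num_devices = 4
gradient_accumulation_steps = 8

total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps
assert total_train_batch_size == 128

# Evaluation uses no gradient accumulation.
total_eval_batch_size = 4 * 4    # eval_batch_size * num_devices
assert total_eval_batch_size == 16
```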
added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "<|endoftext|>": 151643,
+ "<|extra_0|>": 151646,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644
+ }
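These four entries extend the Qwen1.5 tokenizer vocabulary; id 151646 (`<|extra_0|>`) also appears as `pad_token_id` in `checkpoint-5000/config.json` below. A minimal sketch of reading the mapping (path assumed relative to the repository root):

```python
import json

# Load the special-token additions shipped with the model.
with open("added_tokens.json") as f:
    added_tokens = json.load(f)

# <|extra_0|> (id 151646) doubles as the pad token in the model config.
pad_token = next(tok for tok, idx in added_tokens.items() if idx == 151646)
print(pad_token)  # -> <|extra_0|>
```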
checkpoint-5000/added_tokens.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "<|endoftext|>": 151643,
+ "<|extra_0|>": 151646,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644
+ }
checkpoint-5000/config.json ADDED
@@ -0,0 +1,93 @@
+ {
+ "_name_or_path": "/home/users/astar/ares/stunvat/scratch/checkpoints/moe-llava-qwen1.5-1.8b-ft-mergedlora",
+ "architectures": [
+ "MoELLaVAQwen1_5ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "freeze_mm_mlp_adapter": false,
+ "hidden_act": "silu",
+ "hidden_size": 2048,
+ "image_aspect_ratio": "pad",
+ "image_projector_type": "mlp2x_gelu",
+ "initializer_range": 0.02,
+ "intermediate_size": 5504,
+ "lora": {},
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "mm_hidden_size": 768,
+ "mm_image_tower": "google/siglip-base-patch16-256-multilingual",
+ "mm_projector_lr": null,
+ "mm_use_im_patch_token": false,
+ "mm_use_im_start_end": false,
+ "mm_video_tower": null,
+ "mm_vision_select_feature": "patch",
+ "mm_vision_select_layer": -2,
+ "model_type": "moe_llava_qwen1_5",
+ "moe": {
+ "capacity_factor": 1.5,
+ "ep_size": 1,
+ "eval_capacity_factor": 2.0,
+ "min_capacity": 0,
+ "moe_enable": true,
+ "moe_layers_idx": [
+ 0,
+ 2,
+ 4,
+ 6,
+ 8,
+ 10,
+ 12,
+ 14,
+ 16,
+ 18,
+ 20,
+ 22
+ ],
+ "moe_mode": "sparse",
+ "num_experts": [
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4
+ ],
+ "router_aux_loss_coef": 0.01,
+ "top_k_experts": 2,
+ "train_modules": [
+ "mlp.gate_proj",
+ "mlp.up_proj",
+ "mlp.down_proj",
+ "wg"
+ ],
+ "use_residual": false
+ },
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "num_key_value_heads": 16,
+ "pad_token_id": 151646,
+ "rms_norm_eps": 1e-06,
+ "rope_theta": 1000000.0,
+ "sliding_window": 32768,
+ "tie_word_embeddings": false,
+ "tokenizer_padding_side": "right",
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.37.0",
+ "tune_mm_mlp_adapter": false,
+ "use_cache": false,
+ "use_mm_proj": true,
+ "use_sliding_window": false,
+ "video_global_proj": false,
+ "video_projector_type": "linear",
+ "video_spatial_proj": false,
+ "video_temproal_proj": false,
+ "vocab_size": 151646
+ }
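Per the `moe` block in this config, 12 of the 24 hidden layers (the even-indexed ones, 0 through 22) are sparse MoE layers with 4 experts each and top-2 routing, which is consistent with the per-layer, per-expert `*_model_states.pt` shards in this commit (12 × 4 = 48 expert files). A small sketch deriving those counts from the config (path assumed):

```python
import json

# Derive the MoE layout described by the checkpoint config.
with open("checkpoint-5000/config.json") as f:
    cfg = json.load(f)

moe = cfg["moe"]
num_moe_layers = len(moe["moe_layers_idx"])   # 12 (layers 0, 2, ..., 22)
experts_per_layer = moe["num_experts"][0]     # 4 experts in every MoE layer
top_k = moe["top_k_experts"]                  # 2 experts routed per token

print("expert shards expected:", num_moe_layers * experts_per_layer)  # 48
```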
checkpoint-5000/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "max_new_tokens": 2048,
+ "transformers_version": "4.37.0"
+ }
checkpoint-5000/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c31ba4a3eba2816ef9661d7766e86eb9f092473f719d0474591ea2d7cf0e6d4b
+ size 6104617125
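The `.pt` entries in this commit are Git LFS pointer files (spec version, SHA-256 object id, size in bytes) rather than the tensors themselves. A minimal sketch of parsing such a pointer; the helper below is illustrative, not part of any library:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file into its three fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

# Pointer contents copied from the rank-0 optimizer-state file above.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:c31ba4a3eba2816ef9661d7766e86eb9f092473f719d0474591ea2d7cf0e6d4b
size 6104617125"""

print(parse_lfs_pointer(pointer)["size_bytes"] / 1e9)  # ~6.1 GB of ZeRO optimizer state
```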
checkpoint-5000/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:59941ab6bd196a1da9b1471f65b6369c9837efd291758ffc4707839c4ba2531d
+ size 6104618149
checkpoint-5000/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e30d5b971fc56a0a52de5679098742e9d6f531cfe5e89fcdef6be6e73a69f7b
+ size 6104618213
checkpoint-5000/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f0991abea1b91c93aee41296d255981df0aad036a5bef8f4566016b94e0b79f
+ size 6104617061
checkpoint-5000/global_step5000/expp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e070faf58961c554d62a09159b94189a8cceac00357b8f979eedec050250237a
+ size 551
checkpoint-5000/global_step5000/layer_0_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f25430257f8197684990b8089cb019a4bdcb5f547e2dc3a9128115f6a0b37a9a
+ size 67634952
checkpoint-5000/global_step5000/layer_0_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f9d7dac5973e3243815bfda94e04b87d5bc17ec40154fb5f1e50da9259259c2e
+ size 67634952
checkpoint-5000/global_step5000/layer_0_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc0026f43f26fd9d2e4b1c95b0bc976775d4a9b462781aa6ec3271d5f7470747
+ size 67634952
checkpoint-5000/global_step5000/layer_0_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20c34ce9cc02f812f636fa52b334747df4fcf74822e1ce8129711be9fe34be10
+ size 67634952
checkpoint-5000/global_step5000/layer_10_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c0f6d60d0995bb4d55f2cf6ae4d2ed48674642b93294b0e5207efddb68ecac7c
+ size 67634957
checkpoint-5000/global_step5000/layer_10_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea61a8a3aa9c25f6357e6e84719737a455999e9f989e273b3bfbd8970f21feae
+ size 67634957
checkpoint-5000/global_step5000/layer_10_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:228a75e0dcb3c17ed6daed6ec0150e88f90e74b2b4e18c9df99c5d04e79fab6a
+ size 67634957
checkpoint-5000/global_step5000/layer_10_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba6106ffdb04535eca54582be00fcbf249c9dbd9bd18e5055a2e0af70d4c9570
+ size 67634957
checkpoint-5000/global_step5000/layer_11_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28d485b5e0039f8c464f59b1e2144ae6bc96aa59286d232028a00614cac413bd
+ size 67634957
checkpoint-5000/global_step5000/layer_11_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2327153024393e7497e1c47814a1cda4126e7c9e8c0a0b0ee672f88a55c985f5
+ size 67634957
checkpoint-5000/global_step5000/layer_11_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53eafcad886cf9d5c973f3e95c426c57d580dc4271678185a7fdf07f2b6ef47d
+ size 67634957
checkpoint-5000/global_step5000/layer_11_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5454b501b94542f265ddcded6c7198864a51544ff989454f3d3246e0e8bd6e62
+ size 67634957
checkpoint-5000/global_step5000/layer_1_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d80e6a590325ae226d43a2d4011ca819df198713f30744891655a96f81080f79
+ size 67634952
checkpoint-5000/global_step5000/layer_1_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b5d86eb4aeea20fcb257900cc3c7e69d7252dec22179ea7155452b555e1f99d
+ size 67634952
checkpoint-5000/global_step5000/layer_1_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01bc98ec629b99624a07db295410f33ef096e3f6527fc83a26e4614713ac7ce7
+ size 67634952
checkpoint-5000/global_step5000/layer_1_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5837e10f395a2063e3450cdbf3d5d7c5531bf0d5cd16119d44a392ac4bec468a
+ size 67634952
checkpoint-5000/global_step5000/layer_2_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f2c4ba321ac51e445e2cfc28140531e6d977a635a80b2485100665d537822e5e
+ size 67634952
checkpoint-5000/global_step5000/layer_2_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12689a8b4882807374e6cc67ab444b663693c5b99cd4dae3409ae52972ac1301
+ size 67634952
checkpoint-5000/global_step5000/layer_2_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:320d52ab224156071e1eff46863b026516aadcac8e7dc7926229ba4177f1d6f7
+ size 67634952
checkpoint-5000/global_step5000/layer_2_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89f26805c7eb5d5d0d09c5987077623e390846b1bdec92628850bd358eec3375
+ size 67634952
checkpoint-5000/global_step5000/layer_3_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f3d4034590081e03a045ad0df9d83643925e2166976d9c1a6fd0c421368dccac
+ size 67634952
checkpoint-5000/global_step5000/layer_3_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:82882a04998db9e0c3fbb71fb77bbe06e7d62d5df809cee6bfc1c7d297a8b5e4
+ size 67634952
checkpoint-5000/global_step5000/layer_3_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f3fc96669cd64ddbd3b8b0413839856f361babb01cf7fe691be743dbd21af725
+ size 67634952
checkpoint-5000/global_step5000/layer_3_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13b66b8fb13dc4fce77e055132c52ea6bef803224b49b351855b52c2ddc403cb
+ size 67634952
checkpoint-5000/global_step5000/layer_4_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e25a4ac44377b3cfc1ec1a2c57047e489195daab726496c797bed57367ad57d1
+ size 67634952
checkpoint-5000/global_step5000/layer_4_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c09a01d71c9ca6e5528ab25e0a0970722bee94db77fdb8578b0f83155f60d640
+ size 67634952
checkpoint-5000/global_step5000/layer_4_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fc68203cc2b61e3e552312ed7686e2f06401a1faa43e53a32ce67cca8e5ea358
+ size 67634952
checkpoint-5000/global_step5000/layer_4_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6acee99d8584b604a4eadb763742f2392e10c029cbd87b163527e338d8400254
+ size 67634952
checkpoint-5000/global_step5000/layer_5_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7264679c47c8fa21702cd505f90b4698f0bac67d7d7e0ccd119678278710eafe
+ size 67634952
checkpoint-5000/global_step5000/layer_5_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f0fa0459e9cecbbfad0ea5ab287d6d522d68d383ef5dc308c936b481b6463ce
+ size 67634952
checkpoint-5000/global_step5000/layer_5_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:260f67f6dea1cd917c99f9565299eaa204cb78bd75c9d6824e4f6b12371eb0d2
+ size 67634952
checkpoint-5000/global_step5000/layer_5_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1166b77de637a2002e5712dc3bc58dd6ac37e20fde3c48584627c330e8b61504
+ size 67634952
checkpoint-5000/global_step5000/layer_6_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:259ecbef691922cbf3ed0b698849e6904b322effdc319737055cf4d78e4563d6
+ size 67634952
checkpoint-5000/global_step5000/layer_6_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae945fffdef1faa5ffe8b2bdea0792b4552b1c9a89d37eade3148c3a5a7204dc
+ size 67634952
checkpoint-5000/global_step5000/layer_6_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f3aa771ff2f6d26e61bee6935c3ec331b76c1e9567ba42dcb798a28f39112f29
+ size 67634952
checkpoint-5000/global_step5000/layer_6_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3bef0f17edcf50c281305125972c82a532429ec60d2ce652b8dea111b89a219e
+ size 67634952
checkpoint-5000/global_step5000/layer_7_expert_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f292dae6b9557e0bff873982a000736f5c0e0345ac9609b01d05998815b2532f
+ size 67634952
checkpoint-5000/global_step5000/layer_7_expert_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76f8d461641c49f84c0c4e07dbbf1d5661eaaada858b0df7f580cac8eaf30bb6
+ size 67634952
checkpoint-5000/global_step5000/layer_7_expert_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed440c19ea3d15b418dd72f6f03af5bbffbbea7ea88f9a952c1fc7fb7be85f90
+ size 67634952
checkpoint-5000/global_step5000/layer_7_expert_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c79a5dec960231fb1ee287d5ddbadae8a14804b3e77cda2de8e4c2f941638035
+ size 67634952