Upload folder using huggingface_hub
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +56 -0
- added_tokens.json +6 -0
- checkpoint-5000/added_tokens.json +6 -0
- checkpoint-5000/config.json +93 -0
- checkpoint-5000/generation_config.json +6 -0
- checkpoint-5000/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- checkpoint-5000/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- checkpoint-5000/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- checkpoint-5000/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- checkpoint-5000/global_step5000/expp_rank_0_mp_rank_00_optim_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_0_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_0_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_0_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_0_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_10_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_10_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_10_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_10_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_11_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_11_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_11_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_11_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_1_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_1_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_1_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_1_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_2_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_2_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_2_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_2_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_3_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_3_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_3_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_3_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_4_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_4_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_4_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_4_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_5_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_5_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_5_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_5_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_6_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_6_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_6_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_6_expert_3_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_7_expert_0_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_7_expert_1_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_7_expert_2_mp_rank_00_model_states.pt +3 -0
- checkpoint-5000/global_step5000/layer_7_expert_3_mp_rank_00_model_states.pt +3 -0
README.md
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- generated_from_trainer
|
4 |
+
model-index:
|
5 |
+
- name: ft-moe-llava-qwen1.5-1.8b-vista-1ep
|
6 |
+
results: []
|
7 |
+
---
|
8 |
+
|
9 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
10 |
+
should probably proofread and complete it, then remove this comment. -->
|
11 |
+
|
12 |
+
# ft-moe-llava-qwen1.5-1.8b-vista-1ep
|
13 |
+
|
14 |
+
This model is a fine-tuned version of the `moe-llava-qwen1.5-1.8b-ft-mergedlora` checkpoint on an unknown dataset.
|
15 |
+
|
16 |
+
## Model description
|
17 |
+
|
18 |
+
More information needed
|
19 |
+
|
20 |
+
## Intended uses & limitations
|
21 |
+
|
22 |
+
More information needed
|
23 |
+
|
24 |
+
## Training and evaluation data
|
25 |
+
|
26 |
+
More information needed
|
27 |
+
|
28 |
+
## Training procedure
|
29 |
+
|
30 |
+
### Training hyperparameters
|
31 |
+
|
32 |
+
The following hyperparameters were used during training:
|
33 |
+
- learning_rate: 2e-05
|
34 |
+
- train_batch_size: 4
|
35 |
+
- eval_batch_size: 4
|
36 |
+
- seed: 42
|
37 |
+
- distributed_type: multi-GPU
|
38 |
+
- num_devices: 4
|
39 |
+
- gradient_accumulation_steps: 8
|
40 |
+
- total_train_batch_size: 128
|
41 |
+
- total_eval_batch_size: 16
|
42 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
43 |
+
- lr_scheduler_type: cosine
|
44 |
+
- lr_scheduler_warmup_ratio: 0.03
|
45 |
+
- num_epochs: 1.0
|
46 |
+
|
47 |
+
### Training results
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
### Framework versions
|
52 |
+
|
53 |
+
- Transformers 4.37.0
|
54 |
+
- Pytorch 2.0.1+cu117
|
55 |
+
- Datasets 2.20.0
|
56 |
+
- Tokenizers 0.15.1
|
added_tokens.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|endoftext|>": 151643,
|
3 |
+
"<|extra_0|>": 151646,
|
4 |
+
"<|im_end|>": 151645,
|
5 |
+
"<|im_start|>": 151644
|
6 |
+
}
|
checkpoint-5000/added_tokens.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|endoftext|>": 151643,
|
3 |
+
"<|extra_0|>": 151646,
|
4 |
+
"<|im_end|>": 151645,
|
5 |
+
"<|im_start|>": 151644
|
6 |
+
}
|
checkpoint-5000/config.json
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home/users/astar/ares/stunvat/scratch/checkpoints/moe-llava-qwen1.5-1.8b-ft-mergedlora",
|
3 |
+
"architectures": [
|
4 |
+
"MoELLaVAQwen1_5ForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151643,
|
8 |
+
"eos_token_id": 151643,
|
9 |
+
"freeze_mm_mlp_adapter": false,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 2048,
|
12 |
+
"image_aspect_ratio": "pad",
|
13 |
+
"image_projector_type": "mlp2x_gelu",
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 5504,
|
16 |
+
"lora": {},
|
17 |
+
"max_position_embeddings": 32768,
|
18 |
+
"max_window_layers": 21,
|
19 |
+
"mm_hidden_size": 768,
|
20 |
+
"mm_image_tower": "google/siglip-base-patch16-256-multilingual",
|
21 |
+
"mm_projector_lr": null,
|
22 |
+
"mm_use_im_patch_token": false,
|
23 |
+
"mm_use_im_start_end": false,
|
24 |
+
"mm_video_tower": null,
|
25 |
+
"mm_vision_select_feature": "patch",
|
26 |
+
"mm_vision_select_layer": -2,
|
27 |
+
"model_type": "moe_llava_qwen1_5",
|
28 |
+
"moe": {
|
29 |
+
"capacity_factor": 1.5,
|
30 |
+
"ep_size": 1,
|
31 |
+
"eval_capacity_factor": 2.0,
|
32 |
+
"min_capacity": 0,
|
33 |
+
"moe_enable": true,
|
34 |
+
"moe_layers_idx": [
|
35 |
+
0,
|
36 |
+
2,
|
37 |
+
4,
|
38 |
+
6,
|
39 |
+
8,
|
40 |
+
10,
|
41 |
+
12,
|
42 |
+
14,
|
43 |
+
16,
|
44 |
+
18,
|
45 |
+
20,
|
46 |
+
22
|
47 |
+
],
|
48 |
+
"moe_mode": "sparse",
|
49 |
+
"num_experts": [
|
50 |
+
4,
|
51 |
+
4,
|
52 |
+
4,
|
53 |
+
4,
|
54 |
+
4,
|
55 |
+
4,
|
56 |
+
4,
|
57 |
+
4,
|
58 |
+
4,
|
59 |
+
4,
|
60 |
+
4,
|
61 |
+
4
|
62 |
+
],
|
63 |
+
"router_aux_loss_coef": 0.01,
|
64 |
+
"top_k_experts": 2,
|
65 |
+
"train_modules": [
|
66 |
+
"mlp.gate_proj",
|
67 |
+
"mlp.up_proj",
|
68 |
+
"mlp.down_proj",
|
69 |
+
"wg"
|
70 |
+
],
|
71 |
+
"use_residual": false
|
72 |
+
},
|
73 |
+
"num_attention_heads": 16,
|
74 |
+
"num_hidden_layers": 24,
|
75 |
+
"num_key_value_heads": 16,
|
76 |
+
"pad_token_id": 151646,
|
77 |
+
"rms_norm_eps": 1e-06,
|
78 |
+
"rope_theta": 1000000.0,
|
79 |
+
"sliding_window": 32768,
|
80 |
+
"tie_word_embeddings": false,
|
81 |
+
"tokenizer_padding_side": "right",
|
82 |
+
"torch_dtype": "bfloat16",
|
83 |
+
"transformers_version": "4.37.0",
|
84 |
+
"tune_mm_mlp_adapter": false,
|
85 |
+
"use_cache": false,
|
86 |
+
"use_mm_proj": true,
|
87 |
+
"use_sliding_window": false,
|
88 |
+
"video_global_proj": false,
|
89 |
+
"video_projector_type": "linear",
|
90 |
+
"video_spatial_proj": false,
|
91 |
+
"video_temproal_proj": false,
|
92 |
+
"vocab_size": 151646
|
93 |
+
}
|
checkpoint-5000/generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 151643,
|
3 |
+
"eos_token_id": 151643,
|
4 |
+
"max_new_tokens": 2048,
|
5 |
+
"transformers_version": "4.37.0"
|
6 |
+
}
|
checkpoint-5000/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c31ba4a3eba2816ef9661d7766e86eb9f092473f719d0474591ea2d7cf0e6d4b
|
3 |
+
size 6104617125
|
checkpoint-5000/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59941ab6bd196a1da9b1471f65b6369c9837efd291758ffc4707839c4ba2531d
|
3 |
+
size 6104618149
|
checkpoint-5000/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e30d5b971fc56a0a52de5679098742e9d6f531cfe5e89fcdef6be6e73a69f7b
|
3 |
+
size 6104618213
|
checkpoint-5000/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0991abea1b91c93aee41296d255981df0aad036a5bef8f4566016b94e0b79f
|
3 |
+
size 6104617061
|
checkpoint-5000/global_step5000/expp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e070faf58961c554d62a09159b94189a8cceac00357b8f979eedec050250237a
|
3 |
+
size 551
|
checkpoint-5000/global_step5000/layer_0_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f25430257f8197684990b8089cb019a4bdcb5f547e2dc3a9128115f6a0b37a9a
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_0_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9d7dac5973e3243815bfda94e04b87d5bc17ec40154fb5f1e50da9259259c2e
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_0_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc0026f43f26fd9d2e4b1c95b0bc976775d4a9b462781aa6ec3271d5f7470747
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_0_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20c34ce9cc02f812f636fa52b334747df4fcf74822e1ce8129711be9fe34be10
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_10_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0f6d60d0995bb4d55f2cf6ae4d2ed48674642b93294b0e5207efddb68ecac7c
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_10_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea61a8a3aa9c25f6357e6e84719737a455999e9f989e273b3bfbd8970f21feae
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_10_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:228a75e0dcb3c17ed6daed6ec0150e88f90e74b2b4e18c9df99c5d04e79fab6a
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_10_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba6106ffdb04535eca54582be00fcbf249c9dbd9bd18e5055a2e0af70d4c9570
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_11_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28d485b5e0039f8c464f59b1e2144ae6bc96aa59286d232028a00614cac413bd
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_11_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2327153024393e7497e1c47814a1cda4126e7c9e8c0a0b0ee672f88a55c985f5
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_11_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53eafcad886cf9d5c973f3e95c426c57d580dc4271678185a7fdf07f2b6ef47d
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_11_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5454b501b94542f265ddcded6c7198864a51544ff989454f3d3246e0e8bd6e62
|
3 |
+
size 67634957
|
checkpoint-5000/global_step5000/layer_1_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d80e6a590325ae226d43a2d4011ca819df198713f30744891655a96f81080f79
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_1_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b5d86eb4aeea20fcb257900cc3c7e69d7252dec22179ea7155452b555e1f99d
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_1_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01bc98ec629b99624a07db295410f33ef096e3f6527fc83a26e4614713ac7ce7
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_1_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5837e10f395a2063e3450cdbf3d5d7c5531bf0d5cd16119d44a392ac4bec468a
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_2_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2c4ba321ac51e445e2cfc28140531e6d977a635a80b2485100665d537822e5e
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_2_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12689a8b4882807374e6cc67ab444b663693c5b99cd4dae3409ae52972ac1301
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_2_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:320d52ab224156071e1eff46863b026516aadcac8e7dc7926229ba4177f1d6f7
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_2_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89f26805c7eb5d5d0d09c5987077623e390846b1bdec92628850bd358eec3375
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_3_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3d4034590081e03a045ad0df9d83643925e2166976d9c1a6fd0c421368dccac
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_3_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82882a04998db9e0c3fbb71fb77bbe06e7d62d5df809cee6bfc1c7d297a8b5e4
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_3_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3fc96669cd64ddbd3b8b0413839856f361babb01cf7fe691be743dbd21af725
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_3_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13b66b8fb13dc4fce77e055132c52ea6bef803224b49b351855b52c2ddc403cb
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_4_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e25a4ac44377b3cfc1ec1a2c57047e489195daab726496c797bed57367ad57d1
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_4_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c09a01d71c9ca6e5528ab25e0a0970722bee94db77fdb8578b0f83155f60d640
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_4_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc68203cc2b61e3e552312ed7686e2f06401a1faa43e53a32ce67cca8e5ea358
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_4_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6acee99d8584b604a4eadb763742f2392e10c029cbd87b163527e338d8400254
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_5_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7264679c47c8fa21702cd505f90b4698f0bac67d7d7e0ccd119678278710eafe
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_5_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f0fa0459e9cecbbfad0ea5ab287d6d522d68d383ef5dc308c936b481b6463ce
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_5_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:260f67f6dea1cd917c99f9565299eaa204cb78bd75c9d6824e4f6b12371eb0d2
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_5_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1166b77de637a2002e5712dc3bc58dd6ac37e20fde3c48584627c330e8b61504
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_6_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:259ecbef691922cbf3ed0b698849e6904b322effdc319737055cf4d78e4563d6
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_6_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae945fffdef1faa5ffe8b2bdea0792b4552b1c9a89d37eade3148c3a5a7204dc
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_6_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3aa771ff2f6d26e61bee6935c3ec331b76c1e9567ba42dcb798a28f39112f29
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_6_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bef0f17edcf50c281305125972c82a532429ec60d2ce652b8dea111b89a219e
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_7_expert_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f292dae6b9557e0bff873982a000736f5c0e0345ac9609b01d05998815b2532f
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_7_expert_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76f8d461641c49f84c0c4e07dbbf1d5661eaaada858b0df7f580cac8eaf30bb6
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_7_expert_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed440c19ea3d15b418dd72f6f03af5bbffbbea7ea88f9a952c1fc7fb7be85f90
|
3 |
+
size 67634952
|
checkpoint-5000/global_step5000/layer_7_expert_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c79a5dec960231fb1ee287d5ddbadae8a14804b3e77cda2de8e4c2f941638035
|
3 |
+
size 67634952
|