Move to in-library checkpoint
Browse files
- config.json +3 -4
- generation_config.json +1 -1
- metadata-1714912566.json +11 -0
- model-00001-of-00021.safetensors +2 -2
- model-00002-of-00021.safetensors +2 -2
- model-00003-of-00021.safetensors +2 -2
- model-00004-of-00021.safetensors +2 -2
- model-00005-of-00021.safetensors +2 -2
- model-00006-of-00021.safetensors +2 -2
- model-00007-of-00021.safetensors +2 -2
- model-00008-of-00021.safetensors +2 -2
- model-00009-of-00021.safetensors +2 -2
- model-00010-of-00021.safetensors +2 -2
- model-00011-of-00021.safetensors +2 -2
- model-00012-of-00021.safetensors +2 -2
- model-00013-of-00021.safetensors +2 -2
- model-00014-of-00021.safetensors +2 -2
- model-00015-of-00021.safetensors +2 -2
- model-00016-of-00021.safetensors +2 -2
- model-00017-of-00021.safetensors +2 -2
- model-00018-of-00021.safetensors +2 -2
- model-00019-of-00021.safetensors +2 -2
- model-00020-of-00021.safetensors +2 -2
- model-00021-of-00021.safetensors +2 -2
- model.safetensors.index.json +0 -0
- special_tokens_map.json +28 -4
config.json
CHANGED
@@ -12,7 +12,6 @@
|
|
12 |
"AutoModelForSequenceClassification": "model.JambaForSequenceClassification"
|
13 |
},
|
14 |
"bos_token_id": 1,
|
15 |
-
"calc_logits_for_entire_prompt": false,
|
16 |
"eos_token_id": 2,
|
17 |
"expert_layer_offset": 1,
|
18 |
"expert_layer_period": 2,
|
@@ -25,15 +24,15 @@
|
|
25 |
"mamba_d_state": 16,
|
26 |
"mamba_dt_rank": 256,
|
27 |
"mamba_expand": 2,
|
28 |
-
"mamba_inner_layernorms": true,
|
29 |
"mamba_proj_bias": false,
|
|
|
30 |
"model_type": "jamba",
|
31 |
-
"n_ctx": 262144,
|
32 |
"num_attention_heads": 32,
|
33 |
"num_experts": 16,
|
34 |
"num_experts_per_tok": 2,
|
35 |
"num_hidden_layers": 32,
|
36 |
"num_key_value_heads": 8,
|
|
|
37 |
"output_router_logits": false,
|
38 |
"pad_token_id": 0,
|
39 |
"rms_norm_eps": 1e-06,
|
@@ -41,7 +40,7 @@
|
|
41 |
"sliding_window": null,
|
42 |
"tie_word_embeddings": false,
|
43 |
"torch_dtype": "bfloat16",
|
44 |
-
"transformers_version": "4.40.
|
45 |
"use_cache": true,
|
46 |
"use_mamba_kernels": true,
|
47 |
"vocab_size": 65536
|
|
|
12 |
"AutoModelForSequenceClassification": "model.JambaForSequenceClassification"
|
13 |
},
|
14 |
"bos_token_id": 1,
|
|
|
15 |
"eos_token_id": 2,
|
16 |
"expert_layer_offset": 1,
|
17 |
"expert_layer_period": 2,
|
|
|
24 |
"mamba_d_state": 16,
|
25 |
"mamba_dt_rank": 256,
|
26 |
"mamba_expand": 2,
|
|
|
27 |
"mamba_proj_bias": false,
|
28 |
+
"max_position_embeddings": 262144,
|
29 |
"model_type": "jamba",
|
|
|
30 |
"num_attention_heads": 32,
|
31 |
"num_experts": 16,
|
32 |
"num_experts_per_tok": 2,
|
33 |
"num_hidden_layers": 32,
|
34 |
"num_key_value_heads": 8,
|
35 |
+
"num_logits_to_keep": 1,
|
36 |
"output_router_logits": false,
|
37 |
"pad_token_id": 0,
|
38 |
"rms_norm_eps": 1e-06,
|
|
|
40 |
"sliding_window": null,
|
41 |
"tie_word_embeddings": false,
|
42 |
"torch_dtype": "bfloat16",
|
43 |
+
"transformers_version": "4.40.1",
|
44 |
"use_cache": true,
|
45 |
"use_mamba_kernels": true,
|
46 |
"vocab_size": 65536
|
generation_config.json
CHANGED
@@ -3,5 +3,5 @@
|
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
"pad_token_id": 0,
|
6 |
-
"transformers_version": "4.40.
|
7 |
}
|
|
|
3 |
"bos_token_id": 1,
|
4 |
"eos_token_id": 2,
|
5 |
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.40.1"
|
7 |
}
|
metadata-1714912566.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"args": {
|
3 |
+
"legacy_model_path": "gs://ai21-algo-studio-research/tomer/published_Jamba-v0.1_new_format/legacy_jamba-v0.1",
|
4 |
+
"model_output_path": "gs://ai21-algo-studio-research/tomer/published_Jamba-v0.1_new_format/script_from_legacy_fmt"
|
5 |
+
},
|
6 |
+
"datetime": "2024-05-05 12:36:06",
|
7 |
+
"git_branch": "official-hf-format",
|
8 |
+
"git_short_sha": "0693ce8",
|
9 |
+
"script": "/app/scripts/convert_legacy_hf_to_official_hf.py",
|
10 |
+
"timestamp": 1714912566
|
11 |
+
}
|
model-00001-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1aace34ee0da3bf95605bd150fff6d3e78110be4048a3c389b0a740354b2ccb7
|
3 |
+
size 4951761424
|
model-00002-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ba1de67a86329431f14f7ffa165d84055d32ce57a6d2314e3b2464eac3732dc
|
3 |
+
size 4884669624
|
model-00003-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1abc4f16865fb78241c9453292ee3b2ca2c1e2d54ee945631da625834b95c9b2
|
3 |
+
size 4992557120
|
model-00004-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45fab97739a58e924791572ea3d06f9c90b9ff2a299460aaa4bd87c6e9d424f3
|
3 |
+
size 4958853560
|
model-00005-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4b0ec6e8f33e6d7b1f837cd4c25818487dcc7e478734606da28110507e51c97
|
3 |
+
size 4975763832
|
model-00006-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed98d5c3c8d7ab7352944bea09b0d54d98066cf567ba3d069da12c05575d56ed
|
3 |
+
size 4884669616
|
model-00007-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:735be2bc568711bf42a4caebcda8288dd300b31b48fa098b00df3cf1a98e10e2
|
3 |
+
size 4884669640
|
model-00008-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0c8d817b2b47661d361e8b520128b3194185f756cc2204a95d642e24895ee51
|
3 |
+
size 4992557176
|
model-00009-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e50222cf865ca5678d22574b131294303c46b249478cf70113c701f70331e999
|
3 |
+
size 4932507176
|
model-00010-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1b4b69b24ae55827b6c8b1e4a10807aa3525bc85f4d34dc002ac7440757fbf4
|
3 |
+
size 4884669672
|
model-00011-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60213cac13b92ed34b93ce48e670434f22e3bf8b2b8df20c60b7bf8a9515c35c
|
3 |
+
size 4884669696
|
model-00012-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05805eacd3bb40cc9da802350409f1cb078e8b276da7e06c7a8a5ca5b26cc887
|
3 |
+
size 4884669688
|
model-00013-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:201df979a1b34ced6cdbb7a790163412636779f1119e3845a704c489181d03d2
|
3 |
+
size 4932507176
|
model-00014-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0a7eb42a9ea3a385442c2e758dd5efd5dc5b913f1d10bfd37792cc963a33c93
|
3 |
+
size 4992557152
|
model-00015-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4b9afe4398000c28b36e3aa40c87086af673d4f8a64bfc5767941ab2008bcc9
|
3 |
+
size 4884669688
|
model-00016-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd1ac6cc861971c43bdf0c9c6d4c9fe72d33e5227e054a621e2e68f001419763
|
3 |
+
size 4884669688
|
model-00017-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52d9eea696dd29ef413d617bbcb62a9f159e8fe8170d36e018932cef45ee281d
|
3 |
+
size 4908522856
|
model-00018-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77acada7c098e81280645ea0a9dbfa00196dca6da8946498b9907e9e376fb42d
|
3 |
+
size 4908654000
|
model-00019-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09e10dfd6c6459cd3460b1d667639717d3657274c1694c19a6fdbac1be6a76bf
|
3 |
+
size 4992557168
|
model-00020-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bd5c27b2cca6e06f7b4497ce8c9b1522a64846817a871bad274d08507960ed0
|
3 |
+
size 4884669696
|
model-00021-of-00021.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a47ef23db8deb5364da676a40dc3dcb011fb9d9ceef13ba044c176e9a83ac1e3
|
3 |
+
size 4647318576
|
model.safetensors.index.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
CHANGED
@@ -1,6 +1,30 @@
|
|
1 |
{
|
2 |
-
"bos_token":
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
}
|
|
|
1 |
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|startoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<|pad|>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "<|unk|>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
}
|