maxall4 committed
Commit 0dba1f5
1 Parent(s): ad9eb94

Update config.json

Files changed (1)
  1. config.json +88 -57
config.json CHANGED
@@ -1,58 +1,89 @@
- {
-   "_commit_hash": "3b191f9a32eeba9187bbba4475a4b2a1b2de6b3d",
-   "_name_or_path": "togethercomputer/evo-1-131k-base",
-   "architectures": ["StripedHyenaModelForCausalLM"],
-   "attn_layer_idxs": [8, 16, 24],
-   "auto_map": {
-     "AutoConfig": "togethercomputer/evo-1-131k-base--configuration_hyena.StripedHyenaConfig",
-     "AutoModelForCausalLM": "togethercomputer/evo-1-131k-base--modeling_hyena.StripedHyenaModelForCausalLM",
-     "AutoTokenizer": [
-       "togethercomputer/evo-1-131k-base--tokenizer.ByteTokenizer",
-       null
-     ]
-   },
-   "column_split": false,
-   "column_split_hyena": true,
-   "eps": 1e-6,
-   "final_norm": true,
-   "hidden_size": 4096,
-   "hyena_filter_groups": 1,
-   "hyena_layer_idxs": [
-     0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21,
-     22, 23, 25, 26, 27, 28, 29, 30, 31
-   ],
-   "inference_mode": false,
-   "inner_mlp_size": 10928,
-   "log_intermediate_values": false,
-   "make_vocab_size_divisible_by": 8,
-   "max_seqlen": 131072,
-   "mha_out_proj_bias": true,
-   "mlp_activation": "gelu",
-   "model_parallel_size": 1,
-   "model_type": "stripedhyena",
-   "num_attention_heads": 32,
-   "num_filters": 4096,
-   "num_layers": 32,
-   "pipe_parallel_size": 1,
-   "prefill_style": "fft",
-   "proj_groups": 1,
-   "qkv_proj_bias": true,
-   "rotary_emb_base": 10000,
-   "rotary_emb_scaling_factor": 16,
-   "short_filter_bias": true,
-   "short_filter_length": 3,
-   "smeared_gqa": false,
-   "split_k0": true,
-   "state_size": 8,
-   "tie_embeddings": true,
-   "torch_dtype": "bfloat16",
-   "transformers_version": null,
-   "use_cache": false,
-   "use_flash_attention_2": true,
-   "use_flash_attn": true,
-   "use_flash_depthwise": true,
-   "use_flash_rmsnorm": false,
-   "use_flashfft": false,
-   "use_interpolated_rotary_pos_emb": true,
-   "vocab_size": 512
+   "_commit_hash": null,
+   "_name_or_path": "togethercomputer/evo-1-131k-base",
+   "architectures": [
+     "StripedHyenaModelForCausalLM"
+   ],
+   "attn_layer_idxs": [
+     8,
+     16,
+     24
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_hyena.StripedHyenaConfig",
+     "AutoModelForCausalLM": "modeling_hyena.StripedHyenaModelForCausalLM",
+     "AutoTokenizer": [
+       "tokenizer.ByteTokenizer",
+       null
+     ]
+   },
+   "column_split": false,
+   "column_split_hyena": true,
+   "eps": 1e-06,
+   "final_norm": true,
+   "hidden_size": 4096,
+   "hyena_filter_groups": 1,
+   "hyena_layer_idxs": [
+     0,
+     1,
+     2,
+     3,
+     4,
+     5,
+     6,
+     7,
+     9,
+     10,
+     11,
+     12,
+     13,
+     14,
+     15,
+     17,
+     18,
+     19,
+     20,
+     21,
+     22,
+     23,
+     25,
+     26,
+     27,
+     28,
+     29,
+     30,
+     31
+   ],
+   "inference_mode": false,
+   "inner_mlp_size": 10928,
+   "log_intermediate_values": false,
+   "make_vocab_size_divisible_by": 8,
+   "max_seqlen": 131072,
+   "mha_out_proj_bias": true,
+   "mlp_activation": "gelu",
+   "model_parallel_size": 1,
+   "model_type": "stripedhyena",
+   "num_attention_heads": 32,
+   "num_filters": 4096,
+   "num_layers": 32,
+   "pipe_parallel_size": 1,
+   "prefill_style": "fft",
+   "proj_groups": 1,
+   "qkv_proj_bias": true,
+   "rotary_emb_base": 10000,
+   "rotary_emb_scaling_factor": 16,
+   "short_filter_bias": true,
+   "short_filter_length": 3,
+   "smeared_gqa": false,
+   "split_k0": true,
+   "state_size": 8,
+   "tie_embeddings": true,
+   "torch_dtype": "bfloat16",
+   "transformers_version": null,
+   "use_cache": true,
+   "use_flash_attn": true,
+   "use_flash_depthwise": true,
+   "use_flash_rmsnorm": false,
+   "use_flashfft": false,
+   "use_interpolated_rotary_pos_emb": true,
+   "vocab_size": 512
  }
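
Beyond the whitespace reformatting, the commit makes a few functional changes: "_commit_hash" is cleared, the "auto_map" entries drop the "togethercomputer/evo-1-131k-base--" prefix so the custom classes resolve from the Python files bundled in this repository (configuration_hyena.py, modeling_hyena.py, tokenizer.py), "use_cache" flips from false to true, and the redundant "use_flash_attention_2" flag is removed. A minimal loading sketch under that assumption, using the standard transformers remote-code path; the repo id below is only illustrative (taken from "_name_or_path"), so substitute the repository this config.json actually lives in:

    # Minimal sketch, assuming the config above sits in a repo that also ships
    # the referenced remote-code modules. trust_remote_code=True is required so
    # transformers imports those files instead of its built-in model classes.
    import torch
    from transformers import AutoConfig, AutoModelForCausalLM

    repo_id = "togethercomputer/evo-1-131k-base"  # illustrative, from "_name_or_path"

    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        config=config,
        torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16"
        trust_remote_code=True,
    )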