benjamin committed
Commit f42cb57
1 Parent(s): 1d4dd5f

Upload ZettHypernet

Files changed (2):
  1. config.json (+12 -10)
  2. model.safetensors (+2 -2)
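
The commit can be pinned when downloading, so later pushes to the repo don't change what you get. A minimal sketch, assuming a hypothetical repo id "benjamin/zett-hypernetwork-Meta-Llama-3-8B" (the page itself doesn't name the repository):

```python
# Sketch: fetch the files exactly as of this commit.
# NOTE: the repo_id below is an assumption; substitute the real repository.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="benjamin/zett-hypernetwork-Meta-Llama-3-8B",  # hypothetical id
    revision="f42cb57",  # the commit hash shown above
)
print(local_dir)
```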
config.json CHANGED
@@ -1,15 +1,16 @@
 {
-  "_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "_name_or_path": "meta-llama/Meta-Llama-3-8B",
   "architectures": [
     "ZettHypernet"
   ],
+  "attention_bias": false,
   "attention_dropout": 0.0,
   "auto_map": {
     "AutoConfig": "configuration_hypernet.ZettHypernetConfig",
     "AutoModel": "modeling_hypernet.ZettHypernet"
   },
-  "bos_token_id": 1,
-  "eos_token_id": 2,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "hn_add_inter_token_attention": false,
@@ -24,7 +25,7 @@
   "hn_language_adapter_bottleneck_dim": 0,
   "hn_model_name_or_path": "roberta-base",
   "hn_model_type": "roberta",
-  "hn_n_extra_tokens": 522,
+  "hn_n_extra_tokens": 0,
   "hn_n_inter_token_blocks": 16,
   "hn_n_layers": 3,
   "hn_num_attention_heads": 32,
@@ -34,19 +35,20 @@
   "hn_surface_maxlen": 7,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
-  "max_position_embeddings": 32768,
+  "max_position_embeddings": 8192,
   "n_embd": 4096,
   "n_langs": 7,
-  "name": "v7:mistral7b_en+code:lw=0.5_long",
+  "name": "v7:llama3-8b_en+code:lw=0.5_long",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
-  "original_vocab_size": 32000,
-  "pad_token_id": 2,
+  "original_vocab_size": 128256,
+  "pad_token_id": 128001,
+  "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
-  "rope_theta": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
   "separate_out_embeddings": true,
-  "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.42.3",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58ff19794dc856869f1c6a52df63ad0573d1081a2861929e7c48ae1634481af5
-size 2710971844
+oid sha256:3cf90db11d32842a09617888ceefd36919408ed84628b82b6e9ff25dea1fdc0d
+size 4791035356
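
In a Git LFS pointer, the "oid sha256:..." field is the SHA-256 digest of the actual file contents and "size" is its byte length, so a downloaded weight file can be checked against this pointer. A minimal sketch, assuming model.safetensors sits in the current directory:

```python
# Sketch: verify a downloaded model.safetensors against the LFS pointer above.
import hashlib
from pathlib import Path

path = Path("model.safetensors")  # assumed local path

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        digest.update(chunk)

assert digest.hexdigest() == (
    "3cf90db11d32842a09617888ceefd36919408ed84628b82b6e9ff25dea1fdc0d"
)
assert path.stat().st_size == 4791035356
print("model.safetensors matches the LFS pointer")
```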