djuna committed
Commit 3204c91
1 Parent(s): 77ad341

Update config.json

Files changed (1)
  config.json  +11 -2
config.json CHANGED
@@ -16,7 +16,7 @@
   "hidden_size": 3584,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
-  "max_position_embeddings": 8192,
+  "max_position_embeddings": 32768,
   "model_type": "gemma2",
   "num_attention_heads": 16,
   "num_hidden_layers": 42,
@@ -25,10 +25,19 @@
   "query_pre_attn_scalar": 224,
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,
+  "rope_scaling": {
+    "factor": 4,
+    "original_max_position_embeddings": 8192,
+    "type": "yarn"
+  },
+  "string_config": {
+    "shifted_offset": 10813,
+    "local_value": 128
+  },
   "sliding_window": 4096,
   "sliding_window_size": 4096,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.46.2",
   "use_cache": true,
   "vocab_size": 256000
-}
+}
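
For context: the commit raises max_position_embeddings from 8192 to 32768, which is consistent with the added YaRN rope_scaling block (8192 × 4 = 32768). string_config is not a standard transformers key, so it is presumably consumed by custom inference code. A minimal sketch for verifying that the new fields load, assuming a local checkout of this repo (the repo id is not shown on this page):

# Minimal sketch: check that the updated config fields are picked up.
# "." is assumed to be a local directory containing this config.json;
# substitute the actual Hugging Face repo id as needed.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")

assert config.max_position_embeddings == 8192 * 4  # 32768 = factor 4 x original 8192
print(config.rope_scaling)
# {'factor': 4, 'original_max_position_embeddings': 8192, 'type': 'yarn'}
print(config.string_config)  # non-standard key, kept as an extra config attribute

Note that loading the fields is not the same as applying them: whether YaRN scaling (and string_config) actually takes effect at inference time depends on the modeling stack, and stock transformers may ignore keys the Gemma2 implementation does not consume.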