{
  "block_size": 1024,
  "flash_attention": true,
  "n_embd": 768,
  "n_head": 12,
  "n_key_value_heads": 3,
  "n_layer": 12,
  "rotary_dim": 64,
  "vocab_size": 50257
}
{
  "block_size": 1024,
  "flash_attention": true,
  "n_embd": 768,
  "n_head": 12,
  "n_key_value_heads": 3,
  "n_layer": 12,
  "rotary_dim": 64,
  "vocab_size": 50257
}