dm1024 / model.yaml
jqhoogland's picture
Upload final model (step 75000) and all checkpoints at 2024-10-18T06:07:54.614288
2e55553 verified
raw
history blame contribute delete
381 Bytes
# Model arguments serialized as a tagged Python object.
# The root tag below tells PyYAML to construct an instance of
# aether.model.model.TransformerLensModelArguments, so yaml.safe_load rejects
# this file; it needs a loader that builds Python objects (e.g.
# yaml.unsafe_load). Only load copies obtained from a trusted source.
!!python/object:aether.model.model.TransformerLensModelArguments
# How each field is consumed is defined by the aether class, which is not
# visible from this file.
implementation: transformer_lens
model_name: default
n_layers: 2
model_seed: 0
# NOTE(review): n_heads * d_head = 8 * 32 = 256, which differs from
# d_model = 1024 -- confirm this is intentional and not a leftover default.
d_model: 1024
n_ctx: 1024
d_head: 32
n_heads: 8
# NOTE(review): attn_only is true further down, so act_fn is presumably
# unused -- confirm against the model constructor.
act_fn: gelu
# Presumably matches the vocabulary of the "5k" tokenizer named below -- verify.
d_vocab: 5000
# Local attention is disabled; window_size and attn_types below are both null.
use_local_attn: false
tokenizer_name: georgeyw/TinyStories-tokenizer-5k
window_size: null
attn_types: null
attn_only: true
# Presumably forwarded to transformer_lens as its positional embedding mode -- TODO confirm.
positional_embedding_type: shortformer