{
  "_name_or_path": "purpcorn/prize-8b-post-sft-oc2-8k-bandit-safe",
  "activation_offloading_max_copy_events": 9,
  "activation_offloading_max_num_prefetch": 2,
  "architectures": [
    "AlexaLlamaForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "purpcorn/prize-8b-post-sft-oc2-8k-bandit-safe--configuration_agmv2.AlexaLlamaConfig",
    "AutoModel": "purpcorn/prize-8b-post-sft-oc2-8k-bandit-safe--modeling_agmv2.AlexaLlamaModel",
    "AutoModelForCausalLM": "purpcorn/prize-8b-post-sft-oc2-8k-bandit-safe--modeling_agmv2.AlexaLlamaForCausalLM"
  },
  "bos_token_id": 1,
  "embedding_dropout": 0.0,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 16384,
  "max_position_embeddings": 8196,
  "model_type": "alexallm-v2",
  "moe_ffn_hidden_size": null,
  "moe_frequency": 1,
  "moe_router_activation": "sigmoid",
  "moe_shared_experts": 0,
  "moe_topk": 1,
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "num_moe_experts": 1,
  "num_query_groups": 8,
  "original_max_position_embeddings": 2048,
  "pad_token_id": 0,
  "position_abf_factor": 1,
  "position_interpolation_factor": 1.0,
  "residual_dropout": 0.0,
  "rms_norm_eps": 1e-06,
  "rope_state": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.48.3",
  "use_activation_offloading": false,
  "use_cache": false,
  "use_flash_attention": false,
  "use_flash_mlp": false,
  "vocab_size": 90000,
  "yarn_scale": 1.0
}