{ "architectures": [ "OpenLMModel" ], "model_type": "openlm", "params": null, "params_args_dict": { "apply_qk_norm": true, "attn_activation": null, "attn_name": "xformers_attn", "attn_seq_scalar": null, "attn_seq_scalar_alpha": null, "dim": 2048, "ffn_type": "swiglu", "model": "open_lm_1b", "model_norm": "gain_only_lp_layer_norm", "moe_capacity_factor": 1.25, "moe_expert_model_parallelism": false, "moe_freq": 0, "moe_loss_weight": 0.1, "moe_num_experts": null, "moe_top_k": 2, "moe_weight_parallelism": false, "n_heads": 16, "n_layers": 24, "norm_eps": 1e-05, "positional_embedding_type": "rotary", "post_embed_norm": false, "qk_norm": true, "seq_len": 2048, "vocab_size": 50432, "weight_tying": false }, "torch_dtype": "float32", "transformers_version": "4.40.0" }