{ "_name_or_path": "checkpoints/microsoft/phi-1_5", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.dense", "mlp.fc1", "mlp.fc2" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.fc1": 1279820, "model.layers.0.mlp.fc2": 1236307, "model.layers.0.self_attn.dense": 189187, "model.layers.0.self_attn.k_proj": 318137, "model.layers.0.self_attn.q_proj": 285336, "model.layers.0.self_attn.v_proj": 212019, "model.layers.1.mlp.fc1": 539655, "model.layers.1.mlp.fc2": 766840, "model.layers.1.self_attn.dense": 176195, "model.layers.1.self_attn.k_proj": 224990, "model.layers.1.self_attn.q_proj": 219788, "model.layers.1.self_attn.v_proj": 201422, "model.layers.10.mlp.fc1": 710241, "model.layers.10.mlp.fc2": 754886, "model.layers.10.self_attn.dense": 180978, "model.layers.10.self_attn.k_proj": 221678, "model.layers.10.self_attn.q_proj": 212231, "model.layers.10.self_attn.v_proj": 200830, "model.layers.11.mlp.fc1": 710220, "model.layers.11.mlp.fc2": 741443, "model.layers.11.self_attn.dense": 178022, "model.layers.11.self_attn.k_proj": 215089, "model.layers.11.self_attn.q_proj": 207761, "model.layers.11.self_attn.v_proj": 194583, "model.layers.12.mlp.fc1": 697047, "model.layers.12.mlp.fc2": 752645, "model.layers.12.self_attn.dense": 176274, "model.layers.12.self_attn.k_proj": 220310, "model.layers.12.self_attn.q_proj": 213155, "model.layers.12.self_attn.v_proj": 198321, "model.layers.13.mlp.fc1": 687659, "model.layers.13.mlp.fc2": 738003, "model.layers.13.self_attn.dense": 178018, "model.layers.13.self_attn.k_proj": 221049, "model.layers.13.self_attn.q_proj": 210113, "model.layers.13.self_attn.v_proj": 195362, "model.layers.14.mlp.fc1": 682287, "model.layers.14.mlp.fc2": 781981, "model.layers.14.self_attn.dense": 184895, "model.layers.14.self_attn.k_proj": 217243, "model.layers.14.self_attn.q_proj": 213786, "model.layers.14.self_attn.v_proj": 198728, "model.layers.15.mlp.fc1": 666136, "model.layers.15.mlp.fc2": 761731, "model.layers.15.self_attn.dense": 177136, "model.layers.15.self_attn.k_proj": 217895, "model.layers.15.self_attn.q_proj": 232288, "model.layers.15.self_attn.v_proj": 191081, "model.layers.16.mlp.fc1": 656390, "model.layers.16.mlp.fc2": 799995, "model.layers.16.self_attn.dense": 174656, "model.layers.16.self_attn.k_proj": 217135, "model.layers.16.self_attn.q_proj": 219926, "model.layers.16.self_attn.v_proj": 185798, "model.layers.17.mlp.fc1": 639288, "model.layers.17.mlp.fc2": 775904, "model.layers.17.self_attn.dense": 173271, "model.layers.17.self_attn.k_proj": 206996, "model.layers.17.self_attn.q_proj": 205270, "model.layers.17.self_attn.v_proj": 184931, "model.layers.18.mlp.fc1": 632334, "model.layers.18.mlp.fc2": 768287, "model.layers.18.self_attn.dense": 189412, "model.layers.18.self_attn.k_proj": 215687, "model.layers.18.self_attn.q_proj": 242190, "model.layers.18.self_attn.v_proj": 200154, "model.layers.19.mlp.fc1": 625021, "model.layers.19.mlp.fc2": 738002, "model.layers.19.self_attn.dense": 186977, "model.layers.19.self_attn.k_proj": 216466, "model.layers.19.self_attn.q_proj": 240694, "model.layers.19.self_attn.v_proj": 197648, "model.layers.2.mlp.fc1": 621667, "model.layers.2.mlp.fc2": 757420, "model.layers.2.self_attn.dense": 170986, "model.layers.2.self_attn.k_proj": 225618, "model.layers.2.self_attn.q_proj": 217741, "model.layers.2.self_attn.v_proj": 200228, "model.layers.20.mlp.fc1": 614692, "model.layers.20.mlp.fc2": 727978, "model.layers.20.self_attn.dense": 175731, "model.layers.20.self_attn.k_proj": 213423, "model.layers.20.self_attn.q_proj": 236043, "model.layers.20.self_attn.v_proj": 183771, "model.layers.21.mlp.fc1": 618662, "model.layers.21.mlp.fc2": 738785, "model.layers.21.self_attn.dense": 177493, "model.layers.21.self_attn.k_proj": 208350, "model.layers.21.self_attn.q_proj": 237646, "model.layers.21.self_attn.v_proj": 187251, "model.layers.22.mlp.fc1": 629352, "model.layers.22.mlp.fc2": 818793, "model.layers.22.self_attn.dense": 175140, "model.layers.22.self_attn.k_proj": 202527, "model.layers.22.self_attn.q_proj": 284459, "model.layers.22.self_attn.v_proj": 180999, "model.layers.23.mlp.fc1": 711633, "model.layers.23.mlp.fc2": 1103566, "model.layers.23.self_attn.dense": 219201, "model.layers.23.self_attn.k_proj": 224644, "model.layers.23.self_attn.q_proj": 397194, "model.layers.23.self_attn.v_proj": 230928, "model.layers.3.mlp.fc1": 663185, "model.layers.3.mlp.fc2": 761065, "model.layers.3.self_attn.dense": 185269, "model.layers.3.self_attn.k_proj": 240041, "model.layers.3.self_attn.q_proj": 232277, "model.layers.3.self_attn.v_proj": 214858, "model.layers.4.mlp.fc1": 716587, "model.layers.4.mlp.fc2": 767640, "model.layers.4.self_attn.dense": 179773, "model.layers.4.self_attn.k_proj": 227913, "model.layers.4.self_attn.q_proj": 220527, "model.layers.4.self_attn.v_proj": 211685, "model.layers.5.mlp.fc1": 707590, "model.layers.5.mlp.fc2": 780274, "model.layers.5.self_attn.dense": 178504, "model.layers.5.self_attn.k_proj": 247977, "model.layers.5.self_attn.q_proj": 243896, "model.layers.5.self_attn.v_proj": 207831, "model.layers.6.mlp.fc1": 710038, "model.layers.6.mlp.fc2": 763787, "model.layers.6.self_attn.dense": 190308, "model.layers.6.self_attn.k_proj": 224045, "model.layers.6.self_attn.q_proj": 220275, "model.layers.6.self_attn.v_proj": 212698, "model.layers.7.mlp.fc1": 715221, "model.layers.7.mlp.fc2": 758669, "model.layers.7.self_attn.dense": 175635, "model.layers.7.self_attn.k_proj": 231017, "model.layers.7.self_attn.q_proj": 224708, "model.layers.7.self_attn.v_proj": 200219, "model.layers.8.mlp.fc1": 722869, "model.layers.8.mlp.fc2": 747381, "model.layers.8.self_attn.dense": 184555, "model.layers.8.self_attn.k_proj": 230928, "model.layers.8.self_attn.q_proj": 224025, "model.layers.8.self_attn.v_proj": 206979, "model.layers.9.mlp.fc1": 719199, "model.layers.9.mlp.fc2": 748623, "model.layers.9.self_attn.dense": 174700, "model.layers.9.self_attn.k_proj": 228878, "model.layers.9.self_attn.q_proj": 222182, "model.layers.9.self_attn.v_proj": 199200 } }, "architectures": [ "PhiForCausalLM" ], "attention_dropout": 0.0, "bos_token_id": null, "embd_pdrop": 0.0, "eos_token_id": null, "hidden_act": "gelu_new", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "layer_norm_eps": 1e-05, "max_position_embeddings": 2048, "model_type": "phi", "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "partial_rotary_factor": 0.5, "qk_layernorm": false, "resid_pdrop": 0.0, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 51200 }