if001 committed on
Commit
cd4d4ec
1 Parent(s): c520681
Files changed (1) hide show
  1. README.md +74 -0
README.md CHANGED
@@ -1,3 +1,77 @@
1
  ---
2
  license: mit
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
  ---
4
+
5
+ PhiConfig {
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "embd_pdrop": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu_new",
11
+ "hidden_size": 8,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 10,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "phi",
17
+ "num_attention_heads": 4,
18
+ "num_hidden_layers": 6,
19
+ "num_key_value_heads": 2,
20
+ "partial_rotary_factor": 0.5,
21
+ "qk_layernorm": false,
22
+ "resid_pdrop": 0.0,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000.0,
25
+ "tie_word_embeddings": false,
26
+ "transformers_version": "4.38.2",
27
+ "use_cache": true,
28
+ "vocab_size": 20
29
+ }
30
+
31
+ PhiForCausalLM(
32
+ (model): PhiModel(
33
+ (embed_tokens): Embedding(20, 8)
34
+ (embed_dropout): Dropout(p=0.0, inplace=False)
35
+ (layers): ModuleList(
36
+ (0-5): 6 x PhiDecoderLayer(
37
+ (self_attn): PhiAttention(
38
+ (q_proj): Linear(in_features=8, out_features=8, bias=True)
39
+ (k_proj): Linear(in_features=8, out_features=4, bias=True)
40
+ (v_proj): Linear(in_features=8, out_features=4, bias=True)
41
+ (dense): Linear(in_features=8, out_features=8, bias=True)
42
+ (rotary_emb): PhiRotaryEmbedding()
43
+ )
44
+ (mlp): PhiMLP(
45
+ (activation_fn): NewGELUActivation()
46
+ (fc1): Linear(in_features=8, out_features=10, bias=True)
47
+ (fc2): Linear(in_features=10, out_features=8, bias=True)
48
+ )
49
+ (input_layernorm): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
50
+ (resid_dropout): Dropout(p=0.0, inplace=False)
51
+ )
52
+ )
53
+ (final_layernorm): LayerNorm((8,), eps=1e-05, elementwise_affine=True)
54
+ )
55
+ (lm_head): Linear(in_features=8, out_features=20, bias=True)
56
+ )
57
+ ===========================================================================
58
+ Layer (type:depth-idx) Param #
59
+ ===========================================================================
60
+ PhiForCausalLM --
61
+ ├─PhiModel: 1-1 --
62
+ │ └─Embedding: 2-1 160
63
+ │ └─Dropout: 2-2 --
64
+ │ └─ModuleList: 2-3 --
65
+ │ │ └─PhiDecoderLayer: 3-1 410
66
+ │ │ └─PhiDecoderLayer: 3-2 410
67
+ │ │ └─PhiDecoderLayer: 3-3 410
68
+ │ │ └─PhiDecoderLayer: 3-4 410
69
+ │ │ └─PhiDecoderLayer: 3-5 410
70
+ │ │ └─PhiDecoderLayer: 3-6 410
71
+ │ └─LayerNorm: 2-4 16
72
+ ├─Linear: 1-2 180
73
+ ===========================================================================
74
+ Total params: 2,816
75
+ Trainable params: 2,816
76
+ Non-trainable params: 0
77
+ ===========================================================================