PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (v_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (o_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (rotary_emb): LlamaExtendedRotaryEmbedding()
            )
            (mlp): LlamaMLP(
              (gate_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=14336, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (up_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=14336, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (down_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=14336, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (act_fn): SiLU()
            )
            (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
            (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
          )
        )
        (norm): LlamaRMSNorm((4096,), eps=1e-05)
        (rotary_emb): LlamaRotaryEmbedding()
      )
      (lm_head): Linear(in_features=4096, out_features=128256, bias=False)
    )
  )
)
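
For reference, here is a minimal sketch (not taken from the source) of loading code that would produce a module tree like the one above: a Llama 3 8B-shaped model (4096 hidden size, 32 layers, 128256-token vocabulary, 14336 MLP width) quantized to 4-bit with bitsandbytes, then wrapped in a PEFT LoRA adapter of rank 16 with zero dropout (which prints as Identity()) targeting all seven linear projections. The model name and lora_alpha value are assumptions; neither is visible in the printout.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # base weights become Linear4bit modules
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B",           # assumed; any model with these dims fits
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(
    r=16,                                   # lora_A: in -> 16, lora_B: 16 -> out
    lora_alpha=32,                          # assumed; alpha does not appear in the repr
    lora_dropout=0.0,                       # printed as (default): Identity()
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

model = get_peft_model(model, lora_config)  # PeftModelForCausalLM(LoraModel(...))
print(model)                                # prints a module tree like the one above

One caveat: the per-layer (rotary_emb): LlamaExtendedRotaryEmbedding() entry is not part of stock transformers LlamaAttention, which suggests the printed model was patched by a fine-tuning framework (e.g., Unsloth) before printing; a plain transformers load as sketched here may show only the single model-level LlamaRotaryEmbedding().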