|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch |
|
|
|
|
|
|
|
def bloom_model_postprocess_past_key_value(past_key_values):
    """Regroup concatenated prefix tensors into per-layer ``(key, value)`` pairs.

    The input tensors are concatenated along dim 0 into one 5-D tensor. The
    first half of that leading dimension is treated as keys and the second
    half as values.

    Args:
        past_key_values: Sequence of tensors accepted by ``torch.cat``. After
            concatenation the result must be 5-D; its dimensions are unpacked
            as ``(total_layers, batch_size, num_attention_heads,
            num_virtual_tokens, head_dim)``.

    Returns:
        A tuple with ``total_layers // 2`` entries. Each entry is a
        ``(key, value)`` pair where ``key`` has shape
        ``(batch_size * num_attention_heads, head_dim, num_virtual_tokens)``
        and ``value`` has shape
        ``(batch_size * num_attention_heads, num_virtual_tokens, head_dim)``.

    Raises:
        ValueError: If the concatenated tensor does not have exactly five
            dimensions (the shape unpacking fails).
    """
    stacked = torch.cat(past_key_values)
    total_layers, batch_size, num_attention_heads, num_virtual_tokens, head_dim = stacked.shape
    half = total_layers // 2
    fused_heads = batch_size * num_attention_heads

    # NOTE(review): transpose(2, 3) exchanges the axes unpacked above as
    # num_attention_heads and num_virtual_tokens; the reshape that follows then
    # imposes the (..., head_dim, num_virtual_tokens) layout. Left unchanged so
    # the element order seen by existing callers is preserved -- confirm this
    # axis choice is intended.
    keys = stacked[:half].transpose(2, 3).reshape(half, fused_heads, head_dim, num_virtual_tokens)
    values = stacked[half:].reshape(half, fused_heads, num_virtual_tokens, head_dim)

    return tuple(zip(keys, values))
|
|
|
|
|
|
|
def starcoder_model_postprocess_past_key_value(past_key_values):
    """Collapse each prefix tensor into a single 3-D tensor per entry.

    For every tensor in ``past_key_values`` the function:

    1. selects index 0 along dim 2 (dropping that dimension),
    2. reorders the remaining four dims from ``(d0, d1, d3, d4)`` to
       ``(d1, d3, d0, d4)``,
    3. merges the last two dims into one.

    Args:
        past_key_values: Iterable of tensors; the indexing and permutation
            below require each to be 5-D.

    Returns:
        A tuple with one tensor per input entry, each of shape
        ``(d1, d3, d0 * d4)`` relative to the input's ``(d0, d1, d2, d3, d4)``.
        An empty input yields an empty tuple.
    """
    processed = []
    for layer in past_key_values:
        # Keep only the first slice along dim 2; the result is 4-D.
        first_slice = layer[:, :, 0]
        reordered = first_slice.permute(1, 2, 0, 3)
        # Fuse the trailing two dims while keeping the leading ones intact.
        processed.append(reordered.reshape(*reordered.shape[:-2], -1))
    return tuple(processed)
|
|
|
|
|
# Model-type key -> function that post-processes that model's prefix-tuning
# past_key_values. Only model types listed here have a post-processing hook.
TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING = {
    "bloom": bloom_model_postprocess_past_key_value,
    "gpt_bigcode": starcoder_model_postprocess_past_key_value,
}
|
|
|
|
|
# Model-type key -> list of module-name strings for LoRA.
# Presumably these are the default target modules used when the caller does
# not specify any -- confirm against the code that consumes this table.
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "v"],
    "mt5": ["q", "v"],
    "bart": ["q_proj", "v_proj"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "blip-2": ["q", "v", "q_proj", "v_proj"],
    "opt": ["q_proj", "v_proj"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "value"],
    "xlm-roberta": ["query", "value"],
    "electra": ["query", "value"],
    "deberta-v2": ["query_proj", "value_proj"],
    "deberta": ["in_proj"],
    "layoutlm": ["query", "value"],
    "llama": ["q_proj", "v_proj"],
    "chatglm": ["query_key_value"],
    "gpt_bigcode": ["c_attn"],
    "mpt": ["Wqkv"],
    "RefinedWebModel": ["query_key_value"],
    "RefinedWeb": ["query_key_value"],
    "falcon": ["query_key_value"],
    "btlm": ["c_proj", "c_attn"],
    "codegen": ["qkv_proj"],
    "mistral": ["q_proj", "v_proj"],
    "mixtral": ["q_proj", "v_proj"],
    "stablelm": ["q_proj", "v_proj"],
    "phi": ["q_proj", "v_proj", "fc1", "fc2"],
    "gemma": ["q_proj", "v_proj"],
}
|
|
|
# Model-type key -> list of module-name strings for IA3.
# Presumably the default IA3 target modules per model type -- confirm against
# the code that consumes this table.
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING = {
    "t5": ["k", "v", "wo"],
    "mt5": ["k", "v", "wi_1"],
    "gpt2": ["c_attn", "mlp.c_proj"],
    "bloom": ["query_key_value", "mlp.dense_4h_to_h"],
    "roberta": ["key", "value", "output.dense"],
    "opt": ["q_proj", "k_proj", "fc2"],
    "gptj": ["q_proj", "v_proj", "fc_out"],
    "gpt_neox": ["query_key_value", "dense_4h_to_h"],
    "gpt_neo": ["q_proj", "v_proj", "c_proj"],
    "bart": ["q_proj", "v_proj", "fc2"],
    "gpt_bigcode": ["c_attn", "mlp.c_proj"],
    "llama": ["k_proj", "v_proj", "down_proj"],
    "mistral": ["k_proj", "v_proj", "down_proj"],
    "mixtral": ["k_proj", "v_proj", "w2"],
    "bert": ["key", "value", "output.dense"],
    "deberta-v2": ["key_proj", "value_proj", "output.dense"],
    "deberta": ["in_proj", "output.dense"],
    "RefinedWebModel": ["query_key_value", "dense_4h_to_h"],
    "RefinedWeb": ["query_key_value", "dense_4h_to_h"],
    "falcon": ["query_key_value", "dense_4h_to_h"],
    "phi": ["q_proj", "v_proj", "fc2"],
    "gemma": ["q_proj", "v_proj", "down_proj"],
}
|
|
|
# Model-type key -> list of module-name strings treated as feed-forward
# modules for IA3 (presumably the defaults per model type -- confirm against
# the code that consumes this table).
# NOTE(review): "mt5" is an empty list here although "wi_1" is listed for
# "mt5" in TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING; kept as found --
# confirm this is intentional.
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING = {
    "t5": ["wo"],
    "mt5": [],
    "gpt2": ["mlp.c_proj"],
    "bloom": ["mlp.dense_4h_to_h"],
    "roberta": ["output.dense"],
    "opt": ["fc2"],
    "gptj": ["fc_out"],
    "gpt_neox": ["dense_4h_to_h"],
    "gpt_neo": ["c_proj"],
    "bart": ["fc2"],
    "gpt_bigcode": ["mlp.c_proj"],
    "llama": ["down_proj"],
    "mistral": ["down_proj"],
    "mixtral": ["w2"],
    "bert": ["output.dense"],
    "deberta-v2": ["output.dense"],
    "deberta": ["output.dense"],
    "RefinedWeb": ["dense_4h_to_h"],
    "RefinedWebModel": ["dense_4h_to_h"],
    "falcon": ["dense_4h_to_h"],
    "phi": ["fc2"],
    "gemma": ["down_proj"],
}
|
|
|
# Model-type key -> list of module-name strings for AdaLoRA.
# Presumably the default AdaLoRA target modules per model type -- confirm
# against the code that consumes this table.
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING = {
    "t5": ["q", "k", "v", "o", "wi", "wo"],
    "mt5": ["q", "k", "v", "o", "wi_0", "wi_1", "wo"],
    "bart": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
    "gpt2": ["c_attn"],
    "bloom": ["query_key_value"],
    "opt": ["q_proj", "k_proj", "v_proj", "out_proj", "fc1", "fc2"],
    "gptj": ["q_proj", "v_proj"],
    "gpt_neox": ["query_key_value"],
    "gpt_neo": ["q_proj", "v_proj"],
    "llama": ["q_proj", "v_proj"],
    "bert": ["query", "value"],
    "roberta": ["query", "key", "value", "dense"],
    "deberta-v2": ["query_proj", "key_proj", "value_proj", "dense"],
    "gpt_bigcode": ["c_attn"],
    "deberta": ["in_proj"],
}
|
|
|
# File names for adapter artifacts (presumably used when saving/loading --
# confirm against callers).
WEIGHTS_NAME = "adapter_model.bin"
SAFETENSORS_WEIGHTS_NAME = "adapter_model.safetensors"
CONFIG_NAME = "adapter_config.json"

# Module-name strings identifying embedding-related layers.
EMBEDDING_LAYER_NAMES = ["embed_tokens", "lm_head"]

# Shorthand string value; by its name it stands for "include all linear
# layers" -- confirm how consumers interpret it.
INCLUDE_LINEAR_LAYERS_SHORTHAND = "all-linear"

TOKENIZER_CONFIG_NAME = "tokenizer_config.json"
|
|