# Experiment config; presumably applied as overrides on top of the file
# named in _BASE_ (confirm against the config loader).
# NOTE(review): nesting was reconstructed after indentation was lost — every
# key below `MOE:` is assumed to belong to the MOE section; verify against
# the base config's schema.
_BASE_: "7tasks_berttiny_training.yaml"

MOE:
  MOE: True
  MOE_TYPE: 'attribute'
  TAG_Transform: True
  ATTRIBUTE_LENGTH: 8
  EP_WORLD_SIZE: 1  # tag moe only
  NUM_EXPERTS: 8
  TOP_K: 2
  CAPACITY_FACTOR: 3.0
  EVAL_MIN_CAPACITY: 4.0
  MIN_CAPACITY: 4
  NOISY_GATE_POLICY: 'vmoe'
  MOE_PARAM_GROUP: True
  # Comma-separated list in a single string; kept quoted so it stays a string.
  MOE_EXPERT_TYPE: 'FFN,SA'
  SA_LINEAR_OUT_MOE: True
  MOE_EXPERT_LOCATION: 'all'  # 'odd'
  # Disabled layer-range overrides, kept for reference.
  # NOTE(review): the second pair has START (18) greater than END (12) —
  # confirm the intended range before enabling it.
  # MOE_LAYER_START_IDX: 3
  # MOE_LAYER_END_IDX: 21
  # MOE_LAYER_START_IDX: 18
  # MOE_LAYER_END_IDX: 12
  BATCH_PRIO: True
  USE_TUTEL: True
  FFN_SHARE_GATE_DECISION: True