# Experiment config layered on top of the file named in _BASE_.
# Everything under MOE configures the expert/gating options for this run.
# NOTE(review): line structure was reconstructed from a collapsed one-line
# file; all keys below are assumed to belong to the MOE section -- confirm
# against the config schema.
_BASE_: "7tasks_berttiny_training.yaml"

MOE:
  MOE: True
  MOE_TYPE: 'attribute'
  TAG_Transform: True
  ATTRIBUTE_LENGTH: 8
  EP_WORLD_SIZE: 1  # tag moe only

  # Expert count, routing fan-out and capacity settings.
  NUM_EXPERTS: 8
  TOP_K: 2
  CAPACITY_FACTOR: 3.0
  EVAL_MIN_CAPACITY: 4.0
  MIN_CAPACITY: 4
  NOISY_GATE_POLICY: 'vmoe'
  MOE_PARAM_GROUP: True

  # Expert types and where they are placed.
  MOE_EXPERT_TYPE: 'FFN,SA'
  SA_LINEAR_OUT_MOE: True
  MOE_EXPERT_LOCATION: 'all'  # alternative value: 'odd'

  # Disabled layer-range alternatives, kept for reference.
  # NOTE(review): the second pair has START_IDX (18) > END_IDX (12);
  # verify the intended order before re-enabling it.
  # MOE_LAYER_START_IDX: 3
  # MOE_LAYER_END_IDX: 21
  # MOE_LAYER_START_IDX: 18
  # MOE_LAYER_END_IDX: 12

  BATCH_PRIO: True
  USE_TUTEL: True
  FFN_SHARE_GATE_DECISION: True