unit_test / configs /BERT_L12_H192_experiments /7tasks_berttiny_training_moe.yaml
herrius's picture
Upload 259 files
32b542e
raw
history blame contribute delete
579 Bytes
# MoE variant of the config named in _BASE_. Only the MOE section is
# overridden here; everything else is presumably inherited from the base
# file by the config loader (confirm against the loader).
_BASE_: "7tasks_berttiny_training.yaml"

# Every option below must stay indented under this key. At column 0,
# `MOE: True` would be a second top-level `MOE` key (a duplicate, resolved
# silently as last-wins by most parsers) and the section would be lost.
MOE:
  MOE: True  # presumably the on/off switch for the section; confirm
  MOE_TYPE: 'attribute'
  TAG_Transform: True
  ATTRIBUTE_LENGTH: 8
  EP_WORLD_SIZE: 1  # tag moe only
  NUM_EXPERTS: 8
  TOP_K: 2
  CAPACITY_FACTOR: 3.0
  EVAL_MIN_CAPACITY: 4.0
  MIN_CAPACITY: 4
  NOISY_GATE_POLICY: 'vmoe'
  MOE_PARAM_GROUP: True
  MOE_EXPERT_TYPE: 'FFN,SA'  # one string holding a comma-separated list
  SA_LINEAR_OUT_MOE: True
  MOE_EXPERT_LOCATION: 'all'  # 'odd'
  # Disabled layer-range settings, kept for reference.
  # NOTE(review): the second pair has START (18) greater than END (12);
  # confirm the intended range before re-enabling it.
  # MOE_LAYER_START_IDX: 3
  # MOE_LAYER_END_IDX: 21
  # MOE_LAYER_START_IDX: 18
  # MOE_LAYER_END_IDX: 12
  BATCH_PRIO: True
  USE_TUTEL: True
  FFN_SHARE_GATE_DECISION: True