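# NOTE(review): the next three lines appear to be file-hosting page residue
# (avatar caption, commit message, short commit hash), not configuration.
# They are kept only as comments so they cannot break YAML parsing.
# herrius's picture
# Upload 259 files
# 32b542e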
# Parent configuration file, given as a relative path one directory up.
# NOTE(review): presumably keys not set in this file are inherited from it;
# confirm how the config loader resolves _BASE_.
_BASE_: "../base_model_bert_l12_h768.yaml"
# Named target sets. The entry name 'Kinetics400' is the same string that the
# task lists under DATASETS.TARGET_SET.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm it against the schema the consuming code expects.
SHARED_TARGETS:
  - NAME: 'Kinetics400'
    SHARED_TARGETS_CFG:
      # NOTE(review): the file name suggests pickled K400 class names prepared
      # for CLIP with an end-of-text token; verify the actual contents.
      FILE_PATH: 'open_source_dataset/k400_class_name_CLIP_with_endoftext.pkl'
      DISTRIBUTED: false
# Task list; this file defines a single task, 'K400_retrieve'.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm it against the schema the consuming code expects.
TASKS:
  - NAME: 'K400_retrieve'

    # Dataset classes for each split, plus task/dataset identifiers.
    DATASETS:
      TRAIN: 'VideoDataSet'
      VAL: 'VideoDataSet'
      TASK_TYPE: 'video_classification'
      DATASET_NAME: 'K400'
      # Same string as the NAME of the SHARED_TARGETS entry.
      TARGET_SET: ['Kinetics400']

    # Per-task data loading: batch sizes, data locations, clip sampling.
    DATALOADER:
      TRAIN_BATCH_SIZE: 8  # 256
      TEST_BATCH_SIZE: 4  # debug
      NUM_WORKERS: 4  # debug 4
      FEATS_FOLDER: 'open_source_dataset/K400_official'
      ANNO_FOLDER: 'open_source_dataset/K400_official'
      S3_PATH: 's3://K400/'
      FRAMES_PER_CLIP: 8
      STRIDE: 32
      FILE_EXTENSION: ''
      ANNO_FILE: 'annotation.json'
      TIMESFORMER_AUG: true
      SAMPLING_WEIGHT: 1.0
      # NOTE(review): "VEIW" is the spelling used by these keys; do not correct
      # it here unless the consuming code is renamed at the same time.
      MULTI_VEIW_NUM: 4
      MULTI_VEIW: 'v2'

    MODEL:
      MAX_SEQ_LEN: -1
      TEMP_NAME: 'logit_scale_video_cls'

    # NOTE(review): placed as a sibling of MODEL; confirm it is not meant to be
    # nested under MODEL.
    LOSSES:
      NAMES: ['CrossEntropy', 'Accuracy']
      LOSS_WEIGHT: 1.0

    # Evaluation settings for this task.
    INFERENCE:
      NAME: 'MiTEvaler'
      ID_KEY: 'video_name'
      VALUE: 'label'
      # Equals ANNO_FOLDER joined with ANNO_FILE from the DATALOADER section.
      VAL_ANNFILE: 'open_source_dataset/K400_official/annotation.json'
      TEST_ANNFILE: ''
      GENERATION_MODE: false
      NUM_VIEWS: 1
# Training engine, selected by name.
ENGINE:
  NAME: 'UnifiedTrainer'
# Global model settings: architecture/encoder names, BERT options, EMA,
# patch/position-embedding geometry, checkpoint flags and layer scale.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm it against the schema the consuming code expects
# (in particular which keys belong under BERT).
MODEL:
  META_ARCHITECTURE: 'MultiTaskTransformerEncoder'
  ENCODER: 'UnifiedBertEncoder'

  SHARE_LAYERNORM: true
  BERT:
    NORMALIZE_DECISION: 'BERTPre'
    DROP_PATH_PROB: 0.1
    DROP_PATH_PROB_FIXED: true

  MODEL_EMA: false
  MODEL_EMA_DECAY: 0.9999

  MAEParamsInit: true
  POSEMBEDFIX: true

  IMG_INPUT_SIZE: 224
  PATCH_SIZE: 16
  # 160/224 written as a literal float. The previous form used the
  # `!!python/object/apply:eval` tag, which executes code at load time and is
  # rejected by safe YAML loaders; the literal is the same value under Python 3
  # true division.
  POSEMBED_SCALE: 0.7142857142857143
  # NOTE(review): the next two keys are spelled "FILETER" / "CHECKPONT"; keep
  # the spelling unless the consuming code is renamed at the same time.
  CHECKPOINT_FILETER: false
  OLD_CHECKPONT: true

  LAYER_SCALE: true
  # Written with a decimal point so YAML 1.1 parsers (e.g. PyYAML) resolve it
  # as a float; the bare form `1e-3` is resolved as a string there.
  LAYER_SCALE_INIT: 1.0e-3
# Global data-loading settings. The task entry under TASKS carries its own
# DATALOADER section with a separate NUM_WORKERS value.
DATALOADER:
  USE_WEIGHTED_SAMPLER: true
  UNIFIED_DATASET: true
  NUM_WORKERS: 16

  # True for debugging or token moe with distributed moe
  PADDING_TO_MAX: false
####################################### Optimizer #######################################
# Optimizer, iteration schedule, gradient clipping and mixed-precision options.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm it against the schema the consuming code expects.
SOLVER:
  NAME: 'Adam'
  TORCH_OPTIMIZER: true
  # NOTE(review): key is spelled "SEPERATE"; keep unless the consumer is renamed.
  PARAMS_SEPERATE: true
  # PARAMS_GROUP: True
  # EPOCH: 1
  MAX_ITER: 40000
  # NOTE(review): larger than MAX_ITER, so a purely period-based checkpoint
  # would not fire during the run -- confirm this is intended.
  CHECKPOINT_PERIOD: 50000
  EVAL_PERIOD: 2000
  BASE_LR: 0.000005
  BIAS_LR_FACTOR: 1.0
  WEIGHT_DECAY: 0.0001
  WEIGHT_DECAY_NORM: 0.0
  WEIGHT_DECAY_BIAS: 0.0
  WEIGHT_DECAY_EMBEDDING: 0.0
  MOMENTUM: 0.9
  DAMPENING: 0.0
  NESTEROV: 0.0
  BETAS: [0.9, 0.95]
  # Written with a decimal point so YAML 1.1 parsers (e.g. PyYAML) resolve it
  # as a float; the bare form `1e-6` is resolved as a string there.
  EPS: 1.0e-6
  GRAD_CLIP: 0.1
  GRAD_CLIP_TYPE: 'norm'
  ACCUM_ITER: 0
  AMP_FP16: true
  APEX_FP16: false  # dangerous
  WRITE_PERIOD: 50
  # NOTE(review): key is spelled "SCLE"; keep unless the consumer is renamed.
  MIN_LOSS_SCLE: 2048.0
  LOSS_SCALE_WINDOW: 200
####################################### lr scheduler #######################################
# Learning-rate schedule: scheduler name, warmup length and minimum rate.
LR_SCHEDULER:
  NAME: 'WarmupCosine'
  WARMUP: 2000
  MIN_LR: 0.000001
# NOTE(review): presumably forwarded to the distributed data-parallel wrapper's
# option of the same name; confirm where the training code reads it.
find_unused_parameters: true
# Mixture-of-experts settings: gating type, expert count/capacity, which
# sub-layers carry experts, and backend flags.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm it against the schema the consuming code expects.
MOE:
  MOE: true
  MOE_TYPE: 'attribute'
  TAG_Transform: true
  ATTRIBUTE_LENGTH: 8
  EP_WORLD_SIZE: 1  # tag moe only
  NUM_EXPERTS: 8
  TOP_K: 2
  CAPACITY_FACTOR: 3.0
  EVAL_MIN_CAPACITY: 4.0
  MIN_CAPACITY: 4
  NOISY_GATE_POLICY: 'vmoe'
  MOE_PARAM_GROUP: true
  # Comma-separated list of expert types.
  MOE_EXPERT_TYPE: 'FFN,SA'
  SA_LINEAR_OUT_MOE: true
  MOE_EXPERT_LOCATION: 'odd'  # 'odd'
  # MOE_LAYER_START_IDX: 3
  # MOE_LAYER_END_IDX: 21
  # MOE_LAYER_START_IDX: 18
  # MOE_LAYER_END_IDX: 12
  BATCH_PRIO: true
  USE_TUTEL: true
  FFN_SHARE_GATE_DECISION: true