Upload train_conformer_large_w2v.yaml

train_conformer_large_w2v.yaml    ADDED    +119 -0
@@ -0,0 +1,119 @@
+# network architecture
+# encoder related
+encoder: conformer
+encoder_conf:
+    output_size: 512    # dimension of attention
+    attention_heads: 8
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 18      # the number of encoder blocks
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.0
+    attention_dropout_rate: 0.0
+    input_layer: conv2d6 # encoder input type, you can choose conv2d, conv2d6 and conv2d8
+    normalize_before: true
+    cnn_module_kernel: 15
+    use_cnn_module: True
+    activation_type: 'swish'
+    macaron_style: True
+    pos_enc_layer_type: 'rel_pos'
+    selfattention_layer_type: 'abs_selfattn'
+    nonorm: False
+    cnn_prev: True
+    cnn_after: False
+
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 1
+    dropout_rate: 0.0
+    positional_dropout_rate: 0.0
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 1.0
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+
+raw_wav: False
+data_save: True
+use_gc: True
+
+w2v_encoder: True
+pretrain: True
+random_pretrain: False
+wav2vec: True
+w2v_coef: 1.0
+
+mpc_didi_ver: False
+wav2mpc: False
+wav2mpc_reduction: False
+mpc_mask_loss: False
+mpc_coef: 0.0
+
+mask: True
+quantize_targets: True
+project_targets: True
+latent_vars: 320
+w2v_reduct: True
+w2v_ext_loss: True
+w2v_loss_weights: [0.1, 0]
+
+w2v_mask_prob: 0.65
+mpc_prob: 0.5
+
+remove_valbest: False
+
+model:
+    method: 'npc'                   # Accepts npc/apc/vqapc
+    paras:
+        kernel_size: 15             # Receptive field size (R) = kernel_size + 2*(n_blocks)
+        mask_size: 5                # Desired input mask size (M_in) as described in NPC paper
+        n_blocks: 4                 # Number of ConvBlocks stacked in NPC model
+        hidden_size: 512            # Dimension of feature of all layers
+        dropout: 0.1                # Dropout in ConvBlock
+        residual: True              # Residual connection in ConvBlock
+        batch_norm: True            # Apply BatchNorm in ConvBlock
+        activate: 'relu'            # Activation function of ConvBlock
+        disable_cross_layer: False  # Apply Masked ConvBlock at last layer only
+        vq:
+            codebook_size: [64, 64, 64, 64]  # Codebook size of each group in VQ-layer
+            code_dim: [128, 128, 128, 128]   # Dim of each group summing up to hidden_size
+            gumbel_temperature: 1.0          # Temperature of Gumbel Softmax in VQ-layer
+
+collate_conf:
+    spec_aug: false
+
+# spec augmentation related
+spec_aug_conf:
+    num_time_mask: 2
+    num_freq_mask: 2
+    max_time_mask: 50
+    max_freq_mask: 10
+    max_time_warp: 80
+    gauss_mask_for_time: False
+    warp_for_time: False
+
+# dataset related
+dataset_conf:
+    max_length: 4500
+    min_length: 80
+    max_frames_in_batch: 16000
+    batch_type: 'dynamic' # static or dynamic
+    batch_size: 20
+    sort: true
+
+grad_clip: 10
+accum_grad: 2
+max_epoch: 180
+log_interval: 100
+
+optim: adam
+optim_conf:
+    lr: 0.001
+scheduler: warmuplr     # pytorch v1.1.0+ required
+scheduler_conf:
+    warmup_steps: 10000
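
For context, here is a minimal sketch of how a training script typically consumes a config like this one, assuming PyYAML is installed; the file path and the printed fields are illustrative only, not part of this upload. It also checks the receptive-field arithmetic stated in the in-file comment (R = kernel_size + 2*n_blocks).

    # Minimal sketch: parse the uploaded YAML and read a few fields.
    # Assumes PyYAML (`pip install pyyaml`); the path is hypothetical.
    import yaml

    with open('train_conformer_large_w2v.yaml', 'r') as f:
        configs = yaml.safe_load(f)

    # Encoder/decoder sections become plain nested dicts.
    enc = configs['encoder_conf']
    print(configs['encoder'], enc['num_blocks'], enc['output_size'])  # conformer 18 512

    # ctc_weight of 1.0 means the hybrid CTC/attention loss reduces to pure CTC here.
    print(configs['model_conf']['ctc_weight'])  # 1.0

    # Receptive field per the in-file comment: R = kernel_size + 2*n_blocks.
    paras = configs['model']['paras']
    receptive_field = paras['kernel_size'] + 2 * paras['n_blocks']  # 15 + 2*4 = 23
    print(receptive_field)

    print(configs['scheduler'], configs['scheduler_conf']['warmup_steps'])  # warmuplr 10000

Note that with batch_type set to 'dynamic', batching is governed by max_frames_in_batch (16000 frames) rather than the static batch_size of 20, which only applies in 'static' mode.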