OFA-OCR / fairseq /examples /wav2vec /config /pretraining /wav2vec2_large_librivox.yaml
JustinLin610's picture
first commit
ee21b96
raw
history blame
No virus
1.24 kB
# @package _group_
# Shared run settings: fp16 flag, log output format, and logging interval.
# Children are nested under `common`; written flush-left they would parse as
# unrelated root-level keys and leave `common` null.
common:
  fp16: true
  log_format: json
  log_interval: 200  # NOTE(review): unit is presumably training updates — confirm
# Checkpoint policy. Going by the key names: save on an update-count interval,
# retain one interval checkpoint, and skip per-epoch checkpoints.
# NOTE(review): confirm exact semantics against the consumer's checkpoint options.
checkpoint:
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true
# Task selection and sample-size bounds for the input data.
# `_name` must be nested here: several sections define their own `_name`, and
# at root level they would collide as duplicate keys.
task:
  _name: audio_pretraining
  # `???` is OmegaConf's mandatory-value marker (assuming this file is loaded
  # through Hydra/OmegaConf, as the `@package` header suggests): a value for
  # `data` has to be supplied by the launcher or a command-line override.
  data: ???
  # NOTE(review): sizes are presumably counted in raw audio samples — confirm.
  max_sample_size: 320000
  min_sample_size: 32000
  normalize: true
# Batching and data-loading settings. Both `batch_size` and `max_tokens` are
# set; NOTE(review): how the consumer combines the two limits is not visible
# from this file — confirm.
dataset:
  batch_size: 4
  num_workers: 6
  max_tokens: 1200000
  skip_invalid_size_inputs_valid_test: true
# Distributed-training settings: world size of 128 and the `legacy_ddp` backend.
# NOTE(review): world size presumably counts worker processes/devices — confirm
# and override when running on a smaller cluster.
distributed_training:
  distributed_world_size: 128
  ddp_backend: legacy_ddp
# Training criterion: `wav2vec` with `infonce` enabled.
# `log_keys` lists three extra values to log; `loss_weights` holds two weights.
# NOTE(review): which auxiliary loss each weight applies to is not visible
# here — confirm against the criterion implementation.
criterion:
  _name: wav2vec
  infonce: true
  log_keys: ["prob_perplexity", "code_perplexity", "temp"]
  loss_weights: [0.1, 0]
# Optimization schedule: update budget and learning rate.
# `lr` is a one-element list, not a bare scalar.
optimization:
  max_update: 1000000
  lr: [0.005]
# Optimizer: Adam with weight decay.
optimizer:
  _name: adam
  # Plain scalar, so YAML yields the string "(0.9,0.98)" rather than a
  # sequence. Kept verbatim — presumably the consumer parses the tuple
  # literal itself; verify before changing the form.
  adam_betas: (0.9,0.98)
  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
  # read a dot-less `1e-06` as a string, while `1.0e-06` is a float under
  # both YAML 1.1 and 1.2 loaders. The numeric value is unchanged.
  adam_eps: 1.0e-06
  weight_decay: 0.01
# Learning-rate schedule: `polynomial_decay` with 32000 warmup updates.
lr_scheduler:
  _name: polynomial_decay
  warmup_updates: 32000
# Model definition: `wav2vec2` with a 24-layer encoder
# (embed dim 1024, FFN dim 4096, 16 attention heads).
model:
  _name: wav2vec2
  quantize_targets: true
  extractor_mode: layer_norm
  layer_norm_first: true
  final_dim: 768
  # Three-element list; NOTE(review): presumably (start, end, decay factor)
  # for a temperature schedule — confirm against the model's options.
  latent_temp: [2.0, 0.1, 0.999995]

  # Every dropout-style rate in this config is zero.
  encoder_layerdrop: 0.0
  dropout_input: 0.0
  dropout_features: 0.0
  dropout: 0.0
  attention_dropout: 0.0

  conv_bias: true

  # Encoder size.
  encoder_layers: 24
  encoder_embed_dim: 1024
  encoder_ffn_embed_dim: 4096
  encoder_attention_heads: 16

  feature_grad_mult: 1.0