# NMT_multilingual_12 / config_1.yaml
# Provenance: Hugging Face upload by Vikrantyadav11234 ("Upload 3 files", commit ef3127b)
# General config
save_model: model_1/multilingual_transformer
# Training files
# NOTE(review): corpus/valid entries must be nested under `data:`. The original
# flat layout left `data:` null and declared path_src/path_tgt/transforms twice
# at top level (duplicate keys — most YAML parsers silently keep only the last,
# discarding the training corpus paths entirely).
data:
  corpus_1:
    path_src: final_train.src.train
    path_tgt: final_train.tgt.train
    transforms: [sentencepiece, filtertoolong, docify]
  valid:
    path_src: final_train.src.dev
    path_tgt: final_train.tgt.dev
    transforms: [sentencepiece, filtertoolong, docify]
# Checkpointing: save every 5000 steps, keep only the most recent checkpoint
save_checkpoint_steps: 5000
keep_checkpoint: 1
# Data config
save_data: data_1
src_vocab: data_1/source.vocab
tgt_vocab: data_1/target.vocab
# Shared 256k vocab sizes for source and target (multilingual setup)
src_vocab_size: 256000
tgt_vocab_size: 256000
# Training parameters
# world_size / gpu_ranks: data-parallel training across 2 GPUs
world_size: 2
gpu_ranks: [0, 1]  # normalized flow-sequence spacing (was "[0 , 1]")
seed: 3435
train_steps: 30000
valid_steps: 5000
report_every: 1000
# Model parameters
model_type: text
model_dtype: "fp32"
# 8-layer / 16-head transformer encoder-decoder, 768-dim model, 3072-dim FFN
encoder_type: transformer
decoder_type: transformer
enc_layers: 8
dec_layers: 8
heads: 16
hidden_size: 768
word_vec_size: 768
transformer_ff: 3072
dropout: [0.1]
attention_dropout: [0.1]
# Global transform pipeline (mirrors the per-corpus transforms lists)
transforms: [sentencepiece, filtertoolong, docify]
src_subword_type: sentencepiece
tgt_subword_type: sentencepiece
# filtertoolong limits and hard truncation length, both 512 tokens per side
# NOTE(review): presumably counted in subword tokens post-sentencepiece — confirm
src_seq_length: 512
tgt_seq_length: 512
src_seq_length_trunc: 512
tgt_seq_length_trunc: 512
# Optimization parameters
# Adam + noam decay: learning_rate acts as a scale factor, not an absolute LR
optim: "adam"
bucket_size: 262144
num_workers: 2
learning_rate: 2.0
warmup_steps: 4000
decay_method: "noam"
adam_beta1: 0.9
adam_beta2: 0.998
max_grad_norm: 0.0
label_smoothing: 0.1
# Glorot initialization (param_init: 0 disables uniform init so glorot applies)
param_init: 0
param_init_glorot: true
# Token-based batching: 4096 tokens per batch, gradients accumulated over 2 batches
batch_size: 4096
batch_type: "tokens"
normalization: "tokens"
accum_count: [2]
# SentencePiece models; nbest=1 with alpha=0.0 gives deterministic segmentation
src_subword_model: source.model
tgt_subword_model: target.model
src_subword_nbest: 1
src_subword_alpha: 0.0
tgt_subword_nbest: 1
tgt_subword_alpha: 0.0
# Additional features
position_encoding_type: SinusoidalInterleaved
pos_ffn_activation_fn: gelu
parallel_residual: true
# NOTE(review): position_encoding is false while position_encoding_type is set —
# presumably relative positions (max_relative_positions: 32) supply position
# information instead of absolute encodings; confirm against OpenNMT-py docs.
position_encoding: false
max_relative_positions: 32
self_attn_type: scaled-dot
# Sliding-window attention width of 256 positions
sliding_window: 256
global_attention: general
# Validation parameters
valid_batch_size: 2048
# Output generator function
generator_function: softmax
# Docify parameters
# NOTE(review): `docify:` is a bare key that parses to null. OpenNMT-py reads
# transform options such as doc_length/max_context from the top level, so the
# values below are still picked up as-is — confirm, and consider removing the
# bare key (it is not a valid nesting point for these options).
docify:
doc_length: 200
max_context: 1
switchout_temperature: 0.2
temperature: 5.0  # Adjust this value to scale temperature (was "temperature : 5.0" — space before colon)
# Logging
log_file: train_1.log