---
# OpenNMT-py training configuration for a multilingual Transformer model.
# NOTE(review): the previous revision had a stray trailing " | " on every line
# (extraction artifact) and had lost all nesting; both are repaired here.

# Checkpointing
save_model: model_1/multilingual_transformer
save_checkpoint_steps: 5000
keep_checkpoint: 1

# Training and validation corpora. OpenNMT-py requires corpora to be
# nested under `data:` (`valid` is the reserved validation-set key).
data:
  corpus_1:
    path_src: final_train.src.train
    path_tgt: final_train.tgt.train
    transforms: [sentencepiece, filtertoolong, docify]
  valid:
    path_src: final_train.src.dev
    path_tgt: final_train.tgt.dev
    transforms: [sentencepiece, filtertoolong, docify]

# Vocabulary files produced by the build_vocab step.
save_data: data_1
src_vocab: data_1/source.vocab
tgt_vocab: data_1/target.vocab
src_vocab_size: 256000
tgt_vocab_size: 256000

# Distributed training: two GPUs on one node.
world_size: 2
gpu_ranks: [0, 1]

seed: 3435
train_steps: 30000
valid_steps: 5000
report_every: 1000

# Model architecture
model_type: text
model_dtype: "fp32"
encoder_type: transformer
decoder_type: transformer

enc_layers: 8
dec_layers: 8
heads: 16
hidden_size: 768
word_vec_size: 768
transformer_ff: 3072
dropout: [0.1]
attention_dropout: [0.1]

# Default transform pipeline and sequence-length filtering/truncation.
transforms: [sentencepiece, filtertoolong, docify]
src_subword_type: sentencepiece
tgt_subword_type: sentencepiece
src_seq_length: 512
tgt_seq_length: 512
src_seq_length_trunc: 512
tgt_seq_length_trunc: 512

# Optimization: Adam with Noam decay (standard Transformer schedule).
optim: "adam"
bucket_size: 262144
num_workers: 2
learning_rate: 2.0
warmup_steps: 4000
decay_method: "noam"
adam_beta1: 0.9
adam_beta2: 0.998
max_grad_norm: 0.0
label_smoothing: 0.1
param_init: 0
param_init_glorot: true
batch_size: 4096
batch_type: "tokens"
normalization: "tokens"
accum_count: [2]

# SentencePiece models (nbest=1 / alpha=0.0 disables subword sampling).
src_subword_model: source.model
tgt_subword_model: target.model
src_subword_nbest: 1
src_subword_alpha: 0.0
tgt_subword_nbest: 1
tgt_subword_alpha: 0.0

# Positional encoding and attention variants.
position_encoding_type: SinusoidalInterleaved
pos_ffn_activation_fn: gelu
parallel_residual: true
position_encoding: false
max_relative_positions: 32
self_attn_type: scaled-dot
sliding_window: 256
global_attention: general

valid_batch_size: 2048

generator_function: softmax

# Document-level context options for the docify transform.
# NOTE(review): restored as a nested mapping since the flattened original had
# a bare `docify:` directly above these keys — confirm against the OpenNMT-py
# version in use, which may instead expect these options at the top level.
docify:
  doc_length: 200
  max_context: 1

switchout_temperature: 0.2

temperature: 5.0

log_file: train_1.log