{
  "framework": "tensorflow",
  "task": "translation",
  "pipeline": {
    "type": "csanmt-translation"
  },
  "model": {
    "type": "csanmt-translation",
    "hidden_size": 1024,
    "filter_size": 4096,
    "num_heads": 16,
    "num_encoder_layers": 24,
    "num_decoder_layers": 6,
    "attention_dropout": 0.0,
    "residual_dropout": 0.0,
    "relu_dropout": 0.0,
    "layer_preproc": "layer_norm",
    "layer_postproc": "none",
    "shared_embedding_and_softmax_weights": true,
    "shared_source_target_embedding": true,
    "initializer_scale": 0.1,
    "position_info_type": "absolute",
    "max_relative_dis": 16,
    "num_semantic_encoder_layers": 4,
    "src_vocab_size": 50000,
    "trg_vocab_size": 50000,
    "seed": 1234,
    "beam_size": 4,
    "lp_rate": 0.6,
    "max_decoded_trg_len": 100
  },
  "dataset": {
    "train_src": "train.zh",
    "train_trg": "train.en",
    "src_vocab": {
      "file": "src_vocab.txt"
    },
    "trg_vocab": {
      "file": "trg_vocab.txt"
    }
  },
  "preprocessor": {
    "src_lang": "zh",
    "tgt_lang": "en",
    "src_bpe": {
      "file": "bpe.zh"
    }
  },
  "train": {
    "num_gpus": 0,
    "warmup_steps": 4000,
    "update_cycle": 1,
    "keep_checkpoint_max": 1,
    "confidence": 0.9,
    "optimizer": "adam",
    "adam_beta1": 0.9,
    "adam_beta2": 0.98,
    "adam_epsilon": 1e-9,
    "gradient_clip_norm": 0.0,
    "learning_rate_decay": "linear_warmup_rsqrt_decay",
    "initializer": "uniform_unit_scaling",
    "initializer_scale": 0.1,
    "learning_rate": 1.0,
    "train_batch_size_words": 1024,
    "scale_l1": 0.0,
    "scale_l2": 0.0,
    "train_max_len": 100,
    "num_of_epochs": 2,
    "save_checkpoints_steps": 1000,
    "num_of_samples": 4,
    "eta": 0.6
  },
  "evaluation": {
    "beam_size": 4,
    "lp_rate": 0.6,
    "max_decoded_trg_len": 100
  }
}