{
    "framework": "tensorflow",
    "task": "translation",
    "pipeline": {
        "type": "csanmt-translation"
    },
    "model": {
        "type": "csanmt-translation",
        "hidden_size": 1024,
        "filter_size": 4096,
        "num_heads": 16,
        "num_encoder_layers": 24,
        "num_decoder_layers": 6,
        "attention_dropout": 0.0,
        "residual_dropout": 0.0,
        "relu_dropout": 0.0,
        "layer_preproc": "layer_norm",
        "layer_postproc": "none",
        "shared_embedding_and_softmax_weights": true,
        "shared_source_target_embedding": true,
        "initializer_scale": 0.1,
        "position_info_type": "absolute",
        "max_relative_dis": 16,
        "num_semantic_encoder_layers": 4,
        "src_vocab_size": 50000,
        "trg_vocab_size": 50000,
        "seed": 1234,
        "beam_size": 4,
        "lp_rate": 0.6,
        "max_decoded_trg_len": 100
    },
    "dataset": {
        "train_src": "train.zh",
        "train_trg": "train.en",
        "src_vocab": {
            "file": "src_vocab.txt"
        },
        "trg_vocab": {
            "file": "trg_vocab.txt"
        }
    },
    "preprocessor": {
        "src_lang": "zh",
        "tgt_lang": "en",
        "src_bpe": {
            "file": "bpe.zh"
        }
    },
    "train": {
        "num_gpus": 0,
        "warmup_steps": 4000,
        "update_cycle": 1,
        "keep_checkpoint_max": 1,
        "confidence": 0.9,
        "optimizer": "adam",
        "adam_beta1": 0.9,
        "adam_beta2": 0.98,
        "adam_epsilon": 1e-9,
        "gradient_clip_norm": 0.0,
        "learning_rate_decay": "linear_warmup_rsqrt_decay",
        "initializer": "uniform_unit_scaling",
        "initializer_scale": 0.1,
        "learning_rate": 1.0,
        "train_batch_size_words": 1024,
        "scale_l1": 0.0,
        "scale_l2": 0.0,
        "train_max_len": 100,
        "num_of_epochs": 2,
        "save_checkpoints_steps": 1000,
        "num_of_samples": 4,
        "eta": 0.6
    },
    "evaluation": {
        "beam_size": 4,
        "lp_rate": 0.6,
        "max_decoded_trg_len": 100
    }
}