authors: false
cite: false
build-info: ""
workspace: -8000
log: train.log
log-level: info
log-time-zone: PST8PDT
quiet: false
quiet-translation: true
seed: 141414
check-nan: false
interpolate-env-vars: true
relative-paths: false
dump-config: ""
sigterm: save-and-exit
model: model_files/model.npz
pretrained-model: ""
ignore-model-config: false
type: lm-transformer
dim-vocabs:
  - 16000
dim-emb: 1024
factors-dim-emb: 0
factors-combine: sum
lemma-dependency: ""
lemma-dim-emb: 0
dim-rnn: 1024
enc-type: bidirectional
enc-cell: gru
enc-cell-depth: 1
enc-depth: 1
dec-cell: gru
dec-cell-base-depth: 2
dec-cell-high-depth: 1
dec-depth: 12
skip: false
layer-normalization: false
right-left: false
input-types:
  []
tied-embeddings: true
tied-embeddings-src: false
tied-embeddings-all: true
output-omit-bias: true
transformer-heads: 8
transformer-no-projection: false
transformer-rnn-projection: false
transformer-pool: false
transformer-dim-ffn: 8192
transformer-decoder-dim-ffn: 8192
transformer-ffn-depth: 2
transformer-decoder-ffn-depth: 0
transformer-ffn-activation: relu
transformer-dim-aan: 2048
transformer-aan-depth: 2
transformer-aan-activation: swish
transformer-aan-nogate: false
transformer-decoder-autoreg: self-attention
transformer-tied-layers: []
transformer-guided-alignment-layer: last
transformer-preprocess: ""
transformer-postprocess-emb: d
transformer-postprocess: dan
transformer-postprocess-top: ""
transformer-train-position-embeddings: false
transformer-depth-scaling: true
transformer-no-bias: false
transformer-no-affine: false
bert-mask-symbol: "[MASK]"
bert-sep-symbol: "[SEP]"
bert-class-symbol: "[CLS]"
bert-masking-fraction: 0.15
bert-train-type-embeddings: true
bert-type-vocab-size: 2
comet-final-sigmoid: false
comet-mix: false
comet-mix-norm: false
comet-dropout: 0.1
comet-mixup: 0
comet-mixup-reg: false
comet-pooler-ffn:
  - 2048
  - 1024
comet-prepend-zero: false
dropout-rnn: 0
dropout-src: 0
dropout-trg: 0
transformer-dropout: 0.1
transformer-dropout-attention: 0
transformer-dropout-ffn: 0.1
cost-type: ce-sum
multi-loss-type: sum
unlikelihood-loss: false
overwrite: false
overwrite-checkpoint: true
no-reload: false
train-sets:
  - stdin
vocabs:
  - vocab
sentencepiece-alphas:
  []
sentencepiece-options: ""
sentencepiece-max-lines: 2000000
no-spm-encode: false
after-epochs: 0
after-batches: 0
after: 40e
disp-freq: 100Mt
disp-first: 10
disp-label-counts: true
save-freq: 1Gt
logical-epoch:
  - 1Gt
max-length: 256
max-length-crop: false
tsv: true
tsv-fields: 1
shuffle: batches
no-restore-corpus: true
tempdir: /tmp
sqlite: ""
sqlite-drop: false
devices:
  - 0
  - 1
no-nccl: false
sharding: local
sync-freq: 200u
cpu-threads: 0
mini-batch: 1000
mini-batch-words: 500000
mini-batch-fit: true
mini-batch-fit-step: 5
gradient-checkpointing: false
maxi-batch: 1000
maxi-batch-sort: trg
shuffle-in-ram: true
data-threads: 8
all-caps-every: 0
english-title-case-every: 0
mini-batch-words-ref: 0
mini-batch-warmup: 4000
mini-batch-track-lr: false
mini-batch-round-up: true
optimizer: adam
optimizer-params:
  - 0.9
  - 0.999
  - 1e-08
  - 0.01
optimizer-delay: 1
sync-sgd: true
learn-rate: 0.0005
lr-report: true
lr-decay: 0
lr-decay-strategy: epoch+stalled
lr-decay-start:
  - 10
  - 1
lr-decay-freq: 50000
lr-decay-reset-optimizer: false
lr-decay-repeat-warmup: false
lr-decay-inv-sqrt:
  - 4000
lr-warmup: 4000
lr-warmup-start-rate: 0
lr-warmup-cycle: false
lr-warmup-at-reload: false
label-smoothing: 0.1
factor-weight: 1
clip-norm: 0
exponential-smoothing: 1e-3
exponential-smoothing-replace-freq: 0
guided-alignment: none
guided-alignment-cost: ce
guided-alignment-weight: 0
data-weighting: ""
data-weighting-type: sentence
embedding-vectors:
  []
embedding-normalization: false
embedding-fix-src: false
embedding-fix-trg: false
precision:
  - float32
  - float32
cost-scaling:
  - 256.f
  - 10000
  - 1.f
  - 256.f
throw-on-divergence:
  []
custom-fallbacks:
  []
gradient-norm-average-window: 100
dynamic-gradient-scaling:
  - 2
  - log
check-gradient-nan: false
normalize-gradient: false
train-embedder-rank:
  []
quantize-bits: 0
quantize-optimization-steps: 0
quantize-log-based: false
quantize-biases: false
ulr: false
ulr-query-vectors: ""
ulr-keys-vectors: ""
ulr-trainable-transformation: false
ulr-dim-emb: 0
ulr-dropout: 0
ulr-softmax-temperature: 1
valid-sets:
  - dev.de
valid-freq: 1Gt
valid-metrics:
  - perplexity
  - ce-mean-words
  - bleu
  - chrf
valid-reset-stalled: false
valid-reset-all: false
early-stopping: 40
early-stopping-epsilon:
  - 0
early-stopping-on: first
beam-size: 4
normalize: 1.0
max-length-factor: 3
word-penalty: 0.0
allow-unk: false
n-best: false
word-scores: false
valid-mini-batch: 32
valid-max-length: 1000
valid-script-path: ""
valid-script-args:
  []
valid-translation-output: valid.trg.output
keep-best: true
valid-log: valid.log