|
authors: false |
|
cite: false |
|
build-info: "" |
|
workspace: 6500 |
|
log: /docker/home/logs/marian/log_s2s_adjusted_es_gn_s2s.log |
|
log-level: info |
|
log-time-zone: "" |
|
quiet: false |
|
quiet-translation: false |
|
seed: 38006 |
|
check-nan: false |
|
interpolate-env-vars: false |
|
relative-paths: false |
|
dump-config: "" |
|
sigterm: save-and-exit |
|
model: /docker/home/reproduce_best_models/model_s2s_adjusted_es_gn_s2s/s2s_adjusted_es_gn_s2s.npz |
|
pretrained-model: "" |
|
ignore-model-config: false |
|
type: s2s |
|
dim-vocabs: |
|
- 6000 |
|
- 6000 |
|
dim-emb: 512 |
|
factors-dim-emb: 0 |
|
factors-combine: sum |
|
lemma-dependency: "" |
|
lemma-dim-emb: 0 |
|
dim-rnn: 1024 |
|
enc-type: bidirectional |
|
enc-cell: gru |
|
enc-cell-depth: 1 |
|
enc-depth: 3 |
|
dec-cell: gru |
|
dec-cell-base-depth: 2 |
|
dec-cell-high-depth: 1 |
|
dec-depth: 3 |
|
skip: true |
|
layer-normalization: true |
|
right-left: false |
|
input-types: [] |
|
tied-embeddings: false |
|
tied-embeddings-src: false |
|
tied-embeddings-all: true |
|
output-omit-bias: false |
|
transformer-heads: 8 |
|
transformer-no-projection: false |
|
transformer-pool: false |
|
transformer-dim-ffn: 2048 |
|
transformer-decoder-dim-ffn: 0 |
|
transformer-ffn-depth: 2 |
|
transformer-decoder-ffn-depth: 0 |
|
transformer-ffn-activation: swish |
|
transformer-dim-aan: 2048 |
|
transformer-aan-depth: 2 |
|
transformer-aan-activation: swish |
|
transformer-aan-nogate: false |
|
transformer-decoder-autoreg: self-attention |
|
transformer-tied-layers: [] |
|
transformer-guided-alignment-layer: last |
|
transformer-preprocess: "" |
|
transformer-postprocess-emb: d |
|
transformer-postprocess: dan |
|
transformer-postprocess-top: "" |
|
transformer-train-position-embeddings: false |
|
transformer-depth-scaling: false |
|
bert-mask-symbol: "[MASK]" |
|
bert-sep-symbol: "[SEP]" |
|
bert-class-symbol: "[CLS]" |
|
bert-masking-fraction: 0.15 |
|
bert-train-type-embeddings: true |
|
bert-type-vocab-size: 2 |
|
dropout-rnn: 0.2 |
|
dropout-src: 0.1 |
|
dropout-trg: 0.1 |
|
transformer-dropout: 0 |
|
transformer-dropout-attention: 0 |
|
transformer-dropout-ffn: 0 |
|
cost-type: ce-sum |
|
multi-loss-type: sum |
|
unlikelihood-loss: false |
|
overwrite: true |
|
no-reload: false |
|
train-sets: |
|
- /docker/home/artifacts/data/train/train.es |
|
- /docker/home/artifacts/data/train/train.gn |
|
vocabs: |
|
- /docker/home/reproduce_best_models/pretrain_test_vocab.esV6000_6000.spm |
|
- /docker/home/reproduce_best_models/pretrain_test_vocab.gnV6000_6000.spm |
|
sentencepiece-alphas: [] |
|
sentencepiece-options: "" |
|
sentencepiece-max-lines: 2000000 |
|
after-epochs: 210 |
|
after-batches: 0 |
|
after: 0e |
|
disp-freq: 1000u |
|
disp-first: 0 |
|
disp-label-counts: true |
|
save-freq: 10000u |
|
logical-epoch: |
|
- 1e |
|
- 0 |
|
max-length: 153 |
|
max-length-crop: true |
|
tsv: false |
|
tsv-fields: 0 |
|
shuffle: data |
|
no-restore-corpus: true |
|
tempdir: /docker/home/libs |
|
sqlite: "" |
|
sqlite-drop: false |
|
devices: |
|
- 0 |
|
num-devices: 0 |
|
no-nccl: false |
|
sharding: global |
|
sync-freq: 200u |
|
cpu-threads: 0 |
|
mini-batch: 64 |
|
mini-batch-words: 0 |
|
mini-batch-fit: true |
|
mini-batch-fit-step: 10 |
|
gradient-checkpointing: false |
|
maxi-batch: 1000 |
|
maxi-batch-sort: trg |
|
shuffle-in-ram: false |
|
data-threads: 8 |
|
all-caps-every: 0 |
|
english-title-case-every: 0 |
|
mini-batch-words-ref: 0 |
|
mini-batch-warmup: 0 |
|
mini-batch-track-lr: false |
|
mini-batch-round-up: true |
|
optimizer: adam |
|
optimizer-params: [] |
|
optimizer-delay: 1 |
|
sync-sgd: false |
|
learn-rate: 0.0001307597 |
|
lr-report: false |
|
lr-decay: 0 |
|
lr-decay-strategy: epoch+stalled |
|
lr-decay-start: |
|
- 10 |
|
- 1 |
|
lr-decay-freq: 50000 |
|
lr-decay-reset-optimizer: false |
|
lr-decay-repeat-warmup: false |
|
lr-decay-inv-sqrt: |
|
- 0 |
|
lr-warmup: 0 |
|
lr-warmup-start-rate: 0 |
|
lr-warmup-cycle: false |
|
lr-warmup-at-reload: false |
|
label-smoothing: 0.1 |
|
factor-weight: 1 |
|
clip-norm: 1 |
|
exponential-smoothing: 0.0001 |
|
guided-alignment: none |
|
guided-alignment-cost: mse |
|
guided-alignment-weight: 0.1 |
|
data-weighting: "" |
|
data-weighting-type: sentence |
|
embedding-vectors: [] |
|
embedding-normalization: false |
|
embedding-fix-src: false |
|
embedding-fix-trg: false |
|
precision: |
|
- float16 |
|
- float32 |
|
cost-scaling: |
|
- 256.f |
|
- 1000 |
|
- 2.f |
|
- 256.f |
|
gradient-norm-average-window: 100 |
|
dynamic-gradient-scaling: [] |
|
check-gradient-nan: false |
|
normalize-gradient: false |
|
train-embedder-rank: [] |
|
quantize-bits: 0 |
|
quantize-optimization-steps: 0 |
|
quantize-log-based: false |
|
quantize-biases: false |
|
ulr: false |
|
ulr-query-vectors: "" |
|
ulr-keys-vectors: "" |
|
ulr-trainable-transformation: false |
|
ulr-dim-emb: 0 |
|
ulr-dropout: 0 |
|
ulr-softmax-temperature: 1 |
|
valid-sets: |
|
- /docker/home/artifacts/data/validation/valid.es |
|
- /docker/home/artifacts/data/validation/valid.gn |
|
valid-freq: 50000000 |
|
valid-metrics: |
|
- cross-entropy |
|
- translation |
|
valid-reset-stalled: false |
|
early-stopping: 10000 |
|
early-stopping-on: first |
|
beam-size: 12 |
|
normalize: 0 |
|
max-length-factor: 3 |
|
word-penalty: 0 |
|
allow-unk: false |
|
n-best: false |
|
word-scores: false |
|
valid-mini-batch: 32 |
|
valid-max-length: 1000 |
|
valid-script-path: "" |
|
valid-script-args: [] |
|
valid-translation-output: /docker/home/reproduce_best_models/decoded_adjusted_es_gn_s2s.txt |
|
keep-best: false |
|
valid-log: "" |