# grammar_finetuning_es_gn/s2s_adjusted_es_gn_s2s.npz.yml
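# Training configuration for the Spanish→Guarani (es→gn) s2s model; judging by the
# .npz.yml file name, this appears to be the config Marian dumps alongside the
# checkpoint referenced under `model:` below.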
authors: false
cite: false
build-info: ""
workspace: 6500
log: /docker/home/logs/marian/log_s2s_adjusted_es_gn_s2s.log
log-level: info
log-time-zone: ""
quiet: false
quiet-translation: false
seed: 38006
check-nan: false
interpolate-env-vars: false
relative-paths: false
dump-config: ""
sigterm: save-and-exit
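# Model architecture: an RNN encoder-decoder (type: s2s) with a 3-layer bidirectional
# GRU encoder, a 3-layer GRU decoder, 512-dim embeddings tied across source, target
# and output, 1024-dim hidden states, skip connections and layer normalization;
# both vocabularies are limited to 6000 entries.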
model: /docker/home/reproduce_best_models/model_s2s_adjusted_es_gn_s2s/s2s_adjusted_es_gn_s2s.npz
pretrained-model: ""
ignore-model-config: false
type: s2s
dim-vocabs:
- 6000
- 6000
dim-emb: 512
factors-dim-emb: 0
factors-combine: sum
lemma-dependency: ""
lemma-dim-emb: 0
dim-rnn: 1024
enc-type: bidirectional
enc-cell: gru
enc-cell-depth: 1
enc-depth: 3
dec-cell: gru
dec-cell-base-depth: 2
dec-cell-high-depth: 1
dec-depth: 3
skip: true
layer-normalization: true
right-left: false
input-types: []
tied-embeddings: false
tied-embeddings-src: false
tied-embeddings-all: true
output-omit-bias: false
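# The transformer-* and bert-* options below are left at Marian's defaults; they are
# written out as part of the full option set and should have no effect on the s2s
# (RNN) model type.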
transformer-heads: 8
transformer-no-projection: false
transformer-pool: false
transformer-dim-ffn: 2048
transformer-decoder-dim-ffn: 0
transformer-ffn-depth: 2
transformer-decoder-ffn-depth: 0
transformer-ffn-activation: swish
transformer-dim-aan: 2048
transformer-aan-depth: 2
transformer-aan-activation: swish
transformer-aan-nogate: false
transformer-decoder-autoreg: self-attention
transformer-tied-layers: []
transformer-guided-alignment-layer: last
transformer-preprocess: ""
transformer-postprocess-emb: d
transformer-postprocess: dan
transformer-postprocess-top: ""
transformer-train-position-embeddings: false
transformer-depth-scaling: false
bert-mask-symbol: "[MASK]"
bert-sep-symbol: "[SEP]"
bert-class-symbol: "[CLS]"
bert-masking-fraction: 0.15
bert-train-type-embeddings: true
bert-type-vocab-size: 2
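# Dropout for the RNN model: 0.2 on recurrent connections and 0.1 whole-word dropout
# on source and target; the transformer dropout options are left at 0.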
dropout-rnn: 0.2
dropout-src: 0.1
dropout-trg: 0.1
transformer-dropout: 0
transformer-dropout-attention: 0
transformer-dropout-ffn: 0
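# Training objective: cross-entropy summed over target labels (ce-sum).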
cost-type: ce-sum
multi-loss-type: sum
unlikelihood-loss: false
overwrite: true
no-reload: false
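# Training data: parallel Spanish (source) and Guarani (target) corpora, segmented
# with separate SentencePiece models of 6000 tokens each.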
train-sets:
- /docker/home/artifacts/data/train/train.es
- /docker/home/artifacts/data/train/train.gn
vocabs:
- /docker/home/reproduce_best_models/pretrain_test_vocab.esV6000_6000.spm
- /docker/home/reproduce_best_models/pretrain_test_vocab.gnV6000_6000.spm
sentencepiece-alphas: []
sentencepiece-options: ""
sentencepiece-max-lines: 2000000
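# Schedule and reporting: train for up to 210 epochs, log every 1000 updates and
# save every 10000 updates; with overwrite: true only the latest checkpoint is kept.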
after-epochs: 210
after-batches: 0
after: 0e
disp-freq: 1000u
disp-first: 0
disp-label-counts: true
save-freq: 10000u
logical-epoch:
- 1e
- 0
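# Data handling: sentences longer than 153 tokens are cropped rather than discarded,
# and the corpus is shuffled at the data level.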
max-length: 153
max-length-crop: true
tsv: false
tsv-fields: 0
shuffle: data
no-restore-corpus: true
tempdir: /docker/home/libs
sqlite: ""
sqlite-drop: false
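# Devices and batching: a single GPU (device 0); the mini-batch size is fitted
# automatically to the 6500 MB workspace, with maxi-batches of 1000 sentences
# sorted by target length.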
devices:
- 0
num-devices: 0
no-nccl: false
sharding: global
sync-freq: 200u
cpu-threads: 0
mini-batch: 64
mini-batch-words: 0
mini-batch-fit: true
mini-batch-fit-step: 10
gradient-checkpointing: false
maxi-batch: 1000
maxi-batch-sort: trg
shuffle-in-ram: false
data-threads: 8
all-caps-every: 0
english-title-case-every: 0
mini-batch-words-ref: 0
mini-batch-warmup: 0
mini-batch-track-lr: false
mini-batch-round-up: true
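# Optimizer: Adam with a fixed learning rate of ~1.31e-4 (the unrounded value
# suggests it came from a hyper-parameter search), no warmup, and lr-decay: 0,
# so the epoch+stalled decay strategy never fires.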
optimizer: adam
optimizer-params: []
optimizer-delay: 1
sync-sgd: false
learn-rate: 0.0001307597
lr-report: false
lr-decay: 0
lr-decay-strategy: epoch+stalled
lr-decay-start:
- 10
- 1
lr-decay-freq: 50000
lr-decay-reset-optimizer: false
lr-decay-repeat-warmup: false
lr-decay-inv-sqrt:
- 0
lr-warmup: 0
lr-warmup-start-rate: 0
lr-warmup-cycle: false
lr-warmup-at-reload: false
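# Regularization and stability: label smoothing 0.1, gradient clipping at norm 1,
# and exponential smoothing of the model weights.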
label-smoothing: 0.1
factor-weight: 1
clip-norm: 1
exponential-smoothing: 0.0001
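# Guided alignment, data weighting and pre-trained embedding vectors are all disabled.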
guided-alignment: none
guided-alignment-cost: mse
guided-alignment-weight: 0.1
data-weighting: ""
data-weighting-type: sentence
embedding-vectors: []
embedding-normalization: false
embedding-fix-src: false
embedding-fix-trg: false
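# Mixed precision: float16 compute with a float32 optimizer, protected by dynamic
# cost (loss) scaling; the quantization and ULR options below are disabled.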
precision:
- float16
- float32
cost-scaling:
- 256.f
- 1000
- 2.f
- 256.f
gradient-norm-average-window: 100
dynamic-gradient-scaling: []
check-gradient-nan: false
normalize-gradient: false
train-embedder-rank: []
quantize-bits: 0
quantize-optimization-steps: 0
quantize-log-based: false
quantize-biases: false
ulr: false
ulr-query-vectors: ""
ulr-keys-vectors: ""
ulr-trainable-transformation: false
ulr-dim-emb: 0
ulr-dropout: 0
ulr-softmax-temperature: 1
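# Validation: cross-entropy plus the translation validator, which decodes the
# held-out set with beam size 12 and writes it to decoded_adjusted_es_gn_s2s.txt.
# With valid-freq at 50M updates and early-stopping at 10000 stalls, validation
# effectively never stops training early; the 210-epoch limit governs instead.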
valid-sets:
- /docker/home/artifacts/data/validation/valid.es
- /docker/home/artifacts/data/validation/valid.gn
valid-freq: 50000000
valid-metrics:
- cross-entropy
- translation
valid-reset-stalled: false
early-stopping: 10000
early-stopping-on: first
beam-size: 12
normalize: 0
max-length-factor: 3
word-penalty: 0
allow-unk: false
n-best: false
word-scores: false
valid-mini-batch: 32
valid-max-length: 1000
valid-script-path: ""
valid-script-args: []
valid-translation-output: /docker/home/reproduce_best_models/decoded_adjusted_es_gn_s2s.txt
keep-best: false
valid-log: ""