# grammar_finetuning_es_gn/s2s_adjusted_es_gn_s2s.npz.yml
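# Training configuration for the Spanish→Guarani (es→gn) s2s model; judging by the
# .npz.yml file name, this appears to be the config Marian dumps alongside the
# checkpoint referenced under `model:` below.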
authors: false
cite: false
build-info: ""
workspace: 6500
log: /docker/home/logs/marian/log_s2s_adjusted_es_gn_s2s.log
log-level: info
log-time-zone: ""
quiet: false
quiet-translation: false
seed: 38006
check-nan: false
interpolate-env-vars: false
relative-paths: false
dump-config: ""
sigterm: save-and-exit
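# Model architecture: an RNN encoder-decoder (type: s2s) with a 3-layer bidirectional
# GRU encoder, a 3-layer GRU decoder, 512-dim embeddings tied across source, target
# and output, 1024-dim hidden states, skip connections and layer normalization;
# both vocabularies are limited to 6000 entries.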
model: /docker/home/reproduce_best_models/model_s2s_adjusted_es_gn_s2s/s2s_adjusted_es_gn_s2s.npz
pretrained-model: ""
ignore-model-config: false
type: s2s
dim-vocabs:
- 6000
- 6000
dim-emb: 512
factors-dim-emb: 0
factors-combine: sum
lemma-dependency: ""
lemma-dim-emb: 0
dim-rnn: 1024
enc-type: bidirectional
enc-cell: gru
enc-cell-depth: 1
enc-depth: 3
dec-cell: gru
dec-cell-base-depth: 2
dec-cell-high-depth: 1
dec-depth: 3
skip: true
layer-normalization: true
right-left: false
input-types: []
tied-embeddings: false
tied-embeddings-src: false
tied-embeddings-all: true
output-omit-bias: false
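# The transformer-* and bert-* options below are left at Marian's defaults; they are
# written out as part of the full option set and should have no effect on the s2s
# (RNN) model type.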
transformer-heads: 8
transformer-no-projection: false
transformer-pool: false
transformer-dim-ffn: 2048
transformer-decoder-dim-ffn: 0
transformer-ffn-depth: 2
transformer-decoder-ffn-depth: 0
transformer-ffn-activation: swish
transformer-dim-aan: 2048
transformer-aan-depth: 2
transformer-aan-activation: swish
transformer-aan-nogate: false
transformer-decoder-autoreg: self-attention
transformer-tied-layers: []
transformer-guided-alignment-layer: last
transformer-preprocess: ""
transformer-postprocess-emb: d
transformer-postprocess: dan
transformer-postprocess-top: ""
transformer-train-position-embeddings: false
transformer-depth-scaling: false
bert-mask-symbol: "[MASK]"
bert-sep-symbol: "[SEP]"
bert-class-symbol: "[CLS]"
bert-masking-fraction: 0.15
bert-train-type-embeddings: true
bert-type-vocab-size: 2
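# Dropout for the RNN model: 0.2 on recurrent connections and 0.1 whole-word dropout
# on source and target; the transformer dropout options are left at 0.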
dropout-rnn: 0.2
dropout-src: 0.1
dropout-trg: 0.1
transformer-dropout: 0
transformer-dropout-attention: 0
transformer-dropout-ffn: 0
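# Training objective: cross-entropy summed over target labels (ce-sum).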
cost-type: ce-sum
multi-loss-type: sum
unlikelihood-loss: false
overwrite: true
no-reload: false
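# Training data: parallel Spanish (source) and Guarani (target) corpora, segmented
# with separate SentencePiece models of 6000 tokens each.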
train-sets:
- /docker/home/artifacts/data/train/train.es
- /docker/home/artifacts/data/train/train.gn
vocabs:
- /docker/home/reproduce_best_models/pretrain_test_vocab.esV6000_6000.spm
- /docker/home/reproduce_best_models/pretrain_test_vocab.gnV6000_6000.spm
sentencepiece-alphas: []
sentencepiece-options: ""
sentencepiece-max-lines: 2000000
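# Schedule and reporting: train for up to 210 epochs, log every 1000 updates and
# save every 10000 updates; with overwrite: true only the latest checkpoint is kept.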
after-epochs: 210
after-batches: 0
after: 0e
disp-freq: 1000u
disp-first: 0
disp-label-counts: true
save-freq: 10000u
logical-epoch:
- 1e
- 0
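# Data handling: sentences longer than 153 tokens are cropped rather than discarded,
# and the corpus is shuffled at the data level.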
max-length: 153
max-length-crop: true
tsv: false
tsv-fields: 0
shuffle: data
no-restore-corpus: true
tempdir: /docker/home/libs
sqlite: ""
sqlite-drop: false
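# Devices and batching: a single GPU (device 0); the mini-batch size is fitted
# automatically to the 6500 MB workspace, with maxi-batches of 1000 sentences
# sorted by target length.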
devices:
- 0
num-devices: 0
no-nccl: false
sharding: global
sync-freq: 200u
cpu-threads: 0
mini-batch: 64
mini-batch-words: 0
mini-batch-fit: true
mini-batch-fit-step: 10
gradient-checkpointing: false
maxi-batch: 1000
maxi-batch-sort: trg
shuffle-in-ram: false
data-threads: 8
all-caps-every: 0
english-title-case-every: 0
mini-batch-words-ref: 0
mini-batch-warmup: 0
mini-batch-track-lr: false
mini-batch-round-up: true
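# Optimizer: Adam with a fixed learning rate of ~1.31e-4 (the unrounded value
# suggests it came from a hyper-parameter search), no warmup, and lr-decay: 0,
# so the epoch+stalled decay strategy never fires.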
optimizer: adam
optimizer-params: []
optimizer-delay: 1
sync-sgd: false
learn-rate: 0.0001307597
lr-report: false
lr-decay: 0
lr-decay-strategy: epoch+stalled
lr-decay-start:
- 10
- 1
lr-decay-freq: 50000
lr-decay-reset-optimizer: false
lr-decay-repeat-warmup: false
lr-decay-inv-sqrt:
- 0
lr-warmup: 0
lr-warmup-start-rate: 0
lr-warmup-cycle: false
lr-warmup-at-reload: false
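# Regularization and stability: label smoothing 0.1, gradient clipping at norm 1,
# and exponential smoothing of the model weights.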
label-smoothing: 0.1
factor-weight: 1
clip-norm: 1
exponential-smoothing: 0.0001
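# Guided alignment, data weighting and pre-trained embedding vectors are all disabled.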
guided-alignment: none
guided-alignment-cost: mse
guided-alignment-weight: 0.1
data-weighting: ""
data-weighting-type: sentence
embedding-vectors: []
embedding-normalization: false
embedding-fix-src: false
embedding-fix-trg: false
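# Mixed precision: float16 compute with a float32 optimizer, protected by dynamic
# cost (loss) scaling; the quantization and ULR options below are disabled.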
precision:
- float16
- float32
cost-scaling:
- 256.f
- 1000
- 2.f
- 256.f
gradient-norm-average-window: 100
dynamic-gradient-scaling: []
check-gradient-nan: false
normalize-gradient: false
train-embedder-rank: []
quantize-bits: 0
quantize-optimization-steps: 0
quantize-log-based: false
quantize-biases: false
ulr: false
ulr-query-vectors: ""
ulr-keys-vectors: ""
ulr-trainable-transformation: false
ulr-dim-emb: 0
ulr-dropout: 0
ulr-softmax-temperature: 1
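# Validation: cross-entropy plus the translation validator, which decodes the
# held-out set with beam size 12 and writes it to decoded_adjusted_es_gn_s2s.txt.
# With valid-freq at 50M updates and early-stopping at 10000 stalls, validation
# effectively never stops training early; the 210-epoch limit governs instead.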
valid-sets:
- /docker/home/artifacts/data/validation/valid.es
- /docker/home/artifacts/data/validation/valid.gn
valid-freq: 50000000
valid-metrics:
- cross-entropy
- translation
valid-reset-stalled: false
early-stopping: 10000
early-stopping-on: first
beam-size: 12
normalize: 0
max-length-factor: 3
word-penalty: 0
allow-unk: false
n-best: false
word-scores: false
valid-mini-batch: 32
valid-max-length: 1000
valid-script-path: ""
valid-script-args: []
valid-translation-output: /docker/home/reproduce_best_models/decoded_adjusted_es_gn_s2s.txt
keep-best: false
valid-log: ""