# NMT_multilingual_12 / config_1.yaml
# Provenance: Hugging Face upload by Vikrantyadav11234 ("Upload 3 files", commit ef3127b)
# General config
save_model: model_1/multilingual_transformer
# Training files
# NOTE(review): corpus/valid entries must be nested under `data:`. The original
# flat layout left `data:` null and declared path_src/path_tgt/transforms twice
# at top level (duplicate keys — most YAML parsers silently keep only the last,
# discarding the training corpus paths entirely).
data:
  corpus_1:
    path_src: final_train.src.train
    path_tgt: final_train.tgt.train
    transforms: [sentencepiece, filtertoolong, docify]
  valid:
    path_src: final_train.src.dev
    path_tgt: final_train.tgt.dev
    transforms: [sentencepiece, filtertoolong, docify]
# Checkpointing: save every 5000 steps, keep only the most recent checkpoint
save_checkpoint_steps: 5000
keep_checkpoint: 1
# Data config
save_data: data_1
src_vocab: data_1/source.vocab
tgt_vocab: data_1/target.vocab
# Shared 256k vocab sizes for source and target (multilingual setup)
src_vocab_size: 256000
tgt_vocab_size: 256000
# Training parameters
# world_size / gpu_ranks: data-parallel training across 2 GPUs
world_size: 2
gpu_ranks: [0, 1]  # normalized flow-sequence spacing (was "[0 , 1]")
seed: 3435
train_steps: 30000
valid_steps: 5000
report_every: 1000
# Model parameters
model_type: text
model_dtype: "fp32"
# 8-layer / 16-head transformer encoder-decoder, 768-dim model, 3072-dim FFN
encoder_type: transformer
decoder_type: transformer
enc_layers: 8
dec_layers: 8
heads: 16
hidden_size: 768
word_vec_size: 768
transformer_ff: 3072
dropout: [0.1]
attention_dropout: [0.1]
# Global transform pipeline (mirrors the per-corpus transforms lists)
transforms: [sentencepiece, filtertoolong, docify]
src_subword_type: sentencepiece
tgt_subword_type: sentencepiece
# filtertoolong limits and hard truncation length, both 512 tokens per side
# NOTE(review): presumably counted in subword tokens post-sentencepiece — confirm
src_seq_length: 512
tgt_seq_length: 512
src_seq_length_trunc: 512
tgt_seq_length_trunc: 512
# Optimization parameters
# Adam + noam decay: learning_rate acts as a scale factor, not an absolute LR
optim: "adam"
bucket_size: 262144
num_workers: 2
learning_rate: 2.0
warmup_steps: 4000
decay_method: "noam"
adam_beta1: 0.9
adam_beta2: 0.998
max_grad_norm: 0.0
label_smoothing: 0.1
# Glorot initialization (param_init: 0 disables uniform init so glorot applies)
param_init: 0
param_init_glorot: true
# Token-based batching: 4096 tokens per batch, gradients accumulated over 2 batches
batch_size: 4096
batch_type: "tokens"
normalization: "tokens"
accum_count: [2]
# SentencePiece models; nbest=1 with alpha=0.0 gives deterministic segmentation
src_subword_model: source.model
tgt_subword_model: target.model
src_subword_nbest: 1
src_subword_alpha: 0.0
tgt_subword_nbest: 1
tgt_subword_alpha: 0.0
# Additional features
position_encoding_type: SinusoidalInterleaved
pos_ffn_activation_fn: gelu
parallel_residual: true
# NOTE(review): position_encoding is false while position_encoding_type is set —
# presumably relative positions (max_relative_positions: 32) supply position
# information instead of absolute encodings; confirm against OpenNMT-py docs.
position_encoding: false
max_relative_positions: 32
self_attn_type: scaled-dot
# Sliding-window attention width of 256 positions
sliding_window: 256
global_attention: general
# Validation parameters
valid_batch_size: 2048
# Output generator function
generator_function: softmax
# Docify parameters
# NOTE(review): `docify:` is a bare key that parses to null. OpenNMT-py reads
# transform options such as doc_length/max_context from the top level, so the
# values below are still picked up as-is — confirm, and consider removing the
# bare key (it is not a valid nesting point for these options).
docify:
doc_length: 200
max_context: 1
switchout_temperature: 0.2
temperature: 5.0  # Adjust this value to scale temperature (was "temperature : 5.0" — space before colon)
# Logging
log_file: train_1.log