local env = import "../env.jsonnet";
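
# The env.str / env.json helpers read the named variable from the environment
# (AllenNLP forwards environment variables to jsonnet as ext vars) and fall
# back to the given default. A hypothetical launch overriding two of them
# (the serialization dir and package name are assumptions, not from this file):
#   ENCODER=xlm-roberta-base SMOOTHING=0.2 \
#     allennlp train <this-config>.jsonnet -s runs/evalita --include-package sftp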

#local dataset_path = env.str("DATA_PATH", "data/framenet/full");
# NOTE: the default below is a machine-specific absolute path; override it via
# DATA_PATH rather than editing this file.
local dataset_path = env.str("DATA_PATH", "/home/p289731/cloned/lome/preproc/evalita_jsonl");
local ontology_path = "data/framenet/ontology.tsv";  # currently unused: model.ontology_path is null below

local debug = false;

# reader
local pretrained_model = env.str("ENCODER", "xlm-roberta-large");
local smoothing_factor = env.json("SMOOTHING", "0.1");

# model
local label_dim = env.json("LABEL_DIM", "64");
local dropout = env.json("DROPOUT", "0.2");
local bio_dim = env.json("BIO_DIM", "512");
local bio_layers = env.json("BIO_LAYER", "2");
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");

# loader
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
local max_training_tokens = 512;
local max_inference_tokens = 1024;

# training
local layer_fix = env.json("LAYER_FIX", "0");
local grad_acc = env.json("GRAD_ACC", "1");
#local cuda_devices = env.json("CUDA_DEVICES", "[-1]");
local cuda_devices = [0];
local patience = 32;

{
    dataset_reader: {
        type: "semantic_role_labeling",
        debug: debug,
        pretrained_model: pretrained_model,
        ignore_label: false,
        [if debug then "max_instances"]: 128,
        event_smoothing_factor: smoothing_factor,
        arg_smoothing_factor: smoothing_factor,
    },
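    # Reader note: one smoothing factor (0.1 by default) is shared by event
    # and argument labels, presumably as label smoothing over the targets;
    # debug runs cap the dataset at 128 instances.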

    train_data_path: dataset_path + "/evalita_plus_fn_train.jsonl",
    validation_data_path: dataset_path + "/evalita_dev.jsonl",
    test_data_path: dataset_path + "/evalita_test.jsonl",

    datasets_for_vocab_creation: ["train"],

    data_loader: {
        batch_sampler: {
            type: "mix_sampler",
            max_tokens: max_training_tokens,
            sorting_keys: ['tokens'],
            sampling_ratios: {
                'exemplar': 1.0,
                'full text': 0.0,
            }
        }
    },
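    # Sampler note: mix_sampler packs batches up to max_training_tokens and
    # draws instances per source at the ratios above, so as written only
    # 'exemplar' instances are ever sampled and 'full text' ones never are.
    # The exemplar_ratio local is not wired in here; the upstream FrameNet
    # config appears to use {'exemplar': exemplar_ratio, 'full text': 1.0}.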

    validation_data_loader: {
        batch_sampler: {
            type: "max_tokens_sampler",
            max_tokens: max_inference_tokens,
            sorting_keys: ['tokens']
        }
    },
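    # Validation/test batching is simpler: pack instances up to
    # max_inference_tokens (1024), sorted by token count, with no mixing.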

    model: {
        type: "span",
        word_embedding: {
            token_embedders: {
                "pieces": {
                    type: "pretrained_transformer",
                    model_name: pretrained_model,
                }
            },
        },
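        # Embedder note: "pieces" is presumably the token-indexer name the
        # reader emits for wordpieces; reader and embedder share the same
        # pretrained_model so tokenization and weights stay consistent.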
        span_extractor: {
            type: 'combo',
            sub_extractors: [
                {
                    type: 'self_attentive',
                },
                {
                    type: 'bidirectional_endpoint',
                }
            ]
        },
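        # Extractor note: 'combo' presumably concatenates its sub-extractors,
        # pairing a self-attentive pooled summary of each span with
        # bidirectional endpoint features.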
        span_finder: {
            type: "bio",
            bio_encoder: {
                type: "lstm",
                hidden_size: bio_dim,
                num_layers: bio_layers,
                bidirectional: true,
                dropout: dropout,
            },
            no_label: false,
        },
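        # Finder note: spans are detected by BIO tagging over a bidirectional
        # LSTM (2 layers x 512 hidden units by default, i.e. 1024-dim output);
        # no_label: false presumably makes the BIO tags label-aware rather
        # than bare B/I/O.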
        span_typing: {
            type: 'mlp',
            hidden_dims: span_typing_dims,
        },
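        # Typing note: span labels come from an MLP with two 256-dim hidden
        # layers (TYPING_DIMS); its loss is weighted by typing_loss_factor
        # against the span-finding loss.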
        metrics: [{type: "srl"}],

        typing_loss_factor: typing_loss_factor,
        ontology_path: null,
        label_dim: label_dim,
        max_decoding_spans: 128,
        max_recursion_depth: 2,
        debug: debug,
    },
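    # Decoding note: inference keeps at most 128 spans per instance and
    # recurses two levels, matching the event -> argument hierarchy of SRL;
    # ontology constraints stay off here (ontology_path: null).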

    trainer: {
        num_epochs: 128,
        patience: patience,
        [if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
        validation_metric: "+em_f",
        grad_norm: 10,
        grad_clipping: 10,
        num_gradient_accumulation_steps: grad_acc,
        optimizer: {
            type: "transformer",
            base: {
                type: "adam",
                lr: 1e-3,
            },
            embeddings_lr: 0.0,
            encoder_lr: 1e-5,
            pooler_lr: 1e-5,
            layer_fix: layer_fix,
        }
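        # Optimizer note: the "transformer" optimizer presumably applies
        # per-group learning rates: embeddings frozen (lr 0.0), encoder and
        # pooler fine-tuned at 1e-5, all remaining parameters at the Adam
        # base lr of 1e-3; layer_fix freezes the bottom N encoder layers
        # (N = 0 by default).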
    },

    cuda_devices:: cuda_devices,
    [if std.length(cuda_devices) > 1 then "distributed"]: {
        "cuda_devices": cuda_devices
    },
    [if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
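    # Multi-GPU runs (more than one entry in cuda_devices) switch to
    # AllenNLP's "distributed" mode instead of a single cuda_device;
    # evaluate_on_test is only set for the single-GPU case.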
}