local env = import "../env.jsonnet";

# Experiment configuration for a span-based semantic role labelling model
# trained on EVALITA + FrameNet JSONL data.
#
# Tunables are read through env.str / env.json from ../env.jsonnet. The second
# argument is presumably the fallback used when the variable is unset --
# NOTE(review): confirm against ../env.jsonnet.

# ---- data locations ----
# Environment-driven alternative, kept for reference:
#local data_root = env.str("DATA_PATH", "data/framenet/full");
local data_root = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
# Not referenced by the object below (model.ontology_path is set to null).
local ontology_path = "data/framenet/ontology.tsv";
# Joins a split's file name onto data_root.
local split_file(name) = data_root + "/" + name;

local debug = false;

# ---- reader ----
local encoder_name = env.str("ENCODER", "xlm-roberta-large");
local label_smoothing = env.json("SMOOTHING", "0.1");

# ---- model ----
local label_dim = env.json("LABEL_DIM", "64");
local dropout = env.json("DROPOUT", "0.2");
local bio_dim = env.json("BIO_DIM", "512");
local bio_layers = env.json("BIO_LAYER", "2");
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");

# ---- loader ----
# Not referenced below: data_loader hard-codes its sampling_ratios.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
local train_token_budget = 512;
local eval_token_budget = 1024;

# ---- training ----
local layer_fix = env.json("LAYER_FIX", "0");
local grad_acc = env.json("GRAD_ACC", "1");
# Environment-driven alternative, kept for reference:
#local gpu_ids = env.json("CUDA_DEVICES", "[-1]");
local gpu_ids = [0];
local patience = 32;

# Exactly one device -> plain "cuda_device" plus "evaluate_on_test";
# more than one -> a "distributed" section. Neither flag is set for an
# empty device list, so none of those fields is emitted in that case.
local single_device = std.length(gpu_ids) == 1;
local multi_device = std.length(gpu_ids) > 1;

# Both samplers sort on the same key.
local sort_by = ["tokens"];

local reader = {
  type: "semantic_role_labeling",
  debug: debug,
  pretrained_model: encoder_name,
  ignore_label: false,
  # A null computed key drops the field, so this only appears in debug mode.
  [if debug then "max_instances"]: 128,
  event_smoothing_factor: label_smoothing,
  arg_smoothing_factor: label_smoothing,
};

local train_loader = {
  batch_sampler: {
    type: "mix_sampler",
    max_tokens: train_token_budget,
    sorting_keys: sort_by,
    sampling_ratios: {
      "exemplar": 1.0,
      "full text": 0.0,
    },
  },
};

local dev_loader = {
  batch_sampler: {
    type: "max_tokens_sampler",
    max_tokens: eval_token_budget,
    sorting_keys: sort_by,
  },
};

local bio_encoder = {
  type: "lstm",
  hidden_size: bio_dim,
  num_layers: bio_layers,
  bidirectional: true,
  dropout: dropout,
};

local model = {
  type: "span",
  word_embedding: {
    token_embedders: {
      pieces: {
        type: "pretrained_transformer",
        model_name: encoder_name,
      },
    },
  },
  span_extractor: {
    type: "combo",
    sub_extractors: [
      { type: "self_attentive" },
      { type: "bidirectional_endpoint" },
    ],
  },
  span_finder: {
    type: "bio",
    bio_encoder: bio_encoder,
    no_label: false,
  },
  span_typing: {
    type: "mlp",
    hidden_dims: span_typing_dims,
  },
  metrics: [{ type: "srl" }],
  typing_loss_factor: typing_loss_factor,
  ontology_path: null,
  label_dim: label_dim,
  max_decoding_spans: 128,
  max_recursion_depth: 2,
  debug: debug,
};

local optimizer = {
  type: "transformer",
  base: {
    type: "adam",
    lr: 1e-3,
  },
  embeddings_lr: 0.0,
  encoder_lr: 1e-5,
  pooler_lr: 1e-5,
  layer_fix: layer_fix,
};

local trainer = {
  num_epochs: 128,
  patience: patience,
  [if single_device then "cuda_device"]: gpu_ids[0],
  validation_metric: "+em_f",
  grad_norm: 10,
  grad_clipping: 10,
  num_gradient_accumulation_steps: grad_acc,
  optimizer: optimizer,
};

{
  dataset_reader: reader,

  train_data_path: split_file("evalita_plus_fn_train.jsonl"),
  validation_data_path: split_file("evalita_dev.jsonl"),
  test_data_path: split_file("evalita_test.jsonl"),
  datasets_for_vocab_creation: ["train"],

  data_loader: train_loader,
  validation_data_loader: dev_loader,

  model: model,
  trainer: trainer,

  # Hidden field (::) -- not part of the manifested JSON.
  cuda_devices:: gpu_ids,
  [if multi_device then "distributed"]: {
    cuda_devices: gpu_ids,
  },
  [if single_device then "evaluate_on_test"]: true,
}