Spaces:
Sleeping
Sleeping
# Training configuration for a span-based semantic role labeling model,
# initialised from a pretrained XLM-RoBERTa FrameNet checkpoint and trained
# on the EVALITA jsonl splits under `dataset_path`.
#
# Values wrapped in env.json(NAME, DEFAULT) are resolved by the helper
# imported from ../env.jsonnet.
# NOTE(review): presumably this reads environment variable NAME and falls
# back to DEFAULT, parsed as JSON -- confirm against ../env.jsonnet.
local env = import "../env.jsonnet";

# Absolute, machine-specific location of the EVALITA train/dev/test files.
local dataset_path = "/home/p289731/cloned/lome/preproc/evalita_jsonl";
# NOTE(review): not referenced anywhere in the visible config
# (model.ontology_path is set to null below).
local ontology_path = "data/framenet/ontology.tsv";

# When true, the dataset reader is capped at 128 instances and the debug
# flag is forwarded to both the reader and the model.
local debug = false;

# reader
local pretrained_model = "/data/p289731/cloned/lome-models/models/xlm-roberta-framenet/";
# Shared by event_smoothing_factor and arg_smoothing_factor.
local smoothing_factor = env.json("SMOOTHING", "0.1");

# model
local label_dim = env.json("LABEL_DIM", "64");
local dropout = env.json("DROPOUT", "0.2");
local bio_dim = env.json("BIO_DIM", "512");
local bio_layers = env.json("BIO_LAYER", "2");
local span_typing_dims = env.json("TYPING_DIMS", "[256, 256]");
local typing_loss_factor = env.json("LOSS_FACTOR", "8.0");

# loader
# NOTE(review): not referenced anywhere in the visible config; the sampling
# ratios of the training sampler are hard-coded below.
local exemplar_ratio = env.json("EXEMPLAR_RATIO", "0.05");
local max_training_tokens = 512;
local max_inference_tokens = 1024;

# training
local layer_fix = env.json("LAYER_FIX", "0");
local grad_acc = env.json("GRAD_ACC", "1");
# One entry selects single-device training (plus evaluation on the test
# split); more than one entry emits a "distributed" section instead.
local cuda_devices = [0];
local patience = 32;

{
  dataset_reader: {
    type: "semantic_role_labeling",
    debug: debug,
    pretrained_model: pretrained_model,
    ignore_label: false,
    # An `if` without `else` yields null, and a null computed field name
    # omits the field, so "max_instances" only exists when debug is true.
    [ if debug then "max_instances" ]: 128,
    event_smoothing_factor: smoothing_factor,
    arg_smoothing_factor: smoothing_factor,
  },

  train_data_path: dataset_path + "/evalita_train.jsonl",
  validation_data_path: dataset_path + "/evalita_dev.jsonl",
  test_data_path: dataset_path + "/evalita_test.jsonl",

  datasets_for_vocab_creation: ["train"],

  data_loader: {
    batch_sampler: {
      type: "mix_sampler",
      max_tokens: max_training_tokens,
      sorting_keys: ['tokens'],
      # Ratios are fixed here rather than derived from exemplar_ratio.
      sampling_ratios: {
        'exemplar': 1.0,
        'full text': 0.0,
      }
    }
  },

  validation_data_loader: {
    batch_sampler: {
      type: "max_tokens_sampler",
      max_tokens: max_inference_tokens,
      sorting_keys: ['tokens']
    }
  },

  model: {
    type: "span",
    word_embedding: {
      token_embedders: {
        "pieces": {
          type: "pretrained_transformer",
          model_name: pretrained_model,
        }
      },
    },
    span_extractor: {
      type: 'combo',
      sub_extractors: [
        {
          type: 'self_attentive',
        },
        {
          type: 'bidirectional_endpoint',
        }
      ]
    },
    span_finder: {
      type: "bio",
      bio_encoder: {
        type: "lstm",
        hidden_size: bio_dim,
        num_layers: bio_layers,
        bidirectional: true,
        dropout: dropout,
      },
      no_label: false,
    },
    span_typing: {
      type: 'mlp',
      hidden_dims: span_typing_dims,
    },
    metrics: [{type: "srl"}],

    typing_loss_factor: typing_loss_factor,
    # Explicitly null; the ontology_path local above is not used here.
    ontology_path: null,
    label_dim: label_dim,
    max_decoding_spans: 128,
    max_recursion_depth: 2,
    debug: debug,
  },

  trainer: {
    num_epochs: 128,
    patience: patience,
    # Only emitted for single-device runs.
    [if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
    # NOTE(review): the leading "+" presumably marks a metric to maximise
    # -- confirm against the trainer implementation.
    validation_metric: "+em_f",
    grad_norm: 10,
    grad_clipping: 10,
    num_gradient_accumulation_steps: grad_acc,
    optimizer: {
      type: "transformer",
      base: {
        type: "adam",
        lr: 1e-3,
      },
      embeddings_lr: 0.0,
      encoder_lr: 1e-5,
      pooler_lr: 1e-5,
      layer_fix: layer_fix,
    }
  },

  # `::` declares a hidden field: visible to code that imports or extends
  # this object, but left out of the manifested JSON.
  cuda_devices:: cuda_devices,
  # Only emitted for multi-device runs.
  [if std.length(cuda_devices) > 1 then "distributed"]: {
    "cuda_devices": cuda_devices
  },
  # Only emitted for single-device runs.
  [if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true
}