diff --git "a/finetuned/out.log" "b/finetuned/out.log" --- "a/finetuned/out.log" +++ "b/finetuned/out.log" @@ -1,434 +1,434 @@ -2024-06-29 21:12:38,577 - INFO - allennlp.common.params - random_seed = 13370 -2024-06-29 21:12:38,577 - INFO - allennlp.common.params - numpy_seed = 1337 -2024-06-29 21:12:38,577 - INFO - allennlp.common.params - pytorch_seed = 133 -2024-06-29 21:12:38,578 - INFO - allennlp.common.checks - Pytorch version: 2.3.1+cu121 -2024-06-29 21:12:38,578 - INFO - allennlp.common.params - type = default -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None -2024-06-29 21:12:38,579 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None -2024-06-29 21:12:40,263 - INFO - allennlp.common.params - train_data_path = data/train.conllu -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - datasets_for_vocab_creation = None -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - validation_dataset_reader = None -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - validation_data_path = data/validation.conllu -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - test_data_path = None -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - evaluate_on_test = False -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - batch_weight_key = -2024-06-29 21:12:40,264 - INFO - allennlp.common.params - data_loader.type = multiprocess -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.batch_size = 24 -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.drop_last = False -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.shuffle = True -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.batch_sampler = None -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.num_workers = 0 -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.start_method = fork -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.cuda_device = None -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.quiet = False -2024-06-29 21:12:40,265 - INFO - allennlp.common.params - data_loader.collate_fn = -2024-06-29 21:12:40,265 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 21:12:43,950 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess -2024-06-29 21:12:43,950 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24 -2024-06-29 21:12:43,950 - INFO - allennlp.common.params - validation_data_loader.drop_last = False -2024-06-29 21:12:43,950 - INFO - allennlp.common.params - validation_data_loader.shuffle = False 
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.quiet = False
-2024-06-29 21:12:43,951 - INFO - allennlp.common.params - validation_data_loader.collate_fn = 
-2024-06-29 21:12:43,951 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
-2024-06-29 21:12:44,636 - INFO - allennlp.common.params - vocabulary.type = from_files
-2024-06-29 21:12:44,636 - INFO - allennlp.common.params - vocabulary.directory = xlm-roberta-base-en/common_vocab.tar.gz
-2024-06-29 21:12:44,636 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@
-2024-06-29 21:12:44,636 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@
-2024-06-29 21:12:44,636 - INFO - allennlp.data.vocabulary - Loading token dictionary from xlm-roberta-base-en/common_vocab.tar.gz.
-2024-06-29 21:12:44,638 - INFO - allennlp.common.params - model.type = from_archive
-2024-06-29 21:12:44,638 - INFO - allennlp.common.params - model.archive_file = xlm-roberta-base-en/pretrained/model.tar.gz
-2024-06-29 21:12:44,638 - INFO - allennlp.models.archival - loading archive file xlm-roberta-base-en/pretrained/model.tar.gz
-2024-06-29 21:12:44,638 - INFO - allennlp.models.archival - extracting archive file xlm-roberta-base-en/pretrained/model.tar.gz to temp dir /tmp/tmph7ottyi_
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None
-2024-06-29 21:12:50,409 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None
-2024-06-29 21:12:50,410 - INFO - allennlp.common.params - vocabulary.type = from_files
-2024-06-29 21:12:50,411 - INFO - allennlp.data.vocabulary - Loading token dictionary from /tmp/tmph7ottyi_/vocabulary.
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.indexer.namespace = tags
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.indexer.max_length = None
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched
-2024-06-29 21:12:50,412 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.max_length = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.sub_module = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.train_parameters = True
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.last_layer_only = True
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.override_weights_file = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.load_weights = True
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None
-2024-06-29 21:12:50,413 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg
-2024-06-29 21:12:50,936 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512
-2024-06-29 21:12:50,936 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu
-2024-06-29 21:12:50,936 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1
-2024-06-29 21:12:50,936 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = []
-2024-06-29 21:12:50,936 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None
-2024-06-29 21:12:50,938 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256
-2024-06-29 21:12:50,938 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu
-2024-06-29 21:12:50,938 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1
-2024-06-29 21:12:50,939 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128
-2024-06-29 21:12:50,939 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu
-2024-06-29 21:12:50,939 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1
-2024-06-29 21:12:50,977 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128
-2024-06-29 21:12:50,977 - INFO - allennlp.common.params - model.misc_classifier.activation = relu
-2024-06-29 21:12:50,977 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1
-2024-06-29 21:12:50,978 - INFO - allennlp.common.params - model.semslot_classifier.hid_dim = 1024
-2024-06-29 21:12:50,978 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu
-2024-06-29 21:12:50,978 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1
-2024-06-29 21:12:50,981 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024
-2024-06-29 21:12:50,981 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu
-2024-06-29 21:12:50,981 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1
-2024-06-29 21:12:50,986 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512
-2024-06-29 21:12:50,986 - INFO - allennlp.common.params - model.null_classifier.activation = relu
-2024-06-29 21:12:50,986 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1
-2024-06-29 21:12:50,986 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0
-2024-06-29 21:12:51,699 - INFO - allennlp.models.archival - removing temporary unarchived model dir at /tmp/tmph7ottyi_
-2024-06-29 21:12:56,850 - INFO - allennlp.common.params - trainer.type = gradient_descent
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.cuda_device = 0
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.distributed = False
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.world_size = 1
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.patience = None
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.validation_metric = +Avg
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.num_epochs = 10
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.grad_norm = False
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.grad_clipping = 5
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.use_amp = False
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.no_grad = None
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.momentum_scheduler = None
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.moving_average = None
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.checkpointer = 
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True
-2024-06-29 21:12:56,851 - INFO - allennlp.common.params - trainer.run_confidence_checks = True
-2024-06-29 21:12:56,852 - INFO - allennlp.common.params - trainer.grad_scaling = True
-2024-06-29 21:12:57,072 - INFO - allennlp.common.params - trainer.optimizer.type = adam
-2024-06-29 21:12:57,073 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01
-2024-06-29 21:12:57,073 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999)
-2024-06-29 21:12:57,073 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08
-2024-06-29 21:12:57,073 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0
-2024-06-29 21:12:57,073 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False
-2024-06-29 21:12:57,074 - INFO - allennlp.training.optimizers - Done constructing parameter groups.
-2024-06-29 21:12:57,074 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 
'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight'], {} -2024-06-29 21:12:57,074 - INFO - allennlp.training.optimizers - Group 1: ['pos_feats_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.weight', 'lemma_rule_classifier.classifier.4.bias', 'null_classifier.classifier.1.bias', 'misc_classifier.classifier.4.weight', 'dependency_classifier.arc_head_mlp.1.bias', 'lemma_rule_classifier.classifier.1.bias', 'lemma_rule_classifier.classifier.1.weight', 'pos_feats_classifier.classifier.4.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'misc_classifier.classifier.1.weight', 'lemma_rule_classifier.classifier.4.weight', 'null_classifier.classifier.4.weight', 'semclass_classifier.classifier.4.weight', 'misc_classifier.classifier.4.bias', 'dependency_classifier.arc_attention_eud._bias', 'dependency_classifier.arc_attention_eud._weight_matrix', 'dependency_classifier.rel_attention_ud._bias', 'dependency_classifier.rel_attention_eud._bias', 'pos_feats_classifier.classifier.1.weight', 'null_classifier.classifier.4.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_head_mlp.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'semslot_classifier.classifier.1.bias', 'dependency_classifier.arc_head_mlp.1.weight', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.bias', 'semslot_classifier.classifier.4.bias', 'semclass_classifier.classifier.1.weight', 'semclass_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_eud._weight_matrix', 'misc_classifier.classifier.1.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'semclass_classifier.classifier.1.bias', 'dependency_classifier.arc_attention_ud._weight_matrix', 'null_classifier.classifier.1.weight', 'pos_feats_classifier.classifier.1.bias', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.rel_dep_mlp.1.bias'], {} -2024-06-29 21:12:57,074 - INFO - allennlp.training.optimizers - Group 2: [], {} -2024-06-29 21:12:57,074 - INFO - allennlp.training.optimizers - Number of trainable parameters: 290431118 -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight -2024-06-29 
21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight -2024-06-29 21:12:57,075 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias -2024-06-29 21:12:57,076 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias -2024-06-29 21:12:57,077 
- INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias -2024-06-29 21:12:57,077 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight -2024-06-29 21:12:57,078 - INFO 
- allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight -2024-06-29 21:12:57,078 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias -2024-06-29 21:12:57,079 - INFO - 
allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias -2024-06-29 21:12:57,079 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight -2024-06-29 21:12:57,080 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight 
-2024-06-29 21:12:57,081 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - null_classifier.classifier.1.weight -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - null_classifier.classifier.1.bias -2024-06-29 21:12:57,081 - INFO - allennlp.common.util - null_classifier.classifier.4.weight -2024-06-29 21:12:57,082 - INFO - allennlp.common.util - null_classifier.classifier.4.bias -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = 
slanted_triangular -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001 -2024-06-29 21:12:57,082 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - type = default -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - save_completed_epochs = True -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - save_every_num_seconds = None -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - save_every_num_batches = None -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - keep_most_recent_by_age = None -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard -2024-06-29 21:12:57,082 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 -2024-06-29 21:12:57,083 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None -2024-06-29 21:12:57,083 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None -2024-06-29 21:12:57,083 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False -2024-06-29 21:12:57,083 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True -2024-06-29 21:12:57,084 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled -2024-06-29 21:12:57,085 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. 
-2024-06-29 21:12:57,085 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 -2024-06-29 21:12:57,086 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.6G -2024-06-29 21:12:57,086 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G -2024-06-29 21:12:57,087 - INFO - allennlp.training.gradient_descent_trainer - Training -2024-06-29 21:12:57,087 - INFO - tqdm - 0%| | 0/288 [00:00 +2024-07-01 01:04:45,799 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] +2024-07-01 01:04:49,381 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess +2024-07-01 01:04:49,381 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24 +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.drop_last = False +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.shuffle = False +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.start_method = fork +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.quiet = False +2024-07-01 01:04:49,382 - INFO - allennlp.common.params - validation_data_loader.collate_fn = +2024-07-01 01:04:49,382 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] +2024-07-01 01:04:50,043 - INFO - allennlp.common.params - vocabulary.type = from_files +2024-07-01 01:04:50,043 - INFO - allennlp.common.params - vocabulary.directory = serialization/common_vocab.tar.gz +2024-07-01 01:04:50,043 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ +2024-07-01 01:04:50,043 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ +2024-07-01 01:04:50,043 - INFO - allennlp.data.vocabulary - Loading token dictionary from serialization/common_vocab.tar.gz. 
+2024-07-01 01:04:50,045 - INFO - allennlp.common.params - model.type = from_archive +2024-07-01 01:04:50,045 - INFO - allennlp.common.params - model.archive_file = serialization/pretrained/model.tar.gz +2024-07-01 01:04:50,045 - INFO - allennlp.models.archival - loading archive file serialization/pretrained/model.tar.gz +2024-07-01 01:04:50,045 - INFO - allennlp.models.archival - extracting archive file serialization/pretrained/model.tar.gz to temp dir /tmp/tmpt5gndf3e +2024-07-01 01:04:55,715 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None +2024-07-01 01:04:55,716 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None +2024-07-01 01:04:55,717 - INFO - allennlp.common.params - vocabulary.type = from_files +2024-07-01 01:04:55,717 - INFO - allennlp.data.vocabulary - Loading token dictionary from /tmp/tmpt5gndf3e/vocabulary. 
+2024-07-01 01:04:55,718 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser +2024-07-01 01:04:55,718 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.indexer.namespace = tags +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.indexer.max_length = None +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.max_length = None +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.sub_module = None +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.train_parameters = True +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.last_layer_only = True +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.override_weights_file = None +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.load_weights = True +2024-07-01 01:04:55,719 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None +2024-07-01 01:04:55,720 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None +2024-07-01 01:04:55,720 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None +2024-07-01 01:04:55,720 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg +2024-07-01 01:04:56,244 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 +2024-07-01 01:04:56,244 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu +2024-07-01 01:04:56,244 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 +2024-07-01 01:04:56,245 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] +2024-07-01 01:04:56,245 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None +2024-07-01 01:04:56,246 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 +2024-07-01 01:04:56,246 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu +2024-07-01 01:04:56,247 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 +2024-07-01 01:04:56,248 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 +2024-07-01 01:04:56,248 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu +2024-07-01 01:04:56,248 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 +2024-07-01 01:04:56,285 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 +2024-07-01 01:04:56,285 - INFO - allennlp.common.params - model.misc_classifier.activation = relu +2024-07-01 01:04:56,285 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 +2024-07-01 01:04:56,286 - INFO - allennlp.common.params - model.semslot_classifier.hid_dim = 1024 +2024-07-01 01:04:56,286 - INFO - 
allennlp.common.params - model.semslot_classifier.activation = relu +2024-07-01 01:04:56,286 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 +2024-07-01 01:04:56,289 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 +2024-07-01 01:04:56,289 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu +2024-07-01 01:04:56,289 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 +2024-07-01 01:04:56,294 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 +2024-07-01 01:04:56,294 - INFO - allennlp.common.params - model.null_classifier.activation = relu +2024-07-01 01:04:56,294 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 +2024-07-01 01:04:56,294 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 +2024-07-01 01:04:57,027 - INFO - allennlp.models.archival - removing temporary unarchived model dir at /tmp/tmpt5gndf3e +2024-07-01 01:05:02,102 - INFO - allennlp.common.params - trainer.type = gradient_descent +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.cuda_device = 0 +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.distributed = False +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.world_size = 1 +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.patience = None +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.validation_metric = +Avg +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.num_epochs = 10 +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.grad_norm = False +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.grad_clipping = 5 +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.use_amp = False +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.no_grad = None +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.momentum_scheduler = None +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.moving_average = None +2024-07-01 01:05:02,103 - INFO - allennlp.common.params - trainer.checkpointer = +2024-07-01 01:05:02,104 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True +2024-07-01 01:05:02,104 - INFO - allennlp.common.params - trainer.run_confidence_checks = True +2024-07-01 01:05:02,104 - INFO - allennlp.common.params - trainer.grad_scaling = True +2024-07-01 01:05:02,318 - INFO - allennlp.common.params - trainer.optimizer.type = adam +2024-07-01 01:05:02,319 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 +2024-07-01 01:05:02,319 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) +2024-07-01 01:05:02,319 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 +2024-07-01 01:05:02,319 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 +2024-07-01 01:05:02,319 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False +2024-07-01 01:05:02,320 - INFO - allennlp.training.optimizers - Done constructing parameter groups. 
+2024-07-01 01:05:02,320 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight'], {} +2024-07-01 01:05:02,320 - INFO - allennlp.training.optimizers - Group 1: ['pos_feats_classifier.classifier.1.weight', 'semslot_classifier.classifier.4.bias', 'misc_classifier.classifier.4.weight', 'misc_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.bias', 'dependency_classifier.rel_head_mlp.1.weight', 'dependency_classifier.rel_head_mlp.1.bias', 'lemma_rule_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.weight', 'semclass_classifier.classifier.4.bias', 'dependency_classifier.rel_dep_mlp.1.bias', 'pos_feats_classifier.classifier.4.bias', 'null_classifier.classifier.1.bias', 'semslot_classifier.classifier.4.weight', 'dependency_classifier.rel_attention_eud._weight_matrix', 'semclass_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'dependency_classifier.rel_dep_mlp.1.weight', 'pos_feats_classifier.classifier.1.bias', 'dependency_classifier.arc_attention_ud._weight_matrix', 'misc_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'dependency_classifier.rel_attention_ud._bias', 'semclass_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'semslot_classifier.classifier.1.weight', 'null_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.1.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.rel_attention_eud._bias', 'dependency_classifier.arc_head_mlp.1.bias', 'lemma_rule_classifier.classifier.4.weight', 'semclass_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.bias', 'dependency_classifier.arc_head_mlp.1.weight', 'null_classifier.classifier.4.weight', 'dependency_classifier.arc_attention_eud._bias', 'semslot_classifier.classifier.1.bias'], {} +2024-07-01 01:05:02,320 - INFO - allennlp.training.optimizers - Group 2: [], {} +2024-07-01 01:05:02,320 - INFO - allennlp.training.optimizers - Number of trainable parameters: 290403443 +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias +2024-07-01 01:05:02,321 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight +2024-07-01 01:05:02,322 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias +2024-07-01 01:05:02,323 
- INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight +2024-07-01 01:05:02,323 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight +2024-07-01 01:05:02,324 - INFO 
- allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias +2024-07-01 01:05:02,324 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias +2024-07-01 01:05:02,325 - INFO - 
allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight +2024-07-01 01:05:02,325 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight +2024-07-01 01:05:02,326 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight 
+2024-07-01 01:05:02,327 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - null_classifier.classifier.1.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - null_classifier.classifier.1.bias +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - null_classifier.classifier.4.weight +2024-07-01 01:05:02,327 - INFO - allennlp.common.util - null_classifier.classifier.4.bias +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = 
slanted_triangular +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001 +2024-07-01 01:05:02,328 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - type = default +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - save_completed_epochs = True +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - save_every_num_seconds = None +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - save_every_num_batches = None +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - keep_most_recent_by_age = None +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 +2024-07-01 01:05:02,328 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None +2024-07-01 01:05:02,329 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None +2024-07-01 01:05:02,329 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False +2024-07-01 01:05:02,329 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True +2024-07-01 01:05:02,330 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled +2024-07-01 01:05:02,332 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. 
+2024-07-01 01:05:02,332 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 +2024-07-01 01:05:02,332 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.6G +2024-07-01 01:05:02,332 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G +2024-07-01 01:05:02,333 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 01:05:02,333 - INFO - tqdm - 0%| | 0/288 [00:00") -2024-06-29 21:12:57,478 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 51) -tensor([[0, 8, 0, ..., 0, 0, 0], - [1, 0, 0, ..., 0, 0, 0], - [3, 0, 1, ..., 0, 0, 0], + [ 0, 0], + [ 0, 0], + [ 0, 0]]], device='cuda:0') +2024-07-01 01:05:02,728 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "") +2024-07-01 01:05:02,728 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 43) +tensor([[ 0, 0, 4, ..., 0, 0, 0], + [ 0, 1, 1, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], ..., - [0, 2, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:12:57,479 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 51) -tensor([[37, 66, 8, ..., 0, 0, 0], - [ 5, 1, 3, ..., 0, 0, 0], - [16, 1, 5, ..., 0, 0, 0], + [ 0, 0, 2, ..., 1, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], + [ 0, 0, 25, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:02,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 43) +tensor([[ 3, 3, 14, ..., 0, 0, 0], + [ 1, 13, 5, ..., 0, 2, 2], + [ 8, 20, 21, ..., 0, 0, 0], ..., - [26, 11, 6, ..., 0, 0, 0], - [ 8, 2, 0, ..., 0, 0, 0], - [ 3, 21, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:12:57,480 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 51 x 51) + [ 6, 0, 11, ..., 5, 2, 0], + [ 6, 0, 1, ..., 0, 0, 0], + [ 2, 37, 66, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:02,730 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 43 x 43) tensor([[[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [28, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [ 9, -1, -1, ..., -1, -1, -1], - ..., + [[-1, -1, 1, ..., -1, -1, -1], + [-1, -1, 21, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + ..., [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., 0, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [ 8, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, 1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -538,49 +538,49 @@ tensor([[[-1, -1, -1, ..., -1, -1, -1], ..., - [[-1, 3, -1, ..., -1, -1, -1], - [-1, 4, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], + [-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 0, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [ 0, -1, -1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, 
-1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 17, ..., -1, -1, -1], - [ 1, -1, -1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 21:12:57,487 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 51 x 51) +2024-07-01 01:05:02,737 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 43 x 43) tensor([[[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [33, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [30, -1, -1, ..., -1, -1, -1], - ..., + [[-1, -1, 1, ..., -1, -1, -1], + [-1, -1, 28, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + ..., [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., 0, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [27, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, 1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -588,78 +588,78 @@ tensor([[[-1, -1, -1, ..., -1, -1, -1], ..., - [[-1, 2, -1, ..., -1, -1, -1], - [-1, 4, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 0, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [ 0, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 15, ..., -1, -1, -1], - [ 1, -1, -1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 21:12:57,493 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 51) +2024-07-01 01:05:02,743 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 43) tensor([[0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 1, 1, 0], [0, 0, 0, ..., 0, 0, 0], ..., + [0, 0, 0, ..., 1, 0, 0], [0, 0, 0, ..., 0, 0, 0], - [1, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:12:57,494 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 51) -tensor([[ 4, 0, 32, ..., 0, 0, 0], - [ 3, 0, 8, ..., 0, 0, 0], - [25, 0, 14, ..., 0, 0, 0], + [1, 1, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:02,744 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 43) +tensor([[ 4, 7, 0, ..., 0, 0, 0], + [ 0, 11, 47, ..., 44, 0, 0], + [32, 4, 0, ..., 0, 0, 0], ..., - [ 4, 2, 0, ..., 0, 0, 0], - [32, 0, 4, ..., 0, 0, 0], - [ 1, 0, 1, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:12:57,495 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 51) -tensor([[ 1, 5, 20, ..., 0, 0, 0], - [38, 2, 25, ..., 0, 0, 0], - 
[16, 2, 33, ..., 0, 0, 0], + [ 0, 4, 2, ..., 3, 0, 0], + [ 0, 18, 0, ..., 0, 0, 0], + [ 0, 12, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:02,745 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 43) +tensor([[ 1, 1, 5, ..., 0, 0, 0], + [ 2, 60, 32, ..., 8, 0, 0], + [19, 12, 0, ..., 0, 0, 0], ..., - [ 1, 9, 3, ..., 0, 0, 0], - [20, 0, 1, ..., 0, 0, 0], - [21, 18, 48, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:12:57,497 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") -2024-06-29 21:13:07,189 - INFO - tqdm - NullAccuracy: 0.9938, NullF1: 0.1497, Lemma: 0.8857, PosFeats: 0.7932, UD-UAS: 0.8508, UD-LAS: 0.8600, EUD-UAS: 0.7064, EUD-LAS: 0.7128, Misc: 0.9348, SS: 0.7200, SC: 0.6209, Avg: 0.7872, batch_loss: 4.1646, loss: 6.0915 ||: 55%|#####4 | 157/288 [00:10<00:08, 16.04it/s] -2024-06-29 21:13:15,214 - INFO - tqdm - NullAccuracy: 0.9946, NullF1: 0.3871, Lemma: 0.9084, PosFeats: 0.8505, UD-UAS: 0.8584, UD-LAS: 0.8676, EUD-UAS: 0.7186, EUD-LAS: 0.7252, Misc: 0.9481, SS: 0.7543, SC: 0.6615, Avg: 0.8103, batch_loss: 3.7720, loss: 5.1074 ||: 100%|#########9| 287/288 [00:18<00:00, 16.21it/s] -2024-06-29 21:13:15,262 - INFO - tqdm - NullAccuracy: 0.9946, NullF1: 0.3878, Lemma: 0.9084, PosFeats: 0.8507, UD-UAS: 0.8586, UD-LAS: 0.8678, EUD-UAS: 0.7187, EUD-LAS: 0.7254, Misc: 0.9482, SS: 0.7544, SC: 0.6617, Avg: 0.8104, batch_loss: 3.4774, loss: 5.1018 ||: 100%|##########| 288/288 [00:18<00:00, 15.85it/s] -2024-06-29 21:13:15,262 - INFO - allennlp.training.gradient_descent_trainer - Validating -2024-06-29 21:13:15,263 - INFO - tqdm - 0%| | 0/72 [00:00") +2024-07-01 01:05:12,365 - INFO - tqdm - NullAccuracy: 0.9937, NullF1: 0.1282, Lemma: 0.8771, PosFeats: 0.7854, UD-UAS: 0.8586, UD-LAS: 0.8672, EUD-UAS: 0.7254, EUD-LAS: 0.7315, Misc: 0.9358, SS: 0.7130, SC: 0.6199, Avg: 0.7904, batch_loss: 3.4599, loss: 6.2363 ||: 52%|#####2 | 151/288 [00:10<00:08, 15.93it/s] +2024-07-01 01:05:21,059 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.4231, Lemma: 0.9036, PosFeats: 0.8481, UD-UAS: 0.8648, UD-LAS: 0.8736, EUD-UAS: 0.7359, EUD-LAS: 0.7430, Misc: 0.9505, SS: 0.7519, SC: 0.6606, Avg: 0.8147, batch_loss: 3.7780, loss: 5.1652 ||: 100%|#########9| 287/288 [00:18<00:00, 15.95it/s] +2024-07-01 01:05:21,121 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.4236, Lemma: 0.9037, PosFeats: 0.8484, UD-UAS: 0.8648, UD-LAS: 0.8737, EUD-UAS: 0.7360, EUD-LAS: 0.7431, Misc: 0.9506, SS: 0.7521, SC: 0.6607, Avg: 0.8148, batch_loss: 3.5166, loss: 5.1595 ||: 100%|##########| 288/288 [00:18<00:00, 15.33it/s] +2024-07-01 01:05:21,121 - INFO - allennlp.training.gradient_descent_trainer - Validating +2024-07-01 01:05:21,122 - INFO - tqdm - 0%| | 0/73 [00:00") -2024-06-29 21:13:15,363 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 43) + [0, 0], + [0, 0], + [0, 0]]], device='cuda:0') +2024-07-01 01:05:21,243 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "") +2024-07-01 01:05:21,243 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 43) tensor([[ 0, 0, 0, ..., 0, 0, 0], [ 0, 0, 0, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], + [ 0, 0, 10, ..., 0, 0, 0], ..., - [ 0, 0, 46, ..., 0, 0, 0], - [ 0, 19, 0, ..., 0, 0, 0], - [11, 0, 7, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:13:15,364 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels 
(Shape: 24 x 43) -tensor([[ 4, 5, 28, ..., 0, 0, 0], - [ 3, 3, 2, ..., 0, 0, 0], - [ 1, 6, 5, ..., 0, 0, 0], + [ 0, 6, 55, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], + [ 0, 7, 1, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:21,245 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 43) +tensor([[10, 4, 0, ..., 0, 0, 0], + [ 1, 0, 2, ..., 0, 0, 0], + [ 2, 6, 5, ..., 0, 0, 0], ..., - [ 2, 64, 14, ..., 54, 2, 2], - [35, 78, 10, ..., 0, 0, 0], - [ 3, 3, 11, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:13:15,366 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 43 x 43) -tensor([[[-1, 5, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 4, ..., -1, -1, -1], + [ 3, 22, 17, ..., 0, 0, 0], + [ 6, 4, 0, ..., 0, 0, 0], + [26, 11, 5, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:21,246 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 43 x 43) +tensor([[[-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [28, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, 2, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -769,47 +769,47 @@ tensor([[[-1, 5, -1, ..., -1, -1, -1], ..., - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, -1, 26, ..., -1, -1, -1], + [-1, -1, 24, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, 3, -1, ..., -1, -1, -1], - [-1, 4, -1, ..., -1, -1, -1], + [[-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 3, ..., -1, -1, -1], - [28, -1, -1, ..., -1, -1, -1], - [-1, -1, 4, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, 4, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 21:13:15,372 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 43 x 43) -tensor([[[-1, 5, -1, ..., -1, -1, -1], - [-1, -1, 2, ..., -1, -1, -1], - [-1, -1, 4, ..., -1, -1, -1], +2024-07-01 01:05:21,252 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 43 x 43) +tensor([[[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [33, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, 3, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -819,426 +819,421 @@ tensor([[[-1, 5, -1, ..., -1, -1, 
-1], ..., - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, -1, 24, ..., -1, -1, -1], + [-1, -1, 23, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, 2, -1, ..., -1, -1, -1], - [-1, 4, -1, ..., -1, -1, -1], + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 2, ..., -1, -1, -1], - [33, -1, -1, ..., -1, -1, -1], - [-1, -1, 4, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], + [-1, 4, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 21:13:15,378 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 43) +2024-07-01 01:05:21,259 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 43) tensor([[0, 0, 0, ..., 0, 0, 0], [0, 1, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], + [1, 0, 0, ..., 0, 0, 0], ..., - [1, 1, 0, ..., 1, 1, 0], + [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:13:15,379 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 43) -tensor([[10, 12, 2, ..., 0, 0, 0], - [ 4, 7, 0, ..., 0, 0, 0], - [ 0, 0, 60, ..., 0, 0, 0], +2024-07-01 01:05:21,260 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 43) +tensor([[ 0, 41, 18, ..., 0, 0, 0], + [ 0, 5, 0, ..., 0, 0, 0], + [ 0, 0, 4, ..., 0, 0, 0], ..., - [ 0, 4, 0, ..., 5, 0, 0], - [ 4, 2, 0, ..., 0, 0, 0], - [ 4, 7, 2, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:13:15,380 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 43) -tensor([[ 4, 39, 26, ..., 0, 0, 0], - [ 1, 1, 0, ..., 0, 0, 0], - [ 2, 3, 1, ..., 0, 0, 0], + [ 3, 0, 2, ..., 0, 0, 0], + [ 0, 6, 3, ..., 0, 0, 0], + [ 4, 2, 4, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:05:21,261 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 43) +tensor([[ 3, 105, 120, ..., 0, 0, 0], + [ 2, 8, 0, ..., 0, 0, 0], + [ 0, 3, 15, ..., 0, 0, 0], ..., - [ 0, 1, 5, ..., 4, 0, 0], - [ 1, 34, 3, ..., 0, 0, 0], - [ 1, 1, 9, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 21:13:15,382 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") -2024-06-29 21:13:21,722 - INFO - tqdm - NullAccuracy: 0.9971, NullF1: 0.7619, Lemma: 0.9479, PosFeats: 0.9392, UD-UAS: 0.9049, UD-LAS: 0.9140, EUD-UAS: 0.8241, EUD-LAS: 0.8313, Misc: 0.9739, SS: 0.8134, SC: 0.7371, Avg: 0.8762, batch_loss: 3.1977, loss: 3.3303 ||: 100%|##########| 72/72 [00:06<00:00, 12.28it/s] -2024-06-29 21:13:21,722 - INFO - tqdm - NullAccuracy: 0.9971, NullF1: 0.7619, Lemma: 0.9479, PosFeats: 0.9392, UD-UAS: 0.9049, UD-LAS: 0.9140, EUD-UAS: 0.8241, EUD-LAS: 0.8313, Misc: 0.9739, SS: 0.8134, SC: 0.7371, Avg: 0.8762, batch_loss: 3.1977, loss: 3.3303 ||: 100%|##########| 72/72 [00:06<00:00, 11.15it/s] -2024-06-29 21:13:21,723 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. 
-2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - Training | Validation -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.810 | 0.876 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.725 | 0.831 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.719 | 0.824 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.908 | 0.948 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.948 | 0.974 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.997 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.388 | 0.762 -2024-06-29 21:13:21,726 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.851 | 0.939 -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - SC | 0.662 | 0.737 -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.813 -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.868 | 0.914 -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.859 | 0.905 -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1108.987 | N/A -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - loss | 5.102 | 3.330 -2024-06-29 21:13:21,727 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4688.102 | N/A -2024-06-29 21:13:23,301 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:26.215542 -2024-06-29 21:13:23,301 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:41 -2024-06-29 21:13:23,301 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 -2024-06-29 21:13:23,301 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.6G -2024-06-29 21:13:23,302 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 5.2G -2024-06-29 21:13:23,303 - INFO - allennlp.training.gradient_descent_trainer - Training -2024-06-29 21:13:23,303 - INFO - tqdm - 0%| | 0/288 [00:00") +2024-07-01 01:05:27,962 - INFO - tqdm - NullAccuracy: 0.9971, NullF1: 0.7795, Lemma: 0.9476, PosFeats: 0.9350, UD-UAS: 0.9033, UD-LAS: 0.9122, EUD-UAS: 0.8358, EUD-LAS: 0.8451, Misc: 0.9750, SS: 0.8120, SC: 0.7300, Avg: 0.8774, batch_loss: 3.6512, loss: 3.3607 ||: 100%|##########| 73/73 [00:06<00:00, 10.67it/s] +2024-07-01 01:05:27,963 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. 
+2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - Training | Validation +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.815 | 0.877 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.743 | 0.845 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.736 | 0.836 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.904 | 0.948 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.951 | 0.975 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.997 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.424 | 0.779 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.848 | 0.935 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - SC | 0.661 | 0.730 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - SS | 0.752 | 0.812 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.874 | 0.912 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.865 | 0.903 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1108.881 | N/A +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - loss | 5.159 | 3.361 +2024-07-01 01:05:27,966 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4688.586 | N/A +2024-07-01 01:05:29,560 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:27.227861 +2024-07-01 01:05:29,560 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:50 +2024-07-01 01:05:29,560 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 +2024-07-01 01:05:29,560 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.6G +2024-07-01 01:05:29,560 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 5.2G +2024-07-01 01:05:29,561 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 01:05:29,561 - INFO - tqdm - 0%| | 0/288 [00:00