diff --git "a/finetuned/out.log" "b/finetuned/out.log" --- "a/finetuned/out.log" +++ "b/finetuned/out.log" @@ -1,334 +1,334 @@ -2024-06-29 20:36:24,689 - INFO - allennlp.common.params - random_seed = 13370 -2024-06-29 20:36:24,689 - INFO - allennlp.common.params - numpy_seed = 1337 -2024-06-29 20:36:24,689 - INFO - allennlp.common.params - pytorch_seed = 133 -2024-06-29 20:36:24,690 - INFO - allennlp.common.checks - Pytorch version: 2.3.1+cu121 -2024-06-29 20:36:24,690 - INFO - allennlp.common.params - type = default -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = distilbert-base-uncased -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None -2024-06-29 20:36:24,691 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - train_data_path = data/train.conllu -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - datasets_for_vocab_creation = None -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - validation_dataset_reader = None -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - validation_data_path = data/validation.conllu -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - test_data_path = None -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - evaluate_on_test = False -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - batch_weight_key = -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - data_loader.type = multiprocess -2024-06-29 20:36:25,559 - INFO - allennlp.common.params - data_loader.batch_size = 32 -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.drop_last = False -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.shuffle = True -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.batch_sampler = None -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.num_workers = 0 -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.start_method = fork -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.cuda_device = None -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.quiet = False -2024-06-29 20:36:25,560 - INFO - allennlp.common.params - data_loader.collate_fn = -2024-06-29 20:36:25,560 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32 -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.drop_last = False -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.shuffle = False -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None -2024-06-29 20:36:29,066 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 -2024-06-29 20:36:29,067 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None -2024-06-29 20:36:29,067 - INFO - allennlp.common.params - validation_data_loader.start_method = fork -2024-06-29 20:36:29,067 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None -2024-06-29 20:36:29,067 - INFO - allennlp.common.params - validation_data_loader.quiet = False -2024-06-29 20:36:29,067 - INFO - allennlp.common.params - validation_data_loader.collate_fn = -2024-06-29 20:36:29,067 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 20:36:30,155 - INFO - allennlp.common.params - vocabulary.type = from_files -2024-06-29 20:36:30,155 - INFO - allennlp.common.params - vocabulary.directory = distilbert/common_vocab.tar.gz -2024-06-29 20:36:30,156 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ -2024-06-29 20:36:30,156 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ -2024-06-29 20:36:30,156 - INFO - allennlp.data.vocabulary - Loading token dictionary from distilbert/common_vocab.tar.gz. -2024-06-29 20:36:30,158 - INFO - allennlp.common.params - model.type = from_archive -2024-06-29 20:36:30,158 - INFO - allennlp.common.params - model.archive_file = distilbert/pretrained/model.tar.gz -2024-06-29 20:36:30,158 - INFO - allennlp.models.archival - loading archive file distilbert/pretrained/model.tar.gz -2024-06-29 20:36:30,158 - INFO - allennlp.models.archival - extracting archive file distilbert/pretrained/model.tar.gz to temp dir /tmp/tmpwn6jyih0 -2024-06-29 20:36:31,588 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader -2024-06-29 20:36:31,588 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched -2024-06-29 20:36:31,588 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = distilbert-base-uncased -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = distilbert-base-uncased -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags -2024-06-29 20:36:31,589 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None -2024-06-29 20:36:31,590 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None -2024-06-29 20:36:31,590 - INFO - allennlp.common.params - vocabulary.type = from_files -2024-06-29 20:36:31,590 - INFO - allennlp.data.vocabulary - Loading token dictionary from /tmp/tmpwn6jyih0/vocabulary. -2024-06-29 20:36:31,591 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser -2024-06-29 20:36:31,591 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched -2024-06-29 20:36:31,591 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 -2024-06-29 20:36:31,591 - INFO - allennlp.common.params - model.indexer.model_name = distilbert-base-uncased -2024-06-29 20:36:31,591 - INFO - allennlp.common.params - model.indexer.namespace = tags -2024-06-29 20:36:31,591 - INFO - allennlp.common.params - model.indexer.max_length = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.model_name = distilbert-base-uncased -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.max_length = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.sub_module = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.train_parameters = True -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.last_layer_only = True -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.override_weights_file = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.load_weights = True -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None -2024-06-29 20:36:31,592 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg -2024-06-29 20:36:31,864 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 -2024-06-29 20:36:31,864 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu -2024-06-29 20:36:31,864 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 -2024-06-29 20:36:31,864 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] -2024-06-29 20:36:31,865 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None -2024-06-29 20:36:31,866 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 -2024-06-29 20:36:31,866 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu -2024-06-29 20:36:31,866 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 -2024-06-29 20:36:31,868 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 -2024-06-29 20:36:31,868 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu -2024-06-29 20:36:31,868 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 -2024-06-29 20:36:31,906 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 -2024-06-29 20:36:31,907 - INFO - allennlp.common.params - model.misc_classifier.activation = relu -2024-06-29 20:36:31,907 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 -2024-06-29 20:36:31,907 - INFO - allennlp.common.params - model.semslot_classifier.hid_dim = 1024 -2024-06-29 20:36:31,907 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu -2024-06-29 20:36:31,907 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 -2024-06-29 20:36:31,912 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 -2024-06-29 20:36:31,912 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu -2024-06-29 20:36:31,912 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 -2024-06-29 20:36:31,918 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 -2024-06-29 20:36:31,918 - INFO - allennlp.common.params - model.null_classifier.activation = relu -2024-06-29 20:36:31,918 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 -2024-06-29 20:36:31,918 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 -2024-06-29 20:36:32,115 - INFO - allennlp.models.archival - removing temporary unarchived model dir at /tmp/tmpwn6jyih0 -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.type = gradient_descent -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.cuda_device = 0 -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.distributed = False -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.world_size = 1 -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.patience = None -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.validation_metric = +Avg -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.num_epochs = 10 -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.grad_norm = False -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.grad_clipping = 5 -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 -2024-06-29 20:36:36,489 - INFO - allennlp.common.params - trainer.use_amp = False -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.no_grad = None -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.momentum_scheduler = None -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.moving_average = None -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.checkpointer = -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.run_confidence_checks = True -2024-06-29 20:36:36,490 - INFO - allennlp.common.params - trainer.grad_scaling = True -2024-06-29 20:36:36,597 - INFO - allennlp.common.params - trainer.optimizer.type = adam -2024-06-29 20:36:36,597 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 -2024-06-29 20:36:36,598 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) -2024-06-29 20:36:36,598 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 -2024-06-29 20:36:36,598 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 -2024-06-29 20:36:36,598 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False -2024-06-29 20:36:36,598 - INFO - allennlp.training.optimizers - Done constructing parameter groups. -2024-06-29 20:36:36,599 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.weight'], {} -2024-06-29 20:36:36,599 - INFO - allennlp.training.optimizers - Group 1: ['pos_feats_classifier.classifier.4.weight', 'dependency_classifier.rel_head_mlp.1.weight', 'lemma_rule_classifier.classifier.1.bias', 'semslot_classifier.classifier.4.weight', 'dependency_classifier.rel_attention_eud._bias', 'null_classifier.classifier.1.weight', 'semslot_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.bias', 'dependency_classifier.arc_attention_ud._weight_matrix', 'dependency_classifier.rel_head_mlp.1.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.rel_dep_mlp.1.bias', 'semclass_classifier.classifier.4.bias', 'semclass_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'null_classifier.classifier.4.weight', 'misc_classifier.classifier.4.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'pos_feats_classifier.classifier.1.weight', 'dependency_classifier.arc_head_mlp.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'null_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'null_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.bias', 'semclass_classifier.classifier.1.bias', 'pos_feats_classifier.classifier.1.bias', 'pos_feats_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.weight', 'dependency_classifier.rel_attention_eud._weight_matrix', 'misc_classifier.classifier.1.bias', 'dependency_classifier.arc_attention_eud._bias', 'lemma_rule_classifier.classifier.1.weight', 'misc_classifier.classifier.1.weight', 'misc_classifier.classifier.4.weight', 'dependency_classifier.rel_attention_ud._bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'semclass_classifier.classifier.4.weight', 'dependency_classifier.arc_head_mlp.1.bias'], {} -2024-06-29 20:36:36,599 - INFO - allennlp.training.optimizers - Group 2: [], {} -2024-06-29 20:36:36,599 - INFO - allennlp.training.optimizers - Number of trainable parameters: 78750350 -2024-06-29 20:36:36,599 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): -2024-06-29 20:36:36,599 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): -2024-06-29 20:36:36,599 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.weight -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.bias -2024-06-29 20:36:36,600 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.weight -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.bias -2024-06-29 20:36:36,601 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias -2024-06-29 20:36:36,602 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - null_classifier.classifier.1.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - null_classifier.classifier.1.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - null_classifier.classifier.4.weight -2024-06-29 20:36:36,603 - INFO - allennlp.common.util - null_classifier.classifier.4.bias -2024-06-29 20:36:36,603 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = slanted_triangular -2024-06-29 20:36:36,603 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 -2024-06-29 20:36:36,603 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 -2024-06-29 20:36:36,603 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.01 -2024-06-29 20:36:36,604 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - type = default -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - save_completed_epochs = True -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - save_every_num_seconds = None -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - save_every_num_batches = None -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - keep_most_recent_by_age = None -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False -2024-06-29 20:36:36,604 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True -2024-06-29 20:36:36,606 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled -2024-06-29 20:36:36,607 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. -2024-06-29 20:36:36,607 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 -2024-06-29 20:36:36,607 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 1.8G -2024-06-29 20:36:36,608 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 302M -2024-06-29 20:36:36,608 - INFO - allennlp.training.gradient_descent_trainer - Training -2024-06-29 20:36:36,608 - INFO - tqdm - 0%| | 0/216 [00:00 +2024-07-01 00:22:12,992 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] +2024-07-01 00:22:16,347 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess +2024-07-01 00:22:16,347 - INFO - allennlp.common.params - validation_data_loader.batch_size = 32 +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.drop_last = False +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.shuffle = False +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.start_method = fork +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.quiet = False +2024-07-01 00:22:16,348 - INFO - allennlp.common.params - validation_data_loader.collate_fn = +2024-07-01 00:22:16,348 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] +2024-07-01 00:22:17,450 - INFO - allennlp.common.params - vocabulary.type = from_files +2024-07-01 00:22:17,450 - INFO - allennlp.common.params - vocabulary.directory = serialization/distilbert/common_vocab.tar.gz +2024-07-01 00:22:17,450 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ +2024-07-01 00:22:17,450 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ +2024-07-01 00:22:17,450 - INFO - allennlp.data.vocabulary - Loading token dictionary from serialization/distilbert/common_vocab.tar.gz. +2024-07-01 00:22:17,452 - INFO - allennlp.common.params - model.type = from_archive +2024-07-01 00:22:17,452 - INFO - allennlp.common.params - model.archive_file = serialization/distilbert/pretrained/model.tar.gz +2024-07-01 00:22:17,452 - INFO - allennlp.models.archival - loading archive file serialization/distilbert/pretrained/model.tar.gz +2024-07-01 00:22:17,452 - INFO - allennlp.models.archival - extracting archive file serialization/distilbert/pretrained/model.tar.gz to temp dir /tmp/tmp1_88nyor +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = distilbert-base-uncased +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None +2024-07-01 00:22:18,863 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = distilbert-base-uncased +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None +2024-07-01 00:22:18,864 - INFO - allennlp.common.params - vocabulary.type = from_files +2024-07-01 00:22:18,864 - INFO - allennlp.data.vocabulary - Loading token dictionary from /tmp/tmp1_88nyor/vocabulary. +2024-07-01 00:22:18,865 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.indexer.model_name = distilbert-base-uncased +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.indexer.namespace = tags +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.indexer.max_length = None +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.embedder.model_name = distilbert-base-uncased +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.embedder.max_length = None +2024-07-01 00:22:18,866 - INFO - allennlp.common.params - model.embedder.sub_module = None +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.train_parameters = True +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.last_layer_only = True +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.override_weights_file = None +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.load_weights = True +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None +2024-07-01 00:22:18,867 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg +2024-07-01 00:22:19,147 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 +2024-07-01 00:22:19,147 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu +2024-07-01 00:22:19,147 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 +2024-07-01 00:22:19,147 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] +2024-07-01 00:22:19,147 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None +2024-07-01 00:22:19,149 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 +2024-07-01 00:22:19,149 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu +2024-07-01 00:22:19,149 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 +2024-07-01 00:22:19,150 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 +2024-07-01 00:22:19,150 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu +2024-07-01 00:22:19,150 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 +2024-07-01 00:22:19,189 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 +2024-07-01 00:22:19,189 - INFO - allennlp.common.params - model.misc_classifier.activation = relu +2024-07-01 00:22:19,189 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 +2024-07-01 00:22:19,189 - INFO - allennlp.common.params - model.semslot_classifier.hid_dim = 1024 +2024-07-01 00:22:19,189 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu +2024-07-01 00:22:19,190 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 +2024-07-01 00:22:19,194 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 +2024-07-01 00:22:19,194 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu +2024-07-01 00:22:19,194 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 +2024-07-01 00:22:19,200 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 +2024-07-01 00:22:19,200 - INFO - allennlp.common.params - model.null_classifier.activation = relu +2024-07-01 00:22:19,200 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 +2024-07-01 00:22:19,200 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 +2024-07-01 00:22:19,410 - INFO - allennlp.models.archival - removing temporary unarchived model dir at /tmp/tmp1_88nyor +2024-07-01 00:22:23,694 - INFO - allennlp.common.params - trainer.type = gradient_descent +2024-07-01 00:22:23,694 - INFO - allennlp.common.params - trainer.cuda_device = 0 +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.distributed = False +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.world_size = 1 +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.patience = None +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.validation_metric = +Avg +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.num_epochs = 10 +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.grad_norm = False +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.grad_clipping = 5 +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.use_amp = False +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.no_grad = None +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.momentum_scheduler = None +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.moving_average = None +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.checkpointer = +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.run_confidence_checks = True +2024-07-01 00:22:23,695 - INFO - allennlp.common.params - trainer.grad_scaling = True +2024-07-01 00:22:23,804 - INFO - allennlp.common.params - trainer.optimizer.type = adam +2024-07-01 00:22:23,805 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 +2024-07-01 00:22:23,805 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) +2024-07-01 00:22:23,805 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 +2024-07-01 00:22:23,805 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 +2024-07-01 00:22:23,805 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False +2024-07-01 00:22:23,806 - INFO - allennlp.training.optimizers - Done constructing parameter groups. +2024-07-01 00:22:23,806 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.weight', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.bias', 'embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.weight', 'embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.weight'], {} +2024-07-01 00:22:23,806 - INFO - allennlp.training.optimizers - Group 1: ['pos_feats_classifier.classifier.4.bias', 'semclass_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_attention_eud._bias', 'misc_classifier.classifier.4.weight', 'misc_classifier.classifier.1.weight', 'dependency_classifier.arc_head_mlp.1.weight', 'semslot_classifier.classifier.1.bias', 'dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.arc_attention_eud._bias', 'dependency_classifier.arc_attention_ud._weight_matrix', 'dependency_classifier.rel_dep_mlp.1.bias', 'misc_classifier.classifier.4.bias', 'null_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.weight', 'lemma_rule_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_eud._weight_matrix', 'semslot_classifier.classifier.1.weight', 'pos_feats_classifier.classifier.4.weight', 'null_classifier.classifier.1.bias', 'dependency_classifier.arc_attention_ud._bias', 'semslot_classifier.classifier.4.bias', 'semclass_classifier.classifier.4.weight', 'dependency_classifier.arc_dep_mlp.1.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'null_classifier.classifier.1.weight', 'lemma_rule_classifier.classifier.1.bias', 'dependency_classifier.rel_head_mlp.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'pos_feats_classifier.classifier.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'semclass_classifier.classifier.1.weight', 'misc_classifier.classifier.1.bias', 'semclass_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.weight', 'dependency_classifier.rel_dep_mlp.1.weight', 'null_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.1.weight', 'dependency_classifier.rel_head_mlp.1.bias'], {} +2024-07-01 00:22:23,806 - INFO - allennlp.training.optimizers - Group 2: [], {} +2024-07-01 00:22:23,806 - INFO - allennlp.training.optimizers - Number of trainable parameters: 78722675 +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.q_lin.bias +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.k_lin.bias +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.v_lin.bias +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.weight +2024-07-01 00:22:23,807 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.attention.out_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.sa_layer_norm.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin1.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.ffn.lin2.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.0.output_layer_norm.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.q_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.k_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.v_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.attention.out_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.sa_layer_norm.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin1.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.ffn.lin2.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.1.output_layer_norm.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.q_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.k_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.v_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.attention.out_lin.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.sa_layer_norm.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin1.bias +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.weight +2024-07-01 00:22:23,808 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.ffn.lin2.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.2.output_layer_norm.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.q_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.k_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.v_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.attention.out_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.sa_layer_norm.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin1.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.ffn.lin2.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.3.output_layer_norm.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.q_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.k_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.v_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.attention.out_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.sa_layer_norm.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin1.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.ffn.lin2.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.4.output_layer_norm.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.q_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.weight +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.k_lin.bias +2024-07-01 00:22:23,809 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.v_lin.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.attention.out_lin.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.sa_layer_norm.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.ffn.lin2.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.transformer.layer.5.output_layer_norm.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias +2024-07-01 00:22:23,810 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - null_classifier.classifier.1.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - null_classifier.classifier.1.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - null_classifier.classifier.4.weight +2024-07-01 00:22:23,811 - INFO - allennlp.common.util - null_classifier.classifier.4.bias +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = slanted_triangular +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.01 +2024-07-01 00:22:23,811 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - type = default +2024-07-01 00:22:23,811 - INFO - allennlp.common.params - save_completed_epochs = True +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - save_every_num_seconds = None +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - save_every_num_batches = None +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - keep_most_recent_by_age = None +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False +2024-07-01 00:22:23,812 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True +2024-07-01 00:22:23,813 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled +2024-07-01 00:22:23,814 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. +2024-07-01 00:22:23,814 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 +2024-07-01 00:22:23,814 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 1.8G +2024-07-01 00:22:23,815 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 302M +2024-07-01 00:22:23,815 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 00:22:23,815 - INFO - tqdm - 0%| | 0/216 [00:00") -2024-06-29 20:36:36,960 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 32 x 51) -tensor([[0, 8, 0, ..., 0, 0, 0], - [1, 0, 0, ..., 0, 0, 0], - [3, 0, 1, ..., 0, 0, 0], + [ 0, 0], + [ 0, 0], + [ 0, 0]]], device='cuda:0') +2024-07-01 00:22:24,155 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 32 of type "") +2024-07-01 00:22:24,155 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 32 x 43) +tensor([[ 0, 0, 4, ..., 0, 0, 0], + [ 0, 1, 1, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], ..., - [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 20:36:36,961 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 32 x 51) -tensor([[37, 66, 8, ..., 0, 0, 0], - [ 5, 1, 3, ..., 0, 0, 0], - [16, 1, 5, ..., 0, 0, 0], + [ 0, 0, 8, ..., 0, 0, 0], + [ 0, 0, 19, ..., 0, 0, 0], + [ 0, 0, 5, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:24,156 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 32 x 43) +tensor([[ 3, 3, 14, ..., 0, 0, 0], + [ 1, 13, 5, ..., 0, 2, 2], + [ 8, 20, 21, ..., 0, 0, 0], ..., - [ 6, 4, 3, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], - [ 6, 20, 19, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 20:36:36,962 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 32 x 51 x 51) + [ 6, 0, 11, ..., 0, 0, 0], + [37, 66, 13, ..., 0, 0, 0], + [ 2, 26, 11, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:24,158 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 32 x 43 x 43) tensor([[[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [28, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [ 9, -1, -1, ..., -1, -1, -1], - ..., + [[-1, -1, 1, ..., -1, -1, -1], + [-1, -1, 21, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + ..., [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., 0, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [ 8, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, 1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -438,49 +438,49 @@ tensor([[[-1, -1, -1, ..., -1, -1, -1], ..., - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], + [-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, 10, -1, ..., -1, -1, -1], - [-1, 4, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 14, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, 19, -1, ..., -1, -1, -1], + [[-1, -1, 0, ..., -1, -1, -1], + [-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 20:36:36,969 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 32 x 51 x 51) +2024-07-01 00:22:24,164 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 32 x 43 x 43) tensor([[[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [33, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [30, -1, -1, ..., -1, -1, -1], - ..., + [[-1, -1, 1, ..., -1, -1, -1], + [-1, -1, 28, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + ..., [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., 0, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 1, ..., -1, -1, -1], - [27, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, 1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -488,77 +488,77 @@ tensor([[[-1, -1, -1, ..., -1, -1, -1], ..., - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, 8, -1, ..., -1, -1, -1], - [-1, 4, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 11, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, 17, -1, ..., -1, -1, -1], + [[-1, -1, 0, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 20:36:36,975 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 32 x 51) +2024-07-01 00:22:24,170 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 32 x 43) tensor([[0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 1, 1, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 20:36:36,976 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 32 x 51) -tensor([[ 4, 0, 32, ..., 0, 0, 0], - [ 3, 0, 8, ..., 0, 0, 0], - [25, 0, 14, ..., 0, 0, 0], + [1, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:24,172 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 32 x 43) +tensor([[ 4, 7, 0, ..., 0, 0, 0], + [ 0, 11, 47, ..., 44, 0, 0], + [32, 4, 0, ..., 0, 0, 0], ..., - [ 0, 6, 9, ..., 0, 0, 0], - [ 9, 33, 0, ..., 0, 0, 0], - [ 0, 7, 51, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 20:36:36,977 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 32 x 51) -tensor([[ 1, 5, 20, ..., 0, 0, 0], - [ 38, 2, 25, ..., 0, 0, 0], - [ 16, 2, 33, ..., 0, 0, 0], + [ 0, 3, 2, ..., 0, 0, 0], + [ 4, 0, 2, ..., 0, 0, 0], + [ 0, 16, 2, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:24,173 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 32 x 43) +tensor([[ 1, 1, 5, ..., 0, 0, 0], + [ 2, 60, 32, ..., 8, 0, 0], + [ 19, 12, 0, ..., 0, 0, 0], ..., - [ 3, 76, 160, ..., 0, 0, 0], - [301, 1, 0, ..., 0, 0, 0], - [ 3, 21, 4, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 20:36:36,979 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 32 of type "") -2024-06-29 20:36:46,611 - INFO - tqdm - NullAccuracy: 0.9942, NullF1: 0.2716, Lemma: 0.8726, PosFeats: 0.7957, UD-UAS: 0.8687, UD-LAS: 0.8768, EUD-UAS: 0.7715, EUD-LAS: 0.7808, Misc: 0.9417, SS: 0.7428, SC: 0.6410, Avg: 0.8102, batch_loss: 3.9208, loss: 5.7174 ||: 66%|######6 | 143/216 [00:10<00:04, 15.06it/s] -2024-06-29 20:36:51,446 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.4378, Lemma: 0.8876, PosFeats: 0.8269, UD-UAS: 0.8732, UD-LAS: 0.8811, EUD-UAS: 0.7793, EUD-LAS: 0.7884, Misc: 0.9502, SS: 0.7626, SC: 0.6625, Avg: 0.8235, batch_loss: 3.4307, loss: 5.1188 ||: 100%|#########9| 215/216 [00:14<00:00, 14.88it/s] -2024-06-29 20:36:51,501 - INFO - tqdm - NullAccuracy: 0.9948, NullF1: 0.4384, Lemma: 0.8877, PosFeats: 0.8271, UD-UAS: 0.8733, UD-LAS: 0.8812, EUD-UAS: 0.7794, EUD-LAS: 0.7885, Misc: 0.9502, SS: 0.7628, SC: 0.6628, Avg: 0.8237, batch_loss: 3.9430, loss: 5.1133 ||: 100%|##########| 216/216 [00:14<00:00, 14.50it/s] -2024-06-29 20:36:51,502 - INFO - allennlp.training.gradient_descent_trainer - Validating -2024-06-29 20:36:51,502 - INFO - tqdm - 0%| | 0/54 [00:00") +2024-07-01 00:22:33,869 - INFO - tqdm - NullAccuracy: 0.9940, NullF1: 0.2732, Lemma: 0.8645, PosFeats: 0.7869, UD-UAS: 0.8695, UD-LAS: 0.8782, EUD-UAS: 0.7661, EUD-LAS: 0.7745, Misc: 0.9409, SS: 0.7440, SC: 0.6461, Avg: 0.8079, batch_loss: 4.1115, loss: 5.7498 ||: 67%|######7 | 145/216 [00:10<00:04, 14.95it/s] +2024-07-01 00:22:38,558 - INFO - tqdm - NullAccuracy: 0.9947, NullF1: 0.4139, Lemma: 0.8804, PosFeats: 0.8185, UD-UAS: 0.8737, UD-LAS: 0.8821, EUD-UAS: 0.7726, EUD-LAS: 0.7811, Misc: 0.9488, SS: 0.7635, SC: 0.6670, Avg: 0.8208, batch_loss: 3.3410, loss: 5.1689 ||: 100%|#########9| 215/216 [00:14<00:00, 15.28it/s] +2024-07-01 00:22:38,624 - INFO - tqdm - NullAccuracy: 0.9947, NullF1: 0.4145, Lemma: 0.8806, PosFeats: 0.8188, UD-UAS: 0.8736, UD-LAS: 0.8820, EUD-UAS: 0.7726, EUD-LAS: 0.7810, Misc: 0.9489, SS: 0.7638, SC: 0.6670, Avg: 0.8209, batch_loss: 4.0110, loss: 5.1635 ||: 100%|##########| 216/216 [00:14<00:00, 14.59it/s] +2024-07-01 00:22:38,624 - INFO - allennlp.training.gradient_descent_trainer - Validating +2024-07-01 00:22:38,625 - INFO - tqdm - 0%| | 0/55 [00:00") -2024-06-29 20:36:51,634 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 32 x 44) -tensor([[0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], + [0, 0], + [0, 0], + [0, 0]]], device='cuda:0') +2024-07-01 00:22:38,767 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 32 of type "") +2024-07-01 00:22:38,767 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 32 x 44) +tensor([[ 0, 0, 0, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], + [ 0, 0, 10, ..., 0, 0, 0], ..., - [0, 4, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 20:36:51,635 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 32 x 44) -tensor([[ 4, 5, 28, ..., 0, 0, 0], - [ 3, 3, 2, ..., 0, 0, 0], - [ 1, 6, 5, ..., 0, 0, 0], + [ 0, 0, 4, ..., 0, 0, 0], + [ 0, 0, 3, ..., 0, 0, 0], + [ 0, 0, 79, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:38,768 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 32 x 44) +tensor([[10, 4, 0, ..., 0, 0, 0], + [ 1, 0, 2, ..., 0, 0, 0], + [ 2, 6, 5, ..., 0, 0, 0], ..., - [20, 14, 3, ..., 0, 0, 0], - [ 6, 20, 4, ..., 0, 0, 0], - [ 8, 2, 6, ..., 0, 5, 2]], device='cuda:0') -2024-06-29 20:36:51,636 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 32 x 44 x 44) -tensor([[[-1, 5, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 4, ..., -1, -1, -1], + [ 8, 23, 14, ..., 0, 0, 0], + [ 8, 1, 16, ..., 0, 0, 0], + [ 3, 0, 3, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:38,770 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 32 x 44 x 44) +tensor([[[-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [28, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, 2, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -676,39 +676,39 @@ tensor([[[-1, 5, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[ 4, -1, -1, ..., -1, -1, -1], + [-1, -1, 1, ..., -1, -1, -1], + [16, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 10, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 23, -1, ..., -1, -1, -1], ..., - [-1, -1, -1, ..., -1, 10, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 20:36:51,643 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 32 x 44 x 44) -tensor([[[-1, 5, -1, ..., -1, -1, -1], - [-1, -1, 2, ..., -1, -1, -1], - [-1, -1, 4, ..., -1, -1, -1], +2024-07-01 00:22:38,776 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 32 x 44 x 44) +tensor([[[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [33, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, 3, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -726,390 +726,390 @@ tensor([[[-1, 5, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[ 4, -1, -1, ..., -1, -1, -1], + [-1, -1, 1, ..., -1, -1, -1], + [34, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 8, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 22, -1, ..., -1, -1, -1], ..., - [-1, -1, -1, ..., -1, 8, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 20:36:51,649 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 32 x 44) +2024-07-01 00:22:38,782 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 32 x 44) tensor([[0, 0, 0, ..., 0, 0, 0], [0, 1, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], + [1, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], - [1, 0, 0, ..., 0, 1, 0]], device='cuda:0') -2024-06-29 20:36:51,650 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 32 x 44) -tensor([[10, 12, 2, ..., 0, 0, 0], - [ 4, 7, 0, ..., 0, 0, 0], - [ 0, 0, 60, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:38,784 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 32 x 44) +tensor([[ 0, 41, 18, ..., 0, 0, 0], + [ 0, 5, 0, ..., 0, 0, 0], + [ 0, 0, 4, ..., 0, 0, 0], ..., - [19, 2, 8, ..., 0, 0, 0], - [ 0, 7, 8, ..., 0, 0, 0], - [32, 0, 0, ..., 41, 37, 0]], device='cuda:0') -2024-06-29 20:36:51,652 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 32 x 44) -tensor([[ 4, 39, 26, ..., 0, 0, 0], - [ 1, 1, 0, ..., 0, 0, 0], - [ 2, 3, 1, ..., 0, 0, 0], + [ 32, 19, 2, ..., 0, 0, 0], + [101, 0, 28, ..., 0, 0, 0], + [ 3, 4, 7, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 00:22:38,785 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 32 x 44) +tensor([[ 3, 105, 120, ..., 0, 0, 0], + [ 2, 8, 0, ..., 0, 0, 0], + [ 0, 3, 15, ..., 0, 0, 0], ..., - [ 21, 11, 12, ..., 0, 0, 0], - [ 3, 21, 197, ..., 0, 0, 0], - [ 16, 0, 3, ..., 200, 93, 0]], device='cuda:0') -2024-06-29 20:36:51,653 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 32 of type "") -2024-06-29 20:36:57,575 - INFO - tqdm - NullAccuracy: 0.9970, NullF1: 0.7468, Lemma: 0.9316, PosFeats: 0.9110, UD-UAS: 0.9019, UD-LAS: 0.9074, EUD-UAS: 0.8480, EUD-LAS: 0.8550, Misc: 0.9738, SS: 0.8160, SC: 0.7229, Avg: 0.8742, batch_loss: 3.5116, loss: 3.5299 ||: 100%|##########| 54/54 [00:06<00:00, 10.07it/s] -2024-06-29 20:36:57,575 - INFO - tqdm - NullAccuracy: 0.9970, NullF1: 0.7468, Lemma: 0.9316, PosFeats: 0.9110, UD-UAS: 0.9019, UD-LAS: 0.9074, EUD-UAS: 0.8480, EUD-LAS: 0.8550, Misc: 0.9738, SS: 0.8160, SC: 0.7229, Avg: 0.8742, batch_loss: 3.5116, loss: 3.5299 ||: 100%|##########| 54/54 [00:06<00:00, 8.89it/s] -2024-06-29 20:36:57,576 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - Training | Validation -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.824 | 0.874 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.788 | 0.855 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.779 | 0.848 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.888 | 0.932 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.950 | 0.974 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.997 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.438 | 0.747 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.827 | 0.911 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - SC | 0.663 | 0.723 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - SS | 0.763 | 0.816 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.881 | 0.907 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.873 | 0.902 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 302.498 | N/A -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - loss | 5.113 | 3.530 -2024-06-29 20:36:57,579 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 1869.859 | N/A -2024-06-29 20:36:58,064 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.456992 -2024-06-29 20:36:58,065 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:08 -2024-06-29 20:36:58,065 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 -2024-06-29 20:36:58,065 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 2.2G -2024-06-29 20:36:58,065 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 5.7G -2024-06-29 20:36:58,065 - INFO - allennlp.training.gradient_descent_trainer - Training -2024-06-29 20:36:58,066 - INFO - tqdm - 0%| | 0/216 [00:00") +2024-07-01 00:22:44,937 - INFO - tqdm - NullAccuracy: 0.9968, NullF1: 0.7310, Lemma: 0.9273, PosFeats: 0.9038, UD-UAS: 0.9017, UD-LAS: 0.9085, EUD-UAS: 0.8411, EUD-LAS: 0.8500, Misc: 0.9721, SS: 0.8126, SC: 0.7239, Avg: 0.8712, batch_loss: 3.8329, loss: 3.5760 ||: 100%|##########| 55/55 [00:06<00:00, 8.71it/s] +2024-07-01 00:22:44,937 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. +2024-07-01 00:22:44,940 - INFO - allennlp.training.callbacks.console_logger - Training | Validation +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.821 | 0.871 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.781 | 0.850 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.773 | 0.841 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.881 | 0.927 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.949 | 0.972 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.995 | 0.997 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.414 | 0.731 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.819 | 0.904 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - SC | 0.667 | 0.724 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - SS | 0.764 | 0.813 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.882 | 0.909 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.874 | 0.902 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 302.392 | N/A +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - loss | 5.164 | 3.576 +2024-07-01 00:22:44,941 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 1850.277 | N/A +2024-07-01 00:22:45,428 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:00:21.613357 +2024-07-01 00:22:45,428 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:03:10 +2024-07-01 00:22:45,428 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 +2024-07-01 00:22:45,428 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 2.1G +2024-07-01 00:22:45,428 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 5.7G +2024-07-01 00:22:45,429 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 00:22:45,429 - INFO - tqdm - 0%| | 0/216 [00:00