{
  "dataset_reader": {
    "type": "drop",
    "instance_format": "drop",
    "passage_length_limit": 400,
    "question_length_limit": 50,
    "skip_when_all_empty": [
      "passage_span",
      "question_span",
      "addition_subtraction",
      "counting"
    ],
    "token_indexers": {
      "token_characters": {
        "type": "characters",
        "min_padding_length": 5
      },
      "tokens": {
        "type": "single_id",
        "lowercase_tokens": true
      }
    }
  },
  "model": {
    "type": "naqanet",
    "answering_abilities": [
      "passage_span_extraction",
      "question_span_extraction",
      "addition_subtraction",
      "counting"
    ],
    "dropout_prob": 0.1,
    "matrix_attention_layer": {
      "type": "linear",
      "combination": "x,y,x*y",
      "tensor_1_dim": 128,
      "tensor_2_dim": 128
    },
    "modeling_layer": {
      "type": "qanet_encoder",
      "attention_dropout_prob": 0,
      "attention_projection_dim": 128,
      "conv_kernel_size": 5,
      "dropout_prob": 0.1,
      "feedforward_hidden_dim": 128,
      "hidden_dim": 128,
      "input_dim": 128,
      "layer_dropout_undecayed_prob": 0.1,
      "num_attention_heads": 8,
      "num_blocks": 6,
      "num_convs_per_block": 2
    },
    "num_highway_layers": 2,
    "phrase_layer": {
      "type": "qanet_encoder",
      "attention_dropout_prob": 0,
      "attention_projection_dim": 128,
      "conv_kernel_size": 7,
      "dropout_prob": 0.1,
      "feedforward_hidden_dim": 128,
      "hidden_dim": 128,
      "input_dim": 128,
      "layer_dropout_undecayed_prob": 0.1,
      "num_attention_heads": 8,
      "num_blocks": 1,
      "num_convs_per_block": 4
    },
    "regularizer": {
      "regexes": [
        [
          ".*",
          {
            "alpha": 1e-07,
            "type": "l2"
          }
        ]
      ]
    },
    "text_field_embedder": {
      "token_embedders": {
        "token_characters": {
          "type": "character_encoding",
          "embedding": {
            "embedding_dim": 64
          },
          "encoder": {
            "type": "cnn",
            "embedding_dim": 64,
            "ngram_filter_sizes": [
              5
            ],
            "num_filters": 200
          }
        },
        "tokens": {
          "type": "embedding",
          "embedding_dim": 300,
          "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip",
          "trainable": false
        }
      }
    }
  },
  "train_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_train.json",
  "validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_dev.json",
  "trainer": {
    "callbacks": [
      "tensorboard"
    ],
    "grad_norm": 5,
    "moving_average": {
      "type": "exponential",
      "decay": 0.9999
    },
    "num_epochs": 50,
    "optimizer": {
      "type": "adam",
      "betas": [
        0.8,
        0.999
      ],
      "eps": 1e-07,
      "lr": 0.0005
    },
    "patience": 10,
    "validation_metric": "+f1"
  },
  "vocabulary": {
    "min_count": {
      "token_characters": 200
    },
    "only_include_pretrained_words": true,
    "pretrained_files": {
      "tokens": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip"
    }
  },
  "data_loader": {
    "batch_sampler": {
      "type": "bucket",
      "batch_size": 16
    }
  },
  "validation_dataset_reader": {
    "type": "drop",
    "instance_format": "drop",
    "passage_length_limit": 1000,
    "question_length_limit": 100,
    "skip_when_all_empty": [],
    "token_indexers": {
      "token_characters": {
        "type": "characters",
        "min_padding_length": 5
      },
      "tokens": {
        "type": "single_id",
        "lowercase_tokens": true
      }
    }
  }
}