# Fine-tune (--do_train) and evaluate (--do_eval) google/canine-c with
# run_squad.py, writing results to the canine-c-squad directory.
#
# Every option line ends with a backslash so the shell passes all flags to a
# single python invocation; without the continuations each "--flag" line
# would be executed as a separate (failing) command.
#
# Per-GPU batch size is 1 with 128 gradient-accumulation steps.
#
# NOTE(review): --model_type is "bert" while the checkpoint's config reports
# model_type "canine" -- confirm this is intentional for run_squad.py.
python run_squad.py \
  --model_name_or_path google/canine-c \
  --do_train \
  --do_eval \
  --per_gpu_train_batch_size 1 \
  --per_gpu_eval_batch_size 1 \
  --gradient_accumulation_steps 128 \
  --learning_rate 3e-5 \
  --num_train_epochs 3 \
  --max_seq_length 1024 \
  --doc_stride 128 \
  --max_answer_length 240 \
  --output_dir canine-c-squad \
  --model_type bert
{ "_name_or_path": "google/canine-c", "architectures": [ "CanineForQuestionAnswering" ], "attention_probs_dropout_prob": 0.1, "bos_token_id": 57344, "downsampling_rate": 4, "eos_token_id": 57345, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "layer_norm_eps": 1e-12, "local_transformer_stride": 128, "max_position_embeddings": 16384, "model_type": "canine", "num_attention_heads": 12, "num_hash_buckets": 16384, "num_hash_functions": 8, "num_hidden_layers": 12, "pad_token_id": 0, "torch_dtype": "float32", "transformers_version": "4.19.0.dev0", "type_vocab_size": 16, "upsampling_kernel_size": 4, "use_cache": true }
{'exact': 58.893093661305585, 'f1': 72.18823344945899}