---
license: apache-2.0
tags:
- MRC
- TyDiQA
- Natural Questions
- SQuAD
- xlm-roberta-large
language:
- multilingual
---

*Task*: MRC (machine reading comprehension)

# Model description

An XLM-RoBERTa Large reading comprehension model trained on a combination of the TyDi QA, Natural Questions (NQ), and SQuAD v1 datasets, starting from a fine-tuned [TyDi xlm-roberta-large](https://huggingface.co/PrimeQA/tydiqa-primary-task-xlm-roberta-large) model.

## Intended uses & limitations

You can use the raw model for the reading comprehension task. Biases present in the underlying pre-trained language model, xlm-roberta-large, may also be present in this fine-tuned model.

## Usage

You can use this model directly with the [PrimeQA](https://github.com/primeqa/primeqa) pipeline for reading comprehension; see the [squad.ipynb](https://github.com/primeqa/primeqa/blob/main/notebooks/mrc/squad.ipynb) notebook.

### BibTeX entry and citation info

```bibtex
% Natural Questions (NQ) dataset, one of the training sets named in the model description.
@article{kwiatkowski-etal-2019-natural,
  author    = {Kwiatkowski, Tom and
               Palomaki, Jennimaria and
               Redfield, Olivia and
               Collins, Michael and
               Parikh, Ankur and
               Alberti, Chris and
               Epstein, Danielle and
               Polosukhin, Illia and
               Devlin, Jacob and
               Lee, Kenton and
               Toutanova, Kristina and
               Jones, Llion and
               Kelcey, Matthew and
               Chang, Ming-Wei and
               Dai, Andrew M. and
               Uszkoreit, Jakob and
               Le, Quoc and
               Petrov, Slav},
  title     = {{Natural Questions}: A Benchmark for Question Answering Research},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {7},
  pages     = {452--466},
  year      = {2019},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00276},
  url       = {https://aclanthology.org/Q19-1026},
}
```

```bibtex
% SQuAD dataset (arXiv preprint), one of the training sets named in the model description.
% `journal` and `pages` carry the arXiv identifier so that classic styles, which
% ignore the eprint fields, still print it.
@article{2016arXiv160605250R,
  author        = {Rajpurkar, Pranav and
                   Zhang, Jian and
                   Lopyrev, Konstantin and
                   Liang, Percy},
  title         = {{SQuAD}: 100,000+ Questions for Machine Comprehension of Text},
  journal       = {arXiv e-prints},
  year          = {2016},
  eid           = {arXiv:1606.05250},
  pages         = {arXiv:1606.05250},
  archivePrefix = {arXiv},
  eprint        = {1606.05250},
  url           = {https://arxiv.org/abs/1606.05250},
}
```

```bibtex
% TyDi QA dataset, one of the training sets named in the model description.
@article{clark-etal-2020-tydi,
  author    = {Clark, Jonathan H. and
               Choi, Eunsol and
               Collins, Michael and
               Garrette, Dan and
               Kwiatkowski, Tom and
               Nikolaev, Vitaly and
               Palomaki, Jennimaria},
  title     = {{TyDi} {QA}: A Benchmark for Information-Seeking Question Answering in Typologically Diverse Languages},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {8},
  pages     = {454--470},
  year      = {2020},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00317},
  url       = {https://aclanthology.org/2020.tacl-1.30},
}
```
