Update README.md
Browse files
README.md
CHANGED
@@ -27,4 +27,27 @@ IceBERT was trained with fairseq using the RoBERTa-base architecture. The traini
|
|
27 |
| Open Icelandic e-books (Rafbókavefurinn) | 14 MB | 2.6M |
|
28 |
| Data from the medical library of Landspitali | 33 MB | 5.2M |
|
29 |
| Student theses from Icelandic universities (Skemman) | 2.2 GB | 367M |
|
30 |
-
| Total | 15.8 GB | 2,664M |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
| Open Icelandic e-books (Rafbókavefurinn) | 14 MB | 2.6M |
|
28 |
| Data from the medical library of Landspitali | 33 MB | 5.2M |
|
29 |
| Student theses from Icelandic universities (Skemman) | 2.2 GB | 367M |
|
30 |
+
| Total | 15.8 GB | 2,664M |
|
31 |
+
|
32 |
+
|
33 |
+
If you find this model useful, please cite the following paper:
|
34 |
+
|
35 |
+
```bibtex
|
36 |
+
@inproceedings{snaebjarnarson-etal-2022-warm,
|
37 |
+
title = "A Warm Start and a Clean Crawled Corpus - A Recipe for Good Language Models",
|
38 |
+
author = "Sn{\ae}bjarnarson, V{\'e}steinn and
|
39 |
+
S{\'\i}monarson, Haukur Barri and
|
40 |
+
Ragnarsson, P{\'e}tur Orri and
|
41 |
+
Ing{\'o}lfsd{\'o}ttir, Svanhv{\'\i}t Lilja and
|
42 |
+
J{\'o}nsson, Haukur and
|
43 |
+
Thorsteinsson, Vilhjalmur and
|
44 |
+
Einarsson, Hafsteinn",
|
45 |
+
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
|
46 |
+
month = jun,
|
47 |
+
year = "2022",
|
48 |
+
address = "Marseille, France",
|
49 |
+
publisher = "European Language Resources Association",
|
50 |
+
url = "https://aclanthology.org/2022.lrec-1.464",
|
51 |
+
pages = "4356--4366",
|
52 |
+
}
|
53 |
+
```
|