piotr-rybak
committed on
Commit
•
aed2c83
1
Parent(s):
f8c0dd1
Update README.md
Browse files
README.md
CHANGED
@@ -18,7 +18,7 @@ HerBERT was trained on six different corpora available for Polish language:
|
|
18 |
| :------ | ------: | ------: |
|
19 |
| [CCNet Middle](https://github.com/facebookresearch/cc_net) | 3243M | 7.9M |
|
20 |
| [CCNet Head](https://github.com/facebookresearch/cc_net) | 2641M | 7.0M |
|
21 |
-
| [National Corpus of Polish](http://nkjp.pl/index.php?page=14&lang=
|
22 |
| [Open Subtitles](http://opus.nlpl.eu/OpenSubtitles-v2018.php) | 1056M | 1.1M |
|
23 |
| [Wikipedia](https://dumps.wikimedia.org/) | 260M | 1.4M |
|
24 |
| [Wolne Lektury](https://wolnelektury.pl/) | 41M | 5.5k |
|
@@ -62,7 +62,7 @@ If you use this model, please cite the following paper:
|
|
62 |
title = "{H}er{BERT}: Efficiently Pretrained Transformer-based Language Model for {P}olish",
|
63 |
author = "Mroczkowski, Robert and
|
64 |
Rybak, Piotr and
|
65 |
-
Wr{
|
66 |
Gawlik, Ireneusz",
|
67 |
booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing",
|
68 |
month = apr,
|
|
|
18 |
| :------ | ------: | ------: |
|
19 |
| [CCNet Middle](https://github.com/facebookresearch/cc_net) | 3243M | 7.9M |
|
20 |
| [CCNet Head](https://github.com/facebookresearch/cc_net) | 2641M | 7.0M |
|
21 |
+
| [National Corpus of Polish](http://nkjp.pl/index.php?page=14&lang=1)| 1357M | 3.9M |
|
22 |
| [Open Subtitles](http://opus.nlpl.eu/OpenSubtitles-v2018.php) | 1056M | 1.1M |
|
23 |
| [Wikipedia](https://dumps.wikimedia.org/) | 260M | 1.4M |
|
24 |
| [Wolne Lektury](https://wolnelektury.pl/) | 41M | 5.5k |
|
|
|
62 |
title = "{H}er{BERT}: Efficiently Pretrained Transformer-based Language Model for {P}olish",
|
63 |
author = "Mroczkowski, Robert and
|
64 |
Rybak, Piotr and
|
65 |
+
Wr{\'o}blewska, Alina and
|
66 |
Gawlik, Ireneusz",
|
67 |
booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing",
|
68 |
month = apr,
|