Russian
word2vec
nlpl_204 / meta.json
lbourdois's picture
Upload meta.json with huggingface_hub
67026e1
raw
history blame
3.04 kB
{
"algorithm": {
"command": null,
"id": 4,
"name": "Gensim Continuous Bag-of-Words",
"tool": "Gensim",
"url": "https://github.com/RaRe-Technologies/gensim",
"version": "3.8"
},
"contents": [
{
"filename": "model.txt",
"format": "text"
},
{
"filename": "model.bin",
"format": "data"
},
{
"filename": "meta.json",
"format": "json"
}
],
"corpus": [
{
"NER": true,
"case preserved": false,
"description": "Russian National Corpus",
"id": 88,
"language": "rus",
"lemmatized": true,
"public": false,
"stop words removal": null,
"tagger": "UDPipe 1.2",
"tagset": "UPoS",
"tokens": 270000000,
"url": "http://ruscorpora.ru/"
},
{
"NER": true,
"case preserved": false,
"description": "Russian Wikipedia dump of December 2018",
"id": 91,
"language": "rus",
"lemmatized": true,
"public": true,
"stop words removal": null,
"tagger": "UDPipe 1.2",
"tagset": "UPoS",
"tokens": 518531000,
"tool": "https://github.com/RaRe-Technologies/gensim/blob/master/gensim/scripts/segment_wiki.py",
"url": "https://dumps.wikimedia.org/"
},
{
"NER": true,
"case preserved": false,
"description": "Russian News from Dialogue Evaluation 2020",
"id": 114,
"language": "rus",
"lemmatized": true,
"public": true,
"stop words removal": null,
"tagger": "UDPipe 1.2",
"tagset": "UPoS",
"tokens": 1321489104,
"url": "https://competitions.codalab.org/competitions/22168"
},
{
"NER": true,
"case preserved": false,
"description": "Araneum Russicum Maximum",
"id": 115,
"language": "rus",
"lemmatized": true,
"public": true,
"stop words removal": "functional PoS",
"tagger": "MyStem",
"tagset": "UPoS",
"tokens": 10000000000,
"url": "https://rusvectores.org/en/models/"
}
],
"creators": [
{
"email": "andreku@ifi.uio.no",
"name": "Andrey Kutuzov"
},
{
"email": "maria.kunilovskaya@wlv.ac.uk",
"name": "Maria Kunilovskaya"
}
],
"dimensions": 300,
"documentation": [
"https://github.com/kunilovskaya/hypohyper/"
],
"external_id": "ruscorporawikiaraneumnews_mwe_upos_cbow_300_2_2020",
"handle": "http://vectors.nlpl.eu/repository/20/204.zip",
"id": 204,
"iterations": 3,
"vocabulary size": 998459,
"window": 2
}