File size: 3,325 Bytes
2f044c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# Lookup table from a user-supplied identifier to a spaCy pipeline package name.
#
# Two kinds of keys are accepted:
#   * a two-letter language code, which resolves to the default pipeline listed
#     for that language (a small "*_sm" package in every case);
#   * a full pipeline package name, which resolves to itself, so callers can
#     pass either form through the same lookup.
SPACY_LANGUAGE_MAPPER = {
    # --- language code -> default pipeline ---------------------------------
    **{
        "ca": "ca_core_news_sm",
        "da": "da_core_news_sm",
        "de": "de_core_news_sm",
        "el": "el_core_news_sm",
        "en": "en_core_web_sm",
        "es": "es_core_news_sm",
        "fr": "fr_core_news_sm",
        "it": "it_core_news_sm",
        "ja": "ja_core_news_sm",
        "lt": "lt_core_news_sm",
        "mk": "mk_core_news_sm",
        "nb": "nb_core_news_sm",
        "nl": "nl_core_news_sm",
        "pl": "pl_core_news_sm",
        "pt": "pt_core_news_sm",
        "ro": "ro_core_news_sm",
        "ru": "ru_core_news_sm",
        "xx": "xx_sent_ud_sm",
        "zh": "zh_core_web_sm",
    },
    # --- full pipeline name -> itself ---------------------------------------
    **{
        pipeline: pipeline
        for pipeline in (
            # Catalan
            "ca_core_news_sm",
            "ca_core_news_md",
            "ca_core_news_lg",
            "ca_core_news_trf",
            # Danish
            "da_core_news_sm",
            "da_core_news_md",
            "da_core_news_lg",
            "da_core_news_trf",
            # German
            "de_core_news_sm",
            "de_core_news_md",
            "de_core_news_lg",
            "de_dep_news_trf",
            # Greek
            "el_core_news_sm",
            "el_core_news_md",
            "el_core_news_lg",
            # English
            "en_core_web_sm",
            "en_core_web_md",
            "en_core_web_lg",
            "en_core_web_trf",
            # Spanish
            "es_core_news_sm",
            "es_core_news_md",
            "es_core_news_lg",
            "es_dep_news_trf",
            # French
            "fr_core_news_sm",
            "fr_core_news_md",
            "fr_core_news_lg",
            "fr_dep_news_trf",
            # Italian
            "it_core_news_sm",
            "it_core_news_md",
            "it_core_news_lg",
            # Japanese
            "ja_core_news_sm",
            "ja_core_news_md",
            "ja_core_news_lg",
            "ja_dep_news_trf",
            # Lithuanian
            "lt_core_news_sm",
            "lt_core_news_md",
            "lt_core_news_lg",
            # Macedonian
            "mk_core_news_sm",
            "mk_core_news_md",
            "mk_core_news_lg",
            # Norwegian Bokmal
            "nb_core_news_sm",
            "nb_core_news_md",
            "nb_core_news_lg",
            # Dutch
            "nl_core_news_sm",
            "nl_core_news_md",
            "nl_core_news_lg",
            # Polish
            "pl_core_news_sm",
            "pl_core_news_md",
            "pl_core_news_lg",
            # Portuguese
            "pt_core_news_sm",
            "pt_core_news_md",
            "pt_core_news_lg",
            # Romanian
            "ro_core_news_sm",
            "ro_core_news_md",
            "ro_core_news_lg",
            # Russian
            "ru_core_news_sm",
            "ru_core_news_md",
            "ru_core_news_lg",
            # Multi-language
            "xx_ent_wiki_sm",
            "xx_sent_ud_sm",
            # Chinese
            "zh_core_web_sm",
            "zh_core_web_md",
            "zh_core_web_lg",
            "zh_core_web_trf",
        )
    },
}
from relik.inference.data.tokenizers.spacy_tokenizer import SpacyTokenizer
|