# Hosting-page header (not code): CarlosMalaga · "Upload 201 files" · commit 2f044c1 (verified) · 3.33 kB
# Lookup table from a user-facing identifier to a spaCy pipeline name.
#
# Two kinds of keys are accepted:
#   * a short language code ("en", "it", ...), which resolves to the small
#     ("sm") pipeline listed for that language;
#   * a full pipeline name, which resolves to itself, so callers can pass
#     either form through the same lookup.
SPACY_LANGUAGE_MAPPER = {
    # --- short language codes -> default pipeline ---
    "ca": "ca_core_news_sm",
    "da": "da_core_news_sm",
    "de": "de_core_news_sm",
    "el": "el_core_news_sm",
    "en": "en_core_web_sm",
    "es": "es_core_news_sm",
    "fr": "fr_core_news_sm",
    "it": "it_core_news_sm",
    "ja": "ja_core_news_sm",
    "lt": "lt_core_news_sm",
    "mk": "mk_core_news_sm",
    "nb": "nb_core_news_sm",
    "nl": "nl_core_news_sm",
    "pl": "pl_core_news_sm",
    "pt": "pt_core_news_sm",
    "ro": "ro_core_news_sm",
    "ru": "ru_core_news_sm",
    "xx": "xx_sent_ud_sm",
    "zh": "zh_core_web_sm",
    # --- full pipeline names -> themselves ---
    # Catalan
    "ca_core_news_sm": "ca_core_news_sm",
    "ca_core_news_md": "ca_core_news_md",
    "ca_core_news_lg": "ca_core_news_lg",
    "ca_core_news_trf": "ca_core_news_trf",
    # Danish
    "da_core_news_sm": "da_core_news_sm",
    "da_core_news_md": "da_core_news_md",
    "da_core_news_lg": "da_core_news_lg",
    "da_core_news_trf": "da_core_news_trf",
    # German
    "de_core_news_sm": "de_core_news_sm",
    "de_core_news_md": "de_core_news_md",
    "de_core_news_lg": "de_core_news_lg",
    "de_dep_news_trf": "de_dep_news_trf",
    # Greek
    "el_core_news_sm": "el_core_news_sm",
    "el_core_news_md": "el_core_news_md",
    "el_core_news_lg": "el_core_news_lg",
    # English
    "en_core_web_sm": "en_core_web_sm",
    "en_core_web_md": "en_core_web_md",
    "en_core_web_lg": "en_core_web_lg",
    "en_core_web_trf": "en_core_web_trf",
    # Spanish
    "es_core_news_sm": "es_core_news_sm",
    "es_core_news_md": "es_core_news_md",
    "es_core_news_lg": "es_core_news_lg",
    "es_dep_news_trf": "es_dep_news_trf",
    # French
    "fr_core_news_sm": "fr_core_news_sm",
    "fr_core_news_md": "fr_core_news_md",
    "fr_core_news_lg": "fr_core_news_lg",
    "fr_dep_news_trf": "fr_dep_news_trf",
    # Italian
    "it_core_news_sm": "it_core_news_sm",
    "it_core_news_md": "it_core_news_md",
    "it_core_news_lg": "it_core_news_lg",
    # Japanese
    "ja_core_news_sm": "ja_core_news_sm",
    "ja_core_news_md": "ja_core_news_md",
    "ja_core_news_lg": "ja_core_news_lg",
    # NOTE: spaCy publishes the Japanese transformer pipeline as
    # "ja_core_news_trf". The "ja_dep_news_trf" key is kept only so existing
    # callers that already pass it keep resolving as before.
    "ja_dep_news_trf": "ja_dep_news_trf",
    "ja_core_news_trf": "ja_core_news_trf",
    # Lithuanian
    "lt_core_news_sm": "lt_core_news_sm",
    "lt_core_news_md": "lt_core_news_md",
    "lt_core_news_lg": "lt_core_news_lg",
    # Macedonian
    "mk_core_news_sm": "mk_core_news_sm",
    "mk_core_news_md": "mk_core_news_md",
    "mk_core_news_lg": "mk_core_news_lg",
    # Norwegian Bokmål
    "nb_core_news_sm": "nb_core_news_sm",
    "nb_core_news_md": "nb_core_news_md",
    "nb_core_news_lg": "nb_core_news_lg",
    # Dutch
    "nl_core_news_sm": "nl_core_news_sm",
    "nl_core_news_md": "nl_core_news_md",
    "nl_core_news_lg": "nl_core_news_lg",
    # Polish
    "pl_core_news_sm": "pl_core_news_sm",
    "pl_core_news_md": "pl_core_news_md",
    "pl_core_news_lg": "pl_core_news_lg",
    # Portuguese
    "pt_core_news_sm": "pt_core_news_sm",
    "pt_core_news_md": "pt_core_news_md",
    "pt_core_news_lg": "pt_core_news_lg",
    # Romanian
    "ro_core_news_sm": "ro_core_news_sm",
    "ro_core_news_md": "ro_core_news_md",
    "ro_core_news_lg": "ro_core_news_lg",
    # Russian
    "ru_core_news_sm": "ru_core_news_sm",
    "ru_core_news_md": "ru_core_news_md",
    "ru_core_news_lg": "ru_core_news_lg",
    # Multi-language
    "xx_ent_wiki_sm": "xx_ent_wiki_sm",
    "xx_sent_ud_sm": "xx_sent_ud_sm",
    # Chinese
    "zh_core_web_sm": "zh_core_web_sm",
    "zh_core_web_md": "zh_core_web_md",
    "zh_core_web_lg": "zh_core_web_lg",
    "zh_core_web_trf": "zh_core_web_trf",
}
from relik.inference.data.tokenizers.spacy_tokenizer import SpacyTokenizer