|
{ |
|
"amber.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"aya_101.cc100-en": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 317881, |
|
"n_chars": 1121360 |
|
}, |
|
"baichuan.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 280108, |
|
"n_chars": 1121360 |
|
}, |
|
"baichuan2.cc100-en": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1124813, |
|
"n_tokens": 269011, |
|
"n_chars": 1121360 |
|
}, |
|
"bert_base_cased.cc100-en": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1124813, |
|
"n_tokens": 288022, |
|
"n_chars": 1121360 |
|
}, |
|
"bert_base_chinese.cc100-en": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 377068, |
|
"n_chars": 1121360 |
|
}, |
|
"bert_base_uncased.cc100-en": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1124813, |
|
"n_tokens": 280575, |
|
"n_chars": 1121360 |
|
}, |
|
"bloom.cc100-en": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257405, |
|
"n_chars": 1121360 |
|
}, |
|
"byt5_small.cc100-en": { |
|
"vocab_size": 384, |
|
"n_bytes": 1124813, |
|
"n_tokens": 1134813, |
|
"n_chars": 1121360 |
|
}, |
|
"character_glm_6b.cc100-en": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1124813, |
|
"n_tokens": 289347, |
|
"n_chars": 1121360 |
|
}, |
|
"chatglm2_6b.cc100-en": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1124813, |
|
"n_tokens": 289329, |
|
"n_chars": 1121360 |
|
}, |
|
"chatglm3_6b.cc100-en": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1124813, |
|
"n_tokens": 289347, |
|
"n_chars": 1121360 |
|
}, |
|
"chatglm_6b.cc100-en": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1124813, |
|
"n_tokens": 284761, |
|
"n_chars": 1121360 |
|
}, |
|
"chatyuan_large_v2.cc100-en": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 536033, |
|
"n_chars": 1121360 |
|
}, |
|
"chinese_llama.cc100-en": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1124813, |
|
"n_tokens": 291514, |
|
"n_chars": 1121360 |
|
}, |
|
"chinese_llama2.cc100-en": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"code_davinci_002.cc100-en": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258403, |
|
"n_chars": 1121360 |
|
}, |
|
"crystal_coder.cc100-en": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1124813, |
|
"n_tokens": 284627, |
|
"n_chars": 1121360 |
|
}, |
|
"dbrx_instruct.cc100-en": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254985, |
|
"n_chars": 1121360 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-en": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1124813, |
|
"n_tokens": 287408, |
|
"n_chars": 1121360 |
|
}, |
|
"deepseek_llm_7b_base.cc100-en": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1124813, |
|
"n_tokens": 272324, |
|
"n_chars": 1121360 |
|
}, |
|
"falcon_180b.cc100-en": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1124813, |
|
"n_tokens": 262509, |
|
"n_chars": 1121360 |
|
}, |
|
"falcon_7b.cc100-en": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1124813, |
|
"n_tokens": 262509, |
|
"n_chars": 1121360 |
|
}, |
|
"fastchat_t5_3b.cc100-en": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1124813, |
|
"n_tokens": 484941, |
|
"n_chars": 1121360 |
|
}, |
|
"flan_t5_base.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"gemma_7b.cc100-en": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 268010, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt2.cc100-en": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258428, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt2_chinese.cc100-en": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 392641, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_35_turbo.cc100-en": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254985, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_4.cc100-en": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254985, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_nexo_20b.cc100-en": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1124813, |
|
"n_tokens": 259357, |
|
"n_chars": 1121360 |
|
}, |
|
"grok_1.cc100-en": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258048, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm2_chat_7b.cc100-en": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271583, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm2_math_7b.cc100-en": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271583, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm_chat_7b.cc100-en": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271293, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm_xcomposer_7b.cc100-en": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271293, |
|
"n_chars": 1121360 |
|
}, |
|
"jamba_v0_1.cc100-en": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1124813, |
|
"n_tokens": 274242, |
|
"n_chars": 1121360 |
|
}, |
|
"kplug.cc100-en": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1124813, |
|
"n_tokens": 393564, |
|
"n_chars": 1121360 |
|
}, |
|
"llama.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"llama2.cc100-en": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"llama3.cc100-en": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254944, |
|
"n_chars": 1121360 |
|
}, |
|
"mistral_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"mixtral_8_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"mobilebert_uncased.cc100-en": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1124813, |
|
"n_tokens": 280575, |
|
"n_chars": 1121360 |
|
}, |
|
"moss.cc100-en": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257070, |
|
"n_chars": 1121360 |
|
}, |
|
"mt5_large.cc100-en": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 317881, |
|
"n_chars": 1121360 |
|
}, |
|
"olmo_7b.cc100-en": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1124813, |
|
"n_tokens": 259357, |
|
"n_chars": 1121360 |
|
}, |
|
"orion_14b_chat.cc100-en": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1124813, |
|
"n_tokens": 265948, |
|
"n_chars": 1121360 |
|
}, |
|
"phi_1.cc100-en": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258409, |
|
"n_chars": 1121360 |
|
}, |
|
"phi_2.cc100-en": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258409, |
|
"n_chars": 1121360 |
|
}, |
|
"phi_3_mini.cc100-en": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"pko_t5_large.cc100-en": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1124813, |
|
"n_tokens": 658985, |
|
"n_chars": 1121360 |
|
}, |
|
"prompt_clue.cc100-en": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 536033, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen1_5_14b_chat.cc100-en": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen_1_8b_chat.cc100-en": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen_72b_chat.cc100-en": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen_7b_chat.cc100-en": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"roberta_chinese_clue.cc100-en": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1124813, |
|
"n_tokens": 583058, |
|
"n_chars": 1121360 |
|
}, |
|
"skywork_13b_base.cc100-en": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294617, |
|
"n_chars": 1121360 |
|
}, |
|
"skywork_13b_math.cc100-en": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294617, |
|
"n_chars": 1121360 |
|
}, |
|
"solar_10_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"starchat_alpha.cc100-en": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1124813, |
|
"n_tokens": 288965, |
|
"n_chars": 1121360 |
|
}, |
|
"switch_c_2048.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"t5_base.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"t5_large.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"t5_small.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"text_davinci_003.cc100-en": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258403, |
|
"n_chars": 1121360 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-en": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285652, |
|
"n_chars": 1121360 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-en": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1124813, |
|
"n_tokens": 286946, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardcoder_15b_v1.cc100-en": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1124813, |
|
"n_tokens": 288965, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-en": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardlm_7b_v1.cc100-en": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardmath_70b_v1.cc100-en": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"xlm_roberta.cc100-en": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1124813, |
|
"n_tokens": 300026, |
|
"n_chars": 1121360 |
|
}, |
|
"yi_34b.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 270400, |
|
"n_chars": 1121360 |
|
}, |
|
"yi_6b.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 270400, |
|
"n_chars": 1121360 |
|
}, |
|
"yi_vl34b.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 269738, |
|
"n_chars": 1121360 |
|
}, |
|
"zephyr_7b_beta.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"amber.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"aya_101.cc100-zh-Hans": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 631182, |
|
"n_chars": 927311 |
|
}, |
|
"baichuan.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 626117, |
|
"n_chars": 927311 |
|
}, |
|
"baichuan2.cc100-zh-Hans": { |
|
"vocab_size": 125696, |
|
"n_bytes": 2633047, |
|
"n_tokens": 541464, |
|
"n_chars": 927311 |
|
}, |
|
"bert_base_cased.cc100-zh-Hans": { |
|
"vocab_size": 28996, |
|
"n_bytes": 2633047, |
|
"n_tokens": 899709, |
|
"n_chars": 927311 |
|
}, |
|
"bert_base_chinese.cc100-zh-Hans": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 896599, |
|
"n_chars": 927311 |
|
}, |
|
"bert_base_uncased.cc100-zh-Hans": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2633047, |
|
"n_tokens": 898554, |
|
"n_chars": 927311 |
|
}, |
|
"bloom.cc100-zh-Hans": { |
|
"vocab_size": 250680, |
|
"n_bytes": 2633047, |
|
"n_tokens": 573008, |
|
"n_chars": 927311 |
|
}, |
|
"byt5_small.cc100-zh-Hans": { |
|
"vocab_size": 384, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2643047, |
|
"n_chars": 927311 |
|
}, |
|
"character_glm_6b.cc100-zh-Hans": { |
|
"vocab_size": 64789, |
|
"n_bytes": 2633047, |
|
"n_tokens": 583646, |
|
"n_chars": 927311 |
|
}, |
|
"chatglm2_6b.cc100-zh-Hans": { |
|
"vocab_size": 64787, |
|
"n_bytes": 2633047, |
|
"n_tokens": 583646, |
|
"n_chars": 927311 |
|
}, |
|
"chatglm3_6b.cc100-zh-Hans": { |
|
"vocab_size": 64796, |
|
"n_bytes": 2633047, |
|
"n_tokens": 583646, |
|
"n_chars": 927311 |
|
}, |
|
"chatglm_6b.cc100-zh-Hans": { |
|
"vocab_size": 150344, |
|
"n_bytes": 2633047, |
|
"n_tokens": 527384, |
|
"n_chars": 927311 |
|
}, |
|
"chatyuan_large_v2.cc100-zh-Hans": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 564905, |
|
"n_chars": 927311 |
|
}, |
|
"chinese_llama.cc100-zh-Hans": { |
|
"vocab_size": 49953, |
|
"n_bytes": 2633047, |
|
"n_tokens": 623219, |
|
"n_chars": 927311 |
|
}, |
|
"chinese_llama2.cc100-zh-Hans": { |
|
"vocab_size": 55296, |
|
"n_bytes": 2633047, |
|
"n_tokens": 625766, |
|
"n_chars": 927311 |
|
}, |
|
"code_davinci_002.cc100-zh-Hans": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"crystal_coder.cc100-zh-Hans": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1320093, |
|
"n_chars": 927311 |
|
}, |
|
"dbrx_instruct.cc100-zh-Hans": { |
|
"vocab_size": 100280, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1084939, |
|
"n_chars": 927311 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-zh-Hans": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2633047, |
|
"n_tokens": 720577, |
|
"n_chars": 927311 |
|
}, |
|
"deepseek_llm_7b_base.cc100-zh-Hans": { |
|
"vocab_size": 100015, |
|
"n_bytes": 2633047, |
|
"n_tokens": 605081, |
|
"n_chars": 927311 |
|
}, |
|
"falcon_180b.cc100-zh-Hans": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1124681, |
|
"n_chars": 927311 |
|
}, |
|
"falcon_7b.cc100-zh-Hans": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1124681, |
|
"n_chars": 927311 |
|
}, |
|
"fastchat_t5_3b.cc100-zh-Hans": { |
|
"vocab_size": 32110, |
|
"n_bytes": 2633047, |
|
"n_tokens": 178974, |
|
"n_chars": 927311 |
|
}, |
|
"flan_t5_base.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173520, |
|
"n_chars": 927311 |
|
}, |
|
"gemma_7b.cc100-zh-Hans": { |
|
"vocab_size": 256000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 641795, |
|
"n_chars": 927311 |
|
}, |
|
"gpt2.cc100-zh-Hans": { |
|
"vocab_size": 50257, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"gpt2_chinese.cc100-zh-Hans": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 899506, |
|
"n_chars": 927311 |
|
}, |
|
"gpt_35_turbo.cc100-zh-Hans": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1084939, |
|
"n_chars": 927311 |
|
}, |
|
"gpt_4.cc100-zh-Hans": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1084939, |
|
"n_chars": 927311 |
|
}, |
|
"gpt_nexo_20b.cc100-zh-Hans": { |
|
"vocab_size": 50277, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1220529, |
|
"n_chars": 927311 |
|
}, |
|
"grok_1.cc100-zh-Hans": { |
|
"vocab_size": 131072, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1414508, |
|
"n_chars": 927311 |
|
}, |
|
"internlm2_chat_7b.cc100-zh-Hans": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579976, |
|
"n_chars": 927311 |
|
}, |
|
"internlm2_math_7b.cc100-zh-Hans": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579976, |
|
"n_chars": 927311 |
|
}, |
|
"internlm_chat_7b.cc100-zh-Hans": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579109, |
|
"n_chars": 927311 |
|
}, |
|
"internlm_xcomposer_7b.cc100-zh-Hans": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579109, |
|
"n_chars": 927311 |
|
}, |
|
"jamba_v0_1.cc100-zh-Hans": { |
|
"vocab_size": 65536, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1067054, |
|
"n_chars": 927311 |
|
}, |
|
"kplug.cc100-zh-Hans": { |
|
"vocab_size": 10261, |
|
"n_bytes": 2633047, |
|
"n_tokens": 902451, |
|
"n_chars": 927311 |
|
}, |
|
"llama.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"llama2.cc100-zh-Hans": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"llama3.cc100-zh-Hans": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2633047, |
|
"n_tokens": 747405, |
|
"n_chars": 927311 |
|
}, |
|
"mistral_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"mixtral_8_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"mobilebert_uncased.cc100-zh-Hans": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2633047, |
|
"n_tokens": 898554, |
|
"n_chars": 927311 |
|
}, |
|
"moss.cc100-zh-Hans": { |
|
"vocab_size": 106072, |
|
"n_bytes": 2633047, |
|
"n_tokens": 557455, |
|
"n_chars": 927311 |
|
}, |
|
"mt5_large.cc100-zh-Hans": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 631182, |
|
"n_chars": 927311 |
|
}, |
|
"olmo_7b.cc100-zh-Hans": { |
|
"vocab_size": 50280, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1220529, |
|
"n_chars": 927311 |
|
}, |
|
"orion_14b_chat.cc100-zh-Hans": { |
|
"vocab_size": 84608, |
|
"n_bytes": 2633047, |
|
"n_tokens": 529926, |
|
"n_chars": 927311 |
|
}, |
|
"phi_1.cc100-zh-Hans": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"phi_2.cc100-zh-Hans": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"phi_3_mini.cc100-zh-Hans": { |
|
"vocab_size": 32011, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"pko_t5_large.cc100-zh-Hans": { |
|
"vocab_size": 50358, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2533519, |
|
"n_chars": 927311 |
|
}, |
|
"prompt_clue.cc100-zh-Hans": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 564905, |
|
"n_chars": 927311 |
|
}, |
|
"qwen1_5_14b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151646, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"qwen_1_8b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"qwen_72b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"qwen_7b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"roberta_chinese_clue.cc100-zh-Hans": { |
|
"vocab_size": 8021, |
|
"n_bytes": 2633047, |
|
"n_tokens": 907144, |
|
"n_chars": 927311 |
|
}, |
|
"skywork_13b_base.cc100-zh-Hans": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2633047, |
|
"n_tokens": 663923, |
|
"n_chars": 927311 |
|
}, |
|
"skywork_13b_math.cc100-zh-Hans": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2633047, |
|
"n_tokens": 663923, |
|
"n_chars": 927311 |
|
}, |
|
"solar_10_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"starchat_alpha.cc100-zh-Hans": { |
|
"vocab_size": 49156, |
|
"n_bytes": 2633047, |
|
"n_tokens": 882018, |
|
"n_chars": 927311 |
|
}, |
|
"switch_c_2048.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"t5_base.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"t5_large.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"t5_small.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"text_davinci_003.cc100-zh-Hans": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-zh-Hans": { |
|
"vocab_size": 60515, |
|
"n_bytes": 2633047, |
|
"n_tokens": 577385, |
|
"n_chars": 927311 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-zh-Hans": { |
|
"vocab_size": 65110, |
|
"n_bytes": 2633047, |
|
"n_tokens": 577211, |
|
"n_chars": 927311 |
|
}, |
|
"wizardcoder_15b_v1.cc100-zh-Hans": { |
|
"vocab_size": 49153, |
|
"n_bytes": 2633047, |
|
"n_tokens": 882018, |
|
"n_chars": 927311 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-zh-Hans": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"wizardlm_7b_v1.cc100-zh-Hans": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"wizardmath_70b_v1.cc100-zh-Hans": { |
|
"vocab_size": 32002, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"xlm_roberta.cc100-zh-Hans": { |
|
"vocab_size": 250002, |
|
"n_bytes": 2633047, |
|
"n_tokens": 619844, |
|
"n_chars": 927311 |
|
}, |
|
"yi_34b.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 588729, |
|
"n_chars": 927311 |
|
}, |
|
"yi_6b.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 588729, |
|
"n_chars": 927311 |
|
}, |
|
"yi_vl34b.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 596166, |
|
"n_chars": 927311 |
|
}, |
|
"zephyr_7b_beta.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"amber.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"aya_101.cc100-es": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 472231, |
|
"n_chars": 1630297 |
|
}, |
|
"baichuan.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 585804, |
|
"n_chars": 1630297 |
|
}, |
|
"baichuan2.cc100-es": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1664455, |
|
"n_tokens": 551326, |
|
"n_chars": 1630297 |
|
}, |
|
"bert_base_cased.cc100-es": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1664455, |
|
"n_tokens": 630231, |
|
"n_chars": 1630297 |
|
}, |
|
"bert_base_chinese.cc100-es": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 609419, |
|
"n_chars": 1630297 |
|
}, |
|
"bert_base_uncased.cc100-es": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1664455, |
|
"n_tokens": 558042, |
|
"n_chars": 1630297 |
|
}, |
|
"bloom.cc100-es": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1664455, |
|
"n_tokens": 350793, |
|
"n_chars": 1630297 |
|
}, |
|
"byt5_small.cc100-es": { |
|
"vocab_size": 384, |
|
"n_bytes": 1664455, |
|
"n_tokens": 1674455, |
|
"n_chars": 1630297 |
|
}, |
|
"character_glm_6b.cc100-es": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1664455, |
|
"n_tokens": 566501, |
|
"n_chars": 1630297 |
|
}, |
|
"chatglm2_6b.cc100-es": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1664455, |
|
"n_tokens": 566476, |
|
"n_chars": 1630297 |
|
}, |
|
"chatglm3_6b.cc100-es": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1664455, |
|
"n_tokens": 566501, |
|
"n_chars": 1630297 |
|
}, |
|
"chatglm_6b.cc100-es": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1664455, |
|
"n_tokens": 514848, |
|
"n_chars": 1630297 |
|
}, |
|
"chatyuan_large_v2.cc100-es": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 889530, |
|
"n_chars": 1630297 |
|
}, |
|
"chinese_llama.cc100-es": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1664455, |
|
"n_tokens": 486672, |
|
"n_chars": 1630297 |
|
}, |
|
"chinese_llama2.cc100-es": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"code_davinci_002.cc100-es": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"crystal_coder.cc100-es": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1664455, |
|
"n_tokens": 482235, |
|
"n_chars": 1630297 |
|
}, |
|
"dbrx_instruct.cc100-es": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433875, |
|
"n_chars": 1630297 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-es": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1664455, |
|
"n_tokens": 523884, |
|
"n_chars": 1630297 |
|
}, |
|
"deepseek_llm_7b_base.cc100-es": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1664455, |
|
"n_tokens": 480877, |
|
"n_chars": 1630297 |
|
}, |
|
"falcon_180b.cc100-es": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1664455, |
|
"n_tokens": 442138, |
|
"n_chars": 1630297 |
|
}, |
|
"falcon_7b.cc100-es": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1664455, |
|
"n_tokens": 442138, |
|
"n_chars": 1630297 |
|
}, |
|
"fastchat_t5_3b.cc100-es": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1664455, |
|
"n_tokens": 970105, |
|
"n_chars": 1630297 |
|
}, |
|
"flan_t5_base.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706405, |
|
"n_chars": 1630297 |
|
}, |
|
"gemma_7b.cc100-es": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 371321, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt2.cc100-es": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt2_chinese.cc100-es": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 703390, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_35_turbo.cc100-es": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433875, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_4.cc100-es": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433875, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_nexo_20b.cc100-es": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1664455, |
|
"n_tokens": 494577, |
|
"n_chars": 1630297 |
|
}, |
|
"grok_1.cc100-es": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1664455, |
|
"n_tokens": 449392, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm2_chat_7b.cc100-es": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1664455, |
|
"n_tokens": 518871, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm2_math_7b.cc100-es": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1664455, |
|
"n_tokens": 518871, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm_chat_7b.cc100-es": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1664455, |
|
"n_tokens": 516572, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm_xcomposer_7b.cc100-es": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1664455, |
|
"n_tokens": 516572, |
|
"n_chars": 1630297 |
|
}, |
|
"jamba_v0_1.cc100-es": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1664455, |
|
"n_tokens": 420883, |
|
"n_chars": 1630297 |
|
}, |
|
"kplug.cc100-es": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1664455, |
|
"n_tokens": 704804, |
|
"n_chars": 1630297 |
|
}, |
|
"llama.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"llama2.cc100-es": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"llama3.cc100-es": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433289, |
|
"n_chars": 1630297 |
|
}, |
|
"mistral_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"mixtral_8_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"mobilebert_uncased.cc100-es": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1664455, |
|
"n_tokens": 558042, |
|
"n_chars": 1630297 |
|
}, |
|
"moss.cc100-es": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1664455, |
|
"n_tokens": 568539, |
|
"n_chars": 1630297 |
|
}, |
|
"mt5_large.cc100-es": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 472231, |
|
"n_chars": 1630297 |
|
}, |
|
"olmo_7b.cc100-es": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1664455, |
|
"n_tokens": 494577, |
|
"n_chars": 1630297 |
|
}, |
|
"orion_14b_chat.cc100-es": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1664455, |
|
"n_tokens": 628571, |
|
"n_chars": 1630297 |
|
}, |
|
"phi_1.cc100-es": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"phi_2.cc100-es": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"phi_3_mini.cc100-es": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"pko_t5_large.cc100-es": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1664455, |
|
"n_tokens": 1134056, |
|
"n_chars": 1630297 |
|
}, |
|
"prompt_clue.cc100-es": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 889530, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen1_5_14b_chat.cc100-es": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen_1_8b_chat.cc100-es": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen_72b_chat.cc100-es": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen_7b_chat.cc100-es": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"roberta_chinese_clue.cc100-es": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1664455, |
|
"n_tokens": 866564, |
|
"n_chars": 1630297 |
|
}, |
|
"skywork_13b_base.cc100-es": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492211, |
|
"n_chars": 1630297 |
|
}, |
|
"skywork_13b_math.cc100-es": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492211, |
|
"n_chars": 1630297 |
|
}, |
|
"solar_10_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"starchat_alpha.cc100-es": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1664455, |
|
"n_tokens": 530592, |
|
"n_chars": 1630297 |
|
}, |
|
"switch_c_2048.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"t5_base.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"t5_large.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"t5_small.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"text_davinci_003.cc100-es": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-es": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1664455, |
|
"n_tokens": 482553, |
|
"n_chars": 1630297 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-es": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1664455, |
|
"n_tokens": 484099, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardcoder_15b_v1.cc100-es": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1664455, |
|
"n_tokens": 530592, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-es": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardlm_7b_v1.cc100-es": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardmath_70b_v1.cc100-es": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"xlm_roberta.cc100-es": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1664455, |
|
"n_tokens": 399850, |
|
"n_chars": 1630297 |
|
}, |
|
"yi_34b.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 577018, |
|
"n_chars": 1630297 |
|
}, |
|
"yi_6b.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 577018, |
|
"n_chars": 1630297 |
|
}, |
|
"yi_vl34b.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 576794, |
|
"n_chars": 1630297 |
|
}, |
|
"zephyr_7b_beta.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"aya_101.cc100-fr": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 470944, |
|
"n_chars": 1484970 |
|
}, |
|
"baichuan.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 540430, |
|
"n_chars": 1484970 |
|
}, |
|
"baichuan2.cc100-fr": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1540504, |
|
"n_tokens": 512313, |
|
"n_chars": 1484970 |
|
}, |
|
"bert_base_cased.cc100-fr": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1540504, |
|
"n_tokens": 583210, |
|
"n_chars": 1484970 |
|
}, |
|
"bert_base_chinese.cc100-fr": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 553134, |
|
"n_chars": 1484970 |
|
}, |
|
"bert_base_uncased.cc100-fr": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1540504, |
|
"n_tokens": 504075, |
|
"n_chars": 1484970 |
|
}, |
|
"bloom.cc100-fr": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1540504, |
|
"n_tokens": 321639, |
|
"n_chars": 1484970 |
|
}, |
|
"byt5_small.cc100-fr": { |
|
"vocab_size": 384, |
|
"n_bytes": 1540504, |
|
"n_tokens": 1550504, |
|
"n_chars": 1484970 |
|
}, |
|
"character_glm_6b.cc100-fr": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515052, |
|
"n_chars": 1484970 |
|
}, |
|
"chatglm2_6b.cc100-fr": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515028, |
|
"n_chars": 1484970 |
|
}, |
|
"chatglm3_6b.cc100-fr": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515052, |
|
"n_chars": 1484970 |
|
}, |
|
"chatglm_6b.cc100-fr": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1540504, |
|
"n_tokens": 499261, |
|
"n_chars": 1484970 |
|
}, |
|
"chatyuan_large_v2.cc100-fr": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 822012, |
|
"n_chars": 1484970 |
|
}, |
|
"chinese_llama.cc100-fr": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1540504, |
|
"n_tokens": 450352, |
|
"n_chars": 1484970 |
|
}, |
|
"chinese_llama2.cc100-fr": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"code_davinci_002.cc100-fr": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"crystal_coder.cc100-fr": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1540504, |
|
"n_tokens": 447243, |
|
"n_chars": 1484970 |
|
}, |
|
"dbrx_instruct.cc100-fr": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412685, |
|
"n_chars": 1484970 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-fr": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1540504, |
|
"n_tokens": 537538, |
|
"n_chars": 1484970 |
|
}, |
|
"deepseek_llm_7b_base.cc100-fr": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1540504, |
|
"n_tokens": 507693, |
|
"n_chars": 1484970 |
|
}, |
|
"falcon_180b.cc100-fr": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1540504, |
|
"n_tokens": 407853, |
|
"n_chars": 1484970 |
|
}, |
|
"falcon_7b.cc100-fr": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1540504, |
|
"n_tokens": 407853, |
|
"n_chars": 1484970 |
|
}, |
|
"fastchat_t5_3b.cc100-fr": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1540504, |
|
"n_tokens": 717675, |
|
"n_chars": 1484970 |
|
}, |
|
"flan_t5_base.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476135, |
|
"n_chars": 1484970 |
|
}, |
|
"gemma_7b.cc100-fr": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 374551, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt2.cc100-fr": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt2_chinese.cc100-fr": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 636442, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_35_turbo.cc100-fr": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412685, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_4.cc100-fr": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412685, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_nexo_20b.cc100-fr": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1540504, |
|
"n_tokens": 458961, |
|
"n_chars": 1484970 |
|
}, |
|
"grok_1.cc100-fr": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1540504, |
|
"n_tokens": 428298, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm2_chat_7b.cc100-fr": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1540504, |
|
"n_tokens": 496629, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm2_math_7b.cc100-fr": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1540504, |
|
"n_tokens": 496629, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm_chat_7b.cc100-fr": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1540504, |
|
"n_tokens": 495045, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm_xcomposer_7b.cc100-fr": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1540504, |
|
"n_tokens": 495045, |
|
"n_chars": 1484970 |
|
}, |
|
"jamba_v0_1.cc100-fr": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412899, |
|
"n_chars": 1484970 |
|
}, |
|
"kplug.cc100-fr": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1540504, |
|
"n_tokens": 638107, |
|
"n_chars": 1484970 |
|
}, |
|
"llama.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"llama2.cc100-fr": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"llama3.cc100-fr": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412146, |
|
"n_chars": 1484970 |
|
}, |
|
"mistral_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"mixtral_8_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"mobilebert_uncased.cc100-fr": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1540504, |
|
"n_tokens": 504075, |
|
"n_chars": 1484970 |
|
}, |
|
"moss.cc100-fr": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515669, |
|
"n_chars": 1484970 |
|
}, |
|
"mt5_large.cc100-fr": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 470944, |
|
"n_chars": 1484970 |
|
}, |
|
"olmo_7b.cc100-fr": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1540504, |
|
"n_tokens": 458961, |
|
"n_chars": 1484970 |
|
}, |
|
"orion_14b_chat.cc100-fr": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1540504, |
|
"n_tokens": 564107, |
|
"n_chars": 1484970 |
|
}, |
|
"phi_1.cc100-fr": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"phi_2.cc100-fr": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"phi_3_mini.cc100-fr": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"pko_t5_large.cc100-fr": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1540504, |
|
"n_tokens": 1044665, |
|
"n_chars": 1484970 |
|
}, |
|
"prompt_clue.cc100-fr": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 822012, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen1_5_14b_chat.cc100-fr": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen_1_8b_chat.cc100-fr": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen_72b_chat.cc100-fr": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen_7b_chat.cc100-fr": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"roberta_chinese_clue.cc100-fr": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1540504, |
|
"n_tokens": 787363, |
|
"n_chars": 1484970 |
|
}, |
|
"skywork_13b_base.cc100-fr": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457233, |
|
"n_chars": 1484970 |
|
}, |
|
"skywork_13b_math.cc100-fr": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457233, |
|
"n_chars": 1484970 |
|
}, |
|
"solar_10_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"starchat_alpha.cc100-fr": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1540504, |
|
"n_tokens": 509958, |
|
"n_chars": 1484970 |
|
}, |
|
"switch_c_2048.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"t5_base.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"t5_large.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"t5_small.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"text_davinci_003.cc100-fr": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-fr": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1540504, |
|
"n_tokens": 447372, |
|
"n_chars": 1484970 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-fr": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1540504, |
|
"n_tokens": 448567, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardcoder_15b_v1.cc100-fr": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1540504, |
|
"n_tokens": 509958, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-fr": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardlm_7b_v1.cc100-fr": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardmath_70b_v1.cc100-fr": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"xlm_roberta.cc100-fr": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1540504, |
|
"n_tokens": 405041, |
|
"n_chars": 1484970 |
|
}, |
|
"yi_34b.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 533106, |
|
"n_chars": 1484970 |
|
}, |
|
"yi_6b.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 533106, |
|
"n_chars": 1484970 |
|
}, |
|
"yi_vl34b.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 532288, |
|
"n_chars": 1484970 |
|
}, |
|
"zephyr_7b_beta.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
} |
|
} |