nouamanetazi
HF staff
Upload darija_tokenizers_leaderboard.jsonl with huggingface_hub
6f9f426
verified
{"Tokenizer":"google-bert\/bert-base-uncased","Vocabulary Size":30522,"Token Count":28,"Tokens\/Character Ratio":0.9333333333,"Latin Support":"β ","Tokenizer Class":"BertTokenizerFast"}
{"Tokenizer":"google\/gemma-2-27b-it","Vocabulary Size":256000,"Token Count":10,"Tokens\/Character Ratio":0.3333333333,"Latin Support":"β ","Tokenizer Class":"GemmaTokenizer"}
{"Tokenizer":"Xenova\/gpt-4o","Vocabulary Size":200000,"Token Count":8,"Tokens\/Character Ratio":0.2666666667,"Latin Support":"β ","Tokenizer Class":"GPT2TokenizerFast"}