Update README.md
Browse files
README.md
CHANGED
@@ -1,17 +1,20 @@
|
|
1 |
---
|
2 |
library_name: transformers
|
3 |
license: mit
|
|
|
|
|
4 |
---
|
5 |
|
6 |
-
#
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
adapted for t5
|
10 |
|
11 |
Tokens:
|
12 |
`['▁In', '▁', '2', '0', '2', '3', ',', '▁Dr', '.', '▁Jane', '▁Smith', '-', 'John', 'son', '▁published', '▁groundbreaking', '▁research', '▁on', '▁quantum', '▁ent', 'ang', 'lement', ',', '▁demonstrating', '▁a', '▁', '9', '9', '.', '9', '%', '▁success', '▁rate', '▁in', '▁tele', 'port', 'ing', '▁qu', 'bits', '▁over', '▁', '1', '0', '0', 'km', '▁using', '▁her', '▁patented', "▁'", 'Q', '-', 'Link', "'", '▁technology', '.', '</s>']`
|
13 |
|
14 |
-
- Compression ratio: 3.54
|
15 |
-
- Vocabulary size: 48228
|
16 |
|
17 |
-
![image/png](https://cdn-uploads.huggingface.co/production/uploads/60bccec062080d33f875cd0c/KL4UbQpJESQgnAf3FTtiS.png)
|
|
|
1 |
---
|
2 |
library_name: transformers
|
3 |
license: mit
|
4 |
+
language:
|
5 |
+
- en
|
6 |
---
|
7 |
|
8 |
+
# 48k vocab LlamaTokenizer for T5
|
9 |
|
10 |
+
Custom tokenizer from the [scaling study](https://huggingface.co/sail/scaling-with-vocab-trained-tokenizers), adapted for T5 training.
|
11 |
+
|
12 |
+
- Compression ratio: 3.54
|
13 |
+
- Vocabulary size: 48228
|
14 |
|
|
|
15 |
|
16 |
Tokens:
|
17 |
`['▁In', '▁', '2', '0', '2', '3', ',', '▁Dr', '.', '▁Jane', '▁Smith', '-', 'John', 'son', '▁published', '▁groundbreaking', '▁research', '▁on', '▁quantum', '▁ent', 'ang', 'lement', ',', '▁demonstrating', '▁a', '▁', '9', '9', '.', '9', '%', '▁success', '▁rate', '▁in', '▁tele', 'port', 'ing', '▁qu', 'bits', '▁over', '▁', '1', '0', '0', 'km', '▁using', '▁her', '▁patented', "▁'", 'Q', '-', 'Link', "'", '▁technology', '.', '</s>']`
|
18 |
|
|
|
|
|
19 |
|
20 |
+
![image/png](https://cdn-uploads.huggingface.co/production/uploads/60bccec062080d33f875cd0c/KL4UbQpJESQgnAf3FTtiS.png)
|