pszemraj committed on
Commit
daab53c
1 Parent(s): b41b3ca

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -5
README.md CHANGED
@@ -1,17 +1,20 @@
1
  ---
2
  library_name: transformers
3
  license: mit
 
 
4
  ---
5
 
6
- # Model Card for Model ID
7
 
 
 
 
 
8
 
9
- adapted for t5
10
 
11
  Tokens:
12
  `['▁In', '▁', '2', '0', '2', '3', ',', '▁Dr', '.', '▁Jane', '▁Smith', '-', 'John', 'son', '▁published', '▁groundbreaking', '▁research', '▁on', '▁quantum', '▁ent', 'ang', 'lement', ',', '▁demonstrating', '▁a', '▁', '9', '9', '.', '9', '%', '▁success', '▁rate', '▁in', '▁tele', 'port', 'ing', '▁qu', 'bits', '▁over', '▁', '1', '0', '0', 'km', '▁using', '▁her', '▁patented', "▁'", 'Q', '-', 'Link', "'", '▁technology', '.', '</s>']`
13
 
14
- - Compression ratio: 3.54
15
- - Vocabulary size: 48228
16
 
17
- ![image/png](https://cdn-uploads.huggingface.co/production/uploads/60bccec062080d33f875cd0c/KL4UbQpJESQgnAf3FTtiS.png)
 
1
  ---
2
  library_name: transformers
3
  license: mit
4
+ language:
5
+ - en
6
  ---
7
 
8
+ # 48k vocab LlamaTokenizer for T5
9
 
10
+ Custom tokenizer from the [scaling study](https://huggingface.co/sail/scaling-with-vocab-trained-tokenizers), adapted for T5 training.
11
+
12
+ - Compression ratio: 3.54
13
+ - Vocabulary size: 48228
14
 
 
15
 
16
  Tokens:
17
  `['▁In', '▁', '2', '0', '2', '3', ',', '▁Dr', '.', '▁Jane', '▁Smith', '-', 'John', 'son', '▁published', '▁groundbreaking', '▁research', '▁on', '▁quantum', '▁ent', 'ang', 'lement', ',', '▁demonstrating', '▁a', '▁', '9', '9', '.', '9', '%', '▁success', '▁rate', '▁in', '▁tele', 'port', 'ing', '▁qu', 'bits', '▁over', '▁', '1', '0', '0', 'km', '▁using', '▁her', '▁patented', "▁'", 'Q', '-', 'Link', "'", '▁technology', '.', '</s>']`
18
 
 
 
19
 
20
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/60bccec062080d33f875cd0c/KL4UbQpJESQgnAf3FTtiS.png)