File size: 357 Bytes
2fc4496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

import datasets
from transformers import AutoTokenizer

# Load the training split of the Rotten Tomatoes movie-review dataset.
dataset = datasets.load_dataset(  # <1>
    "rotten_tomatoes",  # <1>
    split="train",  # <1>
)  # <1>
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Tokenize every example in batches. Fix: `.map` is a method of the loaded
# Dataset object, not a function in the `datasets` module — calling
# `datasets.map(...)` raises AttributeError.
dataset = dataset.map(  # <2>
    lambda examples: tokenizer(examples["text"]),  # <2>
    batched=True,  # <2>
)  # <2>

... # <3>