import datasets
from transformers import AutoTokenizer
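# Load the training split of the Rotten Tomatoes movie-review dataset.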
dataset = datasets.load_dataset( # <1>
"rotten_tomatoes", # <1>
split="train", # <1>
) # <1>
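# Load the tokenizer that matches the bert-base-cased checkpoint.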
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
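# Tokenize every example; batched=True passes lists of texts to the
# tokenizer at once, which is much faster than one example at a time.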
dataset = dataset.map( # <2>
lambda examples: tokenizer(examples["text"]), # <2>
batched=True, # <2>
) # <2>
... # <3>
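# Sketch (an assumption, not part of the original snippet): after map, the
# dataset carries the tokenizer's output columns alongside the originals, e.g.:
#   print(dataset.column_names)          # ['text', 'label', 'input_ids', ...]
#   print(dataset[0]["input_ids"][:10])  # first tokens of the first review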