#!/usr/bin/env python
"""Load the JSON Lines shards listed in ``splits`` with ``datasets``.

The dataset is loaded at module level and bound to ``dataset``; running the
file as a script additionally prints it.
"""
from datasets import load_dataset

# Directory prefix shared by every shard path below.
_DATA_DIR = 'data'

# Mapping of split name -> list of shard paths handed to ``load_dataset``.
splits = {
    # Fourteen training shards: python_train_0.jsonl .. python_train_13.jsonl.
    'train': [f'{_DATA_DIR}/train/python_train_{i}.jsonl' for i in range(14)],
    # NOTE(review): range(0) produces an empty list, so no validation or test
    # files are listed here -- confirm this is intended and that
    # ``load_dataset`` accepts empty splits.
    'validation': [f'{_DATA_DIR}/valid/python_valid_{i}.jsonl' for i in range(0)],
    'test': [f'{_DATA_DIR}/test/python_test_{i}.jsonl' for i in range(0)],
}

# Runs on import as well as on direct execution, so ``dataset`` is always
# available as a module attribute.
dataset = load_dataset('json', data_files=splits)

if __name__ == '__main__':
    print(dataset)