gpt-code / train.py
cakiki's picture
Add training stub
326fe35
raw
history blame contribute delete
422 Bytes
#!/usr/bin/env python
from datasets import load_dataset
# Directory under which every shard path below is built (relative path).
_DATA_DIR = 'data'

# Split name -> list of JSON Lines shard paths handed to `load_dataset`.
# Only 'train' enumerates shards (indices 0..13); 'validation' and 'test'
# use range(0), so their file lists are empty.
# NOTE(review): confirm `load_dataset` accepts an empty file list for a
# split with the installed `datasets` version, or fill in the real shard
# counts once those files exist.
splits = dict(
    train=[
        f'{_DATA_DIR}/train/python_train_{shard}.jsonl'
        for shard in range(14)
    ],
    validation=[
        f'{_DATA_DIR}/valid/python_valid_{shard}.jsonl'
        for shard in range(0)
    ],
    test=[
        f'{_DATA_DIR}/test/python_test_{shard}.jsonl'
        for shard in range(0)
    ],
)

# Executed at module level, so the load happens on import as well as when
# the file is run as a script.
dataset = load_dataset('json', data_files=splits)

if __name__ == '__main__':
    # Script entry point: print whatever `load_dataset` returned.
    print(dataset)