# ECON/lib/pymafx/utils/data_loader.py -- commit da48dbe ("init", by Yuliang), 2.29 kB
from __future__ import division
import torch
from torch.utils.data import DataLoader
from torch.utils.data.sampler import Sampler
class RandomSampler(Sampler):
    """Sampler that yields dataset indices in a random, resumable order.

    Args:
        data_source: Sized dataset; only ``len(data_source)`` is used.
        checkpoint: ``None``, or a mapping with the keys ``'dataset_perm'``
            (the index order of the interrupted epoch, or ``None``),
            ``'batch_size'`` and ``'batch_idx'``.

    Attributes:
        dataset_perm: The index order for the whole epoch. This is the value
            meant to be stored in a checkpoint.
        perm: The indices that are still to be yielded in this epoch.
    """

    def __init__(self, data_source, checkpoint):
        self.data_source = data_source
        if checkpoint is not None and checkpoint['dataset_perm'] is not None:
            # Resume: reuse the saved order and skip the samples that were
            # already consumed (batch_size * batch_idx of them).
            self.dataset_perm = checkpoint['dataset_perm']
            self.perm = self.dataset_perm[checkpoint['batch_size']*checkpoint['batch_idx']:]
        else:
            # Fresh epoch: draw ONE permutation and use it both as the order
            # that is iterated and as the order exposed for checkpointing.
            # Drawing two independent permutations would make a later resume
            # follow a different order than the one actually used, repeating
            # some samples and skipping others.
            self.dataset_perm = torch.randperm(len(self.data_source)).tolist()
            self.perm = self.dataset_perm

    def __iter__(self):
        """Iterate over the remaining indices of the epoch."""
        return iter(self.perm)

    def __len__(self):
        """Return the number of indices still to be yielded."""
        return len(self.perm)
class SequentialSampler(Sampler):
    """Sampler that yields dataset indices in a fixed, resumable order.

    Without a usable checkpoint the order is ``0, 1, ..., len(data_source) - 1``.
    With a checkpoint whose ``'dataset_perm'`` is not ``None``, that saved
    order is reused and its first ``batch_size * batch_idx`` entries are
    skipped.
    """

    def __init__(self, data_source, checkpoint):
        self.data_source = data_source

        saved_order = None if checkpoint is None else checkpoint['dataset_perm']
        if saved_order is None:
            # Nothing to resume from: plain ascending order; both attributes
            # refer to the same list.
            ascending = list(range(len(data_source)))
            self.dataset_perm = ascending
            self.perm = ascending
            return

        # Resuming: drop the indices consumed before the checkpoint was taken.
        already_seen = checkpoint['batch_size'] * checkpoint['batch_idx']
        self.dataset_perm = saved_order
        self.perm = saved_order[already_seen:]

    def __iter__(self):
        """Iterate over the indices that remain in this epoch."""
        remaining = self.perm
        return iter(remaining)

    def __len__(self):
        """Return how many indices remain in this epoch."""
        remaining = self.perm
        return len(remaining)
class CheckpointDataLoader(DataLoader):
    """
    Extends torch.utils.data.DataLoader to handle resuming training from an arbitrary point within an epoch.

    Args:
        dataset: Dataset to load from.
        checkpoint: ``None`` for a fresh epoch, or a mapping providing
            ``'batch_idx'`` plus the keys the samplers read
            (``'dataset_perm'`` and ``'batch_size'``).
        batch_size: Number of samples per batch.
        shuffle: If true, indices come from ``RandomSampler``; otherwise from
            ``SequentialSampler``.
        num_workers, pin_memory, drop_last, timeout, worker_init_fn:
            Forwarded unchanged to ``torch.utils.data.DataLoader``.

    Attributes:
        checkpoint_batch_idx: ``checkpoint['batch_idx']`` when a checkpoint is
            given, otherwise ``0``.
    """

    def __init__(self, dataset, checkpoint=None, batch_size=1,
                 shuffle=False, num_workers=0, pin_memory=False, drop_last=True,
                 timeout=0, worker_init_fn=None):
        # The sampler handles both ordering and skipping consumed samples.
        if shuffle:
            sampler = RandomSampler(dataset, checkpoint)
        else:
            sampler = SequentialSampler(dataset, checkpoint)

        if checkpoint is not None:
            self.checkpoint_batch_idx = checkpoint['batch_idx']
        else:
            self.checkpoint_batch_idx = 0

        # shuffle must stay False here: DataLoader rejects shuffle=True
        # together with an explicit sampler, and ordering is already decided
        # by the sampler chosen above.
        # worker_init_fn is forwarded as given; previously it was replaced by
        # a hard-coded None, silently dropping the caller's hook.
        super(CheckpointDataLoader, self).__init__(
            dataset, sampler=sampler, shuffle=False, batch_size=batch_size,
            num_workers=num_workers, drop_last=drop_last, pin_memory=pin_memory,
            timeout=timeout, worker_init_fn=worker_init_fn)