Spaces:
Build error
Build error
File size: 5,587 Bytes
8f87579 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
'''
A sampler is just a list of integers listing the indexes of the
inputs in a data set to sample. For reproducibility, the
FixedRandomSubsetSampler uses a seeded prng to produce the same
sequence always. FixedSubsetSampler is just a wrapper for an
explicit list of integers.
coordinate_sample solves another sampling problem: when testing
convolutional outputs, we can reduce data explosion by sampling
random points of the feature map rather than the entire feature map.
coordinate_sample does this in a deterministic way that is also
resolution-independent.
'''
import numpy
import random
from torch.utils.data.sampler import Sampler
class FixedSubsetSampler(Sampler):
    """Sampler that yields an explicitly given sequence of data set indices.

    The sequence is stored as-is in ``self.samples``.  Positions within that
    sequence ("output indices") can be mapped back to the stored data set
    indices with dereference(), and a smaller sampler can be derived from a
    selection of positions with subset().
    """

    def __init__(self, samples):
        # Kept by reference; no copy of the caller's sequence is made.
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __iter__(self):
        return iter(self.samples)

    def __getitem__(self, key):
        # Delegates to the underlying sequence, so whatever key types it
        # accepts (e.g. slices for a list) are accepted here too.
        return self.samples[key]

    def dereference(self, indices):
        '''
        Maps positions within this sampler (small numbers indexing the
        sample) to the data set indices stored at those positions (larger
        numbers indexing the original full set).  Returns a new list.
        '''
        resolved = []
        for position in indices:
            resolved.append(self.samples[position])
        return resolved

    def subset(self, new_subset):
        '''
        Returns a FixedSubsetSampler holding only the entries found at the
        given positions of this sampler, in the order given.
        '''
        chosen = self.dereference(new_subset)
        return FixedSubsetSampler(chosen)
class FixedRandomSubsetSampler(FixedSubsetSampler):
    """Samples a fixed slice of a deterministically shuffled data set.

    All indices ``0 .. len(data_source) - 1`` are shuffled with a
    ``random.Random`` seeded by ``seed``; the slice ``shuffled[start:end]``
    of that permutation becomes the sample list.  The same arguments
    therefore always yield the same sequence.

    Arguments:
        data_source: sized data set.  It only needs ``len()`` unless class
            filtering is used, in which case it is also indexed.
        start (optional): first position of the shuffled order to keep.
        end (optional): position of the shuffled order to stop before.
        seed (optional): seed for the shuffle, default 1.
        class_filter (optional): if not None, only the samples accepted by
            class_subset(class_filter) are kept.  The filter is applied
            after the start:end slice.
    """

    def __init__(self, data_source, start=None, end=None, seed=1,
                 class_filter=None):
        rng = random.Random(seed)
        shuffled = list(range(len(data_source)))
        rng.shuffle(shuffled)
        # Retained so that class_subset can look at the actual items.
        self.data_source = data_source
        super(FixedRandomSubsetSampler, self).__init__(shuffled[start:end])
        # `is not None` rather than truthiness: class label 0 is valid.
        if class_filter is not None:
            self.samples = self.class_subset(class_filter).samples

    def class_subset(self, class_filter):
        '''
        Returns only the subset matching the given rule.

        class_filter is either a callable taking one data_source item and
        returning a truth value, or a class label, in which case an item
        matches when item[1] == class_filter.  The result is produced by
        subset(), so it is a plain FixedSubsetSampler.
        '''
        if callable(class_filter):
            rule = class_filter
        else:
            # Dispatching on callable() instead of isinstance(int) lets
            # non-int labels (numpy integers, strings) work as labels too.
            def rule(item):
                return item[1] == class_filter
        return self.subset([position
                            for position, index in enumerate(self.samples)
                            if rule(self.data_source[index])])
def coordinate_sample(shape, sample_size, seeds, grid=13, seed=1, flat=False):
    '''
    Returns one set of sample_size pseudo-random points inside 'shape'
    for every entry of 'seeds'.

    The unit cube is divided into grid ** len(shape) squares; for each
    seed, sample_size squares are drawn and a random offset is added
    inside each square.  The resulting fractions in [0, 1) are then scaled
    by 'shape', so the same relative locations are selected regardless of
    resolution.

    Arguments:
        shape: sizes of the feature map, one entry per dimension.
        sample_size: number of points per seed; must be <= grid.
        seeds: sized iterable of integer seeds, one output row per seed.
        grid: number of grid divisions along each dimension.
        seed: unused; each entry of 'seeds' seeds its own generator.  Kept
            so that existing callers passing it keep working.
        flat: if True, return raveled (C-order) indices into 'shape'
            instead of per-dimension coordinates.

    Returns an integer array of shape (len(seeds), sample_size) when flat,
    otherwise (len(seeds), len(shape), sample_size).

    NOTE(review): an earlier version of this docstring promised that
    points in the same row are never duplicated when the shape is a
    multiple of 'grid'.  The squares are drawn with replacement (see
    below), so that guarantee should be confirmed before relying on it.
    '''
    ndim = len(shape)
    if flat:
        sampind = numpy.zeros((len(seeds), sample_size), dtype=int)
    else:
        # One coordinate row per dimension of 'shape' (not a fixed 2), so
        # that shapes of any rank fit the buffer.
        sampind = numpy.zeros((len(seeds), ndim, sample_size), dtype=int)
    assert sample_size <= grid
    # Loop invariants: number of grid squares and per-dimension scale.
    square_count = grid ** ndim
    extent = numpy.array(shape)[:, None]
    for j, sample_seed in enumerate(seeds):
        rng = numpy.random.RandomState(sample_seed)
        # Draw square_count grid squares and keep the first sample_size.
        # Always drawing the full count means that the squares chosen for
        # a smaller sample_size are a prefix of those for a larger one.
        # NOTE: choice() is left at its default (replace=True), so a
        # square can repeat; switching to replace=False would change the
        # deterministic output, so it is intentionally left alone.
        square = numpy.stack(numpy.unravel_index(
            rng.choice(square_count, square_count)[:sample_size],
            (grid,) * ndim))
        # Then add a random offset to each coordinate and put in the
        # range [0...1).  This is what makes the selection independent of
        # resolution.
        uniform = (square + rng.uniform(size=square.shape)) / grid
        # TODO: support affine scaling so that we can align receptive field
        # centers exactly when sampling neurons in different layers.
        coords = (uniform * extent).astype(int)
        if flat:
            sampind[j] = numpy.ravel_multi_index(coords, dims=shape)
        else:
            sampind[j] = coords
    return sampind
if __name__ == '__main__':
    # Self-test, executed only when this module is run as a script.  The
    # expected values below are fixed outputs for the given seeds.
    from numpy.testing import assert_almost_equal
    # Test that coordinate_sample is deterministic, in-range, and scalable.
    assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102)),
        [[[14,  0, 12, 11,  8, 13, 11, 20,  7, 20],
          [ 9, 22,  7, 11, 23, 18, 21, 15,  2,  5]]])
    # Halving the resolution halves (with floor) every coordinate.
    assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 102)),
        [[[ 7,  0,  6,  5,  4,  6,  5, 10,  3, 20 // 2],
          [ 4, 11,  3,  5, 11,  9, 10,  7,  1,  5 // 2]]])
    # flat=True gives row * width + column of the coordinates above.
    assert_almost_equal(coordinate_sample((13, 13), 10, range(100, 102),
        flat=True),
        [[  8,  24,  67, 103,  87,  79, 138,  94,  98,  53],
         [ 95,  11,  81,  70,  63,  87,  75, 137,  40, 2+10*13]])
    assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 103),
        flat=True),
        [[ 95,  11,  81,  70,  63,  87,  75, 137,  40, 132],
         [  0,  78, 114, 111,  66,  45,  72,  73,  79, 135]])
    assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102),
        flat=True),
        [[373,  22, 319, 297, 231, 356, 307, 535, 184, 5+20*26]])
    # Test FixedRandomSubsetSampler
    fss = FixedRandomSubsetSampler(range(10))
    assert len(fss) == 10
    assert_almost_equal(list(fss), [8, 0, 3, 4, 5, 2, 9, 6, 7, 1])
    fss = FixedRandomSubsetSampler(range(10), 3, 8)
    assert len(fss) == 5
    assert_almost_equal(list(fss), [4, 5, 2, 9, 6])
    # Class filtering goes through class_subset(): items are (value, label)
    # pairs and only those whose label equals 1 are kept.
    fss = FixedRandomSubsetSampler(
        [(i, i % 3) for i in range(10)]).class_subset(class_filter=1)
    assert len(fss) == 3
    assert_almost_equal(list(fss), [4, 7, 1])
|