File size: 2,795 Bytes
28958dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/functional.h>
#include <thrust/transform_reduce.h>

#include <iostream>
#include <cmath>

// The technique demonstrated in the example monte_carlo.cu 
// assigns an independently seeded random number generator to each
// of 30K threads, and uses a hashing scheme based on thread index to
// seed each RNG. This technique, while simple, may be succeptible
// to correlation among the streams of numbers generated by each RNG
// because there is no guarantee that the streams are not disjoint.
// This example demonstrates a slightly more sophisticated technique
// which ensures that the subsequences generated in each thread are
// disjoint. To achieve this, we use a single common stream
// of random numbers, but partition it among threads to ensure no overlap
// of substreams. The substreams are generated procedurally using
// default_random_engine's discard(n) member function, which skips
// past n states of the RNG. This function is accelerated and executes
// in O(lg n) time.

struct estimate_pi : public thrust::unary_function<unsigned int,float>
{
  __host__ __device__
  float operator()(unsigned int thread_id)
  {
    float sum = 0;
    unsigned int N = 5000; // samples per stream

    // note that M * N <= default_random_engine::max,
    // which is also the period of this particular RNG
    // this ensures the substreams are disjoint

    // create a random number generator
    // note that each thread uses an RNG with the same seed
    thrust::default_random_engine rng;

    // jump past the numbers used by the subsequences before me
    rng.discard(N * thread_id);

    // create a mapping from random numbers to [0,1)
    thrust::uniform_real_distribution<float> u01(0,1);

    // take N samples in a quarter circle
    for(unsigned int i = 0; i < N; ++i)
    {
      // draw a sample from the unit square
      float x = u01(rng);
      float y = u01(rng);

      // measure distance from the origin
      float dist = sqrtf(x*x + y*y);

      // add 1.0f if (u0,u1) is inside the quarter circle
      if(dist <= 1.0f)
        sum += 1.0f;
    }

    // multiply by 4 to get the area of the whole circle
    sum *= 4.0f;

    // divide by N
    return sum / N;
  }
};

int main(void)
{
  // use 30K subsequences of random numbers
  int M = 30000;

  float estimate = thrust::transform_reduce(thrust::counting_iterator<int>(0),
                                            thrust::counting_iterator<int>(M),
                                            estimate_pi(),
                                            0.0f,
                                            thrust::plus<float>());
  estimate /= M;

  std::cout << "pi is around " << estimate << std::endl;

  return 0;
}