summaryrefslogtreecommitdiff
path: root/benchmarks/zipf.c
blob: d1e31095093d417af99ea7e3f2fb389932a07e1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include <stdlib.h>
#include <math.h>
#include <stdint.h>

#include "zipf.h"

/* Based on Luc Devroye's rejection method for sampling from the Zipf
   distribution (Non-Uniform Random Variate Generation, page 550,
   http://cg.scs.carleton.ca/~luc/chapter_ten.pdf).  The code is based
   on that written by Michael Dürr:
   http://lists.gnu.org/archive/html/help-gsl/2008-05/msg00057.html
   .  */
int
rand_zipf (double skew, int limit)
{
  double a = skew;
  double b = pow (2.0, a - 1.0);
  double X, T, U, V;
  do
    {
      U = drand48 ();
      V = drand48 ();
      X = floor (pow (U, -1.0 / (a - 1.0)));
      T = pow (1.0 + 1.0 / X, a - 1.0);
    }
  while ((V * X * (T - 1.0) / (b - 1.0) > (T / b))
	 /* If the number is larger than the user requested limmit, we
	    just try again.  This does not effect the
	    distribution.  */
	 || X > (double) INT32_MAX
	 || (int) X > limit);
  return (int) X;
}

#if 0
#include <stdio.h>

/* To see the resulting distribution, use gnuplot and run:

     plot "<./zipf | sort -n | uniq -c" using 2:1
 */
int
main (int argc, char *argv[])
{
  double skew = 1.4;
  if (argc > 1)
    {
      skew = atof (argv[1]);
      if (skew <= 1.0)
	{
	  fprintf (stderr, "skew must be >= 1, you said %f\n", skew);
	  exit (1);
	}
    }

  int i;
  for (i = 0; i < 100000; i ++)
    printf ("%u\n", rand_zipf (skew, 10000));

  return 0;
}
#endif