GNU Octave  3.8.0
A high-level interpreted language, primarily intended for numerical computations, mostly compatible with Matlab
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
randmtzig.c
Go to the documentation of this file.
1 /*
2 
3 Copyright (C) 2006-2013 John W. Eaton
4 
5 This file is part of Octave.
6 
7 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 Octave is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Octave; see the file COPYING. If not, see
19 <http://www.gnu.org/licenses/>.
20 
21 */
22 
23 /*
24  A C-program for MT19937, with initialization improved 2002/2/10.
25  Coded by Takuji Nishimura and Makoto Matsumoto.
26  This is a faster version by taking Shawn Cokus's optimization,
27  Matthe Bellew's simplification, Isaku Wada's real version.
28  David Bateman added normal and exponential distributions following
29  Marsaglia and Tang's Ziggurat algorithm.
30 
31  Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
32  Copyright (C) 2004, David Bateman
33  All rights reserved.
34 
35  Redistribution and use in source and binary forms, with or without
36  modification, are permitted provided that the following conditions
37  are met:
38 
39  1. Redistributions of source code must retain the above copyright
40  notice, this list of conditions and the following disclaimer.
41 
42  2. Redistributions in binary form must reproduce the above copyright
43  notice, this list of conditions and the following disclaimer in the
44  documentation and/or other materials provided with the distribution.
45 
46  3. The names of its contributors may not be used to endorse or promote
47  products derived from this software without specific prior written
48  permission.
49 
50  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
51  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
52  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
53  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
54  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
55  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
56  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
57  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
58  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
59  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
60  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
61 
62 
63  Any feedback is very welcome.
64  http://www.math.keio.ac.jp/matumoto/emt.html
65  email: matumoto@math.keio.ac.jp
66 
67  * 2004-01-19 Paul Kienzle
68  * * comment out main
69  * add init_by_entropy, get_state, set_state
70  * * converted to allow compiling by C++ compiler
71  *
72  * 2004-01-25 David Bateman
73  * * Add Marsaglia and Tsang Ziggurat code
74  *
75  * 2004-07-13 Paul Kienzle
76  * * make into an independent library with some docs.
77  * * introduce new main and test code.
78  *
79  * 2004-07-28 Paul Kienzle & David Bateman
80  * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa
81  * * make the naming scheme more uniform
82  * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch.
83  *
84  * 2005-02-23 Paul Kienzle
85  * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control
86  *
87  * 2006-04-01 David Bateman
88  * * convert for use in octave, declaring static functions only used
89  * here and adding oct_ to functions visible externally
90  * * inverse sense of ALLBITS
91  *
92  * 2012-05-18 David Bateman
93  * * Remove randu64 and ALLBIT option
94  * * Add the single precision generators
95  */
96 
97 /*
98  === Build instructions ===
99 
100  Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is
101  available. This is not necessary if your architecture has
102  /dev/urandom defined.
103 
104  Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86.
105  You can force X86 behaviour with -DUSE_X86_32=1, or suppress it with
106  -DUSE_X86_32=0. You should also consider -march=i686 or similar for
107  extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit
108  x86 architectures.
109 
110  If you want to replace the Mersenne Twister with another
111  generator then redefine randi32 appropriately.
112 
113  === Usage instructions ===
114  Before using any of the generators, initialize the state with one of
115  oct_init_by_int, oct_init_by_array or oct_init_by_entropy.
116 
117  All generators share the same state vector.
118 
119  === Mersenne Twister ===
120  void oct_init_by_int (uint32_t s) 32-bit initial state
121  void oct_init_by_array (uint32_t k[],int m) m*32-bit initial state
122  void oct_init_by_entropy (void) random initial state
123  void oct_get_state (uint32_t save[MT_N+1]) saves state in array
124  void oct_set_state (uint32_t save[MT_N+1]) restores state from array
125  static uint32_t randmt (void) returns 32-bit unsigned int
126 
127  === inline generators ===
128  static uint32_t randi32 (void) returns 32-bit unsigned int
129  static uint64_t randi53 (void) returns 53-bit unsigned int
130  static uint64_t randi54 (void) returns 54-bit unsigned int
131  static float randu32 (void) returns 32-bit uniform in (0,1)
132  static double randu53 (void) returns 53-bit uniform in (0,1)
133 
134  double oct_randu (void) returns M-bit uniform in (0,1)
135  double oct_randn (void) returns M-bit standard normal
136  double oct_rande (void) returns N-bit standard exponential
137 
138  float oct_float_randu (void) returns M-bit uniform in (0,1)
139  float oct_float_randn (void) returns M-bit standard normal
140  float oct_float_rande (void) returns N-bit standard exponential
141 
142  === Array generators ===
143  void oct_fill_randi32 (octave_idx_type, uint32_t [])
144  void oct_fill_randi64 (octave_idx_type, uint64_t [])
145 
146  void oct_fill_randu (octave_idx_type, double [])
147  void oct_fill_randn (octave_idx_type, double [])
148  void oct_fill_rande (octave_idx_type, double [])
149 
150  void oct_fill_float_randu (octave_idx_type, float [])
151  void oct_fill_float_randn (octave_idx_type, float [])
152  void oct_fill_float_rande (octave_idx_type, float [])
153 */
154 
155 #if defined (HAVE_CONFIG_H)
156 #include <config.h>
157 #endif
158 
159 #include <stdio.h>
160 #include <time.h>
161 
162 #ifdef HAVE_GETTIMEOFDAY
163 #include <sys/time.h>
164 #endif
165 
166 #include "lo-math.h"
167 #include "randmtzig.h"
168 
169 /* FIXME may want to suppress X86 if sizeof(long) > 4 */
170 #if !defined (USE_X86_32)
171 # if defined (i386) || defined (HAVE_X86_32)
172 # define USE_X86_32 1
173 # else
174 # define USE_X86_32 0
175 # endif
176 #endif
177 
178 /* ===== Mersenne Twister 32-bit generator ===== */
179 
180 #define MT_M 397
181 #define MATRIX_A 0x9908b0dfUL /* constant vector a */
182 #define UMASK 0x80000000UL /* most significant w-r bits */
183 #define LMASK 0x7fffffffUL /* least significant r bits */
184 #define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) )
185 #define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL))
186 
187 static uint32_t *next;
188 static uint32_t state[MT_N]; /* the array for the state vector */
189 static int left = 1;
190 static int initf = 0;
191 static int initt = 1;
192 static int inittf = 1;
193 
194 /* initializes state[MT_N] with a seed */
195 void
196 oct_init_by_int (uint32_t s)
197 {
198  int j;
199  state[0] = s & 0xffffffffUL;
200  for (j = 1; j < MT_N; j++)
201  {
202  state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j);
203  /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
204  /* In the previous versions, MSBs of the seed affect */
205  /* only MSBs of the array state[]. */
206  /* 2002/01/09 modified by Makoto Matsumoto */
207  state[j] &= 0xffffffffUL; /* for >32 bit machines */
208  }
209  left = 1;
210  initf = 1;
211 }
212 
213 /* initialize by an array with array-length */
214 /* init_key is the array for initializing keys */
215 /* key_length is its length */
216 void
217 oct_init_by_array (uint32_t *init_key, int key_length)
218 {
219  int i, j, k;
220  oct_init_by_int (19650218UL);
221  i = 1;
222  j = 0;
223  k = (MT_N > key_length ? MT_N : key_length);
224  for (; k; k--)
225  {
226  state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL))
227  + init_key[j] + j; /* non linear */
228  state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
229  i++;
230  j++;
231  if (i >= MT_N)
232  {
233  state[0] = state[MT_N-1];
234  i = 1;
235  }
236  if (j >= key_length)
237  j = 0;
238  }
239  for (k = MT_N - 1; k; k--)
240  {
241  state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL))
242  - i; /* non linear */
243  state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
244  i++;
245  if (i >= MT_N)
246  {
247  state[0] = state[MT_N-1];
248  i = 1;
249  }
250  }
251 
252  state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
253  left = 1;
254  initf = 1;
255 }
256 
257 void
259 {
260  uint32_t entropy[MT_N];
261  int n = 0;
262 
263  /* Look for entropy in /dev/urandom */
264  FILE* urandom = fopen ("/dev/urandom", "rb");
265  if (urandom)
266  {
267  while (n < MT_N)
268  {
269  unsigned char word[4];
270  if (fread (word, 4, 1, urandom) != 1)
271  break;
272  entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+((uint32_t)word[3]<<24);
273  }
274  fclose (urandom);
275  }
276 
277  /* If there isn't enough entropy, gather some from various sources */
278  if (n < MT_N)
279  entropy[n++] = time (NULL); /* Current time in seconds */
280  if (n < MT_N)
281  entropy[n++] = clock (); /* CPU time used (usec) */
282 #ifdef HAVE_GETTIMEOFDAY
283  if (n < MT_N)
284  {
285  struct timeval tv;
286  if (gettimeofday (&tv, NULL) != -1)
287  entropy[n++] = tv.tv_usec; /* Fractional part of current time */
288  }
289 #endif
290  /* Send all the entropy into the initial state vector */
291  oct_init_by_array (entropy,n);
292 }
293 
294 void
295 oct_set_state (uint32_t *save)
296 {
297  int i;
298  for (i = 0; i < MT_N; i++)
299  state[i] = save[i];
300  left = save[MT_N];
301  next = state + (MT_N - left + 1);
302 }
303 
304 void
305 oct_get_state (uint32_t *save)
306 {
307  int i;
308  for (i = 0; i < MT_N; i++)
309  save[i] = state[i];
310  save[MT_N] = left;
311 }
312 
313 static void
315 {
316  uint32_t *p = state;
317  int j;
318 
319  /* if init_by_int() has not been called, */
320  /* a default initial seed is used */
321  /* if (initf==0) init_by_int(5489UL); */
322  /* Or better yet, a random seed! */
323  if (initf == 0)
325 
326  left = MT_N;
327  next = state;
328 
329  for (j = MT_N - MT_M + 1; --j; p++)
330  *p = p[MT_M] ^ TWIST(p[0], p[1]);
331 
332  for (j = MT_M; --j; p++)
333  *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]);
334 
335  *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]);
336 }
337 
338 /* generates a random number on [0,0xffffffff]-interval */
339 static uint32_t
340 randmt (void)
341 {
342  register uint32_t y;
343 
344  if (--left == 0)
345  next_state ();
346  y = *next++;
347 
348  /* Tempering */
349  y ^= (y >> 11);
350  y ^= (y << 7) & 0x9d2c5680UL;
351  y ^= (y << 15) & 0xefc60000UL;
352  return (y ^ (y >> 18));
353 }
354 
355 /* ===== Uniform generators ===== */
356 
357 /* Select which 32 bit generator to use */
358 #define randi32 randmt
359 
360 static uint64_t
361 randi53 (void)
362 {
363  const uint32_t lo = randi32 ();
364  const uint32_t hi = randi32 ()&0x1FFFFF;
365 #if HAVE_X86_32
366  uint64_t u;
367  uint32_t *p = (uint32_t *)&u;
368  p[0] = lo;
369  p[1] = hi;
370  return u;
371 #else
372  return (((uint64_t)hi<<32)|lo);
373 #endif
374 }
375 
376 static uint64_t
377 randi54 (void)
378 {
379  const uint32_t lo = randi32 ();
380  const uint32_t hi = randi32 ()&0x3FFFFF;
381 #if HAVE_X86_32
382  uint64_t u;
383  uint32_t *p = (uint32_t *)&u;
384  p[0] = lo;
385  p[1] = hi;
386  return u;
387 #else
388  return (((uint64_t)hi<<32)|lo);
389 #endif
390 }
391 
392 /* generates a random number on (0,1)-real-interval */
393 static float
394 randu32 (void)
395 {
396  return ((float)randi32 () + 0.5) * (1.0/4294967296.0);
397  /* divided by 2^32 */
398 }
399 
400 /* generates a random number on (0,1) with 53-bit resolution */
401 static double
402 randu53 (void)
403 {
404  const uint32_t a = randi32 ()>>5;
405  const uint32_t b = randi32 ()>>6;
406  return (a*67108864.0+b+0.4) * (1.0/9007199254740992.0);
407 }
408 
409 /* Determine mantissa for uniform doubles */
410 double
411 oct_randu (void)
412 {
413  return randu53 ();
414 }
415 
416 /* Determine mantissa for uniform floats */
417 float
419 {
420  return randu32 ();
421 }
422 
423 /* ===== Ziggurat normal and exponential generators ===== */
424 
425 #define ZIGGURAT_TABLE_SIZE 256
426 
427 #define ZIGGURAT_NOR_R 3.6541528853610088
428 #define ZIGGURAT_NOR_INV_R 0.27366123732975828
429 #define NOR_SECTION_AREA 0.00492867323399
430 
431 #define ZIGGURAT_EXP_R 7.69711747013104972
432 #define ZIGGURAT_EXP_INV_R 0.129918765548341586
433 #define EXP_SECTION_AREA 0.0039496598225815571993
434 
435 #define ZIGINT uint64_t
436 #define EMANTISSA 9007199254740992.0 /* 53 bit mantissa */
437 #define ERANDI randi53() /* 53 bits for mantissa */
438 #define NMANTISSA EMANTISSA
439 #define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */
440 #define RANDU randu53()
441 
446 
447 /*
448 This code is based on the paper Marsaglia and Tsang, "The ziggurat method
449 for generating random variables", Journ. Statistical Software. Code was
450 presented in this paper for a Ziggurat of 127 levels and using a 32 bit
451 integer random number generator. This version of the code, uses the
452 Mersenne Twister as the integer generator and uses 256 levels in the
453 Ziggurat. This has several advantages.
454 
455  1) As Marsaglia and Tsang themselves states, the more levels the few
456  times the expensive tail algorithm must be called
457  2) The cycle time of the generator is determined by the integer
458  generator, thus the use of a Mersenne Twister for the core random
459  generator makes this cycle extremely long.
460  3) The license on the original code was unclear, thus rewriting the code
461  from the article means we are free of copyright issues.
462  4) Compile flag for full 53-bit random mantissa.
463 
464 It should be stated that the authors made my life easier, by the fact that
465 the algorithm developed in the text of the article is for a 256 level
466 ziggurat, even if the code itself isn't...
467 
468 One modification to the algorithm developed in the article, is that it is
469 assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in
470 terms like 2^32 in the code. As the normal distribution is defined between
471 -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus
472 in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for
473 this term. The exponential distribution is one sided so we use the
474 full 32 bits. We use EMANTISSA for this term.
475 
476 It appears that I'm slightly slower than the code in the article, this
477 is partially due to a better generator of random integers than they
478 use. But might also be that the case of rapid return was optimized by
479 inlining the relevant code with a #define. As the basic Mersenne
480 Twister is only 25% faster than this code I suspect that the main
481 reason is just the use of the Mersenne Twister and not the inlining,
482 so I'm not going to try and optimize further.
483 */
484 
485 static void
487 {
488  int i;
489  double x, x1;
490 
491  /* Ziggurat tables for the normal distribution */
492  x1 = ZIGGURAT_NOR_R;
493  wi[255] = x1 / NMANTISSA;
494  fi[255] = exp (-0.5 * x1 * x1);
495 
496  /* Index zero is special for tail strip, where Marsaglia and Tsang
497  * defines this as
498  * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
499  * where v is the area of each strip of the ziggurat.
500  */
501  ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA);
502  wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA;
503  fi[0] = 1.;
504 
505  for (i = 254; i > 0; i--)
506  {
507  /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
508  * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
509  */
510  x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + fi[i+1]));
511  ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA);
512  wi[i] = x / NMANTISSA;
513  fi[i] = exp (-0.5 * x * x);
514  x1 = x;
515  }
516 
517  ki[1] = 0;
518 
519  /* Zigurrat tables for the exponential distribution */
520  x1 = ZIGGURAT_EXP_R;
521  we[255] = x1 / EMANTISSA;
522  fe[255] = exp (-x1);
523 
524  /* Index zero is special for tail strip, where Marsaglia and Tsang
525  * defines this as
526  * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
527  * where v is the area of each strip of the ziggurat.
528  */
529  ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA);
530  we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA;
531  fe[0] = 1.;
532 
533  for (i = 254; i > 0; i--)
534  {
535  /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
536  * need inverse operator of y = exp(-x) -> x = -ln(y)
537  */
538  x = - log (EXP_SECTION_AREA / x1 + fe[i+1]);
539  ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA);
540  we[i] = x / EMANTISSA;
541  fe[i] = exp (-x);
542  x1 = x;
543  }
544  ke[1] = 0;
545 
546  initt = 0;
547 }
548 
549 /*
550  * Here is the guts of the algorithm. As Marsaglia and Tsang state the
551  * algorithm in their paper
552  *
553  * 1) Calculate a random signed integer j and let i be the index
554  * provided by the rightmost 8-bits of j
555  * 2) Set x = j * w_i. If j < k_i return x
556  * 3) If i = 0, then return x from the tail
557  * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
558  * 5) goto step 1
559  *
560  * Where f is the functional form of the distribution, which for a normal
561  * distribution is exp(-0.5*x*x)
562  */
563 
564 double
565 oct_randn (void)
566 {
567  if (initt)
569 
570  while (1)
571  {
572  /* The following code is specialized for 32-bit mantissa.
573  * Compared to the arbitrary mantissa code, there is a performance
574  * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40%
575  * There is a bigger performance gain compared to using a full
576  * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240%
577  * Of course, different compilers and operating systems may
578  * have something to do with this.
579  */
580 # if HAVE_X86_32
581  /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */
582  double x;
583  int si,idx;
584  register uint32_t lo, hi;
585  int64_t rabs;
586  uint32_t *p = (uint32_t *)&rabs;
587  lo = randi32 ();
588  idx = lo&0xFF;
589  hi = randi32 ();
590  si = hi&UMASK;
591  p[0] = lo;
592  p[1] = hi&0x1FFFFF;
593  x = ( si ? -rabs : rabs ) * wi[idx];
594 # else /* !HAVE_X86_32 */
595  /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */
596  const uint64_t r = NRANDI;
597  const int64_t rabs = r>>1;
598  const int idx = (int)(rabs&0xFF);
599  const double x = ( r&1 ? -rabs : rabs) * wi[idx];
600 # endif /* !HAVE_X86_32 */
601  if (rabs < (int64_t)ki[idx])
602  return x; /* 99.3% of the time we return here 1st try */
603  else if (idx == 0)
604  {
605  /* As stated in Marsaglia and Tsang
606  *
607  * For the normal tail, the method of Marsaglia[5] provides:
608  * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
609  * then return r+x. Except that r+x is always in the positive
610  * tail!!!! Any thing random might be used to determine the
611  * sign, but as we already have r we might as well use it
612  *
613  * [PAK] but not the bottom 8 bits, since they are all 0 here!
614  */
615  double xx, yy;
616  do
617  {
618  xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
619  yy = - log (RANDU);
620  }
621  while ( yy+yy <= xx*xx);
622  return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);
623  }
624  else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp (-0.5*x*x))
625  return x;
626  }
627 }
628 
629 double
630 oct_rande (void)
631 {
632  if (initt)
634 
635  while (1)
636  {
637  ZIGINT ri = ERANDI;
638  const int idx = (int)(ri & 0xFF);
639  const double x = ri * we[idx];
640  if (ri < ke[idx])
641  return x; /* 98.9% of the time we return here 1st try */
642  else if (idx == 0)
643  {
644  /* As stated in Marsaglia and Tsang
645  *
646  * For the exponential tail, the method of Marsaglia[5] provides:
647  * x = r - ln(U);
648  */
649  return ZIGGURAT_EXP_R - log (RANDU);
650  }
651  else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp (-x))
652  return x;
653  }
654 }
655 
656 #undef ZIGINT
657 #undef EMANTISSA
658 #undef ERANDI
659 #undef NMANTISSA
660 #undef NRANDI
661 #undef RANDU
662 
663 #define ZIGINT uint32_t
664 #define EMANTISSA 4294967296.0 /* 32 bit mantissa */
665 #define ERANDI randi32() /* 32 bits for mantissa */
666 #define NMANTISSA 2147483648.0 /* 31 bit mantissa */
667 #define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */
668 #define RANDU randu32()
669 
674 
675 
676 static void
678 {
679  int i;
680  float x, x1;
681 
682  /* Ziggurat tables for the normal distribution */
683  x1 = ZIGGURAT_NOR_R;
684  fwi[255] = x1 / NMANTISSA;
685  ffi[255] = exp (-0.5 * x1 * x1);
686 
687  /* Index zero is special for tail strip, where Marsaglia and Tsang
688  * defines this as
689  * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
690  * where v is the area of each strip of the ziggurat.
691  */
692  fki[0] = (ZIGINT) (x1 * ffi[255] / NOR_SECTION_AREA * NMANTISSA);
693  fwi[0] = NOR_SECTION_AREA / ffi[255] / NMANTISSA;
694  ffi[0] = 1.;
695 
696  for (i = 254; i > 0; i--)
697  {
698  /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
699  * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
700  */
701  x = sqrt (-2. * log (NOR_SECTION_AREA / x1 + ffi[i+1]));
702  fki[i+1] = (ZIGINT)(x / x1 * NMANTISSA);
703  fwi[i] = x / NMANTISSA;
704  ffi[i] = exp (-0.5 * x * x);
705  x1 = x;
706  }
707 
708  fki[1] = 0;
709 
710  /* Zigurrat tables for the exponential distribution */
711  x1 = ZIGGURAT_EXP_R;
712  fwe[255] = x1 / EMANTISSA;
713  ffe[255] = exp (-x1);
714 
715  /* Index zero is special for tail strip, where Marsaglia and Tsang
716  * defines this as
717  * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
718  * where v is the area of each strip of the ziggurat.
719  */
720  fke[0] = (ZIGINT) (x1 * ffe[255] / EXP_SECTION_AREA * EMANTISSA);
721  fwe[0] = EXP_SECTION_AREA / ffe[255] / EMANTISSA;
722  ffe[0] = 1.;
723 
724  for (i = 254; i > 0; i--)
725  {
726  /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
727  * need inverse operator of y = exp(-x) -> x = -ln(y)
728  */
729  x = - log (EXP_SECTION_AREA / x1 + ffe[i+1]);
730  fke[i+1] = (ZIGINT)(x / x1 * EMANTISSA);
731  fwe[i] = x / EMANTISSA;
732  ffe[i] = exp (-x);
733  x1 = x;
734  }
735  fke[1] = 0;
736 
737  inittf = 0;
738 }
739 
740 /*
741  * Here is the guts of the algorithm. As Marsaglia and Tsang state the
742  * algorithm in their paper
743  *
744  * 1) Calculate a random signed integer j and let i be the index
745  * provided by the rightmost 8-bits of j
746  * 2) Set x = j * w_i. If j < k_i return x
747  * 3) If i = 0, then return x from the tail
748  * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x
749  * 5) goto step 1
750  *
751  * Where f is the functional form of the distribution, which for a normal
752  * distribution is exp(-0.5*x*x)
753  */
754 
755 float
757 {
758  if (inittf)
760 
761  while (1)
762  {
763  /* 32-bit mantissa */
764  const uint32_t r = randi32 ();
765  const uint32_t rabs = r&LMASK;
766  const int idx = (int)(r&0xFF);
767  const float x = ((int32_t)r) * fwi[idx];
768  if (rabs < fki[idx])
769  return x; /* 99.3% of the time we return here 1st try */
770  else if (idx == 0)
771  {
772  /* As stated in Marsaglia and Tsang
773  *
774  * For the normal tail, the method of Marsaglia[5] provides:
775  * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x,
776  * then return r+x. Except that r+x is always in the positive
777  * tail!!!! Any thing random might be used to determine the
778  * sign, but as we already have r we might as well use it
779  *
780  * [PAK] but not the bottom 8 bits, since they are all 0 here!
781  */
782  float xx, yy;
783  do
784  {
785  xx = - ZIGGURAT_NOR_INV_R * log (RANDU);
786  yy = - log (RANDU);
787  }
788  while ( yy+yy <= xx*xx);
789  return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx);
790  }
791  else if ((ffi[idx-1] - ffi[idx]) * RANDU + ffi[idx] < exp (-0.5*x*x))
792  return x;
793  }
794 }
795 
796 float
798 {
799  if (inittf)
801 
802  while (1)
803  {
804  ZIGINT ri = ERANDI;
805  const int idx = (int)(ri & 0xFF);
806  const float x = ri * fwe[idx];
807  if (ri < fke[idx])
808  return x; /* 98.9% of the time we return here 1st try */
809  else if (idx == 0)
810  {
811  /* As stated in Marsaglia and Tsang
812  *
813  * For the exponential tail, the method of Marsaglia[5] provides:
814  * x = r - ln(U);
815  */
816  return ZIGGURAT_EXP_R - log (RANDU);
817  }
818  else if ((ffe[idx-1] - ffe[idx]) * RANDU + ffe[idx] < exp (-x))
819  return x;
820  }
821 }
822 
823 /* Array generators */
824 void
826 {
827  octave_idx_type i;
828  for (i = 0; i < n; i++)
829  p[i] = oct_randu ();
830 }
831 
832 void
834 {
835  octave_idx_type i;
836  for (i = 0; i < n; i++)
837  p[i] = oct_randn ();
838 }
839 
840 void
842 {
843  octave_idx_type i;
844  for (i = 0; i < n; i++)
845  p[i] = oct_rande ();
846 }
847 
848 void
850 {
851  octave_idx_type i;
852  for (i = 0; i < n; i++)
853  p[i] = oct_float_randu ();
854 }
855 
856 void
858 {
859  octave_idx_type i;
860  for (i = 0; i < n; i++)
861  p[i] = oct_float_randn ();
862 }
863 
864 void
866 {
867  octave_idx_type i;
868  for (i = 0; i < n; i++)
869  p[i] = oct_float_rande ();
870 }