1*8e33eff8Schristos /* 2*8e33eff8Schristos * This file derives from SFMT 1.3.3 3*8e33eff8Schristos * (http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/index.html), which was 4*8e33eff8Schristos * released under the terms of the following license: 5*8e33eff8Schristos * 6*8e33eff8Schristos * Copyright (c) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima 7*8e33eff8Schristos * University. All rights reserved. 8*8e33eff8Schristos * 9*8e33eff8Schristos * Redistribution and use in source and binary forms, with or without 10*8e33eff8Schristos * modification, are permitted provided that the following conditions are 11*8e33eff8Schristos * met: 12*8e33eff8Schristos * 13*8e33eff8Schristos * * Redistributions of source code must retain the above copyright 14*8e33eff8Schristos * notice, this list of conditions and the following disclaimer. 15*8e33eff8Schristos * * Redistributions in binary form must reproduce the above 16*8e33eff8Schristos * copyright notice, this list of conditions and the following 17*8e33eff8Schristos * disclaimer in the documentation and/or other materials provided 18*8e33eff8Schristos * with the distribution. 19*8e33eff8Schristos * * Neither the name of the Hiroshima University nor the names of 20*8e33eff8Schristos * its contributors may be used to endorse or promote products 21*8e33eff8Schristos * derived from this software without specific prior written 22*8e33eff8Schristos * permission. 23*8e33eff8Schristos * 24*8e33eff8Schristos * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25*8e33eff8Schristos * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26*8e33eff8Schristos * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27*8e33eff8Schristos * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28*8e33eff8Schristos * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29*8e33eff8Schristos * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 30*8e33eff8Schristos * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31*8e33eff8Schristos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32*8e33eff8Schristos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33*8e33eff8Schristos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 34*8e33eff8Schristos * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35*8e33eff8Schristos */ 36*8e33eff8Schristos /** 37*8e33eff8Schristos * @file SFMT.c 38*8e33eff8Schristos * @brief SIMD oriented Fast Mersenne Twister(SFMT) 39*8e33eff8Schristos * 40*8e33eff8Schristos * @author Mutsuo Saito (Hiroshima University) 41*8e33eff8Schristos * @author Makoto Matsumoto (Hiroshima University) 42*8e33eff8Schristos * 43*8e33eff8Schristos * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima 44*8e33eff8Schristos * University. All rights reserved. 45*8e33eff8Schristos * 46*8e33eff8Schristos * The new BSD License is applied to this software, see LICENSE.txt 47*8e33eff8Schristos */ 48*8e33eff8Schristos #define SFMT_C_ 49*8e33eff8Schristos #include "test/jemalloc_test.h" 50*8e33eff8Schristos #include "test/SFMT-params.h" 51*8e33eff8Schristos 52*8e33eff8Schristos #if defined(JEMALLOC_BIG_ENDIAN) && !defined(BIG_ENDIAN64) 53*8e33eff8Schristos #define BIG_ENDIAN64 1 54*8e33eff8Schristos #endif 55*8e33eff8Schristos #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) 56*8e33eff8Schristos #define BIG_ENDIAN64 1 57*8e33eff8Schristos #endif 58*8e33eff8Schristos #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) 59*8e33eff8Schristos #define BIG_ENDIAN64 1 60*8e33eff8Schristos #endif 61*8e33eff8Schristos #if defined(ONLY64) && !defined(BIG_ENDIAN64) 62*8e33eff8Schristos #if defined(__GNUC__) 63*8e33eff8Schristos #error "-DONLY64 must be specified with -DBIG_ENDIAN64" 64*8e33eff8Schristos #endif 65*8e33eff8Schristos #undef ONLY64 66*8e33eff8Schristos #endif 67*8e33eff8Schristos /*------------------------------------------------------ 68*8e33eff8Schristos 128-bit SIMD data type for Altivec, SSE2 or standard C 69*8e33eff8Schristos ------------------------------------------------------*/ 70*8e33eff8Schristos #if defined(HAVE_ALTIVEC) 71*8e33eff8Schristos /** 128-bit data structure */ 72*8e33eff8Schristos union W128_T { 73*8e33eff8Schristos vector unsigned int s; 74*8e33eff8Schristos uint32_t u[4]; 75*8e33eff8Schristos }; 76*8e33eff8Schristos /** 128-bit data type */ 77*8e33eff8Schristos typedef union W128_T w128_t; 78*8e33eff8Schristos 79*8e33eff8Schristos #elif defined(HAVE_SSE2) 80*8e33eff8Schristos /** 128-bit data structure */ 81*8e33eff8Schristos union W128_T { 82*8e33eff8Schristos __m128i si; 83*8e33eff8Schristos uint32_t u[4]; 84*8e33eff8Schristos }; 85*8e33eff8Schristos /** 128-bit data type */ 86*8e33eff8Schristos typedef union W128_T w128_t; 87*8e33eff8Schristos 88*8e33eff8Schristos #else 89*8e33eff8Schristos 90*8e33eff8Schristos /** 128-bit data structure */ 91*8e33eff8Schristos struct W128_T { 92*8e33eff8Schristos uint32_t u[4]; 93*8e33eff8Schristos }; 94*8e33eff8Schristos /** 128-bit data type */ 95*8e33eff8Schristos typedef struct W128_T w128_t; 96*8e33eff8Schristos 97*8e33eff8Schristos #endif 98*8e33eff8Schristos 99*8e33eff8Schristos struct sfmt_s { 100*8e33eff8Schristos /** the 128-bit internal state array */ 101*8e33eff8Schristos w128_t sfmt[N]; 102*8e33eff8Schristos /** index counter to the 32-bit internal state array */ 103*8e33eff8Schristos int idx; 104*8e33eff8Schristos /** a flag: it is 0 if and only if the internal state is not yet 105*8e33eff8Schristos * initialized. */ 106*8e33eff8Schristos int initialized; 107*8e33eff8Schristos }; 108*8e33eff8Schristos 109*8e33eff8Schristos /*-------------------------------------- 110*8e33eff8Schristos FILE GLOBAL VARIABLES 111*8e33eff8Schristos internal state, index counter and flag 112*8e33eff8Schristos --------------------------------------*/ 113*8e33eff8Schristos 114*8e33eff8Schristos /** a parity check vector which certificate the period of 2^{MEXP} */ 115*8e33eff8Schristos static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; 116*8e33eff8Schristos 117*8e33eff8Schristos /*---------------- 118*8e33eff8Schristos STATIC FUNCTIONS 119*8e33eff8Schristos ----------------*/ 120*8e33eff8Schristos static inline int idxof(int i); 121*8e33eff8Schristos #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) 122*8e33eff8Schristos static inline void rshift128(w128_t *out, w128_t const *in, int shift); 123*8e33eff8Schristos static inline void lshift128(w128_t *out, w128_t const *in, int shift); 124*8e33eff8Schristos #endif 125*8e33eff8Schristos static inline void gen_rand_all(sfmt_t *ctx); 126*8e33eff8Schristos static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); 127*8e33eff8Schristos static inline uint32_t func1(uint32_t x); 128*8e33eff8Schristos static inline uint32_t func2(uint32_t x); 129*8e33eff8Schristos static void period_certification(sfmt_t *ctx); 130*8e33eff8Schristos #if defined(BIG_ENDIAN64) && !defined(ONLY64) 131*8e33eff8Schristos static inline void swap(w128_t *array, int size); 132*8e33eff8Schristos #endif 133*8e33eff8Schristos 134*8e33eff8Schristos #if defined(HAVE_ALTIVEC) 135*8e33eff8Schristos #include "test/SFMT-alti.h" 136*8e33eff8Schristos #elif defined(HAVE_SSE2) 137*8e33eff8Schristos #include "test/SFMT-sse2.h" 138*8e33eff8Schristos #endif 139*8e33eff8Schristos 140*8e33eff8Schristos /** 141*8e33eff8Schristos * This function simulate a 64-bit index of LITTLE ENDIAN 142*8e33eff8Schristos * in BIG ENDIAN machine. 143*8e33eff8Schristos */ 144*8e33eff8Schristos #ifdef ONLY64 145*8e33eff8Schristos static inline int idxof(int i) { 146*8e33eff8Schristos return i ^ 1; 147*8e33eff8Schristos } 148*8e33eff8Schristos #else 149*8e33eff8Schristos static inline int idxof(int i) { 150*8e33eff8Schristos return i; 151*8e33eff8Schristos } 152*8e33eff8Schristos #endif 153*8e33eff8Schristos /** 154*8e33eff8Schristos * This function simulates SIMD 128-bit right shift by the standard C. 155*8e33eff8Schristos * The 128-bit integer given in in is shifted by (shift * 8) bits. 156*8e33eff8Schristos * This function simulates the LITTLE ENDIAN SIMD. 157*8e33eff8Schristos * @param out the output of this function 158*8e33eff8Schristos * @param in the 128-bit data to be shifted 159*8e33eff8Schristos * @param shift the shift value 160*8e33eff8Schristos */ 161*8e33eff8Schristos #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) 162*8e33eff8Schristos #ifdef ONLY64 163*8e33eff8Schristos static inline void rshift128(w128_t *out, w128_t const *in, int shift) { 164*8e33eff8Schristos uint64_t th, tl, oh, ol; 165*8e33eff8Schristos 166*8e33eff8Schristos th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); 167*8e33eff8Schristos tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); 168*8e33eff8Schristos 169*8e33eff8Schristos oh = th >> (shift * 8); 170*8e33eff8Schristos ol = tl >> (shift * 8); 171*8e33eff8Schristos ol |= th << (64 - shift * 8); 172*8e33eff8Schristos out->u[0] = (uint32_t)(ol >> 32); 173*8e33eff8Schristos out->u[1] = (uint32_t)ol; 174*8e33eff8Schristos out->u[2] = (uint32_t)(oh >> 32); 175*8e33eff8Schristos out->u[3] = (uint32_t)oh; 176*8e33eff8Schristos } 177*8e33eff8Schristos #else 178*8e33eff8Schristos static inline void rshift128(w128_t *out, w128_t const *in, int shift) { 179*8e33eff8Schristos uint64_t th, tl, oh, ol; 180*8e33eff8Schristos 181*8e33eff8Schristos th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); 182*8e33eff8Schristos tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); 183*8e33eff8Schristos 184*8e33eff8Schristos oh = th >> (shift * 8); 185*8e33eff8Schristos ol = tl >> (shift * 8); 186*8e33eff8Schristos ol |= th << (64 - shift * 8); 187*8e33eff8Schristos out->u[1] = (uint32_t)(ol >> 32); 188*8e33eff8Schristos out->u[0] = (uint32_t)ol; 189*8e33eff8Schristos out->u[3] = (uint32_t)(oh >> 32); 190*8e33eff8Schristos out->u[2] = (uint32_t)oh; 191*8e33eff8Schristos } 192*8e33eff8Schristos #endif 193*8e33eff8Schristos /** 194*8e33eff8Schristos * This function simulates SIMD 128-bit left shift by the standard C. 195*8e33eff8Schristos * The 128-bit integer given in in is shifted by (shift * 8) bits. 196*8e33eff8Schristos * This function simulates the LITTLE ENDIAN SIMD. 197*8e33eff8Schristos * @param out the output of this function 198*8e33eff8Schristos * @param in the 128-bit data to be shifted 199*8e33eff8Schristos * @param shift the shift value 200*8e33eff8Schristos */ 201*8e33eff8Schristos #ifdef ONLY64 202*8e33eff8Schristos static inline void lshift128(w128_t *out, w128_t const *in, int shift) { 203*8e33eff8Schristos uint64_t th, tl, oh, ol; 204*8e33eff8Schristos 205*8e33eff8Schristos th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); 206*8e33eff8Schristos tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); 207*8e33eff8Schristos 208*8e33eff8Schristos oh = th << (shift * 8); 209*8e33eff8Schristos ol = tl << (shift * 8); 210*8e33eff8Schristos oh |= tl >> (64 - shift * 8); 211*8e33eff8Schristos out->u[0] = (uint32_t)(ol >> 32); 212*8e33eff8Schristos out->u[1] = (uint32_t)ol; 213*8e33eff8Schristos out->u[2] = (uint32_t)(oh >> 32); 214*8e33eff8Schristos out->u[3] = (uint32_t)oh; 215*8e33eff8Schristos } 216*8e33eff8Schristos #else 217*8e33eff8Schristos static inline void lshift128(w128_t *out, w128_t const *in, int shift) { 218*8e33eff8Schristos uint64_t th, tl, oh, ol; 219*8e33eff8Schristos 220*8e33eff8Schristos th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); 221*8e33eff8Schristos tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); 222*8e33eff8Schristos 223*8e33eff8Schristos oh = th << (shift * 8); 224*8e33eff8Schristos ol = tl << (shift * 8); 225*8e33eff8Schristos oh |= tl >> (64 - shift * 8); 226*8e33eff8Schristos out->u[1] = (uint32_t)(ol >> 32); 227*8e33eff8Schristos out->u[0] = (uint32_t)ol; 228*8e33eff8Schristos out->u[3] = (uint32_t)(oh >> 32); 229*8e33eff8Schristos out->u[2] = (uint32_t)oh; 230*8e33eff8Schristos } 231*8e33eff8Schristos #endif 232*8e33eff8Schristos #endif 233*8e33eff8Schristos 234*8e33eff8Schristos /** 235*8e33eff8Schristos * This function represents the recursion formula. 236*8e33eff8Schristos * @param r output 237*8e33eff8Schristos * @param a a 128-bit part of the internal state array 238*8e33eff8Schristos * @param b a 128-bit part of the internal state array 239*8e33eff8Schristos * @param c a 128-bit part of the internal state array 240*8e33eff8Schristos * @param d a 128-bit part of the internal state array 241*8e33eff8Schristos */ 242*8e33eff8Schristos #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) 243*8e33eff8Schristos #ifdef ONLY64 244*8e33eff8Schristos static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, 245*8e33eff8Schristos w128_t *d) { 246*8e33eff8Schristos w128_t x; 247*8e33eff8Schristos w128_t y; 248*8e33eff8Schristos 249*8e33eff8Schristos lshift128(&x, a, SL2); 250*8e33eff8Schristos rshift128(&y, c, SR2); 251*8e33eff8Schristos r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] 252*8e33eff8Schristos ^ (d->u[0] << SL1); 253*8e33eff8Schristos r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] 254*8e33eff8Schristos ^ (d->u[1] << SL1); 255*8e33eff8Schristos r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] 256*8e33eff8Schristos ^ (d->u[2] << SL1); 257*8e33eff8Schristos r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] 258*8e33eff8Schristos ^ (d->u[3] << SL1); 259*8e33eff8Schristos } 260*8e33eff8Schristos #else 261*8e33eff8Schristos static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, 262*8e33eff8Schristos w128_t *d) { 263*8e33eff8Schristos w128_t x; 264*8e33eff8Schristos w128_t y; 265*8e33eff8Schristos 266*8e33eff8Schristos lshift128(&x, a, SL2); 267*8e33eff8Schristos rshift128(&y, c, SR2); 268*8e33eff8Schristos r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] 269*8e33eff8Schristos ^ (d->u[0] << SL1); 270*8e33eff8Schristos r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] 271*8e33eff8Schristos ^ (d->u[1] << SL1); 272*8e33eff8Schristos r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] 273*8e33eff8Schristos ^ (d->u[2] << SL1); 274*8e33eff8Schristos r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] 275*8e33eff8Schristos ^ (d->u[3] << SL1); 276*8e33eff8Schristos } 277*8e33eff8Schristos #endif 278*8e33eff8Schristos #endif 279*8e33eff8Schristos 280*8e33eff8Schristos #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) 281*8e33eff8Schristos /** 282*8e33eff8Schristos * This function fills the internal state array with pseudorandom 283*8e33eff8Schristos * integers. 284*8e33eff8Schristos */ 285*8e33eff8Schristos static inline void gen_rand_all(sfmt_t *ctx) { 286*8e33eff8Schristos int i; 287*8e33eff8Schristos w128_t *r1, *r2; 288*8e33eff8Schristos 289*8e33eff8Schristos r1 = &ctx->sfmt[N - 2]; 290*8e33eff8Schristos r2 = &ctx->sfmt[N - 1]; 291*8e33eff8Schristos for (i = 0; i < N - POS1; i++) { 292*8e33eff8Schristos do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, 293*8e33eff8Schristos r2); 294*8e33eff8Schristos r1 = r2; 295*8e33eff8Schristos r2 = &ctx->sfmt[i]; 296*8e33eff8Schristos } 297*8e33eff8Schristos for (; i < N; i++) { 298*8e33eff8Schristos do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1 - N], r1, 299*8e33eff8Schristos r2); 300*8e33eff8Schristos r1 = r2; 301*8e33eff8Schristos r2 = &ctx->sfmt[i]; 302*8e33eff8Schristos } 303*8e33eff8Schristos } 304*8e33eff8Schristos 305*8e33eff8Schristos /** 306*8e33eff8Schristos * This function fills the user-specified array with pseudorandom 307*8e33eff8Schristos * integers. 308*8e33eff8Schristos * 309*8e33eff8Schristos * @param array an 128-bit array to be filled by pseudorandom numbers. 310*8e33eff8Schristos * @param size number of 128-bit pseudorandom numbers to be generated. 311*8e33eff8Schristos */ 312*8e33eff8Schristos static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { 313*8e33eff8Schristos int i, j; 314*8e33eff8Schristos w128_t *r1, *r2; 315*8e33eff8Schristos 316*8e33eff8Schristos r1 = &ctx->sfmt[N - 2]; 317*8e33eff8Schristos r2 = &ctx->sfmt[N - 1]; 318*8e33eff8Schristos for (i = 0; i < N - POS1; i++) { 319*8e33eff8Schristos do_recursion(&array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); 320*8e33eff8Schristos r1 = r2; 321*8e33eff8Schristos r2 = &array[i]; 322*8e33eff8Schristos } 323*8e33eff8Schristos for (; i < N; i++) { 324*8e33eff8Schristos do_recursion(&array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2); 325*8e33eff8Schristos r1 = r2; 326*8e33eff8Schristos r2 = &array[i]; 327*8e33eff8Schristos } 328*8e33eff8Schristos for (; i < size - N; i++) { 329*8e33eff8Schristos do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); 330*8e33eff8Schristos r1 = r2; 331*8e33eff8Schristos r2 = &array[i]; 332*8e33eff8Schristos } 333*8e33eff8Schristos for (j = 0; j < 2 * N - size; j++) { 334*8e33eff8Schristos ctx->sfmt[j] = array[j + size - N]; 335*8e33eff8Schristos } 336*8e33eff8Schristos for (; i < size; i++, j++) { 337*8e33eff8Schristos do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); 338*8e33eff8Schristos r1 = r2; 339*8e33eff8Schristos r2 = &array[i]; 340*8e33eff8Schristos ctx->sfmt[j] = array[i]; 341*8e33eff8Schristos } 342*8e33eff8Schristos } 343*8e33eff8Schristos #endif 344*8e33eff8Schristos 345*8e33eff8Schristos #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) 346*8e33eff8Schristos static inline void swap(w128_t *array, int size) { 347*8e33eff8Schristos int i; 348*8e33eff8Schristos uint32_t x, y; 349*8e33eff8Schristos 350*8e33eff8Schristos for (i = 0; i < size; i++) { 351*8e33eff8Schristos x = array[i].u[0]; 352*8e33eff8Schristos y = array[i].u[2]; 353*8e33eff8Schristos array[i].u[0] = array[i].u[1]; 354*8e33eff8Schristos array[i].u[2] = array[i].u[3]; 355*8e33eff8Schristos array[i].u[1] = x; 356*8e33eff8Schristos array[i].u[3] = y; 357*8e33eff8Schristos } 358*8e33eff8Schristos } 359*8e33eff8Schristos #endif 360*8e33eff8Schristos /** 361*8e33eff8Schristos * This function represents a function used in the initialization 362*8e33eff8Schristos * by init_by_array 363*8e33eff8Schristos * @param x 32-bit integer 364*8e33eff8Schristos * @return 32-bit integer 365*8e33eff8Schristos */ 366*8e33eff8Schristos static uint32_t func1(uint32_t x) { 367*8e33eff8Schristos return (x ^ (x >> 27)) * (uint32_t)1664525UL; 368*8e33eff8Schristos } 369*8e33eff8Schristos 370*8e33eff8Schristos /** 371*8e33eff8Schristos * This function represents a function used in the initialization 372*8e33eff8Schristos * by init_by_array 373*8e33eff8Schristos * @param x 32-bit integer 374*8e33eff8Schristos * @return 32-bit integer 375*8e33eff8Schristos */ 376*8e33eff8Schristos static uint32_t func2(uint32_t x) { 377*8e33eff8Schristos return (x ^ (x >> 27)) * (uint32_t)1566083941UL; 378*8e33eff8Schristos } 379*8e33eff8Schristos 380*8e33eff8Schristos /** 381*8e33eff8Schristos * This function certificate the period of 2^{MEXP} 382*8e33eff8Schristos */ 383*8e33eff8Schristos static void period_certification(sfmt_t *ctx) { 384*8e33eff8Schristos int inner = 0; 385*8e33eff8Schristos int i, j; 386*8e33eff8Schristos uint32_t work; 387*8e33eff8Schristos uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; 388*8e33eff8Schristos 389*8e33eff8Schristos for (i = 0; i < 4; i++) 390*8e33eff8Schristos inner ^= psfmt32[idxof(i)] & parity[i]; 391*8e33eff8Schristos for (i = 16; i > 0; i >>= 1) 392*8e33eff8Schristos inner ^= inner >> i; 393*8e33eff8Schristos inner &= 1; 394*8e33eff8Schristos /* check OK */ 395*8e33eff8Schristos if (inner == 1) { 396*8e33eff8Schristos return; 397*8e33eff8Schristos } 398*8e33eff8Schristos /* check NG, and modification */ 399*8e33eff8Schristos for (i = 0; i < 4; i++) { 400*8e33eff8Schristos work = 1; 401*8e33eff8Schristos for (j = 0; j < 32; j++) { 402*8e33eff8Schristos if ((work & parity[i]) != 0) { 403*8e33eff8Schristos psfmt32[idxof(i)] ^= work; 404*8e33eff8Schristos return; 405*8e33eff8Schristos } 406*8e33eff8Schristos work = work << 1; 407*8e33eff8Schristos } 408*8e33eff8Schristos } 409*8e33eff8Schristos } 410*8e33eff8Schristos 411*8e33eff8Schristos /*---------------- 412*8e33eff8Schristos PUBLIC FUNCTIONS 413*8e33eff8Schristos ----------------*/ 414*8e33eff8Schristos /** 415*8e33eff8Schristos * This function returns the identification string. 416*8e33eff8Schristos * The string shows the word size, the Mersenne exponent, 417*8e33eff8Schristos * and all parameters of this generator. 418*8e33eff8Schristos */ 419*8e33eff8Schristos const char *get_idstring(void) { 420*8e33eff8Schristos return IDSTR; 421*8e33eff8Schristos } 422*8e33eff8Schristos 423*8e33eff8Schristos /** 424*8e33eff8Schristos * This function returns the minimum size of array used for \b 425*8e33eff8Schristos * fill_array32() function. 426*8e33eff8Schristos * @return minimum size of array used for fill_array32() function. 427*8e33eff8Schristos */ 428*8e33eff8Schristos int get_min_array_size32(void) { 429*8e33eff8Schristos return N32; 430*8e33eff8Schristos } 431*8e33eff8Schristos 432*8e33eff8Schristos /** 433*8e33eff8Schristos * This function returns the minimum size of array used for \b 434*8e33eff8Schristos * fill_array64() function. 435*8e33eff8Schristos * @return minimum size of array used for fill_array64() function. 436*8e33eff8Schristos */ 437*8e33eff8Schristos int get_min_array_size64(void) { 438*8e33eff8Schristos return N64; 439*8e33eff8Schristos } 440*8e33eff8Schristos 441*8e33eff8Schristos #ifndef ONLY64 442*8e33eff8Schristos /** 443*8e33eff8Schristos * This function generates and returns 32-bit pseudorandom number. 444*8e33eff8Schristos * init_gen_rand or init_by_array must be called before this function. 445*8e33eff8Schristos * @return 32-bit pseudorandom number 446*8e33eff8Schristos */ 447*8e33eff8Schristos uint32_t gen_rand32(sfmt_t *ctx) { 448*8e33eff8Schristos uint32_t r; 449*8e33eff8Schristos uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; 450*8e33eff8Schristos 451*8e33eff8Schristos assert(ctx->initialized); 452*8e33eff8Schristos if (ctx->idx >= N32) { 453*8e33eff8Schristos gen_rand_all(ctx); 454*8e33eff8Schristos ctx->idx = 0; 455*8e33eff8Schristos } 456*8e33eff8Schristos r = psfmt32[ctx->idx++]; 457*8e33eff8Schristos return r; 458*8e33eff8Schristos } 459*8e33eff8Schristos 460*8e33eff8Schristos /* Generate a random integer in [0..limit). */ 461*8e33eff8Schristos uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { 462*8e33eff8Schristos uint32_t ret, above; 463*8e33eff8Schristos 464*8e33eff8Schristos above = 0xffffffffU - (0xffffffffU % limit); 465*8e33eff8Schristos while (1) { 466*8e33eff8Schristos ret = gen_rand32(ctx); 467*8e33eff8Schristos if (ret < above) { 468*8e33eff8Schristos ret %= limit; 469*8e33eff8Schristos break; 470*8e33eff8Schristos } 471*8e33eff8Schristos } 472*8e33eff8Schristos return ret; 473*8e33eff8Schristos } 474*8e33eff8Schristos #endif 475*8e33eff8Schristos /** 476*8e33eff8Schristos * This function generates and returns 64-bit pseudorandom number. 477*8e33eff8Schristos * init_gen_rand or init_by_array must be called before this function. 478*8e33eff8Schristos * The function gen_rand64 should not be called after gen_rand32, 479*8e33eff8Schristos * unless an initialization is again executed. 480*8e33eff8Schristos * @return 64-bit pseudorandom number 481*8e33eff8Schristos */ 482*8e33eff8Schristos uint64_t gen_rand64(sfmt_t *ctx) { 483*8e33eff8Schristos #if defined(BIG_ENDIAN64) && !defined(ONLY64) 484*8e33eff8Schristos uint32_t r1, r2; 485*8e33eff8Schristos uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; 486*8e33eff8Schristos #else 487*8e33eff8Schristos uint64_t r; 488*8e33eff8Schristos uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0]; 489*8e33eff8Schristos #endif 490*8e33eff8Schristos 491*8e33eff8Schristos assert(ctx->initialized); 492*8e33eff8Schristos assert(ctx->idx % 2 == 0); 493*8e33eff8Schristos 494*8e33eff8Schristos if (ctx->idx >= N32) { 495*8e33eff8Schristos gen_rand_all(ctx); 496*8e33eff8Schristos ctx->idx = 0; 497*8e33eff8Schristos } 498*8e33eff8Schristos #if defined(BIG_ENDIAN64) && !defined(ONLY64) 499*8e33eff8Schristos r1 = psfmt32[ctx->idx]; 500*8e33eff8Schristos r2 = psfmt32[ctx->idx + 1]; 501*8e33eff8Schristos ctx->idx += 2; 502*8e33eff8Schristos return ((uint64_t)r2 << 32) | r1; 503*8e33eff8Schristos #else 504*8e33eff8Schristos r = psfmt64[ctx->idx / 2]; 505*8e33eff8Schristos ctx->idx += 2; 506*8e33eff8Schristos return r; 507*8e33eff8Schristos #endif 508*8e33eff8Schristos } 509*8e33eff8Schristos 510*8e33eff8Schristos /* Generate a random integer in [0..limit). */ 511*8e33eff8Schristos uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { 512*8e33eff8Schristos uint64_t ret, above; 513*8e33eff8Schristos 514*8e33eff8Schristos above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); 515*8e33eff8Schristos while (1) { 516*8e33eff8Schristos ret = gen_rand64(ctx); 517*8e33eff8Schristos if (ret < above) { 518*8e33eff8Schristos ret %= limit; 519*8e33eff8Schristos break; 520*8e33eff8Schristos } 521*8e33eff8Schristos } 522*8e33eff8Schristos return ret; 523*8e33eff8Schristos } 524*8e33eff8Schristos 525*8e33eff8Schristos #ifndef ONLY64 526*8e33eff8Schristos /** 527*8e33eff8Schristos * This function generates pseudorandom 32-bit integers in the 528*8e33eff8Schristos * specified array[] by one call. The number of pseudorandom integers 529*8e33eff8Schristos * is specified by the argument size, which must be at least 624 and a 530*8e33eff8Schristos * multiple of four. The generation by this function is much faster 531*8e33eff8Schristos * than the following gen_rand function. 532*8e33eff8Schristos * 533*8e33eff8Schristos * For initialization, init_gen_rand or init_by_array must be called 534*8e33eff8Schristos * before the first call of this function. This function can not be 535*8e33eff8Schristos * used after calling gen_rand function, without initialization. 536*8e33eff8Schristos * 537*8e33eff8Schristos * @param array an array where pseudorandom 32-bit integers are filled 538*8e33eff8Schristos * by this function. The pointer to the array must be \b "aligned" 539*8e33eff8Schristos * (namely, must be a multiple of 16) in the SIMD version, since it 540*8e33eff8Schristos * refers to the address of a 128-bit integer. In the standard C 541*8e33eff8Schristos * version, the pointer is arbitrary. 542*8e33eff8Schristos * 543*8e33eff8Schristos * @param size the number of 32-bit pseudorandom integers to be 544*8e33eff8Schristos * generated. size must be a multiple of 4, and greater than or equal 545*8e33eff8Schristos * to (MEXP / 128 + 1) * 4. 546*8e33eff8Schristos * 547*8e33eff8Schristos * @note \b memalign or \b posix_memalign is available to get aligned 548*8e33eff8Schristos * memory. Mac OSX doesn't have these functions, but \b malloc of OSX 549*8e33eff8Schristos * returns the pointer to the aligned memory block. 550*8e33eff8Schristos */ 551*8e33eff8Schristos void fill_array32(sfmt_t *ctx, uint32_t *array, int size) { 552*8e33eff8Schristos assert(ctx->initialized); 553*8e33eff8Schristos assert(ctx->idx == N32); 554*8e33eff8Schristos assert(size % 4 == 0); 555*8e33eff8Schristos assert(size >= N32); 556*8e33eff8Schristos 557*8e33eff8Schristos gen_rand_array(ctx, (w128_t *)array, size / 4); 558*8e33eff8Schristos ctx->idx = N32; 559*8e33eff8Schristos } 560*8e33eff8Schristos #endif 561*8e33eff8Schristos 562*8e33eff8Schristos /** 563*8e33eff8Schristos * This function generates pseudorandom 64-bit integers in the 564*8e33eff8Schristos * specified array[] by one call. The number of pseudorandom integers 565*8e33eff8Schristos * is specified by the argument size, which must be at least 312 and a 566*8e33eff8Schristos * multiple of two. The generation by this function is much faster 567*8e33eff8Schristos * than the following gen_rand function. 568*8e33eff8Schristos * 569*8e33eff8Schristos * For initialization, init_gen_rand or init_by_array must be called 570*8e33eff8Schristos * before the first call of this function. This function can not be 571*8e33eff8Schristos * used after calling gen_rand function, without initialization. 572*8e33eff8Schristos * 573*8e33eff8Schristos * @param array an array where pseudorandom 64-bit integers are filled 574*8e33eff8Schristos * by this function. The pointer to the array must be "aligned" 575*8e33eff8Schristos * (namely, must be a multiple of 16) in the SIMD version, since it 576*8e33eff8Schristos * refers to the address of a 128-bit integer. In the standard C 577*8e33eff8Schristos * version, the pointer is arbitrary. 578*8e33eff8Schristos * 579*8e33eff8Schristos * @param size the number of 64-bit pseudorandom integers to be 580*8e33eff8Schristos * generated. size must be a multiple of 2, and greater than or equal 581*8e33eff8Schristos * to (MEXP / 128 + 1) * 2 582*8e33eff8Schristos * 583*8e33eff8Schristos * @note \b memalign or \b posix_memalign is available to get aligned 584*8e33eff8Schristos * memory. Mac OSX doesn't have these functions, but \b malloc of OSX 585*8e33eff8Schristos * returns the pointer to the aligned memory block. 586*8e33eff8Schristos */ 587*8e33eff8Schristos void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { 588*8e33eff8Schristos assert(ctx->initialized); 589*8e33eff8Schristos assert(ctx->idx == N32); 590*8e33eff8Schristos assert(size % 2 == 0); 591*8e33eff8Schristos assert(size >= N64); 592*8e33eff8Schristos 593*8e33eff8Schristos gen_rand_array(ctx, (w128_t *)array, size / 2); 594*8e33eff8Schristos ctx->idx = N32; 595*8e33eff8Schristos 596*8e33eff8Schristos #if defined(BIG_ENDIAN64) && !defined(ONLY64) 597*8e33eff8Schristos swap((w128_t *)array, size /2); 598*8e33eff8Schristos #endif 599*8e33eff8Schristos } 600*8e33eff8Schristos 601*8e33eff8Schristos /** 602*8e33eff8Schristos * This function initializes the internal state array with a 32-bit 603*8e33eff8Schristos * integer seed. 604*8e33eff8Schristos * 605*8e33eff8Schristos * @param seed a 32-bit integer used as the seed. 606*8e33eff8Schristos */ 607*8e33eff8Schristos sfmt_t *init_gen_rand(uint32_t seed) { 608*8e33eff8Schristos void *p; 609*8e33eff8Schristos sfmt_t *ctx; 610*8e33eff8Schristos int i; 611*8e33eff8Schristos uint32_t *psfmt32; 612*8e33eff8Schristos 613*8e33eff8Schristos if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { 614*8e33eff8Schristos return NULL; 615*8e33eff8Schristos } 616*8e33eff8Schristos ctx = (sfmt_t *)p; 617*8e33eff8Schristos psfmt32 = &ctx->sfmt[0].u[0]; 618*8e33eff8Schristos 619*8e33eff8Schristos psfmt32[idxof(0)] = seed; 620*8e33eff8Schristos for (i = 1; i < N32; i++) { 621*8e33eff8Schristos psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] 622*8e33eff8Schristos ^ (psfmt32[idxof(i - 1)] >> 30)) 623*8e33eff8Schristos + i; 624*8e33eff8Schristos } 625*8e33eff8Schristos ctx->idx = N32; 626*8e33eff8Schristos period_certification(ctx); 627*8e33eff8Schristos ctx->initialized = 1; 628*8e33eff8Schristos 629*8e33eff8Schristos return ctx; 630*8e33eff8Schristos } 631*8e33eff8Schristos 632*8e33eff8Schristos /** 633*8e33eff8Schristos * This function initializes the internal state array, 634*8e33eff8Schristos * with an array of 32-bit integers used as the seeds 635*8e33eff8Schristos * @param init_key the array of 32-bit integers, used as a seed. 636*8e33eff8Schristos * @param key_length the length of init_key. 637*8e33eff8Schristos */ 638*8e33eff8Schristos sfmt_t *init_by_array(uint32_t *init_key, int key_length) { 639*8e33eff8Schristos void *p; 640*8e33eff8Schristos sfmt_t *ctx; 641*8e33eff8Schristos int i, j, count; 642*8e33eff8Schristos uint32_t r; 643*8e33eff8Schristos int lag; 644*8e33eff8Schristos int mid; 645*8e33eff8Schristos int size = N * 4; 646*8e33eff8Schristos uint32_t *psfmt32; 647*8e33eff8Schristos 648*8e33eff8Schristos if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { 649*8e33eff8Schristos return NULL; 650*8e33eff8Schristos } 651*8e33eff8Schristos ctx = (sfmt_t *)p; 652*8e33eff8Schristos psfmt32 = &ctx->sfmt[0].u[0]; 653*8e33eff8Schristos 654*8e33eff8Schristos if (size >= 623) { 655*8e33eff8Schristos lag = 11; 656*8e33eff8Schristos } else if (size >= 68) { 657*8e33eff8Schristos lag = 7; 658*8e33eff8Schristos } else if (size >= 39) { 659*8e33eff8Schristos lag = 5; 660*8e33eff8Schristos } else { 661*8e33eff8Schristos lag = 3; 662*8e33eff8Schristos } 663*8e33eff8Schristos mid = (size - lag) / 2; 664*8e33eff8Schristos 665*8e33eff8Schristos memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt)); 666*8e33eff8Schristos if (key_length + 1 > N32) { 667*8e33eff8Schristos count = key_length + 1; 668*8e33eff8Schristos } else { 669*8e33eff8Schristos count = N32; 670*8e33eff8Schristos } 671*8e33eff8Schristos r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] 672*8e33eff8Schristos ^ psfmt32[idxof(N32 - 1)]); 673*8e33eff8Schristos psfmt32[idxof(mid)] += r; 674*8e33eff8Schristos r += key_length; 675*8e33eff8Schristos psfmt32[idxof(mid + lag)] += r; 676*8e33eff8Schristos psfmt32[idxof(0)] = r; 677*8e33eff8Schristos 678*8e33eff8Schristos count--; 679*8e33eff8Schristos for (i = 1, j = 0; (j < count) && (j < key_length); j++) { 680*8e33eff8Schristos r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] 681*8e33eff8Schristos ^ psfmt32[idxof((i + N32 - 1) % N32)]); 682*8e33eff8Schristos psfmt32[idxof((i + mid) % N32)] += r; 683*8e33eff8Schristos r += init_key[j] + i; 684*8e33eff8Schristos psfmt32[idxof((i + mid + lag) % N32)] += r; 685*8e33eff8Schristos psfmt32[idxof(i)] = r; 686*8e33eff8Schristos i = (i + 1) % N32; 687*8e33eff8Schristos } 688*8e33eff8Schristos for (; j < count; j++) { 689*8e33eff8Schristos r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] 690*8e33eff8Schristos ^ psfmt32[idxof((i + N32 - 1) % N32)]); 691*8e33eff8Schristos psfmt32[idxof((i + mid) % N32)] += r; 692*8e33eff8Schristos r += i; 693*8e33eff8Schristos psfmt32[idxof((i + mid + lag) % N32)] += r; 694*8e33eff8Schristos psfmt32[idxof(i)] = r; 695*8e33eff8Schristos i = (i + 1) % N32; 696*8e33eff8Schristos } 697*8e33eff8Schristos for (j = 0; j < N32; j++) { 698*8e33eff8Schristos r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] 699*8e33eff8Schristos + psfmt32[idxof((i + N32 - 1) % N32)]); 700*8e33eff8Schristos psfmt32[idxof((i + mid) % N32)] ^= r; 701*8e33eff8Schristos r -= i; 702*8e33eff8Schristos psfmt32[idxof((i + mid + lag) % N32)] ^= r; 703*8e33eff8Schristos psfmt32[idxof(i)] = r; 704*8e33eff8Schristos i = (i + 1) % N32; 705*8e33eff8Schristos } 706*8e33eff8Schristos 707*8e33eff8Schristos ctx->idx = N32; 708*8e33eff8Schristos period_certification(ctx); 709*8e33eff8Schristos ctx->initialized = 1; 710*8e33eff8Schristos 711*8e33eff8Schristos return ctx; 712*8e33eff8Schristos } 713*8e33eff8Schristos 714*8e33eff8Schristos void fini_gen_rand(sfmt_t *ctx) { 715*8e33eff8Schristos assert(ctx != NULL); 716*8e33eff8Schristos 717*8e33eff8Schristos ctx->initialized = 0; 718*8e33eff8Schristos free(ctx); 719*8e33eff8Schristos } 720