xref: /netbsd-src/external/bsd/jemalloc.old/include/jemalloc/internal/prng.h (revision 8e33eff89e26cf71871ead62f0d5063e1313c33a)
1*8e33eff8Schristos #ifndef JEMALLOC_INTERNAL_PRNG_H
2*8e33eff8Schristos #define JEMALLOC_INTERNAL_PRNG_H
3*8e33eff8Schristos 
4*8e33eff8Schristos #include "jemalloc/internal/atomic.h"
5*8e33eff8Schristos #include "jemalloc/internal/bit_util.h"
6*8e33eff8Schristos 
7*8e33eff8Schristos /*
8*8e33eff8Schristos  * Simple linear congruential pseudo-random number generator:
9*8e33eff8Schristos  *
10*8e33eff8Schristos  *   prng(y) = (a*x + c) % m
11*8e33eff8Schristos  *
12*8e33eff8Schristos  * where the following constants ensure maximal period:
13*8e33eff8Schristos  *
14*8e33eff8Schristos  *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
15*8e33eff8Schristos  *   c == Odd number (relatively prime to 2^n).
16*8e33eff8Schristos  *   m == 2^32
17*8e33eff8Schristos  *
18*8e33eff8Schristos  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
19*8e33eff8Schristos  *
20*8e33eff8Schristos  * This choice of m has the disadvantage that the quality of the bits is
21*8e33eff8Schristos  * proportional to bit position.  For example, the lowest bit has a cycle of 2,
22*8e33eff8Schristos  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
23*8e33eff8Schristos  * bits.
24*8e33eff8Schristos  */
25*8e33eff8Schristos 
26*8e33eff8Schristos /******************************************************************************/
27*8e33eff8Schristos /* INTERNAL DEFINITIONS -- IGNORE */
28*8e33eff8Schristos /******************************************************************************/
29*8e33eff8Schristos #define PRNG_A_32	UINT32_C(1103515241)
30*8e33eff8Schristos #define PRNG_C_32	UINT32_C(12347)
31*8e33eff8Schristos 
32*8e33eff8Schristos #define PRNG_A_64	UINT64_C(6364136223846793005)
33*8e33eff8Schristos #define PRNG_C_64	UINT64_C(1442695040888963407)
34*8e33eff8Schristos 
35*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE uint32_t
36*8e33eff8Schristos prng_state_next_u32(uint32_t state) {
37*8e33eff8Schristos 	return (state * PRNG_A_32) + PRNG_C_32;
38*8e33eff8Schristos }
39*8e33eff8Schristos 
40*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE uint64_t
41*8e33eff8Schristos prng_state_next_u64(uint64_t state) {
42*8e33eff8Schristos 	return (state * PRNG_A_64) + PRNG_C_64;
43*8e33eff8Schristos }
44*8e33eff8Schristos 
45*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE size_t
46*8e33eff8Schristos prng_state_next_zu(size_t state) {
47*8e33eff8Schristos #if LG_SIZEOF_PTR == 2
48*8e33eff8Schristos 	return (state * PRNG_A_32) + PRNG_C_32;
49*8e33eff8Schristos #elif LG_SIZEOF_PTR == 3
50*8e33eff8Schristos 	return (state * PRNG_A_64) + PRNG_C_64;
51*8e33eff8Schristos #else
52*8e33eff8Schristos #error Unsupported pointer size
53*8e33eff8Schristos #endif
54*8e33eff8Schristos }
55*8e33eff8Schristos 
56*8e33eff8Schristos /******************************************************************************/
57*8e33eff8Schristos /* BEGIN PUBLIC API */
58*8e33eff8Schristos /******************************************************************************/
59*8e33eff8Schristos 
60*8e33eff8Schristos /*
61*8e33eff8Schristos  * The prng_lg_range functions give a uniform int in the half-open range [0,
62*8e33eff8Schristos  * 2**lg_range).  If atomic is true, they do so safely from multiple threads.
63*8e33eff8Schristos  * Multithreaded 64-bit prngs aren't supported.
64*8e33eff8Schristos  */
65*8e33eff8Schristos 
66*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE uint32_t
67*8e33eff8Schristos prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) {
68*8e33eff8Schristos 	uint32_t ret, state0, state1;
69*8e33eff8Schristos 
70*8e33eff8Schristos 	assert(lg_range > 0);
71*8e33eff8Schristos 	assert(lg_range <= 32);
72*8e33eff8Schristos 
73*8e33eff8Schristos 	state0 = atomic_load_u32(state, ATOMIC_RELAXED);
74*8e33eff8Schristos 
75*8e33eff8Schristos 	if (atomic) {
76*8e33eff8Schristos 		do {
77*8e33eff8Schristos 			state1 = prng_state_next_u32(state0);
78*8e33eff8Schristos 		} while (!atomic_compare_exchange_weak_u32(state, &state0,
79*8e33eff8Schristos 		    state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
80*8e33eff8Schristos 	} else {
81*8e33eff8Schristos 		state1 = prng_state_next_u32(state0);
82*8e33eff8Schristos 		atomic_store_u32(state, state1, ATOMIC_RELAXED);
83*8e33eff8Schristos 	}
84*8e33eff8Schristos 	ret = state1 >> (32 - lg_range);
85*8e33eff8Schristos 
86*8e33eff8Schristos 	return ret;
87*8e33eff8Schristos }
88*8e33eff8Schristos 
89*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE uint64_t
90*8e33eff8Schristos prng_lg_range_u64(uint64_t *state, unsigned lg_range) {
91*8e33eff8Schristos 	uint64_t ret, state1;
92*8e33eff8Schristos 
93*8e33eff8Schristos 	assert(lg_range > 0);
94*8e33eff8Schristos 	assert(lg_range <= 64);
95*8e33eff8Schristos 
96*8e33eff8Schristos 	state1 = prng_state_next_u64(*state);
97*8e33eff8Schristos 	*state = state1;
98*8e33eff8Schristos 	ret = state1 >> (64 - lg_range);
99*8e33eff8Schristos 
100*8e33eff8Schristos 	return ret;
101*8e33eff8Schristos }
102*8e33eff8Schristos 
103*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE size_t
104*8e33eff8Schristos prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) {
105*8e33eff8Schristos 	size_t ret, state0, state1;
106*8e33eff8Schristos 
107*8e33eff8Schristos 	assert(lg_range > 0);
108*8e33eff8Schristos 	assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
109*8e33eff8Schristos 
110*8e33eff8Schristos 	state0 = atomic_load_zu(state, ATOMIC_RELAXED);
111*8e33eff8Schristos 
112*8e33eff8Schristos 	if (atomic) {
113*8e33eff8Schristos 		do {
114*8e33eff8Schristos 			state1 = prng_state_next_zu(state0);
115*8e33eff8Schristos 		} while (atomic_compare_exchange_weak_zu(state, &state0,
116*8e33eff8Schristos 		    state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
117*8e33eff8Schristos 	} else {
118*8e33eff8Schristos 		state1 = prng_state_next_zu(state0);
119*8e33eff8Schristos 		atomic_store_zu(state, state1, ATOMIC_RELAXED);
120*8e33eff8Schristos 	}
121*8e33eff8Schristos 	ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
122*8e33eff8Schristos 
123*8e33eff8Schristos 	return ret;
124*8e33eff8Schristos }
125*8e33eff8Schristos 
126*8e33eff8Schristos /*
127*8e33eff8Schristos  * The prng_range functions behave like the prng_lg_range, but return a result
128*8e33eff8Schristos  * in [0, range) instead of [0, 2**lg_range).
129*8e33eff8Schristos  */
130*8e33eff8Schristos 
131*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE uint32_t
132*8e33eff8Schristos prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) {
133*8e33eff8Schristos 	uint32_t ret;
134*8e33eff8Schristos 	unsigned lg_range;
135*8e33eff8Schristos 
136*8e33eff8Schristos 	assert(range > 1);
137*8e33eff8Schristos 
138*8e33eff8Schristos 	/* Compute the ceiling of lg(range). */
139*8e33eff8Schristos 	lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
140*8e33eff8Schristos 
141*8e33eff8Schristos 	/* Generate a result in [0..range) via repeated trial. */
142*8e33eff8Schristos 	do {
143*8e33eff8Schristos 		ret = prng_lg_range_u32(state, lg_range, atomic);
144*8e33eff8Schristos 	} while (ret >= range);
145*8e33eff8Schristos 
146*8e33eff8Schristos 	return ret;
147*8e33eff8Schristos }
148*8e33eff8Schristos 
149*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE uint64_t
150*8e33eff8Schristos prng_range_u64(uint64_t *state, uint64_t range) {
151*8e33eff8Schristos 	uint64_t ret;
152*8e33eff8Schristos 	unsigned lg_range;
153*8e33eff8Schristos 
154*8e33eff8Schristos 	assert(range > 1);
155*8e33eff8Schristos 
156*8e33eff8Schristos 	/* Compute the ceiling of lg(range). */
157*8e33eff8Schristos 	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
158*8e33eff8Schristos 
159*8e33eff8Schristos 	/* Generate a result in [0..range) via repeated trial. */
160*8e33eff8Schristos 	do {
161*8e33eff8Schristos 		ret = prng_lg_range_u64(state, lg_range);
162*8e33eff8Schristos 	} while (ret >= range);
163*8e33eff8Schristos 
164*8e33eff8Schristos 	return ret;
165*8e33eff8Schristos }
166*8e33eff8Schristos 
167*8e33eff8Schristos JEMALLOC_ALWAYS_INLINE size_t
168*8e33eff8Schristos prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) {
169*8e33eff8Schristos 	size_t ret;
170*8e33eff8Schristos 	unsigned lg_range;
171*8e33eff8Schristos 
172*8e33eff8Schristos 	assert(range > 1);
173*8e33eff8Schristos 
174*8e33eff8Schristos 	/* Compute the ceiling of lg(range). */
175*8e33eff8Schristos 	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
176*8e33eff8Schristos 
177*8e33eff8Schristos 	/* Generate a result in [0..range) via repeated trial. */
178*8e33eff8Schristos 	do {
179*8e33eff8Schristos 		ret = prng_lg_range_zu(state, lg_range, atomic);
180*8e33eff8Schristos 	} while (ret >= range);
181*8e33eff8Schristos 
182*8e33eff8Schristos 	return ret;
183*8e33eff8Schristos }
184*8e33eff8Schristos 
185*8e33eff8Schristos #endif /* JEMALLOC_INTERNAL_PRNG_H */
186