/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Ericsson AB
 */

#ifdef __RDSEED__
#include <x86intrin.h>
#endif
#include <unistd.h>

#include <rte_branch_prediction.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_random.h>

struct rte_rand_state {
	uint64_t z1;
	uint64_t z2;
	uint64_t z3;
	uint64_t z4;
	uint64_t z5;
} __rte_cache_aligned;

/* One instance each for every lcore id-equipped thread, and one
 * additional instance to be shared by all other threads (i.e., all
 * unregistered non-EAL threads).
 */
static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];

static uint32_t
__rte_rand_lcg32(uint32_t *seed)
{
	*seed = 1103515245U * *seed + 12345U;

	return *seed;
}

static uint64_t
__rte_rand_lcg64(uint32_t *seed)
{
	uint64_t low;
	uint64_t high;

	/* A 64-bit LCG would have been much cleaner, but good
	 * multiplier/increments for such seem hard to come by.
	 */

	low = __rte_rand_lcg32(seed);
	high = __rte_rand_lcg32(seed);

	return low | (high << 32);
}

static uint64_t
__rte_rand_lfsr258_gen_seed(uint32_t *seed, uint64_t min_value)
{
	uint64_t res;

	res = __rte_rand_lcg64(seed);

	if (res < min_value)
		res += min_value;

	return res;
}

static void
__rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
{
	uint32_t lcg_seed;

	lcg_seed = (uint32_t)(seed ^ (seed >> 32));

	state->z1 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 2UL);
	state->z2 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 512UL);
	state->z3 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 4096UL);
	state->z4 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 131072UL);
	state->z5 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 8388608UL);
}

void
rte_srand(uint64_t seed)
{
	unsigned int lcore_id;

	/* add lcore_id to seed to avoid having the same sequence; also
	 * seed the last entry, which is shared by unregistered non-EAL
	 * threads, so it never stays in the degenerate all-zero state
	 */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE + 1; lcore_id++)
		__rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
}

static __rte_always_inline uint64_t
__rte_rand_lfsr258_comp(uint64_t z, uint64_t a, uint64_t b, uint64_t c,
			uint64_t d)
{
	return ((z & c) << d) ^ (((z << a) ^ z) >> b);
}

/* Based on L’Ecuyer, P.: Tables of maximally equidistributed combined
 * LFSR generators.
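 *
 * Each call advances five Tausworthe (LFSR) components and XORs them
 * together; the combined sequence has a period of approximately 2^258.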
 */

static __rte_always_inline uint64_t
__rte_rand_lfsr258(struct rte_rand_state *state)
{
	state->z1 = __rte_rand_lfsr258_comp(state->z1, 1UL, 53UL,
					    18446744073709551614UL, 10UL);
	state->z2 = __rte_rand_lfsr258_comp(state->z2, 24UL, 50UL,
					    18446744073709551104UL, 5UL);
	state->z3 = __rte_rand_lfsr258_comp(state->z3, 3UL, 23UL,
					    18446744073709547520UL, 29UL);
	state->z4 = __rte_rand_lfsr258_comp(state->z4, 5UL, 24UL,
					    18446744073709420544UL, 23UL);
	state->z5 = __rte_rand_lfsr258_comp(state->z5, 3UL, 33UL,
					    18446744073701163008UL, 8UL);

	return state->z1 ^ state->z2 ^ state->z3 ^ state->z4 ^ state->z5;
}

static __rte_always_inline
struct rte_rand_state *__rte_rand_get_state(void)
{
	unsigned int idx;

	idx = rte_lcore_id();

	/* last instance reserved for unregistered non-EAL threads */
	if (unlikely(idx == LCORE_ID_ANY))
		idx = RTE_MAX_LCORE;

	return &rand_states[idx];
}

uint64_t
rte_rand(void)
{
	struct rte_rand_state *state;

	state = __rte_rand_get_state();

	return __rte_rand_lfsr258(state);
}

uint64_t
rte_rand_max(uint64_t upper_bound)
{
	struct rte_rand_state *state;
	uint8_t ones;
	uint8_t leading_zeros;
	uint64_t mask = ~((uint64_t)0);
	uint64_t res;

	if (unlikely(upper_bound < 2))
		return 0;

	state = __rte_rand_get_state();

	ones = __builtin_popcountll(upper_bound);

	/* Handle power-of-2 upper_bound as a special case, since it
	 * has no bias issues.
	 */
	if (unlikely(ones == 1))
		return __rte_rand_lfsr258(state) & (upper_bound - 1);

	/* The approach to avoiding bias is to create a mask that
	 * stretches beyond the requested value range, up to the next
	 * power-of-2. In case the masked generated random value is
	 * equal to or greater than the upper bound, just discard the
	 * value and generate a new one.
	 */

	leading_zeros = __builtin_clzll(upper_bound);
	mask >>= leading_zeros;

	do {
		res = __rte_rand_lfsr258(state) & mask;
	} while (unlikely(res >= upper_bound));

	return res;
}

double
rte_drand(void)
{
	static const uint64_t denom = (uint64_t)1 << 53;
	uint64_t rand64 = rte_rand();

	/*
	 * The double mantissa only has 53 bits, so we uniformly mask off the
	 * high 11 bits and then floating-point divide by 2^53 to achieve a
	 * result in [0, 1).
	 *
	 * We are not allowed to emit 1.0, so denom must be one greater than
	 * the possible range of the preceding step.
	 */

	rand64 &= denom - 1;
	return (double)rand64 / denom;
}

static uint64_t
__rte_random_initial_seed(void)
{
#ifdef RTE_LIBEAL_USE_GETENTROPY
	int ge_rc;
	uint64_t ge_seed;

	ge_rc = getentropy(&ge_seed, sizeof(ge_seed));

	if (ge_rc == 0)
		return ge_seed;
#endif
#ifdef __RDSEED__
	unsigned int rdseed_low;
	unsigned int rdseed_high;

	/* first fallback: rdseed instruction, if available */
	if (_rdseed32_step(&rdseed_low) == 1 &&
	    _rdseed32_step(&rdseed_high) == 1)
		return (uint64_t)rdseed_low | ((uint64_t)rdseed_high << 32);
#endif
	/* second fallback: seed using rdtsc */
	return rte_get_tsc_cycles();
}

RTE_INIT(rte_rand_init)
{
	uint64_t seed;

	seed = __rte_random_initial_seed();

	rte_srand(seed);
}
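
/* Usage sketch, for illustration only; num_entries stands for any
 * caller-provided, non-power-of-two table size:
 *
 *	uint64_t idx = rte_rand_max(num_entries);	// unbiased index in [0, num_entries)
 *	double frac = rte_drand();			// uniform double in [0, 1)
 */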