/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Ericsson AB
 */

#ifdef __RDSEED__
#include <x86intrin.h>
#endif
#include <unistd.h>

#include <rte_bitops.h>
#include <rte_branch_prediction.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_random.h>

struct __rte_cache_aligned rte_rand_state {
	uint64_t z1;
	uint64_t z2;
	uint64_t z3;
	uint64_t z4;
	uint64_t z5;
	RTE_CACHE_GUARD;
};

/* One instance each for every lcore id-equipped thread, and one
 * additional instance to be shared by all other threads (i.e., all
 * unregistered non-EAL threads).
 */
static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];

static uint32_t
__rte_rand_lcg32(uint32_t *seed)
{
	*seed = 1103515245U * *seed + 12345U;

	return *seed;
}

static uint64_t
__rte_rand_lcg64(uint32_t *seed)
{
	uint64_t low;
	uint64_t high;

	/* A 64-bit LCG would have been much cleaner, but good
	 * multiplier/increments for such seem hard to come by.
	 */

	low = __rte_rand_lcg32(seed);
	high = __rte_rand_lcg32(seed);

	return low | (high << 32);
}

static uint64_t
__rte_rand_lfsr258_gen_seed(uint32_t *seed, uint64_t min_value)
{
	uint64_t res;

	res = __rte_rand_lcg64(seed);

	if (res < min_value)
		res += min_value;

	return res;
}

static void
__rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
{
	uint32_t lcg_seed;

	lcg_seed = (uint32_t)(seed ^ (seed >> 32));

	state->z1 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 2UL);
	state->z2 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 512UL);
	state->z3 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 4096UL);
	state->z4 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 131072UL);
	state->z5 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 8388608UL);
}

void
rte_srand(uint64_t seed)
{
	unsigned int lcore_id;

	/* add lcore_id to seed to avoid having the same sequence */
	for (lcore_id = 0; lcore_id < RTE_DIM(rand_states); lcore_id++)
		__rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
}

static __rte_always_inline uint64_t
__rte_rand_lfsr258_comp(uint64_t z, uint64_t a, uint64_t b, uint64_t c,
			uint64_t d)
{
	return ((z & c) << d) ^ (((z << a) ^ z) >> b);
}
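/* Each call to __rte_rand_lfsr258_comp() advances one Tausworthe
 * component of the combined generator: 'a' and 'b' are the feedback
 * shift counts, 'c' masks off the low bits that are not part of the
 * component's state (the five components are 63, 55, 52, 47 and 41
 * bits wide, 258 bits in total, hence "lfsr258"), and 'd' is the
 * final left shift of the retained state. The minimum values passed
 * to __rte_rand_lfsr258_gen_seed() ensure that the bits kept by each
 * mask are never all zero, which would leave that component stuck at
 * zero.
 */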
/* Based on L’Ecuyer, P.: Tables of maximally equidistributed combined
 * LFSR generators.
 */

static __rte_always_inline uint64_t
__rte_rand_lfsr258(struct rte_rand_state *state)
{
	state->z1 = __rte_rand_lfsr258_comp(state->z1, 1UL, 53UL,
					    18446744073709551614UL, 10UL);
	state->z2 = __rte_rand_lfsr258_comp(state->z2, 24UL, 50UL,
					    18446744073709551104UL, 5UL);
	state->z3 = __rte_rand_lfsr258_comp(state->z3, 3UL, 23UL,
					    18446744073709547520UL, 29UL);
	state->z4 = __rte_rand_lfsr258_comp(state->z4, 5UL, 24UL,
					    18446744073709420544UL, 23UL);
	state->z5 = __rte_rand_lfsr258_comp(state->z5, 3UL, 33UL,
					    18446744073701163008UL, 8UL);

	return state->z1 ^ state->z2 ^ state->z3 ^ state->z4 ^ state->z5;
}

static __rte_always_inline
struct rte_rand_state *__rte_rand_get_state(void)
{
	unsigned int idx;

	idx = rte_lcore_id();

	/* last instance reserved for unregistered non-EAL threads */
	if (unlikely(idx == LCORE_ID_ANY))
		idx = RTE_MAX_LCORE;

	return &rand_states[idx];
}

uint64_t
rte_rand(void)
{
	struct rte_rand_state *state;

	state = __rte_rand_get_state();

	return __rte_rand_lfsr258(state);
}

uint64_t
rte_rand_max(uint64_t upper_bound)
{
	struct rte_rand_state *state;
	uint8_t ones;
	uint8_t leading_zeros;
	uint64_t mask = ~((uint64_t)0);
	uint64_t res;

	if (unlikely(upper_bound < 2))
		return 0;

	state = __rte_rand_get_state();

	ones = rte_popcount64(upper_bound);

	/* Handle power-of-2 upper_bound as a special case, since it
	 * has no bias issues.
	 */
	if (unlikely(ones == 1))
		return __rte_rand_lfsr258(state) & (upper_bound - 1);

	/* The approach to avoiding bias is to create a mask that
	 * stretches beyond the requested value range, up to the next
	 * power-of-2. In case the masked generated random value is
	 * equal to or greater than the upper bound, just discard the
	 * value and generate a new one.
	 */

	leading_zeros = rte_clz64(upper_bound);
	mask >>= leading_zeros;

	do {
		res = __rte_rand_lfsr258(state) & mask;
	} while (unlikely(res >= upper_bound));

	return res;
}

double
rte_drand(void)
{
	static const uint64_t denom = (uint64_t)1 << 53;
	uint64_t rand64 = rte_rand();

	/*
	 * The double mantissa only has 53 bits, so we uniformly mask off the
	 * high 11 bits and then floating-point divide by 2^53 to achieve a
	 * result in [0, 1).
	 *
	 * We are not allowed to emit 1.0, so denom must be one greater than
	 * the possible range of the preceding step.
	 */

	rand64 &= denom - 1;
	return (double)rand64 / denom;
}

static uint64_t
__rte_random_initial_seed(void)
{
#ifdef RTE_LIBEAL_USE_GETENTROPY
	int ge_rc;
	uint64_t ge_seed;

	ge_rc = getentropy(&ge_seed, sizeof(ge_seed));

	if (ge_rc == 0)
		return ge_seed;
#endif
#ifdef __RDSEED__
	unsigned int rdseed_low;
	unsigned int rdseed_high;

	/* first fallback: rdseed instruction, if available */
	if (_rdseed32_step(&rdseed_low) == 1 &&
	    _rdseed32_step(&rdseed_high) == 1)
		return (uint64_t)rdseed_low | ((uint64_t)rdseed_high << 32);
#endif
	/* second fallback: seed using rdtsc */
	return rte_get_tsc_cycles();
}

RTE_INIT(rte_rand_init)
{
	uint64_t seed;

	seed = __rte_random_initial_seed();

	rte_srand(seed);
}
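
/* A minimal usage sketch, kept out of the build behind a hypothetical
 * RTE_RAND_USAGE_EXAMPLE guard: it only illustrates the intended call
 * pattern for the API implemented above (rte_srand(), rte_rand(),
 * rte_rand_max() and rte_drand()), typically from an EAL thread after
 * rte_eal_init(). The function name and the guard macro are made up
 * for illustration and are not part of this library.
 */
#ifdef RTE_RAND_USAGE_EXAMPLE
static void
rte_rand_usage_example(void)
{
	uint64_t word;
	uint64_t bounded;
	double unit;

	/* Optional: reseeding gives a reproducible per-lcore sequence.
	 * Without it, the RTE_INIT constructor above has already seeded
	 * the states from getentropy(), rdseed or the TSC.
	 */
	rte_srand(42);

	word = rte_rand();		/* uniform over all 64-bit values */
	bounded = rte_rand_max(1000);	/* unbiased value in [0, 1000) */
	unit = rte_drand();		/* double in [0, 1) */

	(void)word;
	(void)bounded;
	(void)unit;
}
#endif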