1 /* $NetBSD: octeon_rnm.c,v 1.12 2020/06/18 13:52:08 simonb Exp $ */ 2 3 /* 4 * Copyright (c) 2007 Internet Initiative Japan, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Cavium Octeon Random Number Generator / Random Number Memory `RNM' 31 * 32 * The RNM unit consists of: 33 * 34 * 1. 128 ring oscillators 35 * 2. an LFSR/SHA-1 conditioner 36 * 3. a 512-byte FIFO 37 * 38 * When the unit is enabled, there are three modes of operation: 39 * 40 * (a) deterministic: the ring oscillators are disabled and the 41 * LFSR/SHA-1 conditioner operates on fixed inputs to give 42 * reproducible results for testing, 43 * 44 * (b) conditioned entropy: the ring oscillators are enabled and 45 * samples from them are fed through the LFSR/SHA-1 46 * conditioner before being put into the FIFO, and 47 * 48 * (c) raw entropy: the ring oscillators are enabled, and a group 49 * of eight of them selected at any one time is sampled and 50 * fed into the FIFO. 51 * 52 * Details: 53 * 54 * - The FIFO is refilled whenever we read out of it, either with 55 * a load address or an IOBDMA operation. 56 * 57 * - The conditioner takes 81 cycles to produce a 64-bit block of 58 * output in the FIFO whether in deterministic or conditioned 59 * entropy mode, each block consisting of the first 64 bits of a 60 * SHA-1 hash. 61 * 62 * - A group of eight ring oscillators take 8 cycles to produce a 63 * 64-bit block of output in the FIFO in raw entropy mode, each 64 * block consisting of eight consecutive samples from each RO in 65 * parallel. 66 * 67 * The first sample of each RO always seems to be zero. Further, 68 * consecutive samples from a single ring oscillator are not 69 * independent, so naive debiasing like a von Neumann extractor 70 * falls flat on its face. And parallel ring oscillators powered 71 * by the same source may not be independent either, if they end 72 * up locked. 73 * 74 * We read out one FIFO's worth of raw samples from groups of 8 75 * ring oscillators at a time, of 128 total, by going through them 76 * round robin. We take 32 consecutive samples from each ring 77 * oscillator in a group of 8 in parallel before we count one bit 78 * of entropy. To get 256 bits of entropy, we read 4Kbit of data 79 * from each of two 8-RO groups. 80 * 81 * We could use the on-board LFSR/SHA-1 conditioner like the Linux 82 * driver written by Cavium does, but it's not clear how many RO 83 * samples go into the conditioner, and our entropy pool is a 84 * perfectly good conditioner itself, so it seems there is little 85 * advantage -- other than expedience -- to using the LFSR/SHA-1 86 * conditioner. All the manual says is that it samples 125 of the 87 * 128 ROs. But the Cavium SHA-1 CPU instruction is advertised to 88 * have a latency of 100 cycles, so it seems implausible that much 89 * more than one sample from each RO could be squeezed in there. 90 * 91 * The hardware exposes only 64 bits of each SHA-1 hash, and the 92 * Linux driver uses 32 bits of that -- which, if treated as full 93 * entropy, would mean an assessment of 3.9 bits of RO samples to 94 * get 1 bit of entropy, whereas we take 256 bits of RO samples to 95 * get one bit of entropy, so this seems reasonably conservative. 96 * 97 * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference 98 * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008. 99 */ 100 101 #include <sys/cdefs.h> 102 __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.12 2020/06/18 13:52:08 simonb Exp $"); 103 104 #include <sys/param.h> 105 #include <sys/device.h> 106 #include <sys/kernel.h> 107 #include <sys/rndsource.h> 108 #include <sys/systm.h> 109 110 #include <mips/locore.h> 111 #include <mips/cavium/octeonreg.h> 112 #include <mips/cavium/octeonvar.h> 113 #include <mips/cavium/include/iobusvar.h> 114 #include <mips/cavium/dev/octeon_rnmreg.h> 115 #include <mips/cavium/dev/octeon_corereg.h> 116 117 #include <sys/bus.h> 118 119 //#define OCTRNM_DEBUG 120 121 #define ENT_DELAY_CLOCK 8 /* cycles for each 64-bit RO sample batch */ 122 #define RNG_DELAY_CLOCK 81 /* cycles for each SHA-1 output */ 123 #define NROGROUPS 16 124 #define RNG_FIFO_WORDS (512/sizeof(uint64_t)) 125 126 struct octrnm_softc { 127 uint64_t sc_sample[RNG_FIFO_WORDS]; 128 bus_space_tag_t sc_bust; 129 bus_space_handle_t sc_regh; 130 kmutex_t sc_lock; 131 krndsource_t sc_rndsrc; /* /dev/random source */ 132 unsigned sc_rogroup; 133 }; 134 135 static int octrnm_match(device_t, struct cfdata *, void *); 136 static void octrnm_attach(device_t, device_t, void *); 137 static void octrnm_rng(size_t, void *); 138 static void octrnm_reset(struct octrnm_softc *); 139 static void octrnm_conditioned_deterministic(struct octrnm_softc *); 140 static void octrnm_conditioned_entropy(struct octrnm_softc *); 141 static void octrnm_raw_entropy(struct octrnm_softc *, unsigned); 142 static uint64_t octrnm_load(struct octrnm_softc *); 143 static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned); 144 static void octrnm_delay(uint32_t); 145 146 CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc), 147 octrnm_match, octrnm_attach, NULL, NULL); 148 149 static int 150 octrnm_match(device_t parent, struct cfdata *cf, void *aux) 151 { 152 struct iobus_attach_args *aa = aux; 153 154 if (strcmp(cf->cf_name, aa->aa_name) != 0) 155 return 0; 156 if (cf->cf_unit != aa->aa_unitno) 157 return 0; 158 return 1; 159 } 160 161 static void 162 octrnm_attach(device_t parent, device_t self, void *aux) 163 { 164 struct octrnm_softc *sc = device_private(self); 165 struct iobus_attach_args *aa = aux; 166 uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b); 167 168 aprint_normal("\n"); 169 170 /* Map the device registers, all two of them. */ 171 sc->sc_bust = aa->aa_bust; 172 if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE, 173 0, &sc->sc_regh) != 0) { 174 aprint_error_dev(self, "unable to map device\n"); 175 return; 176 } 177 178 /* Verify that the built-in self-test succeeded. */ 179 bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh, 180 RNM_BIST_STATUS_OFFSET); 181 if (bist_status) { 182 aprint_error_dev(self, "RNG built in self test failed: %#lx\n", 183 bist_status); 184 return; 185 } 186 187 /* Create a mutex to serialize access to the FIFO. */ 188 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_VM); 189 190 /* 191 * Reset the core, enable the RNG engine without entropy, wait 192 * 81 cycles for it to produce a single sample, and draw the 193 * deterministic sample to test. 194 * 195 * XXX Verify that the output matches the SHA-1 computation 196 * described by the data sheet, not just a known answer. 197 */ 198 octrnm_reset(sc); 199 octrnm_conditioned_deterministic(sc); 200 octrnm_delay(RNG_DELAY_CLOCK*1); 201 sample = octrnm_load(sc); 202 if (sample != expected) 203 aprint_error_dev(self, "self-test: read %016"PRIx64"," 204 " expected %016"PRIx64, sample, expected); 205 206 /* 207 * Reset the core again to clear the FIFO, and enable the RNG 208 * engine with entropy exposed directly. Start from the first 209 * group of ring oscillators; as we gather samples we will 210 * rotate through the rest of them. 211 */ 212 octrnm_reset(sc); 213 sc->sc_rogroup = 0; 214 octrnm_raw_entropy(sc, sc->sc_rogroup); 215 octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS); 216 217 /* Attach the rndsource. */ 218 rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc); 219 rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG, 220 RND_FLAG_DEFAULT | RND_FLAG_HASCB); 221 } 222 223 static void 224 octrnm_rng(size_t nbytes, void *vsc) 225 { 226 const unsigned BPB = 256; /* bits of data per bit of entropy */ 227 struct octrnm_softc *sc = vsc; 228 uint64_t *samplepos; 229 size_t needed = NBBY*nbytes; 230 unsigned i; 231 232 /* Sample the ring oscillators round-robin. */ 233 mutex_enter(&sc->sc_lock); 234 while (needed) { 235 /* 236 * Switch to the next RO group once we drain the FIFO. 237 * By the time rnd_add_data is done, we will have 238 * processed all 512 bytes of the FIFO. We assume it 239 * takes at least one cycle per byte (realistically, 240 * more like ~80cpb to draw from the FIFO and then 241 * process it with rnd_add_data), so there is no need 242 * for any other delays. 243 */ 244 sc->sc_rogroup++; 245 sc->sc_rogroup %= NROGROUPS; 246 octrnm_raw_entropy(sc, sc->sc_rogroup); 247 248 /* 249 * Gather quarter the FIFO at a time -- we are limited 250 * to 128 bytes because of limits on the CVMSEG buffer. 251 */ 252 CTASSERT(sizeof sc->sc_sample == 512); 253 CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS); 254 for (samplepos = sc->sc_sample, i = 0; i < 4; i++) { 255 octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4); 256 samplepos += RNG_FIFO_WORDS / 4; 257 } 258 #ifdef OCTRNM_DEBUG 259 hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample); 260 #endif 261 rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample, 262 sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB); 263 needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB)); 264 265 /* Yield if requested. */ 266 if (__predict_false(curcpu()->ci_schedstate.spc_flags & 267 SPCF_SHOULDYIELD)) { 268 mutex_exit(&sc->sc_lock); 269 preempt(); 270 mutex_enter(&sc->sc_lock); 271 } 272 } 273 mutex_exit(&sc->sc_lock); 274 275 /* Zero the sample. */ 276 explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample); 277 } 278 279 /* 280 * octrnm_reset(sc) 281 * 282 * Reset the RNM unit, disabling it and clearing the FIFO. 283 */ 284 static void 285 octrnm_reset(struct octrnm_softc *sc) 286 { 287 288 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 289 RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST); 290 } 291 292 /* 293 * octrnm_conditioned_deterministic(sc) 294 * 295 * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with 296 * no entropy, for the next data loaded into the FIFO. 297 */ 298 static void 299 octrnm_conditioned_deterministic(struct octrnm_softc *sc) 300 { 301 302 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 303 RNM_CTL_STATUS_RNG_EN); 304 } 305 306 /* 307 * octrnm_conditioned_entropy(sc) 308 * 309 * Switch the RNM unit to generate ring oscillator samples 310 * conditioned with an LFSR/SHA-1, for the next data loaded into 311 * the FIFO. 312 */ 313 static void __unused 314 octrnm_conditioned_entropy(struct octrnm_softc *sc) 315 { 316 317 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 318 RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN); 319 } 320 321 /* 322 * octrnm_raw_entropy(sc, rogroup) 323 * 324 * Switch the RNM unit to generate raw ring oscillator samples 325 * from the specified group of eight ring oscillator. 326 */ 327 static void 328 octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup) 329 { 330 uint64_t ctl = 0; 331 332 ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */ 333 ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */ 334 ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */ 335 ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK); 336 337 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 338 ctl); 339 } 340 341 /* 342 * octrnm_load(sc) 343 * 344 * Load a single 64-bit word out of the FIFO. 345 */ 346 static uint64_t 347 octrnm_load(struct octrnm_softc *sc) 348 { 349 uint64_t addr = OCTEON_ADDR_IO_DID(RNM_MAJOR_DID, RNM_SUB_DID); 350 351 return octeon_xkphys_read_8(addr); 352 } 353 354 /* 355 * octrnm_iobdma(sc, buf, nwords) 356 * 357 * Load nwords, at most 32, out of the FIFO into buf. 358 */ 359 static void 360 octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords) 361 { 362 /* ``scraddr'' part is index in 64-bit words, not address */ 363 size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm); 364 uint64_t iobdma = IOBDMA_CREATE(RNM_MAJOR_DID, RNM_SUB_DID, 365 scraddr / sizeof(uint64_t), nwords, 0); 366 367 KASSERT(nwords < 128); /* iobdma address restriction */ 368 KASSERT(nwords <= CVMSEG_LM_RNM_SIZE); /* size of CVMSEG LM buffer */ 369 370 octeon_iobdma_write_8(iobdma); 371 OCTEON_SYNCIOBDMA; 372 for (; nwords --> 0; scraddr += 8) 373 *buf++ = octeon_cvmseg_read_8(scraddr); 374 } 375 376 /* 377 * octrnm_delay(ncycles) 378 * 379 * Wait ncycles, at most UINT32_MAX/2 so we behave reasonably even 380 * if the cycle counter rolls over. 381 */ 382 static void 383 octrnm_delay(uint32_t ncycles) 384 { 385 uint32_t deadline = mips3_cp0_count_read() + ncycles; 386 387 KASSERT(ncycles <= UINT32_MAX/2); 388 389 while ((deadline - mips3_cp0_count_read()) < ncycles) 390 continue; 391 } 392