1 /* $NetBSD: octeon_rnm.c,v 1.15 2022/03/19 11:55:03 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 2007 Internet Initiative Japan, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Cavium Octeon Random Number Generator / Random Number Memory `RNM' 31 * 32 * The RNM unit consists of: 33 * 34 * 1. 128 ring oscillators 35 * 2. an LFSR/SHA-1 conditioner 36 * 3. a 512-byte FIFO 37 * 38 * When the unit is enabled, there are three modes of operation: 39 * 40 * (a) deterministic: the ring oscillators are disabled and the 41 * LFSR/SHA-1 conditioner operates on fixed inputs to give 42 * reproducible results for testing, 43 * 44 * (b) conditioned entropy: the ring oscillators are enabled and 45 * samples from them are fed through the LFSR/SHA-1 46 * conditioner before being put into the FIFO, and 47 * 48 * (c) raw entropy: the ring oscillators are enabled, and a group 49 * of eight of them selected at any one time is sampled and 50 * fed into the FIFO. 51 * 52 * Details: 53 * 54 * - The FIFO is refilled whenever we read out of it, either with 55 * a load address or an IOBDMA operation. 56 * 57 * - The conditioner takes 81 cycles to produce a 64-bit block of 58 * output in the FIFO whether in deterministic or conditioned 59 * entropy mode, each block consisting of the first 64 bits of a 60 * SHA-1 hash. 61 * 62 * - A group of eight ring oscillators take 8 cycles to produce a 63 * 64-bit block of output in the FIFO in raw entropy mode, each 64 * block consisting of eight consecutive samples from each RO in 65 * parallel. 66 * 67 * The first sample of each RO always seems to be zero. Further, 68 * consecutive samples from a single ring oscillator are not 69 * independent, so naive debiasing like a von Neumann extractor 70 * falls flat on its face. And parallel ring oscillators powered 71 * by the same source may not be independent either, if they end 72 * up locked. 73 * 74 * We read out one FIFO's worth of raw samples from groups of 8 75 * ring oscillators at a time, of 128 total, by going through them 76 * round robin. We take 32 consecutive samples from each ring 77 * oscillator in a group of 8 in parallel before we count one bit 78 * of entropy. To get 256 bits of entropy, we read 4Kbit of data 79 * from each of two 8-RO groups. 80 * 81 * We could use the on-board LFSR/SHA-1 conditioner like the Linux 82 * driver written by Cavium does, but it's not clear how many RO 83 * samples go into the conditioner, and our entropy pool is a 84 * perfectly good conditioner itself, so it seems there is little 85 * advantage -- other than expedience -- to using the LFSR/SHA-1 86 * conditioner. All the manual says is that it samples 125 of the 87 * 128 ROs. But the Cavium SHA-1 CPU instruction is advertised to 88 * have a latency of 100 cycles, so it seems implausible that much 89 * more than one sample from each RO could be squeezed in there. 90 * 91 * The hardware exposes only 64 bits of each SHA-1 hash, and the 92 * Linux driver uses 32 bits of that -- which, if treated as full 93 * entropy, would mean an assessment of 3.9 bits of RO samples to 94 * get 1 bit of entropy, whereas we take 256 bits of RO samples to 95 * get one bit of entropy, so this seems reasonably conservative. 96 * 97 * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference 98 * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008. 99 */ 100 101 #include <sys/cdefs.h> 102 __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.15 2022/03/19 11:55:03 riastradh Exp $"); 103 104 #include <sys/param.h> 105 #include <sys/device.h> 106 #include <sys/kernel.h> 107 #include <sys/rndsource.h> 108 #include <sys/systm.h> 109 110 #include <mips/locore.h> 111 #include <mips/cavium/octeonreg.h> 112 #include <mips/cavium/octeonvar.h> 113 #include <mips/cavium/include/iobusvar.h> 114 #include <mips/cavium/dev/octeon_rnmreg.h> 115 #include <mips/cavium/dev/octeon_corereg.h> 116 117 #include <sys/bus.h> 118 119 //#define OCTRNM_DEBUG 120 121 #define ENT_DELAY_CLOCK 8 /* cycles for each 64-bit RO sample batch */ 122 #define RNG_DELAY_CLOCK 81 /* cycles for each SHA-1 output */ 123 #define NROGROUPS 16 124 #define RNG_FIFO_WORDS (512/sizeof(uint64_t)) 125 126 struct octrnm_softc { 127 uint64_t sc_sample[RNG_FIFO_WORDS]; 128 bus_space_tag_t sc_bust; 129 bus_space_handle_t sc_regh; 130 krndsource_t sc_rndsrc; /* /dev/random source */ 131 unsigned sc_rogroup; 132 }; 133 134 static int octrnm_match(device_t, struct cfdata *, void *); 135 static void octrnm_attach(device_t, device_t, void *); 136 static void octrnm_rng(size_t, void *); 137 static void octrnm_reset(struct octrnm_softc *); 138 static void octrnm_conditioned_deterministic(struct octrnm_softc *); 139 static void octrnm_conditioned_entropy(struct octrnm_softc *); 140 static void octrnm_raw_entropy(struct octrnm_softc *, unsigned); 141 static uint64_t octrnm_load(struct octrnm_softc *); 142 static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned); 143 static void octrnm_delay(uint32_t); 144 145 CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc), 146 octrnm_match, octrnm_attach, NULL, NULL); 147 148 static int 149 octrnm_match(device_t parent, struct cfdata *cf, void *aux) 150 { 151 struct iobus_attach_args *aa = aux; 152 153 if (strcmp(cf->cf_name, aa->aa_name) != 0) 154 return 0; 155 if (cf->cf_unit != aa->aa_unitno) 156 return 0; 157 return 1; 158 } 159 160 static void 161 octrnm_attach(device_t parent, device_t self, void *aux) 162 { 163 struct octrnm_softc *sc = device_private(self); 164 struct iobus_attach_args *aa = aux; 165 uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b); 166 167 aprint_normal("\n"); 168 169 /* Map the device registers, all two of them. */ 170 sc->sc_bust = aa->aa_bust; 171 if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE, 172 0, &sc->sc_regh) != 0) { 173 aprint_error_dev(self, "unable to map device\n"); 174 return; 175 } 176 177 /* Verify that the built-in self-test succeeded. */ 178 bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh, 179 RNM_BIST_STATUS_OFFSET); 180 if (bist_status) { 181 aprint_error_dev(self, "RNG built in self test failed: %#lx\n", 182 bist_status); 183 return; 184 } 185 186 /* 187 * Reset the core, enable the RNG engine without entropy, wait 188 * 81 cycles for it to produce a single sample, and draw the 189 * deterministic sample to test. 190 * 191 * XXX Verify that the output matches the SHA-1 computation 192 * described by the data sheet, not just a known answer. 193 */ 194 octrnm_reset(sc); 195 octrnm_conditioned_deterministic(sc); 196 octrnm_delay(RNG_DELAY_CLOCK*1); 197 sample = octrnm_load(sc); 198 if (sample != expected) 199 aprint_error_dev(self, "self-test: read %016"PRIx64"," 200 " expected %016"PRIx64, sample, expected); 201 202 /* 203 * Reset the core again to clear the FIFO, and enable the RNG 204 * engine with entropy exposed directly. Start from the first 205 * group of ring oscillators; as we gather samples we will 206 * rotate through the rest of them. 207 */ 208 octrnm_reset(sc); 209 sc->sc_rogroup = 0; 210 octrnm_raw_entropy(sc, sc->sc_rogroup); 211 octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS); 212 213 /* Attach the rndsource. */ 214 rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc); 215 rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG, 216 RND_FLAG_DEFAULT | RND_FLAG_HASCB); 217 } 218 219 static void 220 octrnm_rng(size_t nbytes, void *vsc) 221 { 222 const unsigned BPB = 256; /* bits of data per bit of entropy */ 223 struct octrnm_softc *sc = vsc; 224 uint64_t *samplepos; 225 size_t needed = NBBY*nbytes; 226 unsigned i; 227 228 /* Sample the ring oscillators round-robin. */ 229 while (needed) { 230 /* 231 * Switch to the next RO group once we drain the FIFO. 232 * By the time rnd_add_data is done, we will have 233 * processed all 512 bytes of the FIFO. We assume it 234 * takes at least one cycle per byte (realistically, 235 * more like ~80cpb to draw from the FIFO and then 236 * process it with rnd_add_data), so there is no need 237 * for any other delays. 238 */ 239 sc->sc_rogroup++; 240 sc->sc_rogroup %= NROGROUPS; 241 octrnm_raw_entropy(sc, sc->sc_rogroup); 242 243 /* 244 * Gather quarter the FIFO at a time -- we are limited 245 * to 128 bytes because of limits on the CVMSEG buffer. 246 */ 247 CTASSERT(sizeof sc->sc_sample == 512); 248 CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS); 249 for (samplepos = sc->sc_sample, i = 0; i < 4; i++) { 250 octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4); 251 samplepos += RNG_FIFO_WORDS / 4; 252 } 253 #ifdef OCTRNM_DEBUG 254 hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample); 255 #endif 256 rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample, 257 sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB); 258 needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB)); 259 260 /* Now's a good time to yield. */ 261 preempt_point(); 262 } 263 264 /* Zero the sample. */ 265 explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample); 266 } 267 268 /* 269 * octrnm_reset(sc) 270 * 271 * Reset the RNM unit, disabling it and clearing the FIFO. 272 */ 273 static void 274 octrnm_reset(struct octrnm_softc *sc) 275 { 276 277 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 278 RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST); 279 } 280 281 /* 282 * octrnm_conditioned_deterministic(sc) 283 * 284 * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with 285 * no entropy, for the next data loaded into the FIFO. 286 */ 287 static void 288 octrnm_conditioned_deterministic(struct octrnm_softc *sc) 289 { 290 291 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 292 RNM_CTL_STATUS_RNG_EN); 293 } 294 295 /* 296 * octrnm_conditioned_entropy(sc) 297 * 298 * Switch the RNM unit to generate ring oscillator samples 299 * conditioned with an LFSR/SHA-1, for the next data loaded into 300 * the FIFO. 301 */ 302 static void __unused 303 octrnm_conditioned_entropy(struct octrnm_softc *sc) 304 { 305 306 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 307 RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN); 308 } 309 310 /* 311 * octrnm_raw_entropy(sc, rogroup) 312 * 313 * Switch the RNM unit to generate raw ring oscillator samples 314 * from the specified group of eight ring oscillator. 315 */ 316 static void 317 octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup) 318 { 319 uint64_t ctl = 0; 320 321 ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */ 322 ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */ 323 ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */ 324 ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK); 325 326 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 327 ctl); 328 } 329 330 /* 331 * octrnm_load(sc) 332 * 333 * Load a single 64-bit word out of the FIFO. 334 */ 335 static uint64_t 336 octrnm_load(struct octrnm_softc *sc) 337 { 338 uint64_t addr = OCTEON_ADDR_IO_DID(RNM_MAJOR_DID, RNM_SUB_DID); 339 340 return octeon_xkphys_read_8(addr); 341 } 342 343 /* 344 * octrnm_iobdma(sc, buf, nwords) 345 * 346 * Load nwords, at most 32, out of the FIFO into buf. 347 */ 348 static void 349 octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords) 350 { 351 /* ``scraddr'' part is index in 64-bit words, not address */ 352 size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm); 353 uint64_t iobdma = IOBDMA_CREATE(RNM_MAJOR_DID, RNM_SUB_DID, 354 scraddr / sizeof(uint64_t), nwords, 0); 355 356 KASSERT(nwords < 128); /* iobdma address restriction */ 357 KASSERT(nwords <= CVMSEG_LM_RNM_SIZE); /* size of CVMSEG LM buffer */ 358 359 octeon_iobdma_write_8(iobdma); 360 OCTEON_SYNCIOBDMA; 361 for (; nwords --> 0; scraddr += 8) 362 *buf++ = octeon_cvmseg_read_8(scraddr); 363 } 364 365 /* 366 * octrnm_delay(ncycles) 367 * 368 * Wait ncycles, at most UINT32_MAX/2 so we behave reasonably even 369 * if the cycle counter rolls over. 370 */ 371 static void 372 octrnm_delay(uint32_t ncycles) 373 { 374 uint32_t deadline = mips3_cp0_count_read() + ncycles; 375 376 KASSERT(ncycles <= UINT32_MAX/2); 377 378 while ((deadline - mips3_cp0_count_read()) < ncycles) 379 continue; 380 } 381