xref: /netbsd-src/sys/arch/mips/cavium/dev/octeon_rnm.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /*	$NetBSD: octeon_rnm.c,v 1.15 2022/03/19 11:55:03 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2007 Internet Initiative Japan, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Cavium Octeon Random Number Generator / Random Number Memory `RNM'
31  *
32  *	The RNM unit consists of:
33  *
34  *	1. 128 ring oscillators
35  *	2. an LFSR/SHA-1 conditioner
36  *	3. a 512-byte FIFO
37  *
38  *	When the unit is enabled, there are three modes of operation:
39  *
40  *	(a) deterministic: the ring oscillators are disabled and the
41  *	    LFSR/SHA-1 conditioner operates on fixed inputs to give
42  *	    reproducible results for testing,
43  *
44  *	(b) conditioned entropy: the ring oscillators are enabled and
45  *	    samples from them are fed through the LFSR/SHA-1
46  *	    conditioner before being put into the FIFO, and
47  *
48  *	(c) raw entropy: the ring oscillators are enabled, and a group
49  *	    of eight of them selected at any one time is sampled and
50  *	    fed into the FIFO.
51  *
52  *	Details:
53  *
54  *	- The FIFO is refilled whenever we read out of it, either with
55  *	  a load address or an IOBDMA operation.
56  *
57  *	- The conditioner takes 81 cycles to produce a 64-bit block of
58  *	  output in the FIFO whether in deterministic or conditioned
59  *	  entropy mode, each block consisting of the first 64 bits of a
60  *	  SHA-1 hash.
61  *
62  *	- A group of eight ring oscillators take 8 cycles to produce a
63  *	  64-bit block of output in the FIFO in raw entropy mode, each
64  *	  block consisting of eight consecutive samples from each RO in
65  *	  parallel.
66  *
67  *	The first sample of each RO always seems to be zero.  Further,
68  *	consecutive samples from a single ring oscillator are not
69  *	independent, so naive debiasing like a von Neumann extractor
70  *	falls flat on its face.  And parallel ring oscillators powered
71  *	by the same source may not be independent either, if they end
72  *	up locked.
73  *
74  *	We read out one FIFO's worth of raw samples from groups of 8
75  *	ring oscillators at a time, of 128 total, by going through them
76  *	round robin.  We take 32 consecutive samples from each ring
77  *	oscillator in a group of 8 in parallel before we count one bit
78  *	of entropy.  To get 256 bits of entropy, we read 4Kbit of data
79  *	from each of two 8-RO groups.
80  *
81  *	We could use the on-board LFSR/SHA-1 conditioner like the Linux
82  *	driver written by Cavium does, but it's not clear how many RO
83  *	samples go into the conditioner, and our entropy pool is a
84  *	perfectly good conditioner itself, so it seems there is little
85  *	advantage -- other than expedience -- to using the LFSR/SHA-1
86  *	conditioner.  All the manual says is that it samples 125 of the
87  *	128 ROs.  But the Cavium SHA-1 CPU instruction is advertised to
88  *	have a latency of 100 cycles, so it seems implausible that much
89  *	more than one sample from each RO could be squeezed in there.
90  *
91  *	The hardware exposes only 64 bits of each SHA-1 hash, and the
92  *	Linux driver uses 32 bits of that -- which, if treated as full
93  *	entropy, would mean an assessment of 3.9 bits of RO samples to
94  *	get 1 bit of entropy, whereas we take 256 bits of RO samples to
95  *	get one bit of entropy, so this seems reasonably conservative.
96  *
97  * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference
98  * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008.
99  */
100 
101 #include <sys/cdefs.h>
102 __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.15 2022/03/19 11:55:03 riastradh Exp $");
103 
104 #include <sys/param.h>
105 #include <sys/device.h>
106 #include <sys/kernel.h>
107 #include <sys/rndsource.h>
108 #include <sys/systm.h>
109 
110 #include <mips/locore.h>
111 #include <mips/cavium/octeonreg.h>
112 #include <mips/cavium/octeonvar.h>
113 #include <mips/cavium/include/iobusvar.h>
114 #include <mips/cavium/dev/octeon_rnmreg.h>
115 #include <mips/cavium/dev/octeon_corereg.h>
116 
117 #include <sys/bus.h>
118 
119 //#define	OCTRNM_DEBUG
120 
/*
 * Timing and geometry of the RNM unit, as described at the top of
 * this file.
 */
#define	ENT_DELAY_CLOCK	8	/* cycles per 64-bit batch of raw RO samples */
#define	RNG_DELAY_CLOCK	81	/* cycles per 64-bit block of SHA-1 output */
#define	NROGROUPS	16	/* groups of 8 ring oscillators, 128 in all */
#define	RNG_FIFO_WORDS	(512 / sizeof(uint64_t))  /* 512-byte FIFO, in words */
125 
126 struct octrnm_softc {
127 	uint64_t		sc_sample[RNG_FIFO_WORDS];
128 	bus_space_tag_t		sc_bust;
129 	bus_space_handle_t	sc_regh;
130 	krndsource_t		sc_rndsrc;	/* /dev/random source */
131 	unsigned		sc_rogroup;
132 };
133 
134 static int octrnm_match(device_t, struct cfdata *, void *);
135 static void octrnm_attach(device_t, device_t, void *);
136 static void octrnm_rng(size_t, void *);
137 static void octrnm_reset(struct octrnm_softc *);
138 static void octrnm_conditioned_deterministic(struct octrnm_softc *);
139 static void octrnm_conditioned_entropy(struct octrnm_softc *);
140 static void octrnm_raw_entropy(struct octrnm_softc *, unsigned);
141 static uint64_t octrnm_load(struct octrnm_softc *);
142 static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned);
143 static void octrnm_delay(uint32_t);
144 
145 CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc),
146     octrnm_match, octrnm_attach, NULL, NULL);
147 
148 static int
149 octrnm_match(device_t parent, struct cfdata *cf, void *aux)
150 {
151 	struct iobus_attach_args *aa = aux;
152 
153 	if (strcmp(cf->cf_name, aa->aa_name) != 0)
154 		return 0;
155 	if (cf->cf_unit != aa->aa_unitno)
156 		return 0;
157 	return 1;
158 }
159 
160 static void
161 octrnm_attach(device_t parent, device_t self, void *aux)
162 {
163 	struct octrnm_softc *sc = device_private(self);
164 	struct iobus_attach_args *aa = aux;
165 	uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b);
166 
167 	aprint_normal("\n");
168 
169 	/* Map the device registers, all two of them.  */
170 	sc->sc_bust = aa->aa_bust;
171 	if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE,
172 	    0, &sc->sc_regh) != 0) {
173 		aprint_error_dev(self, "unable to map device\n");
174 		return;
175 	}
176 
177 	/* Verify that the built-in self-test succeeded.  */
178 	bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh,
179 	    RNM_BIST_STATUS_OFFSET);
180 	if (bist_status) {
181 		aprint_error_dev(self, "RNG built in self test failed: %#lx\n",
182 		    bist_status);
183 		return;
184 	}
185 
186 	/*
187 	 * Reset the core, enable the RNG engine without entropy, wait
188 	 * 81 cycles for it to produce a single sample, and draw the
189 	 * deterministic sample to test.
190 	 *
191 	 * XXX Verify that the output matches the SHA-1 computation
192 	 * described by the data sheet, not just a known answer.
193 	 */
194 	octrnm_reset(sc);
195 	octrnm_conditioned_deterministic(sc);
196 	octrnm_delay(RNG_DELAY_CLOCK*1);
197 	sample = octrnm_load(sc);
198 	if (sample != expected)
199 		aprint_error_dev(self, "self-test: read %016"PRIx64","
200 		    " expected %016"PRIx64, sample, expected);
201 
202 	/*
203 	 * Reset the core again to clear the FIFO, and enable the RNG
204 	 * engine with entropy exposed directly.  Start from the first
205 	 * group of ring oscillators; as we gather samples we will
206 	 * rotate through the rest of them.
207 	 */
208 	octrnm_reset(sc);
209 	sc->sc_rogroup = 0;
210 	octrnm_raw_entropy(sc, sc->sc_rogroup);
211 	octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS);
212 
213 	/* Attach the rndsource.  */
214 	rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc);
215 	rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG,
216 	    RND_FLAG_DEFAULT | RND_FLAG_HASCB);
217 }
218 
219 static void
220 octrnm_rng(size_t nbytes, void *vsc)
221 {
222 	const unsigned BPB = 256; /* bits of data per bit of entropy */
223 	struct octrnm_softc *sc = vsc;
224 	uint64_t *samplepos;
225 	size_t needed = NBBY*nbytes;
226 	unsigned i;
227 
228 	/* Sample the ring oscillators round-robin.  */
229 	while (needed) {
230 		/*
231 		 * Switch to the next RO group once we drain the FIFO.
232 		 * By the time rnd_add_data is done, we will have
233 		 * processed all 512 bytes of the FIFO.  We assume it
234 		 * takes at least one cycle per byte (realistically,
235 		 * more like ~80cpb to draw from the FIFO and then
236 		 * process it with rnd_add_data), so there is no need
237 		 * for any other delays.
238 		 */
239 		sc->sc_rogroup++;
240 		sc->sc_rogroup %= NROGROUPS;
241 		octrnm_raw_entropy(sc, sc->sc_rogroup);
242 
243 		/*
244 		 * Gather quarter the FIFO at a time -- we are limited
245 		 * to 128 bytes because of limits on the CVMSEG buffer.
246 		 */
247 		CTASSERT(sizeof sc->sc_sample == 512);
248 		CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS);
249 		for (samplepos = sc->sc_sample, i = 0; i < 4; i++) {
250 			octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4);
251 			samplepos += RNG_FIFO_WORDS / 4;
252 		}
253 #ifdef OCTRNM_DEBUG
254 		hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample);
255 #endif
256 		rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample,
257 		    sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB);
258 		needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB));
259 
260 		/* Now's a good time to yield.  */
261 		preempt_point();
262 	}
263 
264 	/* Zero the sample.  */
265 	explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample);
266 }
267 
268 /*
269  * octrnm_reset(sc)
270  *
271  *	Reset the RNM unit, disabling it and clearing the FIFO.
272  */
273 static void
274 octrnm_reset(struct octrnm_softc *sc)
275 {
276 
277 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
278 	    RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST);
279 }
280 
281 /*
282  * octrnm_conditioned_deterministic(sc)
283  *
284  *	Switch the RNM unit into the deterministic LFSR/SHA-1 mode with
285  *	no entropy, for the next data loaded into the FIFO.
286  */
287 static void
288 octrnm_conditioned_deterministic(struct octrnm_softc *sc)
289 {
290 
291 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
292 	    RNM_CTL_STATUS_RNG_EN);
293 }
294 
295 /*
296  * octrnm_conditioned_entropy(sc)
297  *
298  *	Switch the RNM unit to generate ring oscillator samples
299  *	conditioned with an LFSR/SHA-1, for the next data loaded into
300  *	the FIFO.
301  */
302 static void __unused
303 octrnm_conditioned_entropy(struct octrnm_softc *sc)
304 {
305 
306 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
307 	    RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN);
308 }
309 
310 /*
311  * octrnm_raw_entropy(sc, rogroup)
312  *
313  *	Switch the RNM unit to generate raw ring oscillator samples
314  *	from the specified group of eight ring oscillator.
315  */
316 static void
317 octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup)
318 {
319 	uint64_t ctl = 0;
320 
321 	ctl |= RNM_CTL_STATUS_RNG_EN;	/* enable FIFO */
322 	ctl |= RNM_CTL_STATUS_ENT_EN;	/* enable entropy source */
323 	ctl |= RNM_CTL_STATUS_EXP_ENT;	/* expose entropy without LFSR/SHA-1 */
324 	ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK);
325 
326 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
327 	    ctl);
328 }
329 
330 /*
331  * octrnm_load(sc)
332  *
333  *	Load a single 64-bit word out of the FIFO.
334  */
335 static uint64_t
336 octrnm_load(struct octrnm_softc *sc)
337 {
338 	uint64_t addr = OCTEON_ADDR_IO_DID(RNM_MAJOR_DID, RNM_SUB_DID);
339 
340 	return octeon_xkphys_read_8(addr);
341 }
342 
343 /*
344  * octrnm_iobdma(sc, buf, nwords)
345  *
346  *	Load nwords, at most 32, out of the FIFO into buf.
347  */
348 static void
349 octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords)
350 {
351  	/* ``scraddr'' part is index in 64-bit words, not address */
352 	size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm);
353 	uint64_t iobdma = IOBDMA_CREATE(RNM_MAJOR_DID, RNM_SUB_DID,
354 	    scraddr / sizeof(uint64_t), nwords, 0);
355 
356 	KASSERT(nwords < 128);			/* iobdma address restriction */
357 	KASSERT(nwords <= CVMSEG_LM_RNM_SIZE);	/* size of CVMSEG LM buffer */
358 
359 	octeon_iobdma_write_8(iobdma);
360 	OCTEON_SYNCIOBDMA;
361 	for (; nwords --> 0; scraddr += 8)
362 		*buf++ = octeon_cvmseg_read_8(scraddr);
363 }
364 
/*
 * octrnm_delay(ncycles)
 *
 *	Busy-wait until more than ncycles ticks of the CP0 Count
 *	register have elapsed.  ncycles must be at most UINT32_MAX/2
 *	so we behave reasonably even if the cycle counter rolls over.
 */
static void
octrnm_delay(uint32_t ncycles)
{
	const uint32_t start = mips3_cp0_count_read();

	KASSERT(ncycles <= UINT32_MAX/2);

	/*
	 * Compare elapsed ticks, not distance to a deadline.  The
	 * unsigned subtraction is correct across a rollover of the
	 * counter.  Testing `deadline - now < ncycles' would instead
	 * end the wait at once if the first poll saw the same count
	 * as the initial read, because the difference then equals
	 * ncycles.
	 */
	while ((uint32_t)(mips3_cp0_count_read() - start) <= ncycles)
		continue;
}
381