1 /*	$NetBSD: ubsec.c,v 1.17 2008/12/19 18:49:38 cegger Exp $	*/
2 /* $FreeBSD: src/sys/dev/ubsec/ubsec.c,v 1.6.2.6 2003/01/23 21:06:43 sam Exp $ */
3 /*	$OpenBSD: ubsec.c,v 1.127 2003/06/04 14:04:58 jason Exp $	*/
4 
5 /*
6  * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
7  * Copyright (c) 2000 Theo de Raadt (deraadt@openbsd.org)
8  * Copyright (c) 2001 Patrik Lindergren (patrik@ipunplugged.com)
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * Effort sponsored in part by the Defense Advanced Research Projects
32  * Agency (DARPA) and Air Force Research Laboratory, Air Force
33  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
34  *
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: ubsec.c,v 1.17 2008/12/19 18:49:38 cegger Exp $");
39 
40 #undef UBSEC_DEBUG
41 
42 /*
43  * uBsec 5[56]01, bcm580xx, bcm582x hardware crypto accelerator
44  */
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/endian.h>
50 #ifdef __NetBSD__
51   #define letoh16 htole16
52   #define letoh32 htole32
53 #define UBSEC_NO_RNG		/* until statistically tested */
54 #endif
55 #include <sys/errno.h>
56 #include <sys/malloc.h>
57 #include <sys/kernel.h>
58 #include <sys/mbuf.h>
59 #include <sys/device.h>
60 #include <sys/queue.h>
61 
62 #include <uvm/uvm_extern.h>
63 
64 #include <opencrypto/cryptodev.h>
65 #include <opencrypto/xform.h>
66 #ifdef __OpenBSD__
67  #include <dev/rndvar.h>
68  #include <sys/md5k.h>
69 #else
70  #include <sys/rnd.h>
71  #include <sys/md5.h>
72 #endif
73 #include <sys/sha1.h>
74 
75 #include <dev/pci/pcireg.h>
76 #include <dev/pci/pcivar.h>
77 #include <dev/pci/pcidevs.h>
78 
79 #include <dev/pci/ubsecreg.h>
80 #include <dev/pci/ubsecvar.h>
81 
82 /*
83  * Prototypes for the autoconfiguration and board management routines
84  */
85 static	int ubsec_probe(struct device *, struct cfdata *, void *);
86 static	void ubsec_attach(struct device *, struct device *, void *);
87 static	void ubsec_reset_board(struct ubsec_softc *);
88 static	void ubsec_init_board(struct ubsec_softc *);
89 static	void ubsec_init_pciregs(struct pci_attach_args *pa);
90 static	void ubsec_cleanchip(struct ubsec_softc *);
91 static	void ubsec_totalreset(struct ubsec_softc *);
92 static	int  ubsec_free_q(struct ubsec_softc*, struct ubsec_q *);
93 
94 #ifdef __OpenBSD__
95 struct cfattach ubsec_ca = {
96 	sizeof(struct ubsec_softc), ubsec_probe, ubsec_attach,
97 };
98 
99 struct cfdriver ubsec_cd = {
100 	0, "ubsec", DV_DULL
101 };
102 #else
103 CFATTACH_DECL(ubsec, sizeof(struct ubsec_softc), ubsec_probe, ubsec_attach,
104 	      NULL, NULL);
105 extern struct cfdriver ubsec_cd;
106 #endif
107 
108 /* patchable */
109 #ifdef	UBSEC_DEBUG
110 extern int ubsec_debug;
111 int ubsec_debug = 1;
112 #endif
113 
114 static	int	ubsec_intr(void *);
115 static	int	ubsec_newsession(void*, u_int32_t *, struct cryptoini *);
116 static	int	ubsec_freesession(void*, u_int64_t);
117 static	int	ubsec_process(void*, struct cryptop *, int hint);
118 static	void	ubsec_callback(struct ubsec_softc *, struct ubsec_q *);
119 static	void	ubsec_feed(struct ubsec_softc *);
120 static	void	ubsec_mcopy(struct mbuf *, struct mbuf *, int, int);
121 static	void	ubsec_callback2(struct ubsec_softc *, struct ubsec_q2 *);
122 static	void	ubsec_feed2(struct ubsec_softc *);
123 #ifndef UBSEC_NO_RNG
124 static	void	ubsec_rng(void *);
125 #endif /* UBSEC_NO_RNG */
126 static	int 	ubsec_dma_malloc(struct ubsec_softc *, bus_size_t,
127 				 struct ubsec_dma_alloc *, int);
128 static	void	ubsec_dma_free(struct ubsec_softc *, struct ubsec_dma_alloc *);
129 static	int	ubsec_dmamap_aligned(bus_dmamap_t);
130 
131 static	int	ubsec_kprocess(void*, struct cryptkop *, int);
132 static	int	ubsec_kprocess_modexp_sw(struct ubsec_softc *,
133 					 struct cryptkop *, int);
134 static	int	ubsec_kprocess_modexp_hw(struct ubsec_softc *,
135 					 struct cryptkop *, int);
136 static	int	ubsec_kprocess_rsapriv(struct ubsec_softc *,
137 				       struct cryptkop *, int);
138 static	void	ubsec_kfree(struct ubsec_softc *, struct ubsec_q2 *);
139 static	int	ubsec_ksigbits(struct crparam *);
140 static	void	ubsec_kshift_r(u_int, u_int8_t *, u_int, u_int8_t *, u_int);
141 static	void	ubsec_kshift_l(u_int, u_int8_t *, u_int, u_int8_t *, u_int);
142 
143 #ifdef UBSEC_DEBUG
144 static void	ubsec_dump_pb(volatile struct ubsec_pktbuf *);
145 static void	ubsec_dump_mcr(struct ubsec_mcr *);
146 static	void	ubsec_dump_ctx2(volatile struct ubsec_ctx_keyop *);
147 #endif
148 
149 #define	READ_REG(sc,r) \
150 	bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r))
151 
152 #define WRITE_REG(sc,reg,val) \
153 	bus_space_write_4((sc)->sc_st, (sc)->sc_sh, reg, val)
154 
155 #define	SWAP32(x) (x) = htole32(ntohl((x)))
156 #ifndef HTOLE32
157  #define	HTOLE32(x) (x) = htole32(x)
158 #endif
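/*
 * SWAP32() converts a 32-bit word between network (big-endian) order
 * and the chip's little-endian order; htole32(ntohl(x)) reduces to a
 * plain byte swap on big- and little-endian hosts alike, e.g.
 * 0x01020304 becomes 0x04030201.  HTOLE32() converts a host-order
 * word to little-endian in place.
 */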
159 
160 struct ubsec_stats ubsecstats;
161 
162 /*
163  * ubsec_maxbatch controls the number of crypto ops to voluntarily
164  * collect into one submission to the hardware.  This batching happens
165  * when ops are dispatched from the crypto subsystem with a hint that
166  * more are to follow immediately.  These ops must also not be marked
167  * with a ``no delay'' flag.
168  */
169 static	int ubsec_maxbatch = 1;
170 #ifdef SYSCTL_INT
171 SYSCTL_INT(_kern, OID_AUTO, ubsec_maxbatch, CTLFLAG_RW, &ubsec_maxbatch,
172 	    0, "Broadcom driver: max ops to batch w/o interrupt");
173 #endif
174 
175 /*
176  * ubsec_maxaggr controls the number of crypto ops to submit to the
177  * hardware as a unit.  This aggregation reduces the number of interrupts
178  * to the host at the expense of increased latency (for all but the last
179  * operation).  For network traffic, setting this to one yields the
180  * highest performance, at the expense of more interrupt processing.
181  */
182 static	int ubsec_maxaggr = 1;
183 #ifdef SYSCTL_INT
184 SYSCTL_INT(_kern, OID_AUTO, ubsec_maxaggr, CTLFLAG_RW, &ubsec_maxaggr,
185 	    0, "Broadcom driver: max ops to aggregate under one interrupt");
186 #endif
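/*
 * On kernels built with SYSCTL_INT (the FreeBSD-style knobs above),
 * both tunables can be changed at run time, e.g.:
 *
 *	sysctl kern.ubsec_maxbatch=4
 *	sysctl kern.ubsec_maxaggr=4
 *
 * Elsewhere they are compile-time patchable only.
 */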
187 
188 static const struct ubsec_product {
189 	pci_vendor_id_t		ubsec_vendor;
190 	pci_product_id_t	ubsec_product;
191 	int			ubsec_flags;
192 	int			ubsec_statmask;
193 	const char		*ubsec_name;
194 } ubsec_products[] = {
195 	{ PCI_VENDOR_BLUESTEEL,	PCI_PRODUCT_BLUESTEEL_5501,
196 	  0,
197 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
198 	  "Bluesteel 5501"
199 	},
200 	{ PCI_VENDOR_BLUESTEEL,	PCI_PRODUCT_BLUESTEEL_5601,
201 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG,
202 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
203 	  "Bluesteel 5601"
204 	},
205 
206 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5801,
207 	  0,
208 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
209 	  "Broadcom BCM5801"
210 	},
211 
212 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5802,
213 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG,
214 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
215 	  "Broadcom BCM5802"
216 	},
217 
218 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5805,
219 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG,
220 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
221 	  "Broadcom BCM5805"
222 	},
223 
224 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5820,
225 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
226 	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
227 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR,
228 	  "Broadcom BCM5820"
229 	},
230 
231 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5821,
232 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
233 	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
234 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
235 	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
236 	  "Broadcom BCM5821"
237 	},
238 	{ PCI_VENDOR_SUN,	PCI_PRODUCT_SUN_SCA1K,
239 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
240 	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
241 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
242 	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
243 	  "Sun Crypto Accelerator 1000"
244 	},
245 	{ PCI_VENDOR_SUN,	PCI_PRODUCT_SUN_5821,
246 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
247 	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
248 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
249 	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
250 	  "Broadcom BCM5821 (Sun)"
251 	},
252 
253 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5822,
254 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
255 	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
256 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
257 	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
258 	  "Broadcom BCM5822"
259 	},
260 
261 	{ PCI_VENDOR_BROADCOM,	PCI_PRODUCT_BROADCOM_5823,
262 	  UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX |
263 	      UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY,
264 	  BS_STAT_MCR1_DONE | BS_STAT_DMAERR |
265 	      BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY,
266 	  "Broadcom BCM5823"
267 	},
268 
269 	{ 0,			0,
270 	  0,
271 	  0,
272 	  NULL
273 	}
274 };
275 
276 static const struct ubsec_product *
277 ubsec_lookup(const struct pci_attach_args *pa)
278 {
279 	const struct ubsec_product *up;
280 
281 	for (up = ubsec_products; up->ubsec_name != NULL; up++) {
282 		if (PCI_VENDOR(pa->pa_id) == up->ubsec_vendor &&
283 		    PCI_PRODUCT(pa->pa_id) == up->ubsec_product)
284 			return (up);
285 	}
286 	return (NULL);
287 }
288 
289 static int
290 ubsec_probe(struct device *parent, struct cfdata *match,
291     void *aux)
292 {
293 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
294 
295 	if (ubsec_lookup(pa) != NULL)
296 		return (1);
297 
298 	return (0);
299 }
300 
301 static void
302 ubsec_attach(struct device *parent, struct device *self, void *aux)
303 {
304 	struct ubsec_softc *sc = (struct ubsec_softc *)self;
305 	struct pci_attach_args *pa = aux;
306 	const struct ubsec_product *up;
307 	pci_chipset_tag_t pc = pa->pa_pc;
308 	pci_intr_handle_t ih;
309 	const char *intrstr = NULL;
310 	struct ubsec_dma *dmap;
311 	u_int32_t cmd, i;
312 
313 	up = ubsec_lookup(pa);
314 	if (up == NULL) {
315 		printf("\n");
316 		panic("ubsec_attach: impossible");
317 	}
318 
319 	aprint_naive(": Crypto processor\n");
320 	aprint_normal(": %s, rev. %d\n", up->ubsec_name,
321 	    PCI_REVISION(pa->pa_class));
322 
323 	SIMPLEQ_INIT(&sc->sc_queue);
324 	SIMPLEQ_INIT(&sc->sc_qchip);
325 	SIMPLEQ_INIT(&sc->sc_queue2);
326 	SIMPLEQ_INIT(&sc->sc_qchip2);
327 	SIMPLEQ_INIT(&sc->sc_q2free);
328 
329 	sc->sc_flags = up->ubsec_flags;
330 	sc->sc_statmask = up->ubsec_statmask;
331 
332 	cmd = pci_conf_read(pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
333 	cmd |= PCI_COMMAND_MASTER_ENABLE;
334 	pci_conf_write(pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, cmd);
335 
336 	if (pci_mapreg_map(pa, BS_BAR, PCI_MAPREG_TYPE_MEM, 0,
337 	    &sc->sc_st, &sc->sc_sh, NULL, NULL)) {
338 		aprint_error_dev(&sc->sc_dv, "can't find mem space\n");
339 		return;
340 	}
341 
342 	sc->sc_dmat = pa->pa_dmat;
343 
344 	if (pci_intr_map(pa, &ih)) {
345 		aprint_error_dev(&sc->sc_dv, "couldn't map interrupt\n");
346 		return;
347 	}
348 	intrstr = pci_intr_string(pc, ih);
349 	sc->sc_ih = pci_intr_establish(pc, ih, IPL_NET, ubsec_intr, sc);
350 	if (sc->sc_ih == NULL) {
351 		aprint_error_dev(&sc->sc_dv, "couldn't establish interrupt");
352 		if (intrstr != NULL)
353 			aprint_normal(" at %s", intrstr);
354 		aprint_normal("\n");
355 		return;
356 	}
357 	aprint_normal_dev(&sc->sc_dv, "interrupting at %s\n", intrstr);
358 
359 	sc->sc_cid = crypto_get_driverid(0);
360 	if (sc->sc_cid < 0) {
361 		aprint_error_dev(&sc->sc_dv, "couldn't get crypto driver id\n");
362 		pci_intr_disestablish(pc, sc->sc_ih);
363 		return;
364 	}
365 
366 	SIMPLEQ_INIT(&sc->sc_freequeue);
367 	dmap = sc->sc_dmaa;
368 	for (i = 0; i < UBS_MAX_NQUEUE; i++, dmap++) {
369 		struct ubsec_q *q;
370 
371 		q = (struct ubsec_q *)malloc(sizeof(struct ubsec_q),
372 		    M_DEVBUF, M_NOWAIT);
373 		if (q == NULL) {
374 			aprint_error_dev(&sc->sc_dv, "can't allocate queue buffers\n");
375 			break;
376 		}
377 
378 		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_dmachunk),
379 		    &dmap->d_alloc, 0)) {
380 			aprint_error_dev(&sc->sc_dv, "can't allocate dma buffers\n");
381 			free(q, M_DEVBUF);
382 			break;
383 		}
384 		dmap->d_dma = (struct ubsec_dmachunk *)dmap->d_alloc.dma_vaddr;
385 
386 		q->q_dma = dmap;
387 		sc->sc_queuea[i] = q;
388 
389 		SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
390 	}
391 
392 	crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0,
393 	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
394 	crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0,
395 	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
396 	crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC_96, 0, 0,
397 	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
398 	crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC_96, 0, 0,
399 	    ubsec_newsession, ubsec_freesession, ubsec_process, sc);
400 
401 	/*
402 	 * Reset Broadcom chip
403 	 */
404 	ubsec_reset_board(sc);
405 
406 	/*
407 	 * Init Broadcom specific PCI settings
408 	 */
409 	ubsec_init_pciregs(pa);
410 
411 	/*
412 	 * Init Broadcom chip
413 	 */
414 	ubsec_init_board(sc);
415 
416 #ifndef UBSEC_NO_RNG
417 	if (sc->sc_flags & UBS_FLAGS_RNG) {
418 		sc->sc_statmask |= BS_STAT_MCR2_DONE;
419 
420 		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
421 		    &sc->sc_rng.rng_q.q_mcr, 0))
422 			goto skip_rng;
423 
424 		if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rngbypass),
425 		    &sc->sc_rng.rng_q.q_ctx, 0)) {
426 			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
427 			goto skip_rng;
428 		}
429 
430 		if (ubsec_dma_malloc(sc, sizeof(u_int32_t) *
431 		    UBSEC_RNG_BUFSIZ, &sc->sc_rng.rng_buf, 0)) {
432 			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx);
433 			ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr);
434 			goto skip_rng;
435 		}
436 
437 		if (hz >= 100)
438 			sc->sc_rnghz = hz / 100;
439 		else
440 			sc->sc_rnghz = 1;
441 #ifdef __OpenBSD__
442 		timeout_set(&sc->sc_rngto, ubsec_rng, sc);
443 		timeout_add(&sc->sc_rngto, sc->sc_rnghz);
444 #else
445 		callout_init(&sc->sc_rngto, 0);
446 		callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc);
447 #endif
448  skip_rng:
449 		if (sc->sc_rnghz)
450 			aprint_normal_dev(&sc->sc_dv, "random number generator enabled\n");
451 		else
452 			aprint_error_dev(&sc->sc_dv, "WARNING: random number generator "
453 			    "disabled\n");
454 	}
455 #endif /* UBSEC_NO_RNG */
456 
457 	if (sc->sc_flags & UBS_FLAGS_KEY) {
458 		sc->sc_statmask |= BS_STAT_MCR2_DONE;
459 
460 		crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0,
461 				 ubsec_kprocess, sc);
462 #if 0
463 		crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0,
464 				 ubsec_kprocess, sc);
465 #endif
466 	}
467 }
468 
469 /*
470  * UBSEC interrupt routine: reap completed requests and recover from DMA errors
471  */
472 static int
473 ubsec_intr(void *arg)
474 {
475 	struct ubsec_softc *sc = arg;
476 	volatile u_int32_t stat;
477 	struct ubsec_q *q;
478 	struct ubsec_dma *dmap;
479 	int npkts = 0, i;
480 
481 	stat = READ_REG(sc, BS_STAT);
482 	stat &= sc->sc_statmask;
483 	if (stat == 0) {
484 		return (0);
485 	}
486 
487 	WRITE_REG(sc, BS_STAT, stat);		/* IACK */
488 
489 	/*
490 	 * Check to see if we have any packets waiting for us
491 	 */
492 	if ((stat & BS_STAT_MCR1_DONE)) {
493 		while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
494 			q = SIMPLEQ_FIRST(&sc->sc_qchip);
495 			dmap = q->q_dma;
496 
497 			if ((dmap->d_dma->d_mcr.mcr_flags & htole16(UBS_MCR_DONE)) == 0)
498 				break;
499 
500 			q = SIMPLEQ_FIRST(&sc->sc_qchip);
501 			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, /*q,*/ q_next);
502 
503 			npkts = q->q_nstacked_mcrs;
504 			sc->sc_nqchip -= 1+npkts;
505 			/*
506 			 * Search for further sc_qchip ubsec_q's that share
507 			 * the same MCR and complete them too; they must be
508 			 * at the head of the queue.
509 			 */
510 			for (i = 0; i < npkts; i++) {
511 				if (q->q_stacked_mcr[i])
512 					ubsec_callback(sc, q->q_stacked_mcr[i]);
513 				else
514 					break;
515 			}
516 			ubsec_callback(sc, q);
517 		}
518 
519 		/*
520 		 * Don't send any more packets to the chip if there has been
521 		 * a DMAERR.
522 		 */
523 		if (!(stat & BS_STAT_DMAERR))
524 			ubsec_feed(sc);
525 	}
526 
527 	/*
528 	 * Check to see if we have any key setups/rng's waiting for us
529 	 */
530 	if ((sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) &&
531 	    (stat & BS_STAT_MCR2_DONE)) {
532 		struct ubsec_q2 *q2;
533 		struct ubsec_mcr *mcr;
534 
535 		while (!SIMPLEQ_EMPTY(&sc->sc_qchip2)) {
536 			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);
537 
538 			bus_dmamap_sync(sc->sc_dmat, q2->q_mcr.dma_map,
539 			    0, q2->q_mcr.dma_map->dm_mapsize,
540 			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
541 
542 			mcr = (struct ubsec_mcr *)q2->q_mcr.dma_vaddr;
543 			if ((mcr->mcr_flags & htole16(UBS_MCR_DONE)) == 0) {
544 				bus_dmamap_sync(sc->sc_dmat,
545 				    q2->q_mcr.dma_map, 0,
546 				    q2->q_mcr.dma_map->dm_mapsize,
547 				    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
548 				break;
549 			}
550 			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);
551 			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip2, /*q2,*/ q_next);
552 			ubsec_callback2(sc, q2);
553 			/*
554 			 * Don't send any more packets to the chip if there has been
555 			 * a DMAERR.
556 			 */
557 			if (!(stat & BS_STAT_DMAERR))
558 				ubsec_feed2(sc);
559 		}
560 	}
561 
562 	/*
563 	 * Check to see if we got any DMA errors
564 	 */
565 	if (stat & BS_STAT_DMAERR) {
566 #ifdef UBSEC_DEBUG
567 		if (ubsec_debug) {
568 			volatile u_int32_t a = READ_REG(sc, BS_ERR);
569 
570 			printf("%s: dmaerr %s@%08x\n", device_xname(&sc->sc_dv),
571 			    (a & BS_ERR_READ) ? "read" : "write",
572 			       a & BS_ERR_ADDR);
573 		}
574 #endif /* UBSEC_DEBUG */
575 		ubsecstats.hst_dmaerr++;
576 		ubsec_totalreset(sc);
577 		ubsec_feed(sc);
578 	}
579 
580 	if (sc->sc_needwakeup) {		/* XXX check high watermark */
581 		int wkeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
582 #ifdef UBSEC_DEBUG
583 		if (ubsec_debug)
584 			printf("%s: wakeup crypto (%x)\n", device_xname(&sc->sc_dv),
585 				sc->sc_needwakeup);
586 #endif /* UBSEC_DEBUG */
587 		sc->sc_needwakeup &= ~wkeup;
588 		crypto_unblock(sc->sc_cid, wkeup);
589 	}
590 	return (1);
591 }
592 
593 /*
594  * ubsec_feed() - aggregate and post requests to the chip.
595  * As the OpenBSD comments note, it is assumed that the caller
596  * holds splnet().
597  */
598 static void
599 ubsec_feed(struct ubsec_softc *sc)
600 {
601 	struct ubsec_q *q, *q2;
602 	int npkts, i;
603 	void *v;
604 	u_int32_t stat;
605 #ifdef UBSEC_DEBUG
606 	static int max;
607 #endif /* UBSEC_DEBUG */
608 
609 	npkts = sc->sc_nqueue;
610 	if (npkts > ubsecstats.hst_maxqueue)
611 		ubsecstats.hst_maxqueue = npkts;
612 	if (npkts < 2)
613 		goto feed1;
614 
615 	/*
616 	 * Decide how many ops to combine in a single MCR.  We cannot
617 	 * aggregate more than UBS_MAX_AGGR because this is the number
618 	 * of slots defined in the data structure.  Otherwise we clamp
619 	 * based on the tunable parameter ubsec_maxaggr.  Note that
620 	 * aggregation can happen in two ways: either by batching ops
621 	 * from above or because the h/w backs up and throttles us.
622 	 * Aggregating ops reduces the number of interrupts to the host
623 	 * but also (potentially) increases the latency for processing
624 	 * completed ops as we only get an interrupt when all aggregated
625 	 * ops have completed.
626 	 */
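	/*
	 * Aggregation sketch (see the copy loop below): the first
	 * request's MCR becomes the master header, each additional
	 * request contributes one ubsec_mcr_add record copied into
	 * d_mcradd[], and mcr_pkts is set to the total count before
	 * the chain is posted to the chip through BS_MCR1.
	 */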
627 	if (npkts > UBS_MAX_AGGR)
628 		npkts = UBS_MAX_AGGR;
629 	if (npkts > ubsec_maxaggr)
630 		npkts = ubsec_maxaggr;
631 	if (npkts > ubsecstats.hst_maxbatch)
632 		ubsecstats.hst_maxbatch = npkts;
633 	if (npkts < 2)
634 		goto feed1;
635 	ubsecstats.hst_totbatch += npkts-1;
636 
637 	if ((stat = READ_REG(sc, BS_STAT)) & (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
638 		if (stat & BS_STAT_DMAERR) {
639 			ubsec_totalreset(sc);
640 			ubsecstats.hst_dmaerr++;
641 		} else {
642 			ubsecstats.hst_mcr1full++;
643 		}
644 		return;
645 	}
646 
647 #ifdef UBSEC_DEBUG
648 	if (ubsec_debug)
649 	    printf("merging %d records\n", npkts);
650 	/* XXX temporary aggregation statistics reporting code */
651 	if (max < npkts) {
652 		max = npkts;
653 		printf("%s: new max aggregate %d\n", device_xname(&sc->sc_dv), max);
654 	}
655 #endif /* UBSEC_DEBUG */
656 
657 	q = SIMPLEQ_FIRST(&sc->sc_queue);
658 	SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, /*q,*/ q_next);
659 	--sc->sc_nqueue;
660 
661 	bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
662 	    0, q->q_src_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
663 	if (q->q_dst_map != NULL)
664 		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
665 		    0, q->q_dst_map->dm_mapsize, BUS_DMASYNC_PREREAD);
666 
667 	q->q_nstacked_mcrs = npkts - 1;		/* Number of packets stacked */
668 
669 	for (i = 0; i < q->q_nstacked_mcrs; i++) {
670 		q2 = SIMPLEQ_FIRST(&sc->sc_queue);
671 		bus_dmamap_sync(sc->sc_dmat, q2->q_src_map,
672 		    0, q2->q_src_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
673 		if (q2->q_dst_map != NULL)
674 			bus_dmamap_sync(sc->sc_dmat, q2->q_dst_map,
675 			    0, q2->q_dst_map->dm_mapsize, BUS_DMASYNC_PREREAD);
676 		q2 = SIMPLEQ_FIRST(&sc->sc_queue);
677 		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, /*q2,*/ q_next);
678 		--sc->sc_nqueue;
679 
680 		v = ((void *)&q2->q_dma->d_dma->d_mcr);
681 		v = (char*)v + (sizeof(struct ubsec_mcr) -
682 				 sizeof(struct ubsec_mcr_add));
683 		bcopy(v, &q->q_dma->d_dma->d_mcradd[i], sizeof(struct ubsec_mcr_add));
684 		q->q_stacked_mcr[i] = q2;
685 	}
686 	q->q_dma->d_dma->d_mcr.mcr_pkts = htole16(npkts);
687 	SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
688 	sc->sc_nqchip += npkts;
689 	if (sc->sc_nqchip > ubsecstats.hst_maxqchip)
690 		ubsecstats.hst_maxqchip = sc->sc_nqchip;
691 	bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
692 	    0, q->q_dma->d_alloc.dma_map->dm_mapsize,
693 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
694 	WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
695 	    offsetof(struct ubsec_dmachunk, d_mcr));
696 	return;
697 
698 feed1:
699 	while (!SIMPLEQ_EMPTY(&sc->sc_queue)) {
700 		if ((stat = READ_REG(sc, BS_STAT)) & (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
701 			if (stat & BS_STAT_DMAERR) {
702 				ubsec_totalreset(sc);
703 				ubsecstats.hst_dmaerr++;
704 			} else {
705 				ubsecstats.hst_mcr1full++;
706 			}
707 			break;
708 		}
709 
710 		q = SIMPLEQ_FIRST(&sc->sc_queue);
711 
712 		bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
713 		    0, q->q_src_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
714 		if (q->q_dst_map != NULL)
715 			bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
716 			    0, q->q_dst_map->dm_mapsize, BUS_DMASYNC_PREREAD);
717 		bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
718 		    0, q->q_dma->d_alloc.dma_map->dm_mapsize,
719 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
720 
721 		WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
722 		    offsetof(struct ubsec_dmachunk, d_mcr));
723 #ifdef UBSEC_DEBUG
724 		if (ubsec_debug)
725 			printf("feed: q->chip %p %08x stat %08x\n",
726  		    	       q, (u_int32_t)q->q_dma->d_alloc.dma_paddr,
727 			       stat);
728 #endif /* UBSEC_DEBUG */
729 		q = SIMPLEQ_FIRST(&sc->sc_queue);
730 		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, /*q,*/ q_next);
731 		--sc->sc_nqueue;
732 		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
733 		sc->sc_nqchip++;
734 	}
735 	if (sc->sc_nqchip > ubsecstats.hst_maxqchip)
736 		ubsecstats.hst_maxqchip = sc->sc_nqchip;
737 }
738 
739 /*
740  * Allocate a new 'session' and return an encoded session id.  'sidp'
741  * contains our registration id and is filled in with an encoded session
742  * id on successful allocation.
743  */
744 static int
745 ubsec_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
746 {
747 	struct cryptoini *c, *encini = NULL, *macini = NULL;
748 	struct ubsec_softc *sc;
749 	struct ubsec_session *ses = NULL;
750 	MD5_CTX md5ctx;
751 	SHA1_CTX sha1ctx;
752 	int i, sesn;
753 
754 	sc = arg;
755 	KASSERT(sc != NULL /*, ("ubsec_newsession: null softc")*/);
756 
757 	if (sidp == NULL || cri == NULL || sc == NULL)
758 		return (EINVAL);
759 
760 	for (c = cri; c != NULL; c = c->cri_next) {
761 		if (c->cri_alg == CRYPTO_MD5_HMAC_96 ||
762 		    c->cri_alg == CRYPTO_SHA1_HMAC_96) {
763 			if (macini)
764 				return (EINVAL);
765 			macini = c;
766 		} else if (c->cri_alg == CRYPTO_DES_CBC ||
767 		    c->cri_alg == CRYPTO_3DES_CBC) {
768 			if (encini)
769 				return (EINVAL);
770 			encini = c;
771 		} else
772 			return (EINVAL);
773 	}
774 	if (encini == NULL && macini == NULL)
775 		return (EINVAL);
776 
777 	if (sc->sc_sessions == NULL) {
778 		ses = sc->sc_sessions = (struct ubsec_session *)malloc(
779 		    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
780 		if (ses == NULL)
781 			return (ENOMEM);
782 		sesn = 0;
783 		sc->sc_nsessions = 1;
784 	} else {
785 		for (sesn = 0; sesn < sc->sc_nsessions; sesn++) {
786 			if (sc->sc_sessions[sesn].ses_used == 0) {
787 				ses = &sc->sc_sessions[sesn];
788 				break;
789 			}
790 		}
791 
792 		if (ses == NULL) {
793 			sesn = sc->sc_nsessions;
794 			ses = (struct ubsec_session *)malloc((sesn + 1) *
795 			    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
796 			if (ses == NULL)
797 				return (ENOMEM);
798 			bcopy(sc->sc_sessions, ses, sesn *
799 			    sizeof(struct ubsec_session));
800 			bzero(sc->sc_sessions, sesn *
801 			    sizeof(struct ubsec_session));
802 			free(sc->sc_sessions, M_DEVBUF);
803 			sc->sc_sessions = ses;
804 			ses = &sc->sc_sessions[sesn];
805 			sc->sc_nsessions++;
806 		}
807 	}
808 
809 	bzero(ses, sizeof(struct ubsec_session));
810 	ses->ses_used = 1;
811 	if (encini) {
812 		/* get an IV, network byte order */
813 #ifdef __NetBSD__
814 		rnd_extract_data(ses->ses_iv,
815 		    sizeof(ses->ses_iv), RND_EXTRACT_ANY);
816 #else
817 		get_random_bytes(ses->ses_iv, sizeof(ses->ses_iv));
818 #endif
819 
820 		/* Go ahead and compute key in ubsec's byte order */
821 		if (encini->cri_alg == CRYPTO_DES_CBC) {
822 			bcopy(encini->cri_key, &ses->ses_deskey[0], 8);
823 			bcopy(encini->cri_key, &ses->ses_deskey[2], 8);
824 			bcopy(encini->cri_key, &ses->ses_deskey[4], 8);
825 		} else
826 			bcopy(encini->cri_key, ses->ses_deskey, 24);
827 
828 		SWAP32(ses->ses_deskey[0]);
829 		SWAP32(ses->ses_deskey[1]);
830 		SWAP32(ses->ses_deskey[2]);
831 		SWAP32(ses->ses_deskey[3]);
832 		SWAP32(ses->ses_deskey[4]);
833 		SWAP32(ses->ses_deskey[5]);
834 	}
835 
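	/*
	 * Precompute the HMAC inner and outer digest state, using
	 * HMAC(key, msg) = H((key ^ opad) || H((key ^ ipad) || msg)):
	 * hashing the XOR-ed, padded key block once here and saving
	 * the chaining state (ses_hminner/ses_hmouter) lets the chip
	 * resume both hashes per packet without ever seeing the key.
	 */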
836 	if (macini) {
837 		for (i = 0; i < macini->cri_klen / 8; i++)
838 			macini->cri_key[i] ^= HMAC_IPAD_VAL;
839 
840 		if (macini->cri_alg == CRYPTO_MD5_HMAC_96) {
841 			MD5Init(&md5ctx);
842 			MD5Update(&md5ctx, macini->cri_key,
843 			    macini->cri_klen / 8);
844 			MD5Update(&md5ctx, hmac_ipad_buffer,
845 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
846 			bcopy(md5ctx.state, ses->ses_hminner,
847 			    sizeof(md5ctx.state));
848 		} else {
849 			SHA1Init(&sha1ctx);
850 			SHA1Update(&sha1ctx, macini->cri_key,
851 			    macini->cri_klen / 8);
852 			SHA1Update(&sha1ctx, hmac_ipad_buffer,
853 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
854 			bcopy(sha1ctx.state, ses->ses_hminner,
855 			    sizeof(sha1ctx.state));
856 		}
857 
858 		for (i = 0; i < macini->cri_klen / 8; i++)
859 			macini->cri_key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);
860 
861 		if (macini->cri_alg == CRYPTO_MD5_HMAC_96) {
862 			MD5Init(&md5ctx);
863 			MD5Update(&md5ctx, macini->cri_key,
864 			    macini->cri_klen / 8);
865 			MD5Update(&md5ctx, hmac_opad_buffer,
866 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
867 			bcopy(md5ctx.state, ses->ses_hmouter,
868 			    sizeof(md5ctx.state));
869 		} else {
870 			SHA1Init(&sha1ctx);
871 			SHA1Update(&sha1ctx, macini->cri_key,
872 			    macini->cri_klen / 8);
873 			SHA1Update(&sha1ctx, hmac_opad_buffer,
874 			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
875 			bcopy(sha1ctx.state, ses->ses_hmouter,
876 			    sizeof(sha1ctx.state));
877 		}
878 
879 		for (i = 0; i < macini->cri_klen / 8; i++)
880 			macini->cri_key[i] ^= HMAC_OPAD_VAL;
881 	}
882 
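	/*
	 * The session id handed back to opencrypto encodes the driver
	 * unit and the session index (UBSEC_SID() from ubsecvar.h);
	 * ubsec_process() recovers the index with UBSEC_SESSION().
	 */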
883 	*sidp = UBSEC_SID(device_unit(&sc->sc_dv), sesn);
884 	return (0);
885 }
886 
887 /*
888  * Deallocate a session.
889  */
890 static int
891 ubsec_freesession(void *arg, u_int64_t tid)
892 {
893 	struct ubsec_softc *sc;
894 	int session;
895 	u_int32_t sid = ((u_int32_t) tid) & 0xffffffff;
896 
897 	sc = arg;
898 	KASSERT(sc != NULL /*, ("ubsec_freesession: null softc")*/);
899 
900 	session = UBSEC_SESSION(sid);
901 	if (session >= sc->sc_nsessions)
902 		return (EINVAL);
903 
904 	bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session]));
905 	return (0);
906 }
907 
908 #ifdef __FreeBSD__ /* Ugly gratuitous changes to bus_dma */
909 static void
910 ubsec_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error)
911 {
912 	struct ubsec_operand *op = arg;
913 
914 	KASSERT(nsegs <= UBS_MAX_SCATTER
915 		/*, ("Too many DMA segments returned when mapping operand")*/);
916 #ifdef UBSEC_DEBUG
917 	if (ubsec_debug)
918 		printf("ubsec_op_cb: mapsize %u nsegs %d\n",
919 			(u_int) mapsize, nsegs);
920 #endif
921 	op->mapsize = mapsize;
922 	op->nsegs = nsegs;
923 	bcopy(seg, op->segs, nsegs * sizeof (seg[0]));
924 }
925 #endif
926 
927 static int
928 ubsec_process(void *arg, struct cryptop *crp, int hint)
929 {
930 	struct ubsec_q *q = NULL;
931 #ifdef	__OpenBSD__
932 	int card;
933 #endif
934 	int err = 0, i, j, s, nicealign;
935 	struct ubsec_softc *sc;
936 	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;
937 	int encoffset = 0, macoffset = 0, cpskip, cpoffset;
938 	int sskip, dskip, stheend, dtheend;
939 	int16_t coffset;
940 	struct ubsec_session *ses;
941 	struct ubsec_pktctx ctx;
942 	struct ubsec_dma *dmap = NULL;
943 
944 	sc = arg;
945 	KASSERT(sc != NULL /*, ("ubsec_process: null softc")*/);
946 
947 	if (crp == NULL || crp->crp_callback == NULL || sc == NULL) {
948 		ubsecstats.hst_invalid++;
949 		return (EINVAL);
950 	}
951 	if (UBSEC_SESSION(crp->crp_sid) >= sc->sc_nsessions) {
952 		ubsecstats.hst_badsession++;
953 		return (EINVAL);
954 	}
955 
956 	s = splnet();
957 
958 	if (SIMPLEQ_EMPTY(&sc->sc_freequeue)) {
959 		ubsecstats.hst_queuefull++;
960 		sc->sc_needwakeup |= CRYPTO_SYMQ;
961 		splx(s);
962 		return(ERESTART);
963 	}
964 
965 	q = SIMPLEQ_FIRST(&sc->sc_freequeue);
966 	SIMPLEQ_REMOVE_HEAD(&sc->sc_freequeue, /*q,*/ q_next);
967 	splx(s);
968 
969 	dmap = q->q_dma; /* Save dma pointer */
970 	bzero(q, sizeof(struct ubsec_q));
971 	bzero(&ctx, sizeof(ctx));
972 
973 	q->q_sesn = UBSEC_SESSION(crp->crp_sid);
974 	q->q_dma = dmap;
975 	ses = &sc->sc_sessions[q->q_sesn];
976 
977 	if (crp->crp_flags & CRYPTO_F_IMBUF) {
978 		q->q_src_m = (struct mbuf *)crp->crp_buf;
979 		q->q_dst_m = (struct mbuf *)crp->crp_buf;
980 	} else if (crp->crp_flags & CRYPTO_F_IOV) {
981 		q->q_src_io = (struct uio *)crp->crp_buf;
982 		q->q_dst_io = (struct uio *)crp->crp_buf;
983 	} else {
984 		ubsecstats.hst_badflags++;
985 		err = EINVAL;
986 		goto errout;	/* XXX we don't handle contiguous blocks! */
987 	}
988 
989 	bzero(&dmap->d_dma->d_mcr, sizeof(struct ubsec_mcr));
990 
991 	dmap->d_dma->d_mcr.mcr_pkts = htole16(1);
992 	dmap->d_dma->d_mcr.mcr_flags = 0;
993 	q->q_crp = crp;
994 
995 	crd1 = crp->crp_desc;
996 	if (crd1 == NULL) {
997 		ubsecstats.hst_nodesc++;
998 		err = EINVAL;
999 		goto errout;
1000 	}
1001 	crd2 = crd1->crd_next;
1002 
1003 	if (crd2 == NULL) {
1004 		if (crd1->crd_alg == CRYPTO_MD5_HMAC_96 ||
1005 		    crd1->crd_alg == CRYPTO_SHA1_HMAC_96) {
1006 			maccrd = crd1;
1007 			enccrd = NULL;
1008 		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
1009 		    crd1->crd_alg == CRYPTO_3DES_CBC) {
1010 			maccrd = NULL;
1011 			enccrd = crd1;
1012 		} else {
1013 			ubsecstats.hst_badalg++;
1014 			err = EINVAL;
1015 			goto errout;
1016 		}
1017 	} else {
1018 		if ((crd1->crd_alg == CRYPTO_MD5_HMAC_96 ||
1019 		    crd1->crd_alg == CRYPTO_SHA1_HMAC_96) &&
1020 		    (crd2->crd_alg == CRYPTO_DES_CBC ||
1021 			crd2->crd_alg == CRYPTO_3DES_CBC) &&
1022 		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
1023 			maccrd = crd1;
1024 			enccrd = crd2;
1025 		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
1026 		    crd1->crd_alg == CRYPTO_3DES_CBC) &&
1027 		    (crd2->crd_alg == CRYPTO_MD5_HMAC_96 ||
1028 			crd2->crd_alg == CRYPTO_SHA1_HMAC_96) &&
1029 		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
1030 			enccrd = crd1;
1031 			maccrd = crd2;
1032 		} else {
1033 			/*
1034 			 * The ubsec cannot handle the requested ordering of operations
1035 			 */
1036 			ubsecstats.hst_badalg++;
1037 			err = EINVAL;
1038 			goto errout;
1039 		}
1040 	}
1041 
1042 	if (enccrd) {
1043 		encoffset = enccrd->crd_skip;
1044 		ctx.pc_flags |= htole16(UBS_PKTCTX_ENC_3DES);
1045 
1046 		if (enccrd->crd_flags & CRD_F_ENCRYPT) {
1047 			q->q_flags |= UBSEC_QFLAGS_COPYOUTIV;
1048 
1049 			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1050 				bcopy(enccrd->crd_iv, ctx.pc_iv, 8);
1051 			else {
1052 				ctx.pc_iv[0] = ses->ses_iv[0];
1053 				ctx.pc_iv[1] = ses->ses_iv[1];
1054 			}
1055 
1056 			if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) {
1057 				if (crp->crp_flags & CRYPTO_F_IMBUF)
1058 					m_copyback(q->q_src_m,
1059 					    enccrd->crd_inject,
1060 					    8, (void *)ctx.pc_iv);
1061 				else if (crp->crp_flags & CRYPTO_F_IOV)
1062 					cuio_copyback(q->q_src_io,
1063 					    enccrd->crd_inject,
1064 					    8, (void *)ctx.pc_iv);
1065 			}
1066 		} else {
1067 			ctx.pc_flags |= htole16(UBS_PKTCTX_INBOUND);
1068 
1069 			if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
1070 				bcopy(enccrd->crd_iv, ctx.pc_iv, 8);
1071 			else if (crp->crp_flags & CRYPTO_F_IMBUF)
1072 				m_copydata(q->q_src_m, enccrd->crd_inject,
1073 				    8, (void *)ctx.pc_iv);
1074 			else if (crp->crp_flags & CRYPTO_F_IOV)
1075 				cuio_copydata(q->q_src_io,
1076 				    enccrd->crd_inject, 8,
1077 				    (void *)ctx.pc_iv);
1078 		}
1079 
1080 		ctx.pc_deskey[0] = ses->ses_deskey[0];
1081 		ctx.pc_deskey[1] = ses->ses_deskey[1];
1082 		ctx.pc_deskey[2] = ses->ses_deskey[2];
1083 		ctx.pc_deskey[3] = ses->ses_deskey[3];
1084 		ctx.pc_deskey[4] = ses->ses_deskey[4];
1085 		ctx.pc_deskey[5] = ses->ses_deskey[5];
1086 		SWAP32(ctx.pc_iv[0]);
1087 		SWAP32(ctx.pc_iv[1]);
1088 	}
1089 
1090 	if (maccrd) {
1091 		macoffset = maccrd->crd_skip;
1092 
1093 		if (maccrd->crd_alg == CRYPTO_MD5_HMAC_96)
1094 			ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_MD5);
1095 		else
1096 			ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_SHA1);
1097 
1098 		for (i = 0; i < 5; i++) {
1099 			ctx.pc_hminner[i] = ses->ses_hminner[i];
1100 			ctx.pc_hmouter[i] = ses->ses_hmouter[i];
1101 
1102 			HTOLE32(ctx.pc_hminner[i]);
1103 			HTOLE32(ctx.pc_hmouter[i]);
1104 		}
1105 	}
1106 
1107 	if (enccrd && maccrd) {
1108 		/*
1109 		 * ubsec cannot handle packets where the end of encryption
1110 		 * and authentication are not the same, or where the
1111 		 * encrypted part begins before the authenticated part.
1112 		 */
1113 		if ((encoffset + enccrd->crd_len) !=
1114 		    (macoffset + maccrd->crd_len)) {
1115 			ubsecstats.hst_lenmismatch++;
1116 			err = EINVAL;
1117 			goto errout;
1118 		}
1119 		if (enccrd->crd_skip < maccrd->crd_skip) {
1120 			ubsecstats.hst_skipmismatch++;
1121 			err = EINVAL;
1122 			goto errout;
1123 		}
1124 		sskip = maccrd->crd_skip;
1125 		cpskip = dskip = enccrd->crd_skip;
1126 		stheend = maccrd->crd_len;
1127 		dtheend = enccrd->crd_len;
1128 		coffset = enccrd->crd_skip - maccrd->crd_skip;
1129 		cpoffset = cpskip + dtheend;
1130 #ifdef UBSEC_DEBUG
1131 		if (ubsec_debug) {
1132 			printf("mac: skip %d, len %d, inject %d\n",
1133 			       maccrd->crd_skip, maccrd->crd_len, maccrd->crd_inject);
1134 			printf("enc: skip %d, len %d, inject %d\n",
1135 			       enccrd->crd_skip, enccrd->crd_len, enccrd->crd_inject);
1136 			printf("src: skip %d, len %d\n", sskip, stheend);
1137 			printf("dst: skip %d, len %d\n", dskip, dtheend);
1138 			printf("ubs: coffset %d, pktlen %d, cpskip %d, cpoffset %d\n",
1139 			       coffset, stheend, cpskip, cpoffset);
1140 		}
1141 #endif
1142 	} else {
1143 		cpskip = dskip = sskip = macoffset + encoffset;
1144 		dtheend = stheend = (enccrd)?enccrd->crd_len:maccrd->crd_len;
1145 		cpoffset = cpskip + dtheend;
1146 		coffset = 0;
1147 	}
1148 	ctx.pc_offset = htole16(coffset >> 2);
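	/*
	 * pc_offset is expressed in 32-bit words.  E.g. for an ESP
	 * packet whose encrypted region starts 16 bytes (SPI, sequence
	 * number and IV) after the start of the authenticated region,
	 * coffset is 16 and pc_offset is htole16(4).
	 */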
1149 
1150 	/* XXX FIXME: jonathan asks, what the heck's that 0xfff0?  */
1151 	if (bus_dmamap_create(sc->sc_dmat, 0xfff0, UBS_MAX_SCATTER,
1152 		0xfff0, 0, BUS_DMA_NOWAIT, &q->q_src_map) != 0) {
1153 		err = ENOMEM;
1154 		goto errout;
1155 	}
1156 	if (crp->crp_flags & CRYPTO_F_IMBUF) {
1157 		if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_src_map,
1158 		    q->q_src_m, BUS_DMA_NOWAIT) != 0) {
1159 			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1160 			q->q_src_map = NULL;
1161 			ubsecstats.hst_noload++;
1162 			err = ENOMEM;
1163 			goto errout;
1164 		}
1165 	} else if (crp->crp_flags & CRYPTO_F_IOV) {
1166 		if (bus_dmamap_load_uio(sc->sc_dmat, q->q_src_map,
1167 		    q->q_src_io, BUS_DMA_NOWAIT) != 0) {
1168 			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1169 			q->q_src_map = NULL;
1170 			ubsecstats.hst_noload++;
1171 			err = ENOMEM;
1172 			goto errout;
1173 		}
1174 	}
1175 	nicealign = ubsec_dmamap_aligned(q->q_src_map);
1176 
1177 	dmap->d_dma->d_mcr.mcr_pktlen = htole16(stheend);
1178 
1179 #ifdef UBSEC_DEBUG
1180 	if (ubsec_debug)
1181 		printf("src skip: %d nicealign: %u\n", sskip, nicealign);
1182 #endif
1183 	for (i = j = 0; i < q->q_src_map->dm_nsegs; i++) {
1184 		struct ubsec_pktbuf *pb;
1185 		bus_size_t packl = q->q_src_map->dm_segs[i].ds_len;
1186 		bus_addr_t packp = q->q_src_map->dm_segs[i].ds_addr;
1187 
1188 		if (sskip >= packl) {
1189 			sskip -= packl;
1190 			continue;
1191 		}
1192 
1193 		packl -= sskip;
1194 		packp += sskip;
1195 		sskip = 0;
1196 
1197 		if (packl > 0xfffc) {
1198 			err = EIO;
1199 			goto errout;
1200 		}
1201 
1202 		if (j == 0)
1203 			pb = &dmap->d_dma->d_mcr.mcr_ipktbuf;
1204 		else
1205 			pb = &dmap->d_dma->d_sbuf[j - 1];
1206 
1207 		pb->pb_addr = htole32(packp);
1208 
1209 		if (stheend) {
1210 			if (packl > stheend) {
1211 				pb->pb_len = htole32(stheend);
1212 				stheend = 0;
1213 			} else {
1214 				pb->pb_len = htole32(packl);
1215 				stheend -= packl;
1216 			}
1217 		} else
1218 			pb->pb_len = htole32(packl);
1219 
1220 		if ((i + 1) == q->q_src_map->dm_nsegs)
1221 			pb->pb_next = 0;
1222 		else
1223 			pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1224 			    offsetof(struct ubsec_dmachunk, d_sbuf[j]));
1225 		j++;
1226 	}
1227 
1228 	if (enccrd == NULL && maccrd != NULL) {
1229 		dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr = 0;
1230 		dmap->d_dma->d_mcr.mcr_opktbuf.pb_len = 0;
1231 		dmap->d_dma->d_mcr.mcr_opktbuf.pb_next = htole32(dmap->d_alloc.dma_paddr +
1232 		    offsetof(struct ubsec_dmachunk, d_macbuf[0]));
1233 #ifdef UBSEC_DEBUG
1234 		if (ubsec_debug)
1235 			printf("opkt: %x %x %x\n",
1236 	 		    dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr,
1237 	 		    dmap->d_dma->d_mcr.mcr_opktbuf.pb_len,
1238 	 		    dmap->d_dma->d_mcr.mcr_opktbuf.pb_next);
1239 
1240 #endif
1241 	} else {
1242 		if (crp->crp_flags & CRYPTO_F_IOV) {
1243 			if (!nicealign) {
1244 				ubsecstats.hst_iovmisaligned++;
1245 				err = EINVAL;
1246 				goto errout;
1247 			}
1248 			/* XXX: ``what the heck's that'' 0xfff0? */
1249 			if (bus_dmamap_create(sc->sc_dmat, 0xfff0,
1250 			    UBS_MAX_SCATTER, 0xfff0, 0, BUS_DMA_NOWAIT,
1251 			    &q->q_dst_map) != 0) {
1252 				ubsecstats.hst_nomap++;
1253 				err = ENOMEM;
1254 				goto errout;
1255 			}
1256 			if (bus_dmamap_load_uio(sc->sc_dmat, q->q_dst_map,
1257 			    q->q_dst_io, BUS_DMA_NOWAIT) != 0) {
1258 				bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map);
1259 				q->q_dst_map = NULL;
1260 				ubsecstats.hst_noload++;
1261 				err = ENOMEM;
1262 				goto errout;
1263 			}
1264 		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
1265 			if (nicealign) {
1266 				q->q_dst_m = q->q_src_m;
1267 				q->q_dst_map = q->q_src_map;
1268 			} else {
1269 				int totlen, len;
1270 				struct mbuf *m, *top, **mp;
1271 
1272 				ubsecstats.hst_unaligned++;
1273 				totlen = q->q_src_map->dm_mapsize;
1274 				if (q->q_src_m->m_flags & M_PKTHDR) {
1275 					len = MHLEN;
1276 					MGETHDR(m, M_DONTWAIT, MT_DATA);
1277 					/* XXX FIXME: want m_dup_pkthdr; this stub always fails the copy */
1278 					if (m && 1 /*!m_dup_pkthdr(m, q->q_src_m, M_DONTWAIT)*/) {
1279 						m_free(m);
1280 						m = NULL;
1281 					}
1282 				} else {
1283 					len = MLEN;
1284 					MGET(m, M_DONTWAIT, MT_DATA);
1285 				}
1286 				if (m == NULL) {
1287 					ubsecstats.hst_nombuf++;
1288 					err = sc->sc_nqueue ? ERESTART : ENOMEM;
1289 					goto errout;
1290 				}
1291 				if (len == MHLEN)
1292 				  /*XXX was M_DUP_PKTHDR*/
1293 				  M_COPY_PKTHDR(m, q->q_src_m);
1294 				if (totlen >= MINCLSIZE) {
1295 					MCLGET(m, M_DONTWAIT);
1296 					if ((m->m_flags & M_EXT) == 0) {
1297 						m_free(m);
1298 						ubsecstats.hst_nomcl++;
1299 						err = sc->sc_nqueue ? ERESTART : ENOMEM;
1300 						goto errout;
1301 					}
1302 					len = MCLBYTES;
1303 				}
1304 				m->m_len = len;
1305 				top = NULL;
1306 				mp = &top;
1307 
1308 				while (totlen > 0) {
1309 					if (top) {
1310 						MGET(m, M_DONTWAIT, MT_DATA);
1311 						if (m == NULL) {
1312 							m_freem(top);
1313 							ubsecstats.hst_nombuf++;
1314 							err = sc->sc_nqueue ? ERESTART : ENOMEM;
1315 							goto errout;
1316 						}
1317 						len = MLEN;
1318 					}
1319 					if (top && totlen >= MINCLSIZE) {
1320 						MCLGET(m, M_DONTWAIT);
1321 						if ((m->m_flags & M_EXT) == 0) {
1322 							*mp = m;
1323 							m_freem(top);
1324 							ubsecstats.hst_nomcl++;
1325 							err = sc->sc_nqueue ? ERESTART : ENOMEM;
1326 							goto errout;
1327 						}
1328 						len = MCLBYTES;
1329 					}
1330 					m->m_len = len = min(totlen, len);
1331 					totlen -= len;
1332 					*mp = m;
1333 					mp = &m->m_next;
1334 				}
1335 				q->q_dst_m = top;
1336 				ubsec_mcopy(q->q_src_m, q->q_dst_m,
1337 				    cpskip, cpoffset);
1338 				/* XXX again, what the heck is that 0xfff0? */
1339 				if (bus_dmamap_create(sc->sc_dmat, 0xfff0,
1340 				    UBS_MAX_SCATTER, 0xfff0, 0, BUS_DMA_NOWAIT,
1341 				    &q->q_dst_map) != 0) {
1342 					ubsecstats.hst_nomap++;
1343 					err = ENOMEM;
1344 					goto errout;
1345 				}
1346 				if (bus_dmamap_load_mbuf(sc->sc_dmat,
1347 				    q->q_dst_map, q->q_dst_m,
1348 				    BUS_DMA_NOWAIT) != 0) {
1349 					bus_dmamap_destroy(sc->sc_dmat,
1350 					    q->q_dst_map);
1351 					q->q_dst_map = NULL;
1352 					ubsecstats.hst_noload++;
1353 					err = ENOMEM;
1354 					goto errout;
1355 				}
1356 			}
1357 		} else {
1358 			ubsecstats.hst_badflags++;
1359 			err = EINVAL;
1360 			goto errout;
1361 		}
1362 
1363 #ifdef UBSEC_DEBUG
1364 		if (ubsec_debug)
1365 			printf("dst skip: %d\n", dskip);
1366 #endif
1367 		for (i = j = 0; i < q->q_dst_map->dm_nsegs; i++) {
1368 			struct ubsec_pktbuf *pb;
1369 			bus_size_t packl = q->q_dst_map->dm_segs[i].ds_len;
1370 			bus_addr_t packp = q->q_dst_map->dm_segs[i].ds_addr;
1371 
1372 			if (dskip >= packl) {
1373 				dskip -= packl;
1374 				continue;
1375 			}
1376 
1377 			packl -= dskip;
1378 			packp += dskip;
1379 			dskip = 0;
1380 
1381 			if (packl > 0xfffc) {
1382 				err = EIO;
1383 				goto errout;
1384 			}
1385 
1386 			if (j == 0)
1387 				pb = &dmap->d_dma->d_mcr.mcr_opktbuf;
1388 			else
1389 				pb = &dmap->d_dma->d_dbuf[j - 1];
1390 
1391 			pb->pb_addr = htole32(packp);
1392 
1393 			if (dtheend) {
1394 				if (packl > dtheend) {
1395 					pb->pb_len = htole32(dtheend);
1396 					dtheend = 0;
1397 				} else {
1398 					pb->pb_len = htole32(packl);
1399 					dtheend -= packl;
1400 				}
1401 			} else
1402 				pb->pb_len = htole32(packl);
1403 
1404 			if ((i + 1) == q->q_dst_map->dm_nsegs) {
1405 				if (maccrd)
1406 					pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1407 					    offsetof(struct ubsec_dmachunk, d_macbuf[0]));
1408 				else
1409 					pb->pb_next = 0;
1410 			} else
1411 				pb->pb_next = htole32(dmap->d_alloc.dma_paddr +
1412 				    offsetof(struct ubsec_dmachunk, d_dbuf[j]));
1413 			j++;
1414 		}
1415 	}
1416 
1417 	dmap->d_dma->d_mcr.mcr_cmdctxp = htole32(dmap->d_alloc.dma_paddr +
1418 	    offsetof(struct ubsec_dmachunk, d_ctx));
1419 
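	/*
	 * Newer parts (BCM5820 and later, UBS_FLAGS_LONGCTX) expect a
	 * self-describing long-form context with explicit length and
	 * type fields; older parts take the fixed-layout short form,
	 * copied verbatim below.
	 */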
1420 	if (sc->sc_flags & UBS_FLAGS_LONGCTX) {
1421 		struct ubsec_pktctx_long *ctxl;
1422 
1423 		ctxl = (struct ubsec_pktctx_long *)((char *)dmap->d_alloc.dma_vaddr +
1424 		    offsetof(struct ubsec_dmachunk, d_ctx));
1425 
1426 		/* transform small context into long context */
1427 		ctxl->pc_len = htole16(sizeof(struct ubsec_pktctx_long));
1428 		ctxl->pc_type = htole16(UBS_PKTCTX_TYPE_IPSEC);
1429 		ctxl->pc_flags = ctx.pc_flags;
1430 		ctxl->pc_offset = ctx.pc_offset;
1431 		for (i = 0; i < 6; i++)
1432 			ctxl->pc_deskey[i] = ctx.pc_deskey[i];
1433 		for (i = 0; i < 5; i++)
1434 			ctxl->pc_hminner[i] = ctx.pc_hminner[i];
1435 		for (i = 0; i < 5; i++)
1436 			ctxl->pc_hmouter[i] = ctx.pc_hmouter[i];
1437 		ctxl->pc_iv[0] = ctx.pc_iv[0];
1438 		ctxl->pc_iv[1] = ctx.pc_iv[1];
1439 	} else
1440 		memcpy((char *)dmap->d_alloc.dma_vaddr +
1441 		    offsetof(struct ubsec_dmachunk, d_ctx), &ctx,
1442 		    sizeof(struct ubsec_pktctx));
1443 
1444 	s = splnet();
1445 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue, q, q_next);
1446 	sc->sc_nqueue++;
1447 	ubsecstats.hst_ipackets++;
1448 	ubsecstats.hst_ibytes += dmap->d_alloc.dma_map->dm_mapsize;
1449 	if ((hint & CRYPTO_HINT_MORE) == 0 || sc->sc_nqueue >= ubsec_maxbatch)
1450 		ubsec_feed(sc);
1451 	splx(s);
1452 	return (0);
1453 
1454 errout:
1455 	if (q != NULL) {
1456 		if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m))
1457 			m_freem(q->q_dst_m);
1458 
1459 		if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) {
1460 			bus_dmamap_unload(sc->sc_dmat, q->q_dst_map);
1461 			bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map);
1462 		}
1463 		if (q->q_src_map != NULL) {
1464 			bus_dmamap_unload(sc->sc_dmat, q->q_src_map);
1465 			bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1466 		}
1467 
1468 		s = splnet();
1469 		SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1470 		splx(s);
1471 	}
1472 #if 0 /* jonathan says: this openbsd code seems to be subsumed elsewhere */
1473 	if (err == EINVAL)
1474 		ubsecstats.hst_invalid++;
1475 	else
1476 		ubsecstats.hst_nomem++;
1477 #endif
1478 	if (err != ERESTART) {
1479 		crp->crp_etype = err;
1480 		crypto_done(crp);
1481 	} else {
1482 		sc->sc_needwakeup |= CRYPTO_SYMQ;
1483 	}
1484 	return (err);
1485 }
1486 
1487 static void
1488 ubsec_callback(struct ubsec_softc *sc, struct ubsec_q *q)
1489 {
1490 	struct cryptop *crp = (struct cryptop *)q->q_crp;
1491 	struct cryptodesc *crd;
1492 	struct ubsec_dma *dmap = q->q_dma;
1493 
1494 	ubsecstats.hst_opackets++;
1495 	ubsecstats.hst_obytes += dmap->d_alloc.dma_size;
1496 
1497 	bus_dmamap_sync(sc->sc_dmat, dmap->d_alloc.dma_map, 0,
1498 	    dmap->d_alloc.dma_map->dm_mapsize,
1499 	    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
1500 	if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) {
1501 		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
1502 		    0, q->q_dst_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1503 		bus_dmamap_unload(sc->sc_dmat, q->q_dst_map);
1504 		bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map);
1505 	}
1506 	bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
1507 	    0, q->q_src_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1508 	bus_dmamap_unload(sc->sc_dmat, q->q_src_map);
1509 	bus_dmamap_destroy(sc->sc_dmat, q->q_src_map);
1510 
1511 	if ((crp->crp_flags & CRYPTO_F_IMBUF) && (q->q_src_m != q->q_dst_m)) {
1512 		m_freem(q->q_src_m);
1513 		crp->crp_buf = (void *)q->q_dst_m;
1514 	}
1515 
1516 	/* copy out IV for future use */
1517 	if (q->q_flags & UBSEC_QFLAGS_COPYOUTIV) {
1518 		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1519 			if (crd->crd_alg != CRYPTO_DES_CBC &&
1520 			    crd->crd_alg != CRYPTO_3DES_CBC)
1521 				continue;
1522 			if (crp->crp_flags & CRYPTO_F_IMBUF)
1523 				m_copydata((struct mbuf *)crp->crp_buf,
1524 				    crd->crd_skip + crd->crd_len - 8, 8,
1525 				    (void *)sc->sc_sessions[q->q_sesn].ses_iv);
1526 			else if (crp->crp_flags & CRYPTO_F_IOV) {
1527 				cuio_copydata((struct uio *)crp->crp_buf,
1528 				    crd->crd_skip + crd->crd_len - 8, 8,
1529 				    (void *)sc->sc_sessions[q->q_sesn].ses_iv);
1530 			}
1531 			break;
1532 		}
1533 	}
1534 
1535 	for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
1536 		if (crd->crd_alg != CRYPTO_MD5_HMAC_96 &&
1537 		    crd->crd_alg != CRYPTO_SHA1_HMAC_96)
1538 			continue;
1539 		if (crp->crp_flags & CRYPTO_F_IMBUF)
1540 			m_copyback((struct mbuf *)crp->crp_buf,
1541 			    crd->crd_inject, 12,
1542 			    (void *)dmap->d_dma->d_macbuf);
1543 		else if (crp->crp_flags & CRYPTO_F_IOV && crp->crp_mac)
1544 			bcopy((void *)dmap->d_dma->d_macbuf,
1545 			    crp->crp_mac, 12);
1546 		break;
1547 	}
1548 	SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1549 	crypto_done(crp);
1550 }
1551 
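/*
 * Copy data between mbuf chains, skipping the byte range from hoffset
 * (inclusive) to toffset (exclusive): headers before hoffset and
 * trailers from toffset onward are copied, while the bytes in between
 * are left for the chip to write directly into the destination chain.
 */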
1552 static void
1553 ubsec_mcopy(struct mbuf *srcm, struct mbuf *dstm, int hoffset, int toffset)
1554 {
1555 	int i, j, dlen, slen;
1556 	char *dptr, *sptr;
1557 
1558 	j = 0;
1559 	sptr = srcm->m_data;
1560 	slen = srcm->m_len;
1561 	dptr = dstm->m_data;
1562 	dlen = dstm->m_len;
1563 
1564 	while (1) {
1565 		for (i = 0; i < min(slen, dlen); i++) {
1566 			if (j < hoffset || j >= toffset)
1567 				*dptr++ = *sptr++;
1568 			slen--;
1569 			dlen--;
1570 			j++;
1571 		}
1572 		if (slen == 0) {
1573 			srcm = srcm->m_next;
1574 			if (srcm == NULL)
1575 				return;
1576 			sptr = srcm->m_data;
1577 			slen = srcm->m_len;
1578 		}
1579 		if (dlen == 0) {
1580 			dstm = dstm->m_next;
1581 			if (dstm == NULL)
1582 				return;
1583 			dptr = dstm->m_data;
1584 			dlen = dstm->m_len;
1585 		}
1586 	}
1587 }
1588 
1589 /*
1590  * Feed the MCR2 queue (key and RNG requests); must be called at splnet() or higher.
1591  */
1592 static void
1593 ubsec_feed2(struct ubsec_softc *sc)
1594 {
1595 	struct ubsec_q2 *q;
1596 
1597 	while (!SIMPLEQ_EMPTY(&sc->sc_queue2)) {
1598 		if (READ_REG(sc, BS_STAT) & BS_STAT_MCR2_FULL)
1599 			break;
1600 		q = SIMPLEQ_FIRST(&sc->sc_queue2);
1601 
1602 		bus_dmamap_sync(sc->sc_dmat, q->q_mcr.dma_map, 0,
1603 		    q->q_mcr.dma_map->dm_mapsize,
1604 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1605 		bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, 0,
1606 		    q->q_ctx.dma_map->dm_mapsize,
1607 		    BUS_DMASYNC_PREWRITE);
1608 
1609 		WRITE_REG(sc, BS_MCR2, q->q_mcr.dma_paddr);
1610 		q = SIMPLEQ_FIRST(&sc->sc_queue2);
1611 		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue2, /*q,*/ q_next);
1612 		--sc->sc_nqueue2;
1613 		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip2, q, q_next);
1614 	}
1615 }
1616 
1617 /*
1618  * Callback for completed MCR2 operations (RNG, modexp, RSA private key)
1619  */
1620 static void
1621 ubsec_callback2(struct ubsec_softc *sc, struct ubsec_q2 *q)
1622 {
1623 	struct cryptkop *krp;
1624 	struct ubsec_ctx_keyop *ctx;
1625 
1626 	ctx = (struct ubsec_ctx_keyop *)q->q_ctx.dma_vaddr;
1627 	bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, 0,
1628 	    q->q_ctx.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1629 
1630 	switch (q->q_type) {
1631 #ifndef UBSEC_NO_RNG
1632 	case UBS_CTXOP_RNGSHA1:
1633 	case UBS_CTXOP_RNGBYPASS: {
1634 		struct ubsec_q2_rng *rng = (struct ubsec_q2_rng *)q;
1635 		u_int32_t *p;
1636 		int i;
1637 
1638 		bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, 0,
1639 		    rng->rng_buf.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1640 		p = (u_int32_t *)rng->rng_buf.dma_vaddr;
1641 #ifndef __NetBSD__
1642 		for (i = 0; i < UBSEC_RNG_BUFSIZ; p++, i++)
1643 			add_true_randomness(letoh32(*p));
1644 		rng->rng_used = 0;
1645 #else
1646 		/* XXX NetBSD rnd subsystem too weak */
1647 		i = 0; (void)i;	/* shut off gcc warnings */
1648 #endif
1649 #ifdef __OpenBSD__
1650 		timeout_add(&sc->sc_rngto, sc->sc_rnghz);
1651 #else
1652 		callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc);
1653 #endif
1654 		break;
1655 	}
1656 #endif
1657 	case UBS_CTXOP_MODEXP: {
1658 		struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q;
1659 		u_int rlen, clen;
1660 
1661 		krp = me->me_krp;
1662 		rlen = (me->me_modbits + 7) / 8;
1663 		clen = (krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8;
1664 
1665 		bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map,
1666 		    0, me->me_M.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1667 		bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map,
1668 		    0, me->me_E.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1669 		bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map,
1670 		    0, me->me_C.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1671 		bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map,
1672 		    0, me->me_epb.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1673 
1674 		if (clen < rlen)
1675 			krp->krp_status = E2BIG;
1676 		else {
1677 			if (sc->sc_flags & UBS_FLAGS_HWNORM) {
1678 				bzero(krp->krp_param[krp->krp_iparams].crp_p,
1679 				    (krp->krp_param[krp->krp_iparams].crp_nbits
1680 					+ 7) / 8);
1681 				bcopy(me->me_C.dma_vaddr,
1682 				    krp->krp_param[krp->krp_iparams].crp_p,
1683 				    (me->me_modbits + 7) / 8);
1684 			} else
1685 				ubsec_kshift_l(me->me_shiftbits,
1686 				    me->me_C.dma_vaddr, me->me_normbits,
1687 				    krp->krp_param[krp->krp_iparams].crp_p,
1688 				    krp->krp_param[krp->krp_iparams].crp_nbits);
1689 		}
1690 
1691 		crypto_kdone(krp);
1692 
1693 		/* bzero all potentially sensitive data */
1694 		bzero(me->me_E.dma_vaddr, me->me_E.dma_size);
1695 		bzero(me->me_M.dma_vaddr, me->me_M.dma_size);
1696 		bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
1697 		bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size);
1698 
1699 		/* Can't free here, so put us on the free list. */
1700 		SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &me->me_q, q_next);
1701 		break;
1702 	}
1703 	case UBS_CTXOP_RSAPRIV: {
1704 		struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q;
1705 		u_int len;
1706 
1707 		krp = rp->rpr_krp;
1708 		bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map, 0,
1709 		    rp->rpr_msgin.dma_map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
1710 		bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map, 0,
1711 		    rp->rpr_msgout.dma_map->dm_mapsize, BUS_DMASYNC_POSTREAD);
1712 
1713 		len = (krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_nbits + 7) / 8;
1714 		bcopy(rp->rpr_msgout.dma_vaddr,
1715 		    krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_p, len);
1716 
1717 		crypto_kdone(krp);
1718 
1719 		bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size);
1720 		bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size);
1721 		bzero(rp->rpr_q.q_ctx.dma_vaddr, rp->rpr_q.q_ctx.dma_size);
1722 
1723 		/* Can't free here, so put us on the free list. */
1724 		SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &rp->rpr_q, q_next);
1725 		break;
1726 	}
1727 	default:
1728 		printf("%s: unknown ctx op: %x\n", device_xname(&sc->sc_dv),
1729 		    letoh16(ctx->ctx_op));
1730 		break;
1731 	}
1732 }
1733 
1734 #ifndef UBSEC_NO_RNG
1735 static void
1736 ubsec_rng(void *vsc)
1737 {
1738 	struct ubsec_softc *sc = vsc;
1739 	struct ubsec_q2_rng *rng = &sc->sc_rng;
1740 	struct ubsec_mcr *mcr;
1741 	struct ubsec_ctx_rngbypass *ctx;
1742 	int s;
1743 
1744 	s = splnet();
1745 	if (rng->rng_used) {
1746 		splx(s);
1747 		return;
1748 	}
1749 	sc->sc_nqueue2++;
1750 	if (sc->sc_nqueue2 >= UBS_MAX_NQUEUE)
1751 		goto out;
1752 
1753 	mcr = (struct ubsec_mcr *)rng->rng_q.q_mcr.dma_vaddr;
1754 	ctx = (struct ubsec_ctx_rngbypass *)rng->rng_q.q_ctx.dma_vaddr;
1755 
1756 	mcr->mcr_pkts = htole16(1);
1757 	mcr->mcr_flags = 0;
1758 	mcr->mcr_cmdctxp = htole32(rng->rng_q.q_ctx.dma_paddr);
1759 	mcr->mcr_ipktbuf.pb_addr = mcr->mcr_ipktbuf.pb_next = 0;
1760 	mcr->mcr_ipktbuf.pb_len = 0;
1761 	mcr->mcr_reserved = mcr->mcr_pktlen = 0;
1762 	mcr->mcr_opktbuf.pb_addr = htole32(rng->rng_buf.dma_paddr);
1763 	mcr->mcr_opktbuf.pb_len = htole32((sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ) &
1764 	    UBS_PKTBUF_LEN);
1765 	mcr->mcr_opktbuf.pb_next = 0;
1766 
1767 	ctx->rbp_len = htole16(sizeof(struct ubsec_ctx_rngbypass));
1768 	ctx->rbp_op = htole16(UBS_CTXOP_RNGSHA1);
1769 	rng->rng_q.q_type = UBS_CTXOP_RNGSHA1;
1770 
1771 	bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, 0,
1772 	    rng->rng_buf.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
1773 
1774 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rng->rng_q, q_next);
1775 	rng->rng_used = 1;
1776 	ubsec_feed2(sc);
1777 	ubsecstats.hst_rng++;
1778 	splx(s);
1779 
1780 	return;
1781 
1782 out:
1783 	/*
1784 	 * Something weird happened; generate our own callback.
1785 	 */
1786 	sc->sc_nqueue2--;
1787 	splx(s);
1788 #ifdef __OpenBSD__
1789 	timeout_add(&sc->sc_rngto, sc->sc_rnghz);
1790 #else
1791 	callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc);
1792 #endif
1793 }
1794 #endif /* UBSEC_NO_RNG */
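
/*
 * A note on the RNG request built above (a sketch of the data flow as
 * inferred from the structures in use): the MCR carries no input
 * packet (pb_addr/pb_len are 0) and a single output buffer of
 * UBSEC_RNG_BUFSIZ 32-bit words, which the chip fills with
 * SHA1-processed random data (UBS_CTXOP_RNGSHA1).  Only one RNG
 * request is ever outstanding (rng_used); if queue 2 is full, the
 * request is abandoned and retried from the callout.
 */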
1795 
1796 static int
1797 ubsec_dma_malloc(struct ubsec_softc *sc, bus_size_t size,
1798 		 struct ubsec_dma_alloc *dma, int mapflags)
1799 {
1800 	int r;
1801 
1802 	if ((r = bus_dmamem_alloc(sc->sc_dmat, size, PAGE_SIZE, 0,
1803 	    &dma->dma_seg, 1, &dma->dma_nseg, BUS_DMA_NOWAIT)) != 0)
1804 		goto fail_0;
1805 
1806 	if ((r = bus_dmamem_map(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg,
1807 	    size, &dma->dma_vaddr, mapflags | BUS_DMA_NOWAIT)) != 0)
1808 		goto fail_1;
1809 
1810 	if ((r = bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
1811 	    BUS_DMA_NOWAIT, &dma->dma_map)) != 0)
1812 		goto fail_2;
1813 
1814 	if ((r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr,
1815 	    size, NULL, BUS_DMA_NOWAIT)) != 0)
1816 		goto fail_3;
1817 
1818 	dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
1819 	dma->dma_size = size;
1820 	return (0);
1821 
1822 fail_3:
1823 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
1824 fail_2:
1825 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, size);
1826 fail_1:
1827 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
1828 fail_0:
1829 	dma->dma_map = NULL;
1830 	return (r);
1831 }
1832 
1833 static void
1834 ubsec_dma_free(struct ubsec_softc *sc, struct ubsec_dma_alloc *dma)
1835 {
1836 	bus_dmamap_unload(sc->sc_dmat, dma->dma_map);
1837 	bus_dmamem_unmap(sc->sc_dmat, dma->dma_vaddr, dma->dma_size);
1838 	bus_dmamem_free(sc->sc_dmat, &dma->dma_seg, dma->dma_nseg);
1839 	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
1840 }
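
/*
 * Illustrative sketch (not part of the driver): ubsec_dma_malloc()
 * performs the usual four-step bus_dma dance -- bus_dmamem_alloc,
 * bus_dmamem_map, bus_dmamap_create, bus_dmamap_load -- so a
 * hypothetical caller needing one DMA-able command record would do:
 *
 *	struct ubsec_dma_alloc a;
 *
 *	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &a, 0) != 0)
 *		return (ENOMEM);
 *	... use a.dma_vaddr (CPU view) and a.dma_paddr (device view) ...
 *	ubsec_dma_free(sc, &a);
 *
 * ubsec_dma_free() unloads the map, unmaps and frees the memory, and
 * destroys the map, releasing everything the four steps acquired.
 */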
1841 
1842 /*
1843  * Resets the board.  Register values are left as they were
1844  * by the reset (i.e. initial values are assigned elsewhere).
1845  */
1846 static void
1847 ubsec_reset_board(struct ubsec_softc *sc)
1848 {
1849 	volatile u_int32_t ctrl;
1850 
1851 	ctrl = READ_REG(sc, BS_CTRL);
1852 	ctrl |= BS_CTRL_RESET;
1853 	WRITE_REG(sc, BS_CTRL, ctrl);
1854 
1855 	/*
1856 	 * Wait approx. 30 PCI clocks = 900 ns = 0.9 us; DELAY(10) is ample.
1857 	 */
1858 	DELAY(10);
1859 }
1860 
1861 /*
1862  * Init Broadcom registers
1863  */
1864 static void
1865 ubsec_init_board(struct ubsec_softc *sc)
1866 {
1867 	u_int32_t ctrl;
1868 
1869 	ctrl = READ_REG(sc, BS_CTRL);
1870 	ctrl &= ~(BS_CTRL_BE32 | BS_CTRL_BE64);
1871 	ctrl |= BS_CTRL_LITTLE_ENDIAN | BS_CTRL_MCR1INT;
1872 
1873 	/*
1874 	 * XXX: Sam Leffler's code has (UBS_FLAGS_KEY|UBS_FLAGS_RNG).
1875 	 * Anyone got hw docs?
1876 	 */
1877 	if (sc->sc_flags & UBS_FLAGS_KEY)
1878 		ctrl |= BS_CTRL_MCR2INT;
1879 	else
1880 		ctrl &= ~BS_CTRL_MCR2INT;
1881 
1882 	if (sc->sc_flags & UBS_FLAGS_HWNORM)
1883 		ctrl &= ~BS_CTRL_SWNORM;
1884 
1885 	WRITE_REG(sc, BS_CTRL, ctrl);
1886 }
1887 
1888 /*
1889  * Init Broadcom PCI registers
1890  */
1891 static void
1892 ubsec_init_pciregs(struct pci_attach_args *pa)
1893 {
1894 	pci_chipset_tag_t pc = pa->pa_pc;
1895 	u_int32_t misc;
1896 
1897 	/*
1898 	 * Set the cache line size to 1 (UBS_DEF_CACHELINE): this
1899 	 * forces the BCM58xx chip to do burst reads/writes only,
1900 	 * since cache line reads/writes are too slow.
1901 	 */
1902 	misc = pci_conf_read(pc, pa->pa_tag, PCI_BHLC_REG);
1903 	misc = (misc & ~(PCI_CACHELINE_MASK << PCI_CACHELINE_SHIFT))
1904 	    | ((UBS_DEF_CACHELINE & 0xff) << PCI_CACHELINE_SHIFT);
1905 	pci_conf_write(pc, pa->pa_tag, PCI_BHLC_REG, misc);
1906 }
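
/*
 * PCI_BHLC_REG packs BIST, header type, latency timer and cache line
 * size into one 32-bit register, with the cache line size in the low
 * byte.  The read-modify-write above therefore only touches the
 * cacheline field; e.g. (assuming UBS_DEF_CACHELINE == 1, per the
 * comment above) a BHLC value of 0x00004008 becomes 0x00004001.
 */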
1907 
1908 /*
1909  * Clean up after a chip crash.
1910  * It is assumed that the caller is in splnet().
1911  */
1912 static void
1913 ubsec_cleanchip(struct ubsec_softc *sc)
1914 {
1915 	struct ubsec_q *q;
1916 
1917 	while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
1918 		q = SIMPLEQ_FIRST(&sc->sc_qchip);
1919 		SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, /*q,*/ q_next);
1920 		ubsec_free_q(sc, q);
1921 	}
1922 	sc->sc_nqchip = 0;
1923 }
1924 
1925 /*
1926  * Free a ubsec_q.
1927  * It is assumed that the caller is within splnet().
1928  */
1929 static int
1930 ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q)
1931 {
1932 	struct ubsec_q *q2;
1933 	struct cryptop *crp;
1934 	int npkts;
1935 	int i;
1936 
1937 	npkts = q->q_nstacked_mcrs;
1938 
1939 	for (i = 0; i < npkts; i++) {
1940 		if (q->q_stacked_mcr[i]) {
1941 			q2 = q->q_stacked_mcr[i];
1942 
1943 			if ((q2->q_dst_m != NULL) && (q2->q_src_m != q2->q_dst_m))
1944 				m_freem(q2->q_dst_m);
1945 
1946 			crp = (struct cryptop *)q2->q_crp;
1947 
1948 			SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q2, q_next);
1949 
1950 			crp->crp_etype = EFAULT;
1951 			crypto_done(crp);
1952 		} else {
1953 			break;
1954 		}
1955 	}
1956 
1957 	/*
1958 	 * Free header MCR
1959 	 */
1960 	if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m))
1961 		m_freem(q->q_dst_m);
1962 
1963 	crp = (struct cryptop *)q->q_crp;
1964 
1965 	SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next);
1966 
1967 	crp->crp_etype = EFAULT;
1968 	crypto_done(crp);
1969 	return (0);
1970 }
1971 
1972 /*
1973  * Routine to reset the chip and clean up.
1974  * It is assumed that the caller is in splnet()
1975  */
1976 static void
1977 ubsec_totalreset(struct ubsec_softc *sc)
1978 {
1979 	ubsec_reset_board(sc);
1980 	ubsec_init_board(sc);
1981 	ubsec_cleanchip(sc);
1982 }
1983 
1984 static int
1985 ubsec_dmamap_aligned(bus_dmamap_t map)
1986 {
1987 	int i;
1988 
1989 	for (i = 0; i < map->dm_nsegs; i++) {
1990 		if (map->dm_segs[i].ds_addr & 3)
1991 			return (0);
1992 		if ((i != (map->dm_nsegs - 1)) &&
1993 		    (map->dm_segs[i].ds_len & 3))
1994 			return (0);
1995 	}
1996 	return (1);
1997 }
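
/*
 * Worked example: a two-segment map with segs[0] = { addr 0x1000,
 * len 0x1001 } fails the check above because a non-final segment is
 * not a multiple of 4 bytes.  Every segment must start on a 4-byte
 * boundary, and every segment but the last must also have a length
 * that is a multiple of 4, because a misaligned packet buffer can
 * hang the chip (see the DIAGNOSTIC panics below).
 */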
1998 
1999 #ifdef __OpenBSD__
2000 struct ubsec_softc *
2001 ubsec_kfind(struct cryptkop *krp)
2002 {
2003 	struct ubsec_softc *sc;
2004 	int i;
2005 
2006 	for (i = 0; i < ubsec_cd.cd_ndevs; i++) {
2007 		sc = ubsec_cd.cd_devs[i];
2008 		if (sc == NULL)
2009 			continue;
2010 		if (sc->sc_cid == krp->krp_hid)
2011 			return (sc);
2012 	}
2013 	return (NULL);
2014 }
2015 #endif
2016 
2017 static void
2018 ubsec_kfree(struct ubsec_softc *sc, struct ubsec_q2 *q)
2019 {
2020 	switch (q->q_type) {
2021 	case UBS_CTXOP_MODEXP: {
2022 		struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q;
2023 
2024 		ubsec_dma_free(sc, &me->me_q.q_mcr);
2025 		ubsec_dma_free(sc, &me->me_q.q_ctx);
2026 		ubsec_dma_free(sc, &me->me_M);
2027 		ubsec_dma_free(sc, &me->me_E);
2028 		ubsec_dma_free(sc, &me->me_C);
2029 		ubsec_dma_free(sc, &me->me_epb);
2030 		free(me, M_DEVBUF);
2031 		break;
2032 	}
2033 	case UBS_CTXOP_RSAPRIV: {
2034 		struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q;
2035 
2036 		ubsec_dma_free(sc, &rp->rpr_q.q_mcr);
2037 		ubsec_dma_free(sc, &rp->rpr_q.q_ctx);
2038 		ubsec_dma_free(sc, &rp->rpr_msgin);
2039 		ubsec_dma_free(sc, &rp->rpr_msgout);
2040 		free(rp, M_DEVBUF);
2041 		break;
2042 	}
2043 	default:
2044 		printf("%s: invalid kfree 0x%x\n", device_xname(&sc->sc_dv),
2045 		    q->q_type);
2046 		break;
2047 	}
2048 }
2049 
2050 static int
2051 ubsec_kprocess(void *arg, struct cryptkop *krp, int hint)
2052 {
2053 	struct ubsec_softc *sc;
2054 	int r;
2055 
2056 	if (krp == NULL || krp->krp_callback == NULL)
2057 		return (EINVAL);
2058 #ifdef __OpenBSD__
2059 	if ((sc = ubsec_kfind(krp)) == NULL)
2060 		return (EINVAL);
2061 #else
2062 	sc = arg;
2063 	KASSERT(sc != NULL /*, ("ubsec_kprocess: null softc")*/);
2064 #endif
2065 
2066 	while (!SIMPLEQ_EMPTY(&sc->sc_q2free)) {
2067 		struct ubsec_q2 *q;
2068 
2069 		q = SIMPLEQ_FIRST(&sc->sc_q2free);
2070 		SIMPLEQ_REMOVE_HEAD(&sc->sc_q2free, /*q,*/ q_next);
2071 		ubsec_kfree(sc, q);
2072 	}
2073 
2074 	switch (krp->krp_op) {
2075 	case CRK_MOD_EXP:
2076 		if (sc->sc_flags & UBS_FLAGS_HWNORM)
2077 			r = ubsec_kprocess_modexp_hw(sc, krp, hint);
2078 		else
2079 			r = ubsec_kprocess_modexp_sw(sc, krp, hint);
2080 		break;
2081 	case CRK_MOD_EXP_CRT:
2082 		r = ubsec_kprocess_rsapriv(sc, krp, hint);
2083 		break;
2084 	default:
2085 		printf("%s: kprocess: invalid op 0x%x\n",
2086 		    device_xname(&sc->sc_dv), krp->krp_op);
2087 		krp->krp_status = EOPNOTSUPP;
2088 		crypto_kdone(krp);
2089 		r = 0;
2090 	}
2091 	return (r);
2092 }
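
/*
 * ubsec_kprocess() is the opencrypto key-op entry point.  Queue-2
 * entries that the interrupt callback could only defer to sc_q2free
 * ("can't free here") are reclaimed first via ubsec_kfree(); the
 * request is then dispatched: CRK_MOD_EXP to the modexp variant
 * matching the chip's normalization capability, CRK_MOD_EXP_CRT to
 * the RSA private-key (CRT) path.
 */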
2093 
2094 /*
2095  * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (sw normalization)
2096  */
2097 static int
2098 ubsec_kprocess_modexp_sw(struct ubsec_softc *sc, struct cryptkop *krp,
2099 			 int hint)
2100 {
2101 	struct ubsec_q2_modexp *me;
2102 	struct ubsec_mcr *mcr;
2103 	struct ubsec_ctx_modexp *ctx;
2104 	struct ubsec_pktbuf *epb;
2105 	int s, err = 0;
2106 	u_int nbits, normbits, mbits, shiftbits, ebits;
2107 
2108 	me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT);
2109 	if (me == NULL) {
2110 		err = ENOMEM;
2111 		goto errout;
2112 	}
2113 	bzero(me, sizeof *me);
2114 	me->me_krp = krp;
2115 	me->me_q.q_type = UBS_CTXOP_MODEXP;
2116 
2117 	nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]);
2118 	if (nbits <= 512)
2119 		normbits = 512;
2120 	else if (nbits <= 768)
2121 		normbits = 768;
2122 	else if (nbits <= 1024)
2123 		normbits = 1024;
2124 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536)
2125 		normbits = 1536;
2126 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048)
2127 		normbits = 2048;
2128 	else {
2129 		err = E2BIG;
2130 		goto errout;
2131 	}
2132 
2133 	shiftbits = normbits - nbits;
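
	/*
	 * Worked example: a 1000-bit modulus selects the next supported
	 * operand width, normbits = 1024, so shiftbits = 24: the
	 * operands are shifted up by 24 bits (ubsec_kshift_r) before
	 * being handed to the chip, and the result is shifted back
	 * down by 24 bits on completion (ubsec_kshift_l).
	 */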
2134 
2135 	me->me_modbits = nbits;
2136 	me->me_shiftbits = shiftbits;
2137 	me->me_normbits = normbits;
2138 
2139 	/* Sanity check: result bits must be >= true modulus bits. */
2140 	if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) {
2141 		err = ERANGE;
2142 		goto errout;
2143 	}
2144 
2145 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2146 	    &me->me_q.q_mcr, 0)) {
2147 		err = ENOMEM;
2148 		goto errout;
2149 	}
2150 	mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr;
2151 
2152 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp),
2153 	    &me->me_q.q_ctx, 0)) {
2154 		err = ENOMEM;
2155 		goto errout;
2156 	}
2157 
2158 	mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]);
2159 	if (mbits > nbits) {
2160 		err = E2BIG;
2161 		goto errout;
2162 	}
2163 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) {
2164 		err = ENOMEM;
2165 		goto errout;
2166 	}
2167 	ubsec_kshift_r(shiftbits,
2168 	    krp->krp_param[UBS_MODEXP_PAR_M].crp_p, mbits,
2169 	    me->me_M.dma_vaddr, normbits);
2170 
2171 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) {
2172 		err = ENOMEM;
2173 		goto errout;
2174 	}
2175 	bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2176 
2177 	ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]);
2178 	if (ebits > nbits) {
2179 		err = E2BIG;
2180 		goto errout;
2181 	}
2182 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) {
2183 		err = ENOMEM;
2184 		goto errout;
2185 	}
2186 	ubsec_kshift_r(shiftbits,
2187 	    krp->krp_param[UBS_MODEXP_PAR_E].crp_p, ebits,
2188 	    me->me_E.dma_vaddr, normbits);
2189 
2190 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf),
2191 	    &me->me_epb, 0)) {
2192 		err = ENOMEM;
2193 		goto errout;
2194 	}
2195 	epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr;
2196 	epb->pb_addr = htole32(me->me_E.dma_paddr);
2197 	epb->pb_next = 0;
2198 	epb->pb_len = htole32(normbits / 8);
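
	/*
	 * The exponent does not travel in the MCR itself: the input
	 * packet buffer set up below points at M and chains (pb_next)
	 * to this epb, which carries E as a second fragment of
	 * normbits/8 bytes; epb->pb_next == 0 terminates the chain.
	 */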
2199 
2200 #ifdef UBSEC_DEBUG
2201 	if (ubsec_debug) {
2202 		printf("Epb ");
2203 		ubsec_dump_pb(epb);
2204 	}
2205 #endif
2206 
2207 	mcr->mcr_pkts = htole16(1);
2208 	mcr->mcr_flags = 0;
2209 	mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr);
2210 	mcr->mcr_reserved = 0;
2211 	mcr->mcr_pktlen = 0;
2212 
2213 	mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr);
2214 	mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8);
2215 	mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr);
2216 
2217 	mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr);
2218 	mcr->mcr_opktbuf.pb_next = 0;
2219 	mcr->mcr_opktbuf.pb_len = htole32(normbits / 8);
2220 
2221 #ifdef DIAGNOSTIC
2222 	/* Misaligned output buffer will hang the chip. */
2223 	if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0)
2224 		panic("%s: modexp invalid addr 0x%x",
2225 		    device_xname(&sc->sc_dv), letoh32(mcr->mcr_opktbuf.pb_addr));
2226 	if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0)
2227 		panic("%s: modexp invalid len 0x%x",
2228 		    device_xname(&sc->sc_dv), letoh32(mcr->mcr_opktbuf.pb_len));
2229 #endif
2230 
2231 	ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr;
2232 	bzero(ctx, sizeof(*ctx));
2233 	ubsec_kshift_r(shiftbits,
2234 	    krp->krp_param[UBS_MODEXP_PAR_N].crp_p, nbits,
2235 	    ctx->me_N, normbits);
2236 	ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t)));
2237 	ctx->me_op = htole16(UBS_CTXOP_MODEXP);
2238 	ctx->me_E_len = htole16(nbits);
2239 	ctx->me_N_len = htole16(nbits);
2240 
2241 #ifdef UBSEC_DEBUG
2242 	if (ubsec_debug) {
2243 		ubsec_dump_mcr(mcr);
2244 		ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx);
2245 	}
2246 #endif
2247 
2248 	/*
2249 	 * ubsec_feed2 will sync mcr and ctx; we just need to sync
2250 	 * everything else.
2251 	 */
2252 	bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map,
2253 	    0, me->me_M.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2254 	bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map,
2255 	    0, me->me_E.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2256 	bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map,
2257 	    0, me->me_C.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2258 	bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map,
2259 	    0, me->me_epb.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2260 
2261 	/* Enqueue and we're done... */
2262 	s = splnet();
2263 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next);
2264 	ubsec_feed2(sc);
2265 	ubsecstats.hst_modexp++;
2266 	splx(s);
2267 
2268 	return (0);
2269 
2270 errout:
2271 	if (me != NULL) {
2272 		if (me->me_q.q_mcr.dma_map != NULL)
2273 			ubsec_dma_free(sc, &me->me_q.q_mcr);
2274 		if (me->me_q.q_ctx.dma_map != NULL) {
2275 			bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size);
2276 			ubsec_dma_free(sc, &me->me_q.q_ctx);
2277 		}
2278 		if (me->me_M.dma_map != NULL) {
2279 			bzero(me->me_M.dma_vaddr, me->me_M.dma_size);
2280 			ubsec_dma_free(sc, &me->me_M);
2281 		}
2282 		if (me->me_E.dma_map != NULL) {
2283 			bzero(me->me_E.dma_vaddr, me->me_E.dma_size);
2284 			ubsec_dma_free(sc, &me->me_E);
2285 		}
2286 		if (me->me_C.dma_map != NULL) {
2287 			bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2288 			ubsec_dma_free(sc, &me->me_C);
2289 		}
2290 		if (me->me_epb.dma_map != NULL)
2291 			ubsec_dma_free(sc, &me->me_epb);
2292 		free(me, M_DEVBUF);
2293 	}
2294 	krp->krp_status = err;
2295 	crypto_kdone(krp);
2296 	return (0);
2297 }
2298 
2299 /*
2300  * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (hw normalization)
2301  */
2302 static int
2303 ubsec_kprocess_modexp_hw(struct ubsec_softc *sc, struct cryptkop *krp,
2304 			 int hint)
2305 {
2306 	struct ubsec_q2_modexp *me;
2307 	struct ubsec_mcr *mcr;
2308 	struct ubsec_ctx_modexp *ctx;
2309 	struct ubsec_pktbuf *epb;
2310 	int s, err = 0;
2311 	u_int nbits, normbits, mbits, shiftbits, ebits;
2312 
2313 	me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT);
2314 	if (me == NULL) {
2315 		err = ENOMEM;
2316 		goto errout;
2317 	}
2318 	bzero(me, sizeof *me);
2319 	me->me_krp = krp;
2320 	me->me_q.q_type = UBS_CTXOP_MODEXP;
2321 
2322 	nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]);
2323 	if (nbits <= 512)
2324 		normbits = 512;
2325 	else if (nbits <= 768)
2326 		normbits = 768;
2327 	else if (nbits <= 1024)
2328 		normbits = 1024;
2329 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536)
2330 		normbits = 1536;
2331 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048)
2332 		normbits = 2048;
2333 	else {
2334 		err = E2BIG;
2335 		goto errout;
2336 	}
2337 
2338 	shiftbits = normbits - nbits;
2339 
2340 	/* XXX ??? */
2341 	me->me_modbits = nbits;
2342 	me->me_shiftbits = shiftbits;
2343 	me->me_normbits = normbits;
2344 
2345 	/* Sanity check: result bits must be >= true modulus bits. */
2346 	if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) {
2347 		err = ERANGE;
2348 		goto errout;
2349 	}
2350 
2351 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2352 	    &me->me_q.q_mcr, 0)) {
2353 		err = ENOMEM;
2354 		goto errout;
2355 	}
2356 	mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr;
2357 
2358 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp),
2359 	    &me->me_q.q_ctx, 0)) {
2360 		err = ENOMEM;
2361 		goto errout;
2362 	}
2363 
2364 	mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]);
2365 	if (mbits > nbits) {
2366 		err = E2BIG;
2367 		goto errout;
2368 	}
2369 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) {
2370 		err = ENOMEM;
2371 		goto errout;
2372 	}
2373 	bzero(me->me_M.dma_vaddr, normbits / 8);
2374 	bcopy(krp->krp_param[UBS_MODEXP_PAR_M].crp_p,
2375 	    me->me_M.dma_vaddr, (mbits + 7) / 8);
2376 
2377 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) {
2378 		err = ENOMEM;
2379 		goto errout;
2380 	}
2381 	bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2382 
2383 	ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]);
2384 	if (ebits > nbits) {
2385 		err = E2BIG;
2386 		goto errout;
2387 	}
2388 	if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) {
2389 		err = ENOMEM;
2390 		goto errout;
2391 	}
2392 	bzero(me->me_E.dma_vaddr, normbits / 8);
2393 	bcopy(krp->krp_param[UBS_MODEXP_PAR_E].crp_p,
2394 	    me->me_E.dma_vaddr, (ebits + 7) / 8);
2395 
2396 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf),
2397 	    &me->me_epb, 0)) {
2398 		err = ENOMEM;
2399 		goto errout;
2400 	}
2401 	epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr;
2402 	epb->pb_addr = htole32(me->me_E.dma_paddr);
2403 	epb->pb_next = 0;
2404 	epb->pb_len = htole32((ebits + 7) / 8);
2405 
2406 #ifdef UBSEC_DEBUG
2407 	if (ubsec_debug) {
2408 		printf("Epb ");
2409 		ubsec_dump_pb(epb);
2410 	}
2411 #endif
2412 
2413 	mcr->mcr_pkts = htole16(1);
2414 	mcr->mcr_flags = 0;
2415 	mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr);
2416 	mcr->mcr_reserved = 0;
2417 	mcr->mcr_pktlen = 0;
2418 
2419 	mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr);
2420 	mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8);
2421 	mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr);
2422 
2423 	mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr);
2424 	mcr->mcr_opktbuf.pb_next = 0;
2425 	mcr->mcr_opktbuf.pb_len = htole32(normbits / 8);
2426 
2427 #ifdef DIAGNOSTIC
2428 	/* Misaligned output buffer will hang the chip. */
2429 	if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0)
2430 		panic("%s: modexp invalid addr 0x%x",
2431 		    device_xname(&sc->sc_dv), letoh32(mcr->mcr_opktbuf.pb_addr));
2432 	if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0)
2433 		panic("%s: modexp invalid len 0x%x",
2434 		    device_xname(&sc->sc_dv), letoh32(mcr->mcr_opktbuf.pb_len));
2435 #endif
2436 
2437 	ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr;
2438 	bzero(ctx, sizeof(*ctx));
2439 	bcopy(krp->krp_param[UBS_MODEXP_PAR_N].crp_p, ctx->me_N,
2440 	    (nbits + 7) / 8);
2441 	ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t)));
2442 	ctx->me_op = htole16(UBS_CTXOP_MODEXP);
2443 	ctx->me_E_len = htole16(ebits);
2444 	ctx->me_N_len = htole16(nbits);
2445 
2446 #ifdef UBSEC_DEBUG
2447 	if (ubsec_debug) {
2448 		ubsec_dump_mcr(mcr);
2449 		ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx);
2450 	}
2451 #endif
2452 
2453 	/*
2454 	 * ubsec_feed2 will sync mcr and ctx; we just need to sync
2455 	 * everything else.
2456 	 */
2457 	bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map,
2458 	    0, me->me_M.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2459 	bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map,
2460 	    0, me->me_E.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2461 	bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map,
2462 	    0, me->me_C.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2463 	bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map,
2464 	    0, me->me_epb.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2465 
2466 	/* Enqueue and we're done... */
2467 	s = splnet();
2468 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next);
2469 	ubsec_feed2(sc);
2470 	splx(s);
2471 
2472 	return (0);
2473 
2474 errout:
2475 	if (me != NULL) {
2476 		if (me->me_q.q_mcr.dma_map != NULL)
2477 			ubsec_dma_free(sc, &me->me_q.q_mcr);
2478 		if (me->me_q.q_ctx.dma_map != NULL) {
2479 			bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size);
2480 			ubsec_dma_free(sc, &me->me_q.q_ctx);
2481 		}
2482 		if (me->me_M.dma_map != NULL) {
2483 			bzero(me->me_M.dma_vaddr, me->me_M.dma_size);
2484 			ubsec_dma_free(sc, &me->me_M);
2485 		}
2486 		if (me->me_E.dma_map != NULL) {
2487 			bzero(me->me_E.dma_vaddr, me->me_E.dma_size);
2488 			ubsec_dma_free(sc, &me->me_E);
2489 		}
2490 		if (me->me_C.dma_map != NULL) {
2491 			bzero(me->me_C.dma_vaddr, me->me_C.dma_size);
2492 			ubsec_dma_free(sc, &me->me_C);
2493 		}
2494 		if (me->me_epb.dma_map != NULL)
2495 			ubsec_dma_free(sc, &me->me_epb);
2496 		free(me, M_DEVBUF);
2497 	}
2498 	krp->krp_status = err;
2499 	crypto_kdone(krp);
2500 	return (0);
2501 }
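
/*
 * The two modexp paths differ only in who normalizes: the _sw variant
 * pre-shifts M, E and N up by shiftbits with ubsec_kshift_r() and has
 * the completion path shift C back down with ubsec_kshift_l(), while
 * this _hw variant (UBS_FLAGS_HWNORM) just zero-pads the operands to
 * normbits and lets the chip normalize, so the result is copied out
 * unshifted.
 */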
2502 
2503 static int
2504 ubsec_kprocess_rsapriv(struct ubsec_softc *sc, struct cryptkop *krp,
2505 		       int hint)
2506 {
2507 	struct ubsec_q2_rsapriv *rp = NULL;
2508 	struct ubsec_mcr *mcr;
2509 	struct ubsec_ctx_rsapriv *ctx;
2510 	int s, err = 0;
2511 	u_int padlen, msglen;
2512 
2513 	msglen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_P]);
2514 	padlen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_Q]);
2515 	if (msglen > padlen)
2516 		padlen = msglen;
2517 
2518 	if (padlen <= 256)
2519 		padlen = 256;
2520 	else if (padlen <= 384)
2521 		padlen = 384;
2522 	else if (padlen <= 512)
2523 		padlen = 512;
2524 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 768)
2525 		padlen = 768;
2526 	else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 1024)
2527 		padlen = 1024;
2528 	else {
2529 		err = E2BIG;
2530 		goto errout;
2531 	}
2532 
2533 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DP]) > padlen) {
2534 		err = E2BIG;
2535 		goto errout;
2536 	}
2537 
2538 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DQ]) > padlen) {
2539 		err = E2BIG;
2540 		goto errout;
2541 	}
2542 
2543 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_PINV]) > padlen) {
2544 		err = E2BIG;
2545 		goto errout;
2546 	}
2547 
2548 	rp = malloc(sizeof *rp, M_DEVBUF, M_NOWAIT|M_ZERO);
2549 	if (rp == NULL)
2550 		return (ENOMEM);
2551 	rp->rpr_krp = krp;
2552 	rp->rpr_q.q_type = UBS_CTXOP_RSAPRIV;
2553 
2554 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr),
2555 	    &rp->rpr_q.q_mcr, 0)) {
2556 		err = ENOMEM;
2557 		goto errout;
2558 	}
2559 	mcr = (struct ubsec_mcr *)rp->rpr_q.q_mcr.dma_vaddr;
2560 
2561 	if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rsapriv),
2562 	    &rp->rpr_q.q_ctx, 0)) {
2563 		err = ENOMEM;
2564 		goto errout;
2565 	}
2566 	ctx = (struct ubsec_ctx_rsapriv *)rp->rpr_q.q_ctx.dma_vaddr;
2567 	bzero(ctx, sizeof *ctx);
2568 
2569 	/* Copy in p */
2570 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_P].crp_p,
2571 	    &ctx->rpr_buf[0 * (padlen / 8)],
2572 	    (krp->krp_param[UBS_RSAPRIV_PAR_P].crp_nbits + 7) / 8);
2573 
2574 	/* Copy in q */
2575 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_p,
2576 	    &ctx->rpr_buf[1 * (padlen / 8)],
2577 	    (krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_nbits + 7) / 8);
2578 
2579 	/* Copy in dp */
2580 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_p,
2581 	    &ctx->rpr_buf[2 * (padlen / 8)],
2582 	    (krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_nbits + 7) / 8);
2583 
2584 	/* Copy in dq */
2585 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_p,
2586 	    &ctx->rpr_buf[3 * (padlen / 8)],
2587 	    (krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_nbits + 7) / 8);
2588 
2589 	/* Copy in pinv */
2590 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_p,
2591 	    &ctx->rpr_buf[4 * (padlen / 8)],
2592 	    (krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_nbits + 7) / 8);
2593 
2594 	msglen = padlen * 2;
2595 
2596 	/* Copy in input message (aligned buffer/length). */
2597 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGIN]) > msglen) {
2598 		/* Is this likely? */
2599 		err = E2BIG;
2600 		goto errout;
2601 	}
2602 	if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgin, 0)) {
2603 		err = ENOMEM;
2604 		goto errout;
2605 	}
2606 	bzero(rp->rpr_msgin.dma_vaddr, (msglen + 7) / 8);
2607 	bcopy(krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_p,
2608 	    rp->rpr_msgin.dma_vaddr,
2609 	    (krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_nbits + 7) / 8);
2610 
2611 	/* Prepare space for output message (aligned buffer/length). */
2612 	if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT]) < msglen) {
2613 		/* Is this likely? */
2614 		err = E2BIG;
2615 		goto errout;
2616 	}
2617 	if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgout, 0)) {
2618 		err = ENOMEM;
2619 		goto errout;
2620 	}
2621 	bzero(rp->rpr_msgout.dma_vaddr, (msglen + 7) / 8);
2622 
2623 	mcr->mcr_pkts = htole16(1);
2624 	mcr->mcr_flags = 0;
2625 	mcr->mcr_cmdctxp = htole32(rp->rpr_q.q_ctx.dma_paddr);
2626 	mcr->mcr_ipktbuf.pb_addr = htole32(rp->rpr_msgin.dma_paddr);
2627 	mcr->mcr_ipktbuf.pb_next = 0;
2628 	mcr->mcr_ipktbuf.pb_len = htole32(rp->rpr_msgin.dma_size);
2629 	mcr->mcr_reserved = 0;
2630 	mcr->mcr_pktlen = htole16(msglen);
2631 	mcr->mcr_opktbuf.pb_addr = htole32(rp->rpr_msgout.dma_paddr);
2632 	mcr->mcr_opktbuf.pb_next = 0;
2633 	mcr->mcr_opktbuf.pb_len = htole32(rp->rpr_msgout.dma_size);
2634 
2635 #ifdef DIAGNOSTIC
2636 	if (rp->rpr_msgin.dma_paddr & 3 || rp->rpr_msgin.dma_size & 3) {
2637 		panic("%s: rsapriv: invalid msgin 0x%lx(0x%lx)",
2638 		    device_xname(&sc->sc_dv), (u_long) rp->rpr_msgin.dma_paddr,
2639 		    (u_long) rp->rpr_msgin.dma_size);
2640 	}
2641 	if (rp->rpr_msgout.dma_paddr & 3 || rp->rpr_msgout.dma_size & 3) {
2642 		panic("%s: rsapriv: invalid msgout 0x%lx(0x%lx)",
2643 		    device_xname(&sc->sc_dv), (u_long) rp->rpr_msgout.dma_paddr,
2644 		    (u_long) rp->rpr_msgout.dma_size);
2645 	}
2646 #endif
2647 
2648 	ctx->rpr_len = htole16((sizeof(u_int16_t) * 4) + (5 * (padlen / 8)));
2649 	ctx->rpr_op = htole16(UBS_CTXOP_RSAPRIV);
2650 	ctx->rpr_q_len = htole16(padlen);
2651 	ctx->rpr_p_len = htole16(padlen);
2652 
2653 	/*
2654 	 * ubsec_feed2 will sync mcr and ctx; we just need to sync
2655 	 * everything else.
2656 	 */
2657 	bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map,
2658 	    0, rp->rpr_msgin.dma_map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2659 	bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map,
2660 	    0, rp->rpr_msgout.dma_map->dm_mapsize, BUS_DMASYNC_PREREAD);
2661 
2662 	/* Enqueue and we're done... */
2663 	s = splnet();
2664 	SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rp->rpr_q, q_next);
2665 	ubsec_feed2(sc);
2666 	ubsecstats.hst_modexpcrt++;
2667 	splx(s);
2668 	return (0);
2669 
2670 errout:
2671 	if (rp != NULL) {
2672 		if (rp->rpr_q.q_mcr.dma_map != NULL)
2673 			ubsec_dma_free(sc, &rp->rpr_q.q_mcr);
2674 		if (rp->rpr_msgin.dma_map != NULL) {
2675 			bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size);
2676 			ubsec_dma_free(sc, &rp->rpr_msgin);
2677 		}
2678 		if (rp->rpr_msgout.dma_map != NULL) {
2679 			bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size);
2680 			ubsec_dma_free(sc, &rp->rpr_msgout);
2681 		}
2682 		free(rp, M_DEVBUF);
2683 	}
2684 	krp->krp_status = err;
2685 	crypto_kdone(krp);
2686 	return (0);
2687 }
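
/*
 * Layout of the CRT context above: rpr_buf holds the five CRT
 * parameters p, q, dp, dq and pinv back to back, each in a slot of
 * padlen/8 bytes.  Worked example: for a 1024-bit modulus, each prime
 * is about 512 bits, so padlen = 512, each slot is 64 bytes, rpr_buf
 * occupies 320 bytes, and msgin/msgout are msglen = 2 * padlen = 1024
 * bits = 128 bytes each.
 */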
2688 
2689 #ifdef UBSEC_DEBUG
2690 static void
2691 ubsec_dump_pb(volatile struct ubsec_pktbuf *pb)
2692 {
2693 	printf("addr 0x%x (0x%x) next 0x%x\n",
2694 	    letoh32(pb->pb_addr), letoh32(pb->pb_len), letoh32(pb->pb_next));
2695 }
2696 
2697 static void
2698 ubsec_dump_ctx2(volatile struct ubsec_ctx_keyop *c)
2699 {
2700 	printf("CTX (0x%x):\n", letoh16(c->ctx_len));
2701 	switch (letoh16(c->ctx_op)) {
2702 	case UBS_CTXOP_RNGBYPASS:
2703 	case UBS_CTXOP_RNGSHA1:
2704 		break;
2705 	case UBS_CTXOP_MODEXP:
2706 	{
2707 		struct ubsec_ctx_modexp *cx = (void *)c;
2708 		int i, len;
2709 
2710 		printf(" Elen %u, Nlen %u\n",
2711 		    letoh16(cx->me_E_len), letoh16(cx->me_N_len));
2712 		len = (letoh16(cx->me_N_len) + 7) / 8;
2713 		for (i = 0; i < len; i++)
2714 			printf("%s%02x", (i == 0) ? " N: " : ":", cx->me_N[i]);
2715 		printf("\n");
2716 		break;
2717 	}
2718 	default:
2719 		printf("unknown context: %x\n", letoh16(c->ctx_op));
2720 	}
2721 	printf("END CTX\n");
2722 }
2723 
2724 static void
2725 ubsec_dump_mcr(struct ubsec_mcr *mcr)
2726 {
2727 	volatile struct ubsec_mcr_add *ma;
2728 	int i;
2729 
2730 	printf("MCR:\n");
2731 	printf(" pkts: %u, flags 0x%x\n",
2732 	    letoh16(mcr->mcr_pkts), letoh16(mcr->mcr_flags));
2733 	ma = (volatile struct ubsec_mcr_add *)&mcr->mcr_cmdctxp;
2734 	for (i = 0; i < letoh16(mcr->mcr_pkts); i++) {
2735 		printf(" %d: ctx 0x%x len 0x%x rsvd 0x%x\n", i,
2736 		    letoh32(ma->mcr_cmdctxp), letoh16(ma->mcr_pktlen),
2737 		    letoh16(ma->mcr_reserved));
2738 		printf(" %d: ipkt ", i);
2739 		ubsec_dump_pb(&ma->mcr_ipktbuf);
2740 		printf(" %d: opkt ", i);
2741 		ubsec_dump_pb(&ma->mcr_opktbuf);
2742 		ma++;
2743 	}
2744 	printf("END MCR\n");
2745 }
2746 #endif /* UBSEC_DEBUG */
2747 
2748 /*
2749  * Return the number of significant bits of a big number.
2750  */
2751 static int
2752 ubsec_ksigbits(struct crparam *cr)
2753 {
2754 	u_int plen = (cr->crp_nbits + 7) / 8;
2755 	int i, sig = plen * 8;
2756 	u_int8_t c, *p = cr->crp_p;
2757 
2758 	for (i = plen - 1; i >= 0; i--) {
2759 		c = p[i];
2760 		if (c != 0) {
2761 			while ((c & 0x80) == 0) {
2762 				sig--;
2763 				c <<= 1;
2764 			}
2765 			break;
2766 		}
2767 		sig -= 8;
2768 	}
2769 	return (sig);
2770 }
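
/*
 * Worked example: for a parameter with crp_nbits = 16 and the
 * little-endian byte array crp_p = { 0x34, 0x02 } (the value 0x0234),
 * the scan starts at the most significant byte p[1] = 0x02 and strips
 * six leading zero bits, returning 16 - 6 = 10 significant bits.
 */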
2771 
2772 static void
2773 ubsec_kshift_r(u_int shiftbits, u_int8_t *src, u_int srcbits,
2774     u_int8_t *dst, u_int dstbits)
2775 {
2776 	u_int slen, dlen;
2777 	int i, si, di, n;
2778 
2779 	slen = (srcbits + 7) / 8;
2780 	dlen = (dstbits + 7) / 8;
2781 
2782 	for (i = 0; i < slen; i++)
2783 		dst[i] = src[i];
2784 	for (i = 0; i < dlen - slen; i++)
2785 		dst[slen + i] = 0;
2786 
2787 	n = shiftbits / 8;
2788 	if (n != 0) {
2789 		si = dlen - n - 1;
2790 		di = dlen - 1;
2791 		while (si >= 0)
2792 			dst[di--] = dst[si--];
2793 		while (di >= 0)
2794 			dst[di--] = 0;
2795 	}
2796 
2797 	n = shiftbits % 8;
2798 	if (n != 0) {
2799 		for (i = dlen - 1; i > 0; i--)
2800 			dst[i] = (dst[i] << n) |
2801 			    (dst[i - 1] >> (8 - n));
2802 		dst[0] = dst[0] << n;
2803 	}
2804 }
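
/*
 * ubsec_kshift_r() widens a little-endian big number to dstbits and
 * shifts it up (towards the MSB) by shiftbits.  Example: shiftbits = 4,
 * src = { 0xab }, dstbits = 16 yields dst = { 0xb0, 0x0a }, i.e.
 * 0xab << 4 == 0xab0.
 */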
2805 
2806 static void
2807 ubsec_kshift_l(u_int shiftbits, u_int8_t *src, u_int srcbits,
2808     u_int8_t *dst, u_int dstbits)
2809 {
2810 	int slen, dlen, i, n;
2811 
2812 	slen = (srcbits + 7) / 8;
2813 	dlen = (dstbits + 7) / 8;
2814 
2815 	n = shiftbits / 8;
2816 	for (i = 0; i < slen; i++)
2817 		dst[i] = src[i + n];
2818 	for (i = 0; i < dlen - slen; i++)
2819 		dst[slen + i] = 0;
2820 
2821 	n = shiftbits % 8;
2822 	if (n != 0) {
2823 		for (i = 0; i < (dlen - 1); i++)
2824 			dst[i] = (dst[i] >> n) | (dst[i + 1] << (8 - n));
2825 		dst[dlen - 1] = dst[dlen - 1] >> n;
2826 	}
2827 }
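
/*
 * ubsec_kshift_l() is the inverse: it shifts a little-endian big
 * number down by shiftbits.  Example: shiftbits = 4,
 * src = { 0xb0, 0x0a } (0x0ab0), srcbits = 16 yields
 * dst = { 0xab, 0x00 }, i.e. 0x0ab0 >> 4 == 0xab.
 */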
2828