xref: /netbsd-src/sys/arch/sparc64/dev/iommu.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: iommu.c,v 1.86 2009/02/15 13:04:03 martin Exp $	*/
2 
3 /*
4  * Copyright (c) 1999, 2000 Matthew R. Green
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2001, 2002 Eduardo Horvath
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. The name of the author may not be used to endorse or promote products
42  *    derived from this software without specific prior written permission.
43  *
44  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
49  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
50  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
51  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54  * SUCH DAMAGE.
55  */
56 
57 /*
58  * UltraSPARC IOMMU support; used by both the sbus and pci code.
59  */
60 
61 #include <sys/cdefs.h>
62 __KERNEL_RCSID(0, "$NetBSD: iommu.c,v 1.86 2009/02/15 13:04:03 martin Exp $");
63 
64 #include "opt_ddb.h"
65 
66 #include <sys/param.h>
67 #include <sys/extent.h>
68 #include <sys/malloc.h>
69 #include <sys/systm.h>
70 #include <sys/device.h>
71 #include <sys/proc.h>
72 
73 #include <uvm/uvm_extern.h>
74 
75 #include <machine/bus.h>
76 #include <sparc64/dev/iommureg.h>
77 #include <sparc64/dev/iommuvar.h>
78 
79 #include <machine/autoconf.h>
80 #include <machine/cpu.h>
81 
/*
 * Debug tracing.  When DEBUG is defined, DPRINTF(level, (fmt, args...))
 * prints if any bit of `level' is set in iommudebug; the second argument
 * must be a fully parenthesised printf argument list.  Without DEBUG the
 * macro expands to nothing and its arguments are not evaluated.
 */
#ifdef DEBUG
#define IDB_BUSDMA	0x1
#define IDB_IOMMU	0x2
#define IDB_INFO	0x4
#define	IDB_SYNC	0x8
int iommudebug = 0x0;
/* `l' is parenthesised so that compound levels such as (A | B) work. */
#define DPRINTF(l, s)   do { if (iommudebug & (l)) printf s; } while (0)
#else
#define DPRINTF(l, s)
#endif

/*
 * Ask the streaming buffer `i' to flush the page at DVMA address `v' by
 * writing `v' to its strbuf_pgflush register.  Does nothing when the
 * streaming buffer has no flush flag (sb_flush is NULL).
 */
#define iommu_strbuf_flush(i, v) do {					\
	if ((i)->sb_flush)						\
		bus_space_write_8((i)->sb_is->is_bustag, (i)->sb_sb,	\
			STRBUFREG(strbuf_pgflush), (v));		\
	} while (0)
98 
99 static	int iommu_strbuf_flush_done(struct strbuf_ctl *);
100 static	void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
101 		bus_size_t, int);
102 
103 /*
104  * initialise the UltraSPARC IOMMU (SBUS or PCI):
105  *	- allocate and setup the iotsb.
106  *	- enable the IOMMU
107  *	- initialise the streaming buffers (if they exist)
108  *	- create a private DVMA map.
109  */
110 void
111 iommu_init(char *name, struct iommu_state *is, int tsbsize, uint32_t iovabase)
112 {
113 	psize_t size;
114 	vaddr_t va;
115 	paddr_t pa;
116 	struct vm_page *pg;
117 	struct pglist pglist;
118 
119 	/*
120 	 * Setup the iommu.
121 	 *
122 	 * The sun4u iommu is part of the SBUS or PCI controller so we will
123 	 * deal with it here..
124 	 *
125 	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
126 	 * 0xffffe000, but the starting address depends on the size of the
127 	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
128 	 * entry is 8 bytes.  The start of the map can be calculated by
129 	 * (0xffffe000 << (8 + is->is_tsbsize)).
130 	 *
131 	 * But sabre and hummingbird use a different scheme that seems to
132 	 * be hard-wired, so we read the start and size from the PROM and
133 	 * just use those values.
134 	 */
135 	is->is_cr = (tsbsize << 16) | IOMMUCR_EN;
136 	is->is_tsbsize = tsbsize;
137 	if (iovabase == -1) {
138 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
139 		is->is_dvmaend = IOTSB_VEND;
140 	} else {
141 		is->is_dvmabase = iovabase;
142 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize);
143 	}
144 
145 	/*
146 	 * Allocate memory for I/O pagetables.  They need to be physically
147 	 * contiguous.
148 	 */
149 
150 	size = PAGE_SIZE << is->is_tsbsize;
151 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
152 		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
153 		panic("iommu_init: no memory");
154 
155 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
156 	if (va == 0)
157 		panic("iommu_init: no memory");
158 	is->is_tsb = (int64_t *)va;
159 
160 	is->is_ptsb = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist));
161 
162 	/* Map the pages */
163 	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
164 		pa = VM_PAGE_TO_PHYS(pg);
165 		pmap_kenter_pa(va, pa | PMAP_NVC, VM_PROT_READ | VM_PROT_WRITE);
166 		va += PAGE_SIZE;
167 	}
168 	pmap_update(pmap_kernel());
169 	memset(is->is_tsb, 0, size);
170 
171 #ifdef DEBUG
172 	if (iommudebug & IDB_INFO)
173 	{
174 		/* Probe the iommu */
175 
176 		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
177 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
178 				offsetof (struct iommureg, iommu_cr)),
179 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
180 				offsetof (struct iommureg, iommu_tsb)),
181 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
182 				offsetof (struct iommureg, iommu_flush)));
183 		printf("iommu cr=%llx tsb=%llx\n",
184 			(unsigned long long)bus_space_read_8(is->is_bustag,
185 				is->is_iommu,
186 				offsetof (struct iommureg, iommu_cr)),
187 			(unsigned long long)bus_space_read_8(is->is_bustag,
188 				is->is_iommu,
189 				offsetof (struct iommureg, iommu_tsb)));
190 		printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
191 			(unsigned long long)is->is_ptsb);
192 		delay(1000000); /* 1 s */
193 	}
194 #endif
195 
196 	/*
197 	 * now actually start up the IOMMU
198 	 */
199 	iommu_reset(is);
200 
201 	/*
202 	 * Now all the hardware's working we need to allocate a dvma map.
203 	 */
204 	printf("DVMA map: %x to %x\n",
205 		(unsigned int)is->is_dvmabase,
206 		(unsigned int)is->is_dvmaend);
207 	printf("IOTSB: %llx to %llx\n",
208 		(unsigned long long)is->is_ptsb,
209 		(unsigned long long)(is->is_ptsb + size));
210 	is->is_dvmamap = extent_create(name,
211 	    is->is_dvmabase, is->is_dvmaend - PAGE_SIZE,
212 	    M_DEVBUF, 0, 0, EX_NOWAIT);
213 }
214 
215 /*
216  * Streaming buffers don't exist on the UltraSPARC IIi; we should have
217  * detected that already and disabled them.  If not, we will notice that
218  * they aren't there when the STRBUF_EN bit does not remain.
219  */
220 void
221 iommu_reset(struct iommu_state *is)
222 {
223 	int i;
224 	struct strbuf_ctl *sb;
225 
226 	/* Need to do 64-bit stores */
227 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
228 		is->is_ptsb);
229 
230 	/* Enable IOMMU in diagnostic mode */
231 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_cr),
232 		is->is_cr|IOMMUCR_DE);
233 
234 	for (i = 0; i < 2; i++) {
235 		if ((sb = is->is_sb[i])) {
236 
237 			/* Enable diagnostics mode? */
238 			bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
239 				STRBUFREG(strbuf_ctl), STRBUF_EN);
240 
241 			/* No streaming buffers? Disable them */
242 			if (bus_space_read_8(is->is_bustag,
243 				is->is_sb[i]->sb_sb,
244 				STRBUFREG(strbuf_ctl)) == 0) {
245 				is->is_sb[i]->sb_flush = NULL;
246 			} else {
247 
248 				/*
249 				 * locate the pa of the flush buffer.
250 				 */
251 				(void)pmap_extract(pmap_kernel(),
252 					(vaddr_t)is->is_sb[i]->sb_flush,
253 					&is->is_sb[i]->sb_flushpa);
254 			}
255 		}
256 	}
257 }
258 
259 /*
260  * Here are the iommu control routines.
261  */
262 void
263 iommu_enter(struct strbuf_ctl *sb, vaddr_t va, int64_t pa, int flags)
264 {
265 	struct iommu_state *is = sb->sb_is;
266 	int strbuf = (flags & BUS_DMA_STREAMING);
267 	int64_t tte;
268 
269 #ifdef DIAGNOSTIC
270 	if (va < is->is_dvmabase || va > is->is_dvmaend)
271 		panic("iommu_enter: va %#lx not in DVMA space", va);
272 #endif
273 
274 	/* Is the streamcache flush really needed? */
275 	if (sb->sb_flush) {
276 		iommu_strbuf_flush(sb, va);
277 		iommu_strbuf_flush_done(sb);
278 	} else
279 		/* If we can't flush the strbuf don't enable it. */
280 		strbuf = 0;
281 
282 	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
283 		!(flags & BUS_DMA_NOCACHE), (strbuf));
284 #ifdef DEBUG
285 	tte |= (flags & 0xff000LL)<<(4*8);
286 #endif
287 
288 	DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
289 		       (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
290 	is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
291 	bus_space_write_8(is->is_bustag, is->is_iommu,
292 		IOMMUREG(iommu_flush), va);
293 	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
294 		va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
295 		(void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
296 		(u_long)tte));
297 }
298 
299 /*
300  * Find the value of a DVMA address (debug routine).
301  */
302 paddr_t
303 iommu_extract(struct iommu_state *is, vaddr_t dva)
304 {
305 	int64_t tte = 0;
306 
307 	if (dva >= is->is_dvmabase && dva < is->is_dvmaend)
308 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
309 
310 	if ((tte & IOTTE_V) == 0)
311 		return ((paddr_t)-1L);
312 	return (tte & IOTTE_PAMASK);
313 }
314 
315 /*
316  * iommu_remove: removes mappings created by iommu_enter
317  *
318  * Only demap from IOMMU if flag is set.
319  *
320  * XXX: this function needs better internal error checking.
321  */
322 void
323 iommu_remove(struct iommu_state *is, vaddr_t va, size_t len)
324 {
325 
326 #ifdef DIAGNOSTIC
327 	if (va < is->is_dvmabase || va > is->is_dvmaend)
328 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
329 	if ((long)(va + len) < (long)va)
330 		panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
331 		      (long) va, (long) len);
332 	if (len & ~0xfffffff)
333 		panic("iommu_remove: ridiculous len 0x%lx", (u_long)len);
334 #endif
335 
336 	va = trunc_page(va);
337 	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%lx]@%p\n",
338 		va, (u_long)IOTSBSLOT(va, is->is_tsbsize),
339 		&is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)]));
340 	while (len > 0) {
341 		DPRINTF(IDB_IOMMU, ("iommu_remove: clearing TSB slot %d "
342 			"for va %p size %lx\n",
343 			(int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va,
344 			(u_long)len));
345 		if (len <= PAGE_SIZE)
346 			len = 0;
347 		else
348 			len -= PAGE_SIZE;
349 
350 		/* XXX Zero-ing the entry would not require RMW */
351 		is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
352 		bus_space_write_8(is->is_bustag, is->is_iommu,
353 			IOMMUREG(iommu_flush), va);
354 		va += PAGE_SIZE;
355 	}
356 }
357 
358 static int
359 iommu_strbuf_flush_done(struct strbuf_ctl *sb)
360 {
361 	struct iommu_state *is = sb->sb_is;
362 	struct timeval cur, flushtimeout;
363 
364 #define BUMPTIME(t, usec) { \
365 	register volatile struct timeval *tp = (t); \
366 	register long us; \
367  \
368 	tp->tv_usec = us = tp->tv_usec + (usec); \
369 	if (us >= 1000000) { \
370 		tp->tv_usec = us - 1000000; \
371 		tp->tv_sec++; \
372 	} \
373 }
374 
375 	if (!sb->sb_flush)
376 		return (0);
377 
378 	/*
379 	 * Streaming buffer flushes:
380 	 *
381 	 *   1 Tell strbuf to flush by storing va to strbuf_pgflush.  If
382 	 *     we're not on a cache line boundary (64-bits):
383 	 *   2 Store 0 in flag
384 	 *   3 Store pointer to flag in flushsync
385 	 *   4 wait till flushsync becomes 0x1
386 	 *
387 	 * If it takes more than .5 sec, something
388 	 * went wrong.
389 	 */
390 
391 	*sb->sb_flush = 0;
392 	bus_space_write_8(is->is_bustag, sb->sb_sb,
393 		STRBUFREG(strbuf_flushsync), sb->sb_flushpa);
394 
395 	microtime(&flushtimeout);
396 	cur = flushtimeout;
397 	BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */
398 
399 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flush = %lx "
400 		"at va = %lx pa = %lx now=%"PRIx64":%"PRIx32" until = %"PRIx64":%"PRIx32"\n",
401 		(long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
402 		cur.tv_sec, cur.tv_usec,
403 		flushtimeout.tv_sec, flushtimeout.tv_usec));
404 
405 	/* Bypass non-coherent D$ */
406 	while ((!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) &&
407 		timercmp(&cur, &flushtimeout, <=))
408 		microtime(&cur);
409 
410 #ifdef DIAGNOSTIC
411 	if (!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) {
412 		printf("iommu_strbuf_flush_done: flush timeout %p, at %p\n",
413 			(void *)(u_long)*sb->sb_flush,
414 			(void *)(u_long)sb->sb_flushpa); /* panic? */
415 #ifdef DDB
416 		Debugger();
417 #endif
418 	}
419 #endif
420 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flushed\n"));
421 	return (*sb->sb_flush);
422 }
423 
424 /*
425  * IOMMU DVMA operations, common to SBUS and PCI.
426  */
427 int
428 iommu_dvmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
429 	bus_size_t buflen, struct proc *p, int flags)
430 {
431 	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
432 	struct iommu_state *is = sb->sb_is;
433 	int s;
434 	int err;
435 	bus_size_t sgsize;
436 	paddr_t curaddr;
437 	u_long dvmaddr, sgstart, sgend;
438 	bus_size_t align, boundary, len;
439 	vaddr_t vaddr = (vaddr_t)buf;
440 	int seg;
441 	struct pmap *pmap;
442 
443 	if (map->dm_nsegs) {
444 		/* Already in use?? */
445 #ifdef DIAGNOSTIC
446 		printf("iommu_dvmamap_load: map still in use\n");
447 #endif
448 		bus_dmamap_unload(t, map);
449 	}
450 
451 	/*
452 	 * Make sure that on error condition we return "no valid mappings".
453 	 */
454 	map->dm_nsegs = 0;
455 	if (buflen > map->_dm_size) {
456 		DPRINTF(IDB_BUSDMA,
457 		    ("iommu_dvmamap_load(): error %d > %d -- "
458 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
459 		return (EINVAL);
460 	}
461 
462 	sgsize = round_page(buflen + ((int)vaddr & PGOFSET));
463 
464 	/*
465 	 * A boundary presented to bus_dmamem_alloc() takes precedence
466 	 * over boundary in the map.
467 	 */
468 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
469 		boundary = map->_dm_boundary;
470 	align = max(map->dm_segs[0]._ds_align, PAGE_SIZE);
471 
472 	/*
473 	 * If our segment size is larger than the boundary we need to
474 	 * split the transfer up int little pieces ourselves.
475 	 */
476 	s = splhigh();
477 	err = extent_alloc(is->is_dvmamap, sgsize, align,
478 	    (sgsize > boundary) ? 0 : boundary,
479 	    EX_NOWAIT|EX_BOUNDZERO, &dvmaddr);
480 	splx(s);
481 
482 #ifdef DEBUG
483 	if (err || (dvmaddr == (u_long)-1)) {
484 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
485 		    (int)sgsize, flags);
486 #ifdef DDB
487 		Debugger();
488 #endif
489 	}
490 #endif
491 	if (err != 0)
492 		return (err);
493 
494 	if (dvmaddr == (u_long)-1)
495 		return (ENOMEM);
496 
497 	/* Set the active DVMA map */
498 	map->_dm_dvmastart = dvmaddr;
499 	map->_dm_dvmasize = sgsize;
500 
501 	/*
502 	 * Now split the DVMA range into segments, not crossing
503 	 * the boundary.
504 	 */
505 	seg = 0;
506 	sgstart = dvmaddr + (vaddr & PGOFSET);
507 	sgend = sgstart + buflen - 1;
508 	map->dm_segs[seg].ds_addr = sgstart;
509 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: boundary %lx boundary - 1 %lx "
510 	    "~(boundary - 1) %lx\n", (long)boundary, (long)(boundary - 1),
511 	    (long)~(boundary - 1)));
512 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
513 		/* Oops.  We crossed a boundary.  Split the xfer. */
514 		len = boundary - (sgstart & (boundary - 1));
515 		map->dm_segs[seg].ds_len = len;
516 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
517 		    "seg %d start %lx size %lx\n", seg,
518 		    (long)map->dm_segs[seg].ds_addr,
519 		    (long)map->dm_segs[seg].ds_len));
520 		if (++seg >= map->_dm_segcnt) {
521 			/* Too many segments.  Fail the operation. */
522 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
523 			    "too many segments %d\n", seg));
524 			s = splhigh();
525 			/* How can this fail?  And if it does what can we do? */
526 			err = extent_free(is->is_dvmamap,
527 			    dvmaddr, sgsize, EX_NOWAIT);
528 			map->_dm_dvmastart = 0;
529 			map->_dm_dvmasize = 0;
530 			splx(s);
531 			return (EFBIG);
532 		}
533 		sgstart += len;
534 		map->dm_segs[seg].ds_addr = sgstart;
535 	}
536 	map->dm_segs[seg].ds_len = sgend - sgstart + 1;
537 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
538 	    "seg %d start %lx size %lx\n", seg,
539 	    (long)map->dm_segs[seg].ds_addr, (long)map->dm_segs[seg].ds_len));
540 	map->dm_nsegs = seg + 1;
541 	map->dm_mapsize = buflen;
542 
543 	if (p != NULL)
544 		pmap = p->p_vmspace->vm_map.pmap;
545 	else
546 		pmap = pmap_kernel();
547 
548 	for (; buflen > 0; ) {
549 
550 		/*
551 		 * Get the physical address for this page.
552 		 */
553 		if (pmap_extract(pmap, (vaddr_t)vaddr, &curaddr) == FALSE) {
554 #ifdef DIAGNOSTIC
555 			printf("iommu_dvmamap_load: pmap_extract failed %lx\n", vaddr);
556 #endif
557 			bus_dmamap_unload(t, map);
558 			return (-1);
559 		}
560 
561 		/*
562 		 * Compute the segment size, and adjust counts.
563 		 */
564 		sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET);
565 		if (buflen < sgsize)
566 			sgsize = buflen;
567 
568 		DPRINTF(IDB_BUSDMA,
569 		    ("iommu_dvmamap_load: map %p loading va %p "
570 		    "dva %lx at pa %lx\n",
571 		    map, (void *)vaddr, (long)dvmaddr,
572 		    (long)(curaddr & ~(PAGE_SIZE-1))));
573 		iommu_enter(sb, trunc_page(dvmaddr), trunc_page(curaddr),
574 		    flags|0x4000);
575 
576 		dvmaddr += PAGE_SIZE;
577 		vaddr += sgsize;
578 		buflen -= sgsize;
579 	}
580 #ifdef DIAGNOSTIC
581 	for (seg = 0; seg < map->dm_nsegs; seg++) {
582 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
583 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
584 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
585 			    seg, (long)map->dm_segs[seg].ds_addr,
586 			    is->is_dvmabase, is->is_dvmaend);
587 #ifdef DDB
588 			Debugger();
589 #endif
590 		}
591 	}
592 #endif
593 	return (0);
594 }
595 
596 
597 void
598 iommu_dvmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
599 {
600 	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
601 	struct iommu_state *is = sb->sb_is;
602 	int error, s;
603 	bus_size_t sgsize = map->_dm_dvmasize;
604 
605 	/* Flush the iommu */
606 #ifdef DEBUG
607 	if (!map->_dm_dvmastart) {
608 		printf("iommu_dvmamap_unload: No dvmastart is zero\n");
609 #ifdef DDB
610 		Debugger();
611 #endif
612 	}
613 #endif
614 	iommu_remove(is, map->_dm_dvmastart, map->_dm_dvmasize);
615 
616 	/* Flush the caches */
617 	bus_dmamap_unload(t->_parent, map);
618 
619 	/* Mark the mappings as invalid. */
620 	map->dm_mapsize = 0;
621 	map->dm_nsegs = 0;
622 
623 	s = splhigh();
624 	error = extent_free(is->is_dvmamap, map->_dm_dvmastart,
625 		map->_dm_dvmasize, EX_NOWAIT);
626 	map->_dm_dvmastart = 0;
627 	map->_dm_dvmasize = 0;
628 	splx(s);
629 	if (error != 0)
630 		printf("warning: %qd of DVMA space lost\n", (long long)sgsize);
631 
632 	/* Clear the map */
633 }
634 
635 
636 int
637 iommu_dvmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
638 	bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
639 {
640 	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
641 	struct iommu_state *is = sb->sb_is;
642 	struct vm_page *pg;
643 	int i, j, s;
644 	int left;
645 	int err;
646 	bus_size_t sgsize;
647 	paddr_t pa;
648 	bus_size_t boundary, align;
649 	u_long dvmaddr, sgstart, sgend;
650 	struct pglist *pglist;
651 	int pagesz = PAGE_SIZE;
652 	int npg = 0; /* DEBUG */
653 
654 	if (map->dm_nsegs) {
655 		/* Already in use?? */
656 #ifdef DIAGNOSTIC
657 		printf("iommu_dvmamap_load_raw: map still in use\n");
658 #endif
659 		bus_dmamap_unload(t, map);
660 	}
661 
662 	/*
663 	 * A boundary presented to bus_dmamem_alloc() takes precedence
664 	 * over boundary in the map.
665 	 */
666 	if ((boundary = segs[0]._ds_boundary) == 0)
667 		boundary = map->_dm_boundary;
668 
669 	align = max(segs[0]._ds_align, pagesz);
670 
671 	/*
672 	 * Make sure that on error condition we return "no valid mappings".
673 	 */
674 	map->dm_nsegs = 0;
675 	/* Count up the total number of pages we need */
676 	pa = segs[0].ds_addr;
677 	sgsize = 0;
678 	left = size;
679 	for (i = 0; left && i < nsegs; i++) {
680 		if (round_page(pa) != round_page(segs[i].ds_addr))
681 			sgsize = round_page(sgsize);
682 		sgsize += min(left, segs[i].ds_len);
683 		left -= segs[i].ds_len;
684 		pa = segs[i].ds_addr + segs[i].ds_len;
685 	}
686 	sgsize = round_page(sgsize) + PAGE_SIZE; /* XXX reserve extra dvma page */
687 
688 	s = splhigh();
689 	/*
690 	 * If our segment size is larger than the boundary we need to
691 	 * split the transfer up into little pieces ourselves.
692 	 */
693 	err = extent_alloc(is->is_dvmamap, sgsize, align,
694 		(sgsize > boundary) ? 0 : boundary,
695 		((flags & BUS_DMA_NOWAIT) == 0 ? EX_WAITOK : EX_NOWAIT) |
696 		EX_BOUNDZERO, &dvmaddr);
697 	splx(s);
698 
699 	if (err != 0)
700 		return (err);
701 
702 #ifdef DEBUG
703 	if (dvmaddr == (u_long)-1)
704 	{
705 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) failed!\n",
706 		    (int)sgsize, flags);
707 #ifdef DDB
708 		Debugger();
709 #endif
710 	}
711 #endif
712 	if (dvmaddr == (u_long)-1)
713 		return (ENOMEM);
714 
715 	/* Set the active DVMA map */
716 	map->_dm_dvmastart = dvmaddr;
717 	map->_dm_dvmasize = sgsize;
718 
719 	if ((pglist = segs[0]._ds_mlist) == NULL) {
720 		u_long prev_va = 0UL;
721 		paddr_t prev_pa = 0;
722 		int end = 0, offset;
723 
724 		/*
725 		 * This segs is made up of individual physical
726 		 *  segments, probably by _bus_dmamap_load_uio() or
727 		 * _bus_dmamap_load_mbuf().  Ignore the mlist and
728 		 * load each one individually.
729 		 */
730 		map->dm_mapsize = size;
731 
732 		j = 0;
733 		for (i = 0; i < nsegs ; i++) {
734 
735 			pa = segs[i].ds_addr;
736 			offset = (pa & PGOFSET);
737 			pa = trunc_page(pa);
738 			dvmaddr = trunc_page(dvmaddr);
739 			left = min(size, segs[i].ds_len);
740 
741 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: converting "
742 				"physseg %d start %lx size %lx\n", i,
743 				(long)segs[i].ds_addr, (long)segs[i].ds_len));
744 
745 			if ((pa == prev_pa) &&
746 				((offset != 0) || (end != offset))) {
747 				/* We can re-use this mapping */
748 				dvmaddr = prev_va;
749 			}
750 
751 			sgstart = dvmaddr + offset;
752 			sgend = sgstart + left - 1;
753 
754 			/* Are the segments virtually adjacent? */
755 			if ((j > 0) && (end == offset) &&
756 				((offset == 0) || (pa == prev_pa))) {
757 				/* Just append to the previous segment. */
758 				map->dm_segs[--j].ds_len += left;
759 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
760 					"appending seg %d start %lx size %lx\n", j,
761 					(long)map->dm_segs[j].ds_addr,
762 					(long)map->dm_segs[j].ds_len));
763 			} else {
764 				if (j >= map->_dm_segcnt) {
765 					iommu_dvmamap_unload(t, map);
766 					return (EFBIG);
767 				}
768 				map->dm_segs[j].ds_addr = sgstart;
769 				map->dm_segs[j].ds_len = left;
770 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
771 					"seg %d start %lx size %lx\n", j,
772 					(long)map->dm_segs[j].ds_addr,
773 					(long)map->dm_segs[j].ds_len));
774 			}
775 			end = (offset + left) & PGOFSET;
776 
777 			/* Check for boundary issues */
778 			while ((sgstart & ~(boundary - 1)) !=
779 				(sgend & ~(boundary - 1))) {
780 				/* Need a new segment. */
781 				map->dm_segs[j].ds_len =
782 					boundary - (sgstart & (boundary - 1));
783 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
784 					"seg %d start %lx size %lx\n", j,
785 					(long)map->dm_segs[j].ds_addr,
786 					(long)map->dm_segs[j].ds_len));
787 				if (++j >= map->_dm_segcnt) {
788 					iommu_dvmamap_unload(t, map);
789 					return (EFBIG);
790 				}
791 				sgstart = roundup(sgstart, boundary);
792 				map->dm_segs[j].ds_addr = sgstart;
793 				map->dm_segs[j].ds_len = sgend - sgstart + 1;
794 			}
795 
796 			if (sgsize == 0)
797 				panic("iommu_dmamap_load_raw: size botch");
798 
799 			/* Now map a series of pages. */
800 			while (dvmaddr <= sgend) {
801 				DPRINTF(IDB_BUSDMA,
802 					("iommu_dvmamap_load_raw: map %p "
803 						"loading va %lx at pa %lx\n",
804 						map, (long)dvmaddr,
805 						(long)(pa)));
806 				/* Enter it if we haven't before. */
807 				if (prev_va != dvmaddr)
808 					iommu_enter(sb, prev_va = dvmaddr,
809 						prev_pa = pa,
810 						flags | (++npg << 12));
811 				dvmaddr += pagesz;
812 				pa += pagesz;
813 			}
814 
815 			size -= left;
816 			++j;
817 		}
818 
819 		map->dm_nsegs = j;
820 #ifdef DIAGNOSTIC
821 		{ int seg;
822 	for (seg = 0; seg < map->dm_nsegs; seg++) {
823 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
824 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
825 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
826 				seg, (long)map->dm_segs[seg].ds_addr,
827 				is->is_dvmabase, is->is_dvmaend);
828 #ifdef DDB
829 			Debugger();
830 #endif
831 		}
832 	}
833 		}
834 #endif
835 		return (0);
836 	}
837 
838 	/*
839 	 * This was allocated with bus_dmamem_alloc.
840 	 * The pages are on a `pglist'.
841 	 */
842 	map->dm_mapsize = size;
843 	i = 0;
844 	sgstart = dvmaddr;
845 	sgend = sgstart + size - 1;
846 	map->dm_segs[i].ds_addr = sgstart;
847 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
848 		/* Oops.  We crossed a boundary.  Split the xfer. */
849 		map->dm_segs[i].ds_len = boundary - (sgstart & (boundary - 1));
850 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
851 			"seg %d start %lx size %lx\n", i,
852 			(long)map->dm_segs[i].ds_addr,
853 			(long)map->dm_segs[i].ds_len));
854 		if (++i >= map->_dm_segcnt) {
855 			/* Too many segments.  Fail the operation. */
856 			s = splhigh();
857 			/* How can this fail?  And if it does what can we do? */
858 			err = extent_free(is->is_dvmamap,
859 				dvmaddr, sgsize, EX_NOWAIT);
860 			map->_dm_dvmastart = 0;
861 			map->_dm_dvmasize = 0;
862 			splx(s);
863 			return (EFBIG);
864 		}
865 		sgstart = roundup(sgstart, boundary);
866 		map->dm_segs[i].ds_addr = sgstart;
867 	}
868 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
869 			"seg %d start %lx size %lx\n", i,
870 			(long)map->dm_segs[i].ds_addr, (long)map->dm_segs[i].ds_len));
871 	map->dm_segs[i].ds_len = sgend - sgstart + 1;
872 
873 	TAILQ_FOREACH(pg, pglist, pageq.queue) {
874 		if (sgsize == 0)
875 			panic("iommu_dmamap_load_raw: size botch");
876 		pa = VM_PAGE_TO_PHYS(pg);
877 
878 		DPRINTF(IDB_BUSDMA,
879 		    ("iommu_dvmamap_load_raw: map %p loading va %lx at pa %lx\n",
880 		    map, (long)dvmaddr, (long)(pa)));
881 		iommu_enter(sb, dvmaddr, pa, flags|0x8000);
882 
883 		dvmaddr += pagesz;
884 		sgsize -= pagesz;
885 	}
886 	map->dm_mapsize = size;
887 	map->dm_nsegs = i+1;
888 #ifdef DIAGNOSTIC
889 	{ int seg;
890 	for (seg = 0; seg < map->dm_nsegs; seg++) {
891 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
892 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
893 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
894 				seg, (long)map->dm_segs[seg].ds_addr,
895 				is->is_dvmabase, is->is_dvmaend);
896 #ifdef DDB
897 			Debugger();
898 #endif
899 		}
900 	}
901 	}
902 #endif
903 	return (0);
904 }
905 
906 
907 /*
908  * Flush an individual dma segment, returns non-zero if the streaming buffers
909  * need flushing afterwards.
910  */
911 static int
912 iommu_dvmamap_sync_range(struct strbuf_ctl *sb, vaddr_t va, bus_size_t len)
913 {
914 	vaddr_t vaend;
915 	struct iommu_state *is = sb->sb_is;
916 
917 #ifdef DIAGNOSTIC
918 	if (va < is->is_dvmabase || va > is->is_dvmaend)
919 		panic("invalid va: %llx", (long long)va);
920 #endif
921 
922 	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
923 		DPRINTF(IDB_BUSDMA,
924 			("iommu_dvmamap_sync_range: attempting to flush "
925 			 "non-streaming entry\n"));
926 		return (0);
927 	}
928 
929 	vaend = (va + len + PGOFSET) & ~PGOFSET;
930 	va &= ~PGOFSET;
931 
932 #ifdef DIAGNOSTIC
933 	if (va < is->is_dvmabase || vaend > is->is_dvmaend)
934 		panic("invalid va range: %llx to %llx (%x to %x)",
935 		    (long long)va, (long long)vaend,
936 		    is->is_dvmabase,
937 		    is->is_dvmaend);
938 #endif
939 
940 	for ( ; va <= vaend; va += PAGE_SIZE) {
941 		DPRINTF(IDB_BUSDMA,
942 		    ("iommu_dvmamap_sync_range: flushing va %p\n",
943 		    (void *)(u_long)va));
944 		iommu_strbuf_flush(sb, va);
945 	}
946 
947 	return (1);
948 }
949 
950 static void
951 _iommu_dvmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
952 	bus_size_t len, int ops)
953 {
954 	struct strbuf_ctl *sb = (struct strbuf_ctl *)map->_dm_cookie;
955 	bus_size_t count;
956 	int i, needsflush = 0;
957 
958 	if (!sb->sb_flush)
959 		return;
960 
961 	for (i = 0; i < map->dm_nsegs; i++) {
962 		if (offset < map->dm_segs[i].ds_len)
963 			break;
964 		offset -= map->dm_segs[i].ds_len;
965 	}
966 
967 	if (i == map->dm_nsegs)
968 		panic("iommu_dvmamap_sync: segment too short %llu",
969 		    (unsigned long long)offset);
970 
971 	if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTWRITE)) {
972 		/* Nothing to do */;
973 	}
974 
975 	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)) {
976 
977 		for (; len > 0 && i < map->dm_nsegs; i++) {
978 			count = MIN(map->dm_segs[i].ds_len - offset, len);
979 			if (count > 0 &&
980 			    iommu_dvmamap_sync_range(sb,
981 				map->dm_segs[i].ds_addr + offset, count))
982 				needsflush = 1;
983 			offset = 0;
984 			len -= count;
985 		}
986 #ifdef DIAGNOSTIC
987 		if (i == map->dm_nsegs && len > 0)
988 			panic("iommu_dvmamap_sync: leftover %llu",
989 			    (unsigned long long)len);
990 #endif
991 
992 		if (needsflush)
993 			iommu_strbuf_flush_done(sb);
994 	}
995 }
996 
997 void
998 iommu_dvmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
999 	bus_size_t len, int ops)
1000 {
1001 
1002 	if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) {
1003 		/* Flush the CPU then the IOMMU */
1004 		bus_dmamap_sync(t->_parent, map, offset, len, ops);
1005 		_iommu_dvmamap_sync(t, map, offset, len, ops);
1006 	}
1007 	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)) {
1008 		/* Flush the IOMMU then the CPU */
1009 		_iommu_dvmamap_sync(t, map, offset, len, ops);
1010 		bus_dmamap_sync(t->_parent, map, offset, len, ops);
1011 	}
1012 }
1013 
1014 int
1015 iommu_dvmamem_alloc(bus_dma_tag_t t, bus_size_t size, bus_size_t alignment,
1016 	bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
1017 	int flags)
1018 {
1019 
1020 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx bound %llx "
1021 	   "segp %p flags %d\n", (unsigned long long)size,
1022 	   (unsigned long long)alignment, (unsigned long long)boundary,
1023 	   segs, flags));
1024 	return (bus_dmamem_alloc(t->_parent, size, alignment, boundary,
1025 	    segs, nsegs, rsegs, flags|BUS_DMA_DVMA));
1026 }
1027 
1028 void
1029 iommu_dvmamem_free(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs)
1030 {
1031 
1032 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
1033 	    segs, nsegs));
1034 	bus_dmamem_free(t->_parent, segs, nsegs);
1035 }
1036 
1037 /*
1038  * Map the DVMA mappings into the kernel pmap.
1039  * Check the flags to see whether we're streaming or coherent.
1040  */
1041 int
1042 iommu_dvmamem_map(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
1043 	size_t size, void **kvap, int flags)
1044 {
1045 	struct vm_page *pg;
1046 	vaddr_t va;
1047 	bus_addr_t addr;
1048 	struct pglist *pglist;
1049 	int cbit;
1050 	const uvm_flag_t kmflags =
1051 	    (flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0;
1052 
1053 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: segp %p nsegs %d size %lx\n",
1054 	    segs, nsegs, size));
1055 
1056 	/*
1057 	 * Allocate some space in the kernel map, and then map these pages
1058 	 * into this space.
1059 	 */
1060 	size = round_page(size);
1061 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY | kmflags);
1062 	if (va == 0)
1063 		return (ENOMEM);
1064 
1065 	*kvap = (void *)va;
1066 
1067 	/*
1068 	 * digest flags:
1069 	 */
1070 	cbit = 0;
1071 	if (flags & BUS_DMA_COHERENT)	/* Disable vcache */
1072 		cbit |= PMAP_NVC;
1073 	if (flags & BUS_DMA_NOCACHE)	/* sideffects */
1074 		cbit |= PMAP_NC;
1075 
1076 	/*
1077 	 * Now take this and map it into the CPU.
1078 	 */
1079 	pglist = segs[0]._ds_mlist;
1080 	TAILQ_FOREACH(pg, pglist, pageq.queue) {
1081 #ifdef DIAGNOSTIC
1082 		if (size == 0)
1083 			panic("iommu_dvmamem_map: size botch");
1084 #endif
1085 		addr = VM_PAGE_TO_PHYS(pg);
1086 		DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: "
1087 		    "mapping va %lx at %llx\n", va, (unsigned long long)addr | cbit));
1088 		pmap_kenter_pa(va, addr | cbit, VM_PROT_READ | VM_PROT_WRITE);
1089 		va += PAGE_SIZE;
1090 		size -= PAGE_SIZE;
1091 	}
1092 	pmap_update(pmap_kernel());
1093 	return (0);
1094 }
1095 
1096 /*
1097  * Unmap DVMA mappings from kernel
1098  */
1099 void
1100 iommu_dvmamem_unmap(bus_dma_tag_t t, void *kva, size_t size)
1101 {
1102 
1103 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_unmap: kvm %p size %lx\n",
1104 	    kva, size));
1105 
1106 #ifdef DIAGNOSTIC
1107 	if ((u_long)kva & PGOFSET)
1108 		panic("iommu_dvmamem_unmap");
1109 #endif
1110 
1111 	size = round_page(size);
1112 	pmap_kremove((vaddr_t)kva, size);
1113 	pmap_update(pmap_kernel());
1114 	uvm_km_free(kernel_map, (vaddr_t)kva, size, UVM_KMF_VAONLY);
1115 }
1116