xref: /netbsd-src/sys/arch/sparc64/dev/iommu.c (revision 2980e352a13e8f0b545a366830c411e7a542ada8)
1 /*	$NetBSD: iommu.c,v 1.83 2008/06/04 12:41:41 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 1999, 2000 Matthew R. Green
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2001, 2002 Eduardo Horvath
31  * All rights reserved.
32  *
33  * Redistribution and use in source and binary forms, with or without
34  * modification, are permitted provided that the following conditions
35  * are met:
36  * 1. Redistributions of source code must retain the above copyright
37  *    notice, this list of conditions and the following disclaimer.
38  * 2. Redistributions in binary form must reproduce the above copyright
39  *    notice, this list of conditions and the following disclaimer in the
40  *    documentation and/or other materials provided with the distribution.
41  * 3. The name of the author may not be used to endorse or promote products
42  *    derived from this software without specific prior written permission.
43  *
44  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
49  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
50  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
51  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54  * SUCH DAMAGE.
55  */
56 
57 /*
58  * UltraSPARC IOMMU support; used by both the sbus and pci code.
59  */
60 
61 #include <sys/cdefs.h>
62 __KERNEL_RCSID(0, "$NetBSD: iommu.c,v 1.83 2008/06/04 12:41:41 ad Exp $");
63 
64 #include "opt_ddb.h"
65 
66 #include <sys/param.h>
67 #include <sys/extent.h>
68 #include <sys/malloc.h>
69 #include <sys/systm.h>
70 #include <sys/device.h>
71 #include <sys/proc.h>
72 
73 #include <uvm/uvm_extern.h>
74 
75 #include <machine/bus.h>
76 #include <sparc64/sparc64/cache.h>
77 #include <sparc64/dev/iommureg.h>
78 #include <sparc64/dev/iommuvar.h>
79 
80 #include <machine/autoconf.h>
81 #include <machine/cpu.h>
82 
83 #ifdef DEBUG
84 #define IDB_BUSDMA	0x1
85 #define IDB_IOMMU	0x2
86 #define IDB_INFO	0x4
87 #define	IDB_SYNC	0x8
88 int iommudebug = 0x0;
89 #define DPRINTF(l, s)   do { if (iommudebug & l) printf s; } while (0)
90 #else
91 #define DPRINTF(l, s)
92 #endif
93 
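/*
 * Queue a streaming-buffer flush of the page containing DVMA address (v)
 * by writing it to the page-flush register.  This is a no-op when the
 * strbuf_ctl has no flush flag (sb_flush == NULL, i.e. no streaming
 * buffer); completion must be waited for with iommu_strbuf_flush_done().
 */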
94 #define iommu_strbuf_flush(i, v) do {					\
95 	if ((i)->sb_flush)						\
96 		bus_space_write_8((i)->sb_is->is_bustag, (i)->sb_sb,	\
97 			STRBUFREG(strbuf_pgflush), (v));		\
98 	} while (0)
99 
100 static	int iommu_strbuf_flush_done(struct strbuf_ctl *);
101 
102 /*
103  * initialise the UltraSPARC IOMMU (SBUS or PCI):
104  *	- allocate and setup the iotsb.
105  *	- enable the IOMMU
106  *	- initialise the streaming buffers (if they exist)
107  *	- create a private DVMA map.
108  */
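/*
 * iommu_init() is called by the bus driver (sbus or pci) at attach time;
 * tsbsize selects the table size, and iovabase is either the PROM-supplied
 * base (sabre/hummingbird) or -1 to use the standard window ending at
 * 0xffffe000.
 */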
109 void
110 iommu_init(char *name, struct iommu_state *is, int tsbsize, uint32_t iovabase)
111 {
112 	psize_t size;
113 	vaddr_t va;
114 	paddr_t pa;
115 	struct vm_page *pg;
116 	struct pglist pglist;
117 
118 	/*
119 	 * Setup the iommu.
120 	 *
121 	 * The sun4u iommu is part of the SBUS or PCI controller, so we will
122 	 * deal with it here.
123 	 *
124 	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
125 	 * 0xffffe000, but the starting address depends on the size of the
126 	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
127 	 * entry is 8 bytes.  The start of the map can be calculated by
128 	 * (0xffffe000 << (8 + is->is_tsbsize)).
129 	 *
130 	 * But sabre and hummingbird use a different scheme that seems to
131 	 * be hard-wired, so we read the start and size from the PROM and
132 	 * just use those values.
133 	 */
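	/*
	 * Worked example (8 KB base pages, as used below): tsbsize = 3 gives
	 * 1024 * 2^3 = 8192 eight-byte TSB entries (a 64 KB table), each
	 * mapping one page, i.e. a 64 MB DVMA window ending at 0xffffe000.
	 */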
134 	is->is_cr = (tsbsize << 16) | IOMMUCR_EN;
135 	is->is_tsbsize = tsbsize;
136 	if (iovabase == -1) {
137 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
138 		is->is_dvmaend = IOTSB_VEND;
139 	} else {
140 		is->is_dvmabase = iovabase;
141 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize);
142 	}
143 
144 	/*
145 	 * Allocate memory for I/O pagetables.  They need to be physically
146 	 * contiguous.
147 	 */
148 
149 	size = PAGE_SIZE << is->is_tsbsize;
150 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
151 		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
152 		panic("iommu_init: no memory");
153 
154 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
155 	if (va == 0)
156 		panic("iommu_init: no memory");
157 	is->is_tsb = (int64_t *)va;
158 
159 	is->is_ptsb = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist));
160 
161 	/* Map the pages */
162 	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
163 		pa = VM_PAGE_TO_PHYS(pg);
164 		pmap_kenter_pa(va, pa | PMAP_NVC, VM_PROT_READ | VM_PROT_WRITE);
165 		va += PAGE_SIZE;
166 	}
167 	pmap_update(pmap_kernel());
168 	memset(is->is_tsb, 0, size);
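	/* A zero TTE has IOTTE_V clear, so every DVMA page starts out unmapped. */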
169 
170 #ifdef DEBUG
171 	if (iommudebug & IDB_INFO)
172 	{
173 		/* Probe the iommu */
174 
175 		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
176 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
177 				offsetof (struct iommureg, iommu_cr)),
178 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
179 				offsetof (struct iommureg, iommu_tsb)),
180 			(u_long)bus_space_read_8(is->is_bustag, is->is_iommu,
181 				offsetof (struct iommureg, iommu_flush)));
182 		printf("iommu cr=%llx tsb=%llx\n",
183 			(unsigned long long)bus_space_read_8(is->is_bustag,
184 				is->is_iommu,
185 				offsetof (struct iommureg, iommu_cr)),
186 			(unsigned long long)bus_space_read_8(is->is_bustag,
187 				is->is_iommu,
188 				offsetof (struct iommureg, iommu_tsb)));
189 		printf("TSB base %p phys %llx\n", (void *)is->is_tsb,
190 			(unsigned long long)is->is_ptsb);
191 		delay(1000000); /* 1 s */
192 	}
193 #endif
194 
195 	/*
196 	 * now actually start up the IOMMU
197 	 */
198 	iommu_reset(is);
199 
200 	/*
201 	 * Now that all the hardware is working, we need to allocate a DVMA map.
202 	 */
203 	printf("DVMA map: %x to %x\n",
204 		(unsigned int)is->is_dvmabase,
205 		(unsigned int)is->is_dvmaend);
206 	printf("IOTSB: %llx to %llx\n",
207 		(unsigned long long)is->is_ptsb,
208 		(unsigned long long)(is->is_ptsb + size));
209 	is->is_dvmamap = extent_create(name,
210 	    is->is_dvmabase, is->is_dvmaend - PAGE_SIZE,
211 	    M_DEVBUF, 0, 0, EX_NOWAIT);
212 }
213 
214 /*
215  * Streaming buffers don't exist on the UltraSPARC IIi; we should have
216  * detected that already and disabled them.  If not, we will notice that
217  * they aren't there when the STRBUF_EN bit does not remain set.
218  */
219 void
220 iommu_reset(struct iommu_state *is)
221 {
222 	int i;
223 	struct strbuf_ctl *sb;
224 
225 	/* Need to do 64-bit stores */
226 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_tsb),
227 		is->is_ptsb);
228 
229 	/* Enable IOMMU in diagnostic mode */
230 	bus_space_write_8(is->is_bustag, is->is_iommu, IOMMUREG(iommu_cr),
231 		is->is_cr|IOMMUCR_DE);
232 
233 	for (i = 0; i < 2; i++) {
234 		if ((sb = is->is_sb[i])) {
235 
236 			/* Enable diagnostics mode? */
237 			bus_space_write_8(is->is_bustag, is->is_sb[i]->sb_sb,
238 				STRBUFREG(strbuf_ctl), STRBUF_EN);
239 
240 			/* No streaming buffers? Disable them */
241 			if (bus_space_read_8(is->is_bustag,
242 				is->is_sb[i]->sb_sb,
243 				STRBUFREG(strbuf_ctl)) == 0) {
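				/*
				 * STRBUF_EN did not stick, so there is no
				 * streaming buffer here (e.g. UltraSPARC
				 * IIi); clearing sb_flush turns the flush
				 * macros into no-ops.
				 */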
244 				is->is_sb[i]->sb_flush = NULL;
245 			} else {
246 
247 				/*
248 				 * locate the pa of the flush buffer.
249 				 */
250 				(void)pmap_extract(pmap_kernel(),
251 					(vaddr_t)is->is_sb[i]->sb_flush,
252 					&is->is_sb[i]->sb_flushpa);
253 			}
254 		}
255 	}
256 }
257 
258 /*
259  * Here are the iommu control routines.
260  */
261 void
262 iommu_enter(struct strbuf_ctl *sb, vaddr_t va, int64_t pa, int flags)
263 {
264 	struct iommu_state *is = sb->sb_is;
265 	int strbuf = (flags & BUS_DMA_STREAMING);
266 	int64_t tte;
267 
268 #ifdef DIAGNOSTIC
269 	if (va < is->is_dvmabase || va > is->is_dvmaend)
270 		panic("iommu_enter: va %#lx not in DVMA space", va);
271 #endif
272 
273 	/* Is the streamcache flush really needed? */
274 	if (sb->sb_flush) {
275 		iommu_strbuf_flush(sb, va);
276 		iommu_strbuf_flush_done(sb);
277 	} else
278 		/* If we can't flush the strbuf, don't enable it. */
279 		strbuf = 0;
280 
281 	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
282 		!(flags & BUS_DMA_NOCACHE), (strbuf));
283 #ifdef DEBUG
284 	tte |= (flags & 0xff000LL)<<(4*8);
285 #endif
286 
287 	DPRINTF(IDB_IOMMU, ("Setting TSB slot %d for va %p\n",
288 		       (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
289 	is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] = tte;
290 	bus_space_write_8(is->is_bustag, is->is_iommu,
291 		IOMMUREG(iommu_flush), va);
292 	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
293 		va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
294 		(void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
295 		(u_long)tte));
296 }
297 
298 /*
299  * Find the value of a DVMA address (debug routine).
300  */
301 paddr_t
302 iommu_extract(struct iommu_state *is, vaddr_t dva)
303 {
304 	int64_t tte = 0;
305 
306 	if (dva >= is->is_dvmabase && dva < is->is_dvmaend)
307 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
308 
309 	if ((tte & IOTTE_V) == 0)
310 		return ((paddr_t)-1L);
311 	return (tte & IOTTE_PAMASK);
312 }
313 
314 /*
315  * iommu_remove: removes mappings created by iommu_enter
316  *
317  * Only demap from IOMMU if flag is set.
318  *
319  * XXX: this function needs better internal error checking.
320  */
321 void
322 iommu_remove(struct iommu_state *is, vaddr_t va, size_t len)
323 {
324 
325 #ifdef DIAGNOSTIC
326 	if (va < is->is_dvmabase || va > is->is_dvmaend)
327 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
328 	if ((long)(va + len) < (long)va)
329 		panic("iommu_remove: va 0x%lx + len 0x%lx wraps",
330 		      (long) va, (long) len);
331 	if (len & ~0xfffffff)
332 		panic("iommu_remove: ridiculous len 0x%lx", (u_long)len);
333 #endif
334 
335 	va = trunc_page(va);
336 	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%lx]@%p\n",
337 		va, (u_long)IOTSBSLOT(va, is->is_tsbsize),
338 		&is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)]));
339 	while (len > 0) {
340 		DPRINTF(IDB_IOMMU, ("iommu_remove: clearing TSB slot %d "
341 			"for va %p size %lx\n",
342 			(int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va,
343 			(u_long)len));
344 		if (len <= PAGE_SIZE)
345 			len = 0;
346 		else
347 			len -= PAGE_SIZE;
348 
349 		/* XXX Zero-ing the entry would not require RMW */
350 		is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)] &= ~IOTTE_V;
351 		bus_space_write_8(is->is_bustag, is->is_iommu,
352 			IOMMUREG(iommu_flush), va);
353 		va += PAGE_SIZE;
354 	}
355 }
356 
357 static int
358 iommu_strbuf_flush_done(struct strbuf_ctl *sb)
359 {
360 	struct iommu_state *is = sb->sb_is;
361 	struct timeval cur, flushtimeout;
362 
363 #define BUMPTIME(t, usec) { \
364 	register volatile struct timeval *tp = (t); \
365 	register long us; \
366  \
367 	tp->tv_usec = us = tp->tv_usec + (usec); \
368 	if (us >= 1000000) { \
369 		tp->tv_usec = us - 1000000; \
370 		tp->tv_sec++; \
371 	} \
372 }
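	/*
	 * BUMPTIME() adds `usec' microseconds to the timeval at `t',
	 * carrying into tv_sec on overflow; it is used below to compute the
	 * half-second flush timeout.
	 */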
373 
374 	if (!sb->sb_flush)
375 		return (0);
376 
377 	/*
378 	 * Streaming buffer flushes:
379 	 *
380 	 *   1 Tell strbuf to flush by storing va to strbuf_pgflush.  If
381 	 *     we're not on a cache line boundary (64-bits):
382 	 *   2 Store 0 in flag
383 	 *   3 Store pointer to flag in flushsync
384 	 *   4 Wait until the flag becomes 0x1
385 	 *
386 	 * If it takes more than .5 sec, something
387 	 * went wrong.
388 	 */
389 
390 	*sb->sb_flush = 0;
391 	bus_space_write_8(is->is_bustag, sb->sb_sb,
392 		STRBUFREG(strbuf_flushsync), sb->sb_flushpa);
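	/*
	 * The hardware sets the 64-bit flag word at sb_flushpa to non-zero
	 * once every queued page flush has drained; the loop below polls
	 * that word for up to half a second.
	 */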
393 
394 	microtime(&flushtimeout);
395 	cur = flushtimeout;
396 	BUMPTIME(&flushtimeout, 500000); /* 1/2 sec */
397 
398 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flush = %lx "
399 		"at va = %lx pa = %lx now=%lx:%lx until = %lx:%lx\n",
400 		(long)*sb->sb_flush, (long)sb->sb_flush, (long)sb->sb_flushpa,
401 		cur.tv_sec, cur.tv_usec,
402 		flushtimeout.tv_sec, flushtimeout.tv_usec));
403 
404 	/* Bypass non-coherent D$ */
405 	while ((!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) &&
406 		timercmp(&cur, &flushtimeout, <=))
407 		microtime(&cur);
408 
409 #ifdef DIAGNOSTIC
410 	if (!ldxa(sb->sb_flushpa, ASI_PHYS_CACHED)) {
411 		printf("iommu_strbuf_flush_done: flush timeout %p, at %p\n",
412 			(void *)(u_long)*sb->sb_flush,
413 			(void *)(u_long)sb->sb_flushpa); /* panic? */
414 #ifdef DDB
415 		Debugger();
416 #endif
417 	}
418 #endif
419 	DPRINTF(IDB_IOMMU, ("iommu_strbuf_flush_done: flushed\n"));
420 	return (*sb->sb_flush);
421 }
422 
423 /*
424  * IOMMU DVMA operations, common to SBUS and PCI.
425  */
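/*
 * A typical consumer never calls these directly; it goes through the
 * bus_dma(9) front end, roughly (sketch only, error handling omitted and
 * the tag/softc names are illustrative):
 *
 *	bus_dmamap_create(sc->sc_dmatag, size, 1, size, 0,
 *	    BUS_DMA_NOWAIT, &map);
 *	bus_dmamap_load(sc->sc_dmatag, map, buf, size, NULL,
 *	    BUS_DMA_NOWAIT);
 *	... start DMA ...
 *	bus_dmamap_sync(sc->sc_dmatag, map, 0, size, BUS_DMASYNC_POSTREAD);
 *	bus_dmamap_unload(sc->sc_dmatag, map);
 *
 * which the sbus/pci bus_dma tags route into iommu_dvmamap_load(),
 * iommu_dvmamap_sync() and iommu_dvmamap_unload() below.
 */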
426 int
427 iommu_dvmamap_load(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map,
428 	void *buf, bus_size_t buflen, struct proc *p, int flags)
429 {
430 	struct iommu_state *is = sb->sb_is;
431 	int s;
432 	int err;
433 	bus_size_t sgsize;
434 	paddr_t curaddr;
435 	u_long dvmaddr, sgstart, sgend;
436 	bus_size_t align, boundary, len;
437 	vaddr_t vaddr = (vaddr_t)buf;
438 	int seg;
439 	struct pmap *pmap;
440 
441 	if (map->dm_nsegs) {
442 		/* Already in use?? */
443 #ifdef DIAGNOSTIC
444 		printf("iommu_dvmamap_load: map still in use\n");
445 #endif
446 		bus_dmamap_unload(t, map);
447 	}
448 
449 	/*
450 	 * Make sure that on error condition we return "no valid mappings".
451 	 */
452 	map->dm_nsegs = 0;
453 	if (buflen > map->_dm_size) {
454 		DPRINTF(IDB_BUSDMA,
455 		    ("iommu_dvmamap_load(): error %d > %d -- "
456 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
457 		return (EINVAL);
458 	}
459 
460 	sgsize = round_page(buflen + ((int)vaddr & PGOFSET));
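	/*
	 * sgsize covers the buffer plus its offset into the first page,
	 * rounded up to whole pages, since DVMA space is allocated and
	 * mapped a page at a time.
	 */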
461 
462 	/*
463 	 * A boundary presented to bus_dmamem_alloc() takes precedence
464 	 * over boundary in the map.
465 	 */
466 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
467 		boundary = map->_dm_boundary;
468 	align = max(map->dm_segs[0]._ds_align, PAGE_SIZE);
469 
470 	/*
471 	 * If our segment size is larger than the boundary we need to
472 	 * split the transfer up into little pieces ourselves.
473 	 */
474 	s = splhigh();
475 	err = extent_alloc(is->is_dvmamap, sgsize, align,
476 	    (sgsize > boundary) ? 0 : boundary,
477 	    EX_NOWAIT|EX_BOUNDZERO, &dvmaddr);
478 	splx(s);
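	/*
	 * A zero boundary is passed when sgsize exceeds the real boundary so
	 * the allocation itself is unconstrained; the loop below then splits
	 * the DVMA range into boundary-sized segments by hand.
	 */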
479 
480 #ifdef DEBUG
481 	if (err || (dvmaddr == (u_long)-1)) {
482 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
483 		    (int)sgsize, flags);
484 #ifdef DDB
485 		Debugger();
486 #endif
487 	}
488 #endif
489 	if (err != 0)
490 		return (err);
491 
492 	if (dvmaddr == (u_long)-1)
493 		return (ENOMEM);
494 
495 	/* Set the active DVMA map */
496 	map->_dm_dvmastart = dvmaddr;
497 	map->_dm_dvmasize = sgsize;
498 
499 	/*
500 	 * Now split the DVMA range into segments, not crossing
501 	 * the boundary.
502 	 */
503 	seg = 0;
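	/*
	 * The first segment keeps the buffer's offset within its page so the
	 * DVMA address seen by the device has the same low bits as the CPU
	 * virtual address.
	 */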
504 	sgstart = dvmaddr + (vaddr & PGOFSET);
505 	sgend = sgstart + buflen - 1;
506 	map->dm_segs[seg].ds_addr = sgstart;
507 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: boundary %lx boundary - 1 %lx "
508 	    "~(boundary - 1) %lx\n", (long)boundary, (long)(boundary - 1),
509 	    (long)~(boundary - 1)));
510 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
511 		/* Oops.  We crossed a boundary.  Split the xfer. */
512 		len = boundary - (sgstart & (boundary - 1));
513 		map->dm_segs[seg].ds_len = len;
514 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
515 		    "seg %d start %lx size %lx\n", seg,
516 		    (long)map->dm_segs[seg].ds_addr,
517 		    (long)map->dm_segs[seg].ds_len));
518 		if (++seg >= map->_dm_segcnt) {
519 			/* Too many segments.  Fail the operation. */
520 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
521 			    "too many segments %d\n", seg));
522 			s = splhigh();
523 			/* How can this fail?  And if it does what can we do? */
524 			err = extent_free(is->is_dvmamap,
525 			    dvmaddr, sgsize, EX_NOWAIT);
526 			map->_dm_dvmastart = 0;
527 			map->_dm_dvmasize = 0;
528 			splx(s);
529 			return (EFBIG);
530 		}
531 		sgstart += len;
532 		map->dm_segs[seg].ds_addr = sgstart;
533 	}
534 	map->dm_segs[seg].ds_len = sgend - sgstart + 1;
535 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load: "
536 	    "seg %d start %lx size %lx\n", seg,
537 	    (long)map->dm_segs[seg].ds_addr, (long)map->dm_segs[seg].ds_len));
538 	map->dm_nsegs = seg + 1;
539 	map->dm_mapsize = buflen;
540 
541 	if (p != NULL)
542 		pmap = p->p_vmspace->vm_map.pmap;
543 	else
544 		pmap = pmap_kernel();
545 
546 	for (; buflen > 0; ) {
547 
548 		/*
549 		 * Get the physical address for this page.
550 		 */
551 		if (pmap_extract(pmap, (vaddr_t)vaddr, &curaddr) == FALSE) {
552 #ifdef DIAGNOSTIC
553 			printf("iommu_dvmamap_load: pmap_extract failed %lx\n", vaddr);
554 #endif
555 			bus_dmamap_unload(t, map);
556 			return (-1);
557 		}
558 
559 		/*
560 		 * Compute the segment size, and adjust counts.
561 		 */
562 		sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET);
563 		if (buflen < sgsize)
564 			sgsize = buflen;
565 
566 		DPRINTF(IDB_BUSDMA,
567 		    ("iommu_dvmamap_load: map %p loading va %p "
568 		    "dva %lx at pa %lx\n",
569 		    map, (void *)vaddr, (long)dvmaddr,
570 		    (long)(curaddr & ~(PAGE_SIZE-1))));
571 		iommu_enter(sb, trunc_page(dvmaddr), trunc_page(curaddr),
572 		    flags|0x4000);
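		/*
		 * 0x4000 appears to be a debug tag: under DEBUG,
		 * iommu_enter() ORs (flags & 0xff000) into the high bits of
		 * the TTE so a mapping can be traced back to this call site
		 * (iommu_dvmamap_load_raw() uses 0x8000 instead).
		 */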
573 
574 		dvmaddr += PAGE_SIZE;
575 		vaddr += sgsize;
576 		buflen -= sgsize;
577 	}
578 #ifdef DIAGNOSTIC
579 	for (seg = 0; seg < map->dm_nsegs; seg++) {
580 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
581 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
582 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
583 			    seg, (long)map->dm_segs[seg].ds_addr,
584 			    is->is_dvmabase, is->is_dvmaend);
585 #ifdef DDB
586 			Debugger();
587 #endif
588 		}
589 	}
590 #endif
591 	return (0);
592 }
593 
594 
595 void
596 iommu_dvmamap_unload(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map)
597 {
598 	struct iommu_state *is = sb->sb_is;
599 	int error, s;
600 	bus_size_t sgsize = map->_dm_dvmasize;
601 
602 	/* Flush the iommu */
603 #ifdef DEBUG
604 	if (!map->_dm_dvmastart) {
605 		printf("iommu_dvmamap_unload: _dm_dvmastart is zero\n");
606 #ifdef DDB
607 		Debugger();
608 #endif
609 	}
610 #endif
611 	iommu_remove(is, map->_dm_dvmastart, map->_dm_dvmasize);
612 
613 	/* Flush the caches */
614 	bus_dmamap_unload(t->_parent, map);
615 
616 	/* Mark the mappings as invalid. */
617 	map->dm_mapsize = 0;
618 	map->dm_nsegs = 0;
619 
620 	s = splhigh();
621 	error = extent_free(is->is_dvmamap, map->_dm_dvmastart,
622 		map->_dm_dvmasize, EX_NOWAIT);
623 	map->_dm_dvmastart = 0;
624 	map->_dm_dvmasize = 0;
625 	splx(s);
626 	if (error != 0)
627 		printf("warning: %qd of DVMA space lost\n", (long long)sgsize);
628 
629 	/* Clear the map */
630 }
631 
632 
633 int
634 iommu_dvmamap_load_raw(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map,
635 	bus_dma_segment_t *segs, int nsegs, int flags, bus_size_t size)
636 {
637 	struct iommu_state *is = sb->sb_is;
638 	struct vm_page *pg;
639 	int i, j, s;
640 	int left;
641 	int err;
642 	bus_size_t sgsize;
643 	paddr_t pa;
644 	bus_size_t boundary, align;
645 	u_long dvmaddr, sgstart, sgend;
646 	struct pglist *pglist;
647 	int pagesz = PAGE_SIZE;
648 	int npg = 0; /* DEBUG */
649 
650 	if (map->dm_nsegs) {
651 		/* Already in use?? */
652 #ifdef DIAGNOSTIC
653 		printf("iommu_dvmamap_load_raw: map still in use\n");
654 #endif
655 		bus_dmamap_unload(t, map);
656 	}
657 
658 	/*
659 	 * A boundary presented to bus_dmamem_alloc() takes precedence
660 	 * over boundary in the map.
661 	 */
662 	if ((boundary = segs[0]._ds_boundary) == 0)
663 		boundary = map->_dm_boundary;
664 
665 	align = max(segs[0]._ds_align, pagesz);
666 
667 	/*
668 	 * Make sure that on error condition we return "no valid mappings".
669 	 */
670 	map->dm_nsegs = 0;
671 	/* Count up the total number of pages we need */
672 	pa = segs[0].ds_addr;
673 	sgsize = 0;
674 	left = size;
675 	for (i = 0; left && i < nsegs; i++) {
676 		if (round_page(pa) != round_page(segs[i].ds_addr))
677 			sgsize = round_page(sgsize);
678 		sgsize += min(left, segs[i].ds_len);
679 		left -= segs[i].ds_len;
680 		pa = segs[i].ds_addr + segs[i].ds_len;
681 	}
682 	sgsize = round_page(sgsize) + PAGE_SIZE; /* XXX reserve extra dvma page */
683 
684 	s = splhigh();
685 	/*
686 	 * If our segment size is larger than the boundary we need to
687 	 * split the transfer up into little pieces ourselves.
688 	 */
689 	err = extent_alloc(is->is_dvmamap, sgsize, align,
690 		(sgsize > boundary) ? 0 : boundary,
691 		((flags & BUS_DMA_NOWAIT) == 0 ? EX_WAITOK : EX_NOWAIT) |
692 		EX_BOUNDZERO, &dvmaddr);
693 	splx(s);
694 
695 	if (err != 0)
696 		return (err);
697 
698 #ifdef DEBUG
699 	if (dvmaddr == (u_long)-1)
700 	{
701 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) failed!\n",
702 		    (int)sgsize, flags);
703 #ifdef DDB
704 		Debugger();
705 #endif
706 	}
707 #endif
708 	if (dvmaddr == (u_long)-1)
709 		return (ENOMEM);
710 
711 	/* Set the active DVMA map */
712 	map->_dm_dvmastart = dvmaddr;
713 	map->_dm_dvmasize = sgsize;
714 
715 	if ((pglist = segs[0]._ds_mlist) == NULL) {
716 		u_long prev_va = 0UL;
717 		paddr_t prev_pa = 0;
718 		int end = 0, offset;
719 
720 		/*
721 		 * These segs are made up of individual physical
722 		 * segments, probably built by _bus_dmamap_load_uio() or
723 		 * _bus_dmamap_load_mbuf().  Ignore the mlist and
724 		 * load each one individually.
725 		 */
726 		map->dm_mapsize = size;
727 
728 		j = 0;
729 		for (i = 0; i < nsegs ; i++) {
730 
731 			pa = segs[i].ds_addr;
732 			offset = (pa & PGOFSET);
733 			pa = trunc_page(pa);
734 			dvmaddr = trunc_page(dvmaddr);
735 			left = min(size, segs[i].ds_len);
736 
737 			DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: converting "
738 				"physseg %d start %lx size %lx\n", i,
739 				(long)segs[i].ds_addr, (long)segs[i].ds_len));
740 
741 			if ((pa == prev_pa) &&
742 				((offset != 0) || (end != offset))) {
743 				/* We can re-use this mapping */
744 				dvmaddr = prev_va;
745 			}
746 
747 			sgstart = dvmaddr + offset;
748 			sgend = sgstart + left - 1;
749 
750 			/* Are the segments virtually adjacent? */
751 			if ((j > 0) && (end == offset) &&
752 				((offset == 0) || (pa == prev_pa))) {
753 				/* Just append to the previous segment. */
754 				map->dm_segs[--j].ds_len += left;
755 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
756 					"appending seg %d start %lx size %lx\n", j,
757 					(long)map->dm_segs[j].ds_addr,
758 					(long)map->dm_segs[j].ds_len));
759 			} else {
760 				if (j >= map->_dm_segcnt) {
761 					iommu_dvmamap_unload(t, sb, map);
762 					return (EFBIG);
763 				}
764 				map->dm_segs[j].ds_addr = sgstart;
765 				map->dm_segs[j].ds_len = left;
766 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
767 					"seg %d start %lx size %lx\n", j,
768 					(long)map->dm_segs[j].ds_addr,
769 					(long)map->dm_segs[j].ds_len));
770 			}
771 			end = (offset + left) & PGOFSET;
772 
773 			/* Check for boundary issues */
774 			while ((sgstart & ~(boundary - 1)) !=
775 				(sgend & ~(boundary - 1))) {
776 				/* Need a new segment. */
777 				map->dm_segs[j].ds_len =
778 					boundary - (sgstart & (boundary - 1));
779 				DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
780 					"seg %d start %lx size %lx\n", j,
781 					(long)map->dm_segs[j].ds_addr,
782 					(long)map->dm_segs[j].ds_len));
783 				if (++j >= map->_dm_segcnt) {
784 					iommu_dvmamap_unload(t, sb, map);
785 					return (EFBIG);
786 				}
787 				sgstart = roundup(sgstart, boundary);
788 				map->dm_segs[j].ds_addr = sgstart;
789 				map->dm_segs[j].ds_len = sgend - sgstart + 1;
790 			}
791 
792 			if (sgsize == 0)
793 				panic("iommu_dmamap_load_raw: size botch");
794 
795 			/* Now map a series of pages. */
796 			while (dvmaddr <= sgend) {
797 				DPRINTF(IDB_BUSDMA,
798 					("iommu_dvmamap_load_raw: map %p "
799 						"loading va %lx at pa %lx\n",
800 						map, (long)dvmaddr,
801 						(long)(pa)));
802 				/* Enter it if we haven't before. */
803 				if (prev_va != dvmaddr)
804 					iommu_enter(sb, prev_va = dvmaddr,
805 						prev_pa = pa,
806 						flags | (++npg << 12));
807 				dvmaddr += pagesz;
808 				pa += pagesz;
809 			}
810 
811 			size -= left;
812 			++j;
813 		}
814 
815 		map->dm_nsegs = j;
816 #ifdef DIAGNOSTIC
817 		{ int seg;
818 		for (seg = 0; seg < map->dm_nsegs; seg++) {
819 			if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
820 			    map->dm_segs[seg].ds_addr > is->is_dvmaend) {
821 				printf("seg %d dvmaddr %lx out of range %x - %x\n",
822 				    seg, (long)map->dm_segs[seg].ds_addr,
823 				    is->is_dvmabase, is->is_dvmaend);
824 #ifdef DDB
825 				Debugger();
826 #endif
827 			}
828 		}
829 		}
830 #endif
831 		return (0);
832 	}
833 
834 	/*
835 	 * This was allocated with bus_dmamem_alloc.
836 	 * The pages are on a `pglist'.
837 	 */
838 	map->dm_mapsize = size;
839 	i = 0;
840 	sgstart = dvmaddr;
841 	sgend = sgstart + size - 1;
842 	map->dm_segs[i].ds_addr = sgstart;
843 	while ((sgstart & ~(boundary - 1)) != (sgend & ~(boundary - 1))) {
844 		/* Oops.  We crossed a boundary.  Split the xfer. */
845 		map->dm_segs[i].ds_len = boundary - (sgstart & (boundary - 1));
846 		DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
847 			"seg %d start %lx size %lx\n", i,
848 			(long)map->dm_segs[i].ds_addr,
849 			(long)map->dm_segs[i].ds_len));
850 		if (++i >= map->_dm_segcnt) {
851 			/* Too many segments.  Fail the operation. */
852 			s = splhigh();
853 			/* How can this fail?  And if it does what can we do? */
854 			err = extent_free(is->is_dvmamap,
855 				dvmaddr, sgsize, EX_NOWAIT);
856 			map->_dm_dvmastart = 0;
857 			map->_dm_dvmasize = 0;
858 			splx(s);
859 			return (EFBIG);
860 		}
861 		sgstart = roundup(sgstart, boundary);
862 		map->dm_segs[i].ds_addr = sgstart;
863 	}
864 	DPRINTF(IDB_INFO, ("iommu_dvmamap_load_raw: "
865 			"seg %d start %lx size %lx\n", i,
866 			(long)map->dm_segs[i].ds_addr, (long)map->dm_segs[i].ds_len));
867 	map->dm_segs[i].ds_len = sgend - sgstart + 1;
868 
869 	TAILQ_FOREACH(pg, pglist, pageq.queue) {
870 		if (sgsize == 0)
871 			panic("iommu_dmamap_load_raw: size botch");
872 		pa = VM_PAGE_TO_PHYS(pg);
873 
874 		DPRINTF(IDB_BUSDMA,
875 		    ("iommu_dvmamap_load_raw: map %p loading va %lx at pa %lx\n",
876 		    map, (long)dvmaddr, (long)(pa)));
877 		iommu_enter(sb, dvmaddr, pa, flags|0x8000);
878 
879 		dvmaddr += pagesz;
880 		sgsize -= pagesz;
881 	}
882 	map->dm_mapsize = size;
883 	map->dm_nsegs = i+1;
884 #ifdef DIAGNOSTIC
885 	{ int seg;
886 	for (seg = 0; seg < map->dm_nsegs; seg++) {
887 		if (map->dm_segs[seg].ds_addr < is->is_dvmabase ||
888 			map->dm_segs[seg].ds_addr > is->is_dvmaend) {
889 			printf("seg %d dvmaddr %lx out of range %x - %x\n",
890 				seg, (long)map->dm_segs[seg].ds_addr,
891 				is->is_dvmabase, is->is_dvmaend);
892 #ifdef DDB
893 			Debugger();
894 #endif
895 		}
896 	}
897 	}
898 #endif
899 	return (0);
900 }
901 
902 
903 /*
904  * Flush an individual DMA segment; returns non-zero if the streaming buffers
905  * need flushing afterwards.
906  */
907 static int
908 iommu_dvmamap_sync_range(struct strbuf_ctl *sb, vaddr_t va, bus_size_t len)
909 {
910 	vaddr_t vaend;
911 	struct iommu_state *is = sb->sb_is;
912 
913 #ifdef DIAGNOSTIC
914 	if (va < is->is_dvmabase || va > is->is_dvmaend)
915 		panic("invalid va: %llx", (long long)va);
916 #endif
917 
918 	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
919 		DPRINTF(IDB_BUSDMA,
920 			("iommu_dvmamap_sync_range: attempting to flush "
921 			 "non-streaming entry\n"));
922 		return (0);
923 	}
924 
925 	vaend = (va + len + PGOFSET) & ~PGOFSET;
926 	va &= ~PGOFSET;
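	/*
	 * Widen the range to whole pages: the streaming buffer is flushed
	 * one page at a time below.
	 */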
927 
928 #ifdef DIAGNOSTIC
929 	if (va < is->is_dvmabase || vaend > is->is_dvmaend)
930 		panic("invalid va range: %llx to %llx (%x to %x)",
931 		    (long long)va, (long long)vaend,
932 		    is->is_dvmabase,
933 		    is->is_dvmaend);
934 #endif
935 
936 	for ( ; va <= vaend; va += PAGE_SIZE) {
937 		DPRINTF(IDB_BUSDMA,
938 		    ("iommu_dvmamap_sync_range: flushing va %p\n",
939 		    (void *)(u_long)va));
940 		iommu_strbuf_flush(sb, va);
941 	}
942 
943 	return (1);
944 }
945 
946 void
947 iommu_dvmamap_sync(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_dmamap_t map,
948 	bus_addr_t offset, bus_size_t len, int ops)
949 {
950 	bus_size_t count;
951 	int i, needsflush = 0;
952 
953 	if (!sb->sb_flush)
954 		return;
955 
956 	for (i = 0; i < map->dm_nsegs; i++) {
957 		if (offset < map->dm_segs[i].ds_len)
958 			break;
959 		offset -= map->dm_segs[i].ds_len;
960 	}
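	/*
	 * `i' now indexes the segment containing `offset' (or equals
	 * dm_nsegs if offset lies past the end of the map, which the check
	 * below turns into a panic), and offset is relative to that segment.
	 */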
961 
962 	if (i == map->dm_nsegs)
963 		panic("iommu_dvmamap_sync: segment too short %llu",
964 		    (unsigned long long)offset);
965 
966 	if (ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTWRITE)) {
967 		/* Nothing to do */;
968 	}
969 
970 	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)) {
971 
972 		for (; len > 0 && i < map->dm_nsegs; i++) {
973 			count = MIN(map->dm_segs[i].ds_len - offset, len);
974 			if (count > 0 &&
975 			    iommu_dvmamap_sync_range(sb,
976 				map->dm_segs[i].ds_addr + offset, count))
977 				needsflush = 1;
978 			offset = 0;
979 			len -= count;
980 		}
981 #ifdef DIAGNOSTIC
982 		if (i == map->dm_nsegs && len > 0)
983 			panic("iommu_dvmamap_sync: leftover %llu",
984 			    (unsigned long long)len);
985 #endif
986 
987 		if (needsflush)
988 			iommu_strbuf_flush_done(sb);
989 	}
990 }
991 
992 int
993 iommu_dvmamem_alloc(bus_dma_tag_t t, struct strbuf_ctl *sb, bus_size_t size,
994 	bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs,
995 	int nsegs, int *rsegs, int flags)
996 {
997 
998 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx bound %llx "
999 	   "segp %p flags %d\n", (unsigned long long)size,
1000 	   (unsigned long long)alignment, (unsigned long long)boundary,
1001 	   segs, flags));
1002 	return (bus_dmamem_alloc(t->_parent, size, alignment, boundary,
1003 	    segs, nsegs, rsegs, flags|BUS_DMA_DVMA));
1004 }
1005 
1006 void
1007 iommu_dvmamem_free(bus_dma_tag_t t, struct strbuf_ctl *sb,
1008 	bus_dma_segment_t *segs, int nsegs)
1009 {
1010 
1011 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
1012 	    segs, nsegs));
1013 	bus_dmamem_free(t->_parent, segs, nsegs);
1014 }
1015 
1016 /*
1017  * Map the DVMA mappings into the kernel pmap.
1018  * Check the flags to see whether we're streaming or coherent.
1019  */
1020 int
1021 iommu_dvmamem_map(bus_dma_tag_t t, struct strbuf_ctl *sb,
1022 	bus_dma_segment_t *segs, int nsegs, size_t size, void **kvap,
1023 	int flags)
1024 {
1025 	struct vm_page *pg;
1026 	vaddr_t va;
1027 	bus_addr_t addr;
1028 	struct pglist *pglist;
1029 	int cbit;
1030 	const uvm_flag_t kmflags =
1031 	    (flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0;
1032 
1033 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: segp %p nsegs %d size %lx\n",
1034 	    segs, nsegs, size));
1035 
1036 	/*
1037 	 * Allocate some space in the kernel map, and then map these pages
1038 	 * into this space.
1039 	 */
1040 	size = round_page(size);
1041 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY | kmflags);
1042 	if (va == 0)
1043 		return (ENOMEM);
1044 
1045 	*kvap = (void *)va;
1046 
1047 	/*
1048 	 * digest flags:
1049 	 */
1050 	cbit = 0;
1051 	if (flags & BUS_DMA_COHERENT)	/* Disable vcache */
1052 		cbit |= PMAP_NVC;
1053 	if (flags & BUS_DMA_NOCACHE)	/* side effects */
1054 		cbit |= PMAP_NC;
1055 
1056 	/*
1057 	 * Now take these pages and map them into the kernel pmap.
1058 	 */
1059 	pglist = segs[0]._ds_mlist;
1060 	TAILQ_FOREACH(pg, pglist, pageq.queue) {
1061 #ifdef DIAGNOSTIC
1062 		if (size == 0)
1063 			panic("iommu_dvmamem_map: size botch");
1064 #endif
1065 		addr = VM_PAGE_TO_PHYS(pg);
1066 		DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_map: "
1067 		    "mapping va %lx at %llx\n", va, (unsigned long long)addr | cbit));
1068 		pmap_kenter_pa(va, addr | cbit, VM_PROT_READ | VM_PROT_WRITE);
1069 		va += PAGE_SIZE;
1070 		size -= PAGE_SIZE;
1071 	}
1072 	pmap_update(pmap_kernel());
1073 	return (0);
1074 }
1075 
1076 /*
1077  * Unmap DVMA mappings from kernel
1078  */
1079 void
1080 iommu_dvmamem_unmap(bus_dma_tag_t t, struct strbuf_ctl *sb, void *kva,
1081 	size_t size)
1082 {
1083 
1084 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_unmap: kvm %p size %lx\n",
1085 	    kva, size));
1086 
1087 #ifdef DIAGNOSTIC
1088 	if ((u_long)kva & PGOFSET)
1089 		panic("iommu_dvmamem_unmap");
1090 #endif
1091 
1092 	size = round_page(size);
1093 	pmap_kremove((vaddr_t)kva, size);
1094 	pmap_update(pmap_kernel());
1095 	uvm_km_free(kernel_map, (vaddr_t)kva, size, UVM_KMF_VAONLY);
1096 }
1097