xref: /openbsd-src/sys/arch/sparc64/dev/iommu.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: iommu.c,v 1.64 2011/05/18 23:36:31 ariane Exp $	*/
2 /*	$NetBSD: iommu.c,v 1.47 2002/02/08 20:03:45 eeh Exp $	*/
3 
4 /*
5  * Copyright (c) 2003 Henric Jungheim
6  * Copyright (c) 2001, 2002 Eduardo Horvath
7  * Copyright (c) 1999, 2000 Matthew R. Green
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * UltraSPARC IOMMU support; used by both the sbus and pci code.
36  */
37 #include <sys/param.h>
38 #include <sys/extent.h>
39 #include <sys/malloc.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/device.h>
43 #include <sys/mbuf.h>
44 
45 #include <uvm/uvm_extern.h>
46 
47 #include <machine/bus.h>
48 #include <sparc64/sparc64/cache.h>
49 #include <sparc64/dev/iommureg.h>
50 #include <sparc64/dev/iommuvar.h>
51 
52 #include <machine/autoconf.h>
53 #include <machine/cpu.h>
54 
55 #ifdef DDB
56 #include <machine/db_machdep.h>
57 #include <ddb/db_sym.h>
58 #include <ddb/db_extern.h>
59 #endif
60 
61 #ifdef DEBUG
62 #define IDB_BUSDMA	0x1
63 #define IDB_IOMMU	0x2
64 #define IDB_INFO	0x4
65 #define IDB_SYNC	0x8
66 #define IDB_XXX		0x10
67 #define IDB_PRINT_MAP	0x20
68 #define IDB_BREAK	0x40
69 int iommudebug = IDB_INFO;
70 #define DPRINTF(l, s)   do { if (iommudebug & l) printf s; } while (0)
71 #else
72 #define DPRINTF(l, s)
73 #endif
74 
75 void iommu_enter(struct iommu_state *, struct strbuf_ctl *, bus_addr_t,
76     paddr_t, int);
77 void iommu_remove(struct iommu_state *, struct strbuf_ctl *, bus_addr_t);
78 int iommu_dvmamap_sync_range(struct strbuf_ctl*, bus_addr_t, bus_size_t);
79 int iommu_strbuf_flush_done(struct iommu_map_state *);
80 int iommu_dvmamap_load_seg(bus_dma_tag_t, struct iommu_state *,
81     bus_dmamap_t, bus_dma_segment_t *, int, int, bus_size_t, bus_size_t);
82 int iommu_dvmamap_load_mlist(bus_dma_tag_t, struct iommu_state *,
83     bus_dmamap_t, struct pglist *, int, bus_size_t, bus_size_t);
84 int iommu_dvmamap_validate_map(bus_dma_tag_t, struct iommu_state *,
85     bus_dmamap_t);
86 void iommu_dvmamap_print_map(bus_dma_tag_t, struct iommu_state *,
87     bus_dmamap_t);
88 int iommu_dvmamap_append_range(bus_dma_tag_t, bus_dmamap_t, paddr_t,
89     bus_size_t, int, bus_size_t);
90 int64_t iommu_tsb_entry(struct iommu_state *, bus_addr_t);
91 void strbuf_reset(struct strbuf_ctl *);
92 int iommu_iomap_insert_page(struct iommu_map_state *, paddr_t);
93 bus_addr_t iommu_iomap_translate(struct iommu_map_state *, paddr_t);
94 void iommu_iomap_load_map(struct iommu_state *, struct iommu_map_state *,
95     bus_addr_t, int);
96 void iommu_iomap_unload_map(struct iommu_state *, struct iommu_map_state *);
97 struct iommu_map_state *iommu_iomap_create(int);
98 void iommu_iomap_destroy(struct iommu_map_state *);
99 void iommu_iomap_clear_pages(struct iommu_map_state *);
100 void _iommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
101     bus_addr_t, bus_size_t, int);
102 
103 /*
104  * Initiate an STC entry flush.
105  */
106 static inline void
107 iommu_strbuf_flush(struct strbuf_ctl *sb, bus_addr_t va)
108 {
109 #ifdef DEBUG
110 	if (sb->sb_flush == NULL) {
111 		printf("iommu_strbuf_flush: attempting to flush w/o STC\n");
112 		return;
113 	}
114 #endif
115 
116 	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
117 	    STRBUFREG(strbuf_pgflush), va);
118 }
119 
120 /*
121  * initialise the UltraSPARC IOMMU (SBus or PCI):
122  *	- allocate and set up the iotsb.
123  *	- enable the IOMMU
124  *	- initialise the streaming buffers (if they exist)
125  *	- create a private DVMA map.
126  */
127 void
128 iommu_init(char *name, struct iommu_state *is, int tsbsize, u_int32_t iovabase)
129 {
130 	psize_t size;
131 	vaddr_t va;
132 	paddr_t pa;
133 	struct vm_page *m;
134 	struct pglist mlist;
135 
136 	/*
137 	 * Setup the iommu.
138 	 *
139 	 * The sun4u IOMMU is part of the SBus or PCI controller, so we
140 	 * deal with it here.
141 	 *
142 	 * For sysio and psycho/psycho+ the IOMMU address space always ends at
143 	 * 0xffffe000, but the starting address depends on the size of the
144 	 * map.  The map size is 1024 * 2 ^ is->is_tsbsize entries, where each
145 	 * entry is 8 bytes.  The start of the map can be calculated by
146 	 * (0xffffe000 << (8 + is->is_tsbsize)).
147 	 *
148 	 * But sabre and hummingbird use a different scheme that seems to
149 	 * be hard-wired, so we read the start and size from the PROM and
150 	 * just use those values.
151 	 */
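	/*
	 * Worked example (illustrative): with is_tsbsize == 3 the TSB
	 * occupies PAGE_SIZE << 3 = 64KB, i.e. 8192 eight-byte entries,
	 * and since each entry maps one 8KB page the DVMA map spans
	 * 8192 * 8KB = 64MB of DVMA space.
	 */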
152 	is->is_cr = IOMMUCR_EN;
153 	is->is_tsbsize = tsbsize;
154 	if (iovabase == (u_int32_t)-1) {
155 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
156 		is->is_dvmaend = IOTSB_VEND;
157 	} else {
158 		is->is_dvmabase = iovabase;
159 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize) - 1;
160 	}
161 
162 	/*
163 	 * Allocate memory for I/O pagetables.  They need to be physically
164 	 * contiguous.
165 	 */
166 
167 	size = PAGE_SIZE << is->is_tsbsize;
168 	TAILQ_INIT(&mlist);
169 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
170 	    (paddr_t)PAGE_SIZE, (paddr_t)0, &mlist, 1, UVM_PLA_NOWAIT) != 0)
171 		panic("iommu_init: no memory");
172 
173 	va = uvm_km_valloc(kernel_map, size);
174 	if (va == 0)
175 		panic("iommu_init: no memory");
176 	is->is_tsb = (int64_t *)va;
177 
178 	m = TAILQ_FIRST(&mlist);
179 	is->is_ptsb = VM_PAGE_TO_PHYS(m);
180 
181 	/* Map the pages */
182 	for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
183 		pa = VM_PAGE_TO_PHYS(m);
184 		pmap_enter(pmap_kernel(), va, pa | PMAP_NVC,
185 			VM_PROT_READ|VM_PROT_WRITE,
186 			VM_PROT_READ|VM_PROT_WRITE|PMAP_WIRED);
187 		va += PAGE_SIZE;
188 	}
189 	pmap_update(pmap_kernel());
190 	memset(is->is_tsb, 0, size);
191 
192 #ifdef DEBUG
193 	if (iommudebug & IDB_INFO) {
194 		/* Probe the iommu */
195 		/* The address or contents of the regs...? */
196 		printf("iommu regs at: cr=%lx tsb=%lx flush=%lx\n",
197 		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
198 			IOMMUREG(iommu_cr),
199 		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
200 			IOMMUREG(iommu_tsb),
201 		    (u_long)bus_space_vaddr(is->is_bustag, is->is_iommu) +
202 			IOMMUREG(iommu_flush));
203 		printf("iommu cr=%llx tsb=%llx\n",
204 		    IOMMUREG_READ(is, iommu_cr),
205 		    IOMMUREG_READ(is, iommu_tsb));
206 		printf("TSB base %p phys %llx\n",
207 		    (void *)is->is_tsb, (unsigned long long)is->is_ptsb);
208 		delay(1000000); /* 1 s */
209 	}
210 #endif
211 
212 	/*
213 	 * Now that the hardware is working, we need to allocate a DVMA map.
214 	 */
215 	printf("dvma map %x-%x", is->is_dvmabase, is->is_dvmaend);
216 #ifdef DEBUG
217 	printf(", iotdb %llx-%llx",
218 	    (unsigned long long)is->is_ptsb,
219 	    (unsigned long long)(is->is_ptsb + size));
220 #endif
221 	is->is_dvmamap = extent_create(name,
222 	    is->is_dvmabase, (u_long)is->is_dvmaend + 1,
223 	    M_DEVBUF, 0, 0, EX_NOWAIT);
224 	mtx_init(&is->is_mtx, IPL_HIGH);
225 
226 	/*
227 	 * Set the TSB size.  The relevant bits were moved to the TSB
228 	 * base register in the PCIe host bridges.
229 	 */
230 	if (strncmp(name, "pyro", 4) == 0)
231 		is->is_ptsb |= is->is_tsbsize;
232 	else
233 		is->is_cr |= (is->is_tsbsize << 16);
234 
235 	/*
236 	 * Now actually start up the IOMMU.
237 	 */
238 	iommu_reset(is);
239 	printf("\n");
240 }
241 
242 /*
243  * Streaming buffers don't exist on the UltraSPARC IIi/e; we should have
244  * detected that already and disabled them.  If not, we will notice that
245  * they aren't there when the STRBUF_EN bit does not stay set.
246  */
247 void
248 iommu_reset(struct iommu_state *is)
249 {
250 	int i;
251 
252 	IOMMUREG_WRITE(is, iommu_tsb, is->is_ptsb);
253 
254 	/* Enable IOMMU */
255 	IOMMUREG_WRITE(is, iommu_cr, is->is_cr);
256 
257 	for (i = 0; i < 2; ++i) {
258 		struct strbuf_ctl *sb = is->is_sb[i];
259 
260 		if (sb == NULL)
261 			continue;
262 
263 		sb->sb_iommu = is;
264 		strbuf_reset(sb);
265 
266 		if (sb->sb_flush)
267 			printf(", STC%d enabled", i);
268 	}
269 
270 	if (is->is_flags & IOMMU_FLUSH_CACHE)
271 		IOMMUREG_WRITE(is, iommu_cache_invalidate, -1ULL);
272 }
273 
274 /*
275  * Initialize one STC.
276  */
277 void
278 strbuf_reset(struct strbuf_ctl *sb)
279 {
280 	if (sb->sb_flush == NULL)
281 		return;
282 
283 	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
284 	    STRBUFREG(strbuf_ctl), STRBUF_EN);
285 
286 	membar(Lookaside);
287 
288 	/* No streaming buffers? Disable them */
289 	if (bus_space_read_8(sb->sb_bustag, sb->sb_sb,
290 	    STRBUFREG(strbuf_ctl)) == 0) {
291 		sb->sb_flush = NULL;
292 	} else {
293 		/*
294 		 * locate the pa of the flush buffer
295 		 */
296 		if (pmap_extract(pmap_kernel(),
297 		    (vaddr_t)sb->sb_flush, &sb->sb_flushpa) == FALSE)
298 			sb->sb_flush = NULL;
299 		mtx_init(&sb->sb_mtx, IPL_HIGH);
300 	}
301 }
302 
303 /*
304  * Add an entry to the IOMMU table.
305  *
306  * The entry is marked streaming if an STC was detected and
307  * the BUS_DMA_STREAMING flag is set.
308  */
309 void
310 iommu_enter(struct iommu_state *is, struct strbuf_ctl *sb, bus_addr_t va,
311     paddr_t pa, int flags)
312 {
313 	int64_t tte;
314 	volatile int64_t *tte_ptr = &is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)];
315 
316 #ifdef DIAGNOSTIC
317 	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
318 		panic("iommu_enter: va %#lx not in DVMA space", va);
319 
320 	tte = *tte_ptr;
321 
322 	if (tte & IOTTE_V) {
323 		printf("Overwriting valid tte entry (dva %lx pa %lx "
324 		    "&tte %p tte %llx)\n", va, pa, tte_ptr, tte);
325 		extent_print(is->is_dvmamap);
326 		panic("IOMMU overwrite");
327 	}
328 #endif
329 
330 	tte = MAKEIOTTE(pa, !(flags & BUS_DMA_NOWRITE),
331 	    !(flags & BUS_DMA_NOCACHE), (flags & BUS_DMA_STREAMING));
332 
333 	DPRINTF(IDB_IOMMU, ("Clearing TSB slot %d for va %p\n",
334 	    (int)IOTSBSLOT(va,is->is_tsbsize), (void *)(u_long)va));
335 
336 	*tte_ptr = tte;
337 
338 	/*
339 	 * Why bother to flush this va?  It should only be relevant for
340 	 * V ==> V or V ==> non-V transitions.  The former is illegal and
341 	 * the latter is never done here.  It is true that this provides
342 	 * some protection against a misbehaving master using an address
343 	 * after it should.  The IOMMU documentation specifically warns
344 	 * that the consequences of a simultaneous IOMMU flush and DVMA
345 	 * access to the same address are undefined.  (By that argument,
346 	 * the STC should probably be flushed as well.)   Note that if
347 	 * a bus master keeps using a memory region after it has been
348 	 * unmapped, the specific behavior of the IOMMU is likely to
349 	 * be the least of our worries.
350 	 */
351 	IOMMUREG_WRITE(is, iommu_flush, va);
352 
353 	DPRINTF(IDB_IOMMU, ("iommu_enter: va %lx pa %lx TSB[%lx]@%p=%lx\n",
354 	    va, (long)pa, (u_long)IOTSBSLOT(va,is->is_tsbsize),
355 	    (void *)(u_long)&is->is_tsb[IOTSBSLOT(va,is->is_tsbsize)],
356 	    (u_long)tte));
357 }
358 
359 /*
360  * Remove an entry from the IOMMU table.
361  *
362  * The entry is flushed from the STC if an STC is detected and the TSB
363  * entry has the IOTTE_STREAM flag set.  It should be impossible for
364  * the TSB entry to have this flag set without the BUS_DMA_STREAMING
365  * flag, but better to be safe.  (The IOMMU will be ignored as long
366  * as an STC entry exists.)
367  */
368 void
369 iommu_remove(struct iommu_state *is, struct strbuf_ctl *sb, bus_addr_t va)
370 {
371 	int64_t *tte_ptr = &is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)];
372 	int64_t tte;
373 
374 #ifdef DIAGNOSTIC
375 	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
376 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
377 	if (va != trunc_page(va)) {
378 		printf("iommu_remove: unaligned va: %lx\n", va);
379 		va = trunc_page(va);
380 	}
381 #endif
382 	tte = *tte_ptr;
383 
384 	DPRINTF(IDB_IOMMU, ("iommu_remove: va %lx TSB[%llx]@%p\n",
385 	    va, tte, tte_ptr));
386 
387 #ifdef DIAGNOSTIC
388 	if ((tte & IOTTE_V) == 0) {
389 		printf("Removing invalid tte entry (dva %lx &tte %p "
390 		    "tte %llx)\n", va, tte_ptr, tte);
391 		extent_print(is->is_dvmamap);
392 		panic("IOMMU remove overwrite");
393 	}
394 #endif
395 
396 	*tte_ptr = tte & ~IOTTE_V;
397 
398 	/*
399 	 * IO operations are strongly ordered WRT each other.  It is
400 	 * unclear how they relate to normal memory accesses.
401 	 */
402 	membar(StoreStore);
403 
404 	IOMMUREG_WRITE(is, iommu_flush, va);
405 
406 	if (sb && (tte & IOTTE_STREAM))
407 		iommu_strbuf_flush(sb, va);
408 
409 	/* Should we sync the iommu and stc here? */
410 }
411 
412 /*
413  * Find the physical address of a DVMA address (debug routine).
414  */
415 paddr_t
416 iommu_extract(struct iommu_state *is, bus_addr_t dva)
417 {
418 	int64_t tte = 0;
419 
420 	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
421 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
422 
423 	return (tte & IOTTE_PAMASK);
424 }
425 
426 /*
427  * Lookup a TSB entry for a given DVMA (debug routine).
428  */
429 int64_t
430 iommu_lookup_tte(struct iommu_state *is, bus_addr_t dva)
431 {
432 	int64_t tte = 0;
433 
434 	if (dva >= is->is_dvmabase && dva <= is->is_dvmaend)
435 		tte = is->is_tsb[IOTSBSLOT(dva, is->is_tsbsize)];
436 
437 	return (tte);
438 }
439 
440 /*
441  * Lookup a TSB entry at a given physical address (debug routine).
442  */
443 int64_t
444 iommu_fetch_tte(struct iommu_state *is, paddr_t pa)
445 {
446 	int64_t tte = 0;
447 
448 	if (pa >= is->is_ptsb && pa < is->is_ptsb +
449 	    (PAGE_SIZE << is->is_tsbsize))
450 		tte = ldxa(pa, ASI_PHYS_CACHED);
451 
452 	return (tte);
453 }
454 
455 /*
456  * Fetch a TSB entry with some sanity checking.
457  */
458 int64_t
459 iommu_tsb_entry(struct iommu_state *is, bus_addr_t dva)
460 {
461 	int64_t tte;
462 
463 	if (dva < is->is_dvmabase || dva > is->is_dvmaend)
464 		panic("invalid dva: %llx", (long long)dva);
465 
466 	tte = is->is_tsb[IOTSBSLOT(dva,is->is_tsbsize)];
467 
468 	if ((tte & IOTTE_V) == 0)
469 		panic("iommu_tsb_entry: invalid entry %lx", dva);
470 
471 	return (tte);
472 }
473 
474 /*
475  * Initiate an STC flush synchronization and block until it has completed.
476  */
477 int
478 iommu_strbuf_flush_done(struct iommu_map_state *ims)
479 {
480 	struct strbuf_ctl *sb = ims->ims_sb;
481 	struct strbuf_flush *sf = &ims->ims_flush;
482 	struct timeval cur, flushtimeout;
483 	struct timeval to = { 0, 500000 };
484 	u_int64_t flush;
485 	int timeout_started = 0;
486 
487 #ifdef DIAGNOSTIC
488 	if (sb == NULL) {
489 		panic("iommu_strbuf_flush_done: invalid flush buffer");
490 	}
491 #endif
492 
493 	mtx_enter(&sb->sb_mtx);
494 
495 	/*
496 	 * Streaming buffer flushes:
497 	 *
498 	 *   1 Tell the strbuf to flush by storing the va to strbuf_pgflush.
499 	 *   2 Store 0 in the flag.
500 	 *   3 Store a pointer to the flag in flushsync.
501 	 *   4 Wait until the flag becomes 0x1.
502 	 *
503 	 * If it takes more than .5 sec, something went very, very wrong.
504 	 */
505 
506 	/*
507 	 * If we're reading from ASI_PHYS_CACHED, then we'll write to
508 	 * it too.  No need to tempt fate or learn about Si bugs or such.
509 	 * FreeBSD just uses normal "volatile" reads/writes...
510 	 */
511 
512 	stxa(sf->sbf_flushpa, ASI_PHYS_CACHED, 0);
513 
514 	/*
515 	 * Ensure any previous strbuf operations are complete and that
516 	 * memory is initialized before the IOMMU uses it.
517 	 * Is this needed?  How are I/O and memory operations ordered?
518 	 */
519 	membar(StoreStore);
520 
521 	bus_space_write_8(sb->sb_bustag, sb->sb_sb,
522 		    STRBUFREG(strbuf_flushsync), sf->sbf_flushpa);
523 
524 	DPRINTF(IDB_IOMMU,
525 	    ("iommu_strbuf_flush_done: flush = %llx pa = %lx\n",
526 		ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED), sf->sbf_flushpa));
527 
528 	membar(StoreLoad | Lookaside);
529 
530 	for (;;) {
531 		int i;
532 
533 		/*
534 		 * Try to shave a few instruction cycles off the average
535 		 * latency by only checking the elapsed time every few
536 		 * fetches.
537 		 */
538 		for (i = 0; i < 1000; ++i) {
539 			membar(LoadLoad);
540 			/* Bypass non-coherent D$ */
541 			/* non-coherent...?   Huh? */
542 			flush = ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED);
543 
544 			if (flush) {
545 				DPRINTF(IDB_IOMMU,
546 				    ("iommu_strbuf_flush_done: flushed\n"));
547 				mtx_leave(&sb->sb_mtx);
548 				return (0);
549 			}
550 		}
551 
552 		microtime(&cur);
553 
554 		if (timeout_started) {
555 			if (timercmp(&cur, &flushtimeout, >))
556 				panic("STC timeout at %lx (%lld)",
557 				    sf->sbf_flushpa, flush);
558 		} else {
559 			timeradd(&cur, &to, &flushtimeout);
560 
561 			timeout_started = 1;
562 
563 			DPRINTF(IDB_IOMMU,
564 			    ("iommu_strbuf_flush_done: flush = %llx pa = %lx "
565 				"now=%lx:%lx until = %lx:%lx\n",
566 				ldxa(sf->sbf_flushpa, ASI_PHYS_CACHED),
567 				sf->sbf_flushpa, cur.tv_sec, cur.tv_usec,
568 				flushtimeout.tv_sec, flushtimeout.tv_usec));
569 		}
570 	}
571 }
572 
573 /*
574  * IOMMU DVMA operations, common to SBus and PCI.
575  */
576 
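/*
 * Walk the tag up its parent chain to the first tag that implements the
 * requested method; panic if the chain ends without one.
 */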
577 #define BUS_DMA_FIND_PARENT(t, fn)                                      \
578         if (t->_parent == NULL)                                         \
579                 panic("null bus_dma parent (" #fn ")");                 \
580         for (t = t->_parent; t->fn == NULL; t = t->_parent)             \
581                 if (t->_parent == NULL)                                 \
582                         panic("no bus_dma " #fn " located");
583 
584 int
585 iommu_dvmamap_create(bus_dma_tag_t t, bus_dma_tag_t t0, struct strbuf_ctl *sb,
586     bus_size_t size, int nsegments, bus_size_t maxsegsz, bus_size_t boundary,
587     int flags, bus_dmamap_t *dmamap)
588 {
589 	int ret;
590 	bus_dmamap_t map;
591 	struct iommu_map_state *ims;
592 
593 	BUS_DMA_FIND_PARENT(t, _dmamap_create);
594 	ret = (*t->_dmamap_create)(t, t0, size, nsegments, maxsegsz, boundary,
595 	    flags, &map);
596 
597 	if (ret)
598 		return (ret);
599 
600 	ims = iommu_iomap_create(atop(round_page(size)));
601 
602 	if (ims == NULL) {
603 		bus_dmamap_destroy(t0, map);
604 		return (ENOMEM);
605 	}
606 
607 	ims->ims_sb = sb;
608 	map->_dm_cookie = ims;
609 
610 #ifdef DIAGNOSTIC
611 	if (ims->ims_sb == NULL)
612 		panic("iommu_dvmamap_create: null sb");
613 	if (ims->ims_sb->sb_iommu == NULL)
614 		panic("iommu_dvmamap_create: null iommu");
615 #endif
616 	*dmamap = map;
617 
618 	return (0);
619 }
620 
621 void
622 iommu_dvmamap_destroy(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
623 {
624 	/*
625 	 * The specification (man page) requires a loaded
626 	 * map to be unloaded before it is destroyed.
627 	 */
628 	if (map->dm_nsegs)
629 		bus_dmamap_unload(t0, map);
630 
631 	if (map->_dm_cookie)
632 		iommu_iomap_destroy(map->_dm_cookie);
633 	map->_dm_cookie = NULL;
634 
635 	BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
636 	(*t->_dmamap_destroy)(t, t0, map);
637 }
638 
639 /*
640  * Load a contiguous kva buffer into a dmamap.  The physical pages are
641  * not assumed to be contiguous.  Two passes are made through the buffer
642  * and both call pmap_extract() for the same va->pa translations.  It
643  * is possible to run out of pa->dvma mappings; the code should be smart
644  * enough to resize the iomap (when the "flags" permit allocation).  It
645  * is trivial to compute the number of entries required (round the length
646  * up to the page size and then divide by the page size)...
647  */
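/*
 * For instance (illustrative): a page-aligned buffer of 3 * PAGE_SIZE
 * needs atop(round_page(buflen)) = 3 iomap entries, while the same
 * buffer starting in mid-page straddles one extra page and needs 4.
 */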
648 int
649 iommu_dvmamap_load(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
650     void *buf, bus_size_t buflen, struct proc *p, int flags)
651 {
652 	int err = 0;
653 	bus_size_t sgsize;
654 	u_long dvmaddr, sgstart, sgend;
655 	bus_size_t align, boundary;
656 	struct iommu_state *is;
657 	struct iommu_map_state *ims = map->_dm_cookie;
658 	pmap_t pmap;
659 
660 #ifdef DIAGNOSTIC
661 	if (ims == NULL)
662 		panic("iommu_dvmamap_load: null map state");
663 #endif
664 #ifdef DEBUG
665 	if (ims->ims_sb == NULL)
666 		panic("iommu_dvmamap_load: null sb");
667 	if (ims->ims_sb->sb_iommu == NULL)
668 		panic("iommu_dvmamap_load: null iommu");
669 #endif /* DEBUG */
670 	is = ims->ims_sb->sb_iommu;
671 
672 	if (map->dm_nsegs) {
673 		/*
674 		 * Is it still in use? _bus_dmamap_load should have taken care
675 		 * of this.
676 		 */
677 #ifdef DIAGNOSTIC
678 		panic("iommu_dvmamap_load: map still in use");
679 #endif
680 		bus_dmamap_unload(t0, map);
681 	}
682 
683 	/*
684 	 * Make sure that on an error condition we return "no valid mappings".
685 	 */
686 	map->dm_nsegs = 0;
687 
688 	if (buflen < 1 || buflen > map->_dm_size) {
689 		DPRINTF(IDB_BUSDMA,
690 		    ("iommu_dvmamap_load(): error %d > %d -- "
691 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
692 		return (EINVAL);
693 	}
694 
695 	/*
696 	 * A boundary presented to bus_dmamem_alloc() takes precedence
697 	 * over boundary in the map.
698 	 */
699 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
700 		boundary = map->_dm_boundary;
701 	align = MAX(map->dm_segs[0]._ds_align, PAGE_SIZE);
702 
703 	pmap = p ? p->p_vmspace->vm_map.pmap : pmap_kernel();
704 
705 	/* Count up the total number of pages we need */
706 	iommu_iomap_clear_pages(ims);
707 	{ /* Scope */
708 		bus_addr_t a, aend;
709 		bus_addr_t addr = (bus_addr_t)buf;
710 		int seg_len = buflen;
711 
712 		aend = round_page(addr + seg_len);
713 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
714 			paddr_t pa;
715 
716 			if (pmap_extract(pmap, a, &pa) == FALSE)
717 				panic("iomap pmap error addr 0x%llx\n", a);
718 
719 			err = iommu_iomap_insert_page(ims, pa);
720 			if (err) {
721 				printf("iomap insert error: %d for "
722 				    "va 0x%llx pa 0x%lx "
723 				    "(buf %p len %lld/%llx)\n",
724 				    err, a, pa, buf, buflen, buflen);
725 				iommu_dvmamap_print_map(t, is, map);
726 				iommu_iomap_clear_pages(ims);
727 				return (EFBIG);
728 			}
729 		}
730 	}
731 	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
732 
733 	mtx_enter(&is->is_mtx);
734 	if (flags & BUS_DMA_24BIT) {
735 		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
736 		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
737 	} else {
738 		sgstart = is->is_dvmamap->ex_start;
739 		sgend = is->is_dvmamap->ex_end;
740 	}
741 
742 	/*
743 	 * If our segment size is larger than the boundary we need to
744 	 * split the transfer up into little pieces ourselves.
745 	 */
746 	err = extent_alloc_subregion(is->is_dvmamap, sgstart, sgend,
747 	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
748 	    EX_NOWAIT | EX_BOUNDZERO, (u_long *)&dvmaddr);
749 	mtx_leave(&is->is_mtx);
750 
751 #ifdef DEBUG
752 	if (err || (dvmaddr == (bus_addr_t)-1))	{
753 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
754 		    (int)sgsize, flags);
755 #ifdef DDB
756 		if (iommudebug & IDB_BREAK)
757 			Debugger();
758 #endif
759 	}
760 #endif
761 	if (err != 0) {
762 		iommu_iomap_clear_pages(ims);
763 		return (err);
764 	}
765 
766 	/* Set the active DVMA map */
767 	map->_dm_dvmastart = dvmaddr;
768 	map->_dm_dvmasize = sgsize;
769 
770 	map->dm_mapsize = buflen;
771 
772 #ifdef DEBUG
773 	iommu_dvmamap_validate_map(t, is, map);
774 #endif
775 
776 	iommu_iomap_load_map(is, ims, dvmaddr, flags);
777 
778 	{ /* Scope */
779 		bus_addr_t a, aend;
780 		bus_addr_t addr = (bus_addr_t)buf;
781 		int seg_len = buflen;
782 
783 		aend = round_page(addr + seg_len);
784 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
785 			bus_addr_t pgstart;
786 			bus_addr_t pgend;
787 			paddr_t pa;
788 			int pglen;
789 
790 			/* Yuck... Redoing the same pmap_extract... */
791 			if (pmap_extract(pmap, a, &pa) == FALSE)
792 				panic("iomap pmap error addr 0x%llx\n", a);
793 
794 			pgstart = pa | (MAX(a, addr) & PAGE_MASK);
795 			pgend = pa | (MIN(a + PAGE_SIZE - 1,
796 			    addr + seg_len - 1) & PAGE_MASK);
797 			pglen = pgend - pgstart + 1;
798 
799 			if (pglen < 1)
800 				continue;
801 
802 			err = iommu_dvmamap_append_range(t, map, pgstart,
803 			    pglen, flags, boundary);
804 			if (err == EFBIG)
805 				break;
806 			else if (err) {
807 				printf("iomap load seg page: %d for "
808 				    "va 0x%llx pa %lx (%llx - %llx) "
809 				    "for %d/0x%x\n",
810 				    err, a, pa, pgstart, pgend, pglen, pglen);
811 				break;
812 			}
813 		}
814 	}
815 #ifdef DEBUG
816 	iommu_dvmamap_validate_map(t, is, map);
817 
818 	if (err)
819 		printf("**** iommu_dvmamap_load failed with error %d\n",
820 		    err);
821 
822 	if (err || (iommudebug & IDB_PRINT_MAP)) {
823 		iommu_dvmamap_print_map(t, is, map);
824 #ifdef DDB
825 		if (iommudebug & IDB_BREAK)
826 			Debugger();
827 #endif
828 	}
829 #endif
830 	if (err)
831 		iommu_dvmamap_unload(t, t0, map);
832 
833 	return (err);
834 }
835 
836 /*
837  * Load a dvmamap from an array of segs or an mlist (if the first
838  * "segs" entry's mlist is non-null).  It calls iommu_dvmamap_load_seg()
839  * or iommu_dvmamap_load_mlist() for part of the 2nd pass through the
840  * mapping.  This is ugly.  A better solution would probably be to have
841  * function pointers for implementing the traversal.  That way, there
842  * could be one core load routine for each of the three required algorithms
843  * (buffer, seg, and mlist).  That would also mean that the traversal
844  * algorithm would then only need one implementation for each case
845  * instead of two (one for populating the iomap and one for populating
846  * the dvma map).
847  */
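/*
 * One hypothetical shape for that refactoring (not implemented here): a
 * single core loader taking an iterator callback such as
 *
 *	int (*next_page)(void *cookie, paddr_t *pa, psize_t *len);
 *
 * with one small iterator each for kva buffers, seg arrays, and mlists,
 * so each traversal is written once instead of twice.
 */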
848 int
849 iommu_dvmamap_load_raw(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
850     bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
851 {
852 	int i;
853 	int left;
854 	int err = 0;
855 	bus_size_t sgsize;
856 	bus_size_t boundary, align;
857 	u_long dvmaddr, sgstart, sgend;
858 	struct iommu_state *is;
859 	struct iommu_map_state *ims = map->_dm_cookie;
860 
861 #ifdef DIAGNOSTIC
862 	if (ims == NULL)
863 		panic("iommu_dvmamap_load_raw: null map state");
864 #endif
865 #ifdef DEBUG
866 	if (ims->ims_sb == NULL)
867 		panic("iommu_dvmamap_load_raw: null sb");
868 	if (ims->ims_sb->sb_iommu == NULL)
869 		panic("iommu_dvmamap_load_raw: null iommu");
870 #endif /* DEBUG */
871 	is = ims->ims_sb->sb_iommu;
872 
873 	if (map->dm_nsegs) {
874 		/* Already in use?? */
875 #ifdef DIAGNOSTIC
876 		panic("iommu_dvmamap_load_raw: map still in use");
877 #endif
878 		bus_dmamap_unload(t0, map);
879 	}
880 
881 	/*
882 	 * A boundary presented to bus_dmamem_alloc() takes precedence
883 	 * over boundary in the map.
884 	 */
885 	if ((boundary = segs[0]._ds_boundary) == 0)
886 		boundary = map->_dm_boundary;
887 
888 	align = MAX(segs[0]._ds_align, PAGE_SIZE);
889 
890 	/*
891 	 * Make sure that on an error condition we return "no valid mappings".
892 	 */
893 	map->dm_nsegs = 0;
894 
895 	iommu_iomap_clear_pages(ims);
896 	if (segs[0]._ds_mlist) {
897 		struct pglist *mlist = segs[0]._ds_mlist;
898 		struct vm_page *m;
899 		for (m = TAILQ_FIRST(mlist); m != NULL;
900 		    m = TAILQ_NEXT(m,pageq)) {
901 			err = iommu_iomap_insert_page(ims, VM_PAGE_TO_PHYS(m));
902 
903 			if (err) {
904 				printf("iomap insert error: %d for "
905 				    "pa 0x%lx\n", err, VM_PAGE_TO_PHYS(m));
906 				iommu_dvmamap_print_map(t, is, map);
907 				iommu_iomap_clear_pages(ims);
908 				return (EFBIG);
909 			}
910 		}
911 	} else {
912 		/* Count up the total number of pages we need */
913 		for (i = 0, left = size; left > 0 && i < nsegs; i++) {
914 			bus_addr_t a, aend;
915 			bus_size_t len = segs[i].ds_len;
916 			bus_addr_t addr = segs[i].ds_addr;
917 			int seg_len = MIN(left, len);
918 
919 			if (len < 1)
920 				continue;
921 
922 			aend = round_page(addr + seg_len);
923 			for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
924 
925 				err = iommu_iomap_insert_page(ims, a);
926 				if (err) {
927 					printf("iomap insert error: %d for "
928 					    "pa 0x%llx\n", err, a);
929 					iommu_dvmamap_print_map(t, is, map);
930 					iommu_iomap_clear_pages(ims);
931 					return (EFBIG);
932 				}
933 			}
934 
935 			left -= seg_len;
936 		}
937 	}
938 	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
939 
940 	mtx_enter(&is->is_mtx);
941 	if (flags & BUS_DMA_24BIT) {
942 		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
943 		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
944 	} else {
945 		sgstart = is->is_dvmamap->ex_start;
946 		sgend = is->is_dvmamap->ex_end;
947 	}
948 
949 	/*
950 	 * If our segment size is larger than the boundary we need to
951 	 * split the transfer up into little pieces ourselves.
952 	 */
953 	err = extent_alloc_subregion(is->is_dvmamap, sgstart, sgend,
954 	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
955 	    EX_NOWAIT | EX_BOUNDZERO, (u_long *)&dvmaddr);
956 	mtx_leave(&is->is_mtx);
957 
958 	if (err != 0) {
959 		iommu_iomap_clear_pages(ims);
960 		return (err);
961 	}
962 
963 #ifdef DEBUG
964 	if (dvmaddr == (bus_addr_t)-1)	{
965 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) "
966 		    "failed!\n", (int)sgsize, flags);
967 #ifdef DDB
968 		if (iommudebug & IDB_BREAK)
969 			Debugger();
970 #else
971 		panic("");
972 #endif
973 	}
974 #endif
975 
976 	/* Set the active DVMA map */
977 	map->_dm_dvmastart = dvmaddr;
978 	map->_dm_dvmasize = sgsize;
979 
980 	map->dm_mapsize = size;
981 
982 #ifdef DEBUG
983 	iommu_dvmamap_validate_map(t, is, map);
984 #endif
985 
986 	iommu_iomap_load_map(is, ims, dvmaddr, flags);
987 
988 	if (segs[0]._ds_mlist)
989 		err = iommu_dvmamap_load_mlist(t, is, map, segs[0]._ds_mlist,
990 		    flags, size, boundary);
991 	else
992 		err = iommu_dvmamap_load_seg(t, is, map, segs, nsegs,
993 		    flags, size, boundary);
994 
995 #ifdef DEBUG
996 	/* The map should be valid even if the load failed */
997 	if (iommu_dvmamap_validate_map(t, is, map)) {
998 		printf("load size %lld/0x%llx\n", size, size);
999 		if (segs[0]._ds_mlist)
1000 			printf("mlist %p\n", segs[0]._ds_mlist);
1001 		else  {
1002 			long tot_len = 0;
1003 			long clip_len = 0;
1004 			printf("segs %p nsegs %d\n", segs, nsegs);
1005 
1006 			left = size;
1007 			for (i = 0; i < nsegs; i++) {
1008 				bus_size_t len = segs[i].ds_len;
1009 				bus_addr_t addr = segs[i].ds_addr;
1010 				int seg_len = MIN(left, len);
1011 
1012 				printf("addr %llx len %lld/0x%llx seg_len "
1013 				    "%d/0x%x left %d/0x%x\n", addr, len, len,
1014 				    seg_len, seg_len, left, left);
1015 
1016 				left -= seg_len;
1017 
1018 				clip_len += seg_len;
1019 				tot_len += segs[i].ds_len;
1020 			}
1021 			printf("total length %ld/0x%lx total seg. "
1022 			    "length %ld/0x%lx\n", tot_len, tot_len, clip_len,
1023 			    clip_len);
1024 		}
1025 
1026 		if (err == 0)
1027 			err = 1;
1028 	}
1029 
1030 	if (err)
1031 		printf("**** iommu_dvmamap_load_raw failed with error %d\n",
1032 		    err);
1033 
1034 	if (err || (iommudebug & IDB_PRINT_MAP)) {
1035 		iommu_dvmamap_print_map(t, is, map);
1036 #ifdef DDB
1037 		if (iommudebug & IDB_BREAK)
1038 			Debugger();
1039 #endif
1040 	}
1041 #endif
1042 	if (err)
1043 		iommu_dvmamap_unload(t, t0, map);
1044 
1045 	return (err);
1046 }
1047 
1048 /*
1049  * Insert a range of addresses into a loaded map respecting the specified
1050  * boundary and alignment restrictions.  The range is specified by its
1051  * physical address and length.  The range cannot cross a page boundary.
1052  * This code (along with most of the rest of the function in this file)
1053  * assumes that the IOMMU page size is equal to PAGE_SIZE.
1054  */
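/*
 * Boundary example (illustrative): with boundary = 0x1000, a range that
 * starts 0x800 bytes into a boundary block and is 0x1000 bytes long
 * crosses one boundary and is emitted as two 0x800-byte segments.
 */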
1055 int
1056 iommu_dvmamap_append_range(bus_dma_tag_t t, bus_dmamap_t map, paddr_t pa,
1057     bus_size_t length, int flags, bus_size_t boundary)
1058 {
1059 	struct iommu_map_state *ims = map->_dm_cookie;
1060 	bus_addr_t sgstart, sgend, bd_mask;
1061 	bus_dma_segment_t *seg = NULL;
1062 	int i = map->dm_nsegs;
1063 
1064 #ifdef DEBUG
1065 	if (ims == NULL)
1066 		panic("iommu_dvmamap_append_range: null map state");
1067 #endif
1068 
1069 	sgstart = iommu_iomap_translate(ims, pa);
1070 	sgend = sgstart + length - 1;
1071 
1072 #ifdef DIAGNOSTIC
1073 	if (sgstart == 0 || sgstart > sgend) {
1074 		printf("append range invalid mapping for %lx "
1075 		    "(0x%llx - 0x%llx)\n", pa, sgstart, sgend);
1076 		map->dm_nsegs = 0;
1077 		return (EINVAL);
1078 	}
1079 #endif
1080 
1081 #ifdef DEBUG
1082 	if (trunc_page(sgstart) != trunc_page(sgend)) {
1083 		printf("append range crossing page boundary! "
1084 		    "pa %lx length %lld/0x%llx sgstart %llx sgend %llx\n",
1085 		    pa, length, length, sgstart, sgend);
1086 	}
1087 #endif
1088 
1089 	/*
1090 	 * We will attempt to merge this range with the previous entry
1091 	 * (if there is one).
1092 	 */
1093 	if (i > 0) {
1094 		seg = &map->dm_segs[i - 1];
1095 		if (sgstart == seg->ds_addr + seg->ds_len) {
1096 			length += seg->ds_len;
1097 			sgstart = seg->ds_addr;
1098 			sgend = sgstart + length - 1;
1099 		} else
1100 			seg = NULL;
1101 	}
1102 
1103 	if (seg == NULL) {
1104 		seg = &map->dm_segs[i];
1105 		if (++i > map->_dm_segcnt) {
1106 			map->dm_nsegs = 0;
1107 			return (EFBIG);
1108 		}
1109 	}
1110 
1111 	/*
1112 	 * At this point, "i" is the index of the *next* bus_dma_segment_t
1113 	 * (the segment count, aka map->dm_nsegs) and "seg" points to the
1114 	 * *current* entry.  "length", "sgstart", and "sgend" reflect what
1115 	 * we intend to put in "*seg".  No assumptions should be made about
1116 	 * the contents of "*seg".  Only a "boundary" issue can change this,
1117 	 * and "boundary" is often zero, so explicitly test for that case
1118 	 * (the test is strictly an optimization).
1119 	 */
1120 	if (boundary != 0) {
1121 		bd_mask = ~(boundary - 1);
1122 
1123 		while ((sgstart & bd_mask) != (sgend & bd_mask)) {
1124 			/*
1125 			 * We are crossing a boundary so fill in the current
1126 			 * segment with as much as possible, then grab a new
1127 			 * one.
1128 			 */
1129 
1130 			seg->ds_addr = sgstart;
1131 			seg->ds_len = boundary - (sgstart & ~bd_mask);
1132 
1133 			sgstart += seg->ds_len; /* sgend stays the same */
1134 			length -= seg->ds_len;
1135 
1136 			seg = &map->dm_segs[i];
1137 			if (++i > map->_dm_segcnt) {
1138 				map->dm_nsegs = 0;
1139 				return (EFBIG);
1140 			}
1141 		}
1142 	}
1143 
1144 	seg->ds_addr = sgstart;
1145 	seg->ds_len = length;
1146 	map->dm_nsegs = i;
1147 
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Populate the dvma map from a bus_dma_segment_t array.  See note for
1153  * iommu_dvmamap_load() regarding page entry exhaustion of the iomap.
1154  * This is less of a problem for load_seg, as the number of pages
1155  * is usually similar to the number of segments (nsegs).
1156  */
1157 int
1158 iommu_dvmamap_load_seg(bus_dma_tag_t t, struct iommu_state *is,
1159     bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int flags,
1160     bus_size_t size, bus_size_t boundary)
1161 {
1162 	int i;
1163 	int left;
1164 	int seg;
1165 
1166 	/*
1167 	 * This segs array is made up of individual physical
1168 	 * segments, probably built by _bus_dmamap_load_uio() or
1169 	 * _bus_dmamap_load_mbuf().  Ignore the mlist and
1170 	 * load each one individually.
1171 	 */
1172 
1173 	/*
1174 	 * Keep in mind that each segment could span
1175 	 * multiple pages and that these are not always
1176 	 * adjacent. The code is no longer adding dvma
1177 	 * aliases to the IOMMU.  The STC will not cross
1178 	 * page boundaries anyway and an IOMMU table walk
1179 	 * vs. what may be a streamed PCI DMA to a ring
1180 	 * descriptor is probably a wash.  It eases TLB
1181 	 * pressure and in the worst possible case, it is
1182 	 * only as bad as a non-IOMMUed architecture.  More
1183 	 * importantly, the code is not quite as hairy.
1184 	 * (It's bad enough as it is.)
1185 	 */
1186 	left = size;
1187 	seg = 0;
1188 	for (i = 0; left > 0 && i < nsegs; i++) {
1189 		bus_addr_t a, aend;
1190 		bus_size_t len = segs[i].ds_len;
1191 		bus_addr_t addr = segs[i].ds_addr;
1192 		int seg_len = MIN(left, len);
1193 
1194 		if (len < 1)
1195 			continue;
1196 
1197 		aend = round_page(addr + seg_len);
1198 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
1199 			bus_addr_t pgstart;
1200 			bus_addr_t pgend;
1201 			int pglen;
1202 			int err;
1203 
1204 			pgstart = MAX(a, addr);
1205 			pgend = MIN(a + PAGE_SIZE - 1, addr + seg_len - 1);
1206 			pglen = pgend - pgstart + 1;
1207 
1208 			if (pglen < 1)
1209 				continue;
1210 
1211 			err = iommu_dvmamap_append_range(t, map, pgstart,
1212 			    pglen, flags, boundary);
1213 			if (err == EFBIG)
1214 				return (err);
1215 			if (err) {
1216 				printf("iomap load seg page: %d for "
1217 				    "pa 0x%llx (%llx - %llx) for %d/%x\n",
1218 				    err, a, pgstart, pgend, pglen, pglen);
1219 				return (err);
1220 			}
1221 
1222 		}
1223 
1224 		left -= seg_len;
1225 	}
1226 	return (0);
1227 }
1228 
1229 /*
1230  * Populate the dvma map from an mlist.  See note for iommu_dvmamap_load()
1231  * regarding page entry exhaustion of the iomap.
1232  */
1233 int
1234 iommu_dvmamap_load_mlist(bus_dma_tag_t t, struct iommu_state *is,
1235     bus_dmamap_t map, struct pglist *mlist, int flags,
1236     bus_size_t size, bus_size_t boundary)
1237 {
1238 	struct vm_page *m;
1239 	paddr_t pa;
1240 	int err;
1241 
1242 	/*
1243 	 * This was allocated with bus_dmamem_alloc.
1244 	 * The pages are on an `mlist'.
1245 	 */
1246 	for (m = TAILQ_FIRST(mlist); m != NULL; m = TAILQ_NEXT(m,pageq)) {
1247 		pa = VM_PAGE_TO_PHYS(m);
1248 
1249 		err = iommu_dvmamap_append_range(t, map, pa, PAGE_SIZE,
1250 		    flags, boundary);
1251 		if (err == EFBIG)
1252 			return (err);
1253 		if (err) {
1254 			printf("iomap load seg page: %d for pa 0x%lx "
1255 			    "(%lx - %lx) for %d/%x\n", err, pa, pa,
1256 			    pa + PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1257 			return (err);
1258 		}
1259 	}
1260 
1261 	return (0);
1262 }
1263 
1264 /*
1265  * Unload a dvmamap.
1266  */
1267 void
1268 iommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
1269 {
1270 	struct iommu_state *is;
1271 	struct iommu_map_state *ims = map->_dm_cookie;
1272 	bus_addr_t dvmaddr = map->_dm_dvmastart;
1273 	bus_size_t sgsize = map->_dm_dvmasize;
1274 	int error;
1275 
1276 #ifdef DEBUG
1277 	if (ims == NULL)
1278 		panic("iommu_dvmamap_unload: null map state");
1279 	if (ims->ims_sb == NULL)
1280 		panic("iommu_dvmamap_unload: null sb");
1281 	if (ims->ims_sb->sb_iommu == NULL)
1282 		panic("iommu_dvmamap_unload: null iommu");
1283 #endif /* DEBUG */
1284 
1285 	is = ims->ims_sb->sb_iommu;
1286 
1287 	/* Flush the iommu */
1288 #ifdef DEBUG
1289 	if (dvmaddr == 0) {
1290 		printf("iommu_dvmamap_unload: No dvmastart\n");
1291 #ifdef DDB
1292 		if (iommudebug & IDB_BREAK)
1293 			Debugger();
1294 #endif
1295 		return;
1296 	}
1297 
1298 	iommu_dvmamap_validate_map(t, is, map);
1299 
1300 	if (iommudebug & IDB_PRINT_MAP)
1301 		iommu_dvmamap_print_map(t, is, map);
1302 #endif /* DEBUG */
1303 
1304 	/* Remove the IOMMU entries */
1305 	iommu_iomap_unload_map(is, ims);
1306 
1307 	/* Clear the iomap */
1308 	iommu_iomap_clear_pages(ims);
1309 
1310 	bus_dmamap_unload(t->_parent, map);
1311 
1312 	/* Mark the mappings as invalid. */
1313 	map->dm_mapsize = 0;
1314 	map->dm_nsegs = 0;
1315 
1316 	mtx_enter(&is->is_mtx);
1317 	error = extent_free(is->is_dvmamap, dvmaddr, sgsize, EX_NOWAIT);
1318 	map->_dm_dvmastart = 0;
1319 	map->_dm_dvmasize = 0;
1320 	mtx_leave(&is->is_mtx);
1321 	if (error != 0)
1322 		printf("warning: %qd of DVMA space lost\n", sgsize);
1323 }
1324 
1325 #ifdef DEBUG
1326 /*
1327  * Perform internal consistency checking on a dvmamap.
1328  */
1329 int
1330 iommu_dvmamap_validate_map(bus_dma_tag_t t, struct iommu_state *is,
1331     bus_dmamap_t map)
1332 {
1333 	int err = 0;
1334 	int seg;
1335 
1336 	if (trunc_page(map->_dm_dvmastart) != map->_dm_dvmastart) {
1337 		printf("**** dvmastart address not page aligned: %llx",
1338 			map->_dm_dvmastart);
1339 		err = 1;
1340 	}
1341 	if (trunc_page(map->_dm_dvmasize) != map->_dm_dvmasize) {
1342 		printf("**** dvmasize not a multiple of page size: %llx",
1343 			map->_dm_dvmasize);
1344 		err = 1;
1345 	}
1346 	if (map->_dm_dvmastart < is->is_dvmabase ||
1347 	    (round_page(map->_dm_dvmastart + map->_dm_dvmasize) - 1) >
1348 	    is->is_dvmaend) {
1349 		printf("dvmaddr %llx len %llx out of range %x - %x\n",
1350 			    map->_dm_dvmastart, map->_dm_dvmasize,
1351 			    is->is_dvmabase, is->is_dvmaend);
1352 		err = 1;
1353 	}
1354 	for (seg = 0; seg < map->dm_nsegs; seg++) {
1355 		if (map->dm_segs[seg].ds_addr == 0 ||
1356 		    map->dm_segs[seg].ds_len == 0) {
1357 			printf("seg %d null segment dvmaddr %llx len %llx for "
1358 			    "range %llx len %llx\n",
1359 			    seg,
1360 			    map->dm_segs[seg].ds_addr,
1361 			    map->dm_segs[seg].ds_len,
1362 			    map->_dm_dvmastart, map->_dm_dvmasize);
1363 			err = 1;
1364 		} else if (map->dm_segs[seg].ds_addr < map->_dm_dvmastart ||
1365 		    round_page(map->dm_segs[seg].ds_addr +
1366 			map->dm_segs[seg].ds_len) >
1367 		    map->_dm_dvmastart + map->_dm_dvmasize) {
1368 			printf("seg %d dvmaddr %llx len %llx out of "
1369 			    "range %llx len %llx\n",
1370 			    seg,
1371 			    map->dm_segs[seg].ds_addr,
1372 			    map->dm_segs[seg].ds_len,
1373 			    map->_dm_dvmastart, map->_dm_dvmasize);
1374 			err = 1;
1375 		}
1376 	}
1377 
1378 	if (err) {
1379 		iommu_dvmamap_print_map(t, is, map);
1380 #if defined(DDB) && defined(DEBUG)
1381 		if (iommudebug & IDB_BREAK)
1382 			Debugger();
1383 #endif
1384 	}
1385 
1386 	return (err);
1387 }
1388 #endif /* DEBUG */
1389 
1390 void
1391 iommu_dvmamap_print_map(bus_dma_tag_t t, struct iommu_state *is,
1392     bus_dmamap_t map)
1393 {
1394 	int seg, i;
1395 	long full_len, source_len;
1396 	struct mbuf *m;
1397 
1398 	printf("DVMA %x for %x, mapping %p: dvstart %llx dvsize %llx "
1399 	    "size %lld/%llx maxsegsz %llx boundary %llx segcnt %d "
1400 	    "flags %x type %d source %p "
1401 	    "cookie %p mapsize %llx nsegs %d\n",
1402 	    is ? is->is_dvmabase : 0, is ? is->is_dvmaend : 0, map,
1403 	    map->_dm_dvmastart, map->_dm_dvmasize,
1404 	    map->_dm_size, map->_dm_size, map->_dm_maxsegsz, map->_dm_boundary,
1405 	    map->_dm_segcnt, map->_dm_flags, map->_dm_type,
1406 	    map->_dm_source, map->_dm_cookie, map->dm_mapsize,
1407 	    map->dm_nsegs);
1408 
1409 	full_len = 0;
1410 	for (seg = 0; seg < map->dm_nsegs; seg++) {
1411 		printf("seg %d dvmaddr %llx pa %lx len %llx (tte %llx)\n",
1412 		    seg, map->dm_segs[seg].ds_addr,
1413 		    is ? iommu_extract(is, map->dm_segs[seg].ds_addr) : 0,
1414 		    map->dm_segs[seg].ds_len,
1415 		    is ? iommu_lookup_tte(is, map->dm_segs[seg].ds_addr) : 0);
1416 		full_len += map->dm_segs[seg].ds_len;
1417 	}
1418 	printf("total length = %ld/0x%lx\n", full_len, full_len);
1419 
1420 	if (map->_dm_source) switch (map->_dm_type) {
1421 	case _DM_TYPE_MBUF:
1422 		m = map->_dm_source;
1423 		if (m->m_flags & M_PKTHDR)
1424 			printf("source PKTHDR mbuf (%p) hdr len = %d/0x%x:\n",
1425 			    m, m->m_pkthdr.len, m->m_pkthdr.len);
1426 		else
1427 			printf("source mbuf (%p):\n", m);
1428 
1429 		source_len = 0;
1430 		for ( ; m; m = m->m_next) {
1431 			vaddr_t vaddr = mtod(m, vaddr_t);
1432 			long len = m->m_len;
1433 			paddr_t pa;
1434 
1435 			if (pmap_extract(pmap_kernel(), vaddr, &pa))
1436 				printf("kva %lx pa %lx len %ld/0x%lx\n",
1437 				    vaddr, pa, len, len);
1438 			else
1439 				printf("kva %lx pa <invalid> len %ld/0x%lx\n",
1440 				    vaddr, len, len);
1441 
1442 			source_len += len;
1443 		}
1444 
1445 		if (full_len != source_len)
1446 			printf("mbuf length %ld/0x%lx is %s than mapping "
1447 			    "length %ld/0x%lx\n", source_len, source_len,
1448 			    (source_len > full_len) ? "greater" : "less",
1449 			    full_len, full_len);
1450 		else
1451 			printf("mbuf length %ld/0x%lx\n", source_len,
1452 			    source_len);
1453 		break;
1454 	case _DM_TYPE_LOAD:
1455 	case _DM_TYPE_SEGS:
1456 	case _DM_TYPE_UIO:
1457 	default:
1458 		break;
1459 	}
1460 
1461 	if (map->_dm_cookie) {
1462 		struct iommu_map_state *ims = map->_dm_cookie;
1463 		struct iommu_page_map *ipm = &ims->ims_map;
1464 
1465 		printf("page map (%p) of size %d with %d entries\n",
1466 		    ipm, ipm->ipm_maxpage, ipm->ipm_pagecnt);
1467 		for (i = 0; i < ipm->ipm_pagecnt; ++i) {
1468 			struct iommu_page_entry *e = &ipm->ipm_map[i];
1469 			printf("%d: vmaddr 0x%lx pa 0x%lx\n", i,
1470 			    e->ipe_va, e->ipe_pa);
1471 		}
1472 	} else
1473 		printf("iommu map state (cookie) is NULL\n");
1474 }
1475 
1476 void
1477 _iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
1478 	bus_addr_t offset, bus_size_t len, int ops)
1479 {
1480 	struct iommu_state *is;
1481 	struct iommu_map_state *ims = map->_dm_cookie;
1482 	struct strbuf_ctl *sb;
1483 	bus_size_t count;
1484 	int i, needsflush = 0;
1485 
1486 	sb = ims->ims_sb;
1487 	is = sb->sb_iommu;
1488 
1489 	for (i = 0; i < map->dm_nsegs; i++) {
1490 		if (offset < map->dm_segs[i].ds_len)
1491 			break;
1492 		offset -= map->dm_segs[i].ds_len;
1493 	}
1494 
1495 	if (i == map->dm_nsegs)
1496 		panic("iommu_dvmamap_sync: too short %llu", offset);
1497 
1498 	for (; len > 0 && i < map->dm_nsegs; i++) {
1499 		count = MIN(map->dm_segs[i].ds_len - offset, len);
1500 		if (count > 0 && iommu_dvmamap_sync_range(sb,
1501 		    map->dm_segs[i].ds_addr + offset, count))
1502 			needsflush = 1;
1503 		len -= count;
1504 	}
1505 
1506 #ifdef DIAGNOSTIC
1507 	if (i == map->dm_nsegs && len > 0)
1508 		panic("iommu_dvmamap_sync: leftover %llu", len);
1509 #endif
1510 
1511 	if (needsflush)
1512 		iommu_strbuf_flush_done(ims);
1513 }
1514 
1515 void
1516 iommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
1517     bus_addr_t offset, bus_size_t len, int ops)
1518 {
1519 	struct iommu_map_state *ims = map->_dm_cookie;
1520 
1521 #ifdef DIAGNOSTIC
1522 	if (ims == NULL)
1523 		panic("iommu_dvmamap_sync: null map state");
1524 	if (ims->ims_sb == NULL)
1525 		panic("iommu_dvmamap_sync: null sb");
1526 	if (ims->ims_sb->sb_iommu == NULL)
1527 		panic("iommu_dvmamap_sync: null iommu");
1528 #endif
1529 	if (len == 0)
1530 		return;
1531 
1532 	if (ops & BUS_DMASYNC_PREWRITE)
1533 		membar(MemIssue);
1534 
1535 	if ((ims->ims_flags & IOMMU_MAP_STREAM) &&
1536 	    (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE)))
1537 		_iommu_dvmamap_sync(t, t0, map, offset, len, ops);
1538 
1539 	if (ops & BUS_DMASYNC_POSTREAD)
1540 		membar(MemIssue);
1541 }
1542 
1543 /*
1544  * Flush an individual dma segment, returns non-zero if the streaming buffers
1545  * need flushing afterwards.
1546  */
1547 int
1548 iommu_dvmamap_sync_range(struct strbuf_ctl *sb, bus_addr_t va, bus_size_t len)
1549 {
1550 	bus_addr_t vaend;
1551 #ifdef DIAGNOSTIC
1552 	struct iommu_state *is = sb->sb_iommu;
1553 
1554 	if (va < is->is_dvmabase || va > is->is_dvmaend)
1555 		panic("invalid va: %llx", (long long)va);
1556 
1557 	if ((is->is_tsb[IOTSBSLOT(va, is->is_tsbsize)] & IOTTE_STREAM) == 0) {
1558 		printf("iommu_dvmamap_sync_range: attempting to flush "
1559 		    "non-streaming entry\n");
1560 		return (0);
1561 	}
1562 #endif
1563 
1564 	vaend = (va + len + PAGE_MASK) & ~PAGE_MASK;
1565 	va &= ~PAGE_MASK;
1566 
1567 #ifdef DIAGNOSTIC
1568 	if (va < is->is_dvmabase || (vaend - 1) > is->is_dvmaend)
1569 		panic("invalid va range: %llx to %llx (%x to %x)",
1570 		    (long long)va, (long long)vaend,
1571 		    is->is_dvmabase,
1572 		    is->is_dvmaend);
1573 #endif
1574 
1575 	for ( ; va <= vaend; va += PAGE_SIZE) {
1576 		DPRINTF(IDB_BUSDMA,
1577 		    ("iommu_dvmamap_sync_range: flushing va %p\n",
1578 		    (void *)(u_long)va));
1579 		iommu_strbuf_flush(sb, va);
1580 	}
1581 
1582 	return (1);
1583 }
1584 
1585 int
1586 iommu_dvmamem_alloc(bus_dma_tag_t t, bus_dma_tag_t t0, bus_size_t size,
1587     bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs,
1588     int nsegs, int *rsegs, int flags)
1589 {
1590 
1591 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx "
1592 	    "bound %llx segp %p flags %d\n", (unsigned long long)size,
1593 	    (unsigned long long)alignment, (unsigned long long)boundary,
1594 	    segs, flags));
1595 	BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
1596 	return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
1597 	    segs, nsegs, rsegs, flags | BUS_DMA_DVMA));
1598 }
1599 
1600 void
1601 iommu_dvmamem_free(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dma_segment_t *segs,
1602     int nsegs)
1603 {
1604 
1605 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
1606 	    segs, nsegs));
1607 	BUS_DMA_FIND_PARENT(t, _dmamem_free);
1608 	(*t->_dmamem_free)(t, t0, segs, nsegs);
1609 }
1610 
1611 /*
1612  * Create a new iomap.
1613  */
1614 struct iommu_map_state *
1615 iommu_iomap_create(int n)
1616 {
1617 	struct iommu_map_state *ims;
1618 	struct strbuf_flush *sbf;
1619 	vaddr_t va;
1620 
1621 	/* Safety for heavily fragmented data, such as mbufs */
1622 	n += 4;
1623 	if (n < 16)
1624 		n = 16;
1625 
1626 	ims = malloc(sizeof(*ims) + (n - 1) * sizeof(ims->ims_map.ipm_map[0]),
1627 		M_DEVBUF, M_NOWAIT | M_ZERO);
1628 	if (ims == NULL)
1629 		return (NULL);
1630 
1631 	/* Initialize the map. */
1632 	ims->ims_map.ipm_maxpage = n;
1633 	SPLAY_INIT(&ims->ims_map.ipm_tree);
1634 
1635 	/* Initialize the flush area. */
1636 	sbf = &ims->ims_flush;
1637 	va = (vaddr_t)&sbf->sbf_area[0x40];
1638 	va &= ~0x3f;
1639 	pmap_extract(pmap_kernel(), va, &sbf->sbf_flushpa);
1640 	sbf->sbf_flush = (void *)va;
1641 
1642 	return (ims);
1643 }
1644 
1645 /*
1646  * Destroy an iomap.
1647  */
1648 void
1649 iommu_iomap_destroy(struct iommu_map_state *ims)
1650 {
1651 #ifdef DIAGNOSTIC
1652 	if (ims->ims_map.ipm_pagecnt > 0)
1653 		printf("iommu_iomap_destroy: %d page entries in use\n",
1654 		    ims->ims_map.ipm_pagecnt);
1655 #endif
1656 
1657 	free(ims, M_DEVBUF);
1658 }
1659 
1660 /*
1661  * Utility function used by splay tree to order page entries by pa.
1662  */
1663 static inline int
1664 iomap_compare(struct iommu_page_entry *a, struct iommu_page_entry *b)
1665 {
1666 	return ((a->ipe_pa > b->ipe_pa) ? 1 :
1667 		(a->ipe_pa < b->ipe_pa) ? -1 : 0);
1668 }
1669 
1670 SPLAY_PROTOTYPE(iommu_page_tree, iommu_page_entry, ipe_node, iomap_compare);
1671 
1672 SPLAY_GENERATE(iommu_page_tree, iommu_page_entry, ipe_node, iomap_compare);
1673 
1674 /*
1675  * Insert a pa entry in the iomap.
1676  */
1677 int
1678 iommu_iomap_insert_page(struct iommu_map_state *ims, paddr_t pa)
1679 {
1680 	struct iommu_page_map *ipm = &ims->ims_map;
1681 	struct iommu_page_entry *e;
1682 
1683 	if (ipm->ipm_pagecnt >= ipm->ipm_maxpage) {
1684 		struct iommu_page_entry ipe;
1685 
1686 		ipe.ipe_pa = pa;
1687 		if (SPLAY_FIND(iommu_page_tree, &ipm->ipm_tree, &ipe))
1688 			return (0);
1689 
1690 		return (ENOMEM);
1691 	}
1692 
1693 	e = &ipm->ipm_map[ipm->ipm_pagecnt];
1694 
1695 	e->ipe_pa = pa;
1696 	e->ipe_va = 0;
1697 
1698 	e = SPLAY_INSERT(iommu_page_tree, &ipm->ipm_tree, e);
1699 
1700 	/* Duplicates are okay, but only count them once. */
1701 	if (e)
1702 		return (0);
1703 
1704 	++ipm->ipm_pagecnt;
1705 
1706 	return (0);
1707 }
1708 
1709 /*
1710  * Load the iomap into the IOMMU by filling in the pa->va mappings and
1711  * inserting them into the IOMMU tables.
1712  */
1713 void
1714 iommu_iomap_load_map(struct iommu_state *is, struct iommu_map_state *ims,
1715     bus_addr_t vmaddr, int flags)
1716 {
1717 	struct iommu_page_map *ipm = &ims->ims_map;
1718 	struct iommu_page_entry *e;
1719 	struct strbuf_ctl *sb = ims->ims_sb;
1720 	int i, slot;
1721 
1722 	if (sb->sb_flush == NULL)
1723 		flags &= ~BUS_DMA_STREAMING;
1724 
1725 	if (flags & BUS_DMA_STREAMING)
1726 		ims->ims_flags |= IOMMU_MAP_STREAM;
1727 	else
1728 		ims->ims_flags &= ~IOMMU_MAP_STREAM;
1729 
1730 	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
1731 		e->ipe_va = vmaddr;
1732 		iommu_enter(is, sb, e->ipe_va, e->ipe_pa, flags);
1733 
1734 		/* Flush cache if necessary. */
1735 		slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
1736 		if (is->is_flags & IOMMU_FLUSH_CACHE &&
1737 		    (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
1738 			IOMMUREG_WRITE(is, iommu_cache_flush,
1739 			    is->is_ptsb + slot * 8);
1740 
1741 		vmaddr += PAGE_SIZE;
1742 	}
1743 }
1744 
1745 /*
1746  * Remove the iomap from the IOMMU.
1747  */
1748 void
1749 iommu_iomap_unload_map(struct iommu_state *is, struct iommu_map_state *ims)
1750 {
1751 	struct iommu_page_map *ipm = &ims->ims_map;
1752 	struct iommu_page_entry *e;
1753 	struct strbuf_ctl *sb = ims->ims_sb;
1754 	int i, slot;
1755 
1756 	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
1757 		iommu_remove(is, sb, e->ipe_va);
1758 
1759 		/* Flush cache if necessary. */
1760 		slot = IOTSBSLOT(e->ipe_va, is->is_tsbsize);
1761 		if (is->is_flags & IOMMU_FLUSH_CACHE &&
1762 		    (i == (ipm->ipm_pagecnt - 1) || (slot % 8) == 7))
1763 			IOMMUREG_WRITE(is, iommu_cache_flush,
1764 			    is->is_ptsb + slot * 8);
1765 	}
1766 }
1767 
1768 /*
1769  * Translate a physical address (pa) into a DVMA address.
1770  */
1771 bus_addr_t
1772 iommu_iomap_translate(struct iommu_map_state *ims, paddr_t pa)
1773 {
1774 	struct iommu_page_map *ipm = &ims->ims_map;
1775 	struct iommu_page_entry *e;
1776 	struct iommu_page_entry pe;
1777 	paddr_t offset = pa & PAGE_MASK;
1778 
1779 	pe.ipe_pa = trunc_page(pa);
1780 
1781 	e = SPLAY_FIND(iommu_page_tree, &ipm->ipm_tree, &pe);
1782 
1783 	if (e == NULL)
1784 		return (0);
1785 
1786 	return (e->ipe_va | offset);
1787 }
1788 
1789 /*
1790  * Clear the iomap table and tree.
1791  */
1792 void
1793 iommu_iomap_clear_pages(struct iommu_map_state *ims)
1794 {
1795 	ims->ims_map.ipm_pagecnt = 0;
1796 	SPLAY_INIT(&ims->ims_map.ipm_tree);
1797 }
1798 
1799