xref: /openbsd-src/sys/arch/sparc64/dev/viommu.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: viommu.c,v 1.5 2009/04/05 21:57:41 oga Exp $	*/
2 /*	$NetBSD: iommu.c,v 1.47 2002/02/08 20:03:45 eeh Exp $	*/
3 
4 /*
5  * Copyright (c) 2008 Mark Kettenis
6  * Copyright (c) 2003 Henric Jungheim
7  * Copyright (c) 2001, 2002 Eduardo Horvath
8  * Copyright (c) 1999, 2000 Matthew R. Green
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The name of the author may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * UltraSPARC Hypervisor IOMMU support.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/extent.h>
41 #include <sys/malloc.h>
42 #include <sys/systm.h>
43 #include <sys/device.h>
44 #include <sys/mbuf.h>
45 
46 #include <uvm/uvm_extern.h>
47 
48 #include <machine/bus.h>
49 #include <sparc64/sparc64/cache.h>
50 #include <sparc64/dev/iommureg.h>
51 #include <sparc64/dev/iommuvar.h>
52 #include <sparc64/dev/viommuvar.h>
53 
54 #include <machine/autoconf.h>
55 #include <machine/cpu.h>
56 #include <machine/hypervisor.h>
57 
58 #ifdef DDB
59 #include <machine/db_machdep.h>
60 #include <ddb/db_sym.h>
61 #include <ddb/db_extern.h>
62 #endif
63 
64 #ifdef DEBUG
65 #define IDB_BUSDMA	0x1
66 #define IDB_IOMMU	0x2
67 #define IDB_INFO	0x4
68 #define IDB_SYNC	0x8
69 #define IDB_XXX		0x10
70 #define IDB_PRINT_MAP	0x20
71 #define IDB_BREAK	0x40
72 extern int iommudebug;
73 #define DPRINTF(l, s)   do { if (iommudebug & l) printf s; } while (0)
74 #else
75 #define DPRINTF(l, s)
76 #endif
77 
78 void viommu_enter(struct iommu_state *, struct strbuf_ctl *, vaddr_t, paddr_t,
79     int);
80 void viommu_remove(struct iommu_state *, struct strbuf_ctl *, vaddr_t);
81 int viommu_dvmamap_load_seg(bus_dma_tag_t, struct iommu_state *,
82     bus_dmamap_t, bus_dma_segment_t *, int, int, bus_size_t, bus_size_t);
83 int viommu_dvmamap_load_mlist(bus_dma_tag_t, struct iommu_state *,
84     bus_dmamap_t, struct pglist *, int, bus_size_t, bus_size_t);
85 int viommu_dvmamap_append_range(bus_dma_tag_t, bus_dmamap_t, paddr_t,
86     bus_size_t, int, bus_size_t);
87 int iommu_iomap_insert_page(struct iommu_map_state *, paddr_t);
88 vaddr_t iommu_iomap_translate(struct iommu_map_state *, paddr_t);
89 void viommu_iomap_load_map(struct iommu_state *, struct iommu_map_state *,
90     vaddr_t, int);
91 void viommu_iomap_unload_map(struct iommu_state *, struct iommu_map_state *);
92 struct iommu_map_state *viommu_iomap_create(int);
93 void iommu_iomap_destroy(struct iommu_map_state *);
94 void iommu_iomap_clear_pages(struct iommu_map_state *);
95 void _viommu_dvmamap_sync(bus_dma_tag_t, bus_dma_tag_t, bus_dmamap_t,
96     bus_addr_t, bus_size_t, int);
97 
98 /*
99  * initialise the UltraSPARC IOMMU (hypervisor version):
100  *	- determine the DVMA address range.
101  *	- initialise the DVMA map mutex.
102  *	- create a private DVMA map.
103  */
104 void
105 viommu_init(char *name, struct iommu_state *is, int tsbsize,
106     u_int32_t iovabase)
107 {
108 	/*
109 	 * Set up the IOMMU.
110 	 *
111 	 * The sun4v IOMMU is accessed through the hypervisor, so we
112 	 * deal with it here.
113 	 */
114 	is->is_tsbsize = tsbsize;
115 	if (iovabase == (u_int32_t)-1) {
116 		is->is_dvmabase = IOTSB_VSTART(is->is_tsbsize);
117 		is->is_dvmaend = IOTSB_VEND;
118 	} else {
119 		is->is_dvmabase = iovabase;
120 		is->is_dvmaend = iovabase + IOTSB_VSIZE(tsbsize) - 1;
121 	}
122 
123 	/*
124 	 * Allocate a dvma map.
125 	 */
126 	printf("dvma map %x-%x", is->is_dvmabase, is->is_dvmaend);
127 	is->is_dvmamap = extent_create(name,
128 	    is->is_dvmabase, (u_long)is->is_dvmaend + 1,
129 	    M_DEVBUF, 0, 0, EX_NOWAIT);
130 	mtx_init(&is->is_mtx, IPL_HIGH);
131 
132 	printf("\n");
133 }
134 
135 /*
136  * Add an entry to the IOMMU table.
137  */
138 void
139 viommu_enter(struct iommu_state *is, struct strbuf_ctl *sb, vaddr_t va,
140     paddr_t pa, int flags)
141 {
142 	u_int64_t tsbid = IOTSBSLOT(va, is->is_tsbsize);
143 	paddr_t page_list[1], addr;
144 	u_int64_t attr, nmapped;
145 	int err;
146 
147 	KASSERT(sb == NULL);
148 
149 #ifdef DIAGNOSTIC
150 	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
151 		panic("viommu_enter: va %#lx not in DVMA space", va);
152 #endif
153 
154 	attr = PCI_MAP_ATTR_READ | PCI_MAP_ATTR_WRITE;
155 	if (flags & BUS_DMA_READ)
156 		attr &= ~PCI_MAP_ATTR_READ;
157 	if (flags & BUS_DMA_WRITE)
158 		attr &= ~PCI_MAP_ATTR_WRITE;
159 
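	/*
	 * The hypervisor call takes the real (physical) address of an
	 * array of page addresses, so build a one-entry list on the
	 * stack and translate its kernel virtual address below.
	 */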
160 	page_list[0] = trunc_page(pa);
161 	if (!pmap_extract(pmap_kernel(), (vaddr_t)page_list, &addr))
162 		panic("viommu_enter: pmap_extract failed");
163 	err = hv_pci_iommu_map(is->is_devhandle, tsbid, 1, attr,
164 	    addr, &nmapped);
165 	if (err != H_EOK || nmapped != 1)
166 		panic("hv_pci_iommu_map: err=%d", err);
167 }
168 
169 /*
170  * Remove an entry from the IOMMU table.
171  */
172 void
173 viommu_remove(struct iommu_state *is, struct strbuf_ctl *sb, vaddr_t va)
174 {
175 	u_int64_t tsbid = IOTSBSLOT(va, is->is_tsbsize);
176 	u_int64_t ndemapped;
177 	int err;
178 
179 	KASSERT(sb == NULL);
180 
181 #ifdef DIAGNOSTIC
182 	if (va < is->is_dvmabase || (va + PAGE_MASK) > is->is_dvmaend)
183 		panic("iommu_remove: va 0x%lx not in DVMA space", (u_long)va);
184 	if (va != trunc_page(va)) {
185 		printf("iommu_remove: unaligned va: %lx\n", va);
186 		va = trunc_page(va);
187 	}
188 #endif
189 
190 	err = hv_pci_iommu_demap(is->is_devhandle, tsbid, 1, &ndemapped);
191 	if (err != H_EOK || ndemapped != 1)
192 		panic("hv_pci_iommu_demap: err=%d", err);
193 }
194 
195 /*
196  * IOMMU DVMA operations, sun4v hypervisor version.
197  */
198 
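/*
 * Walk up the bus_dma tag hierarchy to the nearest parent tag that
 * implements the named function, panicking if no parent provides it.
 */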
199 #define BUS_DMA_FIND_PARENT(t, fn)                                      \
200         if (t->_parent == NULL)                                         \
201                 panic("null bus_dma parent (" #fn ")");                 \
202         for (t = t->_parent; t->fn == NULL; t = t->_parent)             \
203                 if (t->_parent == NULL)                                 \
204                         panic("no bus_dma " #fn " located");
205 
206 int
207 viommu_dvmamap_create(bus_dma_tag_t t, bus_dma_tag_t t0,
208     struct iommu_state *is, bus_size_t size, int nsegments,
209     bus_size_t maxsegsz, bus_size_t boundary, int flags,
210     bus_dmamap_t *dmamap)
211 {
212 	int ret;
213 	bus_dmamap_t map;
214 	struct iommu_map_state *ims;
215 
216 	BUS_DMA_FIND_PARENT(t, _dmamap_create);
217 	ret = (*t->_dmamap_create)(t, t0, size, nsegments, maxsegsz, boundary,
218 	    flags, &map);
219 
220 	if (ret)
221 		return (ret);
222 
223 	ims = viommu_iomap_create(atop(round_page(size)));
224 
225 	if (ims == NULL) {
226 		bus_dmamap_destroy(t0, map);
227 		return (ENOMEM);
228 	}
229 
230 	ims->ims_iommu = is;
231 	map->_dm_cookie = ims;
232 
233 	*dmamap = map;
234 
235 	return (0);
236 }
237 
238 void
239 viommu_dvmamap_destroy(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
240 {
241 	/*
242 	 * The specification (man page) requires a loaded
243 	 * map to be unloaded before it is destroyed.
244 	 */
245 	if (map->dm_nsegs)
246 		bus_dmamap_unload(t0, map);
247 
248 	if (map->_dm_cookie)
249 		iommu_iomap_destroy(map->_dm_cookie);
250 	map->_dm_cookie = NULL;
251 
252 	BUS_DMA_FIND_PARENT(t, _dmamap_destroy);
253 	(*t->_dmamap_destroy)(t, t0, map);
254 }
255 
256 /*
257  * Load a contiguous kva buffer into a dmamap.  The physical pages are
258  * not assumed to be contiguous.  Two passes are made through the buffer
259  * and both call pmap_extract() for the same va->pa translations.  It
260  * is possible to run out of pa->dvma mappings; the code should be smart
261  * enough to resize the iomap (when the "flags" permit allocation).  It
262  * is trivial to compute the number of entries required (round the length
263  * up to the page size and then divide by the page size)...
264  */
265 int
266 viommu_dvmamap_load(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
267     void *buf, bus_size_t buflen, struct proc *p, int flags)
268 {
269 	int err = 0;
270 	bus_size_t sgsize;
271 	u_long dvmaddr, sgstart, sgend;
272 	bus_size_t align, boundary;
273 	struct iommu_state *is;
274 	struct iommu_map_state *ims = map->_dm_cookie;
275 	pmap_t pmap;
276 
277 #ifdef DIAGNOSTIC
278 	if (ims == NULL)
279 		panic("viommu_dvmamap_load: null map state");
280 	if (ims->ims_iommu == NULL)
281 		panic("viommu_dvmamap_load: null iommu");
282 #endif
283 	is = ims->ims_iommu;
284 
285 	if (map->dm_nsegs) {
286 		/*
287 		 * Is it still in use? _bus_dmamap_load should have taken care
288 		 * of this.
289 		 */
290 #ifdef DIAGNOSTIC
291 		panic("iommu_dvmamap_load: map still in use");
292 #endif
293 		bus_dmamap_unload(t0, map);
294 	}
295 
296 	/*
297 	 * Make sure that on error condition we return "no valid mappings".
298 	 */
299 	map->dm_nsegs = 0;
300 
301 	if (buflen < 1 || buflen > map->_dm_size) {
302 		DPRINTF(IDB_BUSDMA,
303 		    ("iommu_dvmamap_load(): error %d > %d -- "
304 		     "map size exceeded!\n", (int)buflen, (int)map->_dm_size));
305 		return (EINVAL);
306 	}
307 
308 	/*
309 	 * A boundary presented to bus_dmamem_alloc() takes precedence
310 	 * over boundary in the map.
311 	 */
312 	if ((boundary = (map->dm_segs[0]._ds_boundary)) == 0)
313 		boundary = map->_dm_boundary;
314 	align = MAX(map->dm_segs[0]._ds_align, PAGE_SIZE);
315 
316 	pmap = p ? p->p_vmspace->vm_map.pmap : pmap_kernel();
317 
318 	/* Count up the total number of pages we need */
319 	iommu_iomap_clear_pages(ims);
320 	{ /* Scope */
321 		bus_addr_t a, aend;
322 		bus_addr_t addr = (vaddr_t)buf;
323 		int seg_len = buflen;
324 
325 		aend = round_page(addr + seg_len);
326 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
327 			paddr_t pa;
328 
329 			if (pmap_extract(pmap, a, &pa) == FALSE) {
330 				printf("iomap pmap error addr 0x%llx\n", a);
331 				iommu_iomap_clear_pages(ims);
332 				return (EFBIG);
333 			}
334 
335 			err = iommu_iomap_insert_page(ims, pa);
336 			if (err) {
337 				printf("iomap insert error: %d for "
338 				    "va 0x%llx pa 0x%lx "
339 				    "(buf %p len %lld/%llx)\n",
340 				    err, a, pa, buf, buflen, buflen);
341 				iommu_iomap_clear_pages(ims);
342 				return (EFBIG);
343 			}
344 		}
345 	}
346 	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
347 
348 	mtx_enter(&is->is_mtx);
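	/*
	 * BUS_DMA_24BIT devices can only vary the low 24 bits of a DVMA
	 * address, so confine the allocation to 0xff000000-0xffffffff.
	 */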
349 	if (flags & BUS_DMA_24BIT) {
350 		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
351 		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
352 	} else {
353 		sgstart = is->is_dvmamap->ex_start;
354 		sgend = is->is_dvmamap->ex_end;
355 	}
356 
357 	/*
358 	 * If our segment size is larger than the boundary we need to
359 	 * split the transfer up into little pieces ourselves.
360 	 */
361 	err = extent_alloc_subregion(is->is_dvmamap, sgstart, sgend,
362 	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
363 	    EX_NOWAIT | EX_BOUNDZERO, (u_long *)&dvmaddr);
364 	mtx_leave(&is->is_mtx);
365 
366 #ifdef DEBUG
367 	if (err || (dvmaddr == (bus_addr_t)-1))	{
368 		printf("iommu_dvmamap_load(): extent_alloc(%d, %x) failed!\n",
369 		    (int)sgsize, flags);
370 #ifdef DDB
371 		if (iommudebug & IDB_BREAK)
372 			Debugger();
373 #endif
374 	}
375 #endif
376 	if (err != 0)
377 		return (err);
378 
379 	/* Set the active DVMA map */
380 	map->_dm_dvmastart = dvmaddr;
381 	map->_dm_dvmasize = sgsize;
382 
383 	map->dm_mapsize = buflen;
384 
385 	viommu_iomap_load_map(is, ims, dvmaddr, flags);
386 
387 	{ /* Scope */
388 		bus_addr_t a, aend;
389 		bus_addr_t addr = (vaddr_t)buf;
390 		int seg_len = buflen;
391 
392 		aend = round_page(addr + seg_len);
393 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
394 			bus_addr_t pgstart;
395 			bus_addr_t pgend;
396 			paddr_t pa;
397 			int pglen;
398 
399 			/* Yuck... Redoing the same pmap_extract... */
400 			if (pmap_extract(pmap, a, &pa) == FALSE) {
401 				printf("iomap pmap error addr 0x%llx\n", a);
402 				err = EFBIG;
403 				break;
404 			}
405 
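			/*
			 * Clip the mapping to the part of this page that
			 * the buffer actually covers, keeping the in-page
			 * offset of the first page and the length of the
			 * last.
			 */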
406 			pgstart = pa | (MAX(a, addr) & PAGE_MASK);
407 			pgend = pa | (MIN(a + PAGE_SIZE - 1,
408 			    addr + seg_len - 1) & PAGE_MASK);
409 			pglen = pgend - pgstart + 1;
410 
411 			if (pglen < 1)
412 				continue;
413 
414 			err = viommu_dvmamap_append_range(t, map, pgstart,
415 			    pglen, flags, boundary);
416 			if (err == EFBIG)
417 				break;
418 			else if (err) {
419 				printf("iomap load seg page: %d for "
420 				    "va 0x%llx pa %lx (%llx - %llx) "
421 				    "for %d/0x%x\n",
422 				    err, a, pa, pgstart, pgend, pglen, pglen);
423 				break;
424 			}
425 		}
426 	}
427 	if (err) {
428 		/* XXX keep enough state and just call unload here? */
429 		viommu_iomap_unload_map(is, ims);
430 		iommu_iomap_clear_pages(ims);
431 		map->dm_mapsize = 0;
432 		map->dm_nsegs = 0;
433 		mtx_enter(&is->is_mtx);
434 		err = extent_free(is->is_dvmamap, dvmaddr, sgsize, EX_NOWAIT);
435 		map->_dm_dvmastart = 0;
436 		map->_dm_dvmasize = 0;
437 		mtx_leave(&is->is_mtx);
438 	}
439 
440 	return (err);
441 }
442 
443 /*
444  * Load a dvmamap from an array of segs or an mlist (if the first
445  * "segs" entry's mlist is non-null).  It calls viommu_dvmamap_load_seg()
446  * or viommu_dvmamap_load_mlist() for part of the 2nd pass through the
447  * mapping.  This is ugly.  A better solution would probably be to have
448  * function pointers for implementing the traversal.  That way, there
449  * could be one core load routine for each of the three required algorithms
450  * (buffer, seg, and mlist).  That would also mean that the traversal
451  * algorithm would then only need one implementation for each algorithm
452  * instead of two (one for populating the iomap and one for populating
453  * the dvma map).
454  */
455 int
456 viommu_dvmamap_load_raw(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
457     bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
458 {
459 	int i;
460 	int left;
461 	int err = 0;
462 	bus_size_t sgsize;
463 	bus_size_t boundary, align;
464 	u_long dvmaddr, sgstart, sgend;
465 	struct iommu_state *is;
466 	struct iommu_map_state *ims = map->_dm_cookie;
467 
468 #ifdef DIAGNOSTIC
469 	if (ims == NULL)
470 		panic("viommu_dvmamap_load_raw: null map state");
471 	if (ims->ims_iommu == NULL)
472 		panic("viommu_dvmamap_load_raw: null iommu");
473 #endif
474 	is = ims->ims_iommu;
475 
476 	if (map->dm_nsegs) {
477 		/* Already in use?? */
478 #ifdef DIAGNOSTIC
479 		panic("iommu_dvmamap_load_raw: map still in use");
480 #endif
481 		bus_dmamap_unload(t0, map);
482 	}
483 
484 	/*
485 	 * A boundary presented to bus_dmamem_alloc() takes precedence
486 	 * over boundary in the map.
487 	 */
488 	if ((boundary = segs[0]._ds_boundary) == 0)
489 		boundary = map->_dm_boundary;
490 
491 	align = MAX(segs[0]._ds_align, PAGE_SIZE);
492 
493 	/*
494 	 * Make sure that on error condition we return "no valid mappings".
495 	 */
496 	map->dm_nsegs = 0;
497 
498 	iommu_iomap_clear_pages(ims);
499 	if (segs[0]._ds_mlist) {
500 		struct pglist *mlist = segs[0]._ds_mlist;
501 		struct vm_page *m;
502 		for (m = TAILQ_FIRST(mlist); m != NULL;
503 		    m = TAILQ_NEXT(m,pageq)) {
504 			err = iommu_iomap_insert_page(ims, VM_PAGE_TO_PHYS(m));
505 
506 			if (err) {
507 				printf("iomap insert error: %d for "
508 				    "pa 0x%lx\n", err, VM_PAGE_TO_PHYS(m));
509 				iommu_iomap_clear_pages(ims);
510 				return (EFBIG);
511 			}
512 		}
513 	} else {
514 		/* Count up the total number of pages we need */
515 		for (i = 0, left = size; left > 0 && i < nsegs; i++) {
516 			bus_addr_t a, aend;
517 			bus_size_t len = segs[i].ds_len;
518 			bus_addr_t addr = segs[i].ds_addr;
519 			int seg_len = MIN(left, len);
520 
521 			if (len < 1)
522 				continue;
523 
524 			aend = round_page(addr + seg_len);
525 			for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
526 
527 				err = iommu_iomap_insert_page(ims, a);
528 				if (err) {
529 					printf("iomap insert error: %d for "
530 					    "pa 0x%llx\n", err, a);
531 					iommu_iomap_clear_pages(ims);
532 					return (EFBIG);
533 				}
534 			}
535 
536 			left -= seg_len;
537 		}
538 	}
539 	sgsize = ims->ims_map.ipm_pagecnt * PAGE_SIZE;
540 
541 	mtx_enter(&is->is_mtx);
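	/* As in viommu_dvmamap_load(), confine 24-bit DVMA to the top 16MB. */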
542 	if (flags & BUS_DMA_24BIT) {
543 		sgstart = MAX(is->is_dvmamap->ex_start, 0xff000000);
544 		sgend = MIN(is->is_dvmamap->ex_end, 0xffffffff);
545 	} else {
546 		sgstart = is->is_dvmamap->ex_start;
547 		sgend = is->is_dvmamap->ex_end;
548 	}
549 
550 	/*
551 	 * If our segment size is larger than the boundary we need to
552 	 * split the transfer up into little pieces ourselves.
553 	 */
554 	err = extent_alloc_subregion(is->is_dvmamap, sgstart, sgend,
555 	    sgsize, align, 0, (sgsize > boundary) ? 0 : boundary,
556 	    EX_NOWAIT | EX_BOUNDZERO, (u_long *)&dvmaddr);
557 	mtx_leave(&is->is_mtx);
558 
559 	if (err != 0)
560 		return (err);
561 
562 #ifdef DEBUG
563 	if (dvmaddr == (bus_addr_t)-1)	{
564 		printf("iommu_dvmamap_load_raw(): extent_alloc(%d, %x) "
565 		    "failed!\n", (int)sgsize, flags);
566 #ifdef DDB
567 		if (iommudebug & IDB_BREAK)
568 			Debugger();
569 #else
570 		panic("");
571 #endif
572 	}
573 #endif
574 
575 	/* Set the active DVMA map */
576 	map->_dm_dvmastart = dvmaddr;
577 	map->_dm_dvmasize = sgsize;
578 
579 	map->dm_mapsize = size;
580 
581 	viommu_iomap_load_map(is, ims, dvmaddr, flags);
582 
583 	if (segs[0]._ds_mlist)
584 		err = viommu_dvmamap_load_mlist(t, is, map, segs[0]._ds_mlist,
585 		    flags, size, boundary);
586 	else
587 		err = viommu_dvmamap_load_seg(t, is, map, segs, nsegs,
588 		    flags, size, boundary);
589 
590 	if (err) {
591 		/* XXX keep enough state and just call unload here? */
592 		viommu_iomap_unload_map(is, ims);
593 		iommu_iomap_clear_pages(ims);
594 		map->dm_mapsize = 0;
595 		map->dm_nsegs = 0;
596 		mtx_enter(&is->is_mtx);
597 		err = extent_free(is->is_dvmamap, dvmaddr, sgsize, EX_NOWAIT);
598 		map->_dm_dvmastart = 0;
599 		map->_dm_dvmasize = 0;
600 		mtx_leave(&is->is_mtx);
601 	}
602 
603 	return (err);
604 }
605 
606 /*
607  * Insert a range of addresses into a loaded map respecting the specified
608  * boundary and alignment restrictions.  The range is specified by its
609  * physical address and length.  The range cannot cross a page boundary.
610  * This code (along with most of the rest of the functions in this file)
611  * assumes that the IOMMU page size is equal to PAGE_SIZE.
612  */
613 int
614 viommu_dvmamap_append_range(bus_dma_tag_t t, bus_dmamap_t map, paddr_t pa,
615     bus_size_t length, int flags, bus_size_t boundary)
616 {
617 	struct iommu_map_state *ims = map->_dm_cookie;
618 	bus_addr_t sgstart, sgend, bd_mask;
619 	bus_dma_segment_t *seg = NULL;
620 	int i = map->dm_nsegs;
621 
622 #ifdef DEBUG
623 	if (ims == NULL)
624 		panic("iommu_dvmamap_append_range: null map state");
625 #endif
626 
627 	sgstart = iommu_iomap_translate(ims, pa);
628 	sgend = sgstart + length - 1;
629 
630 #ifdef DIAGNOSTIC
631 	if (sgstart == 0 || sgstart > sgend) {
632 		printf("append range invalid mapping for %lx "
633 		    "(0x%llx - 0x%llx)\n", pa, sgstart, sgend);
634 		map->dm_nsegs = 0;
635 		return (EINVAL);
636 	}
637 #endif
638 
639 #ifdef DEBUG
640 	if (trunc_page(sgstart) != trunc_page(sgend)) {
641 		printf("append range crossing page boundary! "
642 		    "pa %lx length %lld/0x%llx sgstart %llx sgend %llx\n",
643 		    pa, length, length, sgstart, sgend);
644 	}
645 #endif
646 
647 	/*
648 	 * We will attempt to merge this range with the previous entry
649 	 * (if there is one).
650 	 */
651 	if (i > 0) {
652 		seg = &map->dm_segs[i - 1];
653 		if (sgstart == seg->ds_addr + seg->ds_len) {
654 			length += seg->ds_len;
655 			sgstart = seg->ds_addr;
656 			sgend = sgstart + length - 1;
657 		} else
658 			seg = NULL;
659 	}
660 
661 	if (seg == NULL) {
662 		seg = &map->dm_segs[i];
663 		if (++i > map->_dm_segcnt) {
664 			map->dm_nsegs = 0;
665 			return (EFBIG);
666 		}
667 	}
668 
669 	/*
670 	 * At this point, "i" is the index of the *next* bus_dma_segment_t
671 	 * (the segment count, aka map->dm_nsegs) and "seg" points to the
672 	 * *current* entry.  "length", "sgstart", and "sgend" reflect what
673 	 * we intend to put in "*seg".  No assumptions should be made about
674 	 * the contents of "*seg".  Only "boundary" issue can change this
675  * the contents of "*seg".  Only the "boundary" handling can change this
676 	 * (the test is strictly an optimization).
677 	 */
678 	if (boundary != 0) {
679 		bd_mask = ~(boundary - 1);
680 
681 		while ((sgstart & bd_mask) != (sgend & bd_mask)) {
682 			/*
683 			 * We are crossing a boundary so fill in the current
684 			 * segment with as much as possible, then grab a new
685 			 * one.
686 			 */
687 
688 			seg->ds_addr = sgstart;
689 			seg->ds_len = boundary - (sgstart & bd_mask);
690 
691 			sgstart += seg->ds_len; /* sgend stays the same */
692 			length -= seg->ds_len;
693 
694 			seg = &map->dm_segs[i];
695 			if (++i > map->_dm_segcnt) {
696 				map->dm_nsegs = 0;
697 				return (EFBIG);
698 			}
699 		}
700 	}
701 
702 	seg->ds_addr = sgstart;
703 	seg->ds_len = length;
704 	map->dm_nsegs = i;
705 
706 	return (0);
707 }
708 
709 /*
710  * Populate the iomap from a bus_dma_segment_t array.  See note for
711  * viommu_dvmamap_load() regarding page entry exhaustion of the iomap.
712  * This is less of a problem for load_seg, as the number of pages
713  * is usually similar to the number of segments (nsegs).
714  */
715 int
716 viommu_dvmamap_load_seg(bus_dma_tag_t t, struct iommu_state *is,
717     bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int flags,
718     bus_size_t size, bus_size_t boundary)
719 {
720 	int i;
721 	int left;
722 	int seg;
723 
724 	/*
725 	 * This segment array is made up of individual physical
726 	 * segments, probably built by _bus_dmamap_load_uio() or
727 	 * _bus_dmamap_load_mbuf().  Ignore the mlist and
728 	 * load each one individually.
729 	 */
730 
731 	/*
732 	 * Keep in mind that each segment could span
733 	 * multiple pages and that these are not always
734 	 * adjacent. The code is no longer adding dvma
735 	 * aliases to the IOMMU.  The STC will not cross
736 	 * page boundaries anyway and an IOMMU table walk
737 	 * vs. what may be a streamed PCI DMA to a ring
738 	 * descriptor is probably a wash.  It eases TLB
739 	 * pressure and in the worst possible case, it is
740 	 * only as bad as a non-IOMMUed architecture.  More
741 	 * importantly, the code is not quite as hairy.
742 	 * (It's bad enough as it is.)
743 	 */
744 	left = size;
745 	seg = 0;
746 	for (i = 0; left > 0 && i < nsegs; i++) {
747 		bus_addr_t a, aend;
748 		bus_size_t len = segs[i].ds_len;
749 		bus_addr_t addr = segs[i].ds_addr;
750 		int seg_len = MIN(left, len);
751 
752 		if (len < 1)
753 			continue;
754 
755 		aend = round_page(addr + seg_len);
756 		for (a = trunc_page(addr); a < aend; a += PAGE_SIZE) {
757 			bus_addr_t pgstart;
758 			bus_addr_t pgend;
759 			int pglen;
760 			int err;
761 
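			/*
			 * Clip this page to the portion covered by the
			 * segment so partial first and last pages get the
			 * correct offset and length.
			 */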
762 			pgstart = MAX(a, addr);
763 			pgend = MIN(a + PAGE_SIZE - 1, addr + seg_len - 1);
764 			pglen = pgend - pgstart + 1;
765 
766 			if (pglen < 1)
767 				continue;
768 
769 			err = viommu_dvmamap_append_range(t, map, pgstart,
770 			    pglen, flags, boundary);
771 			if (err == EFBIG)
772 				return (err);
773 			if (err) {
774 				printf("iomap load seg page: %d for "
775 				    "pa 0x%llx (%llx - %llx) for %d/%x\n",
776 				    err, a, pgstart, pgend, pglen, pglen);
777 				return (err);
778 			}
779 
780 		}
781 
782 		left -= seg_len;
783 	}
784 	return (0);
785 }
786 
787 /*
788  * Populate the iomap from an mlist.  See note for iommu_dvmamap_load()
789  * regarding page entry exhaustion of the iomap.
790  */
791 int
792 viommu_dvmamap_load_mlist(bus_dma_tag_t t, struct iommu_state *is,
793     bus_dmamap_t map, struct pglist *mlist, int flags,
794     bus_size_t size, bus_size_t boundary)
795 {
796 	struct vm_page *m;
797 	paddr_t pa;
798 	int err;
799 
800 	/*
801 	 * This was allocated with bus_dmamem_alloc.
802 	 * The pages are on an `mlist'.
803 	 */
804 	for (m = TAILQ_FIRST(mlist); m != NULL; m = TAILQ_NEXT(m,pageq)) {
805 		pa = VM_PAGE_TO_PHYS(m);
806 
807 		err = viommu_dvmamap_append_range(t, map, pa, PAGE_SIZE,
808 		    flags, boundary);
809 		if (err == EFBIG)
810 			return (err);
811 		if (err) {
812 			printf("iomap load seg page: %d for pa 0x%lx "
813 			    "(%lx - %lx) for %d/%x\n", err, pa, pa,
814 			    pa + PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
815 			return (err);
816 		}
817 	}
818 
819 	return (0);
820 }
821 
822 /*
823  * Unload a dvmamap.
824  */
825 void
826 viommu_dvmamap_unload(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map)
827 {
828 	struct iommu_state *is;
829 	struct iommu_map_state *ims = map->_dm_cookie;
830 	bus_addr_t dvmaddr = map->_dm_dvmastart;
831 	bus_size_t sgsize = map->_dm_dvmasize;
832 	int error;
833 
834 #ifdef DEBUG
835 	if (ims == NULL)
836 		panic("viommu_dvmamap_unload: null map state");
837 	if (ims->ims_iommu == NULL)
838 		panic("viommu_dvmamap_unload: null iommu");
839 #endif /* DEBUG */
840 
841 	is = ims->ims_iommu;
842 
843 	/* Remove the IOMMU entries */
844 	viommu_iomap_unload_map(is, ims);
845 
846 	/* Clear the iomap */
847 	iommu_iomap_clear_pages(ims);
848 
849 	bus_dmamap_unload(t->_parent, map);
850 
851 	/* Mark the mappings as invalid. */
852 	map->dm_mapsize = 0;
853 	map->dm_nsegs = 0;
854 
855 	mtx_enter(&is->is_mtx);
856 	error = extent_free(is->is_dvmamap, dvmaddr,
857 		sgsize, EX_NOWAIT);
858 	map->_dm_dvmastart = 0;
859 	map->_dm_dvmasize = 0;
860 	mtx_leave(&is->is_mtx);
861 	if (error != 0)
862 		printf("warning: %qd of DVMA space lost\n", sgsize);
863 }
864 
865 void
866 viommu_dvmamap_sync(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dmamap_t map,
867     bus_addr_t offset, bus_size_t len, int ops)
868 {
869 #ifdef DIAGNOSTIC
870 	struct iommu_map_state *ims = map->_dm_cookie;
871 
872 	if (ims == NULL)
873 		panic("viommu_dvmamap_sync: null map state");
874 	if (ims->ims_iommu == NULL)
875 		panic("viommu_dvmamap_sync: null iommu");
876 #endif
877 	if (len == 0)
878 		return;
879 
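	/*
	 * The membars order CPU accesses against the DMA: stores must be
	 * globally visible before the device reads the buffer (PREWRITE),
	 * and CPU loads must observe the device's completed writes
	 * (POSTREAD).
	 */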
880 	if (ops & BUS_DMASYNC_PREWRITE)
881 		membar(MemIssue);
882 
883 #if 0
884 	if (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_PREWRITE))
885 		_viommu_dvmamap_sync(t, t0, map, offset, len, ops);
886 #endif
887 
888 	if (ops & BUS_DMASYNC_POSTREAD)
889 		membar(MemIssue);
890 }
891 
892 int
893 viommu_dvmamem_alloc(bus_dma_tag_t t, bus_dma_tag_t t0, bus_size_t size,
894     bus_size_t alignment, bus_size_t boundary, bus_dma_segment_t *segs,
895     int nsegs, int *rsegs, int flags)
896 {
897 
898 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_alloc: sz %llx align %llx "
899 	    "bound %llx segp %p flags %d\n", (unsigned long long)size,
900 	    (unsigned long long)alignment, (unsigned long long)boundary,
901 	    segs, flags));
902 	BUS_DMA_FIND_PARENT(t, _dmamem_alloc);
903 	return ((*t->_dmamem_alloc)(t, t0, size, alignment, boundary,
904 	    segs, nsegs, rsegs, flags | BUS_DMA_DVMA));
905 }
906 
907 void
908 viommu_dvmamem_free(bus_dma_tag_t t, bus_dma_tag_t t0, bus_dma_segment_t *segs,
909     int nsegs)
910 {
911 
912 	DPRINTF(IDB_BUSDMA, ("iommu_dvmamem_free: segp %p nsegs %d\n",
913 	    segs, nsegs));
914 	BUS_DMA_FIND_PARENT(t, _dmamem_free);
915 	(*t->_dmamem_free)(t, t0, segs, nsegs);
916 }
917 
918 /*
919  * Create a new iomap.
920  */
921 struct iommu_map_state *
922 viommu_iomap_create(int n)
923 {
924 	struct iommu_map_state *ims;
925 
926 	/* Safety for heavily fragmented data, such as mbufs */
927 	n += 4;
928 	if (n < 16)
929 		n = 16;
930 
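	/*
	 * The structure already contains one ipm_map[] entry, hence the
	 * (n - 1) additional entries allocated past the end of it.
	 */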
931 	ims = malloc(sizeof(*ims) + (n - 1) * sizeof(ims->ims_map.ipm_map[0]),
932 		M_DEVBUF, M_NOWAIT | M_ZERO);
933 	if (ims == NULL)
934 		return (NULL);
935 
936 	/* Initialize the map. */
937 	ims->ims_map.ipm_maxpage = n;
938 	SPLAY_INIT(&ims->ims_map.ipm_tree);
939 
940 	return (ims);
941 }
942 
943 /*
944  * Load the iomap by filling in the pa->va mapping and inserting it
945  * into the IOMMU tables.
946  */
947 void
948 viommu_iomap_load_map(struct iommu_state *is, struct iommu_map_state *ims,
949     vaddr_t vmaddr, int flags)
950 {
951 	struct iommu_page_map *ipm = &ims->ims_map;
952 	struct iommu_page_entry *e;
953 	int i;
954 
955 	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e) {
956 		e->ipe_va = vmaddr;
957 		viommu_enter(is, NULL, e->ipe_va, e->ipe_pa, flags);
958 		vmaddr += PAGE_SIZE;
959 	}
960 }
961 
962 /*
963  * Remove the iomap from the IOMMU.
964  */
965 void
966 viommu_iomap_unload_map(struct iommu_state *is, struct iommu_map_state *ims)
967 {
968 	struct iommu_page_map *ipm = &ims->ims_map;
969 	struct iommu_page_entry *e;
970 	int i;
971 
972 	for (i = 0, e = ipm->ipm_map; i < ipm->ipm_pagecnt; ++i, ++e)
973 		viommu_remove(is, NULL, e->ipe_va);
974 }
975