xref: /netbsd-src/sys/arch/arm/arm32/bus_dma.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /*	$NetBSD: bus_dma.c,v 1.139 2023/02/25 08:05:46 skrll Exp $	*/
2 
3 /*-
4  * Copyright (c) 1996, 1997, 1998, 2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #define _ARM32_BUS_DMA_PRIVATE
34 
35 #include "opt_arm_bus_space.h"
36 #include "opt_cputypes.h"
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: bus_dma.c,v 1.139 2023/02/25 08:05:46 skrll Exp $");
40 
41 #include <sys/param.h>
42 
43 #include <sys/bus.h>
44 #include <sys/cpu.h>
45 #include <sys/kmem.h>
46 #include <sys/mbuf.h>
47 
48 #include <uvm/uvm.h>
49 
50 #include <arm/cpuconf.h>
51 #include <arm/cpufunc.h>
52 
53 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
54 #include <dev/mm.h>
55 #endif
56 
/*
 * Optional event counters for the bus_dma code.  They are only compiled
 * in when BUSDMA_COUNTERS is defined.  Each counter is a statically
 * initialised struct evcnt of type EVCNT_TYPE_MISC in the "busdma" group
 * and is handed to EVCNT_ATTACH_STATIC() below.
 */
#ifdef BUSDMA_COUNTERS
/* Map creation, load, unload and destroy counters. */
static struct evcnt bus_dma_creates =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "creates");
static struct evcnt bus_dma_bounced_creates =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "bounced creates");
static struct evcnt bus_dma_loads =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "loads");
static struct evcnt bus_dma_bounced_loads =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "bounced loads");
static struct evcnt bus_dma_coherent_loads =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "coherent loads");
static struct evcnt bus_dma_read_bounces =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "read bounces");
static struct evcnt bus_dma_write_bounces =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "write bounces");
static struct evcnt bus_dma_bounced_unloads =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "bounced unloads");
static struct evcnt bus_dma_bounced_mbuf_loads =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "bounced mbuf loads");
static struct evcnt bus_dma_unloads =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "unloads");
static struct evcnt bus_dma_bounced_destroys =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "bounced destroys");
static struct evcnt bus_dma_destroys =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "destroys");
/* Cache maintenance counters, bumped by _bus_dmamap_sync_segment(). */
static struct evcnt bus_dma_sync_prereadwrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync prereadwrite");
static struct evcnt bus_dma_sync_preread_begin =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync preread begin");
static struct evcnt bus_dma_sync_preread =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync preread");
static struct evcnt bus_dma_sync_preread_tail =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync preread tail");
static struct evcnt bus_dma_sync_prewrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync prewrite");
static struct evcnt bus_dma_sync_postread =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync postread");
static struct evcnt bus_dma_sync_postreadwrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync postreadwrite");
static struct evcnt bus_dma_sync_postwrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync postwrite");
/* Bumped when a physical address is outside every range of the tag. */
static struct evcnt bus_dma_inrange_fail =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "inrange check failed");

/* Sync counters for coherent mappings. */
static struct evcnt bus_dma_sync_coherent_prereadwrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync coherent prereadwrite");
static struct evcnt bus_dma_sync_coherent_preread =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync coherent preread");
static struct evcnt bus_dma_sync_coherent_prewrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync coherent prewrite");
static struct evcnt bus_dma_sync_coherent_postread =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync coherent postread");
static struct evcnt bus_dma_sync_coherent_postreadwrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync coherent postreadwrite");
static struct evcnt bus_dma_sync_coherent_postwrite =
	EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "busdma", "sync coherent postwrite");

/* Attach every counter defined above. */
EVCNT_ATTACH_STATIC(bus_dma_creates);
EVCNT_ATTACH_STATIC(bus_dma_bounced_creates);
EVCNT_ATTACH_STATIC(bus_dma_loads);
EVCNT_ATTACH_STATIC(bus_dma_bounced_loads);
EVCNT_ATTACH_STATIC(bus_dma_coherent_loads);
EVCNT_ATTACH_STATIC(bus_dma_read_bounces);
EVCNT_ATTACH_STATIC(bus_dma_write_bounces);
EVCNT_ATTACH_STATIC(bus_dma_unloads);
EVCNT_ATTACH_STATIC(bus_dma_bounced_unloads);
EVCNT_ATTACH_STATIC(bus_dma_destroys);
EVCNT_ATTACH_STATIC(bus_dma_bounced_destroys);
EVCNT_ATTACH_STATIC(bus_dma_bounced_mbuf_loads);
EVCNT_ATTACH_STATIC(bus_dma_sync_prereadwrite);
EVCNT_ATTACH_STATIC(bus_dma_sync_preread_begin);
EVCNT_ATTACH_STATIC(bus_dma_sync_preread);
EVCNT_ATTACH_STATIC(bus_dma_sync_preread_tail);
EVCNT_ATTACH_STATIC(bus_dma_sync_prewrite);
EVCNT_ATTACH_STATIC(bus_dma_sync_postread);
EVCNT_ATTACH_STATIC(bus_dma_sync_postreadwrite);
EVCNT_ATTACH_STATIC(bus_dma_sync_postwrite);
EVCNT_ATTACH_STATIC(bus_dma_inrange_fail);

EVCNT_ATTACH_STATIC(bus_dma_sync_coherent_prereadwrite);
EVCNT_ATTACH_STATIC(bus_dma_sync_coherent_preread);
EVCNT_ATTACH_STATIC(bus_dma_sync_coherent_prewrite);
EVCNT_ATTACH_STATIC(bus_dma_sync_coherent_postread);
EVCNT_ATTACH_STATIC(bus_dma_sync_coherent_postreadwrite);
EVCNT_ATTACH_STATIC(bus_dma_sync_coherent_postwrite);

/*
 * STAT_INCR(x) increments the ev_count of counter bus_dma_<x>.  Without
 * BUSDMA_COUNTERS it expands to __nothing, so call sites need no #ifdef.
 */
#define	STAT_INCR(x)	(bus_dma_ ## x.ev_count++)
#else
#define	STAT_INCR(x)	__nothing
#endif
147 
/*
 * Forward declaration.  The load routines below call this with a buffer
 * address, its length and the vmspace the buffer belongs to, and treat a
 * non-zero return value as an errno.
 */
int	_bus_dmamap_load_buffer(bus_dma_tag_t, bus_dmamap_t, void *,
	    bus_size_t, struct vmspace *, int);
150 
151 /*
152  * Check to see if the specified page is in an allowed DMA range.
153  */
154 static inline struct arm32_dma_range *
155 _bus_dma_paddr_inrange(struct arm32_dma_range *ranges, int nranges,
156     bus_addr_t curaddr)
157 {
158 	struct arm32_dma_range *dr;
159 	int i;
160 
161 	for (i = 0, dr = ranges; i < nranges; i++, dr++) {
162 		if (curaddr >= dr->dr_sysbase &&
163 		    curaddr < (dr->dr_sysbase + dr->dr_len))
164 			return dr;
165 	}
166 
167 	return NULL;
168 }
169 
170 /*
171  * Check to see if the specified busaddr is in an allowed DMA range.
172  */
173 static inline paddr_t
174 _bus_dma_busaddr_to_paddr(bus_dma_tag_t t, bus_addr_t curaddr)
175 {
176 	struct arm32_dma_range *dr;
177 	u_int i;
178 
179 	if (t->_nranges == 0)
180 		return curaddr;
181 
182 	for (i = 0, dr = t->_ranges; i < t->_nranges; i++, dr++) {
183 		if (dr->dr_busbase <= curaddr
184 		    && curaddr < dr->dr_busbase + dr->dr_len)
185 			return curaddr - dr->dr_busbase + dr->dr_sysbase;
186 	}
187 	panic("%s: curaddr %#lx not in range", __func__, curaddr);
188 }
189 
190 /*
191  * Common function to load the specified physical address into the
192  * DMA map, coalescing segments and boundary checking as necessary.
193  */
194 static int
195 _bus_dmamap_load_paddr(bus_dma_tag_t t, bus_dmamap_t map,
196     bus_addr_t paddr, bus_size_t size, bool coherent)
197 {
198 	bus_dma_segment_t * const segs = map->dm_segs;
199 	int nseg = map->dm_nsegs;
200 	bus_addr_t lastaddr;
201 	bus_addr_t bmask = ~(map->_dm_boundary - 1);
202 	bus_addr_t curaddr;
203 	bus_size_t sgsize;
204 	uint32_t _ds_flags = coherent ? _BUS_DMAMAP_COHERENT : 0;
205 
206 	if (nseg > 0)
207 		lastaddr = segs[nseg - 1].ds_addr + segs[nseg - 1].ds_len;
208 	else
209 		lastaddr = 0xdead;
210 
211  again:
212 	sgsize = size;
213 
214 	/* Make sure we're in an allowed DMA range. */
215 	if (t->_ranges != NULL) {
216 		/* XXX cache last result? */
217 		const struct arm32_dma_range * const dr =
218 		    _bus_dma_paddr_inrange(t->_ranges, t->_nranges, paddr);
219 		if (dr == NULL) {
220 			STAT_INCR(inrange_fail);
221 			return EINVAL;
222 		}
223 
224 		/*
225 		 * If this region is coherent, mark the segment as coherent.
226 		 */
227 		_ds_flags |= dr->dr_flags & _BUS_DMAMAP_COHERENT;
228 
229 		/*
230 		 * In a valid DMA range.  Translate the physical
231 		 * memory address to an address in the DMA window.
232 		 */
233 		curaddr = (paddr - dr->dr_sysbase) + dr->dr_busbase;
234 #if 0
235 		printf("%p: %#lx: range %#lx/%#lx/%#lx/%#x: %#x <-- %#lx\n",
236 		    t, paddr, dr->dr_sysbase, dr->dr_busbase,
237 		    dr->dr_len, dr->dr_flags, _ds_flags, curaddr);
238 #endif
239 	} else
240 		curaddr = paddr;
241 
242 	/*
243 	 * Make sure we don't cross any boundaries.
244 	 */
245 	if (map->_dm_boundary > 0) {
246 		bus_addr_t baddr;	/* next boundary address */
247 
248 		baddr = (curaddr + map->_dm_boundary) & bmask;
249 		if (sgsize > (baddr - curaddr))
250 			sgsize = (baddr - curaddr);
251 	}
252 
253 	/*
254 	 * Insert chunk into a segment, coalescing with the
255 	 * previous segment if possible.
256 	 */
257 	if (nseg > 0 && curaddr == lastaddr &&
258 	    segs[nseg - 1].ds_len + sgsize <= map->dm_maxsegsz &&
259 	    ((segs[nseg - 1]._ds_flags ^ _ds_flags) & _BUS_DMAMAP_COHERENT) == 0 &&
260 	    (map->_dm_boundary == 0 ||
261 	     (segs[nseg - 1].ds_addr & bmask) == (curaddr & bmask))) {
262 	     	/* coalesce */
263 		segs[nseg - 1].ds_len += sgsize;
264 	} else if (nseg >= map->_dm_segcnt) {
265 		return EFBIG;
266 	} else {
267 		/* new segment */
268 		segs[nseg].ds_addr = curaddr;
269 		segs[nseg].ds_len = sgsize;
270 		segs[nseg]._ds_paddr = curaddr;
271 		segs[nseg]._ds_flags = _ds_flags;
272 		nseg++;
273 	}
274 
275 	lastaddr = curaddr + sgsize;
276 
277 	paddr += sgsize;
278 	size -= sgsize;
279 	if (size > 0)
280 		goto again;
281 
282 	map->_dm_flags &= (_ds_flags & _BUS_DMAMAP_COHERENT);
283 	map->dm_nsegs = nseg;
284 	return 0;
285 }
286 
/*
 * Forward declaration; the definition is not in this part of the file.
 * NOTE(review): presumably copies n bytes between buf and the uio in the
 * given direction -- confirm against the definition.
 */
static int _bus_dma_uiomove(void *buf, struct uio *uio, size_t n,
	    int direction);
289 
#ifdef _ARM32_NEED_BUS_DMA_BOUNCE
static int _bus_dma_alloc_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map,
	    bus_size_t size, int flags);
static void _bus_dma_free_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map);

/*
 * Load a DMA map with its bounce buffer in place of the caller's buffer.
 *
 * Used after a direct load has failed.  buf/buflen describe the caller's
 * buffer, which is remembered in the cookie, and buftype is the
 * _BUS_DMA_BUFTYPE_* value recorded in the map on success.  The map must
 * have a cookie flagged _BUS_DMA_MIGHT_NEED_BOUNCE.
 *
 * Returns 0 on success, or the error from allocating the bounce buffer
 * or from loading it.
 */
static int
_bus_dma_load_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
	size_t buflen, int buftype, int flags)
{
	struct arm32_bus_dma_cookie * const cookie = map->_dm_cookie;
	struct vmspace * const kvm = vmspace_kernel();
	int error;

	KASSERT(cookie != NULL);
	KASSERT(cookie->id_flags & _BUS_DMA_MIGHT_NEED_BOUNCE);

	/* Get bounce pages now unless the map already owns some. */
	if (!(cookie->id_flags & _BUS_DMA_HAS_BOUNCE)) {
		error = _bus_dma_alloc_bouncebuf(t, map, buflen, flags);
		if (error != 0)
			return error;
	}

	/* Remember the caller's buffer. */
	cookie->id_origbuf = buf;
	cookie->id_origbuflen = buflen;

	/* This is a retry, so throw away what the first attempt left. */
	map->_dm_buftype = _BUS_DMA_BUFTYPE_INVALID;
	map->dm_nsegs = 0;
	map->dm_mapsize = 0;
	/* _bus_dmamap_load_buffer() clears this if we're not... */
	map->_dm_flags |= _BUS_DMAMAP_COHERENT;

	/* The bounce buffer is what actually gets loaded into the map. */
	error = _bus_dmamap_load_buffer(t, map, cookie->id_bouncebuf,
	    buflen, kvm, flags);
	if (error != 0)
		return error;

	STAT_INCR(bounced_loads);
	map->_dm_buftype = buftype;
	map->_dm_vmspace = kvm;
	map->dm_mapsize = buflen;

	/* ...so _bus_dmamap_sync() knows we're bouncing */
	cookie->id_flags |= _BUS_DMA_IS_BOUNCING;
	map->_dm_flags |= _BUS_DMAMAP_IS_BOUNCING;
	return 0;
}
#endif /* _ARM32_NEED_BUS_DMA_BOUNCE */
346 
347 /*
348  * Common function for DMA map creation.  May be called by bus-specific
349  * DMA map creation functions.
350  */
351 int
352 _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
353     bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
354 {
355 	struct arm32_bus_dmamap *map;
356 	void *mapstore;
357 	int error = 0;
358 
359 #ifdef DEBUG_DMA
360 	printf("dmamap_create: t=%p size=%#lx nseg=%#x msegsz=%#lx boundary=%#lx"
361 	    " flags=%#x\n", t, size, nsegments, maxsegsz, boundary, flags);
362 #endif	/* DEBUG_DMA */
363 
364 	/*
365 	 * Allocate and initialize the DMA map.  The end of the map
366 	 * is a variable-sized array of segments, so we allocate enough
367 	 * room for them in one shot.
368 	 *
369 	 * Note we don't preserve the WAITOK or NOWAIT flags.  Preservation
370 	 * of ALLOCNOW notifies others that we've reserved these resources,
371 	 * and they are not to be freed.
372 	 *
373 	 * The bus_dmamap_t includes one bus_dma_segment_t, hence
374 	 * the (nsegments - 1).
375 	 */
376 	const size_t mapsize = sizeof(struct arm32_bus_dmamap) +
377 	    (sizeof(bus_dma_segment_t) * (nsegments - 1));
378 	const int zallocflags = (flags & BUS_DMA_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
379 	if ((mapstore = kmem_intr_zalloc(mapsize, zallocflags)) == NULL)
380 		return ENOMEM;
381 
382 	map = (struct arm32_bus_dmamap *)mapstore;
383 	map->_dm_size = size;
384 	map->_dm_segcnt = nsegments;
385 	map->_dm_maxmaxsegsz = maxsegsz;
386 	map->_dm_boundary = boundary;
387 	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
388 	map->_dm_origbuf = NULL;
389 	map->_dm_buftype = _BUS_DMA_BUFTYPE_INVALID;
390 	map->_dm_vmspace = vmspace_kernel();
391 	map->_dm_cookie = NULL;
392 	map->dm_maxsegsz = maxsegsz;
393 	map->dm_mapsize = 0;		/* no valid mappings */
394 	map->dm_nsegs = 0;
395 
396 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
397 	struct arm32_bus_dma_cookie *cookie;
398 	int cookieflags;
399 	void *cookiestore;
400 
401 	cookieflags = 0;
402 
403 	if (t->_may_bounce != NULL) {
404 		error = (*t->_may_bounce)(t, map, flags, &cookieflags);
405 		if (error != 0)
406 			goto out;
407 	}
408 
409 	if (t->_ranges != NULL) {
410 		/*
411 		 * If ranges are defined, we may have to bounce. The only
412 		 * exception is if there is exactly one range that covers
413 		 * all of physical memory.
414 		 */
415 		switch (t->_nranges) {
416 		case 1:
417 			if (t->_ranges[0].dr_sysbase == 0 &&
418 			    t->_ranges[0].dr_len == UINTPTR_MAX) {
419 				break;
420 			}
421 			/* FALLTHROUGH */
422 		default:
423 			cookieflags |= _BUS_DMA_MIGHT_NEED_BOUNCE;
424 		}
425 	}
426 
427 	if ((cookieflags & _BUS_DMA_MIGHT_NEED_BOUNCE) == 0) {
428 		STAT_INCR(creates);
429 		*dmamp = map;
430 		return 0;
431 	}
432 
433 	const size_t cookiesize = sizeof(struct arm32_bus_dma_cookie) +
434 	    (sizeof(bus_dma_segment_t) * map->_dm_segcnt);
435 
436 	/*
437 	 * Allocate our cookie.
438 	 */
439 	if ((cookiestore = kmem_intr_zalloc(cookiesize, zallocflags)) == NULL) {
440 		error = ENOMEM;
441 		goto out;
442 	}
443 	cookie = (struct arm32_bus_dma_cookie *)cookiestore;
444 	cookie->id_flags = cookieflags;
445 	map->_dm_cookie = cookie;
446 	STAT_INCR(bounced_creates);
447 
448 	error = _bus_dma_alloc_bouncebuf(t, map, size, flags);
449  out:
450 	if (error)
451 		_bus_dmamap_destroy(t, map);
452 	else
453 		*dmamp = map;
454 #else
455 	*dmamp = map;
456 	STAT_INCR(creates);
457 #endif /* _ARM32_NEED_BUS_DMA_BOUNCE */
458 #ifdef DEBUG_DMA
459 	printf("dmamap_create:map=%p\n", map);
460 #endif	/* DEBUG_DMA */
461 	return error;
462 }
463 
464 /*
465  * Common function for DMA map destruction.  May be called by bus-specific
466  * DMA map destruction functions.
467  */
468 void
469 _bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
470 {
471 
472 #ifdef DEBUG_DMA
473 	printf("dmamap_destroy: t=%p map=%p\n", t, map);
474 #endif	/* DEBUG_DMA */
475 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
476 	struct arm32_bus_dma_cookie *cookie = map->_dm_cookie;
477 
478 	/*
479 	 * Free any bounce pages this map might hold.
480 	 */
481 	if (cookie != NULL) {
482 		const size_t cookiesize = sizeof(struct arm32_bus_dma_cookie) +
483 		    (sizeof(bus_dma_segment_t) * map->_dm_segcnt);
484 
485 		if (cookie->id_flags & _BUS_DMA_IS_BOUNCING)
486 			STAT_INCR(bounced_unloads);
487 		map->dm_nsegs = 0;
488 		if (cookie->id_flags & _BUS_DMA_HAS_BOUNCE)
489 			_bus_dma_free_bouncebuf(t, map);
490 		STAT_INCR(bounced_destroys);
491 		kmem_intr_free(cookie, cookiesize);
492 	} else
493 #endif
494 	STAT_INCR(destroys);
495 
496 	if (map->dm_nsegs > 0)
497 		STAT_INCR(unloads);
498 
499 	const size_t mapsize = sizeof(struct arm32_bus_dmamap) +
500 	    (sizeof(bus_dma_segment_t) * (map->_dm_segcnt - 1));
501 	kmem_intr_free(map, mapsize);
502 }
503 
504 /*
505  * Common function for loading a DMA map with a linear buffer.  May
506  * be called by bus-specific DMA map load functions.
507  */
508 int
509 _bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
510     bus_size_t buflen, struct proc *p, int flags)
511 {
512 	struct vmspace *vm;
513 	int error;
514 
515 #ifdef DEBUG_DMA
516 	printf("dmamap_load: t=%p map=%p buf=%p len=%#lx p=%p f=%#x\n",
517 	    t, map, buf, buflen, p, flags);
518 #endif	/* DEBUG_DMA */
519 
520 	if (map->dm_nsegs > 0) {
521 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
522 		struct arm32_bus_dma_cookie *cookie = map->_dm_cookie;
523 		if (cookie != NULL) {
524 			if (cookie->id_flags & _BUS_DMA_IS_BOUNCING) {
525 				STAT_INCR(bounced_unloads);
526 				cookie->id_flags &= ~_BUS_DMA_IS_BOUNCING;
527 				map->_dm_flags &= ~_BUS_DMAMAP_IS_BOUNCING;
528 			}
529 		} else
530 #endif
531 		STAT_INCR(unloads);
532 	}
533 
534 	/*
535 	 * Make sure that on error condition we return "no valid mappings".
536 	 */
537 	map->dm_mapsize = 0;
538 	map->dm_nsegs = 0;
539 	map->_dm_buftype = _BUS_DMA_BUFTYPE_INVALID;
540 	KASSERTMSG(map->dm_maxsegsz <= map->_dm_maxmaxsegsz,
541 	    "dm_maxsegsz %lu _dm_maxmaxsegsz %lu",
542 	    map->dm_maxsegsz, map->_dm_maxmaxsegsz);
543 
544 	if (buflen > map->_dm_size)
545 		return EINVAL;
546 
547 	if (p != NULL) {
548 		vm = p->p_vmspace;
549 	} else {
550 		vm = vmspace_kernel();
551 	}
552 
553 	/* _bus_dmamap_load_buffer() clears this if we're not... */
554 	map->_dm_flags |= _BUS_DMAMAP_COHERENT;
555 
556 	error = _bus_dmamap_load_buffer(t, map, buf, buflen, vm, flags);
557 	if (error == 0) {
558 		map->dm_mapsize = buflen;
559 		map->_dm_vmspace = vm;
560 		map->_dm_origbuf = buf;
561 		map->_dm_buftype = _BUS_DMA_BUFTYPE_LINEAR;
562 		if (map->_dm_flags & _BUS_DMAMAP_COHERENT) {
563 			STAT_INCR(coherent_loads);
564 		} else {
565 			STAT_INCR(loads);
566 		}
567 		return 0;
568 	}
569 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
570 	struct arm32_bus_dma_cookie * const cookie = map->_dm_cookie;
571 	if (cookie != NULL && (cookie->id_flags & _BUS_DMA_MIGHT_NEED_BOUNCE)) {
572 		error = _bus_dma_load_bouncebuf(t, map, buf, buflen,
573 		    _BUS_DMA_BUFTYPE_LINEAR, flags);
574 	}
575 #endif
576 	return error;
577 }
578 
579 /*
580  * Like _bus_dmamap_load(), but for mbufs.
581  */
582 int
583 _bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
584     int flags)
585 {
586 	struct mbuf *m;
587 	int error;
588 
589 #ifdef DEBUG_DMA
590 	printf("dmamap_load_mbuf: t=%p map=%p m0=%p f=%#x\n",
591 	    t, map, m0, flags);
592 #endif	/* DEBUG_DMA */
593 
594 	if (map->dm_nsegs > 0) {
595 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
596 		struct arm32_bus_dma_cookie *cookie = map->_dm_cookie;
597 		if (cookie != NULL) {
598 			if (cookie->id_flags & _BUS_DMA_IS_BOUNCING) {
599 				STAT_INCR(bounced_unloads);
600 				cookie->id_flags &= ~_BUS_DMA_IS_BOUNCING;
601 				map->_dm_flags &= ~_BUS_DMAMAP_IS_BOUNCING;
602 			}
603 		} else
604 #endif
605 		STAT_INCR(unloads);
606 	}
607 
608 	/*
609 	 * Make sure that on error condition we return "no valid mappings."
610 	 */
611 	map->dm_mapsize = 0;
612 	map->dm_nsegs = 0;
613 	map->_dm_buftype = _BUS_DMA_BUFTYPE_INVALID;
614 	KASSERTMSG(map->dm_maxsegsz <= map->_dm_maxmaxsegsz,
615 	    "dm_maxsegsz %lu _dm_maxmaxsegsz %lu",
616 	    map->dm_maxsegsz, map->_dm_maxmaxsegsz);
617 
618 	KASSERT(m0->m_flags & M_PKTHDR);
619 
620 	if (m0->m_pkthdr.len > map->_dm_size)
621 		return EINVAL;
622 
623 	/* _bus_dmamap_load_paddr() clears this if we're not... */
624 	map->_dm_flags |= _BUS_DMAMAP_COHERENT;
625 
626 	error = 0;
627 	for (m = m0; m != NULL && error == 0; m = m->m_next) {
628 		int offset;
629 		int remainbytes;
630 		const struct vm_page * const *pgs;
631 		paddr_t paddr;
632 		int size;
633 
634 		if (m->m_len == 0)
635 			continue;
636 		/*
637 		 * Don't allow reads in read-only mbufs.
638 		 */
639 		if (M_ROMAP(m) && (flags & BUS_DMA_READ)) {
640 			error = EFAULT;
641 			break;
642 		}
643 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER|M_EXT_PAGES)) {
644 		case M_EXT|M_EXT_CLUSTER:
645 			/* XXX KDASSERT */
646 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
647 			paddr = m->m_ext.ext_paddr +
648 			    (m->m_data - m->m_ext.ext_buf);
649 			size = m->m_len;
650 			error = _bus_dmamap_load_paddr(t, map, paddr, size,
651 			    false);
652 			break;
653 
654 		case M_EXT|M_EXT_PAGES:
655 			KASSERT(m->m_ext.ext_buf <= m->m_data);
656 			KASSERT(m->m_data <=
657 			    m->m_ext.ext_buf + m->m_ext.ext_size);
658 
659 			offset = (vaddr_t)m->m_data -
660 			    trunc_page((vaddr_t)m->m_ext.ext_buf);
661 			remainbytes = m->m_len;
662 
663 			/* skip uninteresting pages */
664 			pgs = (const struct vm_page * const *)
665 			    m->m_ext.ext_pgs + (offset >> PAGE_SHIFT);
666 
667 			offset &= PAGE_MASK;	/* offset in the first page */
668 
669 			/* load each page */
670 			while (remainbytes > 0) {
671 				const struct vm_page *pg;
672 
673 				size = MIN(remainbytes, PAGE_SIZE - offset);
674 
675 				pg = *pgs++;
676 				KASSERT(pg);
677 				paddr = VM_PAGE_TO_PHYS(pg) + offset;
678 
679 				error = _bus_dmamap_load_paddr(t, map,
680 				    paddr, size, false);
681 				if (error)
682 					break;
683 				offset = 0;
684 				remainbytes -= size;
685 			}
686 			break;
687 
688 		case 0:
689 			paddr = m->m_paddr + M_BUFOFFSET(m) +
690 			    (m->m_data - M_BUFADDR(m));
691 			size = m->m_len;
692 			error = _bus_dmamap_load_paddr(t, map, paddr, size,
693 			    false);
694 			break;
695 
696 		default:
697 			error = _bus_dmamap_load_buffer(t, map, m->m_data,
698 			    m->m_len, vmspace_kernel(), flags);
699 		}
700 	}
701 	if (error == 0) {
702 		map->dm_mapsize = m0->m_pkthdr.len;
703 		map->_dm_origbuf = m0;
704 		map->_dm_buftype = _BUS_DMA_BUFTYPE_MBUF;
705 		map->_dm_vmspace = vmspace_kernel();	/* always kernel */
706 		if (map->_dm_flags & _BUS_DMAMAP_COHERENT) {
707 			STAT_INCR(coherent_loads);
708 		} else {
709 			STAT_INCR(loads);
710 		}
711 		return 0;
712 	}
713 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
714 	struct arm32_bus_dma_cookie * const cookie = map->_dm_cookie;
715 	if (cookie != NULL && (cookie->id_flags & _BUS_DMA_MIGHT_NEED_BOUNCE)) {
716 		error = _bus_dma_load_bouncebuf(t, map, m0, m0->m_pkthdr.len,
717 		    _BUS_DMA_BUFTYPE_MBUF, flags);
718 		STAT_INCR(bounced_mbuf_loads);
719 	}
720 #endif
721 	return error;
722 }
723 
724 /*
725  * Like _bus_dmamap_load(), but for uios.
726  */
727 int
728 _bus_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map, struct uio *uio,
729     int flags)
730 {
731 	bus_size_t minlen, resid;
732 	struct iovec *iov;
733 	void *addr;
734 	int i, error;
735 
736 	/*
737 	 * Make sure that on error condition we return "no valid mappings."
738 	 */
739 	map->dm_mapsize = 0;
740 	map->dm_nsegs = 0;
741 	KASSERTMSG(map->dm_maxsegsz <= map->_dm_maxmaxsegsz,
742 	    "dm_maxsegsz %lu _dm_maxmaxsegsz %lu",
743 	    map->dm_maxsegsz, map->_dm_maxmaxsegsz);
744 
745 	resid = uio->uio_resid;
746 	iov = uio->uio_iov;
747 
748 	/* _bus_dmamap_load_buffer() clears this if we're not... */
749 	map->_dm_flags |= _BUS_DMAMAP_COHERENT;
750 
751 	error = 0;
752 	for (i = 0; i < uio->uio_iovcnt && resid != 0 && error == 0; i++) {
753 		/*
754 		 * Now at the first iovec to load.  Load each iovec
755 		 * until we have exhausted the residual count.
756 		 */
757 		minlen = resid < iov[i].iov_len ? resid : iov[i].iov_len;
758 		addr = (void *)iov[i].iov_base;
759 
760 		error = _bus_dmamap_load_buffer(t, map, addr, minlen,
761 		    uio->uio_vmspace, flags);
762 
763 		resid -= minlen;
764 	}
765 	if (error == 0) {
766 		map->dm_mapsize = uio->uio_resid;
767 		map->_dm_origbuf = uio;
768 		map->_dm_buftype = _BUS_DMA_BUFTYPE_UIO;
769 		map->_dm_vmspace = uio->uio_vmspace;
770 		if (map->_dm_flags & _BUS_DMAMAP_COHERENT) {
771 			STAT_INCR(coherent_loads);
772 		} else {
773 			STAT_INCR(loads);
774 		}
775 	}
776 	return error;
777 }
778 
779 /*
780  * Like _bus_dmamap_load(), but for raw memory allocated with
781  * bus_dmamem_alloc().
782  */
783 int
784 _bus_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
785     bus_dma_segment_t *segs, int nsegs, bus_size_t size0, int flags)
786 {
787 
788 	bus_size_t size;
789 	int i, error = 0;
790 
791 	/*
792 	 * Make sure that on error conditions we return "no valid mappings."
793 	 */
794 	map->dm_mapsize = 0;
795 	map->dm_nsegs = 0;
796 	KASSERT(map->dm_maxsegsz <= map->_dm_maxmaxsegsz);
797 
798 	if (size0 > map->_dm_size)
799 		return EINVAL;
800 
801 	for (i = 0, size = size0; i < nsegs && size > 0; i++) {
802 		bus_dma_segment_t *ds = &segs[i];
803 		bus_size_t sgsize;
804 
805 		sgsize = MIN(ds->ds_len, size);
806 		if (sgsize == 0)
807 			continue;
808 		const bool coherent =
809 		    (ds->_ds_flags & _BUS_DMAMAP_COHERENT) != 0;
810 		error = _bus_dmamap_load_paddr(t, map, ds->ds_addr,
811 		    sgsize, coherent);
812 		if (error != 0)
813 			break;
814 		size -= sgsize;
815 	}
816 
817 	if (error != 0) {
818 		map->dm_mapsize = 0;
819 		map->dm_nsegs = 0;
820 		return error;
821 	}
822 
823 	/* XXX TBD bounce */
824 
825 	map->dm_mapsize = size0;
826 	map->_dm_origbuf = NULL;
827 	map->_dm_buftype = _BUS_DMA_BUFTYPE_RAW;
828 	map->_dm_vmspace = NULL;
829 	return 0;
830 }
831 
832 /*
833  * Common function for unloading a DMA map.  May be called by
834  * bus-specific DMA map unload functions.
835  */
836 void
837 _bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
838 {
839 
840 #ifdef DEBUG_DMA
841 	printf("dmamap_unload: t=%p map=%p\n", t, map);
842 #endif	/* DEBUG_DMA */
843 
844 	/*
845 	 * No resources to free; just mark the mappings as
846 	 * invalid.
847 	 */
848 	map->dm_mapsize = 0;
849 	map->dm_nsegs = 0;
850 	map->_dm_origbuf = NULL;
851 	map->_dm_buftype = _BUS_DMA_BUFTYPE_INVALID;
852 	map->_dm_vmspace = NULL;
853 }
854 
/*
 * Do the cache maintenance for one piece of a DMA buffer.
 *
 * va and pa are the virtual and physical address of the piece (they must
 * have the same offset within a page), len is its length in bytes and
 * ops is the BUS_DMASYNC_* combination being synced.  readonly_p says
 * that the memory is mapped read-only; it is ignored when
 * ARM_MMU_EXTENDED is defined.
 *
 * An ops value that matches none of the cases below does nothing.
 */
static void
_bus_dmamap_sync_segment(vaddr_t va, paddr_t pa, vsize_t len, int ops,
    bool readonly_p)
{

#if defined(ARM_MMU_EXTENDED)
	/*
	 * No optimisations are available for readonly mbufs on armv6+, so
	 * assume it's not readonly from here on.
	 *
	 * See the comment in _bus_dmamap_sync_mbuf
	 */
	readonly_p = false;
#endif

	KASSERTMSG((va & PAGE_MASK) == (pa & PAGE_MASK),
	    "va %#lx pa %#lx", va, pa);
#if 0
	printf("sync_segment: va=%#lx pa=%#lx len=%#lx ops=%#x ro=%d\n",
	    va, pa, len, ops, readonly_p);
#endif

	switch (ops) {
	case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
		/*
		 * Writable memory: write back and invalidate the whole
		 * range.  Read-only memory is handled like a plain
		 * PREREAD by falling through.
		 */
		if (!readonly_p) {
			STAT_INCR(sync_prereadwrite);
			cpu_dcache_wbinv_range(va, len);
			cpu_sdcache_wbinv_range(va, pa, len);
			break;
		}
		/* FALLTHROUGH */

	case BUS_DMASYNC_PREREAD: {
		const size_t line_size = arm_dcache_align;
		const size_t line_mask = arm_dcache_align_mask;
		vsize_t misalignment = va & line_mask;
		/*
		 * Cache lines that are only partly covered by the range
		 * are written back and invalidated; lines fully inside
		 * it are just invalidated.
		 * NOTE(review): presumably the write back preserves
		 * unrelated data sharing those lines -- confirm.
		 */
		if (misalignment) {
			/* Partial line at the start of the range. */
			va -= misalignment;
			pa -= misalignment;
			len += misalignment;
			STAT_INCR(sync_preread_begin);
			cpu_dcache_wbinv_range(va, line_size);
			cpu_sdcache_wbinv_range(va, pa, line_size);
			/* The whole range was inside that one line. */
			if (len <= line_size)
				break;
			va += line_size;
			pa += line_size;
			len -= line_size;
		}
		/* Split off a partial line at the end, if any. */
		misalignment = len & line_mask;
		len -= misalignment;
		if (len > 0) {
			/* The cache lines fully covered by the range. */
			STAT_INCR(sync_preread);
			cpu_dcache_inv_range(va, len);
			cpu_sdcache_inv_range(va, pa, len);
		}
		if (misalignment) {
			/* Partial line at the end of the range. */
			va += len;
			pa += len;
			STAT_INCR(sync_preread_tail);
			cpu_dcache_wbinv_range(va, line_size);
			cpu_sdcache_wbinv_range(va, pa, line_size);
		}
		break;
	}

	case BUS_DMASYNC_PREWRITE:
		/* Write the range back so the data is in memory. */
		STAT_INCR(sync_prewrite);
		cpu_dcache_wb_range(va, len);
		cpu_sdcache_wb_range(va, pa, len);
		break;

#if defined(CPU_CORTEX) || defined(CPU_ARMV8)

	/*
	 * Cortex CPUs can do speculative loads so we need to clean the cache
	 * after a DMA read to deal with any speculatively loaded cache lines.
	 * Since these can't be dirty, we can just invalidate them and don't
	 * have to worry about having to write back their contents.
	 */
	case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
		STAT_INCR(sync_postreadwrite);
		cpu_dcache_inv_range(va, len);
		cpu_sdcache_inv_range(va, pa, len);
		break;

	case BUS_DMASYNC_POSTREAD:
		STAT_INCR(sync_postread);
		cpu_dcache_inv_range(va, len);
		cpu_sdcache_inv_range(va, pa, len);
		break;
#endif
	}
}
949 
950 static inline void
951 _bus_dmamap_sync_linear(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
952     bus_size_t len, int ops)
953 {
954 	bus_dma_segment_t *ds = map->dm_segs;
955 	vaddr_t va = (vaddr_t) map->_dm_origbuf;
956 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
957 	if (map->_dm_flags & _BUS_DMAMAP_IS_BOUNCING) {
958 		struct arm32_bus_dma_cookie * const cookie = map->_dm_cookie;
959 		va = (vaddr_t) cookie->id_bouncebuf;
960 	}
961 #endif
962 
963 	while (len > 0) {
964 		while (offset >= ds->ds_len) {
965 			offset -= ds->ds_len;
966 			va += ds->ds_len;
967 			ds++;
968 		}
969 
970 		paddr_t pa = _bus_dma_busaddr_to_paddr(t, ds->ds_addr + offset);
971 		size_t seglen = uimin(len, ds->ds_len - offset);
972 
973 		if ((ds->_ds_flags & _BUS_DMAMAP_COHERENT) == 0)
974 			_bus_dmamap_sync_segment(va + offset, pa, seglen, ops,
975 			    false);
976 
977 		offset += seglen;
978 		len -= seglen;
979 	}
980 }
981 
982 static inline void
983 _bus_dmamap_sync_mbuf(bus_dma_tag_t t, bus_dmamap_t map, bus_size_t offset,
984     bus_size_t len, int ops)
985 {
986 	bus_dma_segment_t *ds = map->dm_segs;
987 	struct mbuf *m = map->_dm_origbuf;
988 	bus_size_t voff = offset;
989 	bus_size_t ds_off = offset;
990 
991 	while (len > 0) {
992 		/* Find the current dma segment */
993 		while (ds_off >= ds->ds_len) {
994 			ds_off -= ds->ds_len;
995 			ds++;
996 		}
997 		/* Find the current mbuf. */
998 		while (voff >= m->m_len) {
999 			voff -= m->m_len;
1000 			m = m->m_next;
1001 		}
1002 
1003 		/*
1004 		 * Now at the first mbuf to sync; nail each one until
1005 		 * we have exhausted the length.
1006 		 */
1007 		vsize_t seglen = uimin(len, uimin(m->m_len - voff, ds->ds_len - ds_off));
1008 		vaddr_t va = mtod(m, vaddr_t) + voff;
1009 		paddr_t pa = _bus_dma_busaddr_to_paddr(t, ds->ds_addr + ds_off);
1010 
1011 		/*
1012 		 * We can save a lot of work here if we know the mapping
1013 		 * is read-only at the MMU and we aren't using the armv6+
1014 		 * MMU:
1015 		 *
1016 		 * If a mapping is read-only, no dirty cache blocks will
1017 		 * exist for it.  If a writable mapping was made read-only,
1018 		 * we know any dirty cache lines for the range will have
1019 		 * been cleaned for us already.  Therefore, if the upper
1020 		 * layer can tell us we have a read-only mapping, we can
1021 		 * skip all cache cleaning.
1022 		 *
1023 		 * NOTE: This only works if we know the pmap cleans pages
1024 		 * before making a read-write -> read-only transition.  If
1025 		 * this ever becomes non-true (e.g. Physically Indexed
1026 		 * cache), this will have to be revisited.
1027 		 */
1028 
1029 		if ((ds->_ds_flags & _BUS_DMAMAP_COHERENT) == 0) {
1030 			/*
1031 			 * If we are doing preread (DMAing into the mbuf),
1032 			 * this mbuf better not be readonly,
1033 			 */
1034 			KASSERT(!(ops & BUS_DMASYNC_PREREAD) || !M_ROMAP(m));
1035 			_bus_dmamap_sync_segment(va, pa, seglen, ops,
1036 			    M_ROMAP(m));
1037 		}
1038 		voff += seglen;
1039 		ds_off += seglen;
1040 		len -= seglen;
1041 	}
1042 }
1043 
1044 static inline void
1045 _bus_dmamap_sync_uio(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
1046     bus_size_t len, int ops)
1047 {
1048 	bus_dma_segment_t *ds = map->dm_segs;
1049 	struct uio *uio = map->_dm_origbuf;
1050 	struct iovec *iov = uio->uio_iov;
1051 	bus_size_t voff = offset;
1052 	bus_size_t ds_off = offset;
1053 
1054 	while (len > 0) {
1055 		/* Find the current dma segment */
1056 		while (ds_off >= ds->ds_len) {
1057 			ds_off -= ds->ds_len;
1058 			ds++;
1059 		}
1060 
1061 		/* Find the current iovec. */
1062 		while (voff >= iov->iov_len) {
1063 			voff -= iov->iov_len;
1064 			iov++;
1065 		}
1066 
1067 		/*
1068 		 * Now at the first iovec to sync; nail each one until
1069 		 * we have exhausted the length.
1070 		 */
1071 		vsize_t seglen = uimin(len, uimin(iov->iov_len - voff, ds->ds_len - ds_off));
1072 		vaddr_t va = (vaddr_t) iov->iov_base + voff;
1073 		paddr_t pa = _bus_dma_busaddr_to_paddr(t, ds->ds_addr + ds_off);
1074 
1075 		if ((ds->_ds_flags & _BUS_DMAMAP_COHERENT) == 0)
1076 			_bus_dmamap_sync_segment(va, pa, seglen, ops, false);
1077 
1078 		voff += seglen;
1079 		ds_off += seglen;
1080 		len -= seglen;
1081 	}
1082 }
1083 
/*
 * Common function for DMA map synchronization.  May be called
 * by bus-specific DMA map synchronization functions.
 *
 * Synchronizes the [offset, offset + len) part of a loaded map for the
 * BUS_DMASYNC_* operations in `ops'.  In order, the function:
 *
 *	- panics if PRE and POST operations are mixed;
 *	- for a bouncing map and PREWRITE, copies the caller's data into
 *	  the bounce buffer;
 *	- for a COHERENT map, only drains the write buffer (PREWRITE or
 *	  POSTREAD) and skips cache maintenance;
 *	- otherwise performs cache maintenance through the per-buffer-type
 *	  helper and drains the write buffer;
 *	- for a bouncing map and POSTREAD, copies the bounce buffer back
 *	  into the caller's buffer.
 *
 * XXX Should have separate versions for write-through vs.
 * XXX write-back caches.  We currently assume write-back
 * XXX here, which is not as efficient as it could be for
 * XXX the write-through case.
 */
void
_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset,
    bus_size_t len, int ops)
{
#ifdef DEBUG_DMA
	printf("dmamap_sync: t=%p map=%p offset=%#lx len=%#lx ops=%#x\n",
	    t, map, offset, len, ops);
#endif	/* DEBUG_DMA */

	/*
	 * Mixing of PRE and POST operations is not allowed.
	 */
	if ((ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE)) != 0 &&
	    (ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE)) != 0)
		panic("%s: mix PRE and POST", __func__);

	/* The window to sync must be non-empty and lie within the map. */
	KASSERTMSG(offset < map->dm_mapsize,
	    "offset %lu mapsize %lu",
	    offset, map->dm_mapsize);
	KASSERTMSG(len > 0 && offset + len <= map->dm_mapsize,
	    "len %lu offset %lu mapsize %lu",
	    len, offset, map->dm_mapsize);

	/*
	 * For a virtually-indexed write-back cache, we need
	 * to do the following things:
	 *
	 *	PREREAD -- Invalidate the D-cache.  We do this
	 *	here in case a write-back is required by the back-end.
	 *
	 *	PREWRITE -- Write-back the D-cache.  Note that if
	 *	we are doing a PREREAD|PREWRITE, we can collapse
	 *	the whole thing into a single Wb-Inv.
	 *
	 *	POSTREAD -- Re-invalidate the D-cache in case speculative
	 *	memory accesses caused cachelines to become valid with now
	 *	invalid data.
	 *
	 *	POSTWRITE -- Nothing.
	 */
#ifdef _ARM32_NEED_BUS_DMA_BOUNCE
	const bool bouncing = (map->_dm_flags & _BUS_DMAMAP_IS_BOUNCING);
#else
	const bool bouncing = false;
#endif

	/*
	 * POST operations are only honoured on Cortex/ARMv8 builds; on
	 * other CPUs post_ops is forced to 0 so that a POST-only request
	 * returns immediately below.
	 *
	 * NOTE(review): on those other CPUs this early return is also
	 * taken for a bouncing map with POSTREAD, i.e. before the bounce
	 * buffer is copied back at bounce_it -- confirm that this is
	 * intended.
	 */
	const int pre_ops = ops & (BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
#if defined(CPU_CORTEX) || defined(CPU_ARMV8)
	const int post_ops = ops & (BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
#else
	const int post_ops = 0;
#endif
	if (pre_ops == 0 && post_ops == 0)
		return;

	/* Fast path: POSTWRITE alone only updates statistics. */
	if (post_ops == BUS_DMASYNC_POSTWRITE) {
		KASSERT(pre_ops == 0);
		if ((map->_dm_flags & _BUS_DMAMAP_COHERENT)) {
			STAT_INCR(sync_coherent_postwrite);
		} else {
			STAT_INCR(sync_postwrite);
		}
		return;
	}

	KASSERTMSG(bouncing || pre_ops != 0 || (post_ops & BUS_DMASYNC_POSTREAD),
	    "pre_ops %#x post_ops %#x", pre_ops, post_ops);

	if (bouncing && (ops & BUS_DMASYNC_PREWRITE)) {
		struct arm32_bus_dma_cookie * const cookie = map->_dm_cookie;
		STAT_INCR(write_bounces);
		char * const dataptr = (char *)cookie->id_bouncebuf + offset;
		/*
		 * Copy the caller's buffer to the bounce buffer.
		 */
		switch (map->_dm_buftype) {
		case _BUS_DMA_BUFTYPE_LINEAR:
			memcpy(dataptr, cookie->id_origlinearbuf + offset, len);
			break;

		case _BUS_DMA_BUFTYPE_MBUF:
			m_copydata(cookie->id_origmbuf, offset, len, dataptr);
			break;

		case _BUS_DMA_BUFTYPE_UIO:
			/*
			 * NOTE(review): `offset' is applied to dataptr but is
			 * not passed to _bus_dma_uiomove(), and the result of
			 * that call is ignored -- confirm partial syncs of a
			 * bounced uio map behave as intended.
			 */
			_bus_dma_uiomove(dataptr, cookie->id_origuio, len,
			    UIO_WRITE);
			break;

#ifdef DIAGNOSTIC
		case _BUS_DMA_BUFTYPE_RAW:
			panic("%s:(pre): _BUS_DMA_BUFTYPE_RAW", __func__);
			break;

		case _BUS_DMA_BUFTYPE_INVALID:
			panic("%s(pre): _BUS_DMA_BUFTYPE_INVALID", __func__);
			break;

		default:
			panic("%s(pre): map %p: unknown buffer type %d\n",
			    __func__, map, map->_dm_buftype);
			break;
#endif /* DIAGNOSTIC */
		}
	}

	/* Skip cache frobbing if mapping was COHERENT */
	if ((map->_dm_flags & _BUS_DMAMAP_COHERENT)) {
		/* Only statistics are updated by this switch. */
		switch (ops) {
		case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
			STAT_INCR(sync_coherent_prereadwrite);
			break;

		case BUS_DMASYNC_PREREAD:
			STAT_INCR(sync_coherent_preread);
			break;

		case BUS_DMASYNC_PREWRITE:
			STAT_INCR(sync_coherent_prewrite);
			break;

		case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
			STAT_INCR(sync_coherent_postreadwrite);
			break;

		case BUS_DMASYNC_POSTREAD:
			STAT_INCR(sync_coherent_postread);
			break;

		/* BUS_DMASYNC_POSTWRITE was already handled as a fastpath */
		}
		/*
		 * Drain the write buffer of DMA operators.
		 * 1) when cpu->device (prewrite)
		 * 2) when device->cpu (postread)
		 */
		if ((pre_ops & BUS_DMASYNC_PREWRITE) || (post_ops & BUS_DMASYNC_POSTREAD))
			cpu_drain_writebuf();

		/*
		 * Only thing left to do for COHERENT mapping is copy from bounce
		 * in the POSTREAD case.
		 */
		if (bouncing && (post_ops & BUS_DMASYNC_POSTREAD))
			goto bounce_it;

		return;
	}

#if !defined(ARM_MMU_EXTENDED)
	/*
	 * If the mapping belongs to a non-kernel vmspace, and the
	 * vmspace has not been active since the last time a full
	 * cache flush was performed, we don't need to do anything.
	 *
	 * NOTE(review): this return also bypasses the POSTREAD bounce
	 * copy at bounce_it -- confirm that is intended for bouncing maps.
	 */
	if (__predict_false(!VMSPACE_IS_KERNEL_P(map->_dm_vmspace) &&
	    vm_map_pmap(&map->_dm_vmspace->vm_map)->pm_cstate.cs_cache_d == 0))
		return;
#endif

	/*
	 * A bouncing map is always synced through the linear helper,
	 * which uses the bounce buffer's address, whatever the original
	 * buffer type was.
	 */
	int buftype = map->_dm_buftype;
	if (bouncing) {
		buftype = _BUS_DMA_BUFTYPE_LINEAR;
	}

	switch (buftype) {
	case _BUS_DMA_BUFTYPE_LINEAR:
	case _BUS_DMA_BUFTYPE_RAW:
		_bus_dmamap_sync_linear(t, map, offset, len, ops);
		break;

	case _BUS_DMA_BUFTYPE_MBUF:
		_bus_dmamap_sync_mbuf(t, map, offset, len, ops);
		break;

	case _BUS_DMA_BUFTYPE_UIO:
		_bus_dmamap_sync_uio(t, map, offset, len, ops);
		break;

	case _BUS_DMA_BUFTYPE_INVALID:
		panic("%s: _BUS_DMA_BUFTYPE_INVALID", __func__);
		break;

	default:
		panic("%s: map %p: unknown buffer type %d\n", __func__, map,
		    map->_dm_buftype);
	}

	/* Drain the write buffer. */
	cpu_drain_writebuf();

	/* Only a bouncing map doing POSTREAD has anything left to do. */
	if (!bouncing || (ops & BUS_DMASYNC_POSTREAD) == 0)
		return;

  bounce_it:
	STAT_INCR(read_bounces);

	struct arm32_bus_dma_cookie * const cookie = map->_dm_cookie;
	char * const dataptr = (char *)cookie->id_bouncebuf + offset;
	/*
	 * Copy the bounce buffer to the caller's buffer.
	 */
	switch (map->_dm_buftype) {
	case _BUS_DMA_BUFTYPE_LINEAR:
		memcpy(cookie->id_origlinearbuf + offset, dataptr, len);
		break;

	case _BUS_DMA_BUFTYPE_MBUF:
		m_copyback(cookie->id_origmbuf, offset, len, dataptr);
		break;

	case _BUS_DMA_BUFTYPE_UIO:
		/*
		 * NOTE(review): as in the PREWRITE case, `offset' is not
		 * passed to _bus_dma_uiomove() and its result is ignored.
		 */
		_bus_dma_uiomove(dataptr, cookie->id_origuio, len, UIO_READ);
		break;

#ifdef DIAGNOSTIC
	case _BUS_DMA_BUFTYPE_RAW:
		panic("%s(post): _BUS_DMA_BUFTYPE_RAW", __func__);
		break;

	case _BUS_DMA_BUFTYPE_INVALID:
		panic("%s(post): _BUS_DMA_BUFTYPE_INVALID", __func__);
		break;

	default:
		panic("%s(post): map %p: unknown buffer type %d\n", __func__,
		    map, map->_dm_buftype);
		break;
#endif
	}
}
1324 
1325 /*
1326  * Common function for DMA-safe memory allocation.  May be called
1327  * by bus-specific DMA memory allocation functions.
1328  */
1329 
1330 int
1331 _bus_dmamem_alloc(bus_dma_tag_t t, bus_size_t size, bus_size_t alignment,
1332     bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
1333     int flags)
1334 {
1335 	struct arm32_dma_range *dr;
1336 	int error, i;
1337 
1338 #ifdef DEBUG_DMA
1339 	printf("dmamem_alloc t=%p size=%#lx align=%#lx boundary=%#lx "
1340 	    "segs=%p nsegs=%#x rsegs=%p flags=%#x\n", t, size, alignment,
1341 	    boundary, segs, nsegs, rsegs, flags);
1342 #endif
1343 
1344 	if ((dr = t->_ranges) != NULL) {
1345 		error = ENOMEM;
1346 		for (i = 0; i < t->_nranges; i++, dr++) {
1347 			if (dr->dr_len == 0
1348 			    || (dr->dr_flags & _BUS_DMAMAP_NOALLOC))
1349 				continue;
1350 			error = _bus_dmamem_alloc_range(t, size, alignment,
1351 			    boundary, segs, nsegs, rsegs, flags,
1352 			    trunc_page(dr->dr_sysbase),
1353 			    trunc_page(dr->dr_sysbase + dr->dr_len));
1354 			if (error == 0)
1355 				break;
1356 		}
1357 	} else {
1358 		error = _bus_dmamem_alloc_range(t, size, alignment, boundary,
1359 		    segs, nsegs, rsegs, flags, 0UL, ~0UL);
1360 	}
1361 
1362 #ifdef DEBUG_DMA
1363 	printf("dmamem_alloc: =%d\n", error);
1364 #endif
1365 
1366 	return error;
1367 }
1368 
1369 /*
1370  * Common function for freeing DMA-safe memory.  May be called by
1371  * bus-specific DMA memory free functions.
1372  */
1373 void
1374 _bus_dmamem_free(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs)
1375 {
1376 	struct vm_page *m;
1377 	bus_addr_t addr;
1378 	struct pglist mlist;
1379 	int curseg;
1380 
1381 #ifdef DEBUG_DMA
1382 	printf("dmamem_free: t=%p segs=%p nsegs=%#x\n", t, segs, nsegs);
1383 #endif	/* DEBUG_DMA */
1384 
1385 	/*
1386 	 * Build a list of pages to free back to the VM system.
1387 	 */
1388 	TAILQ_INIT(&mlist);
1389 	for (curseg = 0; curseg < nsegs; curseg++) {
1390 		for (addr = segs[curseg].ds_addr;
1391 		    addr < (segs[curseg].ds_addr + segs[curseg].ds_len);
1392 		    addr += PAGE_SIZE) {
1393 			m = PHYS_TO_VM_PAGE(addr);
1394 			TAILQ_INSERT_TAIL(&mlist, m, pageq.queue);
1395 		}
1396 	}
1397 	uvm_pglistfree(&mlist);
1398 }
1399 
1400 /*
1401  * Common function for mapping DMA-safe memory.  May be called by
1402  * bus-specific DMA memory map functions.
1403  */
1404 int
1405 _bus_dmamem_map(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
1406     size_t size, void **kvap, int flags)
1407 {
1408 	vaddr_t va;
1409 	paddr_t pa;
1410 	int curseg;
1411 	const uvm_flag_t kmflags = UVM_KMF_VAONLY
1412 	    | ((flags & BUS_DMA_NOWAIT) != 0 ? UVM_KMF_NOWAIT : 0);
1413 	vsize_t align = 0;
1414 
1415 #ifdef DEBUG_DMA
1416 	printf("dmamem_map: t=%p segs=%p nsegs=%#x size=%#lx flags=%#x\n", t,
1417 	    segs, nsegs, (unsigned long)size, flags);
1418 #endif	/* DEBUG_DMA */
1419 
1420 #ifdef PMAP_MAP_POOLPAGE
1421 	/*
1422 	 * If all of memory is mapped, and we are mapping a single physically
1423 	 * contiguous area then this area is already mapped.  Let's see if we
1424 	 * avoid having a separate mapping for it.
1425 	 */
1426 	if (nsegs == 1 && (flags & BUS_DMA_PREFETCHABLE) == 0) {
1427 		/*
1428 		 * If this is a non-COHERENT mapping, then the existing kernel
1429 		 * mapping is already compatible with it.
1430 		 */
1431 		bool direct_mapable = (flags & BUS_DMA_COHERENT) == 0;
1432 		pa = segs[0].ds_addr;
1433 
1434 		/*
1435 		 * This is a COHERENT mapping which, unless this address is in
1436 		 * a COHERENT dma range, will not be compatible.
1437 		 */
1438 		if (t->_ranges != NULL) {
1439 			const struct arm32_dma_range * const dr =
1440 			    _bus_dma_paddr_inrange(t->_ranges, t->_nranges, pa);
1441 			if (dr != NULL
1442 			    && (dr->dr_flags & _BUS_DMAMAP_COHERENT)) {
1443 				direct_mapable = true;
1444 			}
1445 		}
1446 
1447 #ifdef PMAP_NEED_ALLOC_POOLPAGE
1448 		/*
1449 		 * The page can only be direct mapped if was allocated out
1450 		 * of the arm poolpage vm freelist.
1451 		 */
1452 		uvm_physseg_t upm = uvm_physseg_find(atop(pa), NULL);
1453 		KASSERT(uvm_physseg_valid_p(upm));
1454 		if (direct_mapable) {
1455 			direct_mapable =
1456 			    (arm_poolpage_vmfreelist == uvm_physseg_get_free_list(upm));
1457 		}
1458 #endif
1459 
1460 		if (direct_mapable) {
1461 			*kvap = (void *)PMAP_MAP_POOLPAGE(pa);
1462 #ifdef DEBUG_DMA
1463 			printf("dmamem_map: =%p\n", *kvap);
1464 #endif	/* DEBUG_DMA */
1465 			return 0;
1466 		}
1467 	}
1468 #endif
1469 
1470 	size = round_page(size);
1471 
1472 #ifdef PMAP_MAPSIZE1
1473 	if (size >= PMAP_MAPSIZE1)
1474 		align = PMAP_MAPSIZE1;
1475 
1476 #ifdef PMAP_MAPSIZE2
1477 
1478 #if PMAP_MAPSIZE1 > PMAP_MAPSIZE2
1479 #error PMAP_MAPSIZE1 must be smaller than PMAP_MAPSIZE2
1480 #endif
1481 
1482 	if (size >= PMAP_MAPSIZE2)
1483 		align = PMAP_MAPSIZE2;
1484 
1485 #ifdef PMAP_MAPSIZE3
1486 
1487 #if PMAP_MAPSIZE2 > PMAP_MAPSIZE3
1488 #error PMAP_MAPSIZE2 must be smaller than PMAP_MAPSIZE3
1489 #endif
1490 
1491 	if (size >= PMAP_MAPSIZE3)
1492 		align = PMAP_MAPSIZE3;
1493 #endif
1494 #endif
1495 #endif
1496 
1497 	va = uvm_km_alloc(kernel_map, size, align, kmflags);
1498 	if (__predict_false(va == 0 && align > 0)) {
1499 		align = 0;
1500 		va = uvm_km_alloc(kernel_map, size, 0, kmflags);
1501 	}
1502 
1503 	if (va == 0)
1504 		return ENOMEM;
1505 
1506 	*kvap = (void *)va;
1507 
1508 	for (curseg = 0; curseg < nsegs; curseg++) {
1509 		for (pa = segs[curseg].ds_addr;
1510 		    pa < (segs[curseg].ds_addr + segs[curseg].ds_len);
1511 		    pa += PAGE_SIZE, va += PAGE_SIZE, size -= PAGE_SIZE) {
1512 			bool uncached = (flags & BUS_DMA_COHERENT);
1513 			bool prefetchable = (flags & BUS_DMA_PREFETCHABLE);
1514 #ifdef DEBUG_DMA
1515 			printf("wiring P%#lx to V%#lx\n", pa, va);
1516 #endif	/* DEBUG_DMA */
1517 			if (size == 0)
1518 				panic("_bus_dmamem_map: size botch");
1519 
1520 			const struct arm32_dma_range * const dr =
1521 			    _bus_dma_paddr_inrange(t->_ranges, t->_nranges, pa);
1522 			/*
1523 			 * If this dma region is coherent then there is
1524 			 * no need for an uncached mapping.
1525 			 */
1526 			if (dr != NULL
1527 			    && (dr->dr_flags & _BUS_DMAMAP_COHERENT)) {
1528 				uncached = false;
1529 			}
1530 
1531 			u_int pmap_flags = PMAP_WIRED;
1532 			if (prefetchable)
1533 				pmap_flags |= PMAP_WRITE_COMBINE;
1534 			else if (uncached)
1535 				pmap_flags |= PMAP_NOCACHE;
1536 
1537 			pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE,
1538 			    pmap_flags);
1539 		}
1540 	}
1541 	pmap_update(pmap_kernel());
1542 #ifdef DEBUG_DMA
1543 	printf("dmamem_map: =%p\n", *kvap);
1544 #endif	/* DEBUG_DMA */
1545 	return 0;
1546 }
1547 
1548 /*
1549  * Common function for unmapping DMA-safe memory.  May be called by
1550  * bus-specific DMA memory unmapping functions.
1551  */
1552 void
1553 _bus_dmamem_unmap(bus_dma_tag_t t, void *kva, size_t size)
1554 {
1555 
1556 #ifdef DEBUG_DMA
1557 	printf("dmamem_unmap: t=%p kva=%p size=%#zx\n", t, kva, size);
1558 #endif	/* DEBUG_DMA */
1559 	KASSERTMSG(((uintptr_t)kva & PAGE_MASK) == 0,
1560 	    "kva %p (%#"PRIxPTR")", kva, ((uintptr_t)kva & PAGE_MASK));
1561 
1562 #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
1563 	/*
1564 	 * Check to see if this used direct mapped memory.  Get its physical
1565 	 * address and try to map it.  If the resultant matches the kva, then
1566 	 * it was and so we can just return since we have nothing to free up.
1567 	 */
1568 	paddr_t pa;
1569 	vaddr_t va;
1570 	(void)pmap_extract(pmap_kernel(), (vaddr_t)kva, &pa);
1571 	if (mm_md_direct_mapped_phys(pa, &va) && va == (vaddr_t)kva)
1572 		return;
1573 #endif
1574 
1575 	size = round_page(size);
1576 	pmap_kremove((vaddr_t)kva, size);
1577 	pmap_update(pmap_kernel());
1578 	uvm_km_free(kernel_map, (vaddr_t)kva, size, UVM_KMF_VAONLY);
1579 }
1580 
1581 /*
1582  * Common function for mmap(2)'ing DMA-safe memory.  May be called by
1583  * bus-specific DMA mmap(2)'ing functions.
1584  */
1585 paddr_t
1586 _bus_dmamem_mmap(bus_dma_tag_t t, bus_dma_segment_t *segs, int nsegs,
1587     off_t off, int prot, int flags)
1588 {
1589 	paddr_t map_flags;
1590 	int i;
1591 
1592 	for (i = 0; i < nsegs; i++) {
1593 		KASSERTMSG((off & PAGE_MASK) == 0,
1594 		    "off %#jx (%#x)", (uintmax_t)off, (int)off & PAGE_MASK);
1595 		KASSERTMSG((segs[i].ds_addr & PAGE_MASK) == 0,
1596 		    "ds_addr %#lx (%#x)", segs[i].ds_addr,
1597 		    (int)segs[i].ds_addr & PAGE_MASK);
1598 		KASSERTMSG((segs[i].ds_len & PAGE_MASK) == 0,
1599 		    "ds_len %#lx (%#x)", segs[i].ds_addr,
1600 		    (int)segs[i].ds_addr & PAGE_MASK);
1601 		if (off >= segs[i].ds_len) {
1602 			off -= segs[i].ds_len;
1603 			continue;
1604 		}
1605 
1606 		map_flags = 0;
1607 		if (flags & BUS_DMA_PREFETCHABLE)
1608 			map_flags |= ARM_MMAP_WRITECOMBINE;
1609 
1610 		return arm_btop((u_long)segs[i].ds_addr + off) | map_flags;
1611 
1612 	}
1613 
1614 	/* Page not found. */
1615 	return -1;
1616 }
1617 
1618 /**********************************************************************
1619  * DMA utility functions
1620  **********************************************************************/
1621 
1622 /*
1623  * Utility function to load a linear buffer.  lastaddrp holds state
1624  * between invocations (for multiple-buffer loads).  segp contains
1625  * the starting segment on entrance, and the ending segment on exit.
1626  * first indicates if this is the first invocation of this function.
1627  */
1628 int
1629 _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
1630     bus_size_t buflen, struct vmspace *vm, int flags)
1631 {
1632 	bus_size_t sgsize;
1633 	bus_addr_t curaddr;
1634 	vaddr_t vaddr = (vaddr_t)buf;
1635 	int error;
1636 	pmap_t pmap;
1637 
1638 #ifdef DEBUG_DMA
1639 	printf("_bus_dmamap_load_buffer(buf=%p, len=%#lx, flags=%#x)\n",
1640 	    buf, buflen, flags);
1641 #endif	/* DEBUG_DMA */
1642 
1643 	pmap = vm_map_pmap(&vm->vm_map);
1644 
1645 	while (buflen > 0) {
1646 		/*
1647 		 * Get the physical address for this segment.
1648 		 *
1649 		 */
1650 		bool coherent;
1651 		bool ok __diagused;
1652 		ok = pmap_extract_coherency(pmap, vaddr, &curaddr, &coherent);
1653 
1654 		/*
1655 		 * trying to bus_dmamap_load an unmapped buffer is a
1656 		 * programming error.
1657 		 */
1658 		KASSERT(ok);
1659 
1660 		KASSERTMSG((vaddr & PAGE_MASK) == (curaddr & PAGE_MASK),
1661 		    "va %#lx curaddr %#lx", vaddr, curaddr);
1662 
1663 		/*
1664 		 * Compute the segment size, and adjust counts.
1665 		 */
1666 		sgsize = PAGE_SIZE - ((u_long)vaddr & PGOFSET);
1667 		if (buflen < sgsize)
1668 			sgsize = buflen;
1669 
1670 		error = _bus_dmamap_load_paddr(t, map, curaddr, sgsize,
1671 		    coherent);
1672 		if (error)
1673 			return error;
1674 
1675 		vaddr += sgsize;
1676 		buflen -= sgsize;
1677 	}
1678 
1679 	return 0;
1680 }
1681 
1682 /*
1683  * Allocate physical memory from the given physical address range.
1684  * Called by DMA-safe memory allocation methods.
1685  */
1686 int
1687 _bus_dmamem_alloc_range(bus_dma_tag_t t, bus_size_t size, bus_size_t alignment,
1688     bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
1689     int flags, paddr_t low, paddr_t high)
1690 {
1691 	paddr_t curaddr, lastaddr;
1692 	struct vm_page *m;
1693 	struct pglist mlist;
1694 	int curseg, error;
1695 
1696 	KASSERTMSG(boundary == 0 || (boundary & (boundary - 1)) == 0,
1697 	    "invalid boundary %#lx", boundary);
1698 
1699 #ifdef DEBUG_DMA
1700 	printf("alloc_range: t=%p size=%#lx align=%#lx boundary=%#lx segs=%p nsegs=%#x rsegs=%p flags=%#x lo=%#lx hi=%#lx\n",
1701 	    t, size, alignment, boundary, segs, nsegs, rsegs, flags, low, high);
1702 #endif	/* DEBUG_DMA */
1703 
1704 	/* Always round the size. */
1705 	size = round_page(size);
1706 
1707 	/*
1708 	 * We accept boundaries < size, splitting in multiple segments
1709 	 * if needed. uvm_pglistalloc does not, so compute an appropriate
1710 	 * boundary: next power of 2 >= size
1711 	 */
1712 	bus_size_t uboundary = boundary;
1713 	if (uboundary <= PAGE_SIZE) {
1714 		uboundary = 0;
1715 	} else {
1716 		while (uboundary < size) {
1717 			uboundary <<= 1;
1718 		}
1719 	}
1720 
1721 	/*
1722 	 * Allocate pages from the VM system.
1723 	 */
1724 	error = uvm_pglistalloc(size, low, high, alignment, uboundary,
1725 	    &mlist, nsegs, (flags & BUS_DMA_NOWAIT) == 0);
1726 	if (error)
1727 		return error;
1728 
1729 	/*
1730 	 * Compute the location, size, and number of segments actually
1731 	 * returned by the VM code.
1732 	 */
1733 	m = TAILQ_FIRST(&mlist);
1734 	curseg = 0;
1735 	lastaddr = segs[curseg].ds_addr = segs[curseg]._ds_paddr =
1736 	    VM_PAGE_TO_PHYS(m);
1737 	segs[curseg].ds_len = PAGE_SIZE;
1738 #ifdef DEBUG_DMA
1739 		printf("alloc: page %#lx\n", lastaddr);
1740 #endif	/* DEBUG_DMA */
1741 	m = TAILQ_NEXT(m, pageq.queue);
1742 
1743 	for (; m != NULL; m = TAILQ_NEXT(m, pageq.queue)) {
1744 		curaddr = VM_PAGE_TO_PHYS(m);
1745 		KASSERTMSG(low <= curaddr && curaddr < high,
1746 		    "uvm_pglistalloc returned non-sensicaladdress %#lx "
1747 		    "(low=%#lx, high=%#lx\n", curaddr, low, high);
1748 #ifdef DEBUG_DMA
1749 		printf("alloc: page %#lx\n", curaddr);
1750 #endif	/* DEBUG_DMA */
1751 		if (curaddr == lastaddr + PAGE_SIZE
1752 		    && (lastaddr & boundary) == (curaddr & boundary))
1753 			segs[curseg].ds_len += PAGE_SIZE;
1754 		else {
1755 			curseg++;
1756 			if (curseg >= nsegs) {
1757 				uvm_pglistfree(&mlist);
1758 				return EFBIG;
1759 			}
1760 			segs[curseg].ds_addr = curaddr;
1761 			segs[curseg]._ds_paddr = curaddr;
1762 			segs[curseg].ds_len = PAGE_SIZE;
1763 		}
1764 		lastaddr = curaddr;
1765 	}
1766 
1767 	*rsegs = curseg + 1;
1768 
1769 	return 0;
1770 }
1771 
1772 /*
1773  * Check if a memory region intersects with a DMA range, and return the
1774  * page-rounded intersection if it does.
1775  */
1776 int
1777 arm32_dma_range_intersect(struct arm32_dma_range *ranges, int nranges,
1778     paddr_t pa, psize_t size, paddr_t *pap, psize_t *sizep)
1779 {
1780 	struct arm32_dma_range *dr;
1781 	int i;
1782 
1783 	if (ranges == NULL)
1784 		return 0;
1785 
1786 	for (i = 0, dr = ranges; i < nranges; i++, dr++) {
1787 		if (dr->dr_sysbase <= pa &&
1788 		    pa < (dr->dr_sysbase + dr->dr_len)) {
1789 			/*
1790 			 * Beginning of region intersects with this range.
1791 			 */
1792 			*pap = trunc_page(pa);
1793 			*sizep = round_page(uimin(pa + size,
1794 			    dr->dr_sysbase + dr->dr_len) - pa);
1795 			return 1;
1796 		}
1797 		if (pa < dr->dr_sysbase && dr->dr_sysbase < (pa + size)) {
1798 			/*
1799 			 * End of region intersects with this range.
1800 			 */
1801 			*pap = trunc_page(dr->dr_sysbase);
1802 			*sizep = round_page(uimin((pa + size) - dr->dr_sysbase,
1803 			    dr->dr_len));
1804 			return 1;
1805 		}
1806 	}
1807 
1808 	/* No intersection found. */
1809 	return 0;
1810 }
1811 
1812 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
1813 static int
1814 _bus_dma_alloc_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map,
1815     bus_size_t size, int flags)
1816 {
1817 	struct arm32_bus_dma_cookie *cookie = map->_dm_cookie;
1818 	int error = 0;
1819 
1820 	KASSERT(cookie != NULL);
1821 
1822 	cookie->id_bouncebuflen = round_page(size);
1823 	error = _bus_dmamem_alloc(t, cookie->id_bouncebuflen,
1824 	    PAGE_SIZE, map->_dm_boundary, cookie->id_bouncesegs,
1825 	    map->_dm_segcnt, &cookie->id_nbouncesegs, flags);
1826 	if (error == 0) {
1827 		error = _bus_dmamem_map(t, cookie->id_bouncesegs,
1828 		    cookie->id_nbouncesegs, cookie->id_bouncebuflen,
1829 		    (void **)&cookie->id_bouncebuf, flags);
1830 		if (error) {
1831 			_bus_dmamem_free(t, cookie->id_bouncesegs,
1832 			    cookie->id_nbouncesegs);
1833 			cookie->id_bouncebuflen = 0;
1834 			cookie->id_nbouncesegs = 0;
1835 		} else {
1836 			cookie->id_flags |= _BUS_DMA_HAS_BOUNCE;
1837 		}
1838 	} else {
1839 		cookie->id_bouncebuflen = 0;
1840 		cookie->id_nbouncesegs = 0;
1841 	}
1842 
1843 	return error;
1844 }
1845 
1846 static void
1847 _bus_dma_free_bouncebuf(bus_dma_tag_t t, bus_dmamap_t map)
1848 {
1849 	struct arm32_bus_dma_cookie *cookie = map->_dm_cookie;
1850 
1851 	KASSERT(cookie != NULL);
1852 
1853 	_bus_dmamem_unmap(t, cookie->id_bouncebuf, cookie->id_bouncebuflen);
1854 	_bus_dmamem_free(t, cookie->id_bouncesegs, cookie->id_nbouncesegs);
1855 	cookie->id_bouncebuflen = 0;
1856 	cookie->id_nbouncesegs = 0;
1857 	cookie->id_flags &= ~_BUS_DMA_HAS_BOUNCE;
1858 }
1859 #endif /* _ARM32_NEED_BUS_DMA_BOUNCE */
1860 
1861 /*
1862  * This function does the same as uiomove, but takes an explicit
1863  * direction, and does not update the uio structure.
1864  */
1865 static int
1866 _bus_dma_uiomove(void *buf, struct uio *uio, size_t n, int direction)
1867 {
1868 	struct iovec *iov;
1869 	int error;
1870 	struct vmspace *vm;
1871 	char *cp;
1872 	size_t resid, cnt;
1873 	int i;
1874 
1875 	iov = uio->uio_iov;
1876 	vm = uio->uio_vmspace;
1877 	cp = buf;
1878 	resid = n;
1879 
1880 	for (i = 0; i < uio->uio_iovcnt && resid > 0; i++) {
1881 		iov = &uio->uio_iov[i];
1882 		if (iov->iov_len == 0)
1883 			continue;
1884 		cnt = MIN(resid, iov->iov_len);
1885 
1886 		if (!VMSPACE_IS_KERNEL_P(vm)) {
1887 			preempt_point();
1888 		}
1889 		if (direction == UIO_READ) {
1890 			error = copyout_vmspace(vm, cp, iov->iov_base, cnt);
1891 		} else {
1892 			error = copyin_vmspace(vm, iov->iov_base, cp, cnt);
1893 		}
1894 		if (error)
1895 			return error;
1896 		cp += cnt;
1897 		resid -= cnt;
1898 	}
1899 	return 0;
1900 }
1901 
/*
 * Derive a DMA tag restricted to the addresses [min_addr, max_addr].
 *
 * If the parent's ranges all lie within the new limits, the parent tag
 * itself is returned in *newtag (with an extra reference if it is
 * reference counted).  Otherwise a new tag is allocated whose ranges
 * are the parent's ranges clipped to the new limits.
 *
 * Returns 0 on success, ENOMEM if the new tag cannot be allocated, and
 * EOPNOTSUPP if min_addr >= max_addr or when built without
 * _ARM32_NEED_BUS_DMA_BOUNCE.
 */
int
_bus_dmatag_subregion(bus_dma_tag_t tag, bus_addr_t min_addr,
    bus_addr_t max_addr, bus_dma_tag_t *newtag, int flags)
{
#ifdef _ARM32_NEED_BUS_DMA_BOUNCE
	if (min_addr >= max_addr)
		return EOPNOTSUPP;

	struct arm32_dma_range *dr;
	bool psubset = true;	/* parent ranges all within new limits? */
	size_t nranges = 0;	/* parent ranges touching the new limits */
	size_t i;
	for (i = 0, dr = tag->_ranges; i < tag->_nranges; i++, dr++) {
		/*
		 * If the new {min,max}_addr are narrower than any of the
		 * ranges in the parent tag then we need a new tag;
		 * otherwise the parent tag is a subset of the new
		 * range and can continue to be used.
		 */
		if (min_addr > dr->dr_sysbase
		    || max_addr < dr->dr_sysbase + dr->dr_len - 1) {
			psubset = false;
		}
		/*
		 * NOTE(review): this overlap test (and the matching one in
		 * the copy loop below) compares against dr_sysbase + dr_len,
		 * one past the range's last address, whereas the subset test
		 * above uses "- 1".  A range ending exactly at min_addr is
		 * therefore counted and later clipped to zero length --
		 * confirm this is intended.
		 */
		if (min_addr <= dr->dr_sysbase + dr->dr_len
		    && max_addr >= dr->dr_sysbase) {
			nranges++;
		}
	}
	/* Always allocate room for at least one range in a new tag. */
	if (nranges == 0) {
		nranges = 1;
		psubset = false;
	}
	if (psubset) {
		*newtag = tag;
		/* if the tag must be freed, add a reference */
		if (tag->_tag_needs_free)
			(tag->_tag_needs_free)++;
		return 0;
	}

	/* The tag and its range array are allocated as one object. */
	const size_t tagsize = sizeof(*tag) + nranges * sizeof(*dr);
	if ((*newtag = kmem_intr_zalloc(tagsize,
	    (flags & BUS_DMA_NOWAIT) ? KM_NOSLEEP : KM_SLEEP)) == NULL)
		return ENOMEM;

	/*
	 * The ranges live directly behind the tag.  Start from a copy of
	 * the parent and then override the fields that differ;
	 * _tag_needs_free == 1 marks a single reference to an allocated
	 * tag.
	 */
	dr = (void *)(*newtag + 1);
	**newtag = *tag;
	(*newtag)->_tag_needs_free = 1;
	(*newtag)->_ranges = dr;
	(*newtag)->_nranges = nranges;

	if (tag->_ranges == NULL) {
		/*
		 * The parent has no ranges: describe the new limits with a
		 * single range whose system and bus addresses are equal.
		 */
		dr->dr_sysbase = min_addr;
		dr->dr_busbase = min_addr;
		dr->dr_len = max_addr + 1 - min_addr;
	} else {
		struct arm32_dma_range *pdr;

		for (i = 0, pdr = tag->_ranges; i < tag->_nranges; i++, pdr++) {
			KASSERT(nranges != 0);

			if (min_addr > pdr->dr_sysbase + pdr->dr_len
			    || max_addr < pdr->dr_sysbase) {
				/*
				 * this range doesn't overlap with new limits,
				 * so skip.
				 */
				continue;
			}
			/*
			 * Copy the range and adjust to fit within the new
			 * limits
			 */
			dr[0] = pdr[0];
			if (dr->dr_sysbase < min_addr) {
				/* Trim the front; bus base moves with it. */
				psize_t diff = min_addr - dr->dr_sysbase;
				dr->dr_busbase += diff;
				dr->dr_len -= diff;
				dr->dr_sysbase += diff;
			}
			if (max_addr <= dr->dr_sysbase + dr->dr_len - 1) {
				/* Trim the tail to end at max_addr. */
				dr->dr_len = max_addr + 1 - dr->dr_sysbase;
			}
			dr++;
			nranges--;
		}
	}

	return 0;
#else
	return EOPNOTSUPP;
#endif /* _ARM32_NEED_BUS_DMA_BOUNCE */
}
1995 
1996 void
1997 _bus_dmatag_destroy(bus_dma_tag_t tag)
1998 {
1999 #ifdef _ARM32_NEED_BUS_DMA_BOUNCE
2000 	switch (tag->_tag_needs_free) {
2001 	case 0:
2002 		break;				/* not allocated with kmem */
2003 	case 1: {
2004 		const size_t tagsize = sizeof(*tag)
2005 		    + tag->_nranges * sizeof(*tag->_ranges);
2006 		kmem_intr_free(tag, tagsize);	/* last reference to tag */
2007 		break;
2008 	}
2009 	default:
2010 		(tag->_tag_needs_free)--;	/* one less reference */
2011 	}
2012 #endif
2013 }
2014