xref: /netbsd-src/sys/arch/arm/apple/apple_dart.c (revision b5511178647c2b729885643786d61219695e9788)
1 /* $NetBSD: apple_dart.c,v 1.5 2023/02/24 11:19:15 jmcneill Exp $ */
2 /*	$OpenBSD: apldart.c,v 1.10 2022/02/27 17:36:52 kettenis Exp $	*/
3 
4 /*-
5  * Copyright (c) 2021 Mark Kettenis <kettenis@openbsd.org>
6  * Copyright (c) 2021 Jared McNeill <jmcneill@invisible.ca>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 //#define APPLE_DART_DEBUG
22 
23 #include <sys/cdefs.h>
24 __KERNEL_RCSID(0, "$NetBSD: apple_dart.c,v 1.5 2023/02/24 11:19:15 jmcneill Exp $");
25 
26 #include <sys/param.h>
27 #include <sys/bus.h>
28 #include <sys/device.h>
29 #include <sys/intr.h>
30 #include <sys/kernel.h>
31 #include <sys/systm.h>
32 #include <sys/kmem.h>
33 #include <sys/vmem.h>
34 
35 #include <arm/cpufunc.h>
36 
37 #include <dev/fdt/fdtvar.h>
/*
 * This driver largely ignores stream IDs and simply uses a single
 * translation table for all the devices that it serves.  This is good
 * enough for the PCIe host bridge that serves the on-board devices on
 * the current generation Apple Silicon Macs, as these only have a
 * single PCIe device behind each DART.
 */
45 
/*
 * DART registers
 *
 * Un-indented names are register offsets passed to DART_READ() and
 * DART_WRITE(); the indented names that follow a register are bit
 * fields within that register.
 */

/* Capability register; bit 0 is tested in attach to select bypass mode. */
#define	DART_PARAMS2		0x0004
#define	 DART_PARAMS2_BYPASS_SUPPORT	__BIT(0)

/* TLB operation register and the stream-ID mask used with it. */
#define	DART_TLB_OP		0x0020
#define	 DART_TLB_OP_BUSY		__BIT(2)
#define	 DART_TLB_OP_FLUSH		__BIT(20)
#define	DART_TLB_OP_SIDMASK	0x0034

/*
 * Error reporting.  The DART_ERR_*_FAULT / DART_ERR_NO* values are
 * compared against the DART_ERR_CODE_MASK field by the interrupt handler.
 */
#define	DART_ERR_STATUS		0x0040
#define	 DART_ERR_FLAG		__BIT(31)
#define	 DART_ERR_STREAM_MASK	__BITS(27, 24)
#define	 DART_ERR_CODE_MASK	__BITS(11, 0)
#define	 DART_ERR_READ_FAULT	__BIT(4)
#define	 DART_ERR_WRITE_FAULT	__BIT(3)
#define	 DART_ERR_NOPTE		__BIT(2)
#define	 DART_ERR_NOPMD		__BIT(1)
#define	 DART_ERR_NOTTBR	__BIT(0)
#define	DART_ERR_ADDRL		0x0050
#define	DART_ERR_ADDRH		0x0054

/* Configuration register; locked DARTs are skipped in attach. */
#define	DART_CONFIG		0x0060
#define	 DART_CONFIG_LOCK		__BIT(15)

/* Per-stream translation control register (one 32-bit word per stream). */
#define	DART_TCR(sid)		(0x0100 + (sid) * 0x4)
#define	 DART_TCR_TRANSLATE_ENABLE	__BIT(7)
#define	 DART_TCR_BYPASS_DART		__BIT(8)
#define	 DART_TCR_BYPASS_DAPF		__BIT(12)

/*
 * Per-stream translation table base registers: 0x10 bytes per stream,
 * one 32-bit word per table index.  The written value is the table
 * address shifted right by DART_TTBR_SHIFT, ORed with DART_TTBR_VALID.
 */
#define	DART_TTBR(sid, idx)	(0x0200 + (sid) * 0x10 + (idx) * 0x4)
#define	 DART_TTBR_VALID		__BIT(31)
#define	 DART_TTBR_SHIFT		12
75 
/* Stream count used for the bypass setup, and a bitmask with one bit per stream. */
#define	DART_NUM_STREAMS	16
#define	DART_ALL_STREAMS	((1 << DART_NUM_STREAMS) - 1)

/*
 * Device virtual address window managed by the vmem arena created in
 * attach, and the translation granule (page) size with its offset mask.
 */
#define	DART_APERTURE_START	0x00100000
#define	DART_APERTURE_SIZE	0x3fe00000
#define	DART_PAGE_SIZE		16384
#define	DART_PAGE_MASK		(DART_PAGE_SIZE - 1)
83 
/*
 * Some hardware (e.g. bge(4)) will always use (aligned) 64-bit memory
 * access.  To make sure this doesn't fault, round the subpage limits
 * down and up accordingly.
 */
#define	DART_OFFSET_MASK	7

/* Low bits ORed into L1 entries, and the flag bits of L2 entries. */
#define	DART_L1_TABLE		0x3
#define	DART_L2_INVAL		0x0
#define	DART_L2_VALID		__BIT(0)
#define	DART_L2_FULL_PAGE	__BIT(1)

/*
 * Sub-page window of an L2 entry.  Bits 13:2 of an in-page offset are
 * extracted by DART_L2_SUBPAGE() and stored in bits 63:52 (start) and
 * bits 51:40 (end) of the entry.
 */
#define	DART_L2_START_MASK	__BITS(63, 52)
#define	DART_L2_END_MASK	__BITS(51, 40)
#define	DART_L2_SUBPAGE(addr)	__SHIFTOUT((addr), __BITS(13, 2))
#define	DART_L2_START(addr)	__SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_START_MASK)
#define	DART_L2_END(addr)	__SHIFTIN(DART_L2_SUBPAGE(addr), DART_L2_END_MASK)

/* Round up / truncate to a DART page or to the 8-byte access granule. */
#define	DART_ROUND_PAGE(pa)	(((pa) + DART_PAGE_MASK) & ~DART_PAGE_MASK)
#define	DART_TRUNC_PAGE(pa)	((pa) & ~DART_PAGE_MASK)
#define	DART_ROUND_OFFSET(pa)	(((pa) + DART_OFFSET_MASK) & ~DART_OFFSET_MASK)
#define	DART_TRUNC_OFFSET(pa)	((pa) & ~DART_OFFSET_MASK)
106 
107 static const struct device_compatible_entry compat_data[] = {
108 	{ .compat = "apple,dart-m1",		.value = 16 },
109 	{ .compat = "apple,t8103-dart",		.value = 16 },
110 	DEVICE_COMPAT_EOL
111 };
112 
113 static struct arm32_dma_range apple_dart_dma_ranges[] = {
114 	[0] = {
115 		.dr_sysbase = 0,
116 		.dr_busbase = 0,
117 		.dr_len = UINTPTR_MAX,
118 		.dr_flags = _BUS_DMAMAP_COHERENT,
119 	}
120 };
121 
122 struct apple_dart_map_state {
123 	bus_addr_t ams_dva;
124 	bus_size_t ams_len;
125 };
126 
127 struct apple_dart_dma {
128 	bus_dmamap_t dma_map;
129 	bus_dma_segment_t dma_seg;
130 	bus_size_t dma_size;
131 	void *dma_kva;
132 };
133 
134 #define	DART_DMA_MAP(_dma)	((_dma)->dma_map)
135 #define	DART_DMA_LEN(_dma)	((_dma)->dma_size)
136 #define	DART_DMA_DVA(_dma)	((_dma)->dma_map->dm_segs[0].ds_addr)
137 #define	DART_DMA_KVA(_dma)	((_dma)->dma_kva)
138 
139 struct apple_dart_softc {
140 	device_t sc_dev;
141 	int sc_phandle;
142 	bus_space_tag_t sc_bst;
143 	bus_space_handle_t sc_bsh;
144 	bus_dma_tag_t sc_dmat;
145 
146 	uint64_t sc_sid_mask;
147 	u_int sc_nsid;
148 
149 	vmem_t *sc_dvamap;
150 
151 	struct apple_dart_dma *sc_l1;
152 	struct apple_dart_dma **sc_l2;
153 	u_int sc_nl2;
154 
155 	struct arm32_bus_dma_tag sc_bus_dmat;
156 };
157 
/* 32-bit register accessors; `reg' is an offset into the mapped window. */
#define	DART_READ(sc, reg)						\
	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))
#define	DART_WRITE(sc, reg, val)					\
	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
162 
163 static void
apple_dart_flush_tlb(struct apple_dart_softc * sc)164 apple_dart_flush_tlb(struct apple_dart_softc *sc)
165 {
166 	dsb(sy);
167 	isb();
168 
169 	DART_WRITE(sc, DART_TLB_OP_SIDMASK, sc->sc_sid_mask);
170 	DART_WRITE(sc, DART_TLB_OP, DART_TLB_OP_FLUSH);
171 	while ((DART_READ(sc, DART_TLB_OP) & DART_TLB_OP_BUSY) != 0) {
172 		__asm volatile ("yield" ::: "memory");
173 	}
174 }
175 
176 static struct apple_dart_dma *
apple_dart_dma_alloc(bus_dma_tag_t dmat,bus_size_t size,bus_size_t align)177 apple_dart_dma_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
178 {
179 	struct apple_dart_dma *dma;
180 	int nsegs, error;
181 
182 	dma = kmem_zalloc(sizeof(*dma), KM_SLEEP);
183 	dma->dma_size = size;
184 
185 	error = bus_dmamem_alloc(dmat, size, align, 0, &dma->dma_seg, 1,
186 	    &nsegs, BUS_DMA_WAITOK);
187 	if (error != 0) {
188 		goto destroy;
189 	}
190 
191 	error = bus_dmamem_map(dmat, &dma->dma_seg, nsegs, size,
192 	    &dma->dma_kva, BUS_DMA_WAITOK | BUS_DMA_NOCACHE);
193 	if (error != 0) {
194 		goto free;
195 	}
196 
197 	error = bus_dmamap_create(dmat, size, 1, size, 0,
198 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
199 	if (error != 0) {
200 		goto dmafree;
201 	}
202 
203 	error = bus_dmamap_load(dmat, dma->dma_map, dma->dma_kva, size,
204 	    NULL, BUS_DMA_WAITOK);
205 	if (error != 0) {
206 		goto unmap;
207 	}
208 
209 	memset(dma->dma_kva, 0, size);
210 
211 	return dma;
212 
213 destroy:
214 	bus_dmamap_destroy(dmat, dma->dma_map);
215 unmap:
216 	bus_dmamem_unmap(dmat, dma->dma_kva, size);
217 free:
218 	bus_dmamem_free(dmat, &dma->dma_seg, 1);
219 dmafree:
220 	kmem_free(dma, sizeof(*dma));
221 	return NULL;
222 }
223 
224 static int
apple_dart_intr(void * priv)225 apple_dart_intr(void *priv)
226 {
227 	struct apple_dart_softc * const sc = priv;
228 	char fdt_path[128];
229 	uint64_t addr;
230 	uint32_t status;
231 
232 	status = DART_READ(sc, DART_ERR_STATUS);
233 	addr  = __SHIFTIN(DART_READ(sc, DART_ERR_ADDRL), __BITS(31, 0));
234 	addr |= __SHIFTIN(DART_READ(sc, DART_ERR_ADDRH), __BITS(63, 32));
235 	DART_WRITE(sc, DART_ERR_STATUS, status);
236 
237 	if ((status & DART_ERR_FLAG) == 0)
238 		return 1;
239 
240 #ifdef APPLE_DART_DEBUG
241 	printf("%s: status %#"PRIx32"\n", __func__, status);
242 	printf("%s: addrl  %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRL));
243 	printf("%s: addrh  %#"PRIx32"\n", __func__, DART_READ(sc, DART_ERR_ADDRH));
244 #endif
245 
246 	const char *reason = NULL;
247 	int32_t code = __SHIFTOUT(status, DART_ERR_CODE_MASK);
248 	switch (code) {
249 	case DART_ERR_NOTTBR:
250 	    reason = "no ttbr for address";
251 	    break;
252 	case DART_ERR_NOPMD:
253 	    reason = "no pmd for address";
254 	    break;
255 	case DART_ERR_NOPTE:
256 	    reason = "no pte for address";
257 	    break;
258 	case DART_ERR_WRITE_FAULT:
259 	    reason = "write fault";
260 	    break;
261 	case DART_ERR_READ_FAULT:
262 	    reason = "read fault";
263 	    break;
264 	}
265 	fdtbus_get_path(sc->sc_phandle, fdt_path, sizeof(fdt_path));
266 
267 	printf("%s (%s): error addr 0x%016lx status 0x%08x: %s\n",
268 	    device_xname(sc->sc_dev), fdt_path, addr, status, reason);
269 
270 	return 1;
271 }
272 
273 static volatile uint64_t *
apple_dart_lookup_tte(struct apple_dart_softc * sc,bus_addr_t dva)274 apple_dart_lookup_tte(struct apple_dart_softc *sc, bus_addr_t dva)
275 {
276 	int idx = dva / DART_PAGE_SIZE;
277 	int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t));
278 	int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t));
279 	volatile uint64_t *l2 = DART_DMA_KVA(sc->sc_l2[l2_idx]);
280 
281 	return &l2[tte_idx];
282 }
283 
284 static void
apple_dart_unload_map(struct apple_dart_softc * sc,bus_dmamap_t map)285 apple_dart_unload_map(struct apple_dart_softc *sc, bus_dmamap_t map)
286 {
287 	struct apple_dart_map_state *ams = map->_dm_iommu;
288 	volatile uint64_t *tte;
289 	int seg;
290 
291 	/* For each segment */
292 	for (seg = 0; seg < map->dm_nsegs; seg++) {
293 		u_long len, dva;
294 
295 		if (ams[seg].ams_len == 0) {
296 			continue;
297 		}
298 
299 		dva = ams[seg].ams_dva;
300 		len = ams[seg].ams_len;
301 
302 		while (len > 0) {
303 			tte = apple_dart_lookup_tte(sc, dva);
304 			*tte = DART_L2_INVAL;
305 
306 			dva += DART_PAGE_SIZE;
307 			len -= DART_PAGE_SIZE;
308 		}
309 
310 		vmem_xfree(sc->sc_dvamap, ams[seg].ams_dva, ams[seg].ams_len);
311 
312 		ams[seg].ams_dva = 0;
313 		ams[seg].ams_len = 0;
314 	}
315 
316 	apple_dart_flush_tlb(sc);
317 }
318 
319 static int
apple_dart_load_map(struct apple_dart_softc * sc,bus_dmamap_t map)320 apple_dart_load_map(struct apple_dart_softc *sc, bus_dmamap_t map)
321 {
322 	struct apple_dart_map_state *ams = map->_dm_iommu;
323 	volatile uint64_t *tte;
324 	int seg, error;
325 
326 	/* For each segment */
327 	for (seg = 0; seg < map->dm_nsegs; seg++) {
328 		paddr_t pa = map->dm_segs[seg]._ds_paddr;
329 		psize_t off = pa - DART_TRUNC_PAGE(pa);
330 		u_long len, dva;
331 
332 		len = DART_ROUND_PAGE(map->dm_segs[seg].ds_len + off);
333 
334 #ifdef APPLE_DART_DEBUG
335 		device_printf(sc->sc_dev, "load pa=%#lx off=%lu len=%lu ",
336 		    pa, off, len);
337 #endif
338 
339 		error = vmem_xalloc(sc->sc_dvamap, len, DART_PAGE_SIZE, 0,
340 		    0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_BESTFIT|VM_NOSLEEP,
341 		    &dva);
342 		if (error != 0) {
343 			apple_dart_unload_map(sc, map);
344 #ifdef APPLE_DART_DEBUG
345 			printf("error=%d\n", error);
346 #endif
347 			return error;
348 		}
349 
350 #ifdef APPLE_DART_DEBUG
351 		printf("dva=%#lx\n", dva);
352 #endif
353 
354 		ams[seg].ams_dva = dva;
355 		ams[seg].ams_len = len;
356 
357 		map->dm_segs[seg].ds_addr = dva + off;
358 
359 		pa = DART_TRUNC_PAGE(pa);
360 		paddr_t start = DART_TRUNC_OFFSET(off);
361 		paddr_t end = DART_PAGE_MASK;
362 		while (len > 0) {
363 			tte = apple_dart_lookup_tte(sc, dva);
364 			if (len < DART_PAGE_SIZE)
365 				end = DART_ROUND_OFFSET(len) - 1;
366 
367 			*tte = pa | DART_L2_VALID |
368 			    DART_L2_START(start) | DART_L2_END(end);
369 #ifdef APPLE_DART_DEBUG
370 			printf("tte %p = %"PRIx64"\n", tte, *tte);
371 #endif
372 			pa += DART_PAGE_SIZE;
373 			dva += DART_PAGE_SIZE;
374 			len -= DART_PAGE_SIZE;
375 			start = 0;
376 		}
377 	}
378 
379 	apple_dart_flush_tlb(sc);
380 
381 	return 0;
382 }
383 
384 static int
apple_dart_dmamap_create(bus_dma_tag_t t,bus_size_t size,int nsegments,bus_size_t maxsegsz,bus_size_t boundary,int flags,bus_dmamap_t * dmamap)385 apple_dart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
386     bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
387 {
388 	struct apple_dart_softc *sc = t->_cookie;
389 	struct apple_dart_map_state *ams;
390 	bus_dmamap_t map;
391 	int error;
392 
393 	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size, nsegments,
394 	    maxsegsz, boundary, flags, &map);
395 	if (error != 0) {
396 		return error;
397 	}
398 
399 	ams = kmem_zalloc(map->_dm_segcnt * sizeof(*ams),
400 	    (flags & BUS_DMA_NOWAIT) != 0 ? KM_NOSLEEP : KM_SLEEP);
401 	if (ams == NULL) {
402 		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
403 		return ENOMEM;
404 	}
405 
406 	map->_dm_iommu = ams;
407 	*dmamap = map;
408 	return 0;
409 }
410 
411 static void
apple_dart_dmamap_destroy(bus_dma_tag_t t,bus_dmamap_t map)412 apple_dart_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
413 {
414 	struct apple_dart_softc *sc = t->_cookie;
415 	struct apple_dart_map_state *ams = map->_dm_iommu;
416 
417 	kmem_free(ams, map->_dm_segcnt * sizeof(*ams));
418 	sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
419 }
420 
421 static int
apple_dart_dmamap_load(bus_dma_tag_t t,bus_dmamap_t map,void * buf,size_t buflen,struct proc * p,int flags)422 apple_dart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
423     size_t buflen, struct proc *p, int flags)
424 {
425 	struct apple_dart_softc *sc = t->_cookie;
426 	int error;
427 
428 	error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
429 	    buf, buflen, p, flags);
430 	if (error != 0) {
431 		return error;
432 	}
433 
434 	error = apple_dart_load_map(sc, map);
435 	if (error != 0) {
436 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
437 	}
438 
439 	return error;
440 }
441 
442 static int
apple_dart_dmamap_load_mbuf(bus_dma_tag_t t,bus_dmamap_t map,struct mbuf * m,int flags)443 apple_dart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map,
444     struct mbuf *m, int flags)
445 {
446 	struct apple_dart_softc *sc = t->_cookie;
447 	int error;
448 
449 	error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
450 	    m, flags);
451 	if (error != 0) {
452 		return error;
453 	}
454 
455 	error = apple_dart_load_map(sc, map);
456 	if (error != 0) {
457 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
458 	}
459 
460 	return error;
461 }
462 
463 static int
apple_dart_dmamap_load_uio(bus_dma_tag_t t,bus_dmamap_t map,struct uio * uio,int flags)464 apple_dart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map,
465     struct uio *uio, int flags)
466 {
467 	struct apple_dart_softc *sc = t->_cookie;
468 	int error;
469 
470 	error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
471 	    uio, flags);
472 	if (error != 0) {
473 		return error;
474 	}
475 
476 	error = apple_dart_load_map(sc, map);
477 	if (error != 0) {
478 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
479 	}
480 
481 	return error;
482 }
483 
484 static int
apple_dart_dmamap_load_raw(bus_dma_tag_t t,bus_dmamap_t map,bus_dma_segment_t * segs,int nsegs,bus_size_t size,int flags)485 apple_dart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
486     bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
487 {
488 	struct apple_dart_softc *sc = t->_cookie;
489 	int error;
490 
491 	error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
492 	    segs, nsegs, size, flags);
493 	if (error != 0) {
494 		return error;
495 	}
496 
497 	error = apple_dart_load_map(sc, map);
498 	if (error != 0) {
499 		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
500 	}
501 
502 	return error;
503 }
504 
505 static void
apple_dart_dmamap_unload(bus_dma_tag_t t,bus_dmamap_t map)506 apple_dart_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
507 {
508 	struct apple_dart_softc *sc = t->_cookie;
509 
510 	apple_dart_unload_map(sc, map);
511 	sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
512 }
513 
514 static bus_dma_tag_t
apple_dart_iommu_map(device_t dev,const u_int * data,bus_dma_tag_t dmat)515 apple_dart_iommu_map(device_t dev, const u_int *data, bus_dma_tag_t dmat)
516 {
517 	struct apple_dart_softc * const sc = device_private(dev);
518 
519 	return &sc->sc_bus_dmat;
520 }
521 
522 const struct fdtbus_iommu_func apple_dart_iommu_funcs = {
523 	.map = apple_dart_iommu_map,
524 };
525 
526 static int
apple_dart_match(device_t parent,cfdata_t cf,void * aux)527 apple_dart_match(device_t parent, cfdata_t cf, void *aux)
528 {
529 	struct fdt_attach_args * const faa = aux;
530 
531 	return of_compatible_match(faa->faa_phandle, compat_data);
532 }
533 
534 static void
apple_dart_attach(device_t parent,device_t self,void * aux)535 apple_dart_attach(device_t parent, device_t self, void *aux)
536 {
537 	struct apple_dart_softc * const sc = device_private(self);
538 	struct fdt_attach_args * const faa = aux;
539 	const int phandle = faa->faa_phandle;
540 	char intrstr[128];
541 	volatile uint64_t *l1;
542 	bus_addr_t addr;
543 	bus_size_t size;
544 	u_int sid, idx;
545 	paddr_t pa;
546 	void *ih;
547 
548 	if (fdtbus_get_reg(phandle, 0, &addr, &size) != 0) {
549 		aprint_error(": couldn't get registers\n");
550 		return;
551 	}
552 	if (!fdtbus_intr_str(phandle, 0, intrstr, sizeof(intrstr))) {
553 		aprint_error(": couldn't decode interrupt\n");
554 		return;
555 	}
556 
557 	sc->sc_dev = self;
558 	sc->sc_phandle = phandle;
559 	sc->sc_dmat = faa->faa_dmat;
560 	sc->sc_bst = faa->faa_bst;
561 	if (bus_space_map(sc->sc_bst, addr, size, 0, &sc->sc_bsh) != 0) {
562 		aprint_error(": couldn't map registers\n");
563 		return;
564 	}
565 
566 	/* Skip locked DARTs for now. */
567 	uint32_t config = DART_READ(sc, DART_CONFIG);
568 	if (config & DART_CONFIG_LOCK) {
569 		aprint_naive("\n");
570 		aprint_normal(": locked\n");
571 		return;
572 	}
573 
574 	/*
575 	 * Use bypass mode if supported.  This avoids an issue with
576 	 * the USB3 controllers which need mappings entered into two
577 	 * IOMMUs, which is somewhat difficult to implement with our
578 	 * current kernel interfaces.
579 	 */
580 	uint32_t params2 = DART_READ(sc, DART_PARAMS2);
581 	if (params2 & DART_PARAMS2_BYPASS_SUPPORT) {
582 		for (sid = 0; sid < DART_NUM_STREAMS; sid++) {
583 			DART_WRITE(sc, DART_TCR(sid),
584 			    DART_TCR_BYPASS_DART | DART_TCR_BYPASS_DAPF);
585 		}
586 		aprint_naive("\n");
587 		aprint_normal(": bypass\n");
588 		return;
589 	}
590 
591 	sc->sc_nsid = of_compatible_lookup(phandle, compat_data)->value;
592 	sc->sc_sid_mask = __MASK(sc->sc_nsid);
593 
594 	aprint_naive("\n");
595 	aprint_normal(": Apple DART @ %#lx/%#lx, %u SIDs (mask 0x%lx)\n",
596 	    addr, size, sc->sc_nsid, sc->sc_sid_mask);
597 
598 	KASSERT(sc->sc_nsid == 16);
599 	KASSERT(sc->sc_sid_mask == 0xffff);
600 
601 	sc->sc_dvamap = vmem_create(device_xname(self),
602 	    DART_APERTURE_START, DART_APERTURE_SIZE, DART_PAGE_SIZE,
603 	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_HIGH);
604 	if (sc->sc_dvamap == NULL) {
605 		aprint_error_dev(self, "couldn't allocate DVA map\n");
606 		return;
607 	}
608 
609 	/* Disable translations */
610 	for (sid = 0; sid < sc->sc_nsid; sid++) {
611 		DART_WRITE(sc, DART_TCR(sid), 0);
612 	}
613 
614 	/* Remove page tables */
615 	for (sid = 0; sid < sc->sc_nsid; sid++) {
616 		for (idx = 0; idx < 4; idx++) {
617 			DART_WRITE(sc, DART_TTBR(sid, idx), 0);
618 		}
619 	}
620 	apple_dart_flush_tlb(sc);
621 
622 	/*
623 	 * Build translation tables. We pre-allocate the translation
624 	 * tables for the entire aperture such that we don't have to worry
625 	 * about growing them in an mpsafe manner later.
626 	 *
627 	 * Cover the entire address space [0, ..._START + ..._SIZE) even if vmem
628 	 * only allocates from [..._START, ..._START + ...+SIZE)
629 	 */
630 
631 	const u_int ntte = howmany(DART_APERTURE_START + DART_APERTURE_SIZE - 1,
632 				   DART_PAGE_SIZE);
633 	const u_int nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
634 	const u_int nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));
635 
636 	sc->sc_l1 = apple_dart_dma_alloc(sc->sc_dmat,
637 	    nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE);
638 	if (sc->sc_l1 == NULL) {
639 		aprint_error_dev(self, "couldn't allocate L1 tables\n");
640 		return;
641 	}
642 	sc->sc_l2 = kmem_zalloc(nl2 * sizeof(*sc->sc_l2), KM_SLEEP);
643 	sc->sc_nl2 = nl2;
644 
645 	l1 = DART_DMA_KVA(sc->sc_l1);
646 	for (idx = 0; idx < nl2; idx++) {
647 		sc->sc_l2[idx] = apple_dart_dma_alloc(sc->sc_dmat,
648 		    DART_PAGE_SIZE, DART_PAGE_SIZE);
649 		if (sc->sc_l2[idx] == NULL) {
650 			aprint_error_dev(self,
651 			    "couldn't allocate L2 tables\n");
652 			return;
653 		}
654 
655 		l1[idx] = DART_DMA_DVA(sc->sc_l2[idx]) | DART_L1_TABLE;
656 #ifdef APPLE_DART_DEBUG
657 		printf("l1[%d] (%p) = %"PRIx64"\n", idx, &l1[idx], l1[idx]);
658 #endif
659 	}
660 
661 	/* Install page tables */
662 	for (sid = 0; sid < sc->sc_nsid; sid++) {
663 		pa = DART_DMA_DVA(sc->sc_l1);
664 		for (idx = 0; idx < nl1; idx++) {
665 			KASSERTMSG(__SHIFTOUT(pa, __BITS(DART_TTBR_SHIFT - 1, 0)) == 0,
666 			    "TTBR pa is not correctly aligned %" PRIxPADDR, pa);
667 
668 			DART_WRITE(sc, DART_TTBR(sid, idx),
669 			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID);
670 			pa += DART_PAGE_SIZE;
671 #ifdef APPLE_DART_DEBUG
672 			printf("writing %"PRIx64" to %"PRIx32"\n",
673 			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID,
674 			    DART_TTBR(sid, idx));
675 #endif
676 		}
677 	}
678 	apple_dart_flush_tlb(sc);
679 
680 	/* Enable translations */
681 	for (sid = 0; sid < sc->sc_nsid; sid++) {
682 		DART_WRITE(sc, DART_TCR(sid), DART_TCR_TRANSLATE_ENABLE);
683 	}
684 
685 	ih = fdtbus_intr_establish_xname(phandle, 0, IPL_HIGH, FDT_INTR_MPSAFE,
686 	    apple_dart_intr, sc, device_xname(self));
687 	if (ih == NULL) {
688 		aprint_error_dev(self, "couldn't establish interrupt on %s\n",
689 		    intrstr);
690 		return;
691 	}
692 	aprint_normal_dev(self, "interrupting on %s\n", intrstr);
693 
694 	/* Setup bus DMA tag */
695 	sc->sc_bus_dmat = *sc->sc_dmat;
696 	sc->sc_bus_dmat._ranges = apple_dart_dma_ranges;
697 	sc->sc_bus_dmat._nranges = 1;
698 	sc->sc_bus_dmat._cookie = sc;
699 	sc->sc_bus_dmat._dmamap_create = apple_dart_dmamap_create;
700 	sc->sc_bus_dmat._dmamap_destroy = apple_dart_dmamap_destroy;
701 	sc->sc_bus_dmat._dmamap_load = apple_dart_dmamap_load;
702 	sc->sc_bus_dmat._dmamap_load_mbuf = apple_dart_dmamap_load_mbuf;
703 	sc->sc_bus_dmat._dmamap_load_uio = apple_dart_dmamap_load_uio;
704 	sc->sc_bus_dmat._dmamap_load_raw = apple_dart_dmamap_load_raw;
705 	sc->sc_bus_dmat._dmamap_unload = apple_dart_dmamap_unload;
706 
707 	fdtbus_register_iommu(self, phandle, &apple_dart_iommu_funcs);
708 }
709 
710 CFATTACH_DECL_NEW(apple_dart, sizeof(struct apple_dart_softc),
711 	apple_dart_match, apple_dart_attach, NULL, NULL);
712