1 /* $OpenBSD: apldart.c,v 1.21 2024/05/13 01:15:50 jsg Exp $ */
2 /*
3 * Copyright (c) 2021 Mark Kettenis <kettenis@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/param.h>
19 #include <sys/systm.h>
20 #include <sys/device.h>
21 #include <sys/extent.h>
22 #include <sys/malloc.h>
23 #include <sys/mutex.h>
24
25 #include <machine/intr.h>
26 #include <machine/bus.h>
27 #include <machine/fdt.h>
28
29 #include <uvm/uvm_extern.h>
30
31 #include <dev/ofw/openfirm.h>
32 #include <dev/ofw/ofw_misc.h>
33 #include <dev/ofw/ofw_power.h>
34 #include <dev/ofw/fdt.h>
35
36 /*
37 * This driver largely ignores stream IDs and simply uses a single
38 * translation table for all the devices that it serves. This is good
39 * enough for the PCIe host bridge that serves the on-board devices on
40 * the current generation Apple Silicon Macs as these only have a
41 * single PCIe device behind each DART.
42 */
43
/*
 * Register offsets, relative to the register window mapped in
 * apldart_attach(), and the bit fields within those registers.
 */

/* Read by apldart_attach() and apldart_resume() for every variant. */
#define DART_PARAMS2			0x0004
#define DART_PARAMS2_BYPASS_SUPPORT	(1 << 0)

/*
 * T8020-style layout.  apldart_attach() selects it for every DART
 * that is not compatible with "apple,t8110-dart".
 */
#define DART_T8020_TLB_CMD		0x0020
#define DART_T8020_TLB_CMD_FLUSH	(1 << 20)
#define DART_T8020_TLB_CMD_BUSY		(1 << 2)
/* Bitmask of the stream IDs a TLB flush command applies to. */
#define DART_T8020_TLB_SIDMASK		0x0034
#define DART_T8020_ERROR		0x0040
#define DART_T8020_ERROR_ADDR_LO	0x0050
#define DART_T8020_ERROR_ADDR_HI	0x0054
#define DART_T8020_CONFIG		0x0060
/* When set, apldart_attach() treats the DART as locked. */
#define DART_T8020_CONFIG_LOCK		(1 << 15)
#define DART_T8020_SID_ENABLE		0x00fc
#define DART_T8020_TCR_BASE		0x0100
#define DART_T8020_TCR_TRANSLATE_ENABLE	(1 << 7)
#define DART_T8020_TCR_BYPASS_DART	(1 << 8)
#define DART_T8020_TCR_BYPASS_DAPF	(1 << 12)
#define DART_T8020_TTBR_BASE		0x0200
#define DART_T8020_TTBR_VALID		(1U << 31)

/*
 * T8110-style layout, selected for "apple,t8110-dart".  This variant
 * reports its revision, address width and number of stream IDs in
 * the PARAMS3/PARAMS4 registers.
 */
#define DART_T8110_PARAMS3		0x0008
#define DART_T8110_PARAMS3_REV_MIN(x)	(((x) >> 0) & 0xff)
#define DART_T8110_PARAMS3_REV_MAJ(x)	(((x) >> 8) & 0xff)
#define DART_T8110_PARAMS3_VA_WIDTH(x)	(((x) >> 16) & 0x3f)
#define DART_T8110_PARAMS4		0x000c
#define DART_T8110_PARAMS4_NSID_MASK	(0x1ff << 0)
#define DART_T8110_TLB_CMD		0x0080
#define DART_T8110_TLB_CMD_BUSY		(1U << 31)
#define DART_T8110_TLB_CMD_FLUSH_ALL	(0 << 8)
/* OR'ed with the stream ID to flush a single stream. */
#define DART_T8110_TLB_CMD_FLUSH_SID	(1 << 8)
#define DART_T8110_ERROR		0x0100
#define DART_T8110_ERROR_MASK		0x0104
#define DART_T8110_ERROR_ADDR_LO	0x0170
#define DART_T8110_ERROR_ADDR_HI	0x0174
#define DART_T8110_PROTECT		0x0200
/* When set, apldart_attach() treats the DART as locked. */
#define DART_T8110_PROTECT_TTBR_TCR	(1 << 0)
#define DART_T8110_SID_ENABLE_BASE	0x0c00
#define DART_T8110_TCR_BASE		0x1000
#define DART_T8110_TCR_BYPASS_DAPF	(1 << 2)
#define DART_T8110_TCR_BYPASS_DART	(1 << 1)
#define DART_T8110_TCR_TRANSLATE_ENABLE	(1 << 0)
#define DART_T8110_TTBR_BASE		0x1400
#define DART_T8110_TTBR_VALID		(1 << 0)
87
/* Size of a DART page (16KB); translation tables use the same size. */
#define DART_PAGE_SIZE		16384
#define DART_PAGE_MASK		(DART_PAGE_SIZE - 1)

/*
 * Variant-independent register addresses, computed from the bases
 * that apldart_attach() stores in the softc.  All of these registers
 * are 32 bits wide, hence the factor of 4.
 */

/* The idx'th stream enable register; each covers 32 stream IDs. */
#define DART_SID_ENABLE(sc, idx) \
    ((sc)->sc_sid_enable_base + 4 * (idx))
/* Translation control register for stream `sid'. */
#define DART_TCR(sc, sid)	((sc)->sc_tcr_base + 4 * (sid))
/* The idx'th of the sc_nttbr translation table base registers of `sid'. */
#define DART_TTBR(sc, sid, idx) \
    ((sc)->sc_ttbr_base + 4 * (sc)->sc_nttbr * (sid) + 4 * (idx))
/* A TTBR holds the table address shifted right by this many bits. */
#define DART_TTBR_SHIFT		12

/*
 * Bitmask with one bit per stream ID.  Only meaningful while
 * sc_nsid < 32; the only user in this file is the T8020 flush
 * routine, for which apldart_attach() sets sc_nsid to 16.
 */
#define DART_ALL_STREAMS(sc)	((1U << (sc)->sc_nsid) - 1)
99
/*
 * Some hardware (e.g. bge(4)) will always use (aligned) 64-bit memory
 * access. To make sure this doesn't fault, round the subpage limits
 * down and up accordingly.
 */
#define DART_OFFSET_MASK	7

/* Low bits OR'ed into an L1 entry that points at an L2 table. */
#define DART_L1_TABLE		0x3
/* Value stored in an L2 entry to remove a mapping. */
#define DART_L2_INVAL		0
#define DART_L2_VALID		(1 << 0)
#define DART_L2_FULL_PAGE	(1 << 1)
/*
 * Sub-page limits of an L2 entry: the offset within the DART page,
 * divided by 4, stored starting at bit 52 (start) or bit 40 (end).
 * The argument must be a 64-bit type for the shift to be valid.
 */
#define DART_L2_START(addr)	((((addr) & DART_PAGE_MASK) >> 2) << 52)
#define DART_L2_END(addr)	((((addr) & DART_PAGE_MASK) >> 2) << 40)
113
114 static inline paddr_t
apldart_round_page(paddr_t pa)115 apldart_round_page(paddr_t pa)
116 {
117 return ((pa + DART_PAGE_MASK) & ~DART_PAGE_MASK);
118 }
119
120 static inline paddr_t
apldart_trunc_page(paddr_t pa)121 apldart_trunc_page(paddr_t pa)
122 {
123 return (pa & ~DART_PAGE_MASK);
124 }
125
126 static inline psize_t
apldart_round_offset(psize_t off)127 apldart_round_offset(psize_t off)
128 {
129 return ((off + DART_OFFSET_MASK) & ~DART_OFFSET_MASK);
130 }
131
132 static inline psize_t
apldart_trunc_offset(psize_t off)133 apldart_trunc_offset(psize_t off)
134 {
135 return (off & ~DART_OFFSET_MASK);
136 }
137
/*
 * 32-bit register accessors.  `reg' is an offset into the register
 * window described by the softc's sc_iot/sc_ioh.
 */
#define HREAD4(sc, reg)							\
	(bus_space_read_4((sc)->sc_iot, (sc)->sc_ioh, (reg)))
#define HWRITE4(sc, reg, val)						\
	bus_space_write_4((sc)->sc_iot, (sc)->sc_ioh, (reg), (val))
142
/*
 * Per-stream state, created on demand by apldart_map() the first
 * time a stream ID is looked up.
 */
struct apldart_stream {
	struct apldart_softc	*as_sc;		/* owning DART */
	int			as_sid;		/* stream ID */

	/* Allocator for device virtual addresses of this stream. */
	struct extent		*as_dvamap;
	/* Held around every allocation from/free to as_dvamap. */
	struct mutex		as_dvamap_mtx;
	/*
	 * L1 table memory.  Only set by apldart_alloc_stream() for DARTs
	 * we fully control; apldart_init_locked_stream() does not set it.
	 */
	struct apldart_dmamem	*as_l1;
	/*
	 * Array of L2 tables, indexed like the L1 entries.  Slots may be
	 * NULL on locked/translating DARTs for entries that were already
	 * populated when we took over.
	 */
	struct apldart_dmamem	**as_l2;

	/* DMA tag returned to devices; its cookie points back at us. */
	struct machine_bus_dma_tag as_dmat;
};
154
/* Per-device driver state. */
struct apldart_softc {
	struct device		sc_dev;
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;
	bus_dma_tag_t		sc_dmat;	/* parent DMA tag */
	int			sc_node;	/* device tree node */

	int			sc_ias;		/* input address size (bits) */
	int			sc_nsid;	/* number of stream IDs */
	int			sc_nttbr;	/* TTBRs per stream */
	/* Right shift applied to physical addresses in table entries. */
	int			sc_shift;
	/* Variant-specific register bases and register values. */
	bus_addr_t		sc_sid_enable_base;
	bus_addr_t		sc_tcr_base;
	uint32_t		sc_tcr_translate_enable;
	uint32_t		sc_tcr_bypass;
	bus_addr_t		sc_ttbr_base;
	uint32_t		sc_ttbr_valid;
	/* Variant-specific TLB flush; second argument is a SID or -1. */
	void			(*sc_flush_tlb)(struct apldart_softc *, int);

	/* Device virtual address aperture; sc_dvaend is inclusive. */
	bus_addr_t		sc_dvabase;
	bus_addr_t		sc_dvaend;
	/* (1 << sc_ias) - 1 */
	bus_addr_t		sc_dvamask;

	/* Per-stream state indexed by stream ID; NULL until first use. */
	struct apldart_stream	**sc_as;
	struct iommu_device	sc_id;

	/* Lock bit was set in the configuration/protect register. */
	int			sc_locked;
	/* A stream had translation enabled with a valid TTBR at attach. */
	int			sc_translating;
	/* Set when neither of the above holds; gates suspend/resume. */
	int			sc_do_suspend;
};
185
/*
 * Per-segment bookkeeping attached to each DMA map via _dm_cookie.
 * ams_len == 0 means the segment currently has no DVA allocation.
 */
struct apldart_map_state {
	struct extent_region	ams_er;		/* descriptor for the extent */
	bus_addr_t		ams_dva;	/* start of the DVA allocation */
	bus_size_t		ams_len;	/* length of the DVA allocation */
};
191
/*
 * A chunk of DMA memory as set up by apldart_dmamem_alloc(): a
 * single segment that is both mapped into kernel virtual address
 * space and loaded into a DMA map.
 */
struct apldart_dmamem {
	bus_dmamap_t		adm_map;
	bus_dma_segment_t	adm_seg;
	size_t			adm_size;
	caddr_t			adm_kva;
};

/* Accessors for the map, size, bus address and kernel address. */
#define APLDART_DMA_MAP(_adm)	((_adm)->adm_map)
#define APLDART_DMA_LEN(_adm)	((_adm)->adm_size)
#define APLDART_DMA_DVA(_adm)	((_adm)->adm_map->dm_segs[0].ds_addr)
#define APLDART_DMA_KVA(_adm)	((void *)(_adm)->adm_kva)
203
/* Allocation of DMA memory used for the translation tables. */
struct apldart_dmamem *apldart_dmamem_alloc(bus_dma_tag_t, bus_size_t,
	    bus_size_t);
void	apldart_dmamem_free(bus_dma_tag_t, struct apldart_dmamem *);
207
/* Autoconf entry points. */
int	apldart_match(struct device *, void *, void *);
void	apldart_attach(struct device *, struct device *, void *);
int	apldart_activate(struct device *, int);

/*
 * Attachment glue: softc size, match and attach routines, no detach
 * routine (NULL) and the activate hook used for suspend/resume.
 */
const struct cfattach apldart_ca = {
	sizeof (struct apldart_softc), apldart_match, apldart_attach, NULL,
	apldart_activate
};

struct cfdriver apldart_cd = {
	NULL, "apldart", DV_DULL
};
220
/* IOMMU callbacks registered through struct iommu_device. */
bus_dma_tag_t apldart_map(void *, uint32_t *, bus_dma_tag_t);
void	apldart_reserve(void *, uint32_t *, bus_addr_t, bus_size_t);
/* Error interrupt handlers, one per register layout. */
int	apldart_t8020_intr(void *);
int	apldart_t8110_intr(void *);

/* TLB flush routines, one per register layout. */
void	apldart_t8020_flush_tlb(struct apldart_softc *, int);
void	apldart_t8110_flush_tlb(struct apldart_softc *, int);
/* Enter/remove the translations for an already loaded DMA map. */
int	apldart_load_map(struct apldart_stream *, bus_dmamap_t, int);
void	apldart_unload_map(struct apldart_stream *, bus_dmamap_t);

/* bus_dma methods installed in each stream's DMA tag. */
int	apldart_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
	    bus_size_t boundary, int, bus_dmamap_t *);
void	apldart_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int	apldart_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *,
	    bus_size_t, struct proc *, int);
int	apldart_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t,
	    struct mbuf *, int);
int	apldart_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t,
	    struct uio *, int);
int	apldart_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
	    bus_dma_segment_t *, int, bus_size_t, int);
void	apldart_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
243
244 int
apldart_match(struct device * parent,void * match,void * aux)245 apldart_match(struct device *parent, void *match, void *aux)
246 {
247 struct fdt_attach_args *faa = aux;
248
249 return OF_is_compatible(faa->fa_node, "apple,t6000-dart") ||
250 OF_is_compatible(faa->fa_node, "apple,t8103-dart") ||
251 OF_is_compatible(faa->fa_node, "apple,t8110-dart");
252 }
253
/*
 * Autoconf attach routine.
 *
 * Maps the registers, selects the register layout for the variant
 * found, and determines whether the DART is locked or already
 * translating.  A DART that supports bypass and is fully under our
 * control is put into bypass mode and nothing else is done.
 * Otherwise the DVA aperture is determined, the DART is reset (when
 * we have full control), the error interrupt is established and the
 * device is registered as an IOMMU.
 */
void
apldart_attach(struct device *parent, struct device *self, void *aux)
{
	struct apldart_softc *sc = (struct apldart_softc *)self;
	struct fdt_attach_args *faa = aux;
	uint64_t dva_range[2];
	uint32_t config, maj, min, params2, params3, params4, tcr, ttbr;
	int sid, idx;

	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	sc->sc_iot = faa->fa_iot;
	if (bus_space_map(sc->sc_iot, faa->fa_reg[0].addr,
	    faa->fa_reg[0].size, 0, &sc->sc_ioh)) {
		printf(": can't map registers\n");
		return;
	}

	sc->sc_dmat = faa->fa_dmat;
	sc->sc_node = faa->fa_node;

	/* Enabled before the first register access below. */
	power_domain_enable(sc->sc_node);

	/*
	 * Select the register layout.  The t8110 variant reports its
	 * address width, number of streams and revision in registers;
	 * the other variants use fixed values and have no revision.
	 */
	if (OF_is_compatible(sc->sc_node, "apple,t8110-dart")) {
		params3 = HREAD4(sc, DART_T8110_PARAMS3);
		params4 = HREAD4(sc, DART_T8110_PARAMS4);
		sc->sc_ias = DART_T8110_PARAMS3_VA_WIDTH(params3);
		sc->sc_nsid = params4 & DART_T8110_PARAMS4_NSID_MASK;
		sc->sc_nttbr = 1;
		sc->sc_sid_enable_base = DART_T8110_SID_ENABLE_BASE;
		sc->sc_tcr_base = DART_T8110_TCR_BASE;
		sc->sc_tcr_translate_enable = DART_T8110_TCR_TRANSLATE_ENABLE;
		sc->sc_tcr_bypass =
		    DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART;
		sc->sc_ttbr_base = DART_T8110_TTBR_BASE;
		sc->sc_ttbr_valid = DART_T8110_TTBR_VALID;
		sc->sc_flush_tlb = apldart_t8110_flush_tlb;
		maj = DART_T8110_PARAMS3_REV_MAJ(params3);
		min = DART_T8110_PARAMS3_REV_MIN(params3);
	} else {
		sc->sc_ias = 32;
		sc->sc_nsid = 16;
		sc->sc_nttbr = 4;
		sc->sc_sid_enable_base = DART_T8020_SID_ENABLE;
		sc->sc_tcr_base = DART_T8020_TCR_BASE;
		sc->sc_tcr_translate_enable = DART_T8020_TCR_TRANSLATE_ENABLE;
		sc->sc_tcr_bypass =
		    DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART;
		sc->sc_ttbr_base = DART_T8020_TTBR_BASE;
		sc->sc_ttbr_valid = DART_T8020_TTBR_VALID;
		sc->sc_flush_tlb = apldart_t8020_flush_tlb;
		maj = min = 0;
	}

	/*
	 * These variants store physical addresses shifted right by 4 in
	 * their table entries.  Other variants keep the softc's initial
	 * sc_shift (presumably 0, assuming the softc starts out zeroed).
	 */
	if (OF_is_compatible(sc->sc_node, "apple,t6000-dart") ||
	    OF_is_compatible(sc->sc_node, "apple,t8110-dart"))
		sc->sc_shift = 4;

	/* Skip locked DARTs for now. */
	if (OF_is_compatible(sc->sc_node, "apple,t8110-dart")) {
		config = HREAD4(sc, DART_T8110_PROTECT);
		if (config & DART_T8110_PROTECT_TTBR_TCR)
			sc->sc_locked = 1;
	} else {
		config = HREAD4(sc, DART_T8020_CONFIG);
		if (config & DART_T8020_CONFIG_LOCK)
			sc->sc_locked = 1;
	}

	if (maj != 0 || min != 0)
		printf(" rev %d.%d", maj, min);

	/* Report the hardware's width, before it is capped below. */
	printf(": %d bits", sc->sc_ias);

	/*
	 * Anything over 36 bits requires 4-level page tables which we
	 * don't implement yet.  So limit to 36 bits.
	 */
	if (sc->sc_ias > 36)
		sc->sc_ias = 36;
	sc->sc_dvamask = (1ULL << sc->sc_ias) - 1;

	/*
	 * Resetting the DART used for the display controller will
	 * kill the framebuffer.  This should be the only DART that
	 * has translation enabled and a valid translation table
	 * installed.  Skip this DART for now.
	 */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		tcr = HREAD4(sc, DART_TCR(sc, sid));
		if ((tcr & sc->sc_tcr_translate_enable) == 0)
			continue;

		for (idx = 0; idx < sc->sc_nttbr; idx++) {
			ttbr = HREAD4(sc, DART_TTBR(sc, sid, idx));
			if (ttbr & sc->sc_ttbr_valid)
				sc->sc_translating = 1;
		}
	}

	/*
	 * If we have full control over this DART, do suspend it.
	 */
	sc->sc_do_suspend = !sc->sc_locked && !sc->sc_translating;

	/*
	 * Use bypass mode if supported.  This avoids an issue with
	 * the USB3 controllers which need mappings entered into two
	 * IOMMUs, which is somewhat difficult to implement with our
	 * current kernel interfaces.
	 */
	params2 = HREAD4(sc, DART_PARAMS2);
	if ((params2 & DART_PARAMS2_BYPASS_SUPPORT) &&
	    !sc->sc_locked && !sc->sc_translating) {
		for (sid = 0; sid < sc->sc_nsid; sid++)
			HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_bypass);
		printf(", bypass\n");
		/*
		 * Done: no interrupt handler, no sc_as array and no IOMMU
		 * registration for a DART in bypass mode.
		 */
		return;
	}

	if (sc->sc_locked)
		printf(", locked\n");
	else if (sc->sc_translating)
		printf(", translating\n");
	else
		printf("\n");

	/*
	 * Determine the DVA aperture, preferring the (base, size) pair
	 * from the device tree when it is present and well-formed.
	 */
	if (OF_getpropint64array(sc->sc_node, "apple,dma-range",
	    dva_range, sizeof(dva_range)) == sizeof(dva_range)) {
		sc->sc_dvabase = dva_range[0];
		sc->sc_dvaend = dva_range[0] + dva_range[1] - 1;
	} else {
		/*
		 * Restrict ourselves to 32-bit addresses to cater for
		 * devices that don't do 64-bit DMA.  Skip the first
		 * page to help catching bugs where a device is doing
		 * DMA to/from address zero because we didn't properly
		 * set up the DMA transfer.  Skip the last page to
		 * avoid using the address reserved for MSIs.
		 */
		sc->sc_dvabase = DART_PAGE_SIZE;
		sc->sc_dvaend = 0xffffffff - DART_PAGE_SIZE;
	}

	/* Only reset the DART if we have full control over it. */
	if (!sc->sc_locked && !sc->sc_translating) {
		/* Disable translations. */
		for (sid = 0; sid < sc->sc_nsid; sid++)
			HWRITE4(sc, DART_TCR(sc, sid), 0);

		/* Remove page tables. */
		for (sid = 0; sid < sc->sc_nsid; sid++) {
			for (idx = 0; idx < sc->sc_nttbr; idx++)
				HWRITE4(sc, DART_TTBR(sc, sid, idx), 0);
		}
		sc->sc_flush_tlb(sc, -1);
	}

	/*
	 * Write the current error status back to the error register
	 * (presumably write-one-to-clear - TODO confirm) and establish
	 * the error interrupt handler for this layout.
	 */
	if (OF_is_compatible(sc->sc_node, "apple,t8110-dart")) {
		HWRITE4(sc, DART_T8110_ERROR, HREAD4(sc, DART_T8110_ERROR));
		HWRITE4(sc, DART_T8110_ERROR_MASK, 0);
		fdt_intr_establish(faa->fa_node, IPL_NET, apldart_t8110_intr,
		    sc, sc->sc_dev.dv_xname);
	} else {
		HWRITE4(sc, DART_T8020_ERROR, HREAD4(sc, DART_T8020_ERROR));
		fdt_intr_establish(faa->fa_node, IPL_NET, apldart_t8020_intr,
		    sc, sc->sc_dev.dv_xname);
	}

	/* Per-stream state is filled in lazily by apldart_map(). */
	sc->sc_as = mallocarray(sc->sc_nsid, sizeof(*sc->sc_as),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_id.id_node = faa->fa_node;
	sc->sc_id.id_cookie = sc;
	sc->sc_id.id_map = apldart_map;
	sc->sc_id.id_reserve = apldart_reserve;
	iommu_device_register(&sc->sc_id);
}
434
435 void
apldart_suspend(struct apldart_softc * sc)436 apldart_suspend(struct apldart_softc *sc)
437 {
438 if (!sc->sc_do_suspend)
439 return;
440
441 power_domain_disable(sc->sc_node);
442 }
443
/*
 * Resume hook: re-enable the power domain and reprogram the DART.
 *
 * Does nothing for DARTs that were not suspended.  DARTs with bypass
 * support are put back into bypass mode.  For the others the TTBRs,
 * stream enable bits and TCRs of every stream that has been set up
 * are restored from the state kept in the softc.
 */
void
apldart_resume(struct apldart_softc *sc)
{
	paddr_t pa;
	int ntte, nl1, nl2;
	uint32_t params2;
	uint32_t mask;
	int sid, idx;

	if (!sc->sc_do_suspend)
		return;

	power_domain_enable(sc->sc_node);

	/*
	 * sc_do_suspend implies the DART is neither locked nor
	 * translating, so this is the same condition under which
	 * apldart_attach() chose bypass mode.
	 */
	params2 = HREAD4(sc, DART_PARAMS2);
	if (params2 & DART_PARAMS2_BYPASS_SUPPORT) {
		for (sid = 0; sid < sc->sc_nsid; sid++)
			HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_bypass);
		return;
	}

	/*
	 * Same table geometry as computed in apldart_alloc_stream():
	 * number of translation table entries, of L2 tables, and of
	 * L1 table pages (one TTBR each).
	 */
	ntte = howmany((sc->sc_dvaend & sc->sc_dvamask), DART_PAGE_SIZE);
	nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
	nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));

	/* Install page tables. */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		if (sc->sc_as[sid] == NULL)
			continue;
		pa = APLDART_DMA_DVA(sc->sc_as[sid]->as_l1);
		for (idx = 0; idx < nl1; idx++) {
			HWRITE4(sc, DART_TTBR(sc, sid, idx),
			    (pa >> DART_TTBR_SHIFT) | sc->sc_ttbr_valid);
			pa += DART_PAGE_SIZE;
		}
	}
	sc->sc_flush_tlb(sc, -1);

	/* Enable all active streams. */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		if (sc->sc_as[sid] == NULL)
			continue;
		mask = HREAD4(sc, DART_SID_ENABLE(sc, sid / 32));
		mask |= (1U << (sid % 32));
		HWRITE4(sc, DART_SID_ENABLE(sc, sid / 32), mask);
	}

	/* Enable translations. */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		if (sc->sc_as[sid] == NULL)
			continue;
		HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_translate_enable);
	}

	/* Same error register setup as done in apldart_attach(). */
	if (OF_is_compatible(sc->sc_node, "apple,t8110-dart")) {
		HWRITE4(sc, DART_T8110_ERROR, HREAD4(sc, DART_T8110_ERROR));
		HWRITE4(sc, DART_T8110_ERROR_MASK, 0);
	} else {
		HWRITE4(sc, DART_T8020_ERROR, HREAD4(sc, DART_T8020_ERROR));
	}
}
505
506 int
apldart_activate(struct device * self,int act)507 apldart_activate(struct device *self, int act)
508 {
509 struct apldart_softc *sc = (struct apldart_softc *)self;
510
511 switch (act) {
512 case DVACT_SUSPEND:
513 apldart_suspend(sc);
514 break;
515 case DVACT_RESUME:
516 apldart_resume(sc);
517 break;
518 }
519
520 return 0;
521 }
522
/*
 * Set up a stream on a DART that is locked or already translating.
 *
 * Instead of installing our own L1 table, the L1 table pages that the
 * TTBRs already point at are mapped into kernel virtual address space
 * and edited in place.  L1 entries that are already populated have
 * the DVA range they cover reserved; the remaining entries get a
 * freshly allocated L2 table.
 */
void
apldart_init_locked_stream(struct apldart_stream *as)
{
	struct apldart_softc *sc = as->as_sc;
	uint32_t ttbr;
	vaddr_t startva, endva, va;
	paddr_t pa;
	bus_addr_t dva, dvaend, dvabase;
	volatile uint64_t *l1;
	int nl1, nl2, ntte;
	int idx;

	/* Count the leading TTBRs that are valid; need at least one. */
	for (idx = 0; idx < sc->sc_nttbr; idx++) {
		ttbr = HREAD4(sc, DART_TTBR(sc, as->as_sid, idx));
		if ((ttbr & sc->sc_ttbr_valid) == 0)
			break;
	}
	KASSERT(idx > 0);

	/*
	 * Upper bound on what the existing L1 pages can describe: each
	 * L1 page holds DART_PAGE_SIZE / 8 L2 pointers, each L2 table
	 * holds as many translation table entries.
	 */
	nl2 = idx * (DART_PAGE_SIZE / sizeof(uint64_t));
	ntte = nl2 * (DART_PAGE_SIZE / sizeof(uint64_t));

	/*
	 * dvabase keeps only the address bits above the input address
	 * size.  Shrink the aperture if the existing tables cannot
	 * cover all of it.  Note that this updates the softc, not just
	 * this stream.
	 */
	dvabase = sc->sc_dvabase & ~sc->sc_dvamask;
	dvaend = dvabase + (bus_addr_t)ntte * DART_PAGE_SIZE;
	if (dvaend < sc->sc_dvaend)
		sc->sc_dvaend = dvaend;

	/*
	 * Create the DVA allocator and mark everything outside the
	 * aperture as in use.
	 */
	as->as_dvamap = extent_create(sc->sc_dev.dv_xname, 0, ULONG_MAX,
	    M_DEVBUF, NULL, 0, EX_WAITOK | EX_NOCOALESCE);
	if (sc->sc_dvabase > 0) {
		extent_alloc_region(as->as_dvamap, 0, sc->sc_dvabase,
		    EX_WAITOK);
	}
	if (sc->sc_dvaend < ULONG_MAX) {
		extent_alloc_region(as->as_dvamap, sc->sc_dvaend + 1,
		    ULONG_MAX - sc->sc_dvaend, EX_WAITOK);
	}

	/* Table geometry for the (possibly shrunk) aperture. */
	ntte = howmany((sc->sc_dvaend & sc->sc_dvamask), DART_PAGE_SIZE);
	nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
	nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));

	as->as_l2 = mallocarray(nl2, sizeof(*as->as_l2),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/*
	 * Obtain kernel virtual address space for the L1 table and map
	 * the existing L1 pages, whose physical addresses are taken
	 * from the TTBRs, into it with PMAP_CACHE_CI.
	 */
	l1 = km_alloc(nl1 * DART_PAGE_SIZE, &kv_any, &kp_none, &kd_waitok);
	KASSERT(l1);

	for (idx = 0; idx < nl1; idx++) {
		startva = (vaddr_t)l1 + idx * DART_PAGE_SIZE;
		endva = startva + DART_PAGE_SIZE;
		ttbr = HREAD4(sc, DART_TTBR(sc, as->as_sid, idx));
		pa = (paddr_t)(ttbr & ~sc->sc_ttbr_valid) << DART_TTBR_SHIFT;
		for (va = startva; va < endva; va += PAGE_SIZE) {
			pmap_kenter_cache(va, pa, PROT_READ | PROT_WRITE,
			    PMAP_CACHE_CI);
			pa += PAGE_SIZE;
		}
	}

	for (idx = 0; idx < nl2; idx++) {
		if (l1[idx] & DART_L1_TABLE) {
			/*
			 * Entry was populated before we got here
			 * (presumably by firmware); keep our allocator
			 * away from the addresses it covers.
			 *
			 * NOTE(review): consecutive L1 entries are
			 * (DART_PAGE_SIZE / 8) * DART_PAGE_SIZE apart, but
			 * the span reserved here is DART_PAGE_SIZE *
			 * DART_PAGE_SIZE, i.e. 8 times larger;
			 * EX_CONFLICTOK tolerates the resulting overlap.
			 * Confirm that this over-reservation is intended.
			 */
			dva = idx * (DART_PAGE_SIZE / sizeof(uint64_t)) *
			    DART_PAGE_SIZE;
			dvaend = dva + DART_PAGE_SIZE * DART_PAGE_SIZE - 1;
			extent_alloc_region(as->as_dvamap, dvabase + dva,
			    dvaend - dva + 1, EX_WAITOK | EX_CONFLICTOK);
		} else {
			/* Unused entry: install an L2 table of our own. */
			as->as_l2[idx] = apldart_dmamem_alloc(sc->sc_dmat,
			    DART_PAGE_SIZE, DART_PAGE_SIZE);
			pa = APLDART_DMA_DVA(as->as_l2[idx]);
			l1[idx] = (pa >> sc->sc_shift) | DART_L1_TABLE;
		}
	}
	sc->sc_flush_tlb(sc, as->as_sid);

	/*
	 * Derive the stream's DMA tag from the parent tag, overriding
	 * the map create/destroy/load/unload methods with ours.
	 */
	memcpy(&as->as_dmat, sc->sc_dmat, sizeof(*sc->sc_dmat));
	as->as_dmat._cookie = as;
	as->as_dmat._dmamap_create = apldart_dmamap_create;
	as->as_dmat._dmamap_destroy = apldart_dmamap_destroy;
	as->as_dmat._dmamap_load = apldart_dmamap_load;
	as->as_dmat._dmamap_load_mbuf = apldart_dmamap_load_mbuf;
	as->as_dmat._dmamap_load_uio = apldart_dmamap_load_uio;
	as->as_dmat._dmamap_load_raw = apldart_dmamap_load_raw;
	as->as_dmat._dmamap_unload = apldart_dmamap_unload;
	as->as_dmat._flags |= BUS_DMA_COHERENT;
}
610
/*
 * Allocate and initialize the state for stream `sid'.
 *
 * On a locked or translating DART the work is delegated to
 * apldart_init_locked_stream().  Otherwise a DVA allocator and a
 * complete set of translation tables covering the aperture are
 * created, the tables are installed in the hardware, and the stream
 * and its translation are enabled.
 *
 * Returns the new stream; all allocations use waiting flags.
 */
struct apldart_stream *
apldart_alloc_stream(struct apldart_softc *sc, int sid)
{
	struct apldart_stream *as;
	paddr_t pa;
	volatile uint64_t *l1;
	int idx, ntte, nl1, nl2;
	uint32_t mask;

	as = malloc(sizeof(*as), M_DEVBUF, M_WAITOK | M_ZERO);

	as->as_sc = sc;
	as->as_sid = sid;

	mtx_init(&as->as_dvamap_mtx, IPL_HIGH);

	if (sc->sc_locked || sc->sc_translating) {
		apldart_init_locked_stream(as);
		return as;
	}

	/*
	 * Create the DVA allocator and mark everything outside the
	 * aperture as in use.
	 */
	as->as_dvamap = extent_create(sc->sc_dev.dv_xname, 0, ULONG_MAX,
	    M_DEVBUF, NULL, 0, EX_WAITOK | EX_NOCOALESCE);
	if (sc->sc_dvabase > 0) {
		extent_alloc_region(as->as_dvamap, 0, sc->sc_dvabase,
		    EX_WAITOK);
	}
	if (sc->sc_dvaend < ULONG_MAX) {
		extent_alloc_region(as->as_dvamap, sc->sc_dvaend + 1,
		    ULONG_MAX - sc->sc_dvaend, EX_WAITOK);
	}

	/*
	 * Build translation tables.  We pre-allocate the translation
	 * tables for the entire aperture such that we don't have to
	 * worry about growing them in an mpsafe manner later.
	 */

	/*
	 * Number of translation table entries, of L2 tables (each
	 * holding DART_PAGE_SIZE / 8 entries) and of L1 table pages
	 * (each holding DART_PAGE_SIZE / 8 L2 pointers).
	 */
	ntte = howmany((sc->sc_dvaend & sc->sc_dvamask), DART_PAGE_SIZE);
	nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
	nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));

	as->as_l1 = apldart_dmamem_alloc(sc->sc_dmat,
	    nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE);
	as->as_l2 = mallocarray(nl2, sizeof(*as->as_l2),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* Point every L1 entry at its own, initially empty, L2 table. */
	l1 = APLDART_DMA_KVA(as->as_l1);
	for (idx = 0; idx < nl2; idx++) {
		as->as_l2[idx] = apldart_dmamem_alloc(sc->sc_dmat,
		    DART_PAGE_SIZE, DART_PAGE_SIZE);
		pa = APLDART_DMA_DVA(as->as_l2[idx]);
		l1[idx] = (pa >> sc->sc_shift) | DART_L1_TABLE;
	}

	/* Install page tables. */
	pa = APLDART_DMA_DVA(as->as_l1);
	for (idx = 0; idx < nl1; idx++) {
		HWRITE4(sc, DART_TTBR(sc, sid, idx),
		    (pa >> DART_TTBR_SHIFT) | sc->sc_ttbr_valid);
		pa += DART_PAGE_SIZE;
	}
	sc->sc_flush_tlb(sc, sid);

	/* Enable this stream. */
	mask = HREAD4(sc, DART_SID_ENABLE(sc, sid / 32));
	mask |= (1U << (sid % 32));
	HWRITE4(sc, DART_SID_ENABLE(sc, sid / 32), mask);

	/* Enable translations. */
	HWRITE4(sc, DART_TCR(sc, sid), sc->sc_tcr_translate_enable);

	/*
	 * Derive the stream's DMA tag from the parent tag, overriding
	 * the map create/destroy/load/unload methods with ours.
	 */
	memcpy(&as->as_dmat, sc->sc_dmat, sizeof(*sc->sc_dmat));
	as->as_dmat._cookie = as;
	as->as_dmat._dmamap_create = apldart_dmamap_create;
	as->as_dmat._dmamap_destroy = apldart_dmamap_destroy;
	as->as_dmat._dmamap_load = apldart_dmamap_load;
	as->as_dmat._dmamap_load_mbuf = apldart_dmamap_load_mbuf;
	as->as_dmat._dmamap_load_uio = apldart_dmamap_load_uio;
	as->as_dmat._dmamap_load_raw = apldart_dmamap_load_raw;
	as->as_dmat._dmamap_unload = apldart_dmamap_unload;
	as->as_dmat._flags |= BUS_DMA_COHERENT;

	return as;
}
696
697 bus_dma_tag_t
apldart_map(void * cookie,uint32_t * cells,bus_dma_tag_t dmat)698 apldart_map(void *cookie, uint32_t *cells, bus_dma_tag_t dmat)
699 {
700 struct apldart_softc *sc = cookie;
701 uint32_t sid = cells[0];
702
703 KASSERT(sid < sc->sc_nsid);
704
705 if (sc->sc_as[sid] == NULL)
706 sc->sc_as[sid] = apldart_alloc_stream(sc, sid);
707
708 return &sc->sc_as[sid]->as_dmat;
709 }
710
/*
 * IOMMU reserve callback.  Intentionally empty: this driver does not
 * act on reservation requests.
 */
void
apldart_reserve(void *cookie, uint32_t *cells, bus_addr_t addr, bus_size_t size)
{
}
715
716 int
apldart_t8020_intr(void * arg)717 apldart_t8020_intr(void *arg)
718 {
719 struct apldart_softc *sc = arg;
720
721 panic("%s: error 0x%08x addr 0x%08x%08x\n",
722 sc->sc_dev.dv_xname, HREAD4(sc, DART_T8020_ERROR),
723 HREAD4(sc, DART_T8020_ERROR_ADDR_HI),
724 HREAD4(sc, DART_T8020_ERROR_ADDR_LO));
725 }
726
727 int
apldart_t8110_intr(void * arg)728 apldart_t8110_intr(void *arg)
729 {
730 struct apldart_softc *sc = arg;
731
732 panic("%s: error 0x%08x addr 0x%08x%08x\n",
733 sc->sc_dev.dv_xname, HREAD4(sc, DART_T8110_ERROR),
734 HREAD4(sc, DART_T8110_ERROR_ADDR_HI),
735 HREAD4(sc, DART_T8110_ERROR_ADDR_LO));
736 }
737
738 void
apldart_t8020_flush_tlb(struct apldart_softc * sc,int sid)739 apldart_t8020_flush_tlb(struct apldart_softc *sc, int sid)
740 {
741 uint32_t mask;
742
743 __asm volatile ("dsb sy" ::: "memory");
744
745 if (sid == -1)
746 mask = DART_ALL_STREAMS(sc);
747 else
748 mask = (1U << sid);
749
750 HWRITE4(sc, DART_T8020_TLB_SIDMASK, mask);
751 HWRITE4(sc, DART_T8020_TLB_CMD, DART_T8020_TLB_CMD_FLUSH);
752 while (HREAD4(sc, DART_T8020_TLB_CMD) & DART_T8020_TLB_CMD_BUSY)
753 CPU_BUSY_CYCLE();
754 }
755
756 void
apldart_t8110_flush_tlb(struct apldart_softc * sc,int sid)757 apldart_t8110_flush_tlb(struct apldart_softc *sc, int sid)
758 {
759 uint32_t cmd;
760
761 __asm volatile ("dsb sy" ::: "memory");
762
763 if (sid == -1)
764 cmd = DART_T8110_TLB_CMD_FLUSH_ALL;
765 else
766 cmd = DART_T8110_TLB_CMD_FLUSH_SID | sid;
767
768 HWRITE4(sc, DART_T8110_TLB_CMD, cmd);
769 while (HREAD4(sc, DART_T8110_TLB_CMD) & DART_T8110_TLB_CMD_BUSY)
770 CPU_BUSY_CYCLE();
771 }
772
773 volatile uint64_t *
apldart_lookup_tte(struct apldart_stream * as,bus_addr_t dva)774 apldart_lookup_tte(struct apldart_stream *as, bus_addr_t dva)
775 {
776 int idx = (dva & as->as_sc->sc_dvamask) / DART_PAGE_SIZE;
777 int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t));
778 int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t));
779 volatile uint64_t *l2;
780
781 l2 = APLDART_DMA_KVA(as->as_l2[l2_idx]);
782 return &l2[tte_idx];
783 }
784
/*
 * Enter translations for every segment of a DMA map that has already
 * been loaded by the parent DMA tag.
 *
 * For each segment a page-aligned range of device virtual addresses
 * is allocated (at the address already present in the segment when
 * BUS_DMA_FIXED is given), the translation table entries are written,
 * and the segment's ds_addr is replaced by the device virtual
 * address.  On allocation failure everything entered so far is undone
 * and the error from the extent allocator is returned.
 *
 * Returns 0 on success.
 */
int
apldart_load_map(struct apldart_stream *as, bus_dmamap_t map, int flags)
{
	struct apldart_softc *sc = as->as_sc;
	struct apldart_map_state *ams = map->_dm_cookie;
	volatile uint64_t *tte;
	int seg, error;

	/* For each segment. */
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		/* Offset of the segment within its first DART page. */
		psize_t off = pa - apldart_trunc_page(pa);
		psize_t start, end;
		u_long len, dva;

		/* Whole DART pages needed to cover the segment. */
		len = apldart_round_page(map->dm_segs[seg].ds_len + off);

		mtx_enter(&as->as_dvamap_mtx);
		if (flags & BUS_DMA_FIXED) {
			dva = apldart_trunc_page(map->dm_segs[seg].ds_addr);
			/* XXX truncate because "apple,dma-range" mismatch */
			if (dva > sc->sc_dvaend)
				dva &= sc->sc_dvamask;
			error = extent_alloc_region_with_descr(as->as_dvamap,
			    dva, len, EX_NOWAIT, &ams[seg].ams_er);
		} else {
			error = extent_alloc_with_descr(as->as_dvamap, len,
			    DART_PAGE_SIZE, 0, 0, EX_NOWAIT, &ams[seg].ams_er,
			    &dva);
		}
		mtx_leave(&as->as_dvamap_mtx);
		if (error) {
			/*
			 * Undo the segments entered so far; segments not
			 * yet entered are skipped by apldart_unload_map()
			 * because their ams_len is 0.
			 */
			apldart_unload_map(as, map);
			return error;
		}

		ams[seg].ams_dva = dva;
		ams[seg].ams_len = len;

		map->dm_segs[seg].ds_addr = dva + off;

		/*
		 * The first page is restricted to start at the (rounded
		 * down) segment offset; all following pages start at 0.
		 */
		pa = apldart_trunc_page(pa);
		start = apldart_trunc_offset(off);
		end = DART_PAGE_MASK;
		while (len > 0) {
			/*
			 * NOTE(review): len was rounded up to a multiple
			 * of DART_PAGE_SIZE above and only ever decreases
			 * by DART_PAGE_SIZE, so as written this condition
			 * does not appear to trigger.  Confirm whether the
			 * end limit was meant to be derived from the
			 * unrounded length.
			 */
			if (len < DART_PAGE_SIZE)
				end = apldart_round_offset(len) - 1;

			tte = apldart_lookup_tte(as, dva);
			*tte = (pa >> sc->sc_shift) | DART_L2_VALID |
			    DART_L2_START(start) | DART_L2_END(end);

			pa += DART_PAGE_SIZE;
			dva += DART_PAGE_SIZE;
			len -= DART_PAGE_SIZE;
			start = 0;
		}
	}

	/* One flush for the whole map. */
	sc->sc_flush_tlb(sc, as->as_sid);

	return 0;
}
848
849 void
apldart_unload_map(struct apldart_stream * as,bus_dmamap_t map)850 apldart_unload_map(struct apldart_stream *as, bus_dmamap_t map)
851 {
852 struct apldart_softc *sc = as->as_sc;
853 struct apldart_map_state *ams = map->_dm_cookie;
854 volatile uint64_t *tte;
855 int seg, error;
856
857 /* For each segment. */
858 for (seg = 0; seg < map->dm_nsegs; seg++) {
859 u_long len, dva;
860
861 if (ams[seg].ams_len == 0)
862 continue;
863
864 dva = ams[seg].ams_dva;
865 len = ams[seg].ams_len;
866
867 while (len > 0) {
868 tte = apldart_lookup_tte(as, dva);
869 *tte = DART_L2_INVAL;
870
871 dva += DART_PAGE_SIZE;
872 len -= DART_PAGE_SIZE;
873 }
874
875 mtx_enter(&as->as_dvamap_mtx);
876 error = extent_free(as->as_dvamap, ams[seg].ams_dva,
877 ams[seg].ams_len, EX_NOWAIT);
878 mtx_leave(&as->as_dvamap_mtx);
879
880 KASSERT(error == 0);
881
882 ams[seg].ams_dva = 0;
883 ams[seg].ams_len = 0;
884 }
885
886 sc->sc_flush_tlb(sc, as->as_sid);
887 }
888
889 int
apldart_dmamap_create(bus_dma_tag_t t,bus_size_t size,int nsegments,bus_size_t maxsegsz,bus_size_t boundary,int flags,bus_dmamap_t * dmamap)890 apldart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
891 bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
892 {
893 struct apldart_stream *as = t->_cookie;
894 struct apldart_softc *sc = as->as_sc;
895 struct apldart_map_state *ams;
896 bus_dmamap_t map;
897 int error;
898
899 error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size, nsegments,
900 maxsegsz, boundary, flags, &map);
901 if (error)
902 return error;
903
904 ams = mallocarray(map->_dm_segcnt, sizeof(*ams), M_DEVBUF,
905 (flags & BUS_DMA_NOWAIT) ? (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO));
906 if (ams == NULL) {
907 sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
908 return ENOMEM;
909 }
910
911 map->_dm_cookie = ams;
912 *dmamap = map;
913 return 0;
914 }
915
916 void
apldart_dmamap_destroy(bus_dma_tag_t t,bus_dmamap_t map)917 apldart_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
918 {
919 struct apldart_stream *as = t->_cookie;
920 struct apldart_softc *sc = as->as_sc;
921 struct apldart_map_state *ams = map->_dm_cookie;
922
923 if (map->dm_nsegs)
924 apldart_dmamap_unload(t, map);
925
926 free(ams, M_DEVBUF, map->_dm_segcnt * sizeof(*ams));
927 sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
928 }
929
930 int
apldart_dmamap_load(bus_dma_tag_t t,bus_dmamap_t map,void * buf,size_t buflen,struct proc * p,int flags)931 apldart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
932 size_t buflen, struct proc *p, int flags)
933 {
934 struct apldart_stream *as = t->_cookie;
935 struct apldart_softc *sc = as->as_sc;
936 int error;
937
938 error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
939 buf, buflen, p, flags);
940 if (error)
941 return error;
942
943 error = apldart_load_map(as, map, flags);
944 if (error)
945 sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
946
947 return error;
948 }
949
950 int
apldart_dmamap_load_mbuf(bus_dma_tag_t t,bus_dmamap_t map,struct mbuf * m,int flags)951 apldart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map,
952 struct mbuf *m, int flags)
953 {
954 struct apldart_stream *as = t->_cookie;
955 struct apldart_softc *sc = as->as_sc;
956 int error;
957
958 error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
959 m, flags);
960 if (error)
961 return error;
962
963 error = apldart_load_map(as, map, flags);
964 if (error)
965 sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
966
967 return error;
968 }
969
970 int
apldart_dmamap_load_uio(bus_dma_tag_t t,bus_dmamap_t map,struct uio * uio,int flags)971 apldart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map,
972 struct uio *uio, int flags)
973 {
974 struct apldart_stream *as = t->_cookie;
975 struct apldart_softc *sc = as->as_sc;
976 int error;
977
978 error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
979 uio, flags);
980 if (error)
981 return error;
982
983 error = apldart_load_map(as, map, flags);
984 if (error)
985 sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
986
987 return error;
988 }
989
990 int
apldart_dmamap_load_raw(bus_dma_tag_t t,bus_dmamap_t map,bus_dma_segment_t * segs,int nsegs,bus_size_t size,int flags)991 apldart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
992 bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
993 {
994 struct apldart_stream *as = t->_cookie;
995 struct apldart_softc *sc = as->as_sc;
996 int i, error;
997
998 if (flags & BUS_DMA_FIXED) {
999 if (map->dm_nsegs != nsegs)
1000 return EINVAL;
1001 for (i = 0; i < nsegs; i++) {
1002 if (map->dm_segs[i].ds_len != segs[i].ds_len)
1003 return EINVAL;
1004 map->dm_segs[i]._ds_paddr = segs[i].ds_addr;
1005 map->dm_segs[i]._ds_vaddr = segs[i]._ds_vaddr;
1006 }
1007 } else {
1008 error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
1009 segs, nsegs, size, flags);
1010 if (error)
1011 return error;
1012 }
1013
1014 error = apldart_load_map(as, map, flags);
1015 if (error)
1016 sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
1017
1018 return error;
1019 }
1020
1021 void
apldart_dmamap_unload(bus_dma_tag_t t,bus_dmamap_t map)1022 apldart_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
1023 {
1024 struct apldart_stream *as = t->_cookie;
1025 struct apldart_softc *sc = as->as_sc;
1026
1027 apldart_unload_map(as, map);
1028 sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
1029 }
1030
1031 struct apldart_dmamem *
apldart_dmamem_alloc(bus_dma_tag_t dmat,bus_size_t size,bus_size_t align)1032 apldart_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
1033 {
1034 struct apldart_dmamem *adm;
1035 int nsegs;
1036
1037 adm = malloc(sizeof(*adm), M_DEVBUF, M_WAITOK | M_ZERO);
1038 adm->adm_size = size;
1039
1040 if (bus_dmamap_create(dmat, size, 1, size, 0,
1041 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &adm->adm_map) != 0)
1042 goto admfree;
1043
1044 if (bus_dmamem_alloc(dmat, size, align, 0, &adm->adm_seg, 1,
1045 &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
1046 goto destroy;
1047
1048 if (bus_dmamem_map(dmat, &adm->adm_seg, nsegs, size,
1049 &adm->adm_kva, BUS_DMA_WAITOK | BUS_DMA_NOCACHE) != 0)
1050 goto free;
1051
1052 if (bus_dmamap_load_raw(dmat, adm->adm_map, &adm->adm_seg,
1053 nsegs, size, BUS_DMA_WAITOK) != 0)
1054 goto unmap;
1055
1056 return adm;
1057
1058 unmap:
1059 bus_dmamem_unmap(dmat, adm->adm_kva, size);
1060 free:
1061 bus_dmamem_free(dmat, &adm->adm_seg, 1);
1062 destroy:
1063 bus_dmamap_destroy(dmat, adm->adm_map);
1064 admfree:
1065 free(adm, M_DEVBUF, sizeof(*adm));
1066
1067 return NULL;
1068 }
1069
1070 void
apldart_dmamem_free(bus_dma_tag_t dmat,struct apldart_dmamem * adm)1071 apldart_dmamem_free(bus_dma_tag_t dmat, struct apldart_dmamem *adm)
1072 {
1073 bus_dmamem_unmap(dmat, adm->adm_kva, adm->adm_size);
1074 bus_dmamem_free(dmat, &adm->adm_seg, 1);
1075 bus_dmamap_destroy(dmat, adm->adm_map);
1076 free(adm, M_DEVBUF, sizeof(*adm));
1077 }
1078