/*	$OpenBSD: apldart.c,v 1.2 2021/03/29 17:04:00 kettenis Exp $	*/
/*
 * Copyright (c) 2021 Mark Kettenis <kettenis@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/mutex.h>

#include <machine/intr.h>
#include <machine/bus.h>
#include <machine/fdt.h>

#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_misc.h>
#include <dev/ofw/fdt.h>

/*
 * This driver is based on preliminary device tree bindings and will
 * almost certainly need changes once the official bindings land in
 * mainline Linux.  Support for these preliminary bindings will be
 * dropped as soon as official bindings are available.
 *
 * This driver largely ignores stream IDs and simply uses a single
 * translation table for all the devices that it serves.  This is good
 * enough for the PCIe host bridge that serves the on-board devices on
 * the current generation Apple Silicon Macs, as these only have a
 * single PCIe device behind each DART.
 */

#define DART_TLB_OP		0x0020
#define  DART_TLB_OP_FLUSH	(1 << 20)
#define  DART_TLB_OP_BUSY	(1 << 2)
#define DART_TLB_OP_SIDMASK	0x0034
#define DART_CONFIG(sid)	(0x0100 + 4 * (sid))
#define  DART_CONFIG_TXEN	(1 << 7)
#define DART_TTBR(sid, idx)	(0x0200 + 16 * (sid) + 4 * (idx))
#define  DART_TTBR_VALID	(1U << 31)
#define  DART_TTBR_SHIFT	12

#define DART_PAGE_SIZE		16384
#define DART_PAGE_MASK		(DART_PAGE_SIZE - 1)

#define DART_L1_TABLE		0xb
#define DART_L2_INVAL		0x0
#define DART_L2_PAGE		0x3

inline paddr_t
apldart_round_page(paddr_t pa)
{
	return ((pa + DART_PAGE_MASK) & ~DART_PAGE_MASK);
}

inline paddr_t
apldart_trunc_page(paddr_t pa)
{
	return (pa & ~DART_PAGE_MASK);
}

#define HREAD4(sc, reg)							\
	(bus_space_read_4((sc)->sc_iot, (sc)->sc_ioh, (reg)))
#define HWRITE4(sc, reg, val)						\
	bus_space_write_4((sc)->sc_iot, (sc)->sc_ioh, (reg), (val))

struct apldart_softc {
	struct device		sc_dev;
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;
	bus_dma_tag_t		sc_dmat;

	uint32_t		sc_sid_mask;
	int			sc_nsid;

	bus_addr_t		sc_dvabase;
	bus_addr_t		sc_dvaend;
	struct extent		*sc_dvamap;
	struct mutex		sc_dvamap_mtx;

	struct apldart_dmamem	*sc_l1;
	struct apldart_dmamem	**sc_l2;

	struct machine_bus_dma_tag sc_bus_dmat;
	struct iommu_device	sc_id;
};

struct apldart_map_state {
	struct extent_region	ams_er;
	bus_addr_t		ams_dva;
	bus_size_t		ams_len;
};

struct apldart_dmamem {
	bus_dmamap_t		adm_map;
	bus_dma_segment_t	adm_seg;
	size_t			adm_size;
	caddr_t			adm_kva;
};

#define APLDART_DMA_MAP(_adm)	((_adm)->adm_map)
#define APLDART_DMA_LEN(_adm)	((_adm)->adm_size)
#define APLDART_DMA_DVA(_adm)	((_adm)->adm_map->dm_segs[0].ds_addr)
#define APLDART_DMA_KVA(_adm)	((void *)(_adm)->adm_kva)

struct apldart_dmamem *apldart_dmamem_alloc(bus_dma_tag_t, bus_size_t,
	    bus_size_t);
void	apldart_dmamem_free(bus_dma_tag_t, struct apldart_dmamem *);

int	apldart_match(struct device *, void *, void *);
void	apldart_attach(struct device *, struct device *, void *);

struct cfattach apldart_ca = {
	sizeof (struct apldart_softc), apldart_match, apldart_attach
};

struct cfdriver apldart_cd = {
	NULL, "apldart", DV_DULL
};

bus_dma_tag_t apldart_map(void *, uint32_t *, bus_dma_tag_t);
int	apldart_intr(void *);

void	apldart_flush_tlb(struct apldart_softc *);
int	apldart_load_map(struct apldart_softc *, bus_dmamap_t);
void	apldart_unload_map(struct apldart_softc *, bus_dmamap_t);

int	apldart_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
	    bus_size_t, int, bus_dmamap_t *);
void	apldart_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int	apldart_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *,
	    bus_size_t, struct proc *, int);
int	apldart_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t,
	    struct mbuf *, int);
int	apldart_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t,
	    struct uio *, int);
int	apldart_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
	    bus_dma_segment_t *, int, bus_size_t, int);
void	apldart_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);

int
apldart_match(struct device *parent, void *match, void *aux)
{
	struct fdt_attach_args *faa = aux;

	return OF_is_compatible(faa->fa_node, "apple,dart-m1");
}

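/*
 * With 16 KB pages, the (almost) 4 GB aperture set up below needs
 * 262143 TTEs.  A 16 KB page holds 2048 64-bit TTEs, so 128 L2
 * pages cover the entire aperture, and the 128 L1 pointers to them
 * fit in a single L1 page.
 */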
void
apldart_attach(struct device *parent, struct device *self, void *aux)
{
	struct apldart_softc *sc = (struct apldart_softc *)self;
	struct fdt_attach_args *faa = aux;
	paddr_t pa;
	volatile uint64_t *l1;
	int ntte, nl1, nl2;
	int sid, idx;

	if (faa->fa_nreg < 1) {
		printf(": no registers\n");
		return;
	}

	sc->sc_iot = faa->fa_iot;
	if (bus_space_map(sc->sc_iot, faa->fa_reg[0].addr,
	    faa->fa_reg[0].size, 0, &sc->sc_ioh)) {
		printf(": can't map registers\n");
		return;
	}

	sc->sc_dmat = faa->fa_dmat;

	printf("\n");

	if (OF_getproplen(faa->fa_node, "pcie-dart") != 0)
		return;

	sc->sc_sid_mask = OF_getpropint(faa->fa_node, "sid-mask", 0xffff);
	sc->sc_nsid = fls(sc->sc_sid_mask);

	/*
	 * Skip the first page to help catch bugs where a device is
	 * doing DMA to/from address zero because we didn't properly
	 * set up the DMA transfer.  Skip the last page to avoid using
	 * the address reserved for MSIs.
	 */
	sc->sc_dvabase = DART_PAGE_SIZE;
	sc->sc_dvaend = 0xffffffff - DART_PAGE_SIZE;

	/* Disable translations. */
	for (sid = 0; sid < sc->sc_nsid; sid++)
		HWRITE4(sc, DART_CONFIG(sid), 0);

	/* Remove page tables. */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		for (idx = 0; idx < 4; idx++)
			HWRITE4(sc, DART_TTBR(sid, idx), 0);
	}
	apldart_flush_tlb(sc);

	/*
	 * Build translation tables.  We pre-allocate the translation
	 * tables for the entire aperture such that we don't have to
	 * worry about growing them in an mpsafe manner later.
	 */

	ntte = howmany(sc->sc_dvaend, DART_PAGE_SIZE);
	nl2 = howmany(ntte, DART_PAGE_SIZE / sizeof(uint64_t));
	nl1 = howmany(nl2, DART_PAGE_SIZE / sizeof(uint64_t));

	sc->sc_l1 = apldart_dmamem_alloc(sc->sc_dmat,
	    nl1 * DART_PAGE_SIZE, DART_PAGE_SIZE);
	sc->sc_l2 = mallocarray(nl2, sizeof(*sc->sc_l2),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	l1 = APLDART_DMA_KVA(sc->sc_l1);
	for (idx = 0; idx < nl2; idx++) {
		sc->sc_l2[idx] = apldart_dmamem_alloc(sc->sc_dmat,
		    DART_PAGE_SIZE, DART_PAGE_SIZE);
		l1[idx] = APLDART_DMA_DVA(sc->sc_l2[idx]) | DART_L1_TABLE;
	}

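	/*
	 * A DART_TTBR register takes the physical address of a
	 * translation table page in units of 4 KB (1 << DART_TTBR_SHIFT)
	 * with DART_TTBR_VALID set; since we don't distinguish between
	 * stream IDs, the same L1 table is installed for all of them.
	 */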
	/* Install page tables. */
	for (sid = 0; sid < sc->sc_nsid; sid++) {
		pa = APLDART_DMA_DVA(sc->sc_l1);
		for (idx = 0; idx < nl1; idx++) {
			HWRITE4(sc, DART_TTBR(sid, idx),
			    (pa >> DART_TTBR_SHIFT) | DART_TTBR_VALID);
			pa += DART_PAGE_SIZE;
		}
	}
	apldart_flush_tlb(sc);

	/* Enable translations. */
	for (sid = 0; sid < sc->sc_nsid; sid++)
		HWRITE4(sc, DART_CONFIG(sid), DART_CONFIG_TXEN);

	fdt_intr_establish(faa->fa_node, IPL_NET, apldart_intr,
	    sc, sc->sc_dev.dv_xname);

	sc->sc_dvamap = extent_create(sc->sc_dev.dv_xname,
	    sc->sc_dvabase, sc->sc_dvaend, M_DEVBUF,
	    NULL, 0, EX_NOCOALESCE);
	mtx_init(&sc->sc_dvamap_mtx, IPL_HIGH);

	memcpy(&sc->sc_bus_dmat, sc->sc_dmat, sizeof(sc->sc_bus_dmat));
	sc->sc_bus_dmat._cookie = sc;
	sc->sc_bus_dmat._dmamap_create = apldart_dmamap_create;
	sc->sc_bus_dmat._dmamap_destroy = apldart_dmamap_destroy;
	sc->sc_bus_dmat._dmamap_load = apldart_dmamap_load;
	sc->sc_bus_dmat._dmamap_load_mbuf = apldart_dmamap_load_mbuf;
	sc->sc_bus_dmat._dmamap_load_uio = apldart_dmamap_load_uio;
	sc->sc_bus_dmat._dmamap_load_raw = apldart_dmamap_load_raw;
	sc->sc_bus_dmat._dmamap_unload = apldart_dmamap_unload;
	sc->sc_bus_dmat._flags |= BUS_DMA_COHERENT;

	sc->sc_id.id_node = faa->fa_node;
	sc->sc_id.id_cookie = sc;
	sc->sc_id.id_map = apldart_map;
	iommu_device_register(&sc->sc_id);
}

bus_dma_tag_t
apldart_map(void *cookie, uint32_t *cells, bus_dma_tag_t dmat)
{
	struct apldart_softc *sc = cookie;

	return &sc->sc_bus_dmat;
}

int
apldart_intr(void *arg)
{
	struct apldart_softc *sc = arg;

	panic("%s: %s", sc->sc_dev.dv_xname, __func__);
}

void
apldart_flush_tlb(struct apldart_softc *sc)
{
	__asm volatile ("dsb sy" ::: "memory");

	HWRITE4(sc, DART_TLB_OP_SIDMASK, sc->sc_sid_mask);
	HWRITE4(sc, DART_TLB_OP, DART_TLB_OP_FLUSH);
	while (HREAD4(sc, DART_TLB_OP) & DART_TLB_OP_BUSY)
		CPU_BUSY_CYCLE();
}

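/*
 * Resolve a DVA to its TTE in two steps: dva / DART_PAGE_SIZE gives
 * the overall TTE index; dividing that index by the 2048 TTEs that
 * fit in a 16 KB page selects the L2 page, and the remainder selects
 * the entry within it.
 */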
volatile uint64_t *
apldart_lookup_tte(struct apldart_softc *sc, bus_addr_t dva)
{
	int idx = dva / DART_PAGE_SIZE;
	int l2_idx = idx / (DART_PAGE_SIZE / sizeof(uint64_t));
	int tte_idx = idx % (DART_PAGE_SIZE / sizeof(uint64_t));
	volatile uint64_t *l2;

	l2 = APLDART_DMA_KVA(sc->sc_l2[l2_idx]);
	return &l2[tte_idx];
}

int
apldart_load_map(struct apldart_softc *sc, bus_dmamap_t map)
{
	struct apldart_map_state *ams = map->_dm_cookie;
	volatile uint64_t *tte;
	int seg, error;

	/* For each segment. */
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		paddr_t pa = map->dm_segs[seg]._ds_paddr;
		psize_t off = pa - apldart_trunc_page(pa);
		u_long len, dva;

		len = apldart_round_page(map->dm_segs[seg].ds_len + off);

		mtx_enter(&sc->sc_dvamap_mtx);
		error = extent_alloc_with_descr(sc->sc_dvamap, len,
		    DART_PAGE_SIZE, 0, 0, EX_NOWAIT, &ams[seg].ams_er, &dva);
		mtx_leave(&sc->sc_dvamap_mtx);
		if (error) {
			apldart_unload_map(sc, map);
			return error;
		}

		ams[seg].ams_dva = dva;
		ams[seg].ams_len = len;

		map->dm_segs[seg].ds_addr = dva + off;

		pa = apldart_trunc_page(pa);
		while (len > 0) {
			tte = apldart_lookup_tte(sc, dva);
			*tte = pa | DART_L2_PAGE;

			pa += DART_PAGE_SIZE;
			dva += DART_PAGE_SIZE;
			len -= DART_PAGE_SIZE;
		}
	}

	apldart_flush_tlb(sc);

	return 0;
}

void
apldart_unload_map(struct apldart_softc *sc, bus_dmamap_t map)
{
	struct apldart_map_state *ams = map->_dm_cookie;
	volatile uint64_t *tte;
	int seg, error;

	/* For each segment. */
	for (seg = 0; seg < map->dm_nsegs; seg++) {
		u_long len, dva;

		if (ams[seg].ams_len == 0)
			continue;

		dva = ams[seg].ams_dva;
		len = ams[seg].ams_len;

		while (len > 0) {
			tte = apldart_lookup_tte(sc, dva);
			*tte = DART_L2_INVAL;

			dva += DART_PAGE_SIZE;
			len -= DART_PAGE_SIZE;
		}

		mtx_enter(&sc->sc_dvamap_mtx);
		error = extent_free(sc->sc_dvamap, ams[seg].ams_dva,
		    ams[seg].ams_len, EX_NOWAIT);
		mtx_leave(&sc->sc_dvamap_mtx);

		KASSERT(error == 0);

		ams[seg].ams_dva = 0;
		ams[seg].ams_len = 0;
	}

	apldart_flush_tlb(sc);
}

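/*
 * The bus_dma methods below wrap those of the parent tag.  Creating
 * a map also allocates per-segment state for the DVA allocations;
 * each load first lets the parent tag assemble the physical segments
 * and then calls apldart_load_map() to allocate DVA space, enter the
 * translations and rewrite ds_addr; unload tears this down again.
 */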
int
apldart_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamap)
{
	struct apldart_softc *sc = t->_cookie;
	struct apldart_map_state *ams;
	bus_dmamap_t map;
	int error;

	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size, nsegments,
	    maxsegsz, boundary, flags, &map);
	if (error)
		return error;

	ams = mallocarray(map->_dm_segcnt, sizeof(*ams), M_DEVBUF,
	    (flags & BUS_DMA_NOWAIT) ? (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO));
	if (ams == NULL) {
		sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
		return ENOMEM;
	}

	map->_dm_cookie = ams;
	*dmamap = map;
	return 0;
}

void
apldart_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct apldart_softc *sc = t->_cookie;
	struct apldart_map_state *ams = map->_dm_cookie;

	free(ams, M_DEVBUF, map->_dm_segcnt * sizeof(*ams));
	sc->sc_dmat->_dmamap_destroy(sc->sc_dmat, map);
}

int
apldart_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct apldart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load(sc->sc_dmat, map,
	    buf, buflen, p, flags);
	if (error)
		return error;

	error = apldart_load_map(sc, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
apldart_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map,
    struct mbuf *m, int flags)
{
	struct apldart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load_mbuf(sc->sc_dmat, map,
	    m, flags);
	if (error)
		return error;

	error = apldart_load_map(sc, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
apldart_dmamap_load_uio(bus_dma_tag_t t, bus_dmamap_t map,
    struct uio *uio, int flags)
{
	struct apldart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load_uio(sc->sc_dmat, map,
	    uio, flags);
	if (error)
		return error;

	error = apldart_load_map(sc, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

int
apldart_dmamap_load_raw(bus_dma_tag_t t, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
{
	struct apldart_softc *sc = t->_cookie;
	int error;

	error = sc->sc_dmat->_dmamap_load_raw(sc->sc_dmat, map,
	    segs, nsegs, size, flags);
	if (error)
		return error;

	error = apldart_load_map(sc, map);
	if (error)
		sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);

	return error;
}

void
apldart_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct apldart_softc *sc = t->_cookie;

	apldart_unload_map(sc, map);
	sc->sc_dmat->_dmamap_unload(sc->sc_dmat, map);
}

struct apldart_dmamem *
apldart_dmamem_alloc(bus_dma_tag_t dmat, bus_size_t size, bus_size_t align)
{
	struct apldart_dmamem *adm;
	int nsegs;

	adm = malloc(sizeof(*adm), M_DEVBUF, M_WAITOK | M_ZERO);
	adm->adm_size = size;

	if (bus_dmamap_create(dmat, size, 1, size, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &adm->adm_map) != 0)
		goto admfree;

	if (bus_dmamem_alloc(dmat, size, align, 0, &adm->adm_seg, 1,
	    &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
		goto destroy;

	if (bus_dmamem_map(dmat, &adm->adm_seg, nsegs, size,
	    &adm->adm_kva, BUS_DMA_WAITOK | BUS_DMA_NOCACHE) != 0)
		goto free;

	if (bus_dmamap_load_raw(dmat, adm->adm_map, &adm->adm_seg,
	    nsegs, size, BUS_DMA_WAITOK) != 0)
		goto unmap;

	return adm;

unmap:
	bus_dmamem_unmap(dmat, adm->adm_kva, size);
free:
	bus_dmamem_free(dmat, &adm->adm_seg, 1);
destroy:
	bus_dmamap_destroy(dmat, adm->adm_map);
admfree:
	free(adm, M_DEVBUF, sizeof(*adm));

	return NULL;
}

void
apldart_dmamem_free(bus_dma_tag_t dmat, struct apldart_dmamem *adm)
{
	bus_dmamem_unmap(dmat, adm->adm_kva, adm->adm_size);
	bus_dmamem_free(dmat, &adm->adm_seg, 1);
	bus_dmamap_destroy(dmat, adm->adm_map);
	free(adm, M_DEVBUF, sizeof(*adm));
}