1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23 * All rights reserved.
24 */
25 /*
26 * Copyright (c) 2009, Intel Corporation.
27 * All rights reserved.
28 */
29
30 /*
31 * DVMA code
32 * This file contains Intel IOMMU code that deals with DVMA
33 * i.e. DMA remapping.
34 */
35
36 #include <sys/sysmacros.h>
37 #include <sys/pcie.h>
38 #include <sys/pci_cfgspace.h>
39 #include <vm/hat_i86.h>
40 #include <sys/memlist.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/modhash.h>
44 #include <sys/immu.h>
45 #include <sys/x86_archext.h>
46 #include <sys/archsystm.h>
47
48 #undef TEST
49
/*
 * Macros based on PCI spec
 */
#define	IMMU_PCI_REV2CLASS(r)   ((r) >> 8)  /* classcode from revid */
#define	IMMU_PCI_CLASS2BASE(c)  ((c) >> 16) /* baseclass from classcode */
/*
 * Subclass from classcode. The original definition carried a stray
 * trailing semicolon, which would break any use of the macro inside
 * an expression (e.g. a condition); removed.
 */
#define	IMMU_PCI_CLASS2SUB(c)   (((c) >> 8) & 0xff)

#define	IMMU_CONTIG_PADDR(d, p) \
	((d).dck_paddr && ((d).dck_paddr + IMMU_PAGESIZE) == (p))
59
/*
 * Walk-state passed through immu_walk_ancestor() callbacks while
 * resolving the IOMMU unit, domain and domain-owner dip for a device.
 */
typedef struct dvma_arg {
	immu_t *dva_immu;	/* IOMMU unit for the walk */
	dev_info_t *dva_rdip;	/* requesting device's dip */
	dev_info_t *dva_ddip;	/* domain-owner dip found so far */
	domain_t *dva_domain;	/* domain found so far */
	int dva_level;		/* walk level (not read in this file) */
	immu_flags_t dva_flags;	/* sleep/nosleep + unity flags */
	list_t *dva_list;	/* special-device list to search (e.g. LPC) */
	int dva_error;		/* DDI_SUCCESS / DDI_FAILURE result */
} dvma_arg_t;
70
71 static domain_t *domain_create(immu_t *immu, dev_info_t *ddip,
72 dev_info_t *rdip, immu_flags_t immu_flags);
73 static immu_devi_t *create_immu_devi(dev_info_t *rdip, int bus,
74 int dev, int func, immu_flags_t immu_flags);
75 static void destroy_immu_devi(immu_devi_t *immu_devi);
76 static boolean_t dvma_map(domain_t *domain, uint64_t sdvma,
77 uint64_t nvpages, immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
78 immu_flags_t immu_flags);
79
80 /* Extern globals */
81 extern struct memlist *phys_install;
82
83 /*
84 * iommulib interface functions.
85 */
86 static int immu_probe(iommulib_handle_t unitp, dev_info_t *dip);
87 static int immu_allochdl(iommulib_handle_t handle,
88 dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
89 int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep);
90 static int immu_freehdl(iommulib_handle_t handle,
91 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
92 static int immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
93 dev_info_t *rdip, ddi_dma_handle_t dma_handle, struct ddi_dma_req *dma_req,
94 ddi_dma_cookie_t *cookiep, uint_t *ccountp);
95 static int immu_unbindhdl(iommulib_handle_t handle,
96 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle);
97 static int immu_sync(iommulib_handle_t handle, dev_info_t *dip,
98 dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off, size_t len,
99 uint_t cachefl);
100 static int immu_win(iommulib_handle_t handle, dev_info_t *dip,
101 dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
102 off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep, uint_t *ccountp);
103 static int immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
104 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
105 struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao);
106 static int immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
107 dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao);
108 static int immu_map(iommulib_handle_t handle, dev_info_t *dip,
109 dev_info_t *rdip, struct ddi_dma_req *dmareq,
110 ddi_dma_handle_t *dma_handle);
111 static int immu_mctl(iommulib_handle_t handle, dev_info_t *dip,
112 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
113 enum ddi_dma_ctlops request, off_t *offp, size_t *lenp,
114 caddr_t *objpp, uint_t cachefl);
115
/* static Globals */

/*
 * Used to setup DMA objects (memory regions)
 * for DMA reads by IOMMU units
 */
static ddi_dma_attr_t immu_dma_attr = {
	DMA_ATTR_V0,
	0U,			/* dma_attr_addr_lo */
	0xffffffffffffffffULL,	/* dma_attr_addr_hi: any 64-bit address */
	0xffffffffU,		/* dma_attr_count_max */
	MMU_PAGESIZE, /* MMU page aligned */
	0x1,			/* dma_attr_burstsizes */
	0x1,			/* dma_attr_minxfer */
	0xffffffffU,		/* dma_attr_maxxfer */
	0xffffffffffffffffULL,	/* dma_attr_seg */
	1,			/* dma_attr_sgllen: single cookie */
	4,			/* dma_attr_granular */
	0			/* dma_attr_flags */
};
136
/*
 * Access attributes for the pages the IOMMU hardware itself reads
 * (root/context/page tables): no byte swapping, strict ordering.
 */
static ddi_device_acc_attr_t immu_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};
142
/*
 * iommulib dispatch table: the DDI DMA entry points iommulib routes
 * to this driver for devices behind an Intel IOMMU unit.
 */
struct iommulib_ops immulib_ops = {
	IOMMU_OPS_VERSION,
	INTEL_IOMMU,
	"Intel IOMMU",
	NULL,	/* NOTE(review): private-data slot, unused here — confirm */
	immu_probe,
	immu_allochdl,
	immu_freehdl,
	immu_bindhdl,
	immu_unbindhdl,
	immu_sync,
	immu_win,
	immu_mapobject,
	immu_unmapobject,
	immu_map,
	immu_mctl
};
160
/*
 * Fake physical address range used to set up initial prealloc mappings.
 * This memory is never actually accessed. It is mapped read-only,
 * and is overwritten as soon as the first DMA bind operation is
 * performed. Since 0 is a special case, just start at the 2nd
 * physical page.
 */

static immu_dcookie_t immu_precookie = { MMU_PAGESIZE, IMMU_NPREPTES };

/* globals private to this file */
static kmutex_t immu_domain_lock;
/* NOTE(review): presumably immu_domain_lock guards these lists — confirm */
static list_t immu_unity_domain_list;
static list_t immu_xlate_domain_list;

/* structure used to store idx into each level of the page tables */
typedef struct xlate {
	int xlt_level;		/* pagetable level */
	uint_t xlt_idx;		/* index within that level's table */
	pgtable_t *xlt_pgtable;	/* pagetable page at that level */
} xlate_t;

/* 0 is reserved by Vt-d spec. Solaris reserves 1 */
#define	IMMU_UNITY_DID	1

/* seg:bus:dev.func -> domain_t cache; see bdf_domain_lookup/insert */
static mod_hash_t *bdf_domain_hash;

/* tunables; set elsewhere — semantics not visible in this file */
int immu_use_alh;
int immu_use_tm;
190
191 static domain_t *
bdf_domain_lookup(immu_devi_t * immu_devi)192 bdf_domain_lookup(immu_devi_t *immu_devi)
193 {
194 domain_t *domain;
195 int16_t seg = immu_devi->imd_seg;
196 int16_t bus = immu_devi->imd_bus;
197 int16_t devfunc = immu_devi->imd_devfunc;
198 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
199
200 if (seg < 0 || bus < 0 || devfunc < 0) {
201 return (NULL);
202 }
203
204 domain = NULL;
205 if (mod_hash_find(bdf_domain_hash,
206 (void *)bdf, (void *)&domain) == 0) {
207 ASSERT(domain);
208 ASSERT(domain->dom_did > 0);
209 return (domain);
210 } else {
211 return (NULL);
212 }
213 }
214
215 static void
bdf_domain_insert(immu_devi_t * immu_devi,domain_t * domain)216 bdf_domain_insert(immu_devi_t *immu_devi, domain_t *domain)
217 {
218 int16_t seg = immu_devi->imd_seg;
219 int16_t bus = immu_devi->imd_bus;
220 int16_t devfunc = immu_devi->imd_devfunc;
221 uintptr_t bdf = (seg << 16 | bus << 8 | devfunc);
222
223 if (seg < 0 || bus < 0 || devfunc < 0) {
224 return;
225 }
226
227 (void) mod_hash_insert(bdf_domain_hash, (void *)bdf, (void *)domain);
228 }
229
230 static int
match_lpc(dev_info_t * pdip,void * arg)231 match_lpc(dev_info_t *pdip, void *arg)
232 {
233 immu_devi_t *immu_devi;
234 dvma_arg_t *dvap = (dvma_arg_t *)arg;
235
236 if (list_is_empty(dvap->dva_list)) {
237 return (DDI_WALK_TERMINATE);
238 }
239
240 immu_devi = list_head(dvap->dva_list);
241 for (; immu_devi; immu_devi = list_next(dvap->dva_list,
242 immu_devi)) {
243 if (immu_devi->imd_dip == pdip) {
244 dvap->dva_ddip = pdip;
245 dvap->dva_error = DDI_SUCCESS;
246 return (DDI_WALK_TERMINATE);
247 }
248 }
249
250 return (DDI_WALK_CONTINUE);
251 }
252
253 static void
immu_devi_set_spclist(dev_info_t * dip,immu_t * immu)254 immu_devi_set_spclist(dev_info_t *dip, immu_t *immu)
255 {
256 list_t *spclist = NULL;
257 immu_devi_t *immu_devi;
258
259 immu_devi = IMMU_DEVI(dip);
260 if (immu_devi->imd_display == B_TRUE) {
261 spclist = &(immu->immu_dvma_gfx_list);
262 } else if (immu_devi->imd_lpc == B_TRUE) {
263 spclist = &(immu->immu_dvma_lpc_list);
264 }
265
266 if (spclist) {
267 mutex_enter(&(immu->immu_lock));
268 list_insert_head(spclist, immu_devi);
269 mutex_exit(&(immu->immu_lock));
270 }
271 }
272
273 /*
274 * Set the immu_devi struct in the immu_devi field of a devinfo node
275 */
276 int
immu_devi_set(dev_info_t * dip,immu_flags_t immu_flags)277 immu_devi_set(dev_info_t *dip, immu_flags_t immu_flags)
278 {
279 int bus, dev, func;
280 immu_devi_t *new_imd;
281 immu_devi_t *immu_devi;
282
283 immu_devi = immu_devi_get(dip);
284 if (immu_devi != NULL) {
285 return (DDI_SUCCESS);
286 }
287
288 bus = dev = func = -1;
289
290 /*
291 * Assume a new immu_devi struct is needed
292 */
293 if (!DEVI_IS_PCI(dip) || acpica_get_bdf(dip, &bus, &dev, &func) != 0) {
294 /*
295 * No BDF. Set bus = -1 to indicate this.
296 * We still need to create a immu_devi struct
297 * though
298 */
299 bus = -1;
300 dev = 0;
301 func = 0;
302 }
303
304 new_imd = create_immu_devi(dip, bus, dev, func, immu_flags);
305 if (new_imd == NULL) {
306 ddi_err(DER_WARN, dip, "Failed to create immu_devi "
307 "structure");
308 return (DDI_FAILURE);
309 }
310
311 /*
312 * Check if some other thread allocated a immu_devi while we
313 * didn't own the lock.
314 */
315 mutex_enter(&(DEVI(dip)->devi_lock));
316 if (IMMU_DEVI(dip) == NULL) {
317 IMMU_DEVI_SET(dip, new_imd);
318 } else {
319 destroy_immu_devi(new_imd);
320 }
321 mutex_exit(&(DEVI(dip)->devi_lock));
322
323 return (DDI_SUCCESS);
324 }
325
326 static dev_info_t *
get_lpc_devinfo(immu_t * immu,dev_info_t * rdip,immu_flags_t immu_flags)327 get_lpc_devinfo(immu_t *immu, dev_info_t *rdip, immu_flags_t immu_flags)
328 {
329 dvma_arg_t dvarg = {0};
330 dvarg.dva_list = &(immu->immu_dvma_lpc_list);
331 dvarg.dva_rdip = rdip;
332 dvarg.dva_error = DDI_FAILURE;
333
334 if (immu_walk_ancestor(rdip, NULL, match_lpc,
335 &dvarg, NULL, immu_flags) != DDI_SUCCESS) {
336 ddi_err(DER_MODE, rdip, "Could not walk ancestors to "
337 "find lpc_devinfo for ISA device");
338 return (NULL);
339 }
340
341 if (dvarg.dva_error != DDI_SUCCESS || dvarg.dva_ddip == NULL) {
342 ddi_err(DER_MODE, rdip, "Could not find lpc_devinfo for "
343 "ISA device");
344 return (NULL);
345 }
346
347 return (dvarg.dva_ddip);
348 }
349
350 static dev_info_t *
get_gfx_devinfo(dev_info_t * rdip)351 get_gfx_devinfo(dev_info_t *rdip)
352 {
353 immu_t *immu;
354 immu_devi_t *immu_devi;
355 list_t *list_gfx;
356
357 /*
358 * The GFX device may not be on the same iommu unit as "agpgart"
359 * so search globally
360 */
361 immu_devi = NULL;
362 immu = list_head(&immu_list);
363 for (; immu; immu = list_next(&immu_list, immu)) {
364 list_gfx = &(immu->immu_dvma_gfx_list);
365 if (!list_is_empty(list_gfx)) {
366 immu_devi = list_head(list_gfx);
367 break;
368 }
369 }
370
371 if (immu_devi == NULL) {
372 ddi_err(DER_WARN, rdip, "iommu: No GFX device. "
373 "Cannot redirect agpgart");
374 return (NULL);
375 }
376
377 ddi_err(DER_LOG, rdip, "iommu: GFX redirect to %s",
378 ddi_node_name(immu_devi->imd_dip));
379
380 return (immu_devi->imd_dip);
381 }
382
383 static immu_flags_t
dma_to_immu_flags(struct ddi_dma_req * dmareq)384 dma_to_immu_flags(struct ddi_dma_req *dmareq)
385 {
386 immu_flags_t flags = 0;
387
388 if (dmareq->dmar_fp == DDI_DMA_SLEEP) {
389 flags |= IMMU_FLAGS_SLEEP;
390 } else {
391 flags |= IMMU_FLAGS_NOSLEEP;
392 }
393
394 #ifdef BUGGY_DRIVERS
395
396 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
397
398 #else
399 /*
400 * Read and write flags need to be reversed.
401 * DMA_READ means read from device and write
402 * to memory. So DMA read means DVMA write.
403 */
404 if (dmareq->dmar_flags & DDI_DMA_READ)
405 flags |= IMMU_FLAGS_WRITE;
406
407 if (dmareq->dmar_flags & DDI_DMA_WRITE)
408 flags |= IMMU_FLAGS_READ;
409
410 /*
411 * Some buggy drivers specify neither READ or WRITE
412 * For such drivers set both read and write permissions
413 */
414 if ((dmareq->dmar_flags & (DDI_DMA_READ | DDI_DMA_WRITE)) == 0) {
415 flags |= (IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
416 }
417 #endif
418
419 return (flags);
420 }
421
422 /*ARGSUSED*/
423 int
pgtable_ctor(void * buf,void * arg,int kmflag)424 pgtable_ctor(void *buf, void *arg, int kmflag)
425 {
426 size_t actual_size = 0;
427 pgtable_t *pgtable;
428 int (*dmafp)(caddr_t);
429 caddr_t vaddr;
430 void *next;
431 uint_t flags;
432 immu_t *immu = arg;
433
434 pgtable = (pgtable_t *)buf;
435
436 dmafp = (kmflag & KM_NOSLEEP) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
437
438 next = kmem_zalloc(IMMU_PAGESIZE, kmflag);
439 if (next == NULL) {
440 return (-1);
441 }
442
443 if (ddi_dma_alloc_handle(root_devinfo, &immu_dma_attr,
444 dmafp, NULL, &pgtable->hwpg_dmahdl) != DDI_SUCCESS) {
445 kmem_free(next, IMMU_PAGESIZE);
446 return (-1);
447 }
448
449 flags = DDI_DMA_CONSISTENT;
450 if (!immu->immu_dvma_coherent)
451 flags |= IOMEM_DATA_UC_WR_COMBINE;
452
453 if (ddi_dma_mem_alloc(pgtable->hwpg_dmahdl, IMMU_PAGESIZE,
454 &immu_acc_attr, flags,
455 dmafp, NULL, &vaddr, &actual_size,
456 &pgtable->hwpg_memhdl) != DDI_SUCCESS) {
457 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
458 kmem_free(next, IMMU_PAGESIZE);
459 return (-1);
460 }
461
462 /*
463 * Memory allocation failure. Maybe a temporary condition
464 * so return error rather than panic, so we can try again
465 */
466 if (actual_size < IMMU_PAGESIZE) {
467 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
468 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
469 kmem_free(next, IMMU_PAGESIZE);
470 return (-1);
471 }
472
473 pgtable->hwpg_paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
474 pgtable->hwpg_vaddr = vaddr;
475 pgtable->swpg_next_array = next;
476
477 rw_init(&(pgtable->swpg_rwlock), NULL, RW_DEFAULT, NULL);
478
479 return (0);
480 }
481
482 /*ARGSUSED*/
483 void
pgtable_dtor(void * buf,void * arg)484 pgtable_dtor(void *buf, void *arg)
485 {
486 pgtable_t *pgtable;
487
488 pgtable = (pgtable_t *)buf;
489
490 /* destroy will panic if lock is held. */
491 rw_destroy(&(pgtable->swpg_rwlock));
492
493 ddi_dma_mem_free(&pgtable->hwpg_memhdl);
494 ddi_dma_free_handle(&pgtable->hwpg_dmahdl);
495 kmem_free(pgtable->swpg_next_array, IMMU_PAGESIZE);
496 }
497
/*
 * pgtable_alloc()
 *	alloc a IOMMU pgtable structure.
 *	This same struct is used for root and context tables as well.
 *	This routine allocs the following:
 *	- a pgtable_t struct
 *	- a HW page which holds PTEs/entries which is accessed by HW
 *	  so we set up DMA for this page
 *	- a SW page which is only for our bookkeeping
 *	  (for example to hold pointers to the next level pgtable).
 *	  So a simple kmem_alloc suffices
 */
510 static pgtable_t *
pgtable_alloc(immu_t * immu,immu_flags_t immu_flags)511 pgtable_alloc(immu_t *immu, immu_flags_t immu_flags)
512 {
513 pgtable_t *pgtable;
514 int kmflags;
515
516 kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
517
518 pgtable = kmem_cache_alloc(immu->immu_pgtable_cache, kmflags);
519 if (pgtable == NULL) {
520 return (NULL);
521 }
522 return (pgtable);
523 }
524
525 static void
pgtable_zero(pgtable_t * pgtable)526 pgtable_zero(pgtable_t *pgtable)
527 {
528 bzero(pgtable->hwpg_vaddr, IMMU_PAGESIZE);
529 bzero(pgtable->swpg_next_array, IMMU_PAGESIZE);
530 }
531
532 static void
pgtable_free(immu_t * immu,pgtable_t * pgtable)533 pgtable_free(immu_t *immu, pgtable_t *pgtable)
534 {
535 kmem_cache_free(immu->immu_pgtable_cache, pgtable);
536 }
537
538 /*
539 * Function to identify a display device from the PCI class code
540 */
541 static boolean_t
device_is_display(uint_t classcode)542 device_is_display(uint_t classcode)
543 {
544 static uint_t disp_classes[] = {
545 0x000100,
546 0x030000,
547 0x030001
548 };
549 int i, nclasses = sizeof (disp_classes) / sizeof (uint_t);
550
551 for (i = 0; i < nclasses; i++) {
552 if (classcode == disp_classes[i])
553 return (B_TRUE);
554 }
555 return (B_FALSE);
556 }
557
558 /*
559 * Function that determines if device is PCIEX and/or PCIEX bridge
560 */
561 static boolean_t
device_is_pciex(uchar_t bus,uchar_t dev,uchar_t func,boolean_t * is_pcib)562 device_is_pciex(
563 uchar_t bus, uchar_t dev, uchar_t func, boolean_t *is_pcib)
564 {
565 ushort_t cap;
566 ushort_t capsp;
567 ushort_t cap_count = PCI_CAP_MAX_PTR;
568 ushort_t status;
569 boolean_t is_pciex = B_FALSE;
570
571 *is_pcib = B_FALSE;
572
573 status = pci_getw_func(bus, dev, func, PCI_CONF_STAT);
574 if (!(status & PCI_STAT_CAP))
575 return (B_FALSE);
576
577 capsp = pci_getb_func(bus, dev, func, PCI_CONF_CAP_PTR);
578 while (cap_count-- && capsp >= PCI_CAP_PTR_OFF) {
579 capsp &= PCI_CAP_PTR_MASK;
580 cap = pci_getb_func(bus, dev, func, capsp);
581
582 if (cap == PCI_CAP_ID_PCI_E) {
583 status = pci_getw_func(bus, dev, func, capsp + 2);
584 /*
585 * See section 7.8.2 of PCI-Express Base Spec v1.0a
586 * for Device/Port Type.
587 * PCIE_PCIECAP_DEV_TYPE_PCIE2PCI implies that the
588 * device is a PCIE2PCI bridge
589 */
590 *is_pcib =
591 ((status & PCIE_PCIECAP_DEV_TYPE_MASK) ==
592 PCIE_PCIECAP_DEV_TYPE_PCIE2PCI) ? B_TRUE : B_FALSE;
593 is_pciex = B_TRUE;
594 }
595
596 capsp = (*pci_getb_func)(bus, dev, func,
597 capsp + PCI_CAP_NEXT_PTR);
598 }
599
600 return (is_pciex);
601 }
602
603 static boolean_t
device_use_premap(uint_t classcode)604 device_use_premap(uint_t classcode)
605 {
606 if (IMMU_PCI_CLASS2BASE(classcode) == PCI_CLASS_NET)
607 return (B_TRUE);
608 return (B_FALSE);
609 }
610
611
/*
 * immu_dvma_get_immu()
 *	get the immu unit structure for a dev_info node.
 *	The result is cached in the node's immu_devi; the lookup and
 *	cache-fill use double-checked locking under devi_lock.
 *	All failure paths panic (ddi_err(DER_PANIC, ...) does not return).
 */
immu_t *
immu_dvma_get_immu(dev_info_t *dip, immu_flags_t immu_flags)
{
	immu_devi_t *immu_devi;
	immu_t *immu;

	/*
	 * check if immu unit was already found earlier.
	 * If yes, then it will be stashed in immu_devi struct.
	 */
	immu_devi = immu_devi_get(dip);
	if (immu_devi == NULL) {
		if (immu_devi_set(dip, immu_flags) != DDI_SUCCESS) {
			/*
			 * May fail because of low memory.
			 * NOTE(review): the original comment claimed
			 * "return error rather than panic so the driver
			 * can retry", but the code below panics — the
			 * comment and code disagree; confirm intent.
			 */
			ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
			    "No immu_devi structure");
			/*NOTREACHED*/
		}
		immu_devi = immu_devi_get(dip);
	}

	/* fast path: another thread already resolved the unit */
	mutex_enter(&(DEVI(dip)->devi_lock));
	if (immu_devi->imd_immu) {
		immu = immu_devi->imd_immu;
		mutex_exit(&(DEVI(dip)->devi_lock));
		return (immu);
	}
	mutex_exit(&(DEVI(dip)->devi_lock));

	/* ask the DMAR tables which unit covers this device */
	immu = immu_dmar_get_immu(dip);
	if (immu == NULL) {
		ddi_err(DER_PANIC, dip, "immu_dvma_get_immu: "
		    "Cannot find immu_t for device");
		/*NOTREACHED*/
	}

	/*
	 * Check if some other thread found immu
	 * while lock was not held
	 */
	immu_devi = immu_devi_get(dip);
	/* immu_devi should be present as we found it earlier */
	if (immu_devi == NULL) {
		ddi_err(DER_PANIC, dip,
		    "immu_dvma_get_immu: No immu_devi structure");
		/*NOTREACHED*/
	}

	mutex_enter(&(DEVI(dip)->devi_lock));
	if (immu_devi->imd_immu == NULL) {
		/* nobody else set it, so we should do it */
		immu_devi->imd_immu = immu;
		immu_devi_set_spclist(dip, immu);
	} else {
		/*
		 * if some other thread got immu before
		 * us, it should get the same results
		 */
		if (immu_devi->imd_immu != immu) {
			ddi_err(DER_PANIC, dip, "Multiple "
			    "immu units found for device. Expected (%p), "
			    "actual (%p)", (void *)immu,
			    (void *)immu_devi->imd_immu);
			mutex_exit(&(DEVI(dip)->devi_lock));
			/*NOTREACHED*/
		}
	}
	mutex_exit(&(DEVI(dip)->devi_lock));

	return (immu);
}
690
691
692 /* ############################# IMMU_DEVI code ############################ */
693
/*
 * Allocate a immu_devi structure and initialize it.
 * bus == -1 indicates a non-PCI device (no BDF): the struct is still
 * created but marked IMMU_PCIB_NOBDF and no config space is read.
 * Returns NULL only on allocation failure (KM_NOSLEEP path).
 */
static immu_devi_t *
create_immu_devi(dev_info_t *rdip, int bus, int dev, int func,
    immu_flags_t immu_flags)
{
	uchar_t baseclass, subclass;
	uint_t classcode, revclass;
	immu_devi_t *immu_devi;
	boolean_t pciex = B_FALSE;
	int kmflags;
	boolean_t is_pcib = B_FALSE;

	/* bus == -1 indicate non-PCI device (no BDF) */
	ASSERT(bus == -1 || bus >= 0);
	ASSERT(dev >= 0);
	ASSERT(func >= 0);

	kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	immu_devi = kmem_zalloc(sizeof (immu_devi_t), kmflags);
	if (immu_devi == NULL) {
		ddi_err(DER_WARN, rdip, "Failed to allocate memory for "
		    "Intel IOMMU immu_devi structure");
		return (NULL);
	}
	immu_devi->imd_dip = rdip;
	immu_devi->imd_seg = 0; /* Currently seg can only be 0 */
	immu_devi->imd_bus = bus;
	immu_devi->imd_pcib_type = IMMU_PCIB_BAD;

	/* non-PCI devices: nothing more to discover */
	if (bus == -1) {
		immu_devi->imd_pcib_type = IMMU_PCIB_NOBDF;
		return (immu_devi);
	}

	immu_devi->imd_devfunc = IMMU_PCI_DEVFUNC(dev, func);
	immu_devi->imd_sec = 0;
	immu_devi->imd_sub = 0;

	/* class code occupies the upper 24 bits of the REVID register */
	revclass = pci_getl_func(bus, dev, func, PCI_CONF_REVID);

	classcode = IMMU_PCI_REV2CLASS(revclass);
	baseclass = IMMU_PCI_CLASS2BASE(classcode);
	subclass = IMMU_PCI_CLASS2SUB(classcode);

	if (baseclass == PCI_CLASS_BRIDGE && subclass == PCI_BRIDGE_PCI) {

		/* record the bridge's secondary/subordinate bus range */
		immu_devi->imd_sec = pci_getb_func(bus, dev, func,
		    PCI_BCNF_SECBUS);
		immu_devi->imd_sub = pci_getb_func(bus, dev, func,
		    PCI_BCNF_SUBBUS);

		/* classify the bridge: PCIe-PCI, PCIe-PCIe or PCI-PCI */
		pciex = device_is_pciex(bus, dev, func, &is_pcib);
		if (pciex == B_TRUE && is_pcib == B_TRUE) {
			immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCI;
		} else if (pciex == B_TRUE) {
			immu_devi->imd_pcib_type = IMMU_PCIB_PCIE_PCIE;
		} else {
			immu_devi->imd_pcib_type = IMMU_PCIB_PCI_PCI;
		}
	} else {
		immu_devi->imd_pcib_type = IMMU_PCIB_ENDPOINT;
	}

	/* check for certain special devices */
	immu_devi->imd_display = device_is_display(classcode);
	immu_devi->imd_lpc = ((baseclass == PCI_CLASS_BRIDGE) &&
	    (subclass == PCI_BRIDGE_ISA)) ? B_TRUE : B_FALSE;
	immu_devi->imd_use_premap = device_use_premap(classcode);

	immu_devi->imd_domain = NULL;

	immu_devi->imd_dvma_flags = immu_global_dvma_flags;

	return (immu_devi);
}
771
/*
 * Free an immu_devi structure allocated by create_immu_devi().
 */
static void
destroy_immu_devi(immu_devi_t *immu_devi)
{
	kmem_free(immu_devi, sizeof (immu_devi_t));
}
777
778 static domain_t *
immu_devi_domain(dev_info_t * rdip,dev_info_t ** ddipp)779 immu_devi_domain(dev_info_t *rdip, dev_info_t **ddipp)
780 {
781 immu_devi_t *immu_devi;
782 domain_t *domain;
783 dev_info_t *ddip;
784
785 *ddipp = NULL;
786
787 immu_devi = immu_devi_get(rdip);
788 if (immu_devi == NULL) {
789 return (NULL);
790 }
791
792 mutex_enter(&(DEVI(rdip)->devi_lock));
793 domain = immu_devi->imd_domain;
794 ddip = immu_devi->imd_ddip;
795 mutex_exit(&(DEVI(rdip)->devi_lock));
796
797 if (domain)
798 *ddipp = ddip;
799
800 return (domain);
801
802 }
803
804 /* ############################# END IMMU_DEVI code ######################## */
805 /* ############################# DOMAIN code ############################### */
806
807 /*
808 * This routine always succeeds
809 */
810 static int
did_alloc(immu_t * immu,dev_info_t * rdip,dev_info_t * ddip,immu_flags_t immu_flags)811 did_alloc(immu_t *immu, dev_info_t *rdip,
812 dev_info_t *ddip, immu_flags_t immu_flags)
813 {
814 int did;
815
816 did = (uintptr_t)vmem_alloc(immu->immu_did_arena, 1,
817 (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP);
818
819 if (did == 0) {
820 ddi_err(DER_WARN, rdip, "device domain-id alloc error"
821 " domain-device: %s%d. immu unit is %s. Using "
822 "unity domain with domain-id (%d)",
823 ddi_driver_name(ddip), ddi_get_instance(ddip),
824 immu->immu_name, immu->immu_unity_domain->dom_did);
825 did = immu->immu_unity_domain->dom_did;
826 }
827
828 return (did);
829 }
830
831 static int
get_branch_domain(dev_info_t * pdip,void * arg)832 get_branch_domain(dev_info_t *pdip, void *arg)
833 {
834 immu_devi_t *immu_devi;
835 domain_t *domain;
836 dev_info_t *ddip;
837 immu_t *immu;
838 dvma_arg_t *dvp = (dvma_arg_t *)arg;
839
840 /*
841 * The field dvp->dva_rdip is a work-in-progress
842 * and gets updated as we walk up the ancestor
843 * tree. The final ddip is set only when we reach
844 * the top of the tree. So the dvp->dva_ddip field cannot
845 * be relied on until we reach the top of the field.
846 */
847
848 /* immu_devi may not be set. */
849 immu_devi = immu_devi_get(pdip);
850 if (immu_devi == NULL) {
851 if (immu_devi_set(pdip, dvp->dva_flags) != DDI_SUCCESS) {
852 dvp->dva_error = DDI_FAILURE;
853 return (DDI_WALK_TERMINATE);
854 }
855 }
856
857 immu_devi = immu_devi_get(pdip);
858 immu = immu_devi->imd_immu;
859 if (immu == NULL)
860 immu = immu_dvma_get_immu(pdip, dvp->dva_flags);
861
862 /*
863 * If we encounter a PCIE_PCIE bridge *ANCESTOR* we need to
864 * terminate the walk (since the device under the PCIE bridge
865 * is a PCIE device and has an independent entry in the
866 * root/context table)
867 */
868 if (dvp->dva_rdip != pdip &&
869 immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCIE) {
870 return (DDI_WALK_TERMINATE);
871 }
872
873 /*
874 * In order to be a domain-dim, it must be a PCI device i.e.
875 * must have valid BDF. This also eliminates the root complex.
876 */
877 if (immu_devi->imd_pcib_type != IMMU_PCIB_BAD &&
878 immu_devi->imd_pcib_type != IMMU_PCIB_NOBDF) {
879 ASSERT(immu_devi->imd_bus >= 0);
880 ASSERT(immu_devi->imd_devfunc >= 0);
881 dvp->dva_ddip = pdip;
882 }
883
884 if (immu_devi->imd_display == B_TRUE ||
885 (dvp->dva_flags & IMMU_FLAGS_UNITY)) {
886 dvp->dva_domain = immu->immu_unity_domain;
887 /* continue walking to find ddip */
888 return (DDI_WALK_CONTINUE);
889 }
890
891 mutex_enter(&(DEVI(pdip)->devi_lock));
892 domain = immu_devi->imd_domain;
893 ddip = immu_devi->imd_ddip;
894 mutex_exit(&(DEVI(pdip)->devi_lock));
895
896 if (domain && ddip) {
897 /* if domain is set, it must be the same */
898 if (dvp->dva_domain) {
899 ASSERT(domain == dvp->dva_domain);
900 }
901 dvp->dva_domain = domain;
902 dvp->dva_ddip = ddip;
903 return (DDI_WALK_TERMINATE);
904 }
905
906 /* Domain may already be set, continue walking so that ddip gets set */
907 if (dvp->dva_domain) {
908 return (DDI_WALK_CONTINUE);
909 }
910
911 /* domain is not set in either immu_devi or dvp */
912 domain = bdf_domain_lookup(immu_devi);
913 if (domain == NULL) {
914 return (DDI_WALK_CONTINUE);
915 }
916
917 /* ok, the BDF hash had a domain for this BDF. */
918
919 /* Grab lock again to check if something else set immu_devi fields */
920 mutex_enter(&(DEVI(pdip)->devi_lock));
921 if (immu_devi->imd_domain != NULL) {
922 dvp->dva_domain = domain;
923 } else {
924 dvp->dva_domain = domain;
925 }
926 mutex_exit(&(DEVI(pdip)->devi_lock));
927
928 /*
929 * walk upwards until the topmost PCI bridge is found
930 */
931 return (DDI_WALK_CONTINUE);
932
933 }
934
935 static void
map_unity_domain(domain_t * domain)936 map_unity_domain(domain_t *domain)
937 {
938 struct memlist *mp;
939 uint64_t start;
940 uint64_t npages;
941 immu_dcookie_t dcookies[1] = {0};
942 int dcount = 0;
943
944 /*
945 * UNITY arenas are a mirror of the physical memory
946 * installed on the system.
947 */
948
949 #ifdef BUGGY_DRIVERS
950 /*
951 * Dont skip page0. Some broken HW/FW access it.
952 */
953 dcookies[0].dck_paddr = 0;
954 dcookies[0].dck_npages = 1;
955 dcount = 1;
956 (void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
957 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
958 #endif
959
960 memlist_read_lock();
961
962 mp = phys_install;
963
964 if (mp->ml_address == 0) {
965 /* since we already mapped page1 above */
966 start = IMMU_PAGESIZE;
967 } else {
968 start = mp->ml_address;
969 }
970 npages = mp->ml_size/IMMU_PAGESIZE + 1;
971
972 dcookies[0].dck_paddr = start;
973 dcookies[0].dck_npages = npages;
974 dcount = 1;
975 (void) dvma_map(domain, start, npages, dcookies,
976 dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
977
978 ddi_err(DER_LOG, domain->dom_dip, "iommu: mapping PHYS span [0x%" PRIx64
979 " - 0x%" PRIx64 "]", start, start + mp->ml_size);
980
981 mp = mp->ml_next;
982 while (mp) {
983 ddi_err(DER_LOG, domain->dom_dip,
984 "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
985 mp->ml_address, mp->ml_address + mp->ml_size);
986
987 start = mp->ml_address;
988 npages = mp->ml_size/IMMU_PAGESIZE + 1;
989
990 dcookies[0].dck_paddr = start;
991 dcookies[0].dck_npages = npages;
992 dcount = 1;
993 (void) dvma_map(domain, start, npages,
994 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
995 mp = mp->ml_next;
996 }
997
998 mp = bios_rsvd;
999 while (mp) {
1000 ddi_err(DER_LOG, domain->dom_dip,
1001 "iommu: mapping PHYS span [0x%" PRIx64 " - 0x%" PRIx64 "]",
1002 mp->ml_address, mp->ml_address + mp->ml_size);
1003
1004 start = mp->ml_address;
1005 npages = mp->ml_size/IMMU_PAGESIZE + 1;
1006
1007 dcookies[0].dck_paddr = start;
1008 dcookies[0].dck_npages = npages;
1009 dcount = 1;
1010 (void) dvma_map(domain, start, npages,
1011 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
1012
1013 mp = mp->ml_next;
1014 }
1015
1016 memlist_read_unlock();
1017 }
1018
/*
 * create_xlate_arena()
 *	Create the dvma arena for a domain with translation
 *	mapping. The arena spans the system's physical-memory ranges
 *	(clamped to the unit's MGAW) so DVMA addresses never collide
 *	with ioapic or PCI MMIO ranges. Panics on arena creation
 *	failure.
 */
static void
create_xlate_arena(immu_t *immu, domain_t *domain,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	char *arena_name;
	struct memlist *mp;
	int vmem_flags;
	uint64_t start;
	uint_t mgaw;
	uint64_t size;
	uint64_t maxaddr;
	void *vmem_ret;

	arena_name = domain->dom_dvma_arena_name;

	/* Note, don't do sizeof (arena_name) - it is just a pointer */
	(void) snprintf(arena_name,
	    sizeof (domain->dom_dvma_arena_name),
	    "%s-domain-%d-xlate-DVMA-arena", immu->immu_name,
	    domain->dom_did);

	vmem_flags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? VM_NOSLEEP : VM_SLEEP;

	/* Restrict mgaddr (max guest addr) to MGAW */
	mgaw = IMMU_CAP_MGAW(immu->immu_regs_cap);

	/*
	 * To ensure we avoid ioapic and PCI MMIO ranges we just
	 * use the physical memory address range of the system as the
	 * range
	 */
	maxaddr = ((uint64_t)1 << mgaw);

	memlist_read_lock();

	mp = phys_install;

	/* skip physical page 0; DVMA 0 is special-cased */
	if (mp->ml_address == 0)
		start = MMU_PAGESIZE;
	else
		start = mp->ml_address;

	/*
	 * Clamp the span to maxaddr.
	 * NOTE(review): if start itself were >= maxaddr this unsigned
	 * subtraction would underflow — presumably cannot happen for
	 * phys_install entries under the unit's MGAW; confirm for
	 * large-memory systems.
	 */
	if (start + mp->ml_size > maxaddr)
		size = maxaddr - start;
	else
		size = mp->ml_size;

	ddi_err(DER_VERB, rdip,
	    "iommu: %s: Creating dvma vmem arena [0x%" PRIx64
	    " - 0x%" PRIx64 "]", arena_name, start, start + size);

	/*
	 * We always allocate in quanta of IMMU_PAGESIZE
	 */
	domain->dom_dvma_arena = vmem_create(arena_name,
	    (void *)(uintptr_t)start,	/* start addr */
	    size,			/* size */
	    IMMU_PAGESIZE,		/* quantum */
	    NULL,			/* afunc */
	    NULL,			/* ffunc */
	    NULL,			/* source */
	    0,				/* qcache_max */
	    vmem_flags);

	if (domain->dom_dvma_arena == NULL) {
		ddi_err(DER_PANIC, rdip,
		    "Failed to allocate DVMA arena(%s) "
		    "for domain ID (%d)", arena_name, domain->dom_did);
		/*NOTREACHED*/
	}

	/* add the remaining phys_install spans to the arena */
	mp = mp->ml_next;
	while (mp) {

		if (mp->ml_address == 0)
			start = MMU_PAGESIZE;
		else
			start = mp->ml_address;

		if (start + mp->ml_size > maxaddr)
			size = maxaddr - start;
		else
			size = mp->ml_size;

		ddi_err(DER_VERB, rdip,
		    "iommu: %s: Adding dvma vmem span [0x%" PRIx64
		    " - 0x%" PRIx64 "]", arena_name, start,
		    start + size);

		vmem_ret = vmem_add(domain->dom_dvma_arena,
		    (void *)(uintptr_t)start, size, vmem_flags);

		if (vmem_ret == NULL) {
			ddi_err(DER_PANIC, rdip,
			    "Failed to allocate DVMA arena(%s) "
			    "for domain ID (%d)",
			    arena_name, domain->dom_did);
			/*NOTREACHED*/
		}
		mp = mp->ml_next;
	}
	memlist_read_unlock();
}
1127
1128 /* ################################### DOMAIN CODE ######################### */
1129
1130 /*
1131 * Set the domain and domain-dip for a dip
1132 */
1133 static void
set_domain(dev_info_t * dip,dev_info_t * ddip,domain_t * domain)1134 set_domain(
1135 dev_info_t *dip,
1136 dev_info_t *ddip,
1137 domain_t *domain)
1138 {
1139 immu_devi_t *immu_devi;
1140 domain_t *fdomain;
1141 dev_info_t *fddip;
1142
1143 immu_devi = immu_devi_get(dip);
1144
1145 mutex_enter(&(DEVI(dip)->devi_lock));
1146 fddip = immu_devi->imd_ddip;
1147 fdomain = immu_devi->imd_domain;
1148
1149 if (fddip) {
1150 ASSERT(fddip == ddip);
1151 } else {
1152 immu_devi->imd_ddip = ddip;
1153 }
1154
1155 if (fdomain) {
1156 ASSERT(fdomain == domain);
1157 } else {
1158 immu_devi->imd_domain = domain;
1159 }
1160 mutex_exit(&(DEVI(dip)->devi_lock));
1161 }
1162
1163 /*
1164 * device_domain()
1165 * Get domain for a device. The domain may be global in which case it
1166 * is shared between all IOMMU units. Due to potential AGAW differences
1167 * between IOMMU units, such global domains *have to be* UNITY mapping
1168 * domains. Alternatively, the domain may be local to a IOMMU unit.
 * Local domains may be shared or device-specific (immu_devi), although
 * the scope of sharing is restricted to devices controlled by the
 * IOMMU unit to which the domain belongs. If shared, they (currently)
 * have to be UNITY domains. If device-specific, a domain may be either
 * a UNITY or a translation (XLATE) domain.
1175 */
static domain_t *
device_domain(dev_info_t *rdip, dev_info_t **ddipp, immu_flags_t immu_flags)
{
	dev_info_t *ddip; /* topmost dip in domain i.e. domain owner */
	immu_t *immu;
	domain_t *domain;
	dvma_arg_t dvarg = {0};
	int level;

	*ddipp = NULL;

	/*
	 * Check if the domain is already set. This is usually true
	 * if this is not the first DVMA transaction.
	 */
	ddip = NULL;
	domain = immu_devi_domain(rdip, &ddip);
	if (domain) {
		*ddipp = ddip;
		return (domain);
	}

	immu = immu_dvma_get_immu(rdip, immu_flags);
	if (immu == NULL) {
		/*
		 * possible that there is no IOMMU unit for this device
		 * - BIOS bugs are one example.
		 */
		ddi_err(DER_WARN, rdip, "No iommu unit found for device");
		return (NULL);
	}

	/* merge in any per-device DVMA flags recorded on the immu_devi */
	immu_flags |= immu_devi_get(rdip)->imd_dvma_flags;

	/* walk up from rdip looking for an existing domain/domain-dip */
	dvarg.dva_rdip = rdip;
	dvarg.dva_ddip = NULL;
	dvarg.dva_domain = NULL;
	dvarg.dva_flags = immu_flags;
	level = 0;
	if (immu_walk_ancestor(rdip, NULL, get_branch_domain,
	    &dvarg, &level, immu_flags) != DDI_SUCCESS) {
		/*
		 * maybe low memory. return error,
		 * so driver tries again later
		 */
		return (NULL);
	}

	/* should have walked at least 1 dip (i.e. edip) */
	ASSERT(level > 0);

	ddip = dvarg.dva_ddip;	/* must be present */
	domain = dvarg.dva_domain;	/* may be NULL */

	/*
	 * We may find the domain during our ancestor walk on any one of our
	 * ancestor dips, If the domain is found then the domain-dip
	 * (i.e. ddip) will also be found in the same immu_devi struct.
	 * The domain-dip is the highest ancestor dip which shares the
	 * same domain with edip.
	 * The domain may or may not be found, but the domain dip must
	 * be found.
	 */
	if (ddip == NULL) {
		ddi_err(DER_MODE, rdip, "Cannot find domain dip for device.");
		return (NULL);
	}

	/*
	 * Did we find a domain ?
	 */
	if (domain) {
		goto found;
	}

	/* nope, so allocate */
	domain = domain_create(immu, ddip, rdip, immu_flags);
	if (domain == NULL) {
		return (NULL);
	}

	/*FALLTHROUGH*/
found:
	/*
	 * We know *domain *is* the right domain, so panic if
	 * another domain is set for either the request-dip or
	 * effective dip.
	 */
	set_domain(ddip, ddip, domain);
	set_domain(rdip, ddip, domain);

	*ddipp = ddip;
	return (domain);
}
1270
1271 static void
create_unity_domain(immu_t * immu)1272 create_unity_domain(immu_t *immu)
1273 {
1274 domain_t *domain;
1275
1276 /* domain created during boot and always use sleep flag */
1277 domain = kmem_zalloc(sizeof (domain_t), KM_SLEEP);
1278
1279 rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);
1280
1281 domain->dom_did = IMMU_UNITY_DID;
1282 domain->dom_maptype = IMMU_MAPTYPE_UNITY;
1283
1284 domain->dom_immu = immu;
1285 immu->immu_unity_domain = domain;
1286
1287 /*
1288 * Setup the domain's initial page table
1289 * should never fail.
1290 */
1291 domain->dom_pgtable_root = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
1292 pgtable_zero(domain->dom_pgtable_root);
1293
1294 /*
1295 * Only map all physical memory in to the unity domain
1296 * if passthrough is not supported. If it is supported,
1297 * passthrough is set in the context entry instead.
1298 */
1299 if (!IMMU_ECAP_GET_PT(immu->immu_regs_excap))
1300 map_unity_domain(domain);
1301
1302
1303 /*
1304 * put it on the system-wide UNITY domain list
1305 */
1306 mutex_enter(&(immu_domain_lock));
1307 list_insert_tail(&immu_unity_domain_list, domain);
1308 mutex_exit(&(immu_domain_lock));
1309 }
1310
1311 /*
1312 * ddip is the domain-dip - the topmost dip in a domain
1313 * rdip is the requesting-dip - the device which is
1314 * requesting DVMA setup
1315 * if domain is a non-shared domain rdip == ddip
1316 */
static domain_t *
domain_create(immu_t *immu, dev_info_t *ddip, dev_info_t *rdip,
    immu_flags_t immu_flags)
{
	int kmflags;
	domain_t *domain;
	char mod_hash_name[128];
	immu_devi_t *immu_devi;
	int did;
	immu_dcookie_t dcookies[1] = {0};
	int dcount = 0;

	immu_devi = immu_devi_get(rdip);

	/*
	 * First allocate a domainid.
	 * This routine will never fail, since if we run out
	 * of domains the unity domain will be allocated.
	 */
	did = did_alloc(immu, rdip, ddip, immu_flags);
	if (did == IMMU_UNITY_DID) {
		/* domain overflow: fall back to the shared unity domain */
		ASSERT(immu->immu_unity_domain);
		return (immu->immu_unity_domain);
	}

	kmflags = (immu_flags & IMMU_FLAGS_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	domain = kmem_zalloc(sizeof (domain_t), kmflags);
	if (domain == NULL) {
		/* DER_PANIC does not return, hence no cleanup of 'did' here */
		ddi_err(DER_PANIC, rdip, "Failed to alloc DVMA domain "
		    "structure for device. IOMMU unit: %s", immu->immu_name);
		/*NOTREACHED*/
	}

	rw_init(&(domain->dom_pgtable_rwlock), NULL, RW_DEFAULT, NULL);

	/*
	 * NOTE(review): mod_hash_name is built here but not referenced
	 * again in this function - presumably vestigial; confirm before
	 * removing.
	 */
	(void) snprintf(mod_hash_name, sizeof (mod_hash_name),
	    "immu%s-domain%d-pava-hash", immu->immu_name, did);

	domain->dom_did = did;
	domain->dom_immu = immu;
	domain->dom_maptype = IMMU_MAPTYPE_XLATE;
	domain->dom_dip = ddip;

	/*
	 * Create xlate DVMA arena for this domain.
	 */
	create_xlate_arena(immu, domain, rdip, immu_flags);

	/*
	 * Setup the domain's initial page table
	 */
	domain->dom_pgtable_root = pgtable_alloc(immu, immu_flags);
	if (domain->dom_pgtable_root == NULL) {
		ddi_err(DER_PANIC, rdip, "Failed to alloc root "
		    "pgtable for domain (%d). IOMMU unit: %s",
		    domain->dom_did, immu->immu_name);
		/*NOTREACHED*/
	}
	pgtable_zero(domain->dom_pgtable_root);

	/*
	 * Since this is a immu unit-specific domain, put it on
	 * the per-immu domain list.
	 */
	mutex_enter(&(immu->immu_lock));
	list_insert_head(&immu->immu_domain_list, domain);
	mutex_exit(&(immu->immu_lock));

	/*
	 * Also put it on the system-wide xlate domain list
	 */
	mutex_enter(&(immu_domain_lock));
	list_insert_head(&immu_xlate_domain_list, domain);
	mutex_exit(&(immu_domain_lock));

	bdf_domain_insert(immu_devi, domain);

#ifdef BUGGY_DRIVERS
	/*
	 * Map page0. Some broken HW/FW access it.
	 */
	dcookies[0].dck_paddr = 0;
	dcookies[0].dck_npages = 1;
	dcount = 1;
	(void) dvma_map(domain, 0, 1, dcookies, dcount, NULL,
	    IMMU_FLAGS_READ | IMMU_FLAGS_WRITE | IMMU_FLAGS_PAGE1);
#endif
	return (domain);
}
1407
1408 /*
1409 * Create domainid arena.
1410 * Domainid 0 is reserved by Vt-d spec and cannot be used by
1411 * system software.
1412 * Domainid 1 is reserved by solaris and used for *all* of the following:
1413 * as the "uninitialized" domain - For devices not yet controlled
1414 * by Solaris
1415 * as the "unity" domain - For devices that will always belong
1416 * to the unity domain
1417 * as the "overflow" domain - Used for any new device after we
1418 * run out of domains
1419 * All of the above domains map into a single domain with
1420 * domainid 1 and UNITY DVMA mapping
 * Each IMMU unit has its own unity/uninit/overflow domain
1422 */
1423 static void
did_init(immu_t * immu)1424 did_init(immu_t *immu)
1425 {
1426 (void) snprintf(immu->immu_did_arena_name,
1427 sizeof (immu->immu_did_arena_name),
1428 "%s_domainid_arena", immu->immu_name);
1429
1430 ddi_err(DER_VERB, immu->immu_dip, "creating domainid arena %s",
1431 immu->immu_did_arena_name);
1432
1433 immu->immu_did_arena = vmem_create(
1434 immu->immu_did_arena_name,
1435 (void *)(uintptr_t)(IMMU_UNITY_DID + 1), /* start addr */
1436 immu->immu_max_domains - IMMU_UNITY_DID,
1437 1, /* quantum */
1438 NULL, /* afunc */
1439 NULL, /* ffunc */
1440 NULL, /* source */
1441 0, /* qcache_max */
1442 VM_SLEEP);
1443
1444 /* Even with SLEEP flag, vmem_create() can fail */
1445 if (immu->immu_did_arena == NULL) {
1446 ddi_err(DER_PANIC, NULL, "%s: Failed to create Intel "
1447 "IOMMU domainid allocator: %s", immu->immu_name,
1448 immu->immu_did_arena_name);
1449 }
1450 }
1451
1452 /* ######################### CONTEXT CODE ################################# */
1453
/*
 * context_set()
 *    Point the root/context entry for source-id (bus, devfunc) at the
 *    given domain's page tables. Fast path: if the entry is already
 *    initialized, return under the reader lock. Otherwise upgrade to
 *    the writer lock and program the root and/or context entry,
 *    flushing CPU caches and the IOMMU context cache as required.
 */
static void
context_set(immu_t *immu, domain_t *domain, pgtable_t *root_table,
    int bus, int devfunc)
{
	pgtable_t *context;
	pgtable_t *pgtable_root;
	hw_rce_t *hw_rent;
	hw_rce_t *hw_cent;
	hw_rce_t *ctxp;
	int sid;
	krw_t rwtype;
	boolean_t fill_root;
	boolean_t fill_ctx;

	pgtable_root = domain->dom_pgtable_root;

	/* context tables were pre-allocated per bus in context_create() */
	ctxp = (hw_rce_t *)(root_table->swpg_next_array);
	context = *(pgtable_t **)(ctxp + bus);
	hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr) + bus;

	fill_root = B_FALSE;
	fill_ctx = B_FALSE;

	/* Check the most common case first with reader lock */
	rw_enter(&(immu->immu_ctx_rwlock), RW_READER);
	rwtype = RW_READER;
again:
	if (ROOT_GET_P(hw_rent)) {
		hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
		if (CONT_GET_AVAIL(hw_cent) == IMMU_CONT_INITED) {
			/* already programmed - nothing to do */
			rw_exit(&(immu->immu_ctx_rwlock));
			return;
		} else {
			fill_ctx = B_TRUE;
		}
	} else {
		fill_root = B_TRUE;
		fill_ctx = B_TRUE;
	}

	/*
	 * Need to modify the entries: upgrade to the writer lock. If the
	 * upgrade fails, re-acquire as writer and re-check, since another
	 * thread may have programmed the entry while the lock was dropped.
	 */
	if (rwtype == RW_READER &&
	    rw_tryupgrade(&(immu->immu_ctx_rwlock)) == 0) {
		rw_exit(&(immu->immu_ctx_rwlock));
		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
		rwtype = RW_WRITER;
		goto again;
	}
	rwtype = RW_WRITER;

	if (fill_root == B_TRUE) {
		ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
		ROOT_SET_P(hw_rent);
		immu_regs_cpu_flush(immu, (caddr_t)hw_rent, sizeof (hw_rce_t));
	}

	if (fill_ctx == B_TRUE) {
		hw_cent = (hw_rce_t *)(context->hwpg_vaddr) + devfunc;
		/* need to disable context entry before reprogramming it */
		bzero(hw_cent, sizeof (hw_rce_t));

		/* flush caches */
		immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));

		/* invalidate the old context-cache entry for this source-id */
		sid = ((bus << 8) | devfunc);
		immu_flush_context_fsi(immu, 0, sid, domain->dom_did,
		    &immu->immu_ctx_inv_wait);

		CONT_SET_AVAIL(hw_cent, IMMU_CONT_INITED);
		CONT_SET_DID(hw_cent, domain->dom_did);
		CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
		CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
		/* unity domain uses hardware passthrough if available */
		if (domain->dom_did == IMMU_UNITY_DID &&
		    IMMU_ECAP_GET_PT(immu->immu_regs_excap))
			CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
		else
			/*LINTED*/
			CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
		CONT_SET_P(hw_cent);
		if (IMMU_ECAP_GET_CH(immu->immu_regs_excap)) {
			CONT_SET_EH(hw_cent);
			if (immu_use_alh)
				CONT_SET_ALH(hw_cent);
		}
		immu_regs_cpu_flush(immu, (caddr_t)hw_cent, sizeof (hw_rce_t));
	}
	rw_exit(&(immu->immu_ctx_rwlock));
}
1541
/*
 * context_create()
 *    Allocate the root table and one context table per possible bus,
 *    initializing every context entry to point at this unit's unity
 *    domain. Entries are marked IMMU_CONT_UNINITED so context_set()
 *    knows they still need per-device programming. Returns the root
 *    table; called with sleeping allocations at attach time.
 */
static pgtable_t *
context_create(immu_t *immu)
{
	int bus;
	int devfunc;
	pgtable_t *root_table;
	pgtable_t *context;
	pgtable_t *pgtable_root;
	hw_rce_t *ctxp;
	hw_rce_t *hw_rent;
	hw_rce_t *hw_cent;

	/* Allocate a zeroed root table (4K 256b entries) */
	root_table = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
	pgtable_zero(root_table);

	/*
	 * Setup context tables for all possible root table entries.
	 * Start out with unity domains for all entries.
	 */
	ctxp = (hw_rce_t *)(root_table->swpg_next_array);
	hw_rent = (hw_rce_t *)(root_table->hwpg_vaddr);
	for (bus = 0; bus < IMMU_ROOT_NUM; bus++, ctxp++, hw_rent++) {
		context = pgtable_alloc(immu, IMMU_FLAGS_SLEEP);
		pgtable_zero(context);
		ROOT_SET_P(hw_rent);
		ROOT_SET_CONT(hw_rent, context->hwpg_paddr);
		hw_cent = (hw_rce_t *)(context->hwpg_vaddr);
		for (devfunc = 0; devfunc < IMMU_CONT_NUM;
		    devfunc++, hw_cent++) {
			pgtable_root =
			    immu->immu_unity_domain->dom_pgtable_root;
			CONT_SET_DID(hw_cent,
			    immu->immu_unity_domain->dom_did);
			CONT_SET_AW(hw_cent, immu->immu_dvma_agaw);
			CONT_SET_ASR(hw_cent, pgtable_root->hwpg_paddr);
			/* use passthrough translation when supported */
			if (IMMU_ECAP_GET_PT(immu->immu_regs_excap))
				CONT_SET_TTYPE(hw_cent, TTYPE_PASSTHRU);
			else
				/*LINTED*/
				CONT_SET_TTYPE(hw_cent, TTYPE_XLATE_ONLY);
			CONT_SET_AVAIL(hw_cent, IMMU_CONT_UNINITED);
			CONT_SET_P(hw_cent);
		}
		/* make the whole context table visible to the IOMMU */
		immu_regs_cpu_flush(immu, context->hwpg_vaddr, IMMU_PAGESIZE);
		/* software shadow: remember the context pgtable for this bus */
		*((pgtable_t **)ctxp) = context;
	}

	return (root_table);
}
1592
1593 /*
1594 * Called during rootnex attach, so no locks needed
1595 */
/*
 * context_init()
 *    One-time context-table setup for an IOMMU unit: create the root
 *    table, hand it to the hardware, then globally invalidate the
 *    context cache and IOTLB. The order of the register operations
 *    below matters and must not be changed.
 *    Called during rootnex attach, so no locks needed.
 */
static void
context_init(immu_t *immu)
{
	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);

	immu_init_inv_wait(&immu->immu_ctx_inv_wait, "ctxglobal", B_TRUE);

	/* drain the write buffer before installing the root table */
	immu_regs_wbf_flush(immu);

	immu->immu_ctx_root = context_create(immu);

	immu_regs_set_root_table(immu);

	/* flush any stale cached translations from before the switch */
	rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
	immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
	immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
	rw_exit(&(immu->immu_ctx_rwlock));
}
1614
1615
1616 /*
1617 * Find top pcib
1618 */
1619 static int
find_top_pcib(dev_info_t * dip,void * arg)1620 find_top_pcib(dev_info_t *dip, void *arg)
1621 {
1622 immu_devi_t *immu_devi;
1623 dev_info_t **pcibdipp = (dev_info_t **)arg;
1624
1625 immu_devi = immu_devi_get(dip);
1626
1627 if (immu_devi->imd_pcib_type == IMMU_PCIB_PCI_PCI) {
1628 *pcibdipp = dip;
1629 }
1630
1631 return (DDI_WALK_CONTINUE);
1632 }
1633
/*
 * immu_context_update()
 *    Program context entries for the request-dip (rdip) and its
 *    domain-dip (ddip), choosing which (bus, devfunc) pairs to set
 *    based on the PCI topology between them (PCIE endpoint, PCIE-PCI
 *    bridge, PCI-PCI bridge, or PCIE device with non-PCI children).
 *    Returns DDI_SUCCESS or DDI_FAILURE; panics on unknown topology.
 */
static int
immu_context_update(immu_t *immu, domain_t *domain, dev_info_t *ddip,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	immu_devi_t *r_immu_devi;
	immu_devi_t *d_immu_devi;
	int r_bus;
	int d_bus;
	int r_devfunc;
	int d_devfunc;
	immu_pcib_t d_pcib_type;
	dev_info_t *pcibdip;

	if (ddip == NULL || rdip == NULL ||
	    ddip == root_devinfo || rdip == root_devinfo) {
		ddi_err(DER_MODE, rdip, "immu_contexts_update: domain-dip or "
		    "request-dip are NULL or are root devinfo");
		return (DDI_FAILURE);
	}

	/*
	 * We need to set the context fields
	 * based on what type of device rdip and ddip are.
	 * To do that we need the immu_devi field.
	 * Set the immu_devi field (if not already set)
	 */
	if (immu_devi_set(ddip, immu_flags) == DDI_FAILURE) {
		ddi_err(DER_MODE, rdip,
		    "immu_context_update: failed to set immu_devi for ddip");
		return (DDI_FAILURE);
	}

	if (immu_devi_set(rdip, immu_flags) == DDI_FAILURE) {
		ddi_err(DER_MODE, rdip,
		    "immu_context_update: failed to set immu_devi for rdip");
		return (DDI_FAILURE);
	}

	d_immu_devi = immu_devi_get(ddip);
	r_immu_devi = immu_devi_get(rdip);

	d_bus = d_immu_devi->imd_bus;
	d_devfunc = d_immu_devi->imd_devfunc;
	d_pcib_type = d_immu_devi->imd_pcib_type;
	r_bus = r_immu_devi->imd_bus;
	r_devfunc = r_immu_devi->imd_devfunc;

	if (rdip == ddip) {
		/* rdip is a PCIE device. set context for it only */
		context_set(immu, domain, immu->immu_ctx_root, r_bus,
		    r_devfunc);
#ifdef BUGGY_DRIVERS
	} else if (r_immu_devi == d_immu_devi) {
#ifdef TEST
		/* TEST is #undef'd at the top of this file: dead code */
		ddi_err(DER_WARN, rdip, "Driver bug: Devices 0x%lx and "
		    "0x%lx are identical", rdip, ddip);
#endif
		/* rdip is a PCIE device. set context for it only */
		context_set(immu, domain, immu->immu_ctx_root, r_bus,
		    r_devfunc);
#endif
	} else if (d_pcib_type == IMMU_PCIB_PCIE_PCI) {
		/*
		 * ddip is a PCIE_PCI bridge. Set context for ddip's
		 * secondary bus. If rdip is on ddip's secondary
		 * bus, set context for rdip. Else, set context
		 * for rdip's PCI bridge on ddip's secondary bus.
		 */
		context_set(immu, domain, immu->immu_ctx_root,
		    d_immu_devi->imd_sec, 0);
		if (d_immu_devi->imd_sec == r_bus) {
			context_set(immu, domain, immu->immu_ctx_root,
			    r_bus, r_devfunc);
		} else {
			pcibdip = NULL;
			if (immu_walk_ancestor(rdip, ddip, find_top_pcib,
			    &pcibdip, NULL, immu_flags) == DDI_SUCCESS &&
			    pcibdip != NULL) {
				/* use the topmost PCI bridge's source-id */
				r_immu_devi = immu_devi_get(pcibdip);
				r_bus = r_immu_devi->imd_bus;
				r_devfunc = r_immu_devi->imd_devfunc;
				context_set(immu, domain, immu->immu_ctx_root,
				    r_bus, r_devfunc);
			} else {
				ddi_err(DER_PANIC, rdip, "Failed to find PCI "
				    " bridge for PCI device");
				/*NOTREACHED*/
			}
		}
	} else if (d_pcib_type == IMMU_PCIB_PCI_PCI) {
		/* legacy bridge: transactions carry the bridge's source-id */
		context_set(immu, domain, immu->immu_ctx_root, d_bus,
		    d_devfunc);
	} else if (d_pcib_type == IMMU_PCIB_ENDPOINT) {
		/*
		 * ddip is a PCIE device which has a non-PCI device under it
		 * i.e. it is a PCI-nonPCI bridge. Example: pciicde-ata
		 */
		context_set(immu, domain, immu->immu_ctx_root, d_bus,
		    d_devfunc);
	} else {
		ddi_err(DER_PANIC, rdip, "unknown device type. Cannot "
		    "set iommu context.");
		/*NOTREACHED*/
	}

	/* XXX do we need a membar_producer() here */
	return (DDI_SUCCESS);
}
1742
1743 /* ##################### END CONTEXT CODE ################################## */
1744 /* ##################### MAPPING CODE ################################## */
1745
1746
1747 #ifdef DEBUG
/*
 * PDTE_check()
 *    DEBUG-only sanity check of a just-written PDE/PTE. 'next' is the
 *    next-level pgtable for a PDE, or NULL for a leaf PTE (in which
 *    case 'paddr' is the expected page frame address). Returns B_TRUE
 *    if the entry is consistent; logs the first inconsistency found
 *    and returns B_FALSE otherwise.
 */
static boolean_t
PDTE_check(immu_t *immu, hw_pdte_t pdte, pgtable_t *next, paddr_t paddr,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	/* The PDTE must be set i.e. present bit is set */
	if (!PDTE_P(pdte)) {
		ddi_err(DER_MODE, rdip, "No present flag");
		return (B_FALSE);
	}

	/*
	 * Just assert to check most significant system software field
	 * (PDTE_SW4) as it is same as present bit and we
	 * checked that above
	 */
	ASSERT(PDTE_SW4(pdte));

	/*
	 * TM field should be clear if not reserved.
	 * non-leaf is always reserved
	 */
	if (next == NULL && immu->immu_TM_reserved == B_FALSE) {
		if (PDTE_TM(pdte)) {
			ddi_err(DER_MODE, rdip, "TM flag set");
			return (B_FALSE);
		}
	}

	/*
	 * The SW3 field is not used and must be clear
	 */
	if (PDTE_SW3(pdte)) {
		ddi_err(DER_MODE, rdip, "SW3 set");
		return (B_FALSE);
	}

	/*
	 * PFN (for PTE) or next level pgtable-paddr (for PDE) must be set
	 */
	if (next == NULL) {
		/* leaf PTE: address must match the mapped page frame */
		ASSERT(paddr % IMMU_PAGESIZE == 0);
		if (PDTE_PADDR(pdte) != paddr) {
			ddi_err(DER_MODE, rdip,
			    "PTE paddr mismatch: %lx != %lx",
			    PDTE_PADDR(pdte), paddr);
			return (B_FALSE);
		}
	} else {
		/* PDE: address must match the next-level pgtable */
		if (PDTE_PADDR(pdte) != next->hwpg_paddr) {
			ddi_err(DER_MODE, rdip,
			    "PDE paddr mismatch: %lx != %lx",
			    PDTE_PADDR(pdte), next->hwpg_paddr);
			return (B_FALSE);
		}
	}

	/*
	 * SNP field should be clear if not reserved.
	 * non-leaf is always reserved
	 */
	if (next == NULL && immu->immu_SNP_reserved == B_FALSE) {
		if (PDTE_SNP(pdte)) {
			ddi_err(DER_MODE, rdip, "SNP set");
			return (B_FALSE);
		}
	}

	/* second field available for system software should be clear */
	if (PDTE_SW2(pdte)) {
		ddi_err(DER_MODE, rdip, "SW2 set");
		return (B_FALSE);
	}

	/* Super pages field should be clear */
	if (PDTE_SP(pdte)) {
		ddi_err(DER_MODE, rdip, "SP set");
		return (B_FALSE);
	}

	/*
	 * least significant field available for
	 * system software should be clear
	 */
	if (PDTE_SW1(pdte)) {
		ddi_err(DER_MODE, rdip, "SW1 set");
		return (B_FALSE);
	}

	/* access bits must cover what the caller asked for */
	if ((immu_flags & IMMU_FLAGS_READ) && !PDTE_READ(pdte)) {
		ddi_err(DER_MODE, rdip, "READ not set");
		return (B_FALSE);
	}

	if ((immu_flags & IMMU_FLAGS_WRITE) && !PDTE_WRITE(pdte)) {
		ddi_err(DER_MODE, rdip, "WRITE not set");
		return (B_FALSE);
	}

	return (B_TRUE);
}
1848 #endif
1849
1850 /*ARGSUSED*/
1851 static void
PTE_clear_all(immu_t * immu,domain_t * domain,xlate_t * xlate,uint64_t * dvma_ptr,uint64_t * npages_ptr,dev_info_t * rdip)1852 PTE_clear_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
1853 uint64_t *dvma_ptr, uint64_t *npages_ptr, dev_info_t *rdip)
1854 {
1855 uint64_t npages;
1856 uint64_t dvma;
1857 pgtable_t *pgtable;
1858 hw_pdte_t *hwp;
1859 hw_pdte_t *shwp;
1860 int idx;
1861
1862 pgtable = xlate->xlt_pgtable;
1863 idx = xlate->xlt_idx;
1864
1865 dvma = *dvma_ptr;
1866 npages = *npages_ptr;
1867
1868 /*
1869 * since a caller gets a unique dvma for a physical address,
1870 * no other concurrent thread will be writing to the same
1871 * PTE even if it has the same paddr. So no locks needed.
1872 */
1873 shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
1874
1875 hwp = shwp;
1876 for (; npages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
1877 PDTE_CLEAR_P(*hwp);
1878 dvma += IMMU_PAGESIZE;
1879 npages--;
1880 }
1881
1882 *dvma_ptr = dvma;
1883 *npages_ptr = npages;
1884
1885 xlate->xlt_idx = idx;
1886 }
1887
1888 static void
xlate_setup(uint64_t dvma,xlate_t * xlate,int nlevels)1889 xlate_setup(uint64_t dvma, xlate_t *xlate, int nlevels)
1890 {
1891 int level;
1892 uint64_t offbits;
1893
1894 /*
1895 * Skip the first 12 bits which is the offset into
1896 * 4K PFN (phys page frame based on IMMU_PAGESIZE)
1897 */
1898 offbits = dvma >> IMMU_PAGESHIFT;
1899
1900 /* skip to level 1 i.e. leaf PTE */
1901 for (level = 1, xlate++; level <= nlevels; level++, xlate++) {
1902 xlate->xlt_level = level;
1903 xlate->xlt_idx = (offbits & IMMU_PGTABLE_LEVEL_MASK);
1904 ASSERT(xlate->xlt_idx <= IMMU_PGTABLE_MAXIDX);
1905 xlate->xlt_pgtable = NULL;
1906 offbits >>= IMMU_PGTABLE_LEVEL_STRIDE;
1907 }
1908 }
1909
1910 /*
1911 * Read the pgtables
1912 */
/*
 * PDE_lookup()
 *    Walk the domain's pgtables from the root down to level 2,
 *    caching each level's pgtable pointer in the xlate array.
 *    Returns B_FALSE if a level is missing (no mapping), B_TRUE
 *    once xlate[1] (the leaf pgtable) has been resolved.
 */
static boolean_t
PDE_lookup(domain_t *domain, xlate_t *xlate, int nlevels)
{
	pgtable_t *pgtable;
	pgtable_t *next;
	uint_t idx;

	/* start with highest level pgtable i.e. root */
	xlate += nlevels;

	if (xlate->xlt_pgtable == NULL) {
		xlate->xlt_pgtable = domain->dom_pgtable_root;
	}

	for (; xlate->xlt_level > 1; xlate--) {
		idx = xlate->xlt_idx;
		pgtable = xlate->xlt_pgtable;

		/* skip levels already resolved by a previous walk */
		if ((xlate - 1)->xlt_pgtable) {
			continue;
		}

		/* Lock the pgtable in read mode */
		rw_enter(&(pgtable->swpg_rwlock), RW_READER);

		/*
		 * since we are unmapping, the pgtable should
		 * already point to a leafier pgtable.
		 */
		next = *(pgtable->swpg_next_array + idx);
		(xlate - 1)->xlt_pgtable = next;
		rw_exit(&(pgtable->swpg_rwlock));
		if (next == NULL)
			return (B_FALSE);
	}

	return (B_TRUE);
}
1951
1952 static void
immu_fault_walk(void * arg,void * base,size_t len)1953 immu_fault_walk(void *arg, void *base, size_t len)
1954 {
1955 uint64_t dvma, start;
1956
1957 dvma = *(uint64_t *)arg;
1958 start = (uint64_t)(uintptr_t)base;
1959
1960 if (dvma >= start && dvma < (start + len)) {
1961 ddi_err(DER_WARN, NULL,
1962 "faulting DVMA address is in vmem arena "
1963 "(%" PRIx64 "-%" PRIx64 ")",
1964 start, start + len);
1965 *(uint64_t *)arg = ~0ULL;
1966 }
1967 }
1968
1969 void
immu_print_fault_info(uint_t sid,uint64_t dvma)1970 immu_print_fault_info(uint_t sid, uint64_t dvma)
1971 {
1972 int nlevels;
1973 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
1974 xlate_t *xlatep;
1975 hw_pdte_t pte;
1976 domain_t *domain;
1977 immu_t *immu;
1978 uint64_t dvma_arg;
1979
1980 if (mod_hash_find(bdf_domain_hash,
1981 (void *)(uintptr_t)sid, (void *)&domain) != 0) {
1982 ddi_err(DER_WARN, NULL,
1983 "no domain for faulting SID %08x", sid);
1984 return;
1985 }
1986
1987 immu = domain->dom_immu;
1988
1989 dvma_arg = dvma;
1990 vmem_walk(domain->dom_dvma_arena, VMEM_ALLOC, immu_fault_walk,
1991 (void *)&dvma_arg);
1992 if (dvma_arg != ~0ULL)
1993 ddi_err(DER_WARN, domain->dom_dip,
1994 "faulting DVMA address is not in vmem arena");
1995
1996 nlevels = immu->immu_dvma_nlevels;
1997 xlate_setup(dvma, xlate, nlevels);
1998
1999 if (!PDE_lookup(domain, xlate, nlevels)) {
2000 ddi_err(DER_WARN, domain->dom_dip,
2001 "pte not found in domid %d for faulting addr %" PRIx64,
2002 domain->dom_did, dvma);
2003 return;
2004 }
2005
2006 xlatep = &xlate[1];
2007 pte = *((hw_pdte_t *)
2008 (xlatep->xlt_pgtable->hwpg_vaddr) + xlatep->xlt_idx);
2009
2010 ddi_err(DER_WARN, domain->dom_dip,
2011 "domid %d pte: %" PRIx64 "(paddr %" PRIx64 ")", domain->dom_did,
2012 (unsigned long long)pte, (unsigned long long)PDTE_PADDR(pte));
2013 }
2014
/*
 * PTE_set_one()
 *    Write a single leaf PTE mapping 'paddr'. In non-DEBUG builds the
 *    entry is built in one step from the unit's PTE mask; in DEBUG
 *    builds each field is cleared/set individually with consistency
 *    checks. The present flag used here lives in a field the VT-d
 *    spec leaves to system software (see comment below).
 */
/*ARGSUSED*/
static void
PTE_set_one(immu_t *immu, hw_pdte_t *hwp, paddr_t paddr,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	hw_pdte_t pte;

#ifndef DEBUG
	/* fast path: start from the precomputed mask and add the paddr */
	pte = immu->immu_ptemask;
	PDTE_SET_PADDR(pte, paddr);
#else
	pte = *hwp;

	if (PDTE_P(pte)) {
		/* already present: must map the same page frame */
		if (PDTE_PADDR(pte) != paddr) {
			ddi_err(DER_MODE, rdip, "PTE paddr %lx != paddr %lx",
			    PDTE_PADDR(pte), paddr);
		}
#ifdef BUGGY_DRIVERS
		return;
#else
		goto out;
#endif
	}

	/* clear TM field if not reserved */
	if (immu->immu_TM_reserved == B_FALSE) {
		PDTE_CLEAR_TM(pte);
	}

	/* Clear 3rd field for system software - not used */
	PDTE_CLEAR_SW3(pte);

	/* Set paddr */
	ASSERT(paddr % IMMU_PAGESIZE == 0);
	PDTE_CLEAR_PADDR(pte);
	PDTE_SET_PADDR(pte, paddr);

	/* clear SNP field if not reserved. */
	if (immu->immu_SNP_reserved == B_FALSE) {
		PDTE_CLEAR_SNP(pte);
	}

	/* Clear SW2 field available for software */
	PDTE_CLEAR_SW2(pte);


	/* SP is don't care for PTEs. Clear it for cleanliness */
	PDTE_CLEAR_SP(pte);

	/* Clear SW1 field available for software */
	PDTE_CLEAR_SW1(pte);

	/*
	 * Now that we are done writing the PTE
	 * set the "present" flag. Note this present
	 * flag is a bit in the PDE/PTE that the
	 * spec says is available for system software.
	 * This is an implementation detail of Solaris
	 * bare-metal Intel IOMMU.
	 * The present field in a PDE/PTE is not defined
	 * by the Vt-d spec
	 */

	PDTE_SET_P(pte);

	pte |= immu->immu_ptemask;

out:
#endif /* DEBUG */
#ifdef BUGGY_DRIVERS
	/* grant both directions unconditionally for broken drivers */
	PDTE_SET_READ(pte);
	PDTE_SET_WRITE(pte);
#else
	if (immu_flags & IMMU_FLAGS_READ)
		PDTE_SET_READ(pte);
	if (immu_flags & IMMU_FLAGS_WRITE)
		PDTE_SET_WRITE(pte);
#endif /* BUGGY_DRIVERS */

	/* single store publishes the completed entry */
	*hwp = pte;
}
2097
/*
 * PTE_set_all()
 *    Fill consecutive leaf PTEs in one pgtable from the physical
 *    ranges described by dcookies[0..dcount-1], starting at
 *    xlate->xlt_idx. Stops at the end of the pgtable or when all
 *    nvpages are mapped; remaining work is returned through
 *    dvma_ptr/nvpages_ptr and xlate->xlt_idx for the caller to
 *    continue in the next pgtable.
 */
/*ARGSUSED*/
static void
PTE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate,
    uint64_t *dvma_ptr, uint64_t *nvpages_ptr, immu_dcookie_t *dcookies,
    int dcount, dev_info_t *rdip, immu_flags_t immu_flags)
{
	paddr_t paddr;
	uint64_t nvpages;
	uint64_t nppages;
	uint64_t dvma;
	pgtable_t *pgtable;
	hw_pdte_t *hwp;
	hw_pdte_t *shwp;
	int idx, nset;
	int j;

	pgtable = xlate->xlt_pgtable;
	idx = xlate->xlt_idx;

	dvma = *dvma_ptr;
	nvpages = *nvpages_ptr;

	/*
	 * since a caller gets a unique dvma for a physical address,
	 * no other concurrent thread will be writing to the same
	 * PTE even if it has the same paddr. So no locks needed.
	 */
	shwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;

	hwp = shwp;
	/*
	 * The cookie list describes the whole request, but earlier
	 * pgtables may already have consumed part of it. Scan back
	 * from the last cookie to find which cookie (j) and how far
	 * into it the first unmapped page lies.
	 */
	for (j = dcount - 1; j >= 0; j--) {
		if (nvpages <= dcookies[j].dck_npages)
			break;
		nvpages -= dcookies[j].dck_npages;
	}

	nppages = nvpages;
	paddr = dcookies[j].dck_paddr +
	    (dcookies[j].dck_npages - nppages) * IMMU_PAGESIZE;

	nvpages = *nvpages_ptr;
	nset = 0;
	for (; nvpages > 0 && idx <= IMMU_PGTABLE_MAXIDX; idx++, hwp++) {
		PTE_set_one(immu, hwp, paddr, rdip, immu_flags);
		nset++;

		/* DEBUG builds verify each freshly written PTE */
		ASSERT(PDTE_check(immu, *hwp, NULL, paddr, rdip, immu_flags)
		    == B_TRUE);
		nppages--;
		nvpages--;
		paddr += IMMU_PAGESIZE;
		dvma += IMMU_PAGESIZE;

		/* current cookie exhausted: move to the next one */
		if (nppages == 0) {
			j++;
		}

		if (j == dcount)
			break;

		if (nppages == 0) {
			nppages = dcookies[j].dck_npages;
			paddr = dcookies[j].dck_paddr;
		}
	}

	if (nvpages) {
		*dvma_ptr = dvma;
		*nvpages_ptr = nvpages;
	} else {
		*dvma_ptr = 0;
		*nvpages_ptr = 0;
	}

	xlate->xlt_idx = idx;
}
2174
/*
 * PDE_set_one()
 *    Write a single PDE pointing at the next-level pgtable 'next'.
 *    If the PDE is already present it must already reference 'next';
 *    otherwise the entry is built field by field and published with
 *    the software-defined present flag (see comment below).
 */
/*ARGSUSED*/
static void
PDE_set_one(immu_t *immu, hw_pdte_t *hwp, pgtable_t *next,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	hw_pdte_t pde;

	pde = *hwp;

	/* if PDE is already set, make sure it is correct */
	if (PDTE_P(pde)) {
		ASSERT(PDTE_PADDR(pde) == next->hwpg_paddr);
#ifdef BUGGY_DRIVERS
		return;
#else
		/* still (re)apply the access bits below */
		goto out;
#endif
	}

	/* Dont touch SW4, it is the present bit */

	/* don't touch TM field it is reserved for PDEs */

	/* 3rd field available for system software is not used */
	PDTE_CLEAR_SW3(pde);

	/* Set next level pgtable-paddr for PDE */
	PDTE_CLEAR_PADDR(pde);
	PDTE_SET_PADDR(pde, next->hwpg_paddr);

	/* don't touch SNP field it is reserved for PDEs */

	/* Clear second field available for system software */
	PDTE_CLEAR_SW2(pde);

	/* No super pages for PDEs */
	PDTE_CLEAR_SP(pde);

	/* Clear SW1 for software */
	PDTE_CLEAR_SW1(pde);

	/*
	 * Now that we are done writing the PDE
	 * set the "present" flag. Note this present
	 * flag is a bit in the PDE/PTE that the
	 * spec says is available for system software.
	 * This is an implementation detail of Solaris
	 * bare-metal Intel IOMMU.
	 * The present field in a PDE/PTE is not defined
	 * by the Vt-d spec
	 */

out:
#ifdef BUGGY_DRIVERS
	/* grant both directions unconditionally for broken drivers */
	PDTE_SET_READ(pde);
	PDTE_SET_WRITE(pde);
#else
	if (immu_flags & IMMU_FLAGS_READ)
		PDTE_SET_READ(pde);
	if (immu_flags & IMMU_FLAGS_WRITE)
		PDTE_SET_WRITE(pde);
#endif

	PDTE_SET_P(pde);

	/* single store publishes the completed entry */
	*hwp = pde;
}
2242
/*
 * PDE_set_all()
 *   Walk the pgtable hierarchy for the DVMA described by the xlate
 *   array (root at xlate[nlevels], leaf at xlate[1]), allocating
 *   intermediate pgtables and installing PDEs as needed, and recording
 *   each level's pgtable pointer in the xlate array for the caller.
 *   Returns B_TRUE if any new PDE was installed (the caller then needs
 *   a non-leaf IOTLB flush).
 */
static boolean_t
PDE_set_all(immu_t *immu, domain_t *domain, xlate_t *xlate, int nlevels,
    dev_info_t *rdip, immu_flags_t immu_flags)
{
	pgtable_t *pgtable;
	pgtable_t *new;
	pgtable_t *next;
	hw_pdte_t *hwp;
	int level;
	uint_t idx;
	krw_t rwtype;
	boolean_t set = B_FALSE;

	/* start with highest level pgtable i.e. root */
	xlate += nlevels;

	new = NULL;
	xlate->xlt_pgtable = domain->dom_pgtable_root;
	/* descend from root toward (but not including) the leaf level */
	for (level = nlevels; level > 1; level--, xlate--) {
		idx = xlate->xlt_idx;
		pgtable = xlate->xlt_pgtable;

		/* Lock the pgtable in READ mode first */
		rw_enter(&(pgtable->swpg_rwlock), RW_READER);
		rwtype = RW_READER;
again:
		hwp = (hw_pdte_t *)(pgtable->hwpg_vaddr) + idx;
		next = (pgtable->swpg_next_array)[idx];

		/*
		 * check if leafier level already has a pgtable
		 * if yes, verify
		 */
		if (next == NULL) {
			/*
			 * Preallocate the child pgtable before taking the
			 * write lock; it may go unused if we lose a race
			 * (freed at the end in that case).
			 */
			if (new == NULL) {

				IMMU_DPROBE2(immu__pdp__alloc, dev_info_t *,
				    rdip, int, level);

				new = pgtable_alloc(immu, immu_flags);
				if (new == NULL) {
					ddi_err(DER_PANIC, rdip,
					    "pgtable alloc err");
				}
				pgtable_zero(new);
			}

			/* Change to a write lock */
			if (rwtype == RW_READER &&
			    rw_tryupgrade(&(pgtable->swpg_rwlock)) == 0) {
				rw_exit(&(pgtable->swpg_rwlock));
				rw_enter(&(pgtable->swpg_rwlock), RW_WRITER);
				rwtype = RW_WRITER;
				/*
				 * The lock was dropped during the upgrade;
				 * another thread may have installed the
				 * child meanwhile — re-check from the top.
				 */
				goto again;
			}
			rwtype = RW_WRITER;
			next = new;
			(pgtable->swpg_next_array)[idx] = next;
			new = NULL;
			PDE_set_one(immu, hwp, next, rdip, immu_flags);
			set = B_TRUE;
			rw_downgrade(&(pgtable->swpg_rwlock));
			rwtype = RW_READER;
		}
#ifndef BUGGY_DRIVERS
		else {
			hw_pdte_t pde = *hwp;

			/*
			 * If buggy driver we already set permission
			 * READ+WRITE so nothing to do for that case
			 * XXX Check that read writer perms change before
			 * actually setting perms. Also need to hold lock
			 */
			if (immu_flags & IMMU_FLAGS_READ)
				PDTE_SET_READ(pde);
			if (immu_flags & IMMU_FLAGS_WRITE)
				PDTE_SET_WRITE(pde);

			*hwp = pde;
		}
#endif

		ASSERT(PDTE_check(immu, *hwp, next, 0, rdip, immu_flags)
		    == B_TRUE);

		/* record the child so the next iteration descends into it */
		(xlate - 1)->xlt_pgtable = next;
		rw_exit(&(pgtable->swpg_rwlock));
	}

	/* free the preallocated pgtable if it was never installed */
	if (new) {
		pgtable_free(immu, new);
	}

	return (set);
}
2342
2343 /*
2344 * dvma_map()
2345 * map a contiguous range of DVMA pages
2346 *
2347 * immu: IOMMU unit for which we are generating DVMA cookies
2348 * domain: domain
2349 * sdvma: Starting dvma
2350 * spaddr: Starting paddr
2351 * npages: Number of pages
2352 * rdip: requesting device
2353 * immu_flags: flags
2354 */
2355 static boolean_t
dvma_map(domain_t * domain,uint64_t sdvma,uint64_t snvpages,immu_dcookie_t * dcookies,int dcount,dev_info_t * rdip,immu_flags_t immu_flags)2356 dvma_map(domain_t *domain, uint64_t sdvma, uint64_t snvpages,
2357 immu_dcookie_t *dcookies, int dcount, dev_info_t *rdip,
2358 immu_flags_t immu_flags)
2359 {
2360 uint64_t dvma;
2361 uint64_t n;
2362 immu_t *immu = domain->dom_immu;
2363 int nlevels = immu->immu_dvma_nlevels;
2364 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2365 boolean_t pde_set = B_FALSE;
2366
2367 n = snvpages;
2368 dvma = sdvma;
2369
2370 while (n > 0) {
2371 xlate_setup(dvma, xlate, nlevels);
2372
2373 /* Lookup or allocate PGDIRs and PGTABLEs if necessary */
2374 if (PDE_set_all(immu, domain, xlate, nlevels, rdip, immu_flags)
2375 == B_TRUE) {
2376 pde_set = B_TRUE;
2377 }
2378
2379 /* set all matching ptes that fit into this leaf pgtable */
2380 PTE_set_all(immu, domain, &xlate[1], &dvma, &n, dcookies,
2381 dcount, rdip, immu_flags);
2382 }
2383
2384 return (pde_set);
2385 }
2386
2387 /*
2388 * dvma_unmap()
2389 * unmap a range of DVMAs
2390 *
2391 * immu: IOMMU unit state
2392 * domain: domain for requesting device
2393 * ddip: domain-dip
2394 * dvma: starting DVMA
2395 * npages: Number of IMMU pages to be unmapped
2396 * rdip: requesting device
2397 */
2398 static void
dvma_unmap(domain_t * domain,uint64_t sdvma,uint64_t snpages,dev_info_t * rdip)2399 dvma_unmap(domain_t *domain, uint64_t sdvma, uint64_t snpages,
2400 dev_info_t *rdip)
2401 {
2402 immu_t *immu = domain->dom_immu;
2403 int nlevels = immu->immu_dvma_nlevels;
2404 xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0};
2405 uint64_t n;
2406 uint64_t dvma;
2407
2408 dvma = sdvma;
2409 n = snpages;
2410
2411 while (n > 0) {
2412 /* setup the xlate array */
2413 xlate_setup(dvma, xlate, nlevels);
2414
2415 /* just lookup existing pgtables. Should never fail */
2416 if (!PDE_lookup(domain, xlate, nlevels))
2417 ddi_err(DER_PANIC, rdip,
2418 "PTE not found for addr %" PRIx64,
2419 (unsigned long long)dvma);
2420
2421 /* clear all matching ptes that fit into this leaf pgtable */
2422 PTE_clear_all(immu, domain, &xlate[1], &dvma, &n, rdip);
2423 }
2424
2425 /* No need to flush IOTLB after unmap */
2426 }
2427
2428 static uint64_t
dvma_alloc(domain_t * domain,ddi_dma_attr_t * dma_attr,uint_t npages,int kmf)2429 dvma_alloc(domain_t *domain, ddi_dma_attr_t *dma_attr, uint_t npages, int kmf)
2430 {
2431 uint64_t dvma;
2432 size_t xsize, align;
2433 uint64_t minaddr, maxaddr;
2434
2435 /* parameters */
2436 xsize = npages * IMMU_PAGESIZE;
2437 align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
2438 minaddr = dma_attr->dma_attr_addr_lo;
2439 maxaddr = dma_attr->dma_attr_addr_hi + 1;
2440
2441 /* handle the rollover cases */
2442 if (maxaddr < dma_attr->dma_attr_addr_hi) {
2443 maxaddr = dma_attr->dma_attr_addr_hi;
2444 }
2445
2446 /*
2447 * allocate from vmem arena.
2448 */
2449 dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
2450 xsize, align, 0, 0, (void *)(uintptr_t)minaddr,
2451 (void *)(uintptr_t)maxaddr, kmf);
2452
2453 return (dvma);
2454 }
2455
/*
 * dvma_prealloc()
 *   Reserve a small DVMA window (IMMU_NPREPTES pages) for a DMA handle
 *   and pre-build all non-leaf page tables covering it, remembering the
 *   leaf PTE pointers in ihp->ihp_preptes so later binds of small
 *   requests can fill the PTEs in directly without walking or
 *   allocating page tables.  Best-effort: on failure ihp_predvma is 0
 *   and callers fall back to the regular allocation path.
 */
static void
dvma_prealloc(dev_info_t *rdip, immu_hdl_priv_t *ihp, ddi_dma_attr_t *dma_attr)
{
	int nlevels;
	xlate_t xlate[IMMU_PGTABLE_MAX_LEVELS + 1] = {0}, *xlp;
	uint64_t dvma, n;
	size_t xsize, align;
	uint64_t minaddr, maxaddr, dmamax;
	int on, npte, pindex;
	hw_pdte_t *shwp;
	immu_t *immu;
	domain_t *domain;

	/* parameters */
	domain = IMMU_DEVI(rdip)->imd_domain;
	immu = domain->dom_immu;
	nlevels = immu->immu_dvma_nlevels;
	xsize = IMMU_NPREPTES * IMMU_PAGESIZE;
	align = MAX((size_t)(dma_attr->dma_attr_align), IMMU_PAGESIZE);
	minaddr = dma_attr->dma_attr_addr_lo;
	/* honor a segment bounce limit if the driver requested one */
	if (dma_attr->dma_attr_flags & _DDI_DMA_BOUNCE_ON_SEG)
		dmamax = dma_attr->dma_attr_seg;
	else
		dmamax = dma_attr->dma_attr_addr_hi;
	maxaddr = dmamax + 1;

	/* guard against wrap when dmamax is the maximum uint64_t */
	if (maxaddr < dmamax)
		maxaddr = dmamax;

	dvma = (uint64_t)(uintptr_t)vmem_xalloc(domain->dom_dvma_arena,
	    xsize, align, 0, dma_attr->dma_attr_seg + 1,
	    (void *)(uintptr_t)minaddr, (void *)(uintptr_t)maxaddr, VM_NOSLEEP);

	ihp->ihp_predvma = dvma;
	ihp->ihp_npremapped = 0;
	/* no DVMA available: caller will use the regular path instead */
	if (dvma == 0)
		return;

	n = IMMU_NPREPTES;
	pindex = 0;

	/*
	 * Set up a mapping at address 0, just so that all PDPs get allocated
	 * now. Although this initial mapping should never be used,
	 * explicitly set it to read-only, just to be safe.
	 */
	while (n > 0) {
		xlate_setup(dvma, xlate, nlevels);

		(void) PDE_set_all(immu, domain, xlate, nlevels, rdip,
		    IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);

		/* first leaf PTE covered by this leaf pgtable */
		xlp = &xlate[1];
		shwp = (hw_pdte_t *)(xlp->xlt_pgtable->hwpg_vaddr)
		    + xlp->xlt_idx;
		on = n;

		PTE_set_all(immu, domain, xlp, &dvma, &n, &immu_precookie,
		    1, rdip, IMMU_FLAGS_READ);

		/* number of PTEs PTE_set_all filled in this pgtable */
		npte = on - n;

		/* remember each leaf PTE pointer for fast premapped binds */
		while (npte > 0) {
			ihp->ihp_preptes[pindex++] = shwp;
#ifdef BUGGY_DRIVERS
			PDTE_CLEAR_WRITE(*shwp);
#endif
			shwp++;
			npte--;
		}
	}
}
2528
2529 static void
dvma_prefree(dev_info_t * rdip,immu_hdl_priv_t * ihp)2530 dvma_prefree(dev_info_t *rdip, immu_hdl_priv_t *ihp)
2531 {
2532 domain_t *domain;
2533
2534 domain = IMMU_DEVI(rdip)->imd_domain;
2535
2536 if (ihp->ihp_predvma != 0) {
2537 dvma_unmap(domain, ihp->ihp_predvma, IMMU_NPREPTES, rdip);
2538 vmem_free(domain->dom_dvma_arena,
2539 (void *)(uintptr_t)ihp->ihp_predvma,
2540 IMMU_NPREPTES * IMMU_PAGESIZE);
2541 }
2542 }
2543
2544 static void
dvma_free(domain_t * domain,uint64_t dvma,uint64_t npages)2545 dvma_free(domain_t *domain, uint64_t dvma, uint64_t npages)
2546 {
2547 uint64_t size = npages * IMMU_PAGESIZE;
2548
2549 if (domain->dom_maptype != IMMU_MAPTYPE_XLATE)
2550 return;
2551
2552 vmem_free(domain->dom_dvma_arena, (void *)(uintptr_t)dvma, size);
2553 }
2554
2555 static int
immu_map_dvmaseg(dev_info_t * rdip,ddi_dma_handle_t handle,immu_hdl_priv_t * ihp,struct ddi_dma_req * dmareq,ddi_dma_obj_t * dma_out)2556 immu_map_dvmaseg(dev_info_t *rdip, ddi_dma_handle_t handle,
2557 immu_hdl_priv_t *ihp, struct ddi_dma_req *dmareq,
2558 ddi_dma_obj_t *dma_out)
2559 {
2560 domain_t *domain;
2561 immu_t *immu;
2562 immu_flags_t immu_flags;
2563 ddi_dma_atyp_t buftype;
2564 ddi_dma_obj_t *dmar_object;
2565 ddi_dma_attr_t *attrp;
2566 uint64_t offset, paddr, dvma, sdvma, rwmask;
2567 size_t npages, npgalloc;
2568 uint_t psize, size, pcnt, dmax;
2569 page_t **pparray;
2570 caddr_t vaddr;
2571 page_t *page;
2572 struct as *vas;
2573 immu_dcookie_t *dcookies;
2574 int pde_set;
2575
2576 domain = IMMU_DEVI(rdip)->imd_domain;
2577 immu = domain->dom_immu;
2578 immu_flags = dma_to_immu_flags(dmareq);
2579
2580 attrp = &((ddi_dma_impl_t *)handle)->dmai_attr;
2581
2582 dmar_object = &dmareq->dmar_object;
2583 pparray = dmar_object->dmao_obj.virt_obj.v_priv;
2584 vaddr = dmar_object->dmao_obj.virt_obj.v_addr;
2585 buftype = dmar_object->dmao_type;
2586 size = dmar_object->dmao_size;
2587
2588 IMMU_DPROBE3(immu__map__dvma, dev_info_t *, rdip, ddi_dma_atyp_t,
2589 buftype, uint_t, size);
2590
2591 dcookies = &ihp->ihp_dcookies[0];
2592
2593 pcnt = dmax = 0;
2594
2595 /* retrieve paddr, psize, offset from dmareq */
2596 if (buftype == DMA_OTYP_PAGES) {
2597 page = dmar_object->dmao_obj.pp_obj.pp_pp;
2598 offset = dmar_object->dmao_obj.pp_obj.pp_offset &
2599 MMU_PAGEOFFSET;
2600 paddr = pfn_to_pa(page->p_pagenum) + offset;
2601 psize = MIN((MMU_PAGESIZE - offset), size);
2602 page = page->p_next;
2603 vas = dmar_object->dmao_obj.virt_obj.v_as;
2604 } else {
2605 if (vas == NULL) {
2606 vas = &kas;
2607 }
2608 offset = (uintptr_t)vaddr & MMU_PAGEOFFSET;
2609 if (pparray != NULL) {
2610 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum) + offset;
2611 psize = MIN((MMU_PAGESIZE - offset), size);
2612 pcnt++;
2613 } else {
2614 paddr = pfn_to_pa(hat_getpfnum(vas->a_hat,
2615 vaddr)) + offset;
2616 psize = MIN(size, (MMU_PAGESIZE - offset));
2617 vaddr += psize;
2618 }
2619 }
2620
2621 npgalloc = IMMU_BTOPR(size + offset);
2622
2623 if (npgalloc <= IMMU_NPREPTES && ihp->ihp_predvma != 0) {
2624 #ifdef BUGGY_DRIVERS
2625 rwmask = PDTE_MASK_R | PDTE_MASK_W | immu->immu_ptemask;
2626 #else
2627 rwmask = immu->immu_ptemask;
2628 if (immu_flags & IMMU_FLAGS_READ)
2629 rwmask |= PDTE_MASK_R;
2630 if (immu_flags & IMMU_FLAGS_WRITE)
2631 rwmask |= PDTE_MASK_W;
2632 #endif
2633 #ifdef DEBUG
2634 rwmask |= PDTE_MASK_P;
2635 #endif
2636 sdvma = ihp->ihp_predvma;
2637 ihp->ihp_npremapped = npgalloc;
2638 *ihp->ihp_preptes[0] =
2639 PDTE_PADDR(paddr & ~MMU_PAGEOFFSET) | rwmask;
2640 } else {
2641 ihp->ihp_npremapped = 0;
2642 sdvma = dvma_alloc(domain, attrp, npgalloc,
2643 dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP);
2644 if (sdvma == 0)
2645 return (DDI_DMA_NORESOURCES);
2646
2647 dcookies[0].dck_paddr = (paddr & ~MMU_PAGEOFFSET);
2648 dcookies[0].dck_npages = 1;
2649 }
2650
2651 IMMU_DPROBE3(immu__dvma__alloc, dev_info_t *, rdip, uint64_t, npgalloc,
2652 uint64_t, sdvma);
2653
2654 dvma = sdvma;
2655 pde_set = 0;
2656 npages = 1;
2657 size -= psize;
2658 while (size > 0) {
2659 /* get the size for this page (i.e. partial or full page) */
2660 psize = MIN(size, MMU_PAGESIZE);
2661 if (buftype == DMA_OTYP_PAGES) {
2662 /* get the paddr from the page_t */
2663 paddr = pfn_to_pa(page->p_pagenum);
2664 page = page->p_next;
2665 } else if (pparray != NULL) {
2666 /* index into the array of page_t's to get the paddr */
2667 paddr = pfn_to_pa(pparray[pcnt]->p_pagenum);
2668 pcnt++;
2669 } else {
2670 /* call into the VM to get the paddr */
2671 paddr = pfn_to_pa(hat_getpfnum(vas->a_hat, vaddr));
2672 vaddr += psize;
2673 }
2674
2675 npages++;
2676
2677 if (ihp->ihp_npremapped > 0) {
2678 *ihp->ihp_preptes[npages - 1] =
2679 PDTE_PADDR(paddr) | rwmask;
2680 } else if (IMMU_CONTIG_PADDR(dcookies[dmax], paddr)) {
2681 dcookies[dmax].dck_npages++;
2682 } else {
2683 /* No, we need a new dcookie */
2684 if (dmax == (IMMU_NDCK - 1)) {
2685 /*
2686 * Ran out of dcookies. Map them now.
2687 */
2688 if (dvma_map(domain, dvma,
2689 npages, dcookies, dmax + 1, rdip,
2690 immu_flags))
2691 pde_set++;
2692
2693 IMMU_DPROBE4(immu__dvmamap__early,
2694 dev_info_t *, rdip, uint64_t, dvma,
2695 uint_t, npages, uint_t, dmax+1);
2696
2697 dvma += (npages << IMMU_PAGESHIFT);
2698 npages = 0;
2699 dmax = 0;
2700 } else
2701 dmax++;
2702 dcookies[dmax].dck_paddr = paddr;
2703 dcookies[dmax].dck_npages = 1;
2704 }
2705 size -= psize;
2706 }
2707
2708 /*
2709 * Finish up, mapping all, or all of the remaining,
2710 * physical memory ranges.
2711 */
2712 if (ihp->ihp_npremapped == 0 && npages > 0) {
2713 IMMU_DPROBE4(immu__dvmamap__late, dev_info_t *, rdip, \
2714 uint64_t, dvma, uint_t, npages, uint_t, dmax+1);
2715
2716 if (dvma_map(domain, dvma, npages, dcookies,
2717 dmax + 1, rdip, immu_flags))
2718 pde_set++;
2719 }
2720
2721 /* Invalidate the IOTLB */
2722 immu_flush_iotlb_psi(immu, domain->dom_did, sdvma, npgalloc,
2723 pde_set > 0 ? TLB_IVA_WHOLE : TLB_IVA_LEAF,
2724 &ihp->ihp_inv_wait);
2725
2726 ihp->ihp_ndvseg = 1;
2727 ihp->ihp_dvseg[0].dvs_start = sdvma;
2728 ihp->ihp_dvseg[0].dvs_len = dmar_object->dmao_size;
2729
2730 dma_out->dmao_size = dmar_object->dmao_size;
2731 dma_out->dmao_obj.dvma_obj.dv_off = offset & IMMU_PAGEOFFSET;
2732 dma_out->dmao_obj.dvma_obj.dv_nseg = 1;
2733 dma_out->dmao_obj.dvma_obj.dv_seg = &ihp->ihp_dvseg[0];
2734 dma_out->dmao_type = DMA_OTYP_DVADDR;
2735
2736 return (DDI_DMA_MAPPED);
2737 }
2738
2739 static int
immu_unmap_dvmaseg(dev_info_t * rdip,ddi_dma_obj_t * dmao)2740 immu_unmap_dvmaseg(dev_info_t *rdip, ddi_dma_obj_t *dmao)
2741 {
2742 uint64_t dvma, npages;
2743 domain_t *domain;
2744 struct dvmaseg *dvs;
2745
2746 domain = IMMU_DEVI(rdip)->imd_domain;
2747 dvs = dmao->dmao_obj.dvma_obj.dv_seg;
2748
2749 dvma = dvs[0].dvs_start;
2750 npages = IMMU_BTOPR(dvs[0].dvs_len + dmao->dmao_obj.dvma_obj.dv_off);
2751
2752 #ifdef DEBUG
2753 /* Unmap only in DEBUG mode */
2754 dvma_unmap(domain, dvma, npages, rdip);
2755 #endif
2756 dvma_free(domain, dvma, npages);
2757
2758 IMMU_DPROBE3(immu__dvma__free, dev_info_t *, rdip, uint_t, npages,
2759 uint64_t, dvma);
2760
2761 #ifdef DEBUG
2762 /*
2763 * In the DEBUG case, the unmap was actually done,
2764 * but an IOTLB flush was not done. So, an explicit
2765 * write back flush is needed.
2766 */
2767 immu_regs_wbf_flush(domain->dom_immu);
2768 #endif
2769
2770 return (DDI_SUCCESS);
2771 }
2772
2773 /* ############################# Functions exported ######################## */
2774
/*
 * setup the DVMA subsystem
 * this code runs only for the first IOMMU unit
 */
void
immu_dvma_setup(list_t *listp)
{
	immu_t *immu;
	uint_t kval;
	size_t nchains;

	/* locks */
	mutex_init(&immu_domain_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Create lists */
	list_create(&immu_unity_domain_list, sizeof (domain_t),
	    offsetof(domain_t, dom_maptype_node));
	list_create(&immu_xlate_domain_list, sizeof (domain_t),
	    offsetof(domain_t, dom_maptype_node));

	/* Setup BDF domain hash */
	nchains = 0xff;
	kval = mod_hash_iddata_gen(nchains);

	/*
	 * NOTE(review): created with KM_NOSLEEP but the return value is
	 * not checked; bdf_domain_hash could be NULL under memory
	 * pressure — confirm downstream users tolerate that.
	 */
	bdf_domain_hash = mod_hash_create_extended("BDF-DOMAIN_HASH",
	    nchains, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp,
	    KM_NOSLEEP);

	/* per-unit init: unity domain, domain-id space, context tables */
	immu = list_head(listp);
	for (; immu; immu = list_next(listp, immu)) {
		create_unity_domain(immu);
		did_init(immu);
		context_init(immu);
		immu->immu_dvma_setup = B_TRUE;
	}
}
2812
2813 /*
2814 * Startup up one DVMA unit
2815 */
2816 void
immu_dvma_startup(immu_t * immu)2817 immu_dvma_startup(immu_t *immu)
2818 {
2819 if (immu_gfxdvma_enable == B_FALSE &&
2820 immu->immu_dvma_gfx_only == B_TRUE) {
2821 return;
2822 }
2823
2824 /*
2825 * DVMA will start once IOMMU is "running"
2826 */
2827 immu->immu_dvma_running = B_TRUE;
2828 }
2829
2830 /*
2831 * immu_dvma_physmem_update()
2832 * called when the installed memory on a
2833 * system increases, to expand domain DVMA
2834 * for domains with UNITY mapping
2835 */
2836 void
immu_dvma_physmem_update(uint64_t addr,uint64_t size)2837 immu_dvma_physmem_update(uint64_t addr, uint64_t size)
2838 {
2839 uint64_t start;
2840 uint64_t npages;
2841 int dcount;
2842 immu_dcookie_t dcookies[1] = {0};
2843 domain_t *domain;
2844
2845 /*
2846 * Just walk the system-wide list of domains with
2847 * UNITY mapping. Both the list of *all* domains
2848 * and *UNITY* domains is protected by the same
2849 * single lock
2850 */
2851 mutex_enter(&immu_domain_lock);
2852 domain = list_head(&immu_unity_domain_list);
2853 for (; domain; domain = list_next(&immu_unity_domain_list, domain)) {
2854 /*
2855 * Nothing to do if the IOMMU supports passthrough.
2856 */
2857 if (IMMU_ECAP_GET_PT(domain->dom_immu->immu_regs_excap))
2858 continue;
2859
2860 /* There is no vmem_arena for unity domains. Just map it */
2861 ddi_err(DER_LOG, domain->dom_dip,
2862 "iommu: unity-domain: Adding map "
2863 "[0x%" PRIx64 " - 0x%" PRIx64 "]", addr, addr + size);
2864
2865 start = IMMU_ROUNDOWN(addr);
2866 npages = (IMMU_ROUNDUP(size) / IMMU_PAGESIZE) + 1;
2867
2868 dcookies[0].dck_paddr = start;
2869 dcookies[0].dck_npages = npages;
2870 dcount = 1;
2871 (void) dvma_map(domain, start, npages,
2872 dcookies, dcount, NULL, IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2873
2874 }
2875 mutex_exit(&immu_domain_lock);
2876 }
2877
/*
 * immu_dvma_device_setup()
 *   One-time DVMA setup for a device: find its IOMMU unit, apply any
 *   LPC/graphics redirections, establish its domain, and program the
 *   root/context entries.  Returns DDI_SUCCESS or DDI_DMA_NORESOURCES.
 */
int
immu_dvma_device_setup(dev_info_t *rdip, immu_flags_t immu_flags)
{
	dev_info_t *ddip, *odip;
	immu_t *immu;
	domain_t *domain;

	/* remember the original dip; rdip may be redirected below */
	odip = rdip;

	immu = immu_dvma_get_immu(rdip, immu_flags);
	if (immu == NULL) {
		/*
		 * possible that there is no IOMMU unit for this device
		 * - BIOS bugs are one example.
		 */
		ddi_err(DER_WARN, rdip, "No iommu unit found for device");
		return (DDI_DMA_NORESOURCES);
	}

	/*
	 * redirect isa devices attached under lpc to lpc dip
	 */
	if (strcmp(ddi_node_name(ddi_get_parent(rdip)), "isa") == 0) {
		rdip = get_lpc_devinfo(immu, rdip, immu_flags);
		if (rdip == NULL) {
			/* rdip is NULL here; DER_PANIC does not return */
			ddi_err(DER_PANIC, rdip, "iommu redirect failed");
			/*NOTREACHED*/
		}
	}

	/* Reset immu, as redirection can change IMMU */
	immu = NULL;

	/*
	 * for gart, redirect to the real graphic devinfo
	 */
	if (strcmp(ddi_node_name(rdip), "agpgart") == 0) {
		rdip = get_gfx_devinfo(rdip);
		if (rdip == NULL) {
			ddi_err(DER_PANIC, rdip, "iommu redirect failed");
			/*NOTREACHED*/
		}
	}

	/*
	 * Setup DVMA domain for the device. This does
	 * work only the first time we do DVMA for a
	 * device.
	 */
	ddip = NULL;
	domain = device_domain(rdip, &ddip, immu_flags);
	if (domain == NULL) {
		ddi_err(DER_MODE, rdip, "Intel IOMMU setup failed for device");
		return (DDI_DMA_NORESOURCES);
	}

	immu = domain->dom_immu;

	/*
	 * If a domain is found, we must also have a domain dip
	 * which is the topmost ancestor dip of rdip that shares
	 * the same domain with rdip.
	 */
	if (domain->dom_did == 0 || ddip == NULL) {
		ddi_err(DER_MODE, rdip, "domain did 0(%d) or ddip NULL(%p)",
		    domain->dom_did, ddip);
		return (DDI_DMA_NORESOURCES);
	}

	/* if we were redirected, record the domain on the original dip too */
	if (odip != rdip)
		set_domain(odip, ddip, domain);

	/*
	 * Update the root and context entries
	 */
	if (immu_context_update(immu, domain, ddip, rdip, immu_flags)
	    != DDI_SUCCESS) {
		ddi_err(DER_MODE, rdip, "DVMA map: context update failed");
		return (DDI_DMA_NORESOURCES);
	}

	return (DDI_SUCCESS);
}
2961
2962 int
immu_map_memrange(dev_info_t * rdip,memrng_t * mrng)2963 immu_map_memrange(dev_info_t *rdip, memrng_t *mrng)
2964 {
2965 immu_dcookie_t dcookies[1] = {0};
2966 boolean_t pde_set;
2967 immu_t *immu;
2968 domain_t *domain;
2969 immu_inv_wait_t iw;
2970
2971 dcookies[0].dck_paddr = mrng->mrng_start;
2972 dcookies[0].dck_npages = mrng->mrng_npages;
2973
2974 domain = IMMU_DEVI(rdip)->imd_domain;
2975 immu = domain->dom_immu;
2976
2977 pde_set = dvma_map(domain, mrng->mrng_start,
2978 mrng->mrng_npages, dcookies, 1, rdip,
2979 IMMU_FLAGS_READ | IMMU_FLAGS_WRITE);
2980
2981 immu_init_inv_wait(&iw, "memrange", B_TRUE);
2982
2983 immu_flush_iotlb_psi(immu, domain->dom_did, mrng->mrng_start,
2984 mrng->mrng_npages, pde_set == B_TRUE ?
2985 TLB_IVA_WHOLE : TLB_IVA_LEAF, &iw);
2986
2987 return (DDI_SUCCESS);
2988 }
2989
/*
 * Return the immu_devi_t attached to a devinfo node, read atomically.
 */
immu_devi_t *
immu_devi_get(dev_info_t *rdip)
{
	immu_devi_t *immu_devi;
	volatile uintptr_t *vptr = (uintptr_t *)&(DEVI(rdip)->devi_iommu);

	/* Just want atomic reads. No need for lock */
	/* atomic_or_64_nv() with 0 is an atomic 64-bit load of the field */
	immu_devi = (immu_devi_t *)(uintptr_t)atomic_or_64_nv((uint64_t *)vptr,
	    0);
	return (immu_devi);
}
3001
3002 /*ARGSUSED*/
3003 int
immu_hdl_priv_ctor(void * buf,void * arg,int kmf)3004 immu_hdl_priv_ctor(void *buf, void *arg, int kmf)
3005 {
3006 immu_hdl_priv_t *ihp;
3007
3008 ihp = buf;
3009 immu_init_inv_wait(&ihp->ihp_inv_wait, "dmahandle", B_FALSE);
3010
3011 return (0);
3012 }
3013
3014 /*
3015 * iommulib interface functions
3016 */
3017 static int
immu_probe(iommulib_handle_t handle,dev_info_t * dip)3018 immu_probe(iommulib_handle_t handle, dev_info_t *dip)
3019 {
3020 immu_devi_t *immu_devi;
3021 int ret;
3022
3023 if (!immu_enable)
3024 return (DDI_FAILURE);
3025
3026 /*
3027 * Make sure the device has all the IOMMU structures
3028 * initialized. If this device goes through an IOMMU
3029 * unit (e.g. this probe function returns success),
3030 * this will be called at most N times, with N being
3031 * the number of IOMMUs in the system.
3032 *
3033 * After that, when iommulib_nex_open succeeds,
3034 * we can always assume that this device has all
3035 * the structures initialized. IOMMU_USED(dip) will
3036 * be true. There is no need to find the controlling
3037 * IOMMU/domain again.
3038 */
3039 ret = immu_dvma_device_setup(dip, IMMU_FLAGS_NOSLEEP);
3040 if (ret != DDI_SUCCESS)
3041 return (ret);
3042
3043 immu_devi = IMMU_DEVI(dip);
3044
3045 /*
3046 * For unity domains, there is no need to call in to
3047 * the IOMMU code.
3048 */
3049 if (immu_devi->imd_domain->dom_did == IMMU_UNITY_DID)
3050 return (DDI_FAILURE);
3051
3052 if (immu_devi->imd_immu->immu_dip == iommulib_iommu_getdip(handle))
3053 return (DDI_SUCCESS);
3054
3055 return (DDI_FAILURE);
3056 }
3057
3058 /*ARGSUSED*/
3059 static int
immu_allochdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_attr_t * attr,int (* waitfp)(caddr_t),caddr_t arg,ddi_dma_handle_t * dma_handlep)3060 immu_allochdl(iommulib_handle_t handle,
3061 dev_info_t *dip, dev_info_t *rdip, ddi_dma_attr_t *attr,
3062 int (*waitfp)(caddr_t), caddr_t arg, ddi_dma_handle_t *dma_handlep)
3063 {
3064 int ret;
3065 immu_hdl_priv_t *ihp;
3066 immu_t *immu;
3067
3068 ret = iommulib_iommu_dma_allochdl(dip, rdip, attr, waitfp,
3069 arg, dma_handlep);
3070 if (ret == DDI_SUCCESS) {
3071 immu = IMMU_DEVI(rdip)->imd_immu;
3072
3073 ihp = kmem_cache_alloc(immu->immu_hdl_cache,
3074 waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
3075 if (ihp == NULL) {
3076 (void) iommulib_iommu_dma_freehdl(dip, rdip,
3077 *dma_handlep);
3078 return (DDI_DMA_NORESOURCES);
3079 }
3080
3081 if (IMMU_DEVI(rdip)->imd_use_premap)
3082 dvma_prealloc(rdip, ihp, attr);
3083 else {
3084 ihp->ihp_npremapped = 0;
3085 ihp->ihp_predvma = 0;
3086 }
3087 ret = iommulib_iommu_dmahdl_setprivate(dip, rdip, *dma_handlep,
3088 ihp);
3089 }
3090 return (ret);
3091 }
3092
3093 /*ARGSUSED*/
3094 static int
immu_freehdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle)3095 immu_freehdl(iommulib_handle_t handle,
3096 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3097 {
3098 immu_hdl_priv_t *ihp;
3099
3100 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3101 if (ihp != NULL) {
3102 if (IMMU_DEVI(rdip)->imd_use_premap)
3103 dvma_prefree(rdip, ihp);
3104 kmem_cache_free(IMMU_DEVI(rdip)->imd_immu->immu_hdl_cache, ihp);
3105 }
3106
3107 return (iommulib_iommu_dma_freehdl(dip, rdip, dma_handle));
3108 }
3109
3110
3111 /*ARGSUSED*/
3112 static int
immu_bindhdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,struct ddi_dma_req * dma_req,ddi_dma_cookie_t * cookiep,uint_t * ccountp)3113 immu_bindhdl(iommulib_handle_t handle, dev_info_t *dip,
3114 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3115 struct ddi_dma_req *dma_req, ddi_dma_cookie_t *cookiep,
3116 uint_t *ccountp)
3117 {
3118 int ret;
3119 immu_hdl_priv_t *ihp;
3120
3121 ret = iommulib_iommu_dma_bindhdl(dip, rdip, dma_handle,
3122 dma_req, cookiep, ccountp);
3123
3124 if (ret == DDI_DMA_MAPPED) {
3125 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3126 immu_flush_wait(IMMU_DEVI(rdip)->imd_immu, &ihp->ihp_inv_wait);
3127 }
3128
3129 return (ret);
3130 }
3131
3132 /*ARGSUSED*/
3133 static int
immu_unbindhdl(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle)3134 immu_unbindhdl(iommulib_handle_t handle,
3135 dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t dma_handle)
3136 {
3137 return (iommulib_iommu_dma_unbindhdl(dip, rdip, dma_handle));
3138 }
3139
3140 /*ARGSUSED*/
3141 static int
immu_sync(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,off_t off,size_t len,uint_t cachefl)3142 immu_sync(iommulib_handle_t handle, dev_info_t *dip,
3143 dev_info_t *rdip, ddi_dma_handle_t dma_handle, off_t off,
3144 size_t len, uint_t cachefl)
3145 {
3146 return (iommulib_iommu_dma_sync(dip, rdip, dma_handle, off, len,
3147 cachefl));
3148 }
3149
3150 /*ARGSUSED*/
3151 static int
immu_win(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,uint_t win,off_t * offp,size_t * lenp,ddi_dma_cookie_t * cookiep,uint_t * ccountp)3152 immu_win(iommulib_handle_t handle, dev_info_t *dip,
3153 dev_info_t *rdip, ddi_dma_handle_t dma_handle, uint_t win,
3154 off_t *offp, size_t *lenp, ddi_dma_cookie_t *cookiep,
3155 uint_t *ccountp)
3156 {
3157 return (iommulib_iommu_dma_win(dip, rdip, dma_handle, win, offp,
3158 lenp, cookiep, ccountp));
3159 }
3160
3161 /*ARGSUSED*/
3162 static int
immu_mapobject(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,struct ddi_dma_req * dmareq,ddi_dma_obj_t * dmao)3163 immu_mapobject(iommulib_handle_t handle, dev_info_t *dip,
3164 dev_info_t *rdip, ddi_dma_handle_t dma_handle,
3165 struct ddi_dma_req *dmareq, ddi_dma_obj_t *dmao)
3166 {
3167 immu_hdl_priv_t *ihp;
3168
3169 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3170
3171 return (immu_map_dvmaseg(rdip, dma_handle, ihp, dmareq, dmao));
3172 }
3173
3174 /*ARGSUSED*/
3175 static int
immu_unmapobject(iommulib_handle_t handle,dev_info_t * dip,dev_info_t * rdip,ddi_dma_handle_t dma_handle,ddi_dma_obj_t * dmao)3176 immu_unmapobject(iommulib_handle_t handle, dev_info_t *dip,
3177 dev_info_t *rdip, ddi_dma_handle_t dma_handle, ddi_dma_obj_t *dmao)
3178 {
3179 immu_hdl_priv_t *ihp;
3180
3181 ihp = iommulib_iommu_dmahdl_getprivate(dip, rdip, dma_handle);
3182 if (ihp->ihp_npremapped > 0)
3183 return (DDI_SUCCESS);
3184 return (immu_unmap_dvmaseg(rdip, dmao));
3185 }
3186
/*ARGSUSED*/
/*
 * Legacy ddi_dma_map() entry point: intentionally unimplemented for
 * the IOMMU path; trips the ASSERT on DEBUG kernels.
 */
static int
immu_map(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, struct ddi_dma_req *dmareq,
    ddi_dma_handle_t *dma_handle)
{
	ASSERT(0);
	return (DDI_FAILURE);
}
3196
/*ARGSUSED*/
/*
 * Legacy ddi_dma_mctl() entry point: intentionally unimplemented for
 * the IOMMU path; trips the ASSERT on DEBUG kernels.
 */
static int
immu_mctl(iommulib_handle_t handle, dev_info_t *dip,
    dev_info_t *rdip, ddi_dma_handle_t dma_handle,
    enum ddi_dma_ctlops request, off_t *offp, size_t *lenp,
    caddr_t *objpp, uint_t cachefl)
{
	ASSERT(0);
	return (DDI_FAILURE);
}
3207