1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/seq_file.h> 27 #include <linux/stop_machine.h> 28 #include <drm/drmP.h> 29 #include <drm/i915_drm.h> 30 #include "i915_drv.h" 31 #include "i915_vgpu.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 35 /** 36 * DOC: Global GTT views 37 * 38 * Background and previous state 39 * 40 * Historically objects could exists (be bound) in global GTT space only as 41 * singular instances with a view representing all of the object's backing pages 42 * in a linear fashion. This view will be called a normal view. 43 * 44 * To support multiple views of the same object, where the number of mapped 45 * pages is not equal to the backing store, or where the layout of the pages 46 * is not linear, concept of a GGTT view was added. 47 * 48 * One example of an alternative view is a stereo display driven by a single 49 * image. In this case we would have a framebuffer looking like this 50 * (2x2 pages): 51 * 52 * 12 53 * 34 54 * 55 * Above would represent a normal GGTT view as normally mapped for GPU or CPU 56 * rendering. In contrast, fed to the display engine would be an alternative 57 * view which could look something like this: 58 * 59 * 1212 60 * 3434 61 * 62 * In this example both the size and layout of pages in the alternative view is 63 * different from the normal view. 64 * 65 * Implementation and usage 66 * 67 * GGTT views are implemented using VMAs and are distinguished via enum 68 * i915_ggtt_view_type and struct i915_ggtt_view. 69 * 70 * A new flavour of core GEM functions which work with GGTT bound objects were 71 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid 72 * renaming in large amounts of code. They take the struct i915_ggtt_view 73 * parameter encapsulating all metadata required to implement a view. 74 * 75 * As a helper for callers which are only interested in the normal view, 76 * globally const i915_ggtt_view_normal singleton instance exists. All old core 77 * GEM API functions, the ones not taking the view parameter, are operating on, 78 * or with the normal GGTT view. 79 * 80 * Code wanting to add or use a new GGTT view needs to: 81 * 82 * 1. Add a new enum with a suitable name. 83 * 2. Extend the metadata in the i915_ggtt_view structure if required. 84 * 3. Add support to i915_get_vma_pages(). 
85 * 86 * New views are required to build a scatter-gather table from within the 87 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and 88 * exists for the lifetime of an VMA. 89 * 90 * Core API is designed to have copy semantics which means that passed in 91 * struct i915_ggtt_view does not need to be persistent (left around after 92 * calling the core API functions). 93 * 94 */ 95 96 static inline struct i915_ggtt * 97 i915_vm_to_ggtt(struct i915_address_space *vm) 98 { 99 GEM_BUG_ON(!i915_is_ggtt(vm)); 100 return container_of(vm, struct i915_ggtt, base); 101 } 102 103 static int 104 i915_get_ggtt_vma_pages(struct i915_vma *vma); 105 106 const struct i915_ggtt_view i915_ggtt_view_normal = { 107 .type = I915_GGTT_VIEW_NORMAL, 108 }; 109 const struct i915_ggtt_view i915_ggtt_view_rotated = { 110 .type = I915_GGTT_VIEW_ROTATED, 111 }; 112 113 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 114 int enable_ppgtt) 115 { 116 bool has_aliasing_ppgtt; 117 bool has_full_ppgtt; 118 bool has_full_48bit_ppgtt; 119 120 has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6; 121 has_full_ppgtt = INTEL_GEN(dev_priv) >= 7; 122 has_full_48bit_ppgtt = 123 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; 124 125 if (intel_vgpu_active(dev_priv)) 126 has_full_ppgtt = false; /* emulation is too hard */ 127 128 if (!has_aliasing_ppgtt) 129 return 0; 130 131 /* 132 * We don't allow disabling PPGTT for gen9+ as it's a requirement for 133 * execlists, the sole mechanism available to submit work. 134 */ 135 if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) 136 return 0; 137 138 if (enable_ppgtt == 1) 139 return 1; 140 141 if (enable_ppgtt == 2 && has_full_ppgtt) 142 return 2; 143 144 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 145 return 3; 146 147 #ifdef CONFIG_INTEL_IOMMU 148 /* Disable ppgtt on SNB if VT-d is on. */ 149 if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { 150 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 151 return 0; 152 } 153 #endif 154 155 /* Early VLV doesn't have this */ 156 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { 157 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 158 return 0; 159 } 160 161 if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) 162 return has_full_48bit_ppgtt ? 3 : 2; 163 else 164 return has_aliasing_ppgtt ? 1 : 0; 165 } 166 167 static int ppgtt_bind_vma(struct i915_vma *vma, 168 enum i915_cache_level cache_level, 169 u32 unused) 170 { 171 u32 pte_flags = 0; 172 173 /* Currently applicable only to VLV */ 174 if (vma->obj->gt_ro) 175 pte_flags |= PTE_READ_ONLY; 176 177 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, 178 cache_level, pte_flags); 179 180 return 0; 181 } 182 183 static void ppgtt_unbind_vma(struct i915_vma *vma) 184 { 185 vma->vm->clear_range(vma->vm, 186 vma->node.start, 187 vma->size, 188 true); 189 } 190 191 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 192 enum i915_cache_level level, 193 bool valid) 194 { 195 gen8_pte_t pte = valid ? 
_PAGE_PRESENT | _PAGE_RW : 0; 196 pte |= addr; 197 198 switch (level) { 199 case I915_CACHE_NONE: 200 pte |= PPAT_UNCACHED_INDEX; 201 break; 202 case I915_CACHE_WT: 203 pte |= PPAT_DISPLAY_ELLC_INDEX; 204 break; 205 default: 206 pte |= PPAT_CACHED_INDEX; 207 break; 208 } 209 210 return pte; 211 } 212 213 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 214 const enum i915_cache_level level) 215 { 216 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 217 pde |= addr; 218 if (level != I915_CACHE_NONE) 219 pde |= PPAT_CACHED_PDE_INDEX; 220 else 221 pde |= PPAT_UNCACHED_INDEX; 222 return pde; 223 } 224 225 #define gen8_pdpe_encode gen8_pde_encode 226 #define gen8_pml4e_encode gen8_pde_encode 227 228 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 229 enum i915_cache_level level, 230 bool valid, u32 unused) 231 { 232 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 233 pte |= GEN6_PTE_ADDR_ENCODE(addr); 234 235 switch (level) { 236 case I915_CACHE_L3_LLC: 237 case I915_CACHE_LLC: 238 pte |= GEN6_PTE_CACHE_LLC; 239 break; 240 case I915_CACHE_NONE: 241 pte |= GEN6_PTE_UNCACHED; 242 break; 243 default: 244 MISSING_CASE(level); 245 } 246 247 return pte; 248 } 249 250 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 251 enum i915_cache_level level, 252 bool valid, u32 unused) 253 { 254 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 255 pte |= GEN6_PTE_ADDR_ENCODE(addr); 256 257 switch (level) { 258 case I915_CACHE_L3_LLC: 259 pte |= GEN7_PTE_CACHE_L3_LLC; 260 break; 261 case I915_CACHE_LLC: 262 pte |= GEN6_PTE_CACHE_LLC; 263 break; 264 case I915_CACHE_NONE: 265 pte |= GEN6_PTE_UNCACHED; 266 break; 267 default: 268 MISSING_CASE(level); 269 } 270 271 return pte; 272 } 273 274 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 275 enum i915_cache_level level, 276 bool valid, u32 flags) 277 { 278 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 279 pte |= GEN6_PTE_ADDR_ENCODE(addr); 280 281 if (!(flags & PTE_READ_ONLY)) 282 pte |= BYT_PTE_WRITEABLE; 283 284 if (level != I915_CACHE_NONE) 285 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 286 287 return pte; 288 } 289 290 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 291 enum i915_cache_level level, 292 bool valid, u32 unused) 293 { 294 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 295 pte |= HSW_PTE_ADDR_ENCODE(addr); 296 297 if (level != I915_CACHE_NONE) 298 pte |= HSW_WB_LLC_AGE3; 299 300 return pte; 301 } 302 303 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 304 enum i915_cache_level level, 305 bool valid, u32 unused) 306 { 307 gen6_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; 308 pte |= HSW_PTE_ADDR_ENCODE(addr); 309 310 switch (level) { 311 case I915_CACHE_NONE: 312 break; 313 case I915_CACHE_WT: 314 pte |= HSW_WT_ELLC_LLC_AGE3; 315 break; 316 default: 317 pte |= HSW_WB_ELLC_LLC_AGE3; 318 break; 319 } 320 321 return pte; 322 } 323 324 static int __setup_page_dma(struct drm_device *dev, 325 struct i915_page_dma *p, gfp_t flags) 326 { 327 struct device *device = &dev->pdev->dev; 328 329 p->page = alloc_page(flags); 330 if (!p->page) 331 return -ENOMEM; 332 333 p->daddr = dma_map_page(device, 334 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 335 336 if (dma_mapping_error(device, p->daddr)) { 337 __free_page(p->page); 338 return -EINVAL; 339 } 340 341 return 0; 342 } 343 344 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 345 { 346 return __setup_page_dma(dev, p, GFP_KERNEL); 347 } 348 349 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 350 { 351 if (WARN_ON(!p->page)) 352 return; 353 354 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 355 __free_page(p->page); 356 memset(p, 0, sizeof(*p)); 357 } 358 359 static void *kmap_page_dma(struct i915_page_dma *p) 360 { 361 return kmap_atomic(p->page); 362 } 363 364 /* We use the flushing unmap only with ppgtt structures: 365 * page directories, page tables and scratch pages. 366 */ 367 static void kunmap_page_dma(struct drm_device *dev, void *vaddr) 368 { 369 /* There are only few exceptions for gen >=6. chv and bxt. 370 * And we are not sure about the latter so play safe for now. 371 */ 372 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 373 drm_clflush_virt_range(vaddr, PAGE_SIZE); 374 375 kunmap_atomic(vaddr); 376 } 377 378 #define kmap_px(px) kmap_page_dma(px_base(px)) 379 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr)) 380 381 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 382 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 383 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v)) 384 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v)) 385 386 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p, 387 const uint64_t val) 388 { 389 int i; 390 uint64_t * const vaddr = kmap_page_dma(p); 391 392 for (i = 0; i < 512; i++) 393 vaddr[i] = val; 394 395 kunmap_page_dma(dev, vaddr); 396 } 397 398 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, 399 const uint32_t val32) 400 { 401 uint64_t v = val32; 402 403 v = v << 32 | val32; 404 405 fill_page_dma(dev, p, v); 406 } 407 408 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) 409 { 410 struct i915_page_scratch *sp; 411 int ret; 412 413 sp = kzalloc(sizeof(*sp), GFP_KERNEL); 414 if (sp == NULL) 415 return ERR_PTR(-ENOMEM); 416 417 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO); 418 if (ret) { 419 kfree(sp); 420 return ERR_PTR(ret); 421 } 422 423 set_pages_uc(px_page(sp), 1); 424 425 return sp; 426 } 427 428 static void free_scratch_page(struct drm_device *dev, 429 struct i915_page_scratch *sp) 430 { 431 set_pages_wb(px_page(sp), 1); 432 433 cleanup_px(dev, sp); 434 kfree(sp); 435 } 436 437 static struct i915_page_table *alloc_pt(struct drm_device *dev) 438 { 439 struct i915_page_table *pt; 440 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
441 GEN8_PTES : GEN6_PTES; 442 int ret = -ENOMEM; 443 444 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 445 if (!pt) 446 return ERR_PTR(-ENOMEM); 447 448 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 449 GFP_KERNEL); 450 451 if (!pt->used_ptes) 452 goto fail_bitmap; 453 454 ret = setup_px(dev, pt); 455 if (ret) 456 goto fail_page_m; 457 458 return pt; 459 460 fail_page_m: 461 kfree(pt->used_ptes); 462 fail_bitmap: 463 kfree(pt); 464 465 return ERR_PTR(ret); 466 } 467 468 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 469 { 470 cleanup_px(dev, pt); 471 kfree(pt->used_ptes); 472 kfree(pt); 473 } 474 475 static void gen8_initialize_pt(struct i915_address_space *vm, 476 struct i915_page_table *pt) 477 { 478 gen8_pte_t scratch_pte; 479 480 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 481 I915_CACHE_LLC, true); 482 483 fill_px(vm->dev, pt, scratch_pte); 484 } 485 486 static void gen6_initialize_pt(struct i915_address_space *vm, 487 struct i915_page_table *pt) 488 { 489 gen6_pte_t scratch_pte; 490 491 WARN_ON(px_dma(vm->scratch_page) == 0); 492 493 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 494 I915_CACHE_LLC, true, 0); 495 496 fill32_px(vm->dev, pt, scratch_pte); 497 } 498 499 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 500 { 501 struct i915_page_directory *pd; 502 int ret = -ENOMEM; 503 504 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 505 if (!pd) 506 return ERR_PTR(-ENOMEM); 507 508 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 509 sizeof(*pd->used_pdes), GFP_KERNEL); 510 if (!pd->used_pdes) 511 goto fail_bitmap; 512 513 ret = setup_px(dev, pd); 514 if (ret) 515 goto fail_page_m; 516 517 return pd; 518 519 fail_page_m: 520 kfree(pd->used_pdes); 521 fail_bitmap: 522 kfree(pd); 523 524 return ERR_PTR(ret); 525 } 526 527 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 528 { 529 if (px_page(pd)) { 530 cleanup_px(dev, pd); 531 kfree(pd->used_pdes); 532 kfree(pd); 533 } 534 } 535 536 static void gen8_initialize_pd(struct i915_address_space *vm, 537 struct i915_page_directory *pd) 538 { 539 gen8_pde_t scratch_pde; 540 541 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 542 543 fill_px(vm->dev, pd, scratch_pde); 544 } 545 546 static int __pdp_init(struct drm_device *dev, 547 struct i915_page_directory_pointer *pdp) 548 { 549 size_t pdpes = I915_PDPES_PER_PDP(dev); 550 551 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 552 sizeof(unsigned long), 553 GFP_KERNEL); 554 if (!pdp->used_pdpes) 555 return -ENOMEM; 556 557 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 558 GFP_KERNEL); 559 if (!pdp->page_directory) { 560 kfree(pdp->used_pdpes); 561 /* the PDP might be the statically allocated top level. 
Keep it 562 * as clean as possible */ 563 pdp->used_pdpes = NULL; 564 return -ENOMEM; 565 } 566 567 return 0; 568 } 569 570 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 571 { 572 kfree(pdp->used_pdpes); 573 kfree(pdp->page_directory); 574 pdp->page_directory = NULL; 575 } 576 577 static struct 578 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 579 { 580 struct i915_page_directory_pointer *pdp; 581 int ret = -ENOMEM; 582 583 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 584 585 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 586 if (!pdp) 587 return ERR_PTR(-ENOMEM); 588 589 ret = __pdp_init(dev, pdp); 590 if (ret) 591 goto fail_bitmap; 592 593 ret = setup_px(dev, pdp); 594 if (ret) 595 goto fail_page_m; 596 597 return pdp; 598 599 fail_page_m: 600 __pdp_fini(pdp); 601 fail_bitmap: 602 kfree(pdp); 603 604 return ERR_PTR(ret); 605 } 606 607 static void free_pdp(struct drm_device *dev, 608 struct i915_page_directory_pointer *pdp) 609 { 610 __pdp_fini(pdp); 611 if (USES_FULL_48BIT_PPGTT(dev)) { 612 cleanup_px(dev, pdp); 613 kfree(pdp); 614 } 615 } 616 617 static void gen8_initialize_pdp(struct i915_address_space *vm, 618 struct i915_page_directory_pointer *pdp) 619 { 620 gen8_ppgtt_pdpe_t scratch_pdpe; 621 622 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 623 624 fill_px(vm->dev, pdp, scratch_pdpe); 625 } 626 627 static void gen8_initialize_pml4(struct i915_address_space *vm, 628 struct i915_pml4 *pml4) 629 { 630 gen8_ppgtt_pml4e_t scratch_pml4e; 631 632 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 633 I915_CACHE_LLC); 634 635 fill_px(vm->dev, pml4, scratch_pml4e); 636 } 637 638 static void 639 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 640 struct i915_page_directory_pointer *pdp, 641 struct i915_page_directory *pd, 642 int index) 643 { 644 gen8_ppgtt_pdpe_t *page_directorypo; 645 646 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 647 return; 648 649 page_directorypo = kmap_px(pdp); 650 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 651 kunmap_px(ppgtt, page_directorypo); 652 } 653 654 static void 655 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 656 struct i915_pml4 *pml4, 657 struct i915_page_directory_pointer *pdp, 658 int index) 659 { 660 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 661 662 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 663 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 664 kunmap_px(ppgtt, pagemap); 665 } 666 667 /* Broadwell Page Directory Pointer Descriptors */ 668 static int gen8_write_pdp(struct drm_i915_gem_request *req, 669 unsigned entry, 670 dma_addr_t addr) 671 { 672 struct intel_ring *ring = req->ring; 673 struct intel_engine_cs *engine = req->engine; 674 int ret; 675 676 BUG_ON(entry >= 4); 677 678 ret = intel_ring_begin(req, 6); 679 if (ret) 680 return ret; 681 682 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 683 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 684 intel_ring_emit(ring, upper_32_bits(addr)); 685 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 686 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 687 intel_ring_emit(ring, lower_32_bits(addr)); 688 intel_ring_advance(ring); 689 690 return 0; 691 } 692 693 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 694 struct drm_i915_gem_request *req) 695 { 696 int i, ret; 697 698 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 699 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 700 701 ret = gen8_write_pdp(req, i, 
pd_daddr); 702 if (ret) 703 return ret; 704 } 705 706 return 0; 707 } 708 709 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 710 struct drm_i915_gem_request *req) 711 { 712 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 713 } 714 715 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, 716 struct i915_page_directory_pointer *pdp, 717 uint64_t start, 718 uint64_t length, 719 gen8_pte_t scratch_pte) 720 { 721 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 722 gen8_pte_t *pt_vaddr; 723 unsigned pdpe = gen8_pdpe_index(start); 724 unsigned pde = gen8_pde_index(start); 725 unsigned pte = gen8_pte_index(start); 726 unsigned num_entries = length >> PAGE_SHIFT; 727 unsigned last_pte, i; 728 729 if (WARN_ON(!pdp)) 730 return; 731 732 while (num_entries) { 733 struct i915_page_directory *pd; 734 struct i915_page_table *pt; 735 736 if (WARN_ON(!pdp->page_directory[pdpe])) 737 break; 738 739 pd = pdp->page_directory[pdpe]; 740 741 if (WARN_ON(!pd->page_table[pde])) 742 break; 743 744 pt = pd->page_table[pde]; 745 746 if (WARN_ON(!px_page(pt))) 747 break; 748 749 last_pte = pte + num_entries; 750 if (last_pte > GEN8_PTES) 751 last_pte = GEN8_PTES; 752 753 pt_vaddr = kmap_px(pt); 754 755 for (i = pte; i < last_pte; i++) { 756 pt_vaddr[i] = scratch_pte; 757 num_entries--; 758 } 759 760 kunmap_px(ppgtt, pt_vaddr); 761 762 pte = 0; 763 if (++pde == I915_PDES) { 764 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 765 break; 766 pde = 0; 767 } 768 } 769 } 770 771 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 772 uint64_t start, 773 uint64_t length, 774 bool use_scratch) 775 { 776 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 777 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 778 I915_CACHE_LLC, use_scratch); 779 780 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 781 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, 782 scratch_pte); 783 } else { 784 uint64_t pml4e; 785 struct i915_page_directory_pointer *pdp; 786 787 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 788 gen8_ppgtt_clear_pte_range(vm, pdp, start, length, 789 scratch_pte); 790 } 791 } 792 } 793 794 static void 795 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 796 struct i915_page_directory_pointer *pdp, 797 struct sg_page_iter *sg_iter, 798 uint64_t start, 799 enum i915_cache_level cache_level) 800 { 801 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 802 gen8_pte_t *pt_vaddr; 803 unsigned pdpe = gen8_pdpe_index(start); 804 unsigned pde = gen8_pde_index(start); 805 unsigned pte = gen8_pte_index(start); 806 807 pt_vaddr = NULL; 808 809 while (__sg_page_iter_next(sg_iter)) { 810 if (pt_vaddr == NULL) { 811 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 812 struct i915_page_table *pt = pd->page_table[pde]; 813 pt_vaddr = kmap_px(pt); 814 } 815 816 pt_vaddr[pte] = 817 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 818 cache_level, true); 819 if (++pte == GEN8_PTES) { 820 kunmap_px(ppgtt, pt_vaddr); 821 pt_vaddr = NULL; 822 if (++pde == I915_PDES) { 823 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 824 break; 825 pde = 0; 826 } 827 pte = 0; 828 } 829 } 830 831 if (pt_vaddr) 832 kunmap_px(ppgtt, pt_vaddr); 833 } 834 835 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 836 struct sg_table *pages, 837 uint64_t start, 838 enum i915_cache_level cache_level, 839 u32 unused) 840 { 841 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 842 struct sg_page_iter sg_iter; 843 844 __sg_page_iter_start(&sg_iter, 
pages->sgl, sg_nents(pages->sgl), 0); 845 846 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 847 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 848 cache_level); 849 } else { 850 struct i915_page_directory_pointer *pdp; 851 uint64_t pml4e; 852 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 853 854 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 855 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 856 start, cache_level); 857 } 858 } 859 } 860 861 static void gen8_free_page_tables(struct drm_device *dev, 862 struct i915_page_directory *pd) 863 { 864 int i; 865 866 if (!px_page(pd)) 867 return; 868 869 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 870 if (WARN_ON(!pd->page_table[i])) 871 continue; 872 873 free_pt(dev, pd->page_table[i]); 874 pd->page_table[i] = NULL; 875 } 876 } 877 878 static int gen8_init_scratch(struct i915_address_space *vm) 879 { 880 struct drm_device *dev = vm->dev; 881 int ret; 882 883 vm->scratch_page = alloc_scratch_page(dev); 884 if (IS_ERR(vm->scratch_page)) 885 return PTR_ERR(vm->scratch_page); 886 887 vm->scratch_pt = alloc_pt(dev); 888 if (IS_ERR(vm->scratch_pt)) { 889 ret = PTR_ERR(vm->scratch_pt); 890 goto free_scratch_page; 891 } 892 893 vm->scratch_pd = alloc_pd(dev); 894 if (IS_ERR(vm->scratch_pd)) { 895 ret = PTR_ERR(vm->scratch_pd); 896 goto free_pt; 897 } 898 899 if (USES_FULL_48BIT_PPGTT(dev)) { 900 vm->scratch_pdp = alloc_pdp(dev); 901 if (IS_ERR(vm->scratch_pdp)) { 902 ret = PTR_ERR(vm->scratch_pdp); 903 goto free_pd; 904 } 905 } 906 907 gen8_initialize_pt(vm, vm->scratch_pt); 908 gen8_initialize_pd(vm, vm->scratch_pd); 909 if (USES_FULL_48BIT_PPGTT(dev)) 910 gen8_initialize_pdp(vm, vm->scratch_pdp); 911 912 return 0; 913 914 free_pd: 915 free_pd(dev, vm->scratch_pd); 916 free_pt: 917 free_pt(dev, vm->scratch_pt); 918 free_scratch_page: 919 free_scratch_page(dev, vm->scratch_page); 920 921 return ret; 922 } 923 924 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 925 { 926 enum vgt_g2v_type msg; 927 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 928 int i; 929 930 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 931 u64 daddr = px_dma(&ppgtt->pml4); 932 933 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 934 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 935 936 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 937 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 938 } else { 939 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 940 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 941 942 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 943 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 944 } 945 946 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 947 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 948 } 949 950 I915_WRITE(vgtif_reg(g2v_notify), msg); 951 952 return 0; 953 } 954 955 static void gen8_free_scratch(struct i915_address_space *vm) 956 { 957 struct drm_device *dev = vm->dev; 958 959 if (USES_FULL_48BIT_PPGTT(dev)) 960 free_pdp(dev, vm->scratch_pdp); 961 free_pd(dev, vm->scratch_pd); 962 free_pt(dev, vm->scratch_pt); 963 free_scratch_page(dev, vm->scratch_page); 964 } 965 966 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 967 struct i915_page_directory_pointer *pdp) 968 { 969 int i; 970 971 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 972 if (WARN_ON(!pdp->page_directory[i])) 973 continue; 974 975 gen8_free_page_tables(dev, pdp->page_directory[i]); 976 free_pd(dev, pdp->page_directory[i]); 977 } 978 979 free_pdp(dev, pdp); 980 } 981 982 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 983 { 984 int i; 985 986 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 987 if (WARN_ON(!ppgtt->pml4.pdps[i])) 988 continue; 989 990 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 991 } 992 993 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 994 } 995 996 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 997 { 998 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 999 1000 if (intel_vgpu_active(to_i915(vm->dev))) 1001 gen8_ppgtt_notify_vgt(ppgtt, false); 1002 1003 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 1004 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 1005 else 1006 gen8_ppgtt_cleanup_4lvl(ppgtt); 1007 1008 gen8_free_scratch(vm); 1009 } 1010 1011 /** 1012 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1013 * @vm: Master vm structure. 1014 * @pd: Page directory for this address range. 1015 * @start: Starting virtual address to begin allocations. 1016 * @length: Size of the allocations. 1017 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1018 * caller to free on error. 1019 * 1020 * Allocate the required number of page tables. Extremely similar to 1021 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1022 * the page directory boundary (instead of the page directory pointer). That 1023 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1024 * possible, and likely that the caller will need to use multiple calls of this 1025 * function to achieve the appropriate allocation. 1026 * 1027 * Return: 0 if success; negative error code otherwise. 
1028 */ 1029 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1030 struct i915_page_directory *pd, 1031 uint64_t start, 1032 uint64_t length, 1033 unsigned long *new_pts) 1034 { 1035 struct drm_device *dev = vm->dev; 1036 struct i915_page_table *pt; 1037 uint32_t pde; 1038 1039 gen8_for_each_pde(pt, pd, start, length, pde) { 1040 /* Don't reallocate page tables */ 1041 if (test_bit(pde, pd->used_pdes)) { 1042 /* Scratch is never allocated this way */ 1043 WARN_ON(pt == vm->scratch_pt); 1044 continue; 1045 } 1046 1047 pt = alloc_pt(dev); 1048 if (IS_ERR(pt)) 1049 goto unwind_out; 1050 1051 gen8_initialize_pt(vm, pt); 1052 pd->page_table[pde] = pt; 1053 __set_bit(pde, new_pts); 1054 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1055 } 1056 1057 return 0; 1058 1059 unwind_out: 1060 for_each_set_bit(pde, new_pts, I915_PDES) 1061 free_pt(dev, pd->page_table[pde]); 1062 1063 return -ENOMEM; 1064 } 1065 1066 /** 1067 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1068 * @vm: Master vm structure. 1069 * @pdp: Page directory pointer for this address range. 1070 * @start: Starting virtual address to begin allocations. 1071 * @length: Size of the allocations. 1072 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1073 * caller to free on error. 1074 * 1075 * Allocate the required number of page directories starting at the pde index of 1076 * @start, and ending at the pde index @start + @length. This function will skip 1077 * over already allocated page directories within the range, and only allocate 1078 * new ones, setting the appropriate pointer within the pdp as well as the 1079 * correct position in the bitmap @new_pds. 1080 * 1081 * The function will only allocate the pages within the range for a give page 1082 * directory pointer. In other words, if @start + @length straddles a virtually 1083 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1084 * required by the caller, This is not currently possible, and the BUG in the 1085 * code will prevent it. 1086 * 1087 * Return: 0 if success; negative error code otherwise. 1088 */ 1089 static int 1090 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1091 struct i915_page_directory_pointer *pdp, 1092 uint64_t start, 1093 uint64_t length, 1094 unsigned long *new_pds) 1095 { 1096 struct drm_device *dev = vm->dev; 1097 struct i915_page_directory *pd; 1098 uint32_t pdpe; 1099 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1100 1101 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1102 1103 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1104 if (test_bit(pdpe, pdp->used_pdpes)) 1105 continue; 1106 1107 pd = alloc_pd(dev); 1108 if (IS_ERR(pd)) 1109 goto unwind_out; 1110 1111 gen8_initialize_pd(vm, pd); 1112 pdp->page_directory[pdpe] = pd; 1113 __set_bit(pdpe, new_pds); 1114 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1115 } 1116 1117 return 0; 1118 1119 unwind_out: 1120 for_each_set_bit(pdpe, new_pds, pdpes) 1121 free_pd(dev, pdp->page_directory[pdpe]); 1122 1123 return -ENOMEM; 1124 } 1125 1126 /** 1127 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1128 * @vm: Master vm structure. 1129 * @pml4: Page map level 4 for this address range. 1130 * @start: Starting virtual address to begin allocations. 1131 * @length: Size of the allocations. 1132 * @new_pdps: Bitmap set by function with new allocations. Likely used by the 1133 * caller to free on error. 
1134 * 1135 * Allocate the required number of page directory pointers. Extremely similar to 1136 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1137 * The main difference is here we are limited by the pml4 boundary (instead of 1138 * the page directory pointer). 1139 * 1140 * Return: 0 if success; negative error code otherwise. 1141 */ 1142 static int 1143 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1144 struct i915_pml4 *pml4, 1145 uint64_t start, 1146 uint64_t length, 1147 unsigned long *new_pdps) 1148 { 1149 struct drm_device *dev = vm->dev; 1150 struct i915_page_directory_pointer *pdp; 1151 uint32_t pml4e; 1152 1153 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1154 1155 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1156 if (!test_bit(pml4e, pml4->used_pml4es)) { 1157 pdp = alloc_pdp(dev); 1158 if (IS_ERR(pdp)) 1159 goto unwind_out; 1160 1161 gen8_initialize_pdp(vm, pdp); 1162 pml4->pdps[pml4e] = pdp; 1163 __set_bit(pml4e, new_pdps); 1164 trace_i915_page_directory_pointer_entry_alloc(vm, 1165 pml4e, 1166 start, 1167 GEN8_PML4E_SHIFT); 1168 } 1169 } 1170 1171 return 0; 1172 1173 unwind_out: 1174 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1175 free_pdp(dev, pml4->pdps[pml4e]); 1176 1177 return -ENOMEM; 1178 } 1179 1180 static void 1181 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1182 { 1183 kfree(new_pts); 1184 kfree(new_pds); 1185 } 1186 1187 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1188 * of these are based on the number of PDPEs in the system. 1189 */ 1190 static 1191 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1192 unsigned long **new_pts, 1193 uint32_t pdpes) 1194 { 1195 unsigned long *pds; 1196 unsigned long *pts; 1197 1198 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1199 if (!pds) 1200 return -ENOMEM; 1201 1202 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1203 GFP_TEMPORARY); 1204 if (!pts) 1205 goto err_out; 1206 1207 *new_pds = pds; 1208 *new_pts = pts; 1209 1210 return 0; 1211 1212 err_out: 1213 free_gen8_temp_bitmaps(pds, pts); 1214 return -ENOMEM; 1215 } 1216 1217 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 1218 * the page table structures, we mark them dirty so that 1219 * context switching/execlist queuing code takes extra steps 1220 * to ensure that tlbs are flushed. 1221 */ 1222 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 1223 { 1224 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 1225 } 1226 1227 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1228 struct i915_page_directory_pointer *pdp, 1229 uint64_t start, 1230 uint64_t length) 1231 { 1232 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1233 unsigned long *new_page_dirs, *new_page_tables; 1234 struct drm_device *dev = vm->dev; 1235 struct i915_page_directory *pd; 1236 const uint64_t orig_start = start; 1237 const uint64_t orig_length = length; 1238 uint32_t pdpe; 1239 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1240 int ret; 1241 1242 /* Wrap is never okay since we can only represent 48b, and we don't 1243 * actually use the other side of the canonical address space. 
1244 */ 1245 if (WARN_ON(start + length < start)) 1246 return -ENODEV; 1247 1248 if (WARN_ON(start + length > vm->total)) 1249 return -ENODEV; 1250 1251 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1252 if (ret) 1253 return ret; 1254 1255 /* Do the allocations first so we can easily bail out */ 1256 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1257 new_page_dirs); 1258 if (ret) { 1259 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1260 return ret; 1261 } 1262 1263 /* For every page directory referenced, allocate page tables */ 1264 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1265 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1266 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1267 if (ret) 1268 goto err_out; 1269 } 1270 1271 start = orig_start; 1272 length = orig_length; 1273 1274 /* Allocations have completed successfully, so set the bitmaps, and do 1275 * the mappings. */ 1276 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1277 gen8_pde_t *const page_directory = kmap_px(pd); 1278 struct i915_page_table *pt; 1279 uint64_t pd_len = length; 1280 uint64_t pd_start = start; 1281 uint32_t pde; 1282 1283 /* Every pd should be allocated, we just did that above. */ 1284 WARN_ON(!pd); 1285 1286 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1287 /* Same reasoning as pd */ 1288 WARN_ON(!pt); 1289 WARN_ON(!pd_len); 1290 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1291 1292 /* Set our used ptes within the page table */ 1293 bitmap_set(pt->used_ptes, 1294 gen8_pte_index(pd_start), 1295 gen8_pte_count(pd_start, pd_len)); 1296 1297 /* Our pde is now pointing to the pagetable, pt */ 1298 __set_bit(pde, pd->used_pdes); 1299 1300 /* Map the PDE to the page table */ 1301 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1302 I915_CACHE_LLC); 1303 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1304 gen8_pte_index(start), 1305 gen8_pte_count(start, length), 1306 GEN8_PTES); 1307 1308 /* NB: We haven't yet mapped ptes to pages. At this 1309 * point we're still relying on insert_entries() */ 1310 } 1311 1312 kunmap_px(ppgtt, page_directory); 1313 __set_bit(pdpe, pdp->used_pdpes); 1314 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1315 } 1316 1317 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1318 mark_tlbs_dirty(ppgtt); 1319 return 0; 1320 1321 err_out: 1322 while (pdpe--) { 1323 unsigned long temp; 1324 1325 for_each_set_bit(temp, new_page_tables + pdpe * 1326 BITS_TO_LONGS(I915_PDES), I915_PDES) 1327 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1328 } 1329 1330 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1331 free_pd(dev, pdp->page_directory[pdpe]); 1332 1333 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1334 mark_tlbs_dirty(ppgtt); 1335 return ret; 1336 } 1337 1338 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1339 struct i915_pml4 *pml4, 1340 uint64_t start, 1341 uint64_t length) 1342 { 1343 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1344 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1345 struct i915_page_directory_pointer *pdp; 1346 uint64_t pml4e; 1347 int ret = 0; 1348 1349 /* Do the pml4 allocations first, so we don't need to track the newly 1350 * allocated tables below the pdp */ 1351 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1352 1353 /* The pagedirectory and pagetable allocations are done in the shared 3 1354 * and 4 level code. Just allocate the pdps. 
1355 */ 1356 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1357 new_pdps); 1358 if (ret) 1359 return ret; 1360 1361 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1362 "The allocation has spanned more than 512GB. " 1363 "It is highly likely this is incorrect."); 1364 1365 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1366 WARN_ON(!pdp); 1367 1368 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1369 if (ret) 1370 goto err_out; 1371 1372 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1373 } 1374 1375 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1376 GEN8_PML4ES_PER_PML4); 1377 1378 return 0; 1379 1380 err_out: 1381 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1382 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1383 1384 return ret; 1385 } 1386 1387 static int gen8_alloc_va_range(struct i915_address_space *vm, 1388 uint64_t start, uint64_t length) 1389 { 1390 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1391 1392 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1393 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1394 else 1395 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1396 } 1397 1398 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1399 uint64_t start, uint64_t length, 1400 gen8_pte_t scratch_pte, 1401 struct seq_file *m) 1402 { 1403 struct i915_page_directory *pd; 1404 uint32_t pdpe; 1405 1406 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1407 struct i915_page_table *pt; 1408 uint64_t pd_len = length; 1409 uint64_t pd_start = start; 1410 uint32_t pde; 1411 1412 if (!test_bit(pdpe, pdp->used_pdpes)) 1413 continue; 1414 1415 seq_printf(m, "\tPDPE #%d\n", pdpe); 1416 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1417 uint32_t pte; 1418 gen8_pte_t *pt_vaddr; 1419 1420 if (!test_bit(pde, pd->used_pdes)) 1421 continue; 1422 1423 pt_vaddr = kmap_px(pt); 1424 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1425 uint64_t va = 1426 (pdpe << GEN8_PDPE_SHIFT) | 1427 (pde << GEN8_PDE_SHIFT) | 1428 (pte << GEN8_PTE_SHIFT); 1429 int i; 1430 bool found = false; 1431 1432 for (i = 0; i < 4; i++) 1433 if (pt_vaddr[pte + i] != scratch_pte) 1434 found = true; 1435 if (!found) 1436 continue; 1437 1438 seq_printf(m, "\t\t0x%lx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1439 for (i = 0; i < 4; i++) { 1440 if (pt_vaddr[pte + i] != scratch_pte) 1441 seq_printf(m, " %lx", pt_vaddr[pte + i]); 1442 else 1443 seq_puts(m, " SCRATCH "); 1444 } 1445 seq_puts(m, "\n"); 1446 } 1447 /* don't use kunmap_px, it could trigger 1448 * an unnecessary flush. 
1449 */ 1450 kunmap_atomic(pt_vaddr); 1451 } 1452 } 1453 } 1454 1455 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1456 { 1457 struct i915_address_space *vm = &ppgtt->base; 1458 uint64_t start = ppgtt->base.start; 1459 uint64_t length = ppgtt->base.total; 1460 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 1461 I915_CACHE_LLC, true); 1462 1463 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 1464 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1465 } else { 1466 uint64_t pml4e; 1467 struct i915_pml4 *pml4 = &ppgtt->pml4; 1468 struct i915_page_directory_pointer *pdp; 1469 1470 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1471 if (!test_bit(pml4e, pml4->used_pml4es)) 1472 continue; 1473 1474 seq_printf(m, " PML4E #%lu\n", pml4e); 1475 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1476 } 1477 } 1478 } 1479 1480 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1481 { 1482 unsigned long *new_page_dirs, *new_page_tables; 1483 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1484 int ret; 1485 1486 /* We allocate temp bitmap for page tables for no gain 1487 * but as this is for init only, lets keep the things simple 1488 */ 1489 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1490 if (ret) 1491 return ret; 1492 1493 /* Allocate for all pdps regardless of how the ppgtt 1494 * was defined. 1495 */ 1496 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1497 0, 1ULL << 32, 1498 new_page_dirs); 1499 if (!ret) 1500 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1501 1502 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1503 1504 return ret; 1505 } 1506 1507 /* 1508 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1509 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1510 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1511 * space. 
1512 * 1513 */ 1514 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1515 { 1516 int ret; 1517 1518 ret = gen8_init_scratch(&ppgtt->base); 1519 if (ret) 1520 return ret; 1521 1522 ppgtt->base.start = 0; 1523 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1524 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1525 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1526 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1527 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1528 ppgtt->base.bind_vma = ppgtt_bind_vma; 1529 ppgtt->debug_dump = gen8_dump_ppgtt; 1530 1531 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1532 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1533 if (ret) 1534 goto free_scratch; 1535 1536 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1537 1538 ppgtt->base.total = 1ULL << 48; 1539 ppgtt->switch_mm = gen8_48b_mm_switch; 1540 } else { 1541 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1542 if (ret) 1543 goto free_scratch; 1544 1545 ppgtt->base.total = 1ULL << 32; 1546 ppgtt->switch_mm = gen8_legacy_mm_switch; 1547 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1548 0, 0, 1549 GEN8_PML4E_SHIFT); 1550 1551 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) { 1552 ret = gen8_preallocate_top_level_pdps(ppgtt); 1553 if (ret) 1554 goto free_scratch; 1555 } 1556 } 1557 1558 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) 1559 gen8_ppgtt_notify_vgt(ppgtt, true); 1560 1561 return 0; 1562 1563 free_scratch: 1564 gen8_free_scratch(&ppgtt->base); 1565 return ret; 1566 } 1567 1568 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1569 { 1570 struct i915_address_space *vm = &ppgtt->base; 1571 struct i915_page_table *unused; 1572 gen6_pte_t scratch_pte; 1573 uint32_t pd_entry; 1574 uint32_t pte, pde; 1575 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1576 1577 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1578 I915_CACHE_LLC, true, 0); 1579 1580 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1581 u32 expected; 1582 gen6_pte_t *pt_vaddr; 1583 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1584 pd_entry = readl(ppgtt->pd_addr + pde); 1585 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1586 1587 if (pd_entry != expected) 1588 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1589 pde, 1590 pd_entry, 1591 expected); 1592 seq_printf(m, "\tPDE: %x\n", pd_entry); 1593 1594 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1595 1596 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1597 unsigned long va = 1598 (pde * PAGE_SIZE * GEN6_PTES) + 1599 (pte * PAGE_SIZE); 1600 int i; 1601 bool found = false; 1602 for (i = 0; i < 4; i++) 1603 if (pt_vaddr[pte + i] != scratch_pte) 1604 found = true; 1605 if (!found) 1606 continue; 1607 1608 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1609 for (i = 0; i < 4; i++) { 1610 if (pt_vaddr[pte + i] != scratch_pte) 1611 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1612 else 1613 seq_puts(m, " SCRATCH "); 1614 } 1615 seq_puts(m, "\n"); 1616 } 1617 kunmap_px(ppgtt, pt_vaddr); 1618 } 1619 } 1620 1621 /* Write pde (index) from the page directory @pd to the page table @pt */ 1622 static void gen6_write_pde(struct i915_page_directory *pd, 1623 const int pde, struct i915_page_table *pt) 1624 { 1625 /* Caller needs to make sure the write completes if necessary */ 1626 struct i915_hw_ppgtt *ppgtt = 1627 container_of(pd, struct i915_hw_ppgtt, pd); 1628 u32 pd_entry; 1629 1630 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1631 pd_entry |= 
GEN6_PDE_VALID; 1632 1633 writel(pd_entry, ppgtt->pd_addr + pde); 1634 } 1635 1636 /* Write all the page tables found in the ppgtt structure to incrementing page 1637 * directories. */ 1638 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1639 struct i915_page_directory *pd, 1640 uint32_t start, uint32_t length) 1641 { 1642 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1643 struct i915_page_table *pt; 1644 uint32_t pde; 1645 1646 gen6_for_each_pde(pt, pd, start, length, pde) 1647 gen6_write_pde(pd, pde, pt); 1648 1649 /* Make sure write is complete before other code can use this page 1650 * table. Also require for WC mapped PTEs */ 1651 readl(ggtt->gsm); 1652 } 1653 1654 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1655 { 1656 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1657 1658 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1659 } 1660 1661 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1662 struct drm_i915_gem_request *req) 1663 { 1664 struct intel_ring *ring = req->ring; 1665 struct intel_engine_cs *engine = req->engine; 1666 int ret; 1667 1668 /* NB: TLBs must be flushed and invalidated before a switch */ 1669 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1670 if (ret) 1671 return ret; 1672 1673 ret = intel_ring_begin(req, 6); 1674 if (ret) 1675 return ret; 1676 1677 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1678 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1679 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1680 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1681 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1682 intel_ring_emit(ring, MI_NOOP); 1683 intel_ring_advance(ring); 1684 1685 return 0; 1686 } 1687 1688 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1689 struct drm_i915_gem_request *req) 1690 { 1691 struct intel_ring *ring = req->ring; 1692 struct intel_engine_cs *engine = req->engine; 1693 int ret; 1694 1695 /* NB: TLBs must be flushed and invalidated before a switch */ 1696 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1697 if (ret) 1698 return ret; 1699 1700 ret = intel_ring_begin(req, 6); 1701 if (ret) 1702 return ret; 1703 1704 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1705 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1706 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1707 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1708 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1709 intel_ring_emit(ring, MI_NOOP); 1710 intel_ring_advance(ring); 1711 1712 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1713 if (engine->id != RCS) { 1714 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1715 if (ret) 1716 return ret; 1717 } 1718 1719 return 0; 1720 } 1721 1722 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1723 struct drm_i915_gem_request *req) 1724 { 1725 struct intel_engine_cs *engine = req->engine; 1726 struct drm_i915_private *dev_priv = req->i915; 1727 1728 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1729 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1730 return 0; 1731 } 1732 1733 static void gen8_ppgtt_enable(struct drm_device *dev) 1734 { 1735 struct drm_i915_private *dev_priv = to_i915(dev); 1736 struct intel_engine_cs *engine; 1737 1738 for_each_engine(engine, dev_priv) { 1739 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0; 1740 I915_WRITE(RING_MODE_GEN7(engine), 1741 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1742 } 1743 } 1744 1745 static void gen7_ppgtt_enable(struct drm_device *dev) 1746 { 1747 struct drm_i915_private *dev_priv = to_i915(dev); 1748 struct intel_engine_cs *engine; 1749 uint32_t ecochk, ecobits; 1750 1751 ecobits = I915_READ(GAC_ECO_BITS); 1752 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1753 1754 ecochk = I915_READ(GAM_ECOCHK); 1755 if (IS_HASWELL(dev)) { 1756 ecochk |= ECOCHK_PPGTT_WB_HSW; 1757 } else { 1758 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1759 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1760 } 1761 I915_WRITE(GAM_ECOCHK, ecochk); 1762 1763 for_each_engine(engine, dev_priv) { 1764 /* GFX_MODE is per-ring on gen7+ */ 1765 I915_WRITE(RING_MODE_GEN7(engine), 1766 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1767 } 1768 } 1769 1770 static void gen6_ppgtt_enable(struct drm_device *dev) 1771 { 1772 struct drm_i915_private *dev_priv = to_i915(dev); 1773 uint32_t ecochk, gab_ctl, ecobits; 1774 1775 ecobits = I915_READ(GAC_ECO_BITS); 1776 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1777 ECOBITS_PPGTT_CACHE64B); 1778 1779 gab_ctl = I915_READ(GAB_CTL); 1780 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1781 1782 ecochk = I915_READ(GAM_ECOCHK); 1783 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1784 1785 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1786 } 1787 1788 /* PPGTT support for Sandybdrige/Gen6 and later */ 1789 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1790 uint64_t start, 1791 uint64_t length, 1792 bool use_scratch) 1793 { 1794 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1795 gen6_pte_t *pt_vaddr, scratch_pte; 1796 unsigned first_entry = start >> PAGE_SHIFT; 1797 unsigned num_entries = length >> PAGE_SHIFT; 1798 unsigned act_pt = first_entry / GEN6_PTES; 1799 unsigned first_pte = first_entry % GEN6_PTES; 1800 unsigned last_pte, i; 1801 1802 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1803 I915_CACHE_LLC, true, 0); 1804 1805 while (num_entries) { 1806 last_pte = first_pte + num_entries; 1807 if (last_pte > GEN6_PTES) 1808 last_pte = GEN6_PTES; 1809 1810 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1811 1812 for (i = first_pte; i < last_pte; i++) 1813 pt_vaddr[i] = scratch_pte; 1814 1815 kunmap_px(ppgtt, pt_vaddr); 1816 1817 num_entries -= last_pte - first_pte; 1818 first_pte = 0; 1819 act_pt++; 1820 } 1821 } 1822 1823 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1824 struct sg_table *pages, 1825 uint64_t start, 1826 enum i915_cache_level cache_level, u32 flags) 1827 { 1828 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1829 unsigned first_entry = start >> PAGE_SHIFT; 1830 unsigned act_pt = first_entry / GEN6_PTES; 1831 unsigned act_pte = first_entry % GEN6_PTES; 1832 gen6_pte_t *pt_vaddr = NULL; 1833 struct sgt_iter sgt_iter; 1834 dma_addr_t addr; 1835 1836 for_each_sgt_dma(addr, sgt_iter, pages) { 1837 if (pt_vaddr == NULL) 1838 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1839 1840 pt_vaddr[act_pte] = 1841 vm->pte_encode(addr, cache_level, true, flags); 1842 1843 if (++act_pte == GEN6_PTES) { 1844 kunmap_px(ppgtt, pt_vaddr); 1845 pt_vaddr = NULL; 1846 act_pt++; 1847 act_pte = 0; 1848 } 1849 } 1850 1851 if (pt_vaddr) 1852 kunmap_px(ppgtt, pt_vaddr); 1853 } 1854 1855 static int gen6_alloc_va_range(struct i915_address_space *vm, 1856 uint64_t start_in, uint64_t length_in) 1857 { 1858 DECLARE_BITMAP(new_page_tables, 
I915_PDES); 1859 struct drm_device *dev = vm->dev; 1860 struct drm_i915_private *dev_priv = to_i915(dev); 1861 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1862 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1863 struct i915_page_table *pt; 1864 uint32_t start, length, start_save, length_save; 1865 uint32_t pde; 1866 int ret; 1867 1868 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1869 return -ENODEV; 1870 1871 start = start_save = start_in; 1872 length = length_save = length_in; 1873 1874 bitmap_zero(new_page_tables, I915_PDES); 1875 1876 /* The allocation is done in two stages so that we can bail out with 1877 * minimal amount of pain. The first stage finds new page tables that 1878 * need allocation. The second stage marks use ptes within the page 1879 * tables. 1880 */ 1881 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1882 if (pt != vm->scratch_pt) { 1883 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1884 continue; 1885 } 1886 1887 /* We've already allocated a page table */ 1888 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1889 1890 pt = alloc_pt(dev); 1891 if (IS_ERR(pt)) { 1892 ret = PTR_ERR(pt); 1893 goto unwind_out; 1894 } 1895 1896 gen6_initialize_pt(vm, pt); 1897 1898 ppgtt->pd.page_table[pde] = pt; 1899 __set_bit(pde, new_page_tables); 1900 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1901 } 1902 1903 start = start_save; 1904 length = length_save; 1905 1906 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1907 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1908 1909 bitmap_zero(tmp_bitmap, GEN6_PTES); 1910 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1911 gen6_pte_count(start, length)); 1912 1913 if (__test_and_clear_bit(pde, new_page_tables)) 1914 gen6_write_pde(&ppgtt->pd, pde, pt); 1915 1916 trace_i915_page_table_entry_map(vm, pde, pt, 1917 gen6_pte_index(start), 1918 gen6_pte_count(start, length), 1919 GEN6_PTES); 1920 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1921 GEN6_PTES); 1922 } 1923 1924 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1925 1926 /* Make sure write is complete before other code can use this page 1927 * table. 
Also require for WC mapped PTEs */ 1928 readl(ggtt->gsm); 1929 1930 mark_tlbs_dirty(ppgtt); 1931 return 0; 1932 1933 unwind_out: 1934 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1935 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1936 1937 ppgtt->pd.page_table[pde] = vm->scratch_pt; 1938 free_pt(vm->dev, pt); 1939 } 1940 1941 mark_tlbs_dirty(ppgtt); 1942 return ret; 1943 } 1944 1945 static int gen6_init_scratch(struct i915_address_space *vm) 1946 { 1947 struct drm_device *dev = vm->dev; 1948 1949 vm->scratch_page = alloc_scratch_page(dev); 1950 if (IS_ERR(vm->scratch_page)) 1951 return PTR_ERR(vm->scratch_page); 1952 1953 vm->scratch_pt = alloc_pt(dev); 1954 if (IS_ERR(vm->scratch_pt)) { 1955 free_scratch_page(dev, vm->scratch_page); 1956 return PTR_ERR(vm->scratch_pt); 1957 } 1958 1959 gen6_initialize_pt(vm, vm->scratch_pt); 1960 1961 return 0; 1962 } 1963 1964 static void gen6_free_scratch(struct i915_address_space *vm) 1965 { 1966 struct drm_device *dev = vm->dev; 1967 1968 free_pt(dev, vm->scratch_pt); 1969 free_scratch_page(dev, vm->scratch_page); 1970 } 1971 1972 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1973 { 1974 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1975 struct i915_page_directory *pd = &ppgtt->pd; 1976 struct drm_device *dev = vm->dev; 1977 struct i915_page_table *pt; 1978 uint32_t pde; 1979 1980 drm_mm_remove_node(&ppgtt->node); 1981 1982 gen6_for_all_pdes(pt, pd, pde) 1983 if (pt != vm->scratch_pt) 1984 free_pt(dev, pt); 1985 1986 gen6_free_scratch(vm); 1987 } 1988 1989 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1990 { 1991 struct i915_address_space *vm = &ppgtt->base; 1992 struct drm_device *dev = ppgtt->base.dev; 1993 struct drm_i915_private *dev_priv = to_i915(dev); 1994 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1995 bool retried = false; 1996 int ret; 1997 1998 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1999 * allocator works in address space sizes, so it's multiplied by page 2000 * size. We allocate at the top of the GTT to avoid fragmentation. 
2001 */ 2002 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2003 2004 ret = gen6_init_scratch(vm); 2005 if (ret) 2006 return ret; 2007 2008 alloc: 2009 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2010 &ppgtt->node, GEN6_PD_SIZE, 2011 GEN6_PD_ALIGN, 0, 2012 0, ggtt->base.total, 2013 DRM_MM_TOPDOWN); 2014 if (ret == -ENOSPC && !retried) { 2015 ret = i915_gem_evict_something(&ggtt->base, 2016 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2017 I915_CACHE_NONE, 2018 0, ggtt->base.total, 2019 0); 2020 if (ret) 2021 goto err_out; 2022 2023 retried = true; 2024 goto alloc; 2025 } 2026 2027 if (ret) 2028 goto err_out; 2029 2030 2031 if (ppgtt->node.start < ggtt->mappable_end) 2032 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2033 2034 return 0; 2035 2036 err_out: 2037 gen6_free_scratch(vm); 2038 return ret; 2039 } 2040 2041 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2042 { 2043 return gen6_ppgtt_allocate_page_directories(ppgtt); 2044 } 2045 2046 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2047 uint64_t start, uint64_t length) 2048 { 2049 struct i915_page_table *unused; 2050 uint32_t pde; 2051 2052 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2053 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2054 } 2055 2056 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2057 { 2058 struct drm_device *dev = ppgtt->base.dev; 2059 struct drm_i915_private *dev_priv = to_i915(dev); 2060 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2061 int ret; 2062 2063 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2064 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev)) 2065 ppgtt->switch_mm = gen6_mm_switch; 2066 else if (IS_HASWELL(dev)) 2067 ppgtt->switch_mm = hsw_mm_switch; 2068 else if (IS_GEN7(dev)) 2069 ppgtt->switch_mm = gen7_mm_switch; 2070 else 2071 BUG(); 2072 2073 ret = gen6_ppgtt_alloc(ppgtt); 2074 if (ret) 2075 return ret; 2076 2077 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2078 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2079 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2080 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2081 ppgtt->base.bind_vma = ppgtt_bind_vma; 2082 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2083 ppgtt->base.start = 0; 2084 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2085 ppgtt->debug_dump = gen6_dump_ppgtt; 2086 2087 ppgtt->pd.base.ggtt_offset = 2088 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2089 2090 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2091 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2092 2093 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2094 2095 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2096 2097 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2098 ppgtt->node.size >> 20, 2099 ppgtt->node.start / PAGE_SIZE); 2100 2101 DRM_DEBUG("Adding PPGTT at offset %x\n", 2102 ppgtt->pd.base.ggtt_offset << 10); 2103 2104 return 0; 2105 } 2106 2107 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2108 struct drm_i915_private *dev_priv) 2109 { 2110 ppgtt->base.dev = &dev_priv->drm; 2111 2112 if (INTEL_INFO(dev_priv)->gen < 8) 2113 return gen6_ppgtt_init(ppgtt); 2114 else 2115 return gen8_ppgtt_init(ppgtt); 2116 } 2117 2118 static void i915_address_space_init(struct i915_address_space *vm, 2119 struct drm_i915_private *dev_priv) 2120 { 2121 drm_mm_init(&vm->mm, vm->start, vm->total); 2122 INIT_LIST_HEAD(&vm->active_list); 2123 INIT_LIST_HEAD(&vm->inactive_list); 2124 INIT_LIST_HEAD(&vm->unbound_list); 2125 
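/* Every address space, the GGTT as well as each PPGTT, is linked onto
* dev_priv->vm_list so that paths such as i915_gem_restore_gtt_mappings()
* can walk all of them. */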
list_add_tail(&vm->global_link, &dev_priv->vm_list); 2126 } 2127 2128 static void gtt_write_workarounds(struct drm_device *dev) 2129 { 2130 struct drm_i915_private *dev_priv = to_i915(dev); 2131 2132 /* This function is for gtt related workarounds. This function is 2133 * called on driver load and after a GPU reset, so you can place 2134 * workarounds here even if they get overwritten by GPU reset. 2135 */ 2136 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2137 if (IS_BROADWELL(dev)) 2138 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2139 else if (IS_CHERRYVIEW(dev)) 2140 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2141 else if (IS_SKYLAKE(dev)) 2142 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2143 else if (IS_BROXTON(dev)) 2144 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2145 } 2146 2147 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2148 struct drm_i915_private *dev_priv, 2149 struct drm_i915_file_private *file_priv) 2150 { 2151 int ret; 2152 2153 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2154 if (ret == 0) { 2155 kref_init(&ppgtt->ref); 2156 i915_address_space_init(&ppgtt->base, dev_priv); 2157 ppgtt->base.file = file_priv; 2158 } 2159 2160 return ret; 2161 } 2162 2163 int i915_ppgtt_init_hw(struct drm_device *dev) 2164 { 2165 gtt_write_workarounds(dev); 2166 2167 /* In the case of execlists, PPGTT is enabled by the context descriptor 2168 * and the PDPs are contained within the context itself. We don't 2169 * need to do anything here. */ 2170 if (i915.enable_execlists) 2171 return 0; 2172 2173 if (!USES_PPGTT(dev)) 2174 return 0; 2175 2176 if (IS_GEN6(dev)) 2177 gen6_ppgtt_enable(dev); 2178 else if (IS_GEN7(dev)) 2179 gen7_ppgtt_enable(dev); 2180 else if (INTEL_INFO(dev)->gen >= 8) 2181 gen8_ppgtt_enable(dev); 2182 else 2183 MISSING_CASE(INTEL_INFO(dev)->gen); 2184 2185 return 0; 2186 } 2187 2188 struct i915_hw_ppgtt * 2189 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2190 struct drm_i915_file_private *fpriv) 2191 { 2192 struct i915_hw_ppgtt *ppgtt; 2193 int ret; 2194 2195 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2196 if (!ppgtt) 2197 return ERR_PTR(-ENOMEM); 2198 2199 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); 2200 if (ret) { 2201 kfree(ppgtt); 2202 return ERR_PTR(ret); 2203 } 2204 2205 trace_i915_ppgtt_create(&ppgtt->base); 2206 2207 return ppgtt; 2208 } 2209 2210 void i915_ppgtt_release(struct kref *kref) 2211 { 2212 struct i915_hw_ppgtt *ppgtt = 2213 container_of(kref, struct i915_hw_ppgtt, ref); 2214 2215 trace_i915_ppgtt_release(&ppgtt->base); 2216 2217 /* vmas should already be unbound and destroyed */ 2218 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2219 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2220 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2221 2222 list_del(&ppgtt->base.global_link); 2223 drm_mm_takedown(&ppgtt->base.mm); 2224 2225 ppgtt->base.cleanup(&ppgtt->base); 2226 kfree(ppgtt); 2227 } 2228 2229 /* Certain Gen5 chipsets require require idling the GPU before 2230 * unmapping anything from the GTT when VT-d is enabled. 2231 */ 2232 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2233 { 2234 #ifdef CONFIG_INTEL_IOMMU 2235 /* Query intel_iommu to see if we need the workaround. Presumably that 2236 * was loaded first. 
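* (intel_iommu_gfx_mapped is exported by the Intel IOMMU code and should
* only be set when the graphics device is actually translated by an IOMMU
* unit.)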
2237 */ 2238 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2239 return true; 2240 #endif 2241 return false; 2242 } 2243 2244 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2245 { 2246 struct intel_engine_cs *engine; 2247 2248 if (INTEL_INFO(dev_priv)->gen < 6) 2249 return; 2250 2251 for_each_engine(engine, dev_priv) { 2252 u32 fault_reg; 2253 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2254 if (fault_reg & RING_FAULT_VALID) { 2255 DRM_DEBUG_DRIVER("Unexpected fault\n" 2256 "\tAddr: 0x%08ux\n" 2257 "\tAddress space: %s\n" 2258 "\tSource ID: %d\n" 2259 "\tType: %d\n", 2260 fault_reg & LINUX_PAGE_MASK, 2261 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2262 RING_FAULT_SRCID(fault_reg), 2263 RING_FAULT_FAULT_TYPE(fault_reg)); 2264 I915_WRITE(RING_FAULT_REG(engine), 2265 fault_reg & ~RING_FAULT_VALID); 2266 } 2267 } 2268 POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS])); 2269 } 2270 2271 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2272 { 2273 if (INTEL_INFO(dev_priv)->gen < 6) { 2274 intel_gtt_chipset_flush(); 2275 } else { 2276 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2277 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2278 } 2279 } 2280 2281 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2282 { 2283 struct drm_i915_private *dev_priv = to_i915(dev); 2284 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2285 2286 /* Don't bother messing with faults pre GEN6 as we have little 2287 * documentation supporting that it's a good idea. 2288 */ 2289 if (INTEL_INFO(dev)->gen < 6) 2290 return; 2291 2292 i915_check_and_clear_faults(dev_priv); 2293 2294 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 2295 true); 2296 2297 i915_ggtt_flush(dev_priv); 2298 } 2299 2300 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 2301 { 2302 if (!dma_map_sg(&obj->base.dev->pdev->dev, 2303 obj->pages->sgl, obj->pages->nents, 2304 PCI_DMA_BIDIRECTIONAL)) 2305 return -ENOSPC; 2306 2307 return 0; 2308 } 2309 2310 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2311 { 2312 #ifdef writeq 2313 writeq(pte, addr); 2314 #else 2315 iowrite32((u32)pte, addr); 2316 iowrite32(pte >> 32, addr + 4); 2317 #endif 2318 } 2319 2320 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2321 dma_addr_t addr, 2322 uint64_t offset, 2323 enum i915_cache_level level, 2324 u32 unused) 2325 { 2326 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2327 gen8_pte_t __iomem *pte = 2328 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2329 (offset >> PAGE_SHIFT); 2330 int rpm_atomic_seq; 2331 2332 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2333 2334 gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); 2335 2336 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2337 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2338 2339 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2340 } 2341 2342 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2343 struct sg_table *st, 2344 uint64_t start, 2345 enum i915_cache_level level, u32 unused) 2346 { 2347 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2348 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2349 struct sgt_iter sgt_iter; 2350 gen8_pte_t __iomem *gtt_entries; 2351 gen8_pte_t gtt_entry; 2352 dma_addr_t addr; 2353 int rpm_atomic_seq; 2354 int i = 0; 2355 2356 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2357 2358 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2359 2360 for_each_sgt_dma(addr, sgt_iter, st) { 2361 
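/* One 64-bit PTE per backing page: encode the DMA address with the
* requested cache level and write it through the WC mapping of the GSM;
* the readback below is the posting read for these writes. */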
gtt_entry = gen8_pte_encode(addr, level, true); 2362 gen8_set_pte(&gtt_entries[i++], gtt_entry); 2363 } 2364 2365 /* 2366 * XXX: This serves as a posting read to make sure that the PTE has 2367 * actually been updated. There is some concern that even though 2368 * registers and PTEs are within the same BAR that they are potentially 2369 * of NUMA access patterns. Therefore, even with the way we assume 2370 * hardware should work, we must keep this posting read for paranoia. 2371 */ 2372 if (i != 0) 2373 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); 2374 2375 /* This next bit makes the above posting read even more important. We 2376 * want to flush the TLBs only after we're certain all the PTE updates 2377 * have finished. 2378 */ 2379 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2380 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2381 2382 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2383 } 2384 2385 struct insert_entries { 2386 struct i915_address_space *vm; 2387 struct sg_table *st; 2388 uint64_t start; 2389 enum i915_cache_level level; 2390 u32 flags; 2391 }; 2392 2393 static int gen8_ggtt_insert_entries__cb(void *_arg) 2394 { 2395 struct insert_entries *arg = _arg; 2396 gen8_ggtt_insert_entries(arg->vm, arg->st, 2397 arg->start, arg->level, arg->flags); 2398 return 0; 2399 } 2400 2401 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2402 struct sg_table *st, 2403 uint64_t start, 2404 enum i915_cache_level level, 2405 u32 flags) 2406 { 2407 struct insert_entries arg = { vm, st, start, level, flags }; 2408 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2409 } 2410 2411 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2412 dma_addr_t addr, 2413 uint64_t offset, 2414 enum i915_cache_level level, 2415 u32 flags) 2416 { 2417 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2418 gen6_pte_t __iomem *pte = 2419 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2420 (offset >> PAGE_SHIFT); 2421 int rpm_atomic_seq; 2422 2423 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2424 2425 iowrite32(vm->pte_encode(addr, level, true, flags), pte); 2426 2427 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2428 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2429 2430 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2431 } 2432 2433 /* 2434 * Binds an object into the global gtt with the specified cache level. The object 2435 * will be accessible to the GPU via commands whose operands reference offsets 2436 * within the global GTT as well as accessible by the GPU through the GMADR 2437 * mapped BAR (dev_priv->mm.gtt->gtt). 2438 */ 2439 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2440 struct sg_table *st, 2441 uint64_t start, 2442 enum i915_cache_level level, u32 flags) 2443 { 2444 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2445 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2446 struct sgt_iter sgt_iter; 2447 gen6_pte_t __iomem *gtt_entries; 2448 gen6_pte_t gtt_entry; 2449 dma_addr_t addr; 2450 int rpm_atomic_seq; 2451 int i = 0; 2452 2453 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2454 2455 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2456 2457 for_each_sgt_dma(addr, sgt_iter, st) { 2458 gtt_entry = vm->pte_encode(addr, level, true, flags); 2459 iowrite32(gtt_entry, &gtt_entries[i++]); 2460 } 2461 2462 /* XXX: This serves as a posting read to make sure that the PTE has 2463 * actually been updated.
There is some concern that even though 2464 * registers and PTEs are within the same BAR that they are potentially 2465 * of NUMA access patterns. Therefore, even with the way we assume 2466 * hardware should work, we must keep this posting read for paranoia. 2467 */ 2468 if (i != 0) 2469 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); 2470 2471 /* This next bit makes the above posting read even more important. We 2472 * want to flush the TLBs only after we're certain all the PTE updates 2473 * have finished. 2474 */ 2475 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2476 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2477 2478 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2479 } 2480 2481 static void nop_clear_range(struct i915_address_space *vm, 2482 uint64_t start, 2483 uint64_t length, 2484 bool use_scratch) 2485 { 2486 } 2487 2488 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2489 uint64_t start, 2490 uint64_t length, 2491 bool use_scratch) 2492 { 2493 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2494 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2495 unsigned first_entry = start >> PAGE_SHIFT; 2496 unsigned num_entries = length >> PAGE_SHIFT; 2497 gen8_pte_t scratch_pte, __iomem *gtt_base = 2498 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2499 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2500 int i; 2501 int rpm_atomic_seq; 2502 2503 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2504 2505 if (WARN(num_entries > max_entries, 2506 "First entry = %d; Num entries = %d (max=%d)\n", 2507 first_entry, num_entries, max_entries)) 2508 num_entries = max_entries; 2509 2510 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 2511 I915_CACHE_LLC, 2512 use_scratch); 2513 for (i = 0; i < num_entries; i++) 2514 gen8_set_pte(&gtt_base[i], scratch_pte); 2515 readl(gtt_base); 2516 2517 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2518 } 2519 2520 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2521 uint64_t start, 2522 uint64_t length, 2523 bool use_scratch) 2524 { 2525 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2526 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2527 unsigned first_entry = start >> PAGE_SHIFT; 2528 unsigned num_entries = length >> PAGE_SHIFT; 2529 gen6_pte_t scratch_pte, __iomem *gtt_base = 2530 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2531 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2532 int i; 2533 int rpm_atomic_seq; 2534 2535 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2536 2537 if (WARN(num_entries > max_entries, 2538 "First entry = %d; Num entries = %d (max=%d)\n", 2539 first_entry, num_entries, max_entries)) 2540 num_entries = max_entries; 2541 2542 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 2543 I915_CACHE_LLC, use_scratch, 0); 2544 2545 for (i = 0; i < num_entries; i++) 2546 iowrite32(scratch_pte, &gtt_base[i]); 2547 readl(gtt_base); 2548 2549 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2550 } 2551 2552 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2553 dma_addr_t addr, 2554 uint64_t offset, 2555 enum i915_cache_level cache_level, 2556 u32 unused) 2557 { 2558 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2559 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2560 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2561 int rpm_atomic_seq; 2562 2563 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2564 2565 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2566 2567 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2568 } 2569 2570 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2571 struct sg_table *pages, 2572 uint64_t start, 2573 enum i915_cache_level cache_level, u32 unused) 2574 { 2575 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2576 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2577 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2578 int rpm_atomic_seq; 2579 2580 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2581 2582 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2583 2584 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2585 2586 } 2587 2588 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2589 uint64_t start, 2590 uint64_t length, 2591 bool unused) 2592 { 2593 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2594 unsigned first_entry = start >> PAGE_SHIFT; 2595 unsigned num_entries = length >> PAGE_SHIFT; 2596 int rpm_atomic_seq; 2597 2598 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2599 2600 intel_gtt_clear_range(first_entry, num_entries); 2601 2602 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2603 } 2604 2605 static int ggtt_bind_vma(struct i915_vma *vma, 2606 enum i915_cache_level cache_level, 2607 u32 flags) 2608 { 2609 struct drm_i915_gem_object *obj = vma->obj; 2610 u32 pte_flags = 0; 2611 int ret; 2612 2613 ret = i915_get_ggtt_vma_pages(vma); 2614 if (ret) 2615 return ret; 2616 2617 /* Currently applicable only to VLV */ 2618 if (obj->gt_ro) 2619 pte_flags |= PTE_READ_ONLY; 2620 2621 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, 2622 vma->node.start, 2623 cache_level, pte_flags); 2624 2625 /* 2626 * Without aliasing PPGTT there's no difference between 2627 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2628 * upgrade to both bound if we bind either to avoid double-binding. 
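* (Once an aliasing PPGTT is set up in i915_gem_init_ggtt(), bind_vma is
* switched to aliasing_gtt_bind_vma(), which does honour the two bind
* flags separately.)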
2629 */ 2630 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2631 2632 return 0; 2633 } 2634 2635 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2636 enum i915_cache_level cache_level, 2637 u32 flags) 2638 { 2639 u32 pte_flags; 2640 int ret; 2641 2642 ret = i915_get_ggtt_vma_pages(vma); 2643 if (ret) 2644 return ret; 2645 2646 /* Currently applicable only to VLV */ 2647 pte_flags = 0; 2648 if (vma->obj->gt_ro) 2649 pte_flags |= PTE_READ_ONLY; 2650 2651 2652 if (flags & I915_VMA_GLOBAL_BIND) { 2653 vma->vm->insert_entries(vma->vm, 2654 vma->ggtt_view.pages, 2655 vma->node.start, 2656 cache_level, pte_flags); 2657 } 2658 2659 if (flags & I915_VMA_LOCAL_BIND) { 2660 struct i915_hw_ppgtt *appgtt = 2661 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2662 appgtt->base.insert_entries(&appgtt->base, 2663 vma->ggtt_view.pages, 2664 vma->node.start, 2665 cache_level, pte_flags); 2666 } 2667 2668 return 0; 2669 } 2670 2671 static void ggtt_unbind_vma(struct i915_vma *vma) 2672 { 2673 struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2674 const u64 size = min(vma->size, vma->node.size); 2675 2676 if (vma->flags & I915_VMA_GLOBAL_BIND) 2677 vma->vm->clear_range(vma->vm, 2678 vma->node.start, size, 2679 true); 2680 2681 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2682 appgtt->base.clear_range(&appgtt->base, 2683 vma->node.start, size, 2684 true); 2685 } 2686 2687 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2688 { 2689 struct drm_device *dev = obj->base.dev; 2690 struct drm_i915_private *dev_priv = to_i915(dev); 2691 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2692 2693 if (unlikely(ggtt->do_idle_maps)) { 2694 if (i915_gem_wait_for_idle(dev_priv, false)) { 2695 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2696 /* Wait a bit, in hopes it avoids the hang */ 2697 udelay(10); 2698 } 2699 } 2700 2701 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2702 PCI_DMA_BIDIRECTIONAL); 2703 } 2704 2705 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2706 unsigned long color, 2707 u64 *start, 2708 u64 *end) 2709 { 2710 if (node->color != color) 2711 *start += 4096; 2712 2713 node = list_first_entry_or_null(&node->node_list, 2714 struct drm_mm_node, 2715 node_list); 2716 if (node && node->allocated && node->color != color) 2717 *end -= 4096; 2718 } 2719 2720 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2721 { 2722 /* Let GEM Manage all of the aperture. 2723 * 2724 * However, leave one page at the end still bound to the scratch page. 2725 * There are a number of places where the hardware apparently prefetches 2726 * past the end of the object, and we've seen multiple hangs with the 2727 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2728 * aperture. One page should be enough to keep any prefetching inside 2729 * of the aperture. 
2730 */ 2731 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2732 unsigned long hole_start, hole_end; 2733 struct drm_mm_node *entry; 2734 int ret; 2735 unsigned long mappable = min(ggtt->base.total, ggtt->mappable_end); 2736 2737 ret = intel_vgt_balloon(dev_priv); 2738 if (ret) 2739 return ret; 2740 2741 /* Clear any non-preallocated blocks */ 2742 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2743 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2744 hole_start, hole_end); 2745 ggtt->base.clear_range(&ggtt->base, hole_start, 2746 hole_end - hole_start, true); 2747 } 2748 2749 #ifdef __DragonFly__ 2750 DRM_INFO("taking over the fictitious range 0x%llx-0x%llx\n", 2751 dev_priv->ggtt.mappable_base, dev_priv->ggtt.mappable_end); 2752 vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base, 2753 dev_priv->ggtt.mappable_base + mappable, VM_MEMATTR_WRITE_COMBINING); 2754 #endif 2755 2756 /* And finally clear the reserved guard page */ 2757 ggtt->base.clear_range(&ggtt->base, 2758 ggtt->base.total - PAGE_SIZE, PAGE_SIZE, 2759 true); 2760 2761 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2762 struct i915_hw_ppgtt *ppgtt; 2763 2764 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2765 if (!ppgtt) 2766 return -ENOMEM; 2767 2768 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2769 if (ret) { 2770 kfree(ppgtt); 2771 return ret; 2772 } 2773 2774 if (ppgtt->base.allocate_va_range) 2775 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2776 ppgtt->base.total); 2777 if (ret) { 2778 ppgtt->base.cleanup(&ppgtt->base); 2779 kfree(ppgtt); 2780 return ret; 2781 } 2782 2783 ppgtt->base.clear_range(&ppgtt->base, 2784 ppgtt->base.start, 2785 ppgtt->base.total, 2786 true); 2787 2788 dev_priv->mm.aliasing_ppgtt = ppgtt; 2789 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2790 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2791 } 2792 2793 return 0; 2794 } 2795 2796 /** 2797 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2798 * @dev_priv: i915 device 2799 */ 2800 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2801 { 2802 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2803 2804 if (dev_priv->mm.aliasing_ppgtt) { 2805 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2806 2807 ppgtt->base.cleanup(&ppgtt->base); 2808 kfree(ppgtt); 2809 } 2810 2811 i915_gem_cleanup_stolen(&dev_priv->drm); 2812 2813 if (drm_mm_initialized(&ggtt->base.mm)) { 2814 intel_vgt_deballoon(dev_priv); 2815 2816 drm_mm_takedown(&ggtt->base.mm); 2817 list_del(&ggtt->base.global_link); 2818 } 2819 2820 ggtt->base.cleanup(&ggtt->base); 2821 2822 arch_phys_wc_del(ggtt->mtrr); 2823 io_mapping_free(ggtt->mappable); 2824 } 2825 2826 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2827 { 2828 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2829 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2830 return snb_gmch_ctl << 20; 2831 } 2832 2833 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2834 { 2835 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2836 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2837 if (bdw_gmch_ctl) 2838 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2839 2840 #ifdef CONFIG_X86_32 2841 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2842 if (bdw_gmch_ctl > 4) 2843 bdw_gmch_ctl = 4; 2844 #endif 2845 2846 return bdw_gmch_ctl << 20; 2847 } 2848 2849 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2850 { 2851 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2852 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2853 2854 if (gmch_ctrl) 2855 return 1 << (20 + gmch_ctrl); 2856 2857 return 0; 2858 } 2859 
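/*
* For example, combining the decodes above with the probe code below: a
* gen6/7 GGMS field of 2 gives 2 << 20 = 2MiB of 4-byte PTEs, i.e. a
* (2MiB / 4) * 4KiB = 2GiB GGTT, while on gen8+ the field is exponential,
* so 2 gives (1 << 2) << 20 = 4MiB of 8-byte PTEs, likewise a 2GiB GGTT.
*/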
2860 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2861 { 2862 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2863 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2864 return snb_gmch_ctl << 25; /* 32 MB units */ 2865 } 2866 2867 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2868 { 2869 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2870 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2871 return bdw_gmch_ctl << 25; /* 32 MB units */ 2872 } 2873 2874 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2875 { 2876 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2877 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2878 2879 /* 2880 * 0x0 to 0x10: 32MB increments starting at 0MB 2881 * 0x11 to 0x16: 4MB increments starting at 8MB 2882 * 0x17 to 0x1d: 4MB increments start at 36MB 2883 */ 2884 if (gmch_ctrl < 0x11) 2885 return gmch_ctrl << 25; 2886 else if (gmch_ctrl < 0x17) 2887 return (gmch_ctrl - 0x11 + 2) << 22; 2888 else 2889 return (gmch_ctrl - 0x17 + 9) << 22; 2890 } 2891 2892 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2893 { 2894 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2895 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2896 2897 if (gen9_gmch_ctl < 0xf0) 2898 return gen9_gmch_ctl << 25; /* 32 MB units */ 2899 else 2900 /* 4MB increments starting at 0xf0 for 4MB */ 2901 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2902 } 2903 2904 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2905 { 2906 struct pci_dev *pdev = ggtt->base.dev->pdev; 2907 struct i915_page_scratch *scratch_page; 2908 phys_addr_t phys_addr; 2909 2910 /* For Modern GENs the PTEs and register space are split in the BAR */ 2911 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2912 2913 /* 2914 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2915 * dropped. For WC mappings in general we have 64 byte burst writes 2916 * when the WC buffer is flushed, so we can't use it, but have to 2917 * resort to an uncached mapping. The WC issue is easily caught by the 2918 * readback check when writing GTT PTE entries. 2919 */ 2920 if (IS_BROXTON(ggtt->base.dev)) 2921 ggtt->gsm = ioremap_nocache(phys_addr, size); 2922 else 2923 ggtt->gsm = ioremap_wc(phys_addr, size); 2924 if (!ggtt->gsm) { 2925 DRM_ERROR("Failed to map the ggtt page table\n"); 2926 return -ENOMEM; 2927 } 2928 2929 scratch_page = alloc_scratch_page(ggtt->base.dev); 2930 if (IS_ERR(scratch_page)) { 2931 DRM_ERROR("Scratch setup failed\n"); 2932 /* iounmap will also get called at remove, but meh */ 2933 iounmap(ggtt->gsm); 2934 return PTR_ERR(scratch_page); 2935 } 2936 2937 ggtt->base.scratch_page = scratch_page; 2938 2939 return 0; 2940 } 2941 2942 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2943 * bits. When using advanced contexts each context stores its own PAT, but 2944 * writing this data shouldn't be harmful even in those cases. */ 2945 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2946 { 2947 uint64_t pat; 2948 2949 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2950 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? 
*/ 2951 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2952 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2953 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2954 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2955 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2956 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2957 2958 if (!USES_PPGTT(dev_priv)) 2959 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2960 * so RTL will always use the value corresponding to 2961 * pat_sel = 000". 2962 * So let's disable cache for GGTT to avoid screen corruptions. 2963 * MOCS still can be used though. 2964 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2965 * before this patch, i.e. the same uncached + snooping access 2966 * like on gen6/7 seems to be in effect. 2967 * - So this just fixes blitter/render access. Again it looks 2968 * like it's not just uncached access, but uncached + snooping. 2969 * So we can still hold onto all our assumptions wrt cpu 2970 * clflushing on LLC machines. 2971 */ 2972 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2973 2974 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 2975 * write would work. */ 2976 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2977 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2978 } 2979 2980 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 2981 { 2982 uint64_t pat; 2983 2984 /* 2985 * Map WB on BDW to snooped on CHV. 2986 * 2987 * Only the snoop bit has meaning for CHV, the rest is 2988 * ignored. 2989 * 2990 * The hardware will never snoop for certain types of accesses: 2991 * - CPU GTT (GMADR->GGTT->no snoop->memory) 2992 * - PPGTT page tables 2993 * - some other special cycles 2994 * 2995 * As with BDW, we also need to consider the following for GT accesses: 2996 * "For GGTT, there is NO pat_sel[2:0] from the entry, 2997 * so RTL will always use the value corresponding to 2998 * pat_sel = 000". 2999 * Which means we must set the snoop bit in PAT entry 0 3000 * in order to keep the global status page working. 
3001 */ 3002 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3003 GEN8_PPAT(1, 0) | 3004 GEN8_PPAT(2, 0) | 3005 GEN8_PPAT(3, 0) | 3006 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3007 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3008 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3009 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3010 3011 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3012 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3013 } 3014 3015 static void gen6_gmch_remove(struct i915_address_space *vm) 3016 { 3017 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3018 3019 iounmap(ggtt->gsm); 3020 free_scratch_page(vm->dev, vm->scratch_page); 3021 } 3022 3023 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3024 { 3025 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3026 struct pci_dev *pdev = dev_priv->drm.pdev; 3027 unsigned int size; 3028 u16 snb_gmch_ctl; 3029 3030 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3031 ggtt->mappable_base = pci_resource_start(pdev, 2); 3032 ggtt->mappable_end = pci_resource_len(pdev, 2); 3033 3034 #if 0 3035 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3036 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3037 #endif 3038 3039 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3040 3041 if (INTEL_GEN(dev_priv) >= 9) { 3042 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3043 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3044 } else if (IS_CHERRYVIEW(dev_priv)) { 3045 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3046 size = chv_get_total_gtt_size(snb_gmch_ctl); 3047 } else { 3048 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3049 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3050 } 3051 3052 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3053 3054 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3055 chv_setup_private_ppat(dev_priv); 3056 else 3057 bdw_setup_private_ppat(dev_priv); 3058 3059 ggtt->base.cleanup = gen6_gmch_remove; 3060 ggtt->base.bind_vma = ggtt_bind_vma; 3061 ggtt->base.unbind_vma = ggtt_unbind_vma; 3062 ggtt->base.insert_page = gen8_ggtt_insert_page; 3063 ggtt->base.clear_range = nop_clear_range; 3064 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3065 ggtt->base.clear_range = gen8_ggtt_clear_range; 3066 3067 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3068 if (IS_CHERRYVIEW(dev_priv)) 3069 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3070 3071 return ggtt_probe_common(ggtt, size); 3072 } 3073 3074 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3075 { 3076 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3077 struct pci_dev *pdev = dev_priv->drm.pdev; 3078 unsigned int size; 3079 u16 snb_gmch_ctl; 3080 3081 ggtt->mappable_base = pci_resource_start(pdev, 2); 3082 ggtt->mappable_end = pci_resource_len(pdev, 2); 3083 3084 /* 64/512MB is the current min/max we actually know of, but this is just 3085 * a coarse sanity check. 
3086 */ 3087 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3088 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3089 return -ENXIO; 3090 } 3091 3092 #if 0 3093 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3094 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3095 #endif 3096 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3097 3098 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3099 3100 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3101 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3102 3103 ggtt->base.clear_range = gen6_ggtt_clear_range; 3104 ggtt->base.insert_page = gen6_ggtt_insert_page; 3105 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3106 ggtt->base.bind_vma = ggtt_bind_vma; 3107 ggtt->base.unbind_vma = ggtt_unbind_vma; 3108 ggtt->base.cleanup = gen6_gmch_remove; 3109 3110 if (HAS_EDRAM(dev_priv)) 3111 ggtt->base.pte_encode = iris_pte_encode; 3112 else if (IS_HASWELL(dev_priv)) 3113 ggtt->base.pte_encode = hsw_pte_encode; 3114 else if (IS_VALLEYVIEW(dev_priv)) 3115 ggtt->base.pte_encode = byt_pte_encode; 3116 else if (INTEL_GEN(dev_priv) >= 7) 3117 ggtt->base.pte_encode = ivb_pte_encode; 3118 else 3119 ggtt->base.pte_encode = snb_pte_encode; 3120 3121 return ggtt_probe_common(ggtt, size); 3122 } 3123 3124 static void i915_gmch_remove(struct i915_address_space *vm) 3125 { 3126 intel_gmch_remove(); 3127 } 3128 3129 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3130 { 3131 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3132 #if 0 3133 int ret; 3134 3135 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3136 if (!ret) { 3137 DRM_ERROR("failed to set up gmch\n"); 3138 return -EIO; 3139 } 3140 #endif 3141 3142 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3143 &ggtt->mappable_base, &ggtt->mappable_end); 3144 3145 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3146 ggtt->base.insert_page = i915_ggtt_insert_page; 3147 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3148 ggtt->base.clear_range = i915_ggtt_clear_range; 3149 ggtt->base.bind_vma = ggtt_bind_vma; 3150 ggtt->base.unbind_vma = ggtt_unbind_vma; 3151 ggtt->base.cleanup = i915_gmch_remove; 3152 3153 if (unlikely(ggtt->do_idle_maps)) 3154 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3155 3156 return 0; 3157 } 3158 3159 /** 3160 * i915_ggtt_probe_hw - Probe GGTT hardware location 3161 * @dev_priv: i915 device 3162 */ 3163 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3164 { 3165 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3166 int ret; 3167 3168 ggtt->base.dev = &dev_priv->drm; 3169 3170 if (INTEL_GEN(dev_priv) <= 5) 3171 ret = i915_gmch_probe(ggtt); 3172 else if (INTEL_GEN(dev_priv) < 8) 3173 ret = gen6_gmch_probe(ggtt); 3174 else 3175 ret = gen8_gmch_probe(ggtt); 3176 if (ret) 3177 return ret; 3178 3179 if ((ggtt->base.total - 1) >> 32) { 3180 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3181 " of address space! Found %lldM!\n", 3182 ggtt->base.total >> 20); 3183 ggtt->base.total = 1ULL << 32; 3184 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3185 } 3186 3187 if (ggtt->mappable_end > ggtt->base.total) { 3188 DRM_ERROR("mappable aperture extends past end of GGTT," 3189 " aperture=%llx, total=%llx\n", 3190 ggtt->mappable_end, ggtt->base.total); 3191 ggtt->mappable_end = ggtt->base.total; 3192 } 3193 3194 /* GMADR is the PCI mmio aperture into the global GTT. 
*/ 3195 DRM_INFO("Memory usable by graphics device = %lluM\n", 3196 ggtt->base.total >> 20); 3197 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3198 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3199 #ifdef CONFIG_INTEL_IOMMU 3200 if (intel_iommu_gfx_mapped) 3201 DRM_INFO("VT-d active for gfx access\n"); 3202 #endif 3203 3204 return 0; 3205 } 3206 3207 /** 3208 * i915_ggtt_init_hw - Initialize GGTT hardware 3209 * @dev_priv: i915 device 3210 */ 3211 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3212 { 3213 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3214 int ret; 3215 3216 INIT_LIST_HEAD(&dev_priv->vm_list); 3217 3218 /* Subtract the guard page before address space initialization to 3219 * shrink the range used by drm_mm. 3220 */ 3221 ggtt->base.total -= PAGE_SIZE; 3222 i915_address_space_init(&ggtt->base, dev_priv); 3223 ggtt->base.total += PAGE_SIZE; 3224 if (!HAS_LLC(dev_priv)) 3225 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3226 3227 ggtt->mappable = 3228 io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end); 3229 if (!ggtt->mappable) { 3230 ret = -EIO; 3231 goto out_gtt_cleanup; 3232 } 3233 3234 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3235 3236 /* 3237 * Initialise stolen early so that we may reserve preallocated 3238 * objects for the BIOS to KMS transition. 3239 */ 3240 ret = i915_gem_init_stolen(&dev_priv->drm); 3241 if (ret) 3242 goto out_gtt_cleanup; 3243 3244 return 0; 3245 3246 out_gtt_cleanup: 3247 ggtt->base.cleanup(&ggtt->base); 3248 return ret; 3249 } 3250 3251 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3252 { 3253 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3254 return -EIO; 3255 3256 return 0; 3257 } 3258 3259 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3260 { 3261 struct drm_i915_private *dev_priv = to_i915(dev); 3262 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3263 struct drm_i915_gem_object *obj; 3264 struct i915_vma *vma; 3265 3266 i915_check_and_clear_faults(dev_priv); 3267 3268 /* First fill our portion of the GTT with scratch pages */ 3269 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 3270 true); 3271 3272 /* Cache flush objects bound into GGTT and rebind them. 
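* Rebinding with PIN_UPDATE rewrites the PTEs that were just cleared to
* scratch, and the set-to-GTT-domain call below flushes any stale CPU
* cachelines for objects pinned for display.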
*/ 3273 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 3274 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3275 if (vma->vm != &ggtt->base) 3276 continue; 3277 3278 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3279 PIN_UPDATE)); 3280 } 3281 3282 if (obj->pin_display) 3283 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3284 } 3285 3286 if (INTEL_INFO(dev)->gen >= 8) { 3287 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3288 chv_setup_private_ppat(dev_priv); 3289 else 3290 bdw_setup_private_ppat(dev_priv); 3291 3292 return; 3293 } 3294 3295 if (USES_PPGTT(dev)) { 3296 struct i915_address_space *vm; 3297 3298 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3299 /* TODO: Perhaps it shouldn't be gen6 specific */ 3300 3301 struct i915_hw_ppgtt *ppgtt; 3302 3303 if (i915_is_ggtt(vm)) 3304 ppgtt = dev_priv->mm.aliasing_ppgtt; 3305 else 3306 ppgtt = i915_vm_to_ppgtt(vm); 3307 3308 gen6_write_page_range(dev_priv, &ppgtt->pd, 3309 0, ppgtt->base.total); 3310 } 3311 } 3312 3313 i915_ggtt_flush(dev_priv); 3314 } 3315 3316 static void 3317 i915_vma_retire(struct i915_gem_active *active, 3318 struct drm_i915_gem_request *rq) 3319 { 3320 const unsigned int idx = rq->engine->id; 3321 struct i915_vma *vma = 3322 container_of(active, struct i915_vma, last_read[idx]); 3323 3324 GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); 3325 3326 i915_vma_clear_active(vma, idx); 3327 if (i915_vma_is_active(vma)) 3328 return; 3329 3330 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 3331 if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) 3332 WARN_ON(i915_vma_unbind(vma)); 3333 } 3334 3335 void i915_vma_destroy(struct i915_vma *vma) 3336 { 3337 GEM_BUG_ON(vma->node.allocated); 3338 GEM_BUG_ON(i915_vma_is_active(vma)); 3339 GEM_BUG_ON(!i915_vma_is_closed(vma)); 3340 3341 list_del(&vma->vm_link); 3342 if (!i915_vma_is_ggtt(vma)) 3343 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 3344 3345 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 3346 } 3347 3348 void i915_vma_close(struct i915_vma *vma) 3349 { 3350 GEM_BUG_ON(i915_vma_is_closed(vma)); 3351 vma->flags |= I915_VMA_CLOSED; 3352 3353 list_del_init(&vma->obj_link); 3354 if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) 3355 WARN_ON(i915_vma_unbind(vma)); 3356 } 3357 3358 static struct i915_vma * 3359 __i915_gem_vma_create(struct drm_i915_gem_object *obj, 3360 struct i915_address_space *vm, 3361 const struct i915_ggtt_view *view) 3362 { 3363 struct i915_vma *vma; 3364 int i; 3365 3366 GEM_BUG_ON(vm->closed); 3367 3368 if (WARN_ON(i915_is_ggtt(vm) != !!view)) 3369 return ERR_PTR(-EINVAL); 3370 3371 vma = kzalloc(sizeof(*vma), GFP_KERNEL); 3372 if (vma == NULL) 3373 return ERR_PTR(-ENOMEM); 3374 3375 INIT_LIST_HEAD(&vma->obj_link); 3376 INIT_LIST_HEAD(&vma->exec_list); 3377 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) 3378 init_request_active(&vma->last_read[i], i915_vma_retire); 3379 list_add(&vma->vm_link, &vm->unbound_list); 3380 vma->vm = vm; 3381 vma->obj = obj; 3382 vma->size = obj->base.size; 3383 3384 if (i915_is_ggtt(vm)) { 3385 vma->flags |= I915_VMA_GGTT; 3386 vma->ggtt_view = *view; 3387 if (view->type == I915_GGTT_VIEW_PARTIAL) { 3388 vma->size = view->params.partial.size; 3389 vma->size <<= PAGE_SHIFT; 3390 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3391 vma->size = 3392 intel_rotation_info_size(&view->params.rotated); 3393 vma->size <<= PAGE_SHIFT; 3394 } 3395 } else { 3396 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3397 } 3398 3399 list_add_tail(&vma->obj_link, 
&obj->vma_list); 3400 3401 return vma; 3402 } 3403 3404 struct i915_vma * 3405 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3406 struct i915_address_space *vm) 3407 { 3408 struct i915_vma *vma; 3409 3410 vma = i915_gem_obj_to_vma(obj, vm); 3411 if (!vma) 3412 vma = __i915_gem_vma_create(obj, vm, 3413 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL); 3414 3415 return vma; 3416 } 3417 3418 struct i915_vma * 3419 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 3420 const struct i915_ggtt_view *view) 3421 { 3422 struct drm_device *dev = obj->base.dev; 3423 struct drm_i915_private *dev_priv = to_i915(dev); 3424 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3425 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); 3426 3427 GEM_BUG_ON(!view); 3428 3429 if (!vma) 3430 vma = __i915_gem_vma_create(obj, &ggtt->base, view); 3431 3432 GEM_BUG_ON(i915_vma_is_closed(vma)); 3433 return vma; 3434 3435 } 3436 3437 static struct scatterlist * 3438 rotate_pages(const dma_addr_t *in, unsigned int offset, 3439 unsigned int width, unsigned int height, 3440 unsigned int stride, 3441 struct sg_table *st, struct scatterlist *sg) 3442 { 3443 unsigned int column, row; 3444 unsigned int src_idx; 3445 3446 for (column = 0; column < width; column++) { 3447 src_idx = stride * (height - 1) + column; 3448 for (row = 0; row < height; row++) { 3449 st->nents++; 3450 /* We don't need the pages, but need to initialize 3451 * the entries so the sg list can be happily traversed. 3452 * The only thing we need are DMA addresses. 3453 */ 3454 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3455 sg_dma_address(sg) = in[offset + src_idx]; 3456 sg_dma_len(sg) = PAGE_SIZE; 3457 sg = sg_next(sg); 3458 src_idx -= stride; 3459 } 3460 } 3461 3462 return sg; 3463 } 3464 3465 static struct sg_table * 3466 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, 3467 struct drm_i915_gem_object *obj) 3468 { 3469 const size_t n_pages = obj->base.size / PAGE_SIZE; 3470 unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; 3471 unsigned int size_pages_uv; 3472 struct sgt_iter sgt_iter; 3473 dma_addr_t dma_addr; 3474 unsigned long i; 3475 dma_addr_t *page_addr_list; 3476 struct sg_table *st; 3477 unsigned int uv_start_page; 3478 struct scatterlist *sg; 3479 int ret = -ENOMEM; 3480 3481 /* Allocate a temporary list of source pages for random access. */ 3482 page_addr_list = drm_malloc_gfp(n_pages, 3483 sizeof(dma_addr_t), 3484 GFP_TEMPORARY); 3485 if (!page_addr_list) 3486 return ERR_PTR(ret); 3487 3488 /* Account for UV plane with NV12. */ 3489 if (rot_info->pixel_format == DRM_FORMAT_NV12) 3490 size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height; 3491 else 3492 size_pages_uv = 0; 3493 3494 /* Allocate target SG list. */ 3495 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3496 if (!st) 3497 goto err_st_alloc; 3498 3499 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); 3500 if (ret) 3501 goto err_sg_alloc; 3502 3503 /* Populate source page list from the object. */ 3504 i = 0; 3505 for_each_sgt_dma(dma_addr, sgt_iter, obj->pages) 3506 page_addr_list[i++] = dma_addr; 3507 3508 GEM_BUG_ON(i != n_pages); 3509 st->nents = 0; 3510 sg = st->sgl; 3511 3512 /* Rotate the pages. */ 3513 sg = rotate_pages(page_addr_list, 0, 3514 rot_info->plane[0].width, rot_info->plane[0].height, 3515 rot_info->plane[0].width, 3516 st, sg); 3517 3518 /* Append the UV plane if NV12. 
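* For NV12 the interleaved CbCr samples form a second plane (plane[1]) that
* is rotated separately and appended after the Y pages, starting at the
* uv_start_page computed below.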
*/ 3519 if (rot_info->pixel_format == DRM_FORMAT_NV12) { 3520 uv_start_page = size_pages; 3521 3522 /* Check for tile-row un-alignment. */ 3523 if (offset_in_page(rot_info->uv_offset)) 3524 uv_start_page--; 3525 3526 rot_info->uv_start_page = uv_start_page; 3527 3528 sg = rotate_pages(page_addr_list, rot_info->uv_start_page, 3529 rot_info->plane[1].width, rot_info->plane[1].height, 3530 rot_info->plane[1].width, 3531 st, sg); 3532 } 3533 3534 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", 3535 obj->base.size, rot_info->plane[0].width, 3536 rot_info->plane[0].height, size_pages + size_pages_uv, 3537 size_pages); 3538 3539 drm_free_large(page_addr_list); 3540 3541 return st; 3542 3543 err_sg_alloc: 3544 kfree(st); 3545 err_st_alloc: 3546 drm_free_large(page_addr_list); 3547 3548 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n", 3549 obj->base.size, ret, rot_info->plane[0].width, 3550 rot_info->plane[0].height, size_pages + size_pages_uv, 3551 size_pages); 3552 return ERR_PTR(ret); 3553 } 3554 3555 static struct sg_table * 3556 intel_partial_pages(const struct i915_ggtt_view *view, 3557 struct drm_i915_gem_object *obj) 3558 { 3559 struct sg_table *st; 3560 struct scatterlist *sg; 3561 struct sg_page_iter obj_sg_iter; 3562 int ret = -ENOMEM; 3563 3564 st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL); 3565 if (!st) 3566 goto err_st_alloc; 3567 3568 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3569 if (ret) 3570 goto err_sg_alloc; 3571 3572 sg = st->sgl; 3573 st->nents = 0; 3574 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3575 view->params.partial.offset) 3576 { 3577 if (st->nents >= view->params.partial.size) 3578 break; 3579 3580 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3581 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3582 sg_dma_len(sg) = PAGE_SIZE; 3583 3584 sg = sg_next(sg); 3585 st->nents++; 3586 } 3587 3588 return st; 3589 3590 err_sg_alloc: 3591 kfree(st); 3592 err_st_alloc: 3593 return ERR_PTR(ret); 3594 } 3595 3596 static int 3597 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3598 { 3599 int ret = 0; 3600 3601 if (vma->ggtt_view.pages) 3602 return 0; 3603 3604 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3605 vma->ggtt_view.pages = vma->obj->pages; 3606 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3607 vma->ggtt_view.pages = 3608 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3609 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3610 vma->ggtt_view.pages = 3611 intel_partial_pages(&vma->ggtt_view, vma->obj); 3612 else 3613 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3614 vma->ggtt_view.type); 3615 3616 if (!vma->ggtt_view.pages) { 3617 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3618 vma->ggtt_view.type); 3619 ret = -EINVAL; 3620 } else if (IS_ERR(vma->ggtt_view.pages)) { 3621 ret = PTR_ERR(vma->ggtt_view.pages); 3622 vma->ggtt_view.pages = NULL; 3623 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3624 vma->ggtt_view.type, ret); 3625 } 3626 3627 return ret; 3628 } 3629 3630 /** 3631 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 3632 * @vma: VMA to map 3633 * @cache_level: mapping cache level 3634 * @flags: flags like global or local mapping 3635 * 3636 * DMA addresses are taken from the scatter-gather table of this object (or of 3637 * this VMA in case of non-default GGTT views) and PTE entries set up. 
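* The actual PTE writes are delegated to the address space's bind_vma() hook,
* after allocate_va_range() has populated any missing page tables.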
3638 * Note that DMA addresses are also the only part of the SG table we care about. 3639 */ 3640 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3641 u32 flags) 3642 { 3643 u32 bind_flags; 3644 u32 vma_flags; 3645 int ret; 3646 3647 if (WARN_ON(flags == 0)) 3648 return -EINVAL; 3649 3650 bind_flags = 0; 3651 if (flags & PIN_GLOBAL) 3652 bind_flags |= I915_VMA_GLOBAL_BIND; 3653 if (flags & PIN_USER) 3654 bind_flags |= I915_VMA_LOCAL_BIND; 3655 3656 vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 3657 if (flags & PIN_UPDATE) 3658 bind_flags |= vma_flags; 3659 else 3660 bind_flags &= ~vma_flags; 3661 if (bind_flags == 0) 3662 return 0; 3663 3664 if (vma_flags == 0 && vma->vm->allocate_va_range) { 3665 trace_i915_va_alloc(vma); 3666 ret = vma->vm->allocate_va_range(vma->vm, 3667 vma->node.start, 3668 vma->node.size); 3669 if (ret) 3670 return ret; 3671 } 3672 3673 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3674 if (ret) 3675 return ret; 3676 3677 vma->flags |= bind_flags; 3678 return 0; 3679 } 3680 3681 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 3682 { 3683 void __iomem *ptr; 3684 3685 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3686 if (WARN_ON(!vma->obj->map_and_fenceable)) 3687 return IO_ERR_PTR(-ENODEV); 3688 3689 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 3690 GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); 3691 3692 ptr = vma->iomap; 3693 if (ptr == NULL) { 3694 ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable, 3695 vma->node.start, 3696 vma->node.size); 3697 if (ptr == NULL) 3698 return IO_ERR_PTR(-ENOMEM); 3699 3700 vma->iomap = ptr; 3701 } 3702 3703 __i915_vma_pin(vma); 3704 return ptr; 3705 } 3706
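/*
* The mapping created by i915_vma_pin_iomap() is cached in vma->iomap and
* reused on subsequent calls; __i915_vma_pin() keeps the VMA from being
* unbound while a caller still holds the pointer. Callers are expected to
* balance each pin with the corresponding unpin helper (presumably
* i915_vma_unpin_iomap() in i915_gem_gtt.h) once they are done.
*/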