/*	$OpenBSD: i915_gem_gtt.c,v 1.14 2015/10/18 18:00:45 kettenis Exp $	*/
/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <dev/pci/drm/drmP.h>
#include <dev/pci/drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

/* XXX */
#define _PAGE_PRESENT	PG_V
#define _PAGE_RW	PG_RW
#define _PAGE_PAT	PG_PAT
#define _PAGE_PWT	PG_WT
#define _PAGE_PCD	PG_N

static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);

#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
typedef uint64_t gen8_gtt_pte_t;
typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)

/* Cacheability Control is a 4-bit value. The low three bits are stored in *
 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
 */
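/*
 * Illustrative example: GEN6_GTT_ADDR_ENCODE() folds physical address
 * bits 39:32 into PTE bits 11:4 via the ((addr >> 28) & 0xff0) term, so a
 * 40-bit address fits in a 32-bit PTE.  Similarly,
 * HSW_CACHEABILITY_CONTROL(0xb) == ((0xb & 0x7) << 1) | ((0xb & 0x8) << 8)
 * == 0x806, i.e. bits 3:1 hold 0b011 and bit 11 holds the fourth bit.
 */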
#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
					 (((bits) & 0x8) << (11 - 3)))
#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)

#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
#define GEN8_LEGACY_PDPS		4

#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;
	if (level != I915_CACHE_NONE)
		pte |= PPAT_CACHED_INDEX;
	else
		pte |= PPAT_UNCACHED_INDEX;
	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

#define BYT_PTE_WRITEABLE		(1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

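/*
 * Illustrative note: hsw_pte_encode() above only distinguishes cached
 * (LLC write-back, age 3) from uncached, while iris_pte_encode() below is
 * used when eDRAM (eLLC) is present and additionally honours
 * I915_CACHE_WT for write-through scanout buffers.
 */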
static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
			  uint64_t val, bool synchronous)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	BUG_ON(entry >= 4);

	if (synchronous) {
		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
		return 0;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i, j, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		for_each_ring(ring, dev_priv, j) {
			ret = gen8_write_pdp(ring, i, addr,
					     i915_reset_in_progress(&dev_priv->gpu_error));
			if (ret)
				goto err_out;
		}
	}
	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct vm_page *page_table = &ppgtt->gen8_pt_pages[act_pt];

		last_pte = first_pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

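/*
 * Illustrative note: the clear and insert paths here address the PPGTT by
 * a flat GTT page index, where act_pt = index / 512 selects a page-table
 * page and act_pte = index % 512 selects the 8-byte PTE within it
 * (512 == GEN8_PTES_PER_PAGE for 4 KB pages).
 */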
#ifdef __linux__
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
					cache_level, true);
		if (++act_pte == GEN8_PTES_PER_PAGE) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}
#else
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct vm_page **pages,
				      unsigned int num_entries,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
	int i;

	pt_vaddr = NULL;
	for (i = 0; i < num_entries; i++) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			gen8_pte_encode(VM_PAGE_TO_PHYS(pages[i]),
					cache_level, true);
		if (++act_pte == GEN8_PTES_PER_PAGE) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}
#endif

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i, j;

	drm_mm_takedown(&vm->mm);

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		if (ppgtt->pd_dma_addr[i]) {
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pd_dma_addr[i],
				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

			for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
				dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
				if (addr)
					pci_unmap_page(ppgtt->base.dev->pdev,
						       addr,
						       PAGE_SIZE,
						       PCI_DMA_BIDIRECTIONAL);
			}
		}
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

/**
 * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
 * net effect resembling a 2-level page table in normal x86 terms. Each PDP
 * represents 1GB of memory
 * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space.
 *
 * TODO: Do something with the size parameter
 **/
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	struct vm_page *pt_pages;
	int i, j, ret = -ENOMEM;
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);

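	/*
	 * Illustrative example: for the full 4 GB legacy address space,
	 * max_pdp is 4, so 4 * 512 = 2048 page-table pages are allocated;
	 * 2048 pages of 512 PTEs each map 2048 * 512 * 4 KB = 4 GB,
	 * matching the comment above.
	 */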
	/* FIXME: split allocation into smaller pieces. For now we only ever do
	 * this once, but with full PPGTT, the multiple contiguous allocations
	 * will be bad.
	 */
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
	if (!pt_pages) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return -ENOMEM;
	}

	ppgtt->gen8_pt_pages = pt_pages;
	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
	ppgtt->enable = gen8_ppgtt_enable;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	/*
	 * - Create a mapping for the page directories.
	 * - For each page directory:
	 *	allocate space for page table mappings.
	 *	map each page table
	 */
	for (i = 0; i < max_pdp; i++) {
		dma_addr_t temp;
		temp = pci_map_page(ppgtt->base.dev->pdev,
				    &ppgtt->pd_pages[i], 0,
				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
			goto err_out;

		ppgtt->pd_dma_addr[i] = temp;

		ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL);
		if (!ppgtt->gen8_pt_dma_addr[i])
			goto err_out;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			struct vm_page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
			temp = pci_map_page(ppgtt->base.dev->pdev,
					    p, 0, PAGE_SIZE,
					    PCI_DMA_BIDIRECTIONAL);

			if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
				goto err_out;

			ppgtt->gen8_pt_dma_addr[i][j] = temp;
		}
	}

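	/*
	 * Illustrative note: each page directory page holds 512 eight-byte
	 * PDEs (GEN8_PDES_PER_PAGE), one per page-table page mapped above,
	 * so a fully populated directory page covers 512 * 2 MB = 1 GB of
	 * address space.
	 */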
	/* For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
				true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
			 ppgtt->num_pt_pages,
			 (ppgtt->num_pt_pages - num_pt_pages) +
			 size % (1<<30));
	return 0;

err_out:
	ppgtt->base.cleanup(&ppgtt->base);
	return ret;
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static int gen6_ppgtt_enable(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i;

	BUG_ON(ppgtt->pd_offset & 0x3f);

	gen6_write_pdes(ppgtt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines, */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
			   ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
			   ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		uint32_t ecochk, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		ecochk = I915_READ(GAM_ECOCHK);
		if (IS_HASWELL(dev)) {
			ecochk |= ECOCHK_PPGTT_WB_HSW;
		} else {
			ecochk |= ECOCHK_PPGTT_LLC_IVB;
			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
		}
		I915_WRITE(GAM_ECOCHK, ecochk);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
	return 0;
}

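/*
 * Illustrative note: RING_PP_DIR_BASE (programmed above) takes the page
 * directory's offset within the GGTT expressed in 64-byte cachelines in
 * bits 31:16.  For a hypothetical pd_offset of 0x200000 bytes, that is
 * 0x200000 / 64 = 0x8000 cachelines, i.e. a register value of 0x80000000.
 */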
/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

#ifdef __linux__
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}
#else
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct vm_page **pages,
				      unsigned int num_entries,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	int i;

	pt_vaddr = NULL;
	for (i = 0; i < num_entries; i++) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(VM_PAGE_TO_PHYS(pages[i]),
				       cache_level, true);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}
#endif

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i;

	drm_mm_takedown(&ppgtt->base.mm);

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned first_pd_entry_in_global_pt;
	int i;
	int ret = -ENOMEM;

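	/*
	 * Illustrative note: with 512 page directory entries and 1024 PTEs
	 * per page table (4 KB / 4 bytes), the gen6 PPGTT below spans
	 * 512 * 1024 * 4 KB = 2 GB; the PDEs themselves are written into
	 * GGTT PTE slots stolen at the end of the global GTT.
	 */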
	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now.
	 */
	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	ppgtt->enable = gen6_ppgtt_enable;
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
	ppgtt->base.start = 0;
	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
				  GFP_KERNEL);
	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr)
		goto err_pt_alloc;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			ret = -EIO;
			goto err_pd_pin;
		}
		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);

	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);

	return 0;

err_pd_pin:
	if (ppgtt->pt_dma_addr) {
		for (i--; i >= 0; i--)
			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
err_pt_alloc:
	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		if (ppgtt->pt_pages[i])
			__free_page(ppgtt->pt_pages[i]);
	}
	kfree(ppgtt->pt_pages);

	return ret;
}

static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return -ENOMEM;

	ppgtt->base.dev = dev;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev))
		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();

	if (ret)
		kfree(ppgtt);
	else {
		dev_priv->mm.aliasing_ppgtt = ppgtt;
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
	}

	return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (!ppgtt)
		return;

	ppgtt->base.cleanup(&ppgtt->base);
	dev_priv->mm.aliasing_ppgtt = NULL;
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
				   obj->base.size >> PAGE_SHIFT,
				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
				   cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	ppgtt->base.clear_range(&ppgtt->base,
				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
				obj->base.size >> PAGE_SHIFT,
				true);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08x\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & ~PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
{
	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
		intel_gtt_chipset_flush();
	} else {
		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
		POSTING_READ(GFX_FLSH_CNTL_GEN6);
	}
}

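/*
 * Illustrative note: i915_ggtt_flush() above picks the flush mechanism by
 * generation (the pre-gen6 chipset flush vs. a GFX_FLSH_CNTL_GEN6 write
 * plus posting read); both the suspend and restore paths below call it
 * after scrubbing the GGTT with scratch PTEs.
 */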
void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start / PAGE_SIZE,
				       dev_priv->gtt.base.total / PAGE_SIZE,
				       true);

	i915_ggtt_flush(dev_priv);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start / PAGE_SIZE,
				       dev_priv->gtt.base.total / PAGE_SIZE,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		i915_gem_clflush_object(obj, obj->pin_display);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	if (INTEL_INFO(dev)->gen >= 8)
		gen8_setup_private_ppat(dev_priv);

	i915_ggtt_flush(dev_priv);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

#ifdef __linux__
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;
#endif

	return 0;
}

static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

#ifdef __linux__
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#else
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct vm_page **pages,
				     unsigned int num_entries,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr;

	while (i < num_entries) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#endif

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
#ifdef __linux__
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#else
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct vm_page **pages,
				     unsigned int num_entries,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	dma_addr_t addr;

	while (i < num_entries) {
		addr = VM_PAGE_TO_PHYS(pages[i]);
		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#endif

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

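/*
 * Illustrative note: the i915_ggtt_* variants below cover pre-gen6
 * hardware, where the GGTT is driven through the AGP/GMCH layer rather
 * than by writing PTEs into a mapped GSM: the Linux build defers to the
 * intel-gtt helpers, while the OpenBSD build calls the agp chipset
 * bind_page/unbind_page hooks directly.
 */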
#ifdef __linux__
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int pg_start,
				     enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(st, pg_start, flags);

}
#else
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct vm_page **pages,
				     unsigned int num_entries,
				     unsigned int pg_start,
				     enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		0 : BUS_DMA_COHERENT;
	struct agp_softc *sc = vm->dev->agp->agpdev;
	bus_addr_t addr = sc->sc_apaddr + (pg_start << PAGE_SHIFT);
	int i;

	for (i = 0; i < num_entries; i++) {
		sc->sc_methods->bind_page(sc->sc_chipc, addr,
		    VM_PAGE_TO_PHYS(pages[i]), flags);
		addr += PAGE_SIZE;
	}
}
#endif

#ifdef __linux__
static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool unused)
{
	intel_gtt_clear_range(first_entry, num_entries);
}
#else
static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries,
				  bool unused)
{
	struct agp_softc *sc = vm->dev->agp->agpdev;
	bus_addr_t addr = sc->sc_apaddr + (first_entry << PAGE_SHIFT);
	int i;

	for (i = 0; i < num_entries; i++) {
		sc->sc_methods->unbind_page(sc->sc_chipc, addr);
		addr += PAGE_SIZE;
	}
}
#endif

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
					  obj->base.size >> PAGE_SHIFT,
					  entry,
					  cache_level);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       entry,
				       obj->base.size >> PAGE_SHIFT,
				       true);

	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

#ifdef __linux__
	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);
#endif

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
}

static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}

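/*
 * Illustrative note: when aliasing PPGTT is enabled on gen6/7,
 * i915_gem_init_global_gtt() below shrinks the GEM-managed range by
 * GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE = 512 * 4 KB = 2 MB so the PPGTT page
 * directory entries can live in the stolen GGTT PTE slots at the end.
 */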
void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		int ret;

		if (INTEL_INFO(dev)->gen <= 7) {
			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
			 * aperture accordingly when using aliasing ppgtt. */
			gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
		}

		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (!ret)
			return;

		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
		drm_mm_takedown(&dev_priv->gtt.base.mm);
		if (INTEL_INFO(dev)->gen < 8)
			gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE;
	}
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

#ifdef __linux__

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}

#else

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_dmamem *page;

	page = drm_dmamem_alloc(dev_priv->dmat, PAGE_SIZE, 0, 1, PAGE_SIZE,
	    BUS_DMA_NOCACHE, 0);
	if (page == NULL)
		return -ENOMEM;

	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = page->segs[0].ds_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	drm_dmamem_free(dev_priv->dmat, dev_priv->gtt.base.scratch.page);
}

#endif

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
	if (bdw_gmch_ctl > 4) {
		WARN_ON(!i915_preliminary_hw_support);
		return 4<<20;
	}

	return bdw_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

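/*
 * Illustrative example: on gen6/7 the GGMS field gives the GTT size
 * directly in MB, so a field value of 2 means 2 MB of PTEs, i.e. 512K
 * four-byte entries covering 2 GB.  On gen8 the field is a power of two:
 * a value of 2 decodes to 4 MB of PTEs, i.e. 512K eight-byte entries
 * covering 2 GB, and larger sizes are clamped to 4 MB here.  Stolen
 * memory is reported in 32 MB units on both.
 */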
#ifdef __linux__

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
	}

	return ret;
}

#else

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	bus_space_handle_t gsm;
	bus_addr_t addr;
	bus_size_t size;
	pcireg_t type;
	int ret;

	type = pci_mapreg_type(dev_priv->pc, dev_priv->tag, 0x10);
	ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x10, type,
	    &addr, &size, NULL);
	if (ret)
		return ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	ret = -bus_space_map(dev_priv->bst, addr + size / 2, gtt_size,
	    BUS_SPACE_MAP_PREFETCHABLE | BUS_SPACE_MAP_LINEAR, &gsm);
	if (ret) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return ret;
	}
	dev_priv->gtt.gsm = bus_space_vaddr(dev_priv->bst, gsm);

	ret = setup_scratch_page(dev);
	if (ret)
		DRM_ERROR("Scratch setup failed\n");

	return ret;
}

#endif

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
{
#define GEN8_PPAT_UC		(0<<0)
#define GEN8_PPAT_WC		(1<<0)
#define GEN8_PPAT_WT		(2<<0)
#define GEN8_PPAT_WB		(3<<0)
#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
/* FIXME(BDW): Bspec is completely confused about cache control bits. */
#define GEN8_PPAT_LLC		(1<<2)
#define GEN8_PPAT_LLCELLC	(2<<2)
#define GEN8_PPAT_LLCeLLC	(3<<2)
#define GEN8_PPAT_AGE(x)	(x<<4)
#define GEN8_PPAT(i, x)		((uint64_t) (x) << ((i) * 8))
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

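	/*
	 * Illustrative note (assuming the x86 PG_* bit positions used for
	 * _PAGE_PWT/_PAGE_PCD/_PAGE_PAT above): a gen8 PTE selects one of
	 * the eight byte-wide entries programmed here, so
	 * PPAT_UNCACHED_INDEX (PWT|PCD) picks entry 3 (UC) and
	 * PPAT_CACHED_INDEX (PAT) picks entry 4 (WB LLC/eLLC, age 0),
	 * matching gen8_pte_encode() at the top of the file.
	 */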
	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

#ifdef __linux__
	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
#else
	pcireg_t type = pci_mapreg_type(dev_priv->pc, dev_priv->tag, 0x18);
	ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18, type,
	    mappable_base, mappable_end, NULL);
	if (ret)
		return ret;
#endif

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen8_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	gen8_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

#ifdef __linux__
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);
#else
	pcireg_t type = pci_mapreg_type(dev_priv->pc, dev_priv->tag, 0x18);
	ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18, type,
	    mappable_base, mappable_end, NULL);
	if (ret)
		return ret;
#endif

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

#ifdef __linux__
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
#endif
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
#ifdef __linux__
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
#endif

	drm_mm_takedown(&vm->mm);
#ifdef __linux__
	iounmap(gtt->gsm);
#endif
	teardown_scratch_page(vm->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);

	return 0;
}