/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * $FreeBSD: src/sys/dev/drm2/i915/i915_gem_gtt.c,v 1.1 2012/05/22 11:07:44 kib Exp $
 */

#include <sys/sfbuf.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_drv.h"

typedef uint32_t gtt_pte_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

static inline gtt_pte_t pte_encode(struct drm_device *dev,
				   dma_addr_t addr,
				   enum i915_cache_level level)
{
	gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_LLC_MLC:
		/* Haswell doesn't set L3 this way */
		if (IS_HASWELL(dev))
			pte |= GEN6_PTE_CACHE_LLC;
		else
			pte |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		if (IS_HASWELL(dev))
			pte |= HSW_PTE_UNCACHED;
		else
			pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		BUG();
	}

	return pte;
}
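
/*
 * Worked example for GEN6_GTT_ADDR_ENCODE() above (illustrative address,
 * not taken from real hardware): for a page-aligned 40-bit physical address
 * such as 0x12_3456_7000, (addr >> 28) & 0xff0 moves address bits 39:32
 * (0x12) into bits 11:4, giving 0x120.  ORing that back into the address
 * and truncating to the 32-bit PTE yields 0x34567120; the low 12 bits of a
 * page-aligned address are zero, so they are free to carry the folded high
 * bits plus the valid and cache-level flags that pte_encode() adds.
 */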

/* PPGTT support for Sandybridge/Gen6 and later */
static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
				   unsigned first_entry,
				   unsigned num_entries)
{
	gtt_pte_t *pt_vaddr;
	gtt_pte_t scratch_pte;
	struct sf_buf *sf;
	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = GEN6_GTT_ADDR_ENCODE(ppgtt->scratch_page_dma_addr);
	scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]);
		pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		sf_buf_free(sf);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	u_int first_pd_entry_in_global_pt, i;

	/*
	 * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end
	 * for now.
	 */
	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;

	ppgtt = kmalloc(sizeof(*ppgtt), DRM_I915_GEM, M_WAITOK | M_ZERO);

	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->pt_pages = kmalloc(sizeof(vm_page_t) * ppgtt->num_pd_entries,
	    DRM_I915_GEM, M_WAITOK | M_ZERO);

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
		if (ppgtt->pt_pages[i] == NULL) {
			dev_priv->mm.aliasing_ppgtt = ppgtt;
			i915_gem_cleanup_aliasing_ppgtt(dev);
			return (-ENOMEM);
		}
	}

	ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt->scratch_page_dma;

	i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries *
	    I915_PPGTT_PT_ENTRIES);
	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(uint32_t);
	dev_priv->mm.aliasing_ppgtt = ppgtt;
	return (0);
}

void
i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct i915_hw_ppgtt *ppgtt;
	vm_page_t m;
	int i;

	dev_priv = dev->dev_private;
	ppgtt = dev_priv->mm.aliasing_ppgtt;
	if (ppgtt == NULL)
		return;
	dev_priv->mm.aliasing_ppgtt = NULL;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		m = ppgtt->pt_pages[i];
		if (m != NULL) {
			vm_page_busy_wait(m, FALSE, "i915gem");
			vm_page_unwire(m, 0);
			vm_page_free(m);
		}
	}
	drm_free(ppgtt->pt_pages, DRM_I915_GEM);
	drm_free(ppgtt, DRM_I915_GEM);
}

static void
i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry,
    unsigned num_entries, vm_page_t *pages, uint32_t pte_flags)
{
	uint32_t *pt_vaddr, pte;
	struct sf_buf *sf;
	unsigned act_pd, first_pte;
	unsigned last_pte, i;
	vm_paddr_t page_addr;

	act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
	first_pte = first_entry % I915_PPGTT_PT_ENTRIES;

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		sf = sf_buf_alloc(ppgtt->pt_pages[act_pd]);
		pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf);

		for (i = first_pte; i < last_pte; i++) {
			page_addr = VM_PAGE_TO_PHYS(*pages);
			pte = GEN6_PTE_ADDR_ENCODE(page_addr);
			pt_vaddr[i] = pte | pte_flags;

			pages++;
		}

		sf_buf_free(sf);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pd++;
	}
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	struct drm_device *dev;
	struct drm_i915_private *dev_priv;
	uint32_t pte_flags;

	dev = obj->base.dev;
	dev_priv = dev->dev_private;
	pte_flags = GEN6_PTE_VALID;

	switch (cache_level) {
	case I915_CACHE_LLC_MLC:
		pte_flags |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte_flags |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte_flags |= GEN6_PTE_UNCACHED;
		break;
	default:
		panic("cache mode");
	}

	i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, pte_flags);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT);
}
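
/*
 * The aliasing PPGTT's page directory lives in the GTT entries that
 * i915_gem_init_aliasing_ppgtt() stole from the end of the 512*1024-entry
 * global GTT, so pd_offset is simply that slot index times 4 bytes.
 * RING_PP_DIR_BASE is then programmed with the directory offset expressed
 * in cachelines (bytes / 64) and shifted into bits 31:16, which is what the
 * /= 64 and <<= 16 below compute.  Illustrative numbers, assuming
 * I915_PPGTT_PD_ENTRIES is 512: the first stolen slot is
 * 512*1024 - 512 = 523776, pd_offset = 523776 * 4 = 0x1ff800 bytes,
 * 0x1ff800 / 64 = 0x7fe0 cachelines, and 0x7fe0 << 16 = 0x7fe00000 is the
 * value each ring's PP_DIR_BASE register receives.
 */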

void i915_gem_init_ppgtt(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	uint32_t pd_entry, first_pd_entry_in_global_pt;
	int i;

	if (!dev_priv->mm.aliasing_ppgtt)
		return;

	first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES;
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		vm_paddr_t pt_addr;

		pt_addr = VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]);
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry);
	}
	intel_gtt_read_pte(first_pd_entry_in_global_pt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
		    ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
			    _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			DELAY(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{

	if (unlikely(dev_priv->mm.gtt->do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}
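
/*
 * Note on the pair above: do_idle_maps is set up by the GTT probe code for
 * chipsets that need the GPU idled before global GTT mappings are torn down
 * (historically Ironlake systems with VT-d enabled).  Callers bracket the
 * unmap with do_idling()/undo_idling(), as i915_gem_gtt_unbind_object()
 * does at the bottom of this file.
 */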

#if 0
static void i915_ggtt_clear_range(struct drm_device *dev,
				  unsigned first_entry,
				  unsigned num_entries)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	gtt_pte_t scratch_pte;
	gtt_pte_t __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry;
	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
	int i;

	if (INTEL_INFO(dev)->gen < 6) {
		intel_gtt_clear_range(first_entry, num_entries);
		return;
	}

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}
#endif

void
i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv;
	struct drm_i915_gem_object *obj;

	dev_priv = dev->dev_private;

	/* First fill our portion of the GTT with scratch pages */
	intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
	    (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
		i915_gem_clflush_object(obj);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	intel_gtt_chipset_flush();
}

#if 0
/*
 * Binds an object into the global gtt with the specified cache level. The
 * object will be accessible to the GPU via commands whose operands reference
 * offsets within the global GTT as well as accessible by the CPU through the
 * GMADR mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
				  enum i915_cache_level level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct sg_table *st = obj->pages;
	struct scatterlist *sg = st->sgl;
	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
	gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry;
	int unused, i = 0;
	unsigned int len, m = 0;
	dma_addr_t addr;

	for_each_sg(st->sgl, sg, st->nents, unused) {
		len = sg_dma_len(sg) >> PAGE_SHIFT;
		for (m = 0; m < len; m++) {
			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
			iowrite32(pte_encode(dev, addr, level), &gtt_entries[i]);
			i++;
		}
	}

	BUG_ON(i > max_entries);
	BUG_ON(i != obj->base.size / PAGE_SIZE);

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * NUMA-like access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) != pte_encode(dev, addr, level));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
#endif

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
	    AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT, obj->pages, flags);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
	    obj->base.size >> PAGE_SHIFT);

	undo_idling(dev_priv, interruptible);
	obj->has_global_gtt_mapping = 0;
}