1e3adcf8fSFrançois Tigeot /* 2e3adcf8fSFrançois Tigeot * Copyright © 2008 Intel Corporation 3e3adcf8fSFrançois Tigeot * 4e3adcf8fSFrançois Tigeot * Permission is hereby granted, free of charge, to any person obtaining a 5e3adcf8fSFrançois Tigeot * copy of this software and associated documentation files (the "Software"), 6e3adcf8fSFrançois Tigeot * to deal in the Software without restriction, including without limitation 7e3adcf8fSFrançois Tigeot * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8e3adcf8fSFrançois Tigeot * and/or sell copies of the Software, and to permit persons to whom the 9e3adcf8fSFrançois Tigeot * Software is furnished to do so, subject to the following conditions: 10e3adcf8fSFrançois Tigeot * 11e3adcf8fSFrançois Tigeot * The above copyright notice and this permission notice (including the next 12e3adcf8fSFrançois Tigeot * paragraph) shall be included in all copies or substantial portions of the 13e3adcf8fSFrançois Tigeot * Software. 14e3adcf8fSFrançois Tigeot * 15e3adcf8fSFrançois Tigeot * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16e3adcf8fSFrançois Tigeot * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17e3adcf8fSFrançois Tigeot * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18e3adcf8fSFrançois Tigeot * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19e3adcf8fSFrançois Tigeot * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20e3adcf8fSFrançois Tigeot * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21e3adcf8fSFrançois Tigeot * IN THE SOFTWARE. 22e3adcf8fSFrançois Tigeot * 23e3adcf8fSFrançois Tigeot * Authors: 24e3adcf8fSFrançois Tigeot * Eric Anholt <eric@anholt.net> 25e3adcf8fSFrançois Tigeot * 26e3adcf8fSFrançois Tigeot */ 27e3adcf8fSFrançois Tigeot 28*e3440f96SFrançois Tigeot #include <linux/bitops.h> 2918e26a6dSFrançois Tigeot #include <drm/drmP.h> 305c6c6f23SFrançois Tigeot #include <drm/i915_drm.h> 31e3adcf8fSFrançois Tigeot #include "i915_drv.h" 32*e3440f96SFrançois Tigeot #include <linux/highmem.h> 33e3adcf8fSFrançois Tigeot 34e3adcf8fSFrançois Tigeot /** @file i915_gem_tiling.c 35e3adcf8fSFrançois Tigeot * 36e3adcf8fSFrançois Tigeot * Support for managing tiling state of buffer objects. 37e3adcf8fSFrançois Tigeot * 38e3adcf8fSFrançois Tigeot * The idea behind tiling is to increase cache hit rates by rearranging 39e3adcf8fSFrançois Tigeot * pixel data so that a group of pixel accesses are in the same cacheline. 40e3adcf8fSFrançois Tigeot * Performance improvement from doing this on the back/depth buffer are on 41e3adcf8fSFrançois Tigeot * the order of 30%. 42e3adcf8fSFrançois Tigeot * 43e3adcf8fSFrançois Tigeot * Intel architectures make this somewhat more complicated, though, by 44e3adcf8fSFrançois Tigeot * adjustments made to addressing of data when the memory is in interleaved 45e3adcf8fSFrançois Tigeot * mode (matched pairs of DIMMS) to improve memory bandwidth. 46e3adcf8fSFrançois Tigeot * For interleaved memory, the CPU sends every sequential 64 bytes 47e3adcf8fSFrançois Tigeot * to an alternate memory channel so it can get the bandwidth from both. 48e3adcf8fSFrançois Tigeot * 49e3adcf8fSFrançois Tigeot * The GPU also rearranges its accesses for increased bandwidth to interleaved 50e3adcf8fSFrançois Tigeot * memory, and it matches what the CPU does for non-tiled. However, when tiled 51e3adcf8fSFrançois Tigeot * it does it a little differently, since one walks addresses not just in the 52e3adcf8fSFrançois Tigeot * X direction but also Y. So, along with alternating channels when bit 53e3adcf8fSFrançois Tigeot * 6 of the address flips, it also alternates when other bits flip -- Bits 9 54e3adcf8fSFrançois Tigeot * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) 55e3adcf8fSFrançois Tigeot * are common to both the 915 and 965-class hardware. 56e3adcf8fSFrançois Tigeot * 57e3adcf8fSFrançois Tigeot * The CPU also sometimes XORs in higher bits as well, to improve 58e3adcf8fSFrançois Tigeot * bandwidth doing strided access like we do so frequently in graphics. This 59e3adcf8fSFrançois Tigeot * is called "Channel XOR Randomization" in the MCH documentation. The result 60e3adcf8fSFrançois Tigeot * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address 61e3adcf8fSFrançois Tigeot * decode. 62e3adcf8fSFrançois Tigeot * 63e3adcf8fSFrançois Tigeot * All of this bit 6 XORing has an effect on our memory management, 64e3adcf8fSFrançois Tigeot * as we need to make sure that the 3d driver can correctly address object 65e3adcf8fSFrançois Tigeot * contents. 66e3adcf8fSFrançois Tigeot * 67e3adcf8fSFrançois Tigeot * If we don't have interleaved memory, all tiling is safe and no swizzling is 68e3adcf8fSFrançois Tigeot * required. 69e3adcf8fSFrançois Tigeot * 70e3adcf8fSFrançois Tigeot * When bit 17 is XORed in, we simply refuse to tile at all. Bit 71e3adcf8fSFrançois Tigeot * 17 is not just a page offset, so as we page an objet out and back in, 72e3adcf8fSFrançois Tigeot * individual pages in it will have different bit 17 addresses, resulting in 73e3adcf8fSFrançois Tigeot * each 64 bytes being swapped with its neighbor! 74e3adcf8fSFrançois Tigeot * 75e3adcf8fSFrançois Tigeot * Otherwise, if interleaved, we have to tell the 3d driver what the address 76e3adcf8fSFrançois Tigeot * swizzling it needs to do is, since it's writing with the CPU to the pages 77e3adcf8fSFrançois Tigeot * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the 78e3adcf8fSFrançois Tigeot * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling 79e3adcf8fSFrançois Tigeot * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order 80e3adcf8fSFrançois Tigeot * to match what the GPU expects. 81e3adcf8fSFrançois Tigeot */ 82e3adcf8fSFrançois Tigeot 83e3adcf8fSFrançois Tigeot /** 84e3adcf8fSFrançois Tigeot * Detects bit 6 swizzling of address lookup between IGD access and CPU 85e3adcf8fSFrançois Tigeot * access through main memory. 86e3adcf8fSFrançois Tigeot */ 87e3adcf8fSFrançois Tigeot void 88e3adcf8fSFrançois Tigeot i915_gem_detect_bit_6_swizzle(struct drm_device *dev) 89e3adcf8fSFrançois Tigeot { 90e3adcf8fSFrançois Tigeot drm_i915_private_t *dev_priv = dev->dev_private; 91e3adcf8fSFrançois Tigeot uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; 92e3adcf8fSFrançois Tigeot uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; 93e3adcf8fSFrançois Tigeot 94e9243325SFrançois Tigeot if (IS_VALLEYVIEW(dev)) { 95e9243325SFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_NONE; 96e9243325SFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_NONE; 97e9243325SFrançois Tigeot } else if (INTEL_INFO(dev)->gen >= 6) { 98e3adcf8fSFrançois Tigeot uint32_t dimm_c0, dimm_c1; 99e3adcf8fSFrançois Tigeot dimm_c0 = I915_READ(MAD_DIMM_C0); 100e3adcf8fSFrançois Tigeot dimm_c1 = I915_READ(MAD_DIMM_C1); 101e3adcf8fSFrançois Tigeot dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; 102e3adcf8fSFrançois Tigeot dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; 103e3adcf8fSFrançois Tigeot /* Enable swizzling when the channels are populated with 104e3adcf8fSFrançois Tigeot * identically sized dimms. We don't need to check the 3rd 105e3adcf8fSFrançois Tigeot * channel because no cpu with gpu attached ships in that 106e3adcf8fSFrançois Tigeot * configuration. Also, swizzling only makes sense for 2 107e3adcf8fSFrançois Tigeot * channels anyway. */ 108e3adcf8fSFrançois Tigeot if (dimm_c0 == dimm_c1) { 109e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_9_10; 110e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_9; 111e3adcf8fSFrançois Tigeot } else { 112e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_NONE; 113e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_NONE; 114e3adcf8fSFrançois Tigeot } 115e3adcf8fSFrançois Tigeot } else if (IS_GEN5(dev)) { 116e3adcf8fSFrançois Tigeot /* On Ironlake whatever DRAM config, GPU always do 117e3adcf8fSFrançois Tigeot * same swizzling setup. 118e3adcf8fSFrançois Tigeot */ 119e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_9_10; 120e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_9; 121e3adcf8fSFrançois Tigeot } else if (IS_GEN2(dev)) { 122e3adcf8fSFrançois Tigeot /* As far as we know, the 865 doesn't have these bit 6 123e3adcf8fSFrançois Tigeot * swizzling issues. 124e3adcf8fSFrançois Tigeot */ 125e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_NONE; 126e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_NONE; 127e3adcf8fSFrançois Tigeot } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) { 128e3adcf8fSFrançois Tigeot uint32_t dcc; 129e3adcf8fSFrançois Tigeot 130e3adcf8fSFrançois Tigeot /* On 9xx chipsets, channel interleave by the CPU is 131e3adcf8fSFrançois Tigeot * determined by DCC. For single-channel, neither the CPU 132e3adcf8fSFrançois Tigeot * nor the GPU do swizzling. For dual channel interleaved, 133e3adcf8fSFrançois Tigeot * the GPU's interleave is bit 9 and 10 for X tiled, and bit 134e3adcf8fSFrançois Tigeot * 9 for Y tiled. The CPU's interleave is independent, and 135e3adcf8fSFrançois Tigeot * can be based on either bit 11 (haven't seen this yet) or 136e3adcf8fSFrançois Tigeot * bit 17 (common). 137e3adcf8fSFrançois Tigeot */ 138e3adcf8fSFrançois Tigeot dcc = I915_READ(DCC); 139e3adcf8fSFrançois Tigeot switch (dcc & DCC_ADDRESSING_MODE_MASK) { 140e3adcf8fSFrançois Tigeot case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: 141e3adcf8fSFrançois Tigeot case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: 142e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_NONE; 143e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_NONE; 144e3adcf8fSFrançois Tigeot break; 145e3adcf8fSFrançois Tigeot case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: 146e3adcf8fSFrançois Tigeot if (dcc & DCC_CHANNEL_XOR_DISABLE) { 147e3adcf8fSFrançois Tigeot /* This is the base swizzling by the GPU for 148e3adcf8fSFrançois Tigeot * tiled buffers. 149e3adcf8fSFrançois Tigeot */ 150e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_9_10; 151e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_9; 152e3adcf8fSFrançois Tigeot } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { 153e3adcf8fSFrançois Tigeot /* Bit 11 swizzling by the CPU in addition. */ 154e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; 155e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_9_11; 156e3adcf8fSFrançois Tigeot } else { 157e3adcf8fSFrançois Tigeot /* Bit 17 swizzling by the CPU in addition. */ 158e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; 159e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_9_17; 160e3adcf8fSFrançois Tigeot } 161e3adcf8fSFrançois Tigeot break; 162e3adcf8fSFrançois Tigeot } 163e3adcf8fSFrançois Tigeot if (dcc == 0xffffffff) { 164e3adcf8fSFrançois Tigeot DRM_ERROR("Couldn't read from MCHBAR. " 165e3adcf8fSFrançois Tigeot "Disabling tiling.\n"); 166e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; 167e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; 168e3adcf8fSFrançois Tigeot } 169e3adcf8fSFrançois Tigeot } else { 170e3adcf8fSFrançois Tigeot /* The 965, G33, and newer, have a very flexible memory 171e3adcf8fSFrançois Tigeot * configuration. It will enable dual-channel mode 172e3adcf8fSFrançois Tigeot * (interleaving) on as much memory as it can, and the GPU 173e3adcf8fSFrançois Tigeot * will additionally sometimes enable different bit 6 174e3adcf8fSFrançois Tigeot * swizzling for tiled objects from the CPU. 175e3adcf8fSFrançois Tigeot * 176e3adcf8fSFrançois Tigeot * Here's what I found on the G965: 177e3adcf8fSFrançois Tigeot * slot fill memory size swizzling 178e3adcf8fSFrançois Tigeot * 0A 0B 1A 1B 1-ch 2-ch 179e3adcf8fSFrançois Tigeot * 512 0 0 0 512 0 O 180e3adcf8fSFrançois Tigeot * 512 0 512 0 16 1008 X 181e3adcf8fSFrançois Tigeot * 512 0 0 512 16 1008 X 182e3adcf8fSFrançois Tigeot * 0 512 0 512 16 1008 X 183e3adcf8fSFrançois Tigeot * 1024 1024 1024 0 2048 1024 O 184e3adcf8fSFrançois Tigeot * 185e3adcf8fSFrançois Tigeot * We could probably detect this based on either the DRB 186e3adcf8fSFrançois Tigeot * matching, which was the case for the swizzling required in 187e3adcf8fSFrançois Tigeot * the table above, or from the 1-ch value being less than 188e3adcf8fSFrançois Tigeot * the minimum size of a rank. 189e3adcf8fSFrançois Tigeot */ 190e3adcf8fSFrançois Tigeot if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) { 191e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_NONE; 192e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_NONE; 193e3adcf8fSFrançois Tigeot } else { 194e3adcf8fSFrançois Tigeot swizzle_x = I915_BIT_6_SWIZZLE_9_10; 195e3adcf8fSFrançois Tigeot swizzle_y = I915_BIT_6_SWIZZLE_9; 196e3adcf8fSFrançois Tigeot } 197e3adcf8fSFrançois Tigeot } 198e3adcf8fSFrançois Tigeot 199e3adcf8fSFrançois Tigeot dev_priv->mm.bit_6_swizzle_x = swizzle_x; 200e3adcf8fSFrançois Tigeot dev_priv->mm.bit_6_swizzle_y = swizzle_y; 201e3adcf8fSFrançois Tigeot } 202e3adcf8fSFrançois Tigeot 203e3adcf8fSFrançois Tigeot /* Check pitch constriants for all chips & tiling formats */ 204e3adcf8fSFrançois Tigeot static bool 205e3adcf8fSFrançois Tigeot i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) 206e3adcf8fSFrançois Tigeot { 207e3adcf8fSFrançois Tigeot int tile_width; 208e3adcf8fSFrançois Tigeot 209e3adcf8fSFrançois Tigeot /* Linear is always fine */ 210e3adcf8fSFrançois Tigeot if (tiling_mode == I915_TILING_NONE) 211e9243325SFrançois Tigeot return true; 212e3adcf8fSFrançois Tigeot 213e3adcf8fSFrançois Tigeot if (IS_GEN2(dev) || 214e3adcf8fSFrançois Tigeot (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) 215e3adcf8fSFrançois Tigeot tile_width = 128; 216e3adcf8fSFrançois Tigeot else 217e3adcf8fSFrançois Tigeot tile_width = 512; 218e3adcf8fSFrançois Tigeot 219e3adcf8fSFrançois Tigeot /* check maximum stride & object size */ 220e3adcf8fSFrançois Tigeot if (INTEL_INFO(dev)->gen >= 4) { 221e3adcf8fSFrançois Tigeot /* i965 stores the end address of the gtt mapping in the fence 222e3adcf8fSFrançois Tigeot * reg, so dont bother to check the size */ 223e3adcf8fSFrançois Tigeot if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) 224e9243325SFrançois Tigeot return false; 225e3adcf8fSFrançois Tigeot } else { 226e3adcf8fSFrançois Tigeot if (stride > 8192) 227e9243325SFrançois Tigeot return false; 228e3adcf8fSFrançois Tigeot 229e3adcf8fSFrançois Tigeot if (IS_GEN3(dev)) { 230e3adcf8fSFrançois Tigeot if (size > I830_FENCE_MAX_SIZE_VAL << 20) 231e9243325SFrançois Tigeot return false; 232e3adcf8fSFrançois Tigeot } else { 233e3adcf8fSFrançois Tigeot if (size > I830_FENCE_MAX_SIZE_VAL << 19) 234e9243325SFrançois Tigeot return false; 235e3adcf8fSFrançois Tigeot } 236e3adcf8fSFrançois Tigeot } 237e3adcf8fSFrançois Tigeot 238e3adcf8fSFrançois Tigeot /* 965+ just needs multiples of tile width */ 239e3adcf8fSFrançois Tigeot if (INTEL_INFO(dev)->gen >= 4) { 240e3adcf8fSFrançois Tigeot if (stride & (tile_width - 1)) 241e9243325SFrançois Tigeot return false; 242e9243325SFrançois Tigeot return true; 243e3adcf8fSFrançois Tigeot } 244e3adcf8fSFrançois Tigeot 245e3adcf8fSFrançois Tigeot /* Pre-965 needs power of two tile widths */ 246e3adcf8fSFrançois Tigeot if (stride < tile_width) 247e9243325SFrançois Tigeot return false; 248e3adcf8fSFrançois Tigeot 249e3adcf8fSFrançois Tigeot if (stride & (stride - 1)) 250e9243325SFrançois Tigeot return false; 251e3adcf8fSFrançois Tigeot 252e9243325SFrançois Tigeot return true; 253e3adcf8fSFrançois Tigeot } 254e3adcf8fSFrançois Tigeot 255e3adcf8fSFrançois Tigeot /* Is the current GTT allocation valid for the change in tiling? */ 256e3adcf8fSFrançois Tigeot static bool 257e3adcf8fSFrançois Tigeot i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) 258e3adcf8fSFrançois Tigeot { 259e3adcf8fSFrançois Tigeot u32 size; 260e3adcf8fSFrançois Tigeot 261e3adcf8fSFrançois Tigeot if (tiling_mode == I915_TILING_NONE) 262e9243325SFrançois Tigeot return true; 263e3adcf8fSFrançois Tigeot 264e3adcf8fSFrançois Tigeot if (INTEL_INFO(obj->base.dev)->gen >= 4) 265e9243325SFrançois Tigeot return true; 266e3adcf8fSFrançois Tigeot 267e3adcf8fSFrançois Tigeot if (INTEL_INFO(obj->base.dev)->gen == 3) { 268e3adcf8fSFrançois Tigeot if (obj->gtt_offset & ~I915_FENCE_START_MASK) 269e9243325SFrançois Tigeot return false; 270e3adcf8fSFrançois Tigeot } else { 271e3adcf8fSFrançois Tigeot if (obj->gtt_offset & ~I830_FENCE_START_MASK) 272e9243325SFrançois Tigeot return false; 273e3adcf8fSFrançois Tigeot } 274e3adcf8fSFrançois Tigeot 275e3adcf8fSFrançois Tigeot /* 276e3adcf8fSFrançois Tigeot * Previous chips need to be aligned to the size of the smallest 277e3adcf8fSFrançois Tigeot * fence register that can contain the object. 278e3adcf8fSFrançois Tigeot */ 279e3adcf8fSFrançois Tigeot if (INTEL_INFO(obj->base.dev)->gen == 3) 280e3adcf8fSFrançois Tigeot size = 1024*1024; 281e3adcf8fSFrançois Tigeot else 282e3adcf8fSFrançois Tigeot size = 512*1024; 283e3adcf8fSFrançois Tigeot 284e3adcf8fSFrançois Tigeot while (size < obj->base.size) 285e3adcf8fSFrançois Tigeot size <<= 1; 286e3adcf8fSFrançois Tigeot 287e3adcf8fSFrançois Tigeot if (obj->gtt_space->size != size) 288e9243325SFrançois Tigeot return false; 289e3adcf8fSFrançois Tigeot 290e3adcf8fSFrançois Tigeot if (obj->gtt_offset & (size - 1)) 291e9243325SFrançois Tigeot return false; 292e3adcf8fSFrançois Tigeot 293e9243325SFrançois Tigeot return true; 294e3adcf8fSFrançois Tigeot } 295e3adcf8fSFrançois Tigeot 296e3adcf8fSFrançois Tigeot /** 297e3adcf8fSFrançois Tigeot * Sets the tiling mode of an object, returning the required swizzling of 298e3adcf8fSFrançois Tigeot * bit 6 of addresses in the object. 299e3adcf8fSFrançois Tigeot */ 300e3adcf8fSFrançois Tigeot int 301e3adcf8fSFrançois Tigeot i915_gem_set_tiling(struct drm_device *dev, void *data, 302e3adcf8fSFrançois Tigeot struct drm_file *file) 303e3adcf8fSFrançois Tigeot { 304e3adcf8fSFrançois Tigeot struct drm_i915_gem_set_tiling *args = data; 305e3adcf8fSFrançois Tigeot drm_i915_private_t *dev_priv = dev->dev_private; 306e3adcf8fSFrançois Tigeot struct drm_i915_gem_object *obj; 307e9243325SFrançois Tigeot int ret = 0; 308e3adcf8fSFrançois Tigeot 309e3adcf8fSFrançois Tigeot obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 310e3adcf8fSFrançois Tigeot if (&obj->base == NULL) 311e3adcf8fSFrançois Tigeot return -ENOENT; 312e3adcf8fSFrançois Tigeot 313e3adcf8fSFrançois Tigeot if (!i915_tiling_ok(dev, 314e3adcf8fSFrançois Tigeot args->stride, obj->base.size, args->tiling_mode)) { 315f192107fSFrançois Tigeot drm_gem_object_unreference_unlocked(&obj->base); 316e3adcf8fSFrançois Tigeot return -EINVAL; 317e3adcf8fSFrançois Tigeot } 318e3adcf8fSFrançois Tigeot 319e3adcf8fSFrançois Tigeot if (obj->pin_count) { 320f192107fSFrançois Tigeot drm_gem_object_unreference_unlocked(&obj->base); 321e3adcf8fSFrançois Tigeot return -EBUSY; 322e3adcf8fSFrançois Tigeot } 323e3adcf8fSFrançois Tigeot 324e3adcf8fSFrançois Tigeot if (args->tiling_mode == I915_TILING_NONE) { 325e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 326e3adcf8fSFrançois Tigeot args->stride = 0; 327e3adcf8fSFrançois Tigeot } else { 328e3adcf8fSFrançois Tigeot if (args->tiling_mode == I915_TILING_X) 329e3adcf8fSFrançois Tigeot args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; 330e3adcf8fSFrançois Tigeot else 331e3adcf8fSFrançois Tigeot args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; 332e3adcf8fSFrançois Tigeot 333e3adcf8fSFrançois Tigeot /* Hide bit 17 swizzling from the user. This prevents old Mesa 334e3adcf8fSFrançois Tigeot * from aborting the application on sw fallbacks to bit 17, 335e3adcf8fSFrançois Tigeot * and we use the pread/pwrite bit17 paths to swizzle for it. 336e3adcf8fSFrançois Tigeot * If there was a user that was relying on the swizzle 337e3adcf8fSFrançois Tigeot * information for drm_intel_bo_map()ed reads/writes this would 338e3adcf8fSFrançois Tigeot * break it, but we don't have any of those. 339e3adcf8fSFrançois Tigeot */ 340e3adcf8fSFrançois Tigeot if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) 341e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_9; 342e3adcf8fSFrançois Tigeot if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) 343e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; 344e3adcf8fSFrançois Tigeot 345e3adcf8fSFrançois Tigeot /* If we can't handle the swizzling, make it untiled. */ 346e3adcf8fSFrançois Tigeot if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { 347e3adcf8fSFrançois Tigeot args->tiling_mode = I915_TILING_NONE; 348e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 349e3adcf8fSFrançois Tigeot args->stride = 0; 350e3adcf8fSFrançois Tigeot } 351e3adcf8fSFrançois Tigeot } 352e3adcf8fSFrançois Tigeot 353283d6aceSFrançois Tigeot DRM_LOCK(dev); 354e3adcf8fSFrançois Tigeot if (args->tiling_mode != obj->tiling_mode || 355e3adcf8fSFrançois Tigeot args->stride != obj->stride) { 356e3adcf8fSFrançois Tigeot /* We need to rebind the object if its current allocation 357e3adcf8fSFrançois Tigeot * no longer meets the alignment restrictions for its new 358e3adcf8fSFrançois Tigeot * tiling mode. Otherwise we can just leave it alone, but 359*e3440f96SFrançois Tigeot * need to ensure that any fence register is updated before 360*e3440f96SFrançois Tigeot * the next fenced (either through the GTT or by the BLT unit 361*e3440f96SFrançois Tigeot * on older GPUs) access. 362e9243325SFrançois Tigeot * 363e9243325SFrançois Tigeot * After updating the tiling parameters, we then flag whether 364e9243325SFrançois Tigeot * we need to update an associated fence register. Note this 365e9243325SFrançois Tigeot * has to also include the unfenced register the GPU uses 366e9243325SFrançois Tigeot * whilst executing a fenced command for an untiled object. 367e3adcf8fSFrançois Tigeot */ 368e3adcf8fSFrançois Tigeot 369e9243325SFrançois Tigeot obj->map_and_fenceable = 370e9243325SFrançois Tigeot obj->gtt_space == NULL || 371e9243325SFrançois Tigeot (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && 372e3adcf8fSFrançois Tigeot i915_gem_object_fence_ok(obj, args->tiling_mode)); 373e3adcf8fSFrançois Tigeot 374e3adcf8fSFrançois Tigeot /* Rebind if we need a change of alignment */ 375e3adcf8fSFrançois Tigeot if (!obj->map_and_fenceable) { 376e9243325SFrançois Tigeot u32 unfenced_alignment = 377e3adcf8fSFrançois Tigeot i915_gem_get_unfenced_gtt_alignment(dev, 378e9243325SFrançois Tigeot obj->base.size, 379e9243325SFrançois Tigeot args->tiling_mode); 380e3adcf8fSFrançois Tigeot if (obj->gtt_offset & (unfenced_alignment - 1)) 381e3adcf8fSFrançois Tigeot ret = i915_gem_object_unbind(obj); 382e3adcf8fSFrançois Tigeot } 383f192107fSFrançois Tigeot 384e3adcf8fSFrançois Tigeot if (ret == 0) { 385f192107fSFrançois Tigeot obj->fence_dirty = 386f192107fSFrançois Tigeot obj->fenced_gpu_access || 387f192107fSFrançois Tigeot obj->fence_reg != I915_FENCE_REG_NONE; 388f192107fSFrançois Tigeot 389e3adcf8fSFrançois Tigeot obj->tiling_mode = args->tiling_mode; 390e3adcf8fSFrançois Tigeot obj->stride = args->stride; 391*e3440f96SFrançois Tigeot 392*e3440f96SFrançois Tigeot /* Force the fence to be reacquired for GTT access */ 393*e3440f96SFrançois Tigeot i915_gem_release_mmap(obj); 394e3adcf8fSFrançois Tigeot } 395e3adcf8fSFrançois Tigeot } 396e3adcf8fSFrançois Tigeot /* we have to maintain this existing ABI... */ 397e3adcf8fSFrançois Tigeot args->stride = obj->stride; 398e3adcf8fSFrançois Tigeot args->tiling_mode = obj->tiling_mode; 399e3adcf8fSFrançois Tigeot drm_gem_object_unreference(&obj->base); 400283d6aceSFrançois Tigeot DRM_UNLOCK(dev); 401e3adcf8fSFrançois Tigeot 402e9243325SFrançois Tigeot return ret; 403e3adcf8fSFrançois Tigeot } 404e3adcf8fSFrançois Tigeot 405e3adcf8fSFrançois Tigeot /** 406e3adcf8fSFrançois Tigeot * Returns the current tiling mode and required bit 6 swizzling for the object. 407e3adcf8fSFrançois Tigeot */ 408e3adcf8fSFrançois Tigeot int 409e3adcf8fSFrançois Tigeot i915_gem_get_tiling(struct drm_device *dev, void *data, 410e3adcf8fSFrançois Tigeot struct drm_file *file) 411e3adcf8fSFrançois Tigeot { 412e3adcf8fSFrançois Tigeot struct drm_i915_gem_get_tiling *args = data; 413e3adcf8fSFrançois Tigeot drm_i915_private_t *dev_priv = dev->dev_private; 414e3adcf8fSFrançois Tigeot struct drm_i915_gem_object *obj; 415e3adcf8fSFrançois Tigeot 416e3adcf8fSFrançois Tigeot obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); 417e3adcf8fSFrançois Tigeot if (&obj->base == NULL) 418e3adcf8fSFrançois Tigeot return -ENOENT; 419e3adcf8fSFrançois Tigeot 420283d6aceSFrançois Tigeot DRM_LOCK(dev); 421283d6aceSFrançois Tigeot 422e3adcf8fSFrançois Tigeot args->tiling_mode = obj->tiling_mode; 423e3adcf8fSFrançois Tigeot switch (obj->tiling_mode) { 424e3adcf8fSFrançois Tigeot case I915_TILING_X: 425e3adcf8fSFrançois Tigeot args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; 426e3adcf8fSFrançois Tigeot break; 427e3adcf8fSFrançois Tigeot case I915_TILING_Y: 428e3adcf8fSFrançois Tigeot args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; 429e3adcf8fSFrançois Tigeot break; 430e3adcf8fSFrançois Tigeot case I915_TILING_NONE: 431e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 432e3adcf8fSFrançois Tigeot break; 433e3adcf8fSFrançois Tigeot default: 434e3adcf8fSFrançois Tigeot DRM_ERROR("unknown tiling mode\n"); 435e3adcf8fSFrançois Tigeot } 436e3adcf8fSFrançois Tigeot 437e3adcf8fSFrançois Tigeot /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ 438e3adcf8fSFrançois Tigeot if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) 439e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_9; 440e3adcf8fSFrançois Tigeot if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) 441e3adcf8fSFrançois Tigeot args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; 442e3adcf8fSFrançois Tigeot 443e3adcf8fSFrançois Tigeot drm_gem_object_unreference(&obj->base); 444283d6aceSFrançois Tigeot DRM_UNLOCK(dev); 445e3adcf8fSFrançois Tigeot 446e3adcf8fSFrançois Tigeot return 0; 447e3adcf8fSFrançois Tigeot } 448e3adcf8fSFrançois Tigeot 449e3adcf8fSFrançois Tigeot /** 450e3adcf8fSFrançois Tigeot * Swap every 64 bytes of this page around, to account for it having a new 451e3adcf8fSFrançois Tigeot * bit 17 of its physical address and therefore being interpreted differently 452e3adcf8fSFrançois Tigeot * by the GPU. 453e3adcf8fSFrançois Tigeot */ 454e3adcf8fSFrançois Tigeot static void 455*e3440f96SFrançois Tigeot i915_gem_swizzle_page(struct vm_page *page) 456e3adcf8fSFrançois Tigeot { 457e3adcf8fSFrançois Tigeot char temp[64]; 458e3adcf8fSFrançois Tigeot char *vaddr; 459e3adcf8fSFrançois Tigeot int i; 460e3adcf8fSFrançois Tigeot 461*e3440f96SFrançois Tigeot vaddr = kmap(page); 462e3adcf8fSFrançois Tigeot 463e3adcf8fSFrançois Tigeot for (i = 0; i < PAGE_SIZE; i += 128) { 464e3adcf8fSFrançois Tigeot memcpy(temp, &vaddr[i], 64); 465e3adcf8fSFrançois Tigeot memcpy(&vaddr[i], &vaddr[i + 64], 64); 466e3adcf8fSFrançois Tigeot memcpy(&vaddr[i + 64], temp, 64); 467e3adcf8fSFrançois Tigeot } 468e3adcf8fSFrançois Tigeot 469*e3440f96SFrançois Tigeot kunmap(page); 470e3adcf8fSFrançois Tigeot } 471e3adcf8fSFrançois Tigeot 472e3adcf8fSFrançois Tigeot void 473e3adcf8fSFrançois Tigeot i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) 474e3adcf8fSFrançois Tigeot { 475e3adcf8fSFrançois Tigeot int page_count = obj->base.size >> PAGE_SHIFT; 476e3adcf8fSFrançois Tigeot int i; 477e3adcf8fSFrançois Tigeot 478e3adcf8fSFrançois Tigeot if (obj->bit_17 == NULL) 479e3adcf8fSFrançois Tigeot return; 480e3adcf8fSFrançois Tigeot 481e3adcf8fSFrançois Tigeot for (i = 0; i < page_count; i++) { 482e3adcf8fSFrançois Tigeot char new_bit_17 = VM_PAGE_TO_PHYS(obj->pages[i]) >> 17; 483e3adcf8fSFrançois Tigeot if ((new_bit_17 & 0x1) != 484e3adcf8fSFrançois Tigeot (test_bit(i, obj->bit_17) != 0)) { 485e3adcf8fSFrançois Tigeot i915_gem_swizzle_page(obj->pages[i]); 486e3adcf8fSFrançois Tigeot vm_page_dirty(obj->pages[i]); 487e3adcf8fSFrançois Tigeot } 488e3adcf8fSFrançois Tigeot } 489e3adcf8fSFrançois Tigeot } 490e3adcf8fSFrançois Tigeot 491e3adcf8fSFrançois Tigeot void 492e3adcf8fSFrançois Tigeot i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) 493e3adcf8fSFrançois Tigeot { 494e3adcf8fSFrançois Tigeot int page_count = obj->base.size >> PAGE_SHIFT; 495e3adcf8fSFrançois Tigeot int i; 496e3adcf8fSFrançois Tigeot 497e3adcf8fSFrançois Tigeot if (obj->bit_17 == NULL) { 498e3adcf8fSFrançois Tigeot obj->bit_17 = kmalloc(BITS_TO_LONGS(page_count) * 4995a3b77d5SFrançois Tigeot sizeof(long), M_DRM, M_WAITOK); 500f192107fSFrançois Tigeot if (obj->bit_17 == NULL) { 501f192107fSFrançois Tigeot DRM_ERROR("Failed to allocate memory for bit 17 " 502f192107fSFrançois Tigeot "record\n"); 503f192107fSFrançois Tigeot return; 504f192107fSFrançois Tigeot } 505e3adcf8fSFrançois Tigeot } 506e3adcf8fSFrançois Tigeot 507e3adcf8fSFrançois Tigeot for (i = 0; i < page_count; i++) { 508e3adcf8fSFrançois Tigeot if (VM_PAGE_TO_PHYS(obj->pages[i]) & (1 << 17)) 509*e3440f96SFrançois Tigeot __set_bit(i, obj->bit_17); 510e3adcf8fSFrançois Tigeot else 511*e3440f96SFrançois Tigeot __clear_bit(i, obj->bit_17); 512e3adcf8fSFrançois Tigeot } 513e3adcf8fSFrançois Tigeot } 514