/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <linux/io.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <drm/drm_cache.h>
#include <drm/drm_prime.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_trace.h"
#include "radeon_ttm.h"

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions are calling it.
 */
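
/*
 * For example, a typical kernel-internal user follows this pattern
 * (illustrative only, error handling omitted):
 *
 *	radeon_bo_reserve(bo, false);
 *	radeon_bo_kmap(bo, &cpu_ptr);
 *	... access the mapping through cpu_ptr ...
 *	radeon_bo_kunmap(bo);
 *	radeon_bo_unreserve(bo);
 */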

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON_ONCE(!list_empty(&bo->va));
	if (bo->tbo.base.import_attach)
		drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg);
	drm_gem_object_release(&bo->tbo.base);
	pool_put(&bo->rdev->ddev->objpl, bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c].mem_type = TTM_PL_VRAM;
			rbo->placements[c++].flags = 0;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_VRAM;
		rbo->placements[c++].flags = 0;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_TT;
		rbo->placements[c++].flags = 0;
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
		rbo->placements[c++].flags = 0;
	}
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c].mem_type = TTM_PL_SYSTEM;
		rbo->placements[c++].flags = 0;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].mem_type == TTM_PL_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}
}
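
/**
 * radeon_bo_create - allocate and initialize a radeon buffer object
 *
 * @rdev: radeon device the BO belongs to
 * @size: requested size in bytes, rounded up to a whole page
 * @byte_align: requested alignment in bytes, converted to pages for TTM
 * @kernel: true for kernel-internal (ttm_bo_type_kernel) allocations
 * @domain: initial RADEON_GEM_DOMAIN_* placement mask
 * @flags: RADEON_GEM_* caching/access flags
 * @sg: scatter/gather table for imported dma-buf BOs, or NULL
 * @resv: optional reservation object to share with another BO, may be NULL
 * @bo_ptr: where the new BO is returned on success
 *
 * Returns 0 on success or a negative error code.
 */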
int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct dma_resv *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	bo = pool_get(&rdev->ddev->objpl, PR_WAITOK | PR_ZERO);
	if (bo == NULL)
		return -ENOMEM;
	drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size);
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
				       RADEON_GEM_DOMAIN_GTT |
				       RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

	/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
	 */
	if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */
#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & RADEON_GEM_GTT_WC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init_validate(&rdev->mman.bdev, &bo->tbo, type,
				 &bo->placement, page_align, !kernel, sg, resv,
				 &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}
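
/**
 * radeon_bo_kmap - map a buffer object into the kernel address space
 *
 * @bo: buffer object to map (reserved by the caller, see the rule above)
 * @ptr: where to return the CPU address, may be NULL
 *
 * Waits for the kernel fences on the BO, then maps the whole object with
 * ttm_bo_kmap().  If the BO is already mapped, the cached pointer is
 * returned instead.  Returns 0 on success or a negative error code.
 */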
int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	long r;

	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
				  false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_get(&bo->tbo);
	return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;
	tbo = &((*bo)->tbo);
	ttm_bo_put(tbo);
	*bo = NULL;
}
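
/**
 * radeon_bo_pin_restricted - pin a buffer object within an offset range
 *
 * @bo: buffer object to pin
 * @domain: RADEON_GEM_DOMAIN_* domain to pin the BO into
 * @max_offset: maximum GPU offset the BO may be placed at, 0 for no limit
 * @gpu_addr: optional return of the resulting GPU address
 *
 * Userptr BOs cannot be pinned and dma-buf shared BOs cannot be pinned
 * into VRAM.  An already pinned BO only gets its pin count bumped.
 * Returns 0 on success or a negative error code.
 */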
int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->rdev, bo->tbo.ttm))
		return -EPERM;

	if (bo->tbo.pin_count) {
		ttm_bo_pin(&bo->tbo);
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
		/* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
		return -EINVAL;
	}

	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].mem_type == TTM_PL_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (likely(r == 0)) {
		ttm_bo_pin(&bo->tbo);
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

void radeon_bo_unpin(struct radeon_bo *bo)
{
	ttm_bo_unpin(&bo->tbo);
	if (!bo->tbo.pin_count) {
		if (bo->tbo.resource->mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	}
}

int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	struct ttm_device *bdev = &rdev->mman.bdev;
	struct ttm_resource_manager *man;

	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
#ifndef CONFIG_HIBERNATION
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
#endif
	man = ttm_manager_type(bdev, TTM_PL_VRAM);
	if (!man)
		return 0;
	return ttm_resource_manager_evict_all(bdev, man);
}

void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->tbo.base, bo, (unsigned long)bo->tbo.base.size,
			*((unsigned long *)&bo->tbo.base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_put(&bo->tbo.base);
	}
}
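
/**
 * radeon_bo_init - set up VRAM handling for the device
 *
 * @rdev: radeon device
 *
 * Sets up write-combined access to the VRAM aperture (MTRR or PAT),
 * registers the aperture pages with UVM, prints the detected VRAM
 * configuration and initializes TTM via radeon_ttm_init().
 */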
int radeon_bo_init(struct radeon_device *rdev)
{
	paddr_t start, end;

#ifdef __linux__
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(rdev->mc.aper_base,
				   rdev->mc.aper_size);
#endif

	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
#ifdef __linux__
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
#else
		drm_mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size, DRM_MTRR_WC);
		/* fake a 'cookie', seems to be unused? */
		rdev->mc.vram_mtrr = 1;
#endif
	}

	start = atop(bus_space_mmap(rdev->memt, rdev->mc.aper_base, 0, 0, 0));
	end = start + atop(rdev->mc.aper_size);
	uvm_page_physload(start, end, start, end, PHYSLOAD_DEVICE);

	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 rdev->mc.mc_vram_size >> 20,
		 (unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
		 rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
#ifdef __linux__
	arch_phys_wc_del(rdev->mc.vram_mtrr);
	arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
#else
	drm_mtrr_del(0, rdev->mc.aper_base, rdev->mc.aper_size, DRM_MTRR_WC);
#endif
}
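
/*
 * Worked example for radeon_bo_get_threshold_for_moves() below
 * (illustrative numbers): with 1024 MB of real VRAM and 256 MB in use,
 * half_vram is 512 MB and half_free_vram is 256 MB, so the threshold
 * is 128 MB of moves per IB.  Once usage reaches 512 MB or more, the
 * threshold bottoms out at the 1 MB minimum.
 */
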
/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	struct ttm_resource_manager *man =
		ttm_manager_type(&rdev->mman.bdev, TTM_PL_VRAM);
	u64 vram_usage = ttm_resource_manager_usage(man);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.

	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *     __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %          100 %
	 *         used          used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}
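
/**
 * radeon_bo_list_validate - reserve and place all BOs for a command submission
 *
 * @rdev: radeon device
 * @ticket: ww_acquire ticket used for the reservation
 * @head: list of radeon_bo_list entries to validate
 * @ring: ring the command submission targets
 *
 * Reserves every BO on the list and validates it into its preferred
 * domain, falling back to the allowed domains on failure.  Buffer moves
 * are throttled by radeon_bo_get_threshold_for_moves().  On success the
 * GPU offset and tiling flags of every entry are filled in.
 */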
int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct radeon_bo_list *lobj;
	struct list_head duplicates;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		struct radeon_bo *bo = lobj->robj;
		if (!bo->tbo.pin_count) {
			u32 domain = lobj->preferred_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.resource->mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}

	list_for_each_entry(lobj, &duplicates, tv.head) {
		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
		lobj->tiling_flags = lobj->robj->tiling_flags;
	}

	return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	dma_resv_assert_held(bo->tbo.base.resv);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->tbo.pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.resource->start << PAGE_SHIFT,
			       bo->tbo.base.size);
	return 0;
}
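
/**
 * radeon_bo_clear_surface_reg - release the surface register held by a BO
 *
 * @bo: buffer object to release the register for
 *
 * Frees the hardware surface register assigned by
 * radeon_bo_get_surface_reg(), if any, and marks the BO as having none.
 */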
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
			       uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	dma_resv_assert_held(bo->tbo.base.resv);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}
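
/**
 * radeon_bo_check_tiling - keep the surface register in sync with placement
 *
 * @bo: buffer object to check
 * @has_moved: true if the BO has just been moved
 * @force_drop: true to unconditionally drop the surface register
 *
 * Surface registers only apply to tiled BOs resident in VRAM; this drops
 * or (re)acquires the register accordingly.
 */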
int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
			   bool force_drop)
{
	if (!force_drop)
		dma_resv_assert_held(bo->tbo.base.resv);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.resource->mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);
}
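
/**
 * radeon_bo_fault_reserve_notify - make a faulting BO CPU accessible
 *
 * @bo: buffer object the CPU is about to fault on
 *
 * Called from the fault handler with the BO reserved.  If the BO lies
 * beyond the CPU-visible part of VRAM it is validated into the visible
 * window, or into GTT if that fails.  Pinned BOs outside the window get
 * VM_FAULT_SIGBUS.
 */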
vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->resource->mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->resource->size;
	offset = bo->resource->start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (rbo->tbo.pin_count > 0)
		return VM_FAULT_SIGBUS;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].mem_type == TTM_PL_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		r = ttm_bo_validate(bo, &rbo->placement, &ctx);
	} else if (likely(!r)) {
		offset = bo->resource->start << PAGE_SHIFT;
		/* this should never happen */
		if ((offset + size) > rdev->mc.visible_vram_size)
			return VM_FAULT_SIGBUS;
	}

	if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
		return VM_FAULT_NOPAGE;
	else if (unlikely(r))
		return VM_FAULT_SIGBUS;

	ttm_bo_move_to_lru_tail_unlocked(bo);
	return 0;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
		     bool shared)
{
	struct dma_resv *resv = bo->tbo.base.resv;
	int r;

	r = dma_resv_reserve_fences(resv, 1);
	if (r) {
		/* As last resort on OOM we block for the fence */
		dma_fence_wait(&fence->base, false);
		return;
	}

	dma_resv_add_fence(resv, &fence->base, shared ?
			   DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
}