/*	$NetBSD: amdgpu_gmc.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $	*/

/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: amdgpu_gmc.c,v 1.3 2021/12/19 12:02:39 riastradh Exp $");

#include <linux/io-64-nonatomic-lo-hi.h>

#include "amdgpu.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"

#include <linux/nbsd-namespace.h>

/**
 * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
 *
 * @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting addr
 * @flags: resulting flags
 *
 * Get the address and flags to be used for a PDE (Page Directory Entry).
 */
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
			       uint64_t *addr, uint64_t *flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_dma_tt *ttm;

	switch (bo->tbo.mem.mem_type) {
	case TTM_PL_TT:
		ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
#ifdef __NetBSD__
		*addr = ttm->dma_address->dm_segs[0].ds_addr;
#else
		*addr = ttm->dma_address[0];
#endif
		break;
	case TTM_PL_VRAM:
		*addr = amdgpu_bo_gpu_offset(bo);
		break;
	default:
		*addr = 0;
		break;
	}
	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}

/**
 * amdgpu_gmc_pd_addr - return the address of the root directory
 *
 */
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint64_t pd_addr;

	/* TODO: move that into ASIC specific code */
	if (adev->asic_type >= CHIP_VEGA10) {
		uint64_t flags = AMDGPU_PTE_VALID;

		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
		pd_addr |= flags;
	} else {
		pd_addr = amdgpu_bo_gpu_offset(bo);
	}
	return pd_addr;
}
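
/*
 * Example usage (illustration only, not compiled here): the VM code
 * typically records the hardware root directory address once per VM,
 * roughly as
 *
 *	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 *
 * The field and BO names above are just an assumed caller; the BO must be
 * reserved and placed so that amdgpu_bo_gpu_offset() is valid.
 */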

/**
 * amdgpu_gmc_set_pte_pde - update the page tables using CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: dst addr to write into pte/pde
 * @flags: access flags
 *
 * Update the page tables using CPU.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
			   uint32_t gpu_page_idx, uint64_t addr,
			   uint64_t flags)
{
#ifndef __NetBSD__
	void __iomem *ptr = (void *)cpu_pt_addr;
#endif
	uint64_t value;

	/*
	 * The following is for PTE only. GART does not have PDEs.
	 */
	value = addr & 0x0000FFFFFFFFF000ULL;
	value |= flags;
#ifdef __NetBSD__
	/* Caller must issue appropriate bus_dmamap_sync before use. */
	((uint64_t *)cpu_pt_addr)[gpu_page_idx] = value;
#else
	writeq(value, ptr + (gpu_page_idx * 8));
#endif
	return 0;
}
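
/*
 * Worked example (illustration only): for addr = 0x0000000123456fff and
 * flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM, the mask above keeps only
 * bits 12..47 of the address, so the entry written is
 *
 *	value = 0x0000000123456000 | AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM;
 *
 * i.e. the low 12 bits and the bits above bit 47 are left free for the
 * per-ASIC flag encoding.
 */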

/**
 * amdgpu_gmc_agp_addr - return the address in the AGP address space
 *
 * @tbo: TTM BO which needs the address, must be in GTT domain
 *
 * Tries to figure out how to access the BO through the AGP aperture. Returns
 * AMDGPU_BO_INVALID_OFFSET if that is not possible.
 */
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_dma_tt *ttm;
	resource_size_t addr;

	if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
		return AMDGPU_BO_INVALID_OFFSET;

	ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
#ifdef __NetBSD__
	addr = ttm->dma_address->dm_segs[0].ds_addr;
#else
	addr = ttm->dma_address[0];
#endif
	if (addr + PAGE_SIZE >= adev->gmc.agp_size)
		return AMDGPU_BO_INVALID_OFFSET;

	return adev->gmc.agp_start + addr;
}

/**
 * amdgpu_gmc_vram_location - try to find VRAM location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * Function will try to place VRAM at the base address provided
 * as parameter.
 */
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      u64 base)
{
	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

	mc->vram_start = base;
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
	if (limit && limit < mc->real_vram_size)
		mc->real_vram_size = limit;

	if (mc->xgmi.num_physical_nodes == 0) {
		mc->fb_start = mc->vram_start;
		mc->fb_end = mc->vram_end;
	}
	dev_info(adev->dev, "VRAM: %"PRIu64"M 0x%016"PRIX64" - 0x%016"PRIX64" (%"PRIu64"M used)\n",
		 mc->mc_vram_size >> 20, mc->vram_start,
		 mc->vram_end, mc->real_vram_size >> 20);
}

/**
 * amdgpu_gmc_gart_location - try to find GART location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to place GART before or after VRAM.
 *
 * If the GART size is bigger than the space left, we adjust the GART size.
 * Thus the function never fails.
 */
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t four_gb = 0x100000000ULL;
	u64 size_af, size_bf;
	/* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);

	mc->gart_size += adev->pm.smu_prv_buffer_size;

	/* VCE doesn't like it when BOs cross a 4GB segment, so align
	 * the GART base on a 4GB boundary as well.
	 */
	size_bf = mc->fb_start;
	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);

	if (mc->gart_size > max(size_bf, size_af)) {
		dev_warn(adev->dev, "limiting GART\n");
		mc->gart_size = max(size_bf, size_af);
	}

	if ((size_bf >= mc->gart_size && size_bf < size_af) ||
	    (size_af < mc->gart_size))
		mc->gart_start = 0;
	else
		mc->gart_start = max_mc_address - mc->gart_size + 1;

	mc->gart_start &= ~(four_gb - 1);
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	dev_info(adev->dev, "GART: %"PRIu64"M 0x%016"PRIX64" - 0x%016"PRIX64"\n",
		 mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

/**
 * amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to find a place for the AGP BAR in the MC address
 * space.
 *
 * AGP BAR will be assigned the largest available hole in the address space.
 * Should be called after VRAM and GART locations are set up.
 */
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
	u64 size_af, size_bf;

	if (amdgpu_sriov_vf(adev)) {
		mc->agp_start = 0xffffffffffff;
		mc->agp_end = 0x0;
		mc->agp_size = 0;

		return;
	}

	if (mc->fb_start > mc->gart_start) {
		size_bf = (mc->fb_start & sixteen_gb_mask) -
			ALIGN(mc->gart_end + 1, sixteen_gb);
		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
	} else {
		size_bf = mc->fb_start & sixteen_gb_mask;
		size_af = (mc->gart_start & sixteen_gb_mask) -
			ALIGN(mc->fb_end + 1, sixteen_gb);
	}

	if (size_bf > size_af) {
		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
		mc->agp_size = size_bf;
	} else {
		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
		mc->agp_size = size_af;
	}

	mc->agp_end = mc->agp_start + mc->agp_size - 1;
	dev_info(adev->dev, "AGP: %"PRIu64"M 0x%016"PRIX64" - 0x%016"PRIX64"\n",
		 mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}
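
/*
 * Resulting MC address space (illustration only; the numbers depend on
 * the ASIC, VRAM size and module parameters). With VRAM placed low and
 * GART placed high, the three helpers above carve the space roughly as
 *
 *	0                                                        mc_mask
 *	|--- VRAM (fb_start..fb_end) ---|----- AGP -----|--- GART ---|
 *
 * amdgpu_gmc_vram_location() fixes fb_start/fb_end,
 * amdgpu_gmc_gart_location() then takes the larger gap before or after
 * the FB (4 GB aligned), and amdgpu_gmc_agp_location() finally assigns
 * the largest remaining 16 GB aligned hole to the AGP aperture.
 */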

/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp)
{
	struct amdgpu_gmc *gmc = &adev->gmc;

	uint64_t stamp, key = addr << 4 | pasid;
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;

	/* If we don't have space left in the ring buffer return immediately */
	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
		AMDGPU_GMC_FAULT_TIMEOUT;
	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
		return true;

	/* Try to find the fault in the hash */
	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
	while (fault->timestamp >= stamp) {
		uint64_t tmp;

		if (fault->key == key)
			return true;

		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];

		/* Check if the entry was reused */
		if (fault->timestamp >= tmp)
			break;
	}

	/* Add the fault to the ring */
	fault = &gmc->fault_ring[gmc->last_fault];
	fault->key = key;
	fault->timestamp = timestamp;

	/* And update the hash */
	fault->next = gmc->fault_hash[hash].idx;
	gmc->fault_hash[hash].idx = gmc->last_fault++;
	return false;
}

int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
		r = adev->umc.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	if (adev->mmhub.funcs && adev->mmhub.funcs->ras_late_init) {
		r = adev->mmhub.funcs->ras_late_init(adev);
		if (r)
			return r;
	}

	return amdgpu_xgmi_ras_late_init(adev);
}

void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	amdgpu_umc_ras_fini(adev);
	amdgpu_mmhub_ras_fini(adev);
	amdgpu_xgmi_ras_fini(adev);
}

/*
 * The latest engine allocation on gfx9/10 is:
 * Engine 2, 3: firmware
 * Engine 0, 1, 4~16: amdgpu ring,
 * subject to change when ring number changes
 * Engine 17: Gart flushes
 */
#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3

int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP,
		 GFXHUB_FREE_VM_INV_ENGS_BITMAP};
	unsigned i;
	unsigned vmhub, inv_eng;

	for (i = 0; i < adev->num_rings; ++i) {
		ring = adev->rings[i];
		vmhub = ring->funcs->vmhub;

		inv_eng = ffs(vm_inv_engs[vmhub]);
		if (!inv_eng) {
			dev_err(adev->dev, "no VM inv eng for ring %s\n",
				ring->name);
			return -EINVAL;
		}

		ring->vm_inv_eng = inv_eng - 1;
		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);

		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
	}

	return 0;
}
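
/*
 * Example of the bitmap allocation above (illustration only): the free
 * mask 0x1FFF3 has bits 0, 1 and 4..16 set. The first ring on a hub gets
 * ffs(0x1FFF3) == 1, i.e. engine 0, leaving 0x1FFF2; the next ring gets
 * engine 1, then engine 4, and so on. Engines 2 and 3 (firmware) and
 * engine 17 (GART flushes) are never handed out.
 */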