/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/devcoredump.h>
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86) && defined(__linux__)
#include <asm/intel-family.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

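/* ASIC names, indexed by enum amd_asic_type */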
"OLAND", 108 "HAINAN", 109 "BONAIRE", 110 "KAVERI", 111 "KABINI", 112 "HAWAII", 113 "MULLINS", 114 "TOPAZ", 115 "TONGA", 116 "FIJI", 117 "CARRIZO", 118 "STONEY", 119 "POLARIS10", 120 "POLARIS11", 121 "POLARIS12", 122 "VEGAM", 123 "VEGA10", 124 "VEGA12", 125 "VEGA20", 126 "RAVEN", 127 "ARCTURUS", 128 "RENOIR", 129 "ALDEBARAN", 130 "NAVI10", 131 "CYAN_SKILLFISH", 132 "NAVI14", 133 "NAVI12", 134 "SIENNA_CICHLID", 135 "NAVY_FLOUNDER", 136 "VANGOGH", 137 "DIMGREY_CAVEFISH", 138 "BEIGE_GOBY", 139 "YELLOW_CARP", 140 "IP DISCOVERY", 141 "LAST", 142 }; 143 144 /** 145 * DOC: pcie_replay_count 146 * 147 * The amdgpu driver provides a sysfs API for reporting the total number 148 * of PCIe replays (NAKs) 149 * The file pcie_replay_count is used for this and returns the total 150 * number of replays as a sum of the NAKs generated and NAKs received 151 */ 152 153 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, 154 struct device_attribute *attr, char *buf) 155 { 156 struct drm_device *ddev = dev_get_drvdata(dev); 157 struct amdgpu_device *adev = drm_to_adev(ddev); 158 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); 159 160 return sysfs_emit(buf, "%llu\n", cnt); 161 } 162 163 static DEVICE_ATTR(pcie_replay_count, 0444, 164 amdgpu_device_get_pcie_replay_count, NULL); 165 166 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); 167 168 169 /** 170 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control 171 * 172 * @dev: drm_device pointer 173 * 174 * Returns true if the device is a dGPU with ATPX power control, 175 * otherwise return false. 176 */ 177 bool amdgpu_device_supports_px(struct drm_device *dev) 178 { 179 struct amdgpu_device *adev = drm_to_adev(dev); 180 181 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid()) 182 return true; 183 return false; 184 } 185 186 /** 187 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources 188 * 189 * @dev: drm_device pointer 190 * 191 * Returns true if the device is a dGPU with ACPI power control, 192 * otherwise return false. 193 */ 194 bool amdgpu_device_supports_boco(struct drm_device *dev) 195 { 196 struct amdgpu_device *adev = drm_to_adev(dev); 197 198 if (adev->has_pr3 || 199 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid())) 200 return true; 201 return false; 202 } 203 204 /** 205 * amdgpu_device_supports_baco - Does the device support BACO 206 * 207 * @dev: drm_device pointer 208 * 209 * Returns true if the device supporte BACO, 210 * otherwise return false. 211 */ 212 bool amdgpu_device_supports_baco(struct drm_device *dev) 213 { 214 struct amdgpu_device *adev = drm_to_adev(dev); 215 216 return amdgpu_asic_supports_baco(adev); 217 } 218 219 /** 220 * amdgpu_device_supports_smart_shift - Is the device dGPU with 221 * smart shift support 222 * 223 * @dev: drm_device pointer 224 * 225 * Returns true if the device is a dGPU with Smart Shift support, 226 * otherwise returns false. 
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v,
			     uint32_t xcc_id)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

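/**
 * amdgpu_device_indirect_rreg_ext - read an indirect register with a
 * 64-bit register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */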
u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if (adev->nbio.funcs->get_pcie_index_hi_offset)
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

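/**
 * amdgpu_device_indirect_wreg_ext - write an indirect register with a
 * 64-bit register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */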
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if (adev->nbio.funcs->get_pcie_index_hi_offset)
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	int ret;

	amdgpu_asic_pre_asic_init(adev);

	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
	    adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
		amdgpu_psp_wait_for_bootloader(adev);
		ret = amdgpu_atomfirmware_asic_init(adev, true);
		return ret;
	} else {
		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
	}

	return 0;
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with and/or masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	STUB();
	return -ENOSYS;
#ifdef notyet
	return pci_reset_function(adev->pdev);
#endif
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or a negative error code on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
 * to fail, but if any of the BARs is not accessible after the resize we abort
 * driver loading by returning -ENODEV.
 */
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
#ifdef __linux__
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
	struct pci_bus *root;
	struct resource *res;
	unsigned int i;
	u16 cmd;
	int r;

	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
		return 0;

	/* Bypass for VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/* skip if the bios has already enabled large BAR */
	if (adev->gmc.real_vram_size &&
	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
		return 0;

	/* Check if the root BUS has 64bit memory resources */
	root = adev->pdev->bus;
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, res, i) {
		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    res->start > 0x100000000ull)
			break;
	}

	/* Trying to resize is pointless without a root hub window above 4GB */
	if (!res)
		return 0;

	/* Limit the BAR size to what is available */
	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
			rbar_size);

	/* Disable memory decoding while we change the BAR addresses and size */
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);

	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
	amdgpu_doorbell_fini(adev);
	if (adev->asic_type >= CHIP_BONAIRE)
		pci_release_resource(adev->pdev, 2);

	pci_release_resource(adev->pdev, 0);

	r = pci_resize_resource(adev->pdev, 0, rbar_size);
	if (r == -ENOSPC)
		DRM_INFO("Not enough PCI address space for a large BAR.");
	else if (r && r != -ENOTSUPP)
		DRM_ERROR("Problem resizing BAR0 (%d).", r);

	pci_assign_unassigned_bus_resources(adev->pdev->bus);

	/* When the doorbell or fb BAR isn't available we have no chance of
	 * using the device.
	 */
	r = amdgpu_doorbell_init(adev);
	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
		return -ENODEV;

	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
#endif /* __linux__ */

	return 0;
}

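/*
 * Reading the video BIOS is skipped for APUs that expose an AID mask
 * (e.g. GC 9.4.3 based APUs).
 */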
static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
{
	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
		return false;

	return true;
}

/*
 * GPU helpers function.
 */
/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic has been initialized (all asics) at driver startup
 * or post is needed if hw reset is performed.
 * Returns true if post is needed or false if not.
 */
bool amdgpu_device_need_post(struct amdgpu_device *adev)
{
	uint32_t reg;

	if (amdgpu_sriov_vf(adev))
		return false;

	if (!amdgpu_device_read_bios(adev))
		return false;

	if (amdgpu_passthrough(adev)) {
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old smc fw versions still need the driver to do a vPost,
		 * otherwise the gpu hangs. smc fw versions above 22.15 don't have this
		 * flaw, so we force vPost to be executed for smc versions below 22.15.
		 */
		if (adev->asic_type == CHIP_FIJI) {
			int err;
			uint32_t fw_ver;

			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
			/* force vPost if error occurred */
			if (err)
				return true;

			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
			release_firmware(adev->pm.fw);
			if (fw_ver < 0x00160e00)
				return true;
		}
	}

	/* Don't post if we need to reset whole hive on init */
	if (adev->gmc.xgmi.pending_reset)
		return false;

	if (adev->has_hw_reset) {
		adev->has_hw_reset = false;
		return true;
	}

	/* bios scratch used on CIK+ */
	if (adev->asic_type >= CHIP_BONAIRE)
		return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
	reg = amdgpu_asic_get_config_memsize(adev);

	if ((reg != 0) && (reg != 0xffffffff))
		return false;

	return true;
}

/*
 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
 * speed switching. Until we have confirmation from Intel that a specific host
 * supports it, it's safer that we keep it disabled for all.
 *
 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
 */
bool amdgpu_device_pcie_dynamic_switching_supported(void)
{
#if IS_ENABLED(CONFIG_X86)
#ifdef __linux__
	struct cpuinfo_x86 *c = &cpu_data(0);

	if (c->x86_vendor == X86_VENDOR_INTEL)
#else
	if (strcmp(cpu_vendor, "GenuineIntel") == 0)
#endif
		return false;
#endif
	return true;
}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be set for this device.
 *
 * Returns true if it should be used or false if not.
 */
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
{
	switch (amdgpu_aspm) {
	case -1:
		break;
	case 0:
		return false;
	case 1:
		return true;
	default:
		return false;
	}
	return pcie_aspm_enabled(adev->pdev);
}

bool amdgpu_device_aspm_support_quirk(void)
{
#if IS_ENABLED(CONFIG_X86)
	struct cpu_info *ci = curcpu();

	/* family 6, model 0x97 is Intel Alder Lake, where enabling ASPM is
	 * known to be problematic with amdgpu
	 */
	return !(ci->ci_family == 6 && ci->ci_model == 0x97);
#else
	return true;
#endif
}

/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
#ifdef notyet
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
		bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
		       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
	else
		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}
#endif

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines number of bits in page table versus page directory,
 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 * page table and the remaining bits are in the page directory.
 */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
	/* defines number of bits in page table versus page directory,
	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
	 * page table and the remaining bits are in the page directory
	 */
	if (amdgpu_vm_block_size == -1)
		return;

	if (amdgpu_vm_block_size < 9) {
		dev_warn(adev->dev, "VM page table size (%d) too small\n",
			 amdgpu_vm_block_size);
		amdgpu_vm_block_size = -1;
	}
}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
	/* no need to check the default value */
	if (amdgpu_vm_size == -1)
		return;

	if (amdgpu_vm_size < 1) {
		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
			 amdgpu_vm_size);
		amdgpu_vm_size = -1;
	}
}

static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
{
#ifdef __linux__
	struct sysinfo si;
#endif
	bool is_os_64 = (sizeof(void *) == 8);
	uint64_t total_memory;
	uint64_t dram_size_seven_GB = 0x1B8000000;
	uint64_t dram_size_three_GB = 0xB8000000;

	if (amdgpu_smu_memory_pool_size == 0)
		return;

	if (!is_os_64) {
		DRM_WARN("Not 64-bit OS, feature not supported\n");
		goto def_value;
	}
#ifdef __linux__
	si_meminfo(&si);
	total_memory = (uint64_t)si.totalram * si.mem_unit;
#else
	total_memory = ptoa(physmem);
#endif

	if ((amdgpu_smu_memory_pool_size == 1) ||
	    (amdgpu_smu_memory_pool_size == 2)) {
		if (total_memory < dram_size_three_GB)
			goto def_value1;
	} else if ((amdgpu_smu_memory_pool_size == 4) ||
		   (amdgpu_smu_memory_pool_size == 8)) {
		if (total_memory < dram_size_seven_GB)
			goto def_value1;
	} else {
		DRM_WARN("Smu memory pool size not supported\n");
		goto def_value;
	}
	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;

	return;

def_value1:
	DRM_WARN("Not enough system memory\n");
def_value:
	adev->pm.smu_prv_buffer_size = 0;
}

static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
{
	if (!(adev->flags & AMD_IS_APU) ||
	    adev->asic_type < CHIP_RAVEN)
		return 0;

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->pdev->device == 0x15dd)
			adev->apu_flags |= AMD_APU_IS_RAVEN;
		if (adev->pdev->device == 0x15d8)
			adev->apu_flags |= AMD_APU_IS_PICASSO;
		break;
	case CHIP_RENOIR:
		if ((adev->pdev->device == 0x1636) ||
		    (adev->pdev->device == 0x164c))
			adev->apu_flags |= AMD_APU_IS_RENOIR;
		else
			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
		break;
	case CHIP_VANGOGH:
		adev->apu_flags |= AMD_APU_IS_VANGOGH;
		break;
	case CHIP_YELLOW_CARP:
		break;
	case CHIP_CYAN_SKILLFISH:
		if ((adev->pdev->device == 0x13FE) ||
		    (adev->pdev->device == 0x143F))
			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
		break;
	default:
		break;
	}

	return 0;
}

/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
	if (amdgpu_sched_jobs < 4) {
		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = 4;
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
			 amdgpu_sched_jobs);
		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
	}

	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
		/* gart size must be greater or equal to 32M */
		dev_warn(adev->dev, "gart size (%d) too small\n",
			 amdgpu_gart_size);
		amdgpu_gart_size = -1;
	}

	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
		/* gtt size must be greater or equal to 32M */
		dev_warn(adev->dev, "gtt size (%d) too small\n",
			 amdgpu_gtt_size);
		amdgpu_gtt_size = -1;
	}

	/* valid range is between 4 and 9 inclusive */
	if (amdgpu_vm_fragment_size != -1 &&
	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
		dev_warn(adev->dev, "valid range is between 4 and 9\n");
		amdgpu_vm_fragment_size = -1;
	}

	if (amdgpu_sched_hw_submission < 2) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = 2;
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
			 amdgpu_sched_hw_submission);
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);

	amdgpu_device_check_block_size(adev);

	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);

	return 0;
}

#ifdef __linux__
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver. Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
					enum vga_switcheroo_state state)
{
	struct drm_device *dev = pci_get_drvdata(pdev);
	int r;

	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
		return;

	if (state == VGA_SWITCHEROO_ON) {
		pr_info("switched on\n");
		/* don't suspend or resume card normally */
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
		if (r)
			DRM_WARN("pci_enable_device failed (%d)\n", r);
		amdgpu_device_resume(dev, true);

		dev->switch_power_state = DRM_SWITCH_POWER_ON;
	} else {
		pr_info("switched off\n");
		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
		amdgpu_device_prepare(dev);
		amdgpu_device_suspend(dev, true);
		amdgpu_device_cache_pci_state(pdev);
		/* Shut down the device */
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
	}
}

/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver. Check if the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
{
	struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
	 * locking inversion with the driver load path. And the access here is
	 * completely racy anyway. So don't bother with locking for now.
	 */
	return atomic_read(&dev->open_count) == 0;
}
#endif /* __linux__ */

static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
#ifdef notyet
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
#endif
};

/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_clockgating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP specified.
 * Returns the error code from the last instance.
 */
int amdgpu_device_ip_set_powergating_state(void *dev,
					   enum amd_ip_block_type block_type,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = dev;
	int i, r = 0;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type != block_type)
			continue;
		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
			continue;
		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
			(void *)adev, state);
		if (r)
			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
	}
	return r;
}

/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IPs on the device and updates the clockgating
 * flags for each IP.
 * Updates @flags with the feature flags for each hardware IP where
 * clockgating is enabled.
 */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
					    u64 *flags)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
	}
}

/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
				   enum amd_ip_block_type block_type)
{
	int i, r;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type) {
			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
			if (r)
				return r;
			break;
		}
	}
	return 0;

}

/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if the IP is idle, false if not.
 */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
			      enum amd_ip_block_type block_type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->type == block_type)
			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
	}
	return true;

}

/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
			      enum amd_ip_block_type type)
{
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->type == type)
			return &adev->ip_blocks[i];

	return NULL;
}

/**
 * amdgpu_device_ip_block_version_cmp
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * return 0 if equal or greater
 * return 1 if smaller or the ip_block doesn't exist
 */
int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
				       enum amd_ip_block_type type,
				       u32 major, u32 minor)
{
	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);

	if (ip_block && ((ip_block->version->major > major) ||
			((ip_block->version->major == major) &&
			(ip_block->version->minor >= minor))))
		return 0;

	return 1;
}

/**
 * amdgpu_device_ip_block_add
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
			       const struct amdgpu_ip_block_version *ip_block_version)
{
	if (!ip_block_version)
		return -EINVAL;

	switch (ip_block_version->type) {
	case AMD_IP_BLOCK_TYPE_VCN:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
			return 0;
		break;
	case AMD_IP_BLOCK_TYPE_JPEG:
		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
			return 0;
		break;
	default:
		break;
	}

	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
		 ip_block_version->funcs->name);

	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;

	return 0;
}

/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display. This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

#ifdef notyet
	if (amdgpu_virtual_display) {
		const char *pci_address_name = pci_name(adev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					adev->mode_info.num_crtc = 1;
				}
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
#endif
}

void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
		adev->mode_info.num_crtc = 1;
		adev->enable_virtual_display = true;
		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
			 adev->enable_virtual_display, adev->mode_info.num_crtc);
	}
}

/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
1915 */ 1916 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) 1917 { 1918 const char *chip_name; 1919 char fw_name[40]; 1920 int err; 1921 const struct gpu_info_firmware_header_v1_0 *hdr; 1922 1923 adev->firmware.gpu_info_fw = NULL; 1924 1925 if (adev->mman.discovery_bin) 1926 return 0; 1927 1928 switch (adev->asic_type) { 1929 default: 1930 return 0; 1931 case CHIP_VEGA10: 1932 chip_name = "vega10"; 1933 break; 1934 case CHIP_VEGA12: 1935 chip_name = "vega12"; 1936 break; 1937 case CHIP_RAVEN: 1938 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1939 chip_name = "raven2"; 1940 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1941 chip_name = "picasso"; 1942 else 1943 chip_name = "raven"; 1944 break; 1945 case CHIP_ARCTURUS: 1946 chip_name = "arcturus"; 1947 break; 1948 case CHIP_NAVI12: 1949 chip_name = "navi12"; 1950 break; 1951 } 1952 1953 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); 1954 err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name); 1955 if (err) { 1956 dev_err(adev->dev, 1957 "Failed to get gpu_info firmware \"%s\"\n", 1958 fw_name); 1959 goto out; 1960 } 1961 1962 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; 1963 amdgpu_ucode_print_gpu_info_hdr(&hdr->header); 1964 1965 switch (hdr->version_major) { 1966 case 1: 1967 { 1968 const struct gpu_info_firmware_v1_0 *gpu_info_fw = 1969 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + 1970 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1971 1972 /* 1973 * Should be droped when DAL no longer needs it. 1974 */ 1975 if (adev->asic_type == CHIP_NAVI12) 1976 goto parse_soc_bounding_box; 1977 1978 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); 1979 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); 1980 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); 1981 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); 1982 adev->gfx.config.max_texture_channel_caches = 1983 le32_to_cpu(gpu_info_fw->gc_num_tccs); 1984 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); 1985 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); 1986 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); 1987 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); 1988 adev->gfx.config.double_offchip_lds_buf = 1989 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); 1990 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); 1991 adev->gfx.cu_info.max_waves_per_simd = 1992 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); 1993 adev->gfx.cu_info.max_scratch_slots_per_cu = 1994 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); 1995 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); 1996 if (hdr->version_minor >= 1) { 1997 const struct gpu_info_firmware_v1_1 *gpu_info_fw = 1998 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + 1999 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2000 adev->gfx.config.num_sc_per_sh = 2001 le32_to_cpu(gpu_info_fw->num_sc_per_sh); 2002 adev->gfx.config.num_packer_per_sc = 2003 le32_to_cpu(gpu_info_fw->num_packer_per_sc); 2004 } 2005 2006 parse_soc_bounding_box: 2007 /* 2008 * soc bounding box info is not integrated in disocovery table, 2009 * we always need to parse it from gpu info firmware if needed. 
2010 */ 2011 if (hdr->version_minor == 2) { 2012 const struct gpu_info_firmware_v1_2 *gpu_info_fw = 2013 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + 2014 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2015 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; 2016 } 2017 break; 2018 } 2019 default: 2020 dev_err(adev->dev, 2021 "Unsupported gpu_info table %d\n", hdr->header.ucode_version); 2022 err = -EINVAL; 2023 goto out; 2024 } 2025 out: 2026 return err; 2027 } 2028 2029 /** 2030 * amdgpu_device_ip_early_init - run early init for hardware IPs 2031 * 2032 * @adev: amdgpu_device pointer 2033 * 2034 * Early initialization pass for hardware IPs. The hardware IPs that make 2035 * up each asic are discovered each IP's early_init callback is run. This 2036 * is the first stage in initializing the asic. 2037 * Returns 0 on success, negative error code on failure. 2038 */ 2039 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 2040 { 2041 struct pci_dev *parent; 2042 int i, r; 2043 bool total; 2044 2045 amdgpu_device_enable_virtual_display(adev); 2046 2047 if (amdgpu_sriov_vf(adev)) { 2048 r = amdgpu_virt_request_full_gpu(adev, true); 2049 if (r) 2050 return r; 2051 } 2052 2053 switch (adev->asic_type) { 2054 #ifdef CONFIG_DRM_AMDGPU_SI 2055 case CHIP_VERDE: 2056 case CHIP_TAHITI: 2057 case CHIP_PITCAIRN: 2058 case CHIP_OLAND: 2059 case CHIP_HAINAN: 2060 adev->family = AMDGPU_FAMILY_SI; 2061 r = si_set_ip_blocks(adev); 2062 if (r) 2063 return r; 2064 break; 2065 #endif 2066 #ifdef CONFIG_DRM_AMDGPU_CIK 2067 case CHIP_BONAIRE: 2068 case CHIP_HAWAII: 2069 case CHIP_KAVERI: 2070 case CHIP_KABINI: 2071 case CHIP_MULLINS: 2072 if (adev->flags & AMD_IS_APU) 2073 adev->family = AMDGPU_FAMILY_KV; 2074 else 2075 adev->family = AMDGPU_FAMILY_CI; 2076 2077 r = cik_set_ip_blocks(adev); 2078 if (r) 2079 return r; 2080 break; 2081 #endif 2082 case CHIP_TOPAZ: 2083 case CHIP_TONGA: 2084 case CHIP_FIJI: 2085 case CHIP_POLARIS10: 2086 case CHIP_POLARIS11: 2087 case CHIP_POLARIS12: 2088 case CHIP_VEGAM: 2089 case CHIP_CARRIZO: 2090 case CHIP_STONEY: 2091 if (adev->flags & AMD_IS_APU) 2092 adev->family = AMDGPU_FAMILY_CZ; 2093 else 2094 adev->family = AMDGPU_FAMILY_VI; 2095 2096 r = vi_set_ip_blocks(adev); 2097 if (r) 2098 return r; 2099 break; 2100 default: 2101 r = amdgpu_discovery_set_ip_blocks(adev); 2102 if (r) 2103 return r; 2104 break; 2105 } 2106 2107 if (amdgpu_has_atpx() && 2108 (amdgpu_is_atpx_hybrid() || 2109 amdgpu_has_atpx_dgpu_power_cntl()) && 2110 ((adev->flags & AMD_IS_APU) == 0) && 2111 !dev_is_removable(&adev->pdev->dev)) 2112 adev->flags |= AMD_IS_PX; 2113 2114 if (!(adev->flags & AMD_IS_APU)) { 2115 #ifdef notyet 2116 parent = pcie_find_root_port(adev->pdev); 2117 adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; 2118 #else 2119 adev->has_pr3 = false; 2120 #endif 2121 } 2122 2123 2124 adev->pm.pp_feature = amdgpu_pp_feature_mask; 2125 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) 2126 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 2127 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) 2128 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; 2129 if (!amdgpu_device_pcie_dynamic_switching_supported()) 2130 adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; 2131 2132 total = true; 2133 for (i = 0; i < adev->num_ip_blocks; i++) { 2134 if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 2135 DRM_WARN("disabled ip block: %d <%s>\n", 2136 i, adev->ip_blocks[i].version->funcs->name); 2137 adev->ip_blocks[i].status.valid = false; 2138 } else { 2139 if (adev->ip_blocks[i].version->funcs->early_init) { 2140 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); 2141 if (r == -ENOENT) { 2142 adev->ip_blocks[i].status.valid = false; 2143 } else if (r) { 2144 DRM_ERROR("early_init of IP block <%s> failed %d\n", 2145 adev->ip_blocks[i].version->funcs->name, r); 2146 total = false; 2147 } else { 2148 adev->ip_blocks[i].status.valid = true; 2149 } 2150 } else { 2151 adev->ip_blocks[i].status.valid = true; 2152 } 2153 } 2154 /* get the vbios after the asic_funcs are set up */ 2155 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2156 r = amdgpu_device_parse_gpu_info_fw(adev); 2157 if (r) 2158 return r; 2159 2160 /* Read BIOS */ 2161 if (amdgpu_device_read_bios(adev)) { 2162 if (!amdgpu_get_bios(adev)) 2163 return -EINVAL; 2164 2165 r = amdgpu_atombios_init(adev); 2166 if (r) { 2167 dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2168 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2169 return r; 2170 } 2171 } 2172 2173 /*get pf2vf msg info at it's earliest time*/ 2174 if (amdgpu_sriov_vf(adev)) 2175 amdgpu_virt_init_data_exchange(adev); 2176 2177 } 2178 } 2179 if (!total) 2180 return -ENODEV; 2181 2182 amdgpu_amdkfd_device_probe(adev); 2183 adev->cg_flags &= amdgpu_cg_mask; 2184 adev->pg_flags &= amdgpu_pg_mask; 2185 2186 return 0; 2187 } 2188 2189 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) 2190 { 2191 int i, r; 2192 2193 for (i = 0; i < adev->num_ip_blocks; i++) { 2194 if (!adev->ip_blocks[i].status.sw) 2195 continue; 2196 if (adev->ip_blocks[i].status.hw) 2197 continue; 2198 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2199 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || 2200 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2201 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2202 if (r) { 2203 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2204 adev->ip_blocks[i].version->funcs->name, r); 2205 return r; 2206 } 2207 adev->ip_blocks[i].status.hw = true; 2208 } 2209 } 2210 2211 return 0; 2212 } 2213 2214 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) 2215 { 2216 int i, r; 2217 2218 for (i = 0; i < adev->num_ip_blocks; i++) { 2219 if (!adev->ip_blocks[i].status.sw) 2220 continue; 2221 if (adev->ip_blocks[i].status.hw) 2222 continue; 2223 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2224 if (r) { 2225 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2226 adev->ip_blocks[i].version->funcs->name, r); 2227 return r; 2228 } 2229 adev->ip_blocks[i].status.hw = true; 2230 } 2231 2232 return 0; 2233 } 2234 2235 static int amdgpu_device_fw_loading(struct amdgpu_device *adev) 2236 { 
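	/*
	 * Bring up the PSP block first on ASICs that have one (Vega10 and
	 * later): resume it on the reset/suspend path, hw_init it otherwise.
	 * Afterwards make sure the SMU firmware is loaded on bare metal
	 * (or on Tonga under SR-IOV).
	 */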
2237 int r = 0; 2238 int i; 2239 uint32_t smu_version; 2240 2241 if (adev->asic_type >= CHIP_VEGA10) { 2242 for (i = 0; i < adev->num_ip_blocks; i++) { 2243 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) 2244 continue; 2245 2246 if (!adev->ip_blocks[i].status.sw) 2247 continue; 2248 2249 /* no need to do the fw loading again if already done*/ 2250 if (adev->ip_blocks[i].status.hw == true) 2251 break; 2252 2253 if (amdgpu_in_reset(adev) || adev->in_suspend) { 2254 r = adev->ip_blocks[i].version->funcs->resume(adev); 2255 if (r) { 2256 DRM_ERROR("resume of IP block <%s> failed %d\n", 2257 adev->ip_blocks[i].version->funcs->name, r); 2258 return r; 2259 } 2260 } else { 2261 r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2262 if (r) { 2263 DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2264 adev->ip_blocks[i].version->funcs->name, r); 2265 return r; 2266 } 2267 } 2268 2269 adev->ip_blocks[i].status.hw = true; 2270 break; 2271 } 2272 } 2273 2274 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) 2275 r = amdgpu_pm_load_smu_firmware(adev, &smu_version); 2276 2277 return r; 2278 } 2279 2280 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) 2281 { 2282 long timeout; 2283 int r, i; 2284 2285 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 2286 struct amdgpu_ring *ring = adev->rings[i]; 2287 2288 /* No need to setup the GPU scheduler for rings that don't need it */ 2289 if (!ring || ring->no_scheduler) 2290 continue; 2291 2292 switch (ring->funcs->type) { 2293 case AMDGPU_RING_TYPE_GFX: 2294 timeout = adev->gfx_timeout; 2295 break; 2296 case AMDGPU_RING_TYPE_COMPUTE: 2297 timeout = adev->compute_timeout; 2298 break; 2299 case AMDGPU_RING_TYPE_SDMA: 2300 timeout = adev->sdma_timeout; 2301 break; 2302 default: 2303 timeout = adev->video_timeout; 2304 break; 2305 } 2306 2307 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, 2308 ring->num_hw_submission, 0, 2309 timeout, adev->reset_domain->wq, 2310 ring->sched_score, ring->name, 2311 adev->dev); 2312 if (r) { 2313 DRM_ERROR("Failed to create scheduler on ring %s.\n", 2314 ring->name); 2315 return r; 2316 } 2317 } 2318 2319 amdgpu_xcp_update_partition_sched_list(adev); 2320 2321 return 0; 2322 } 2323 2324 2325 /** 2326 * amdgpu_device_ip_init - run init for hardware IPs 2327 * 2328 * @adev: amdgpu_device pointer 2329 * 2330 * Main initialization pass for hardware IPs. The list of all the hardware 2331 * IPs that make up the asic is walked and the sw_init and hw_init callbacks 2332 * are run. sw_init initializes the software state associated with each IP 2333 * and hw_init initializes the hardware associated with each IP. 2334 * Returns 0 on success, negative error code on failure. 
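 *
 * The rough order below: sw_init for every block, early hw_init for the
 * COMMON and GMC blocks (so GPU memory can be allocated), then the IB pool
 * and ucode buffer, the two hw_init phases with firmware loading in between,
 * RAS recovery init, the XGMI reset domain, the ring schedulers and KFD.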
2335 */ 2336 static int amdgpu_device_ip_init(struct amdgpu_device *adev) 2337 { 2338 int i, r; 2339 2340 r = amdgpu_ras_init(adev); 2341 if (r) 2342 return r; 2343 2344 for (i = 0; i < adev->num_ip_blocks; i++) { 2345 if (!adev->ip_blocks[i].status.valid) 2346 continue; 2347 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 2348 if (r) { 2349 DRM_ERROR("sw_init of IP block <%s> failed %d\n", 2350 adev->ip_blocks[i].version->funcs->name, r); 2351 goto init_failed; 2352 } 2353 adev->ip_blocks[i].status.sw = true; 2354 2355 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2356 /* need to do common hw init early so everything is set up for gmc */ 2357 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2358 if (r) { 2359 DRM_ERROR("hw_init %d failed %d\n", i, r); 2360 goto init_failed; 2361 } 2362 adev->ip_blocks[i].status.hw = true; 2363 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2364 /* need to do gmc hw init early so we can allocate gpu mem */ 2365 /* Try to reserve bad pages early */ 2366 if (amdgpu_sriov_vf(adev)) 2367 amdgpu_virt_exchange_data(adev); 2368 2369 r = amdgpu_device_mem_scratch_init(adev); 2370 if (r) { 2371 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); 2372 goto init_failed; 2373 } 2374 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2375 if (r) { 2376 DRM_ERROR("hw_init %d failed %d\n", i, r); 2377 goto init_failed; 2378 } 2379 r = amdgpu_device_wb_init(adev); 2380 if (r) { 2381 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 2382 goto init_failed; 2383 } 2384 adev->ip_blocks[i].status.hw = true; 2385 2386 /* right after GMC hw init, we create CSA */ 2387 if (adev->gfx.mcbp) { 2388 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 2389 AMDGPU_GEM_DOMAIN_VRAM | 2390 AMDGPU_GEM_DOMAIN_GTT, 2391 AMDGPU_CSA_SIZE); 2392 if (r) { 2393 DRM_ERROR("allocate CSA failed %d\n", r); 2394 goto init_failed; 2395 } 2396 } 2397 } 2398 } 2399 2400 if (amdgpu_sriov_vf(adev)) 2401 amdgpu_virt_init_data_exchange(adev); 2402 2403 r = amdgpu_ib_pool_init(adev); 2404 if (r) { 2405 dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2406 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2407 goto init_failed; 2408 } 2409 2410 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2411 if (r) 2412 goto init_failed; 2413 2414 r = amdgpu_device_ip_hw_init_phase1(adev); 2415 if (r) 2416 goto init_failed; 2417 2418 r = amdgpu_device_fw_loading(adev); 2419 if (r) 2420 goto init_failed; 2421 2422 r = amdgpu_device_ip_hw_init_phase2(adev); 2423 if (r) 2424 goto init_failed; 2425 2426 /* 2427 * retired pages will be loaded from eeprom and reserved here, 2428 * it should be called after amdgpu_device_ip_hw_init_phase2 since 2429 * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2430 * for I2C communication which only true at this point. 2431 * 2432 * amdgpu_ras_recovery_init may fail, but the upper only cares the 2433 * failure from bad gpu situation and stop amdgpu init process 2434 * accordingly. For other failed cases, it will still release all 2435 * the resource and print error message, rather than returning one 2436 * negative value to upper level. 
2437 *
2438 * Note: theoretically, this should be called before all vram allocations
2439 * to protect retired pages from abuse
2440 */
2441 r = amdgpu_ras_recovery_init(adev);
2442 if (r)
2443 goto init_failed;
2444
2445 /**
2446 * In case of XGMI, grab an extra reference on the reset domain for this device
2447 */
2448 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2449 if (amdgpu_xgmi_add_device(adev) == 0) {
2450 if (!amdgpu_sriov_vf(adev)) {
2451 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2452
2453 if (WARN_ON(!hive)) {
2454 r = -ENOENT;
2455 goto init_failed;
2456 }
2457
2458 if (!hive->reset_domain ||
2459 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2460 r = -ENOENT;
2461 amdgpu_put_xgmi_hive(hive);
2462 goto init_failed;
2463 }
2464
2465 /* Drop the early temporary reset domain we created for device */
2466 amdgpu_reset_put_reset_domain(adev->reset_domain);
2467 adev->reset_domain = hive->reset_domain;
2468 amdgpu_put_xgmi_hive(hive);
2469 }
2470 }
2471 }
2472
2473 r = amdgpu_device_init_schedulers(adev);
2474 if (r)
2475 goto init_failed;
2476
2477 /* Don't init kfd if the whole hive needs to be reset during init */
2478 if (!adev->gmc.xgmi.pending_reset) {
2479 kgd2kfd_init_zone_device(adev);
2480 amdgpu_amdkfd_device_init(adev);
2481 }
2482
2483 amdgpu_fru_get_product_info(adev);
2484
2485 init_failed:
2486
2487 return r;
2488 }
2489
2490 /**
2491 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2492 *
2493 * @adev: amdgpu_device pointer
2494 *
2495 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2496 * this function before a GPU reset. If the value is retained after a
2497 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2498 */
2499 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2500 {
2501 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2502 }
2503
2504 /**
2505 * amdgpu_device_check_vram_lost - check if vram is valid
2506 *
2507 * @adev: amdgpu_device pointer
2508 *
2509 * Checks the reset magic value written to the gart pointer in VRAM.
2510 * The driver calls this after a GPU reset to see if the contents of
2511 * VRAM is lost or not.
2512 * Returns true if vram is lost, false if not.
2513 */
2514 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2515 {
2516 if (memcmp(adev->gart.ptr, adev->reset_magic,
2517 AMDGPU_RESET_MAGIC_NUM))
2518 return true;
2519
2520 if (!amdgpu_in_reset(adev))
2521 return false;
2522
2523 /*
2524 * For all ASICs with baco/mode1 reset, the VRAM is
2525 * always assumed to be lost.
2526 */
2527 switch (amdgpu_asic_reset_method(adev)) {
2528 case AMD_RESET_METHOD_BACO:
2529 case AMD_RESET_METHOD_MODE1:
2530 return true;
2531 default:
2532 return false;
2533 }
2534 }
2535
2536 /**
2537 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2538 *
2539 * @adev: amdgpu_device pointer
2540 * @state: clockgating state (gate or ungate)
2541 *
2542 * The list of all the hardware IPs that make up the asic is walked and the
2543 * set_clockgating_state callbacks are run.
2544 * The late initialization pass enables clockgating for the hardware IPs,
2545 * while the fini/suspend pass disables it.
2546 * Returns 0 on success, negative error code on failure.
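 *
 * For example, amdgpu_device_ip_late_init() gates both clock- and
 * powergating once every block is up:
 *   amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 *   amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 * while amdgpu_device_ip_fini_early() does the reverse with the UNGATE
 * states before the hardware is torn down.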
2547 */ 2548 2549 int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2550 enum amd_clockgating_state state) 2551 { 2552 int i, j, r; 2553 2554 if (amdgpu_emu_mode == 1) 2555 return 0; 2556 2557 for (j = 0; j < adev->num_ip_blocks; j++) { 2558 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2559 if (!adev->ip_blocks[i].status.late_initialized) 2560 continue; 2561 /* skip CG for GFX, SDMA on S0ix */ 2562 if (adev->in_s0ix && 2563 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 2564 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 2565 continue; 2566 /* skip CG for VCE/UVD, it's handled specially */ 2567 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2568 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2569 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2570 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2571 adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2572 /* enable clockgating to save power */ 2573 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2574 state); 2575 if (r) { 2576 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2577 adev->ip_blocks[i].version->funcs->name, r); 2578 return r; 2579 } 2580 } 2581 } 2582 2583 return 0; 2584 } 2585 2586 int amdgpu_device_set_pg_state(struct amdgpu_device *adev, 2587 enum amd_powergating_state state) 2588 { 2589 int i, j, r; 2590 2591 if (amdgpu_emu_mode == 1) 2592 return 0; 2593 2594 for (j = 0; j < adev->num_ip_blocks; j++) { 2595 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2596 if (!adev->ip_blocks[i].status.late_initialized) 2597 continue; 2598 /* skip PG for GFX, SDMA on S0ix */ 2599 if (adev->in_s0ix && 2600 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 2601 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 2602 continue; 2603 /* skip CG for VCE/UVD, it's handled specially */ 2604 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2605 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2606 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2607 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2608 adev->ip_blocks[i].version->funcs->set_powergating_state) { 2609 /* enable powergating to save power */ 2610 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2611 state); 2612 if (r) { 2613 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2614 adev->ip_blocks[i].version->funcs->name, r); 2615 return r; 2616 } 2617 } 2618 } 2619 return 0; 2620 } 2621 2622 static int amdgpu_device_enable_mgpu_fan_boost(void) 2623 { 2624 struct amdgpu_gpu_instance *gpu_ins; 2625 struct amdgpu_device *adev; 2626 int i, ret = 0; 2627 2628 mutex_lock(&mgpu_info.mutex); 2629 2630 /* 2631 * MGPU fan boost feature should be enabled 2632 * only when there are two or more dGPUs in 2633 * the system 2634 */ 2635 if (mgpu_info.num_dgpu < 2) 2636 goto out; 2637 2638 for (i = 0; i < mgpu_info.num_dgpu; i++) { 2639 gpu_ins = &(mgpu_info.gpu_ins[i]); 2640 adev = gpu_ins->adev; 2641 if (!(adev->flags & AMD_IS_APU) && 2642 !gpu_ins->mgpu_fan_enabled) { 2643 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2644 if (ret) 2645 break; 2646 2647 gpu_ins->mgpu_fan_enabled = 1; 2648 } 2649 } 2650 2651 out: 2652 mutex_unlock(&mgpu_info.mutex); 2653 2654 return ret; 2655 } 2656 2657 /** 2658 * amdgpu_device_ip_late_init - run late init for hardware IPs 2659 * 2660 * @adev: 
amdgpu_device pointer
2661 *
2662 * Late initialization pass for hardware IPs. The list of all the hardware
2663 * IPs that make up the asic is walked and the late_init callbacks are run.
2664 * late_init covers any special initialization that an IP requires
2665 * after all of the other IPs have been initialized or something that needs
2666 * to happen late in the init process.
2667 * Returns 0 on success, negative error code on failure.
2668 */
2669 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2670 {
2671 struct amdgpu_gpu_instance *gpu_instance;
2672 int i = 0, r;
2673
2674 for (i = 0; i < adev->num_ip_blocks; i++) {
2675 if (!adev->ip_blocks[i].status.hw)
2676 continue;
2677 if (adev->ip_blocks[i].version->funcs->late_init) {
2678 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2679 if (r) {
2680 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2681 adev->ip_blocks[i].version->funcs->name, r);
2682 return r;
2683 }
2684 }
2685 adev->ip_blocks[i].status.late_initialized = true;
2686 }
2687
2688 r = amdgpu_ras_late_init(adev);
2689 if (r) {
2690 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2691 return r;
2692 }
2693
2694 amdgpu_ras_set_error_query_ready(adev, true);
2695
2696 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2697 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2698
2699 amdgpu_device_fill_reset_magic(adev);
2700
2701 r = amdgpu_device_enable_mgpu_fan_boost();
2702 if (r)
2703 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2704
2705 /* For passthrough configuration on arcturus and aldebaran, enable special handling for SBR */
2706 if (amdgpu_passthrough(adev) &&
2707 ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2708 adev->asic_type == CHIP_ALDEBARAN))
2709 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2710
2711 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2712 mutex_lock(&mgpu_info.mutex);
2713
2714 /*
2715 * Reset the device p-state to low, as this was booted with high.
2716 *
2717 * This should be performed only after all devices from the same
2718 * hive get initialized.
2719 *
2720 * However, the number of devices in the hive is not known in advance;
2721 * it is counted one by one as the devices are initialized.
2722 *
2723 * So we wait until all XGMI interlinked devices are initialized.
2724 * This may bring some delays as those devices may come from
2725 * different hives. But that should be OK.
2726 */ 2727 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2728 for (i = 0; i < mgpu_info.num_gpu; i++) { 2729 gpu_instance = &(mgpu_info.gpu_ins[i]); 2730 if (gpu_instance->adev->flags & AMD_IS_APU) 2731 continue; 2732 2733 r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 2734 AMDGPU_XGMI_PSTATE_MIN); 2735 if (r) { 2736 DRM_ERROR("pstate setting failed (%d).\n", r); 2737 break; 2738 } 2739 } 2740 } 2741 2742 mutex_unlock(&mgpu_info.mutex); 2743 } 2744 2745 return 0; 2746 } 2747 2748 /** 2749 * amdgpu_device_smu_fini_early - smu hw_fini wrapper 2750 * 2751 * @adev: amdgpu_device pointer 2752 * 2753 * For ASICs need to disable SMC first 2754 */ 2755 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) 2756 { 2757 int i, r; 2758 2759 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) 2760 return; 2761 2762 for (i = 0; i < adev->num_ip_blocks; i++) { 2763 if (!adev->ip_blocks[i].status.hw) 2764 continue; 2765 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 2766 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2767 /* XXX handle errors */ 2768 if (r) { 2769 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2770 adev->ip_blocks[i].version->funcs->name, r); 2771 } 2772 adev->ip_blocks[i].status.hw = false; 2773 break; 2774 } 2775 } 2776 } 2777 2778 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) 2779 { 2780 int i, r; 2781 2782 for (i = 0; i < adev->num_ip_blocks; i++) { 2783 if (!adev->ip_blocks[i].version->funcs->early_fini) 2784 continue; 2785 2786 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); 2787 if (r) { 2788 DRM_DEBUG("early_fini of IP block <%s> failed %d\n", 2789 adev->ip_blocks[i].version->funcs->name, r); 2790 } 2791 } 2792 2793 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2794 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2795 2796 amdgpu_amdkfd_suspend(adev, false); 2797 2798 /* Workaroud for ASICs need to disable SMC first */ 2799 amdgpu_device_smu_fini_early(adev); 2800 2801 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2802 if (!adev->ip_blocks[i].status.hw) 2803 continue; 2804 2805 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2806 /* XXX handle errors */ 2807 if (r) { 2808 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2809 adev->ip_blocks[i].version->funcs->name, r); 2810 } 2811 2812 adev->ip_blocks[i].status.hw = false; 2813 } 2814 2815 if (amdgpu_sriov_vf(adev)) { 2816 if (amdgpu_virt_release_full_gpu(adev, false)) 2817 DRM_ERROR("failed to release exclusive mode on fini\n"); 2818 } 2819 2820 return 0; 2821 } 2822 2823 /** 2824 * amdgpu_device_ip_fini - run fini for hardware IPs 2825 * 2826 * @adev: amdgpu_device pointer 2827 * 2828 * Main teardown pass for hardware IPs. The list of all the hardware 2829 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 2830 * are run. hw_fini tears down the hardware associated with each IP 2831 * and sw_fini tears down any software state associated with each IP. 2832 * Returns 0 on success, negative error code on failure. 
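 *
 * Roughly the reverse of amdgpu_device_ip_init(): sw_fini runs in reverse
 * block order (releasing the ucode buffer, CSA, writeback, scratch memory
 * and IB pool when the GMC block is reached), followed by late_fini for
 * every late-initialized block and finally amdgpu_ras_fini().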
2833 */ 2834 static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 2835 { 2836 int i, r; 2837 2838 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) 2839 amdgpu_virt_release_ras_err_handler_data(adev); 2840 2841 if (adev->gmc.xgmi.num_physical_nodes > 1) 2842 amdgpu_xgmi_remove_device(adev); 2843 2844 amdgpu_amdkfd_device_fini_sw(adev); 2845 2846 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2847 if (!adev->ip_blocks[i].status.sw) 2848 continue; 2849 2850 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2851 amdgpu_ucode_free_bo(adev); 2852 amdgpu_free_static_csa(&adev->virt.csa_obj); 2853 amdgpu_device_wb_fini(adev); 2854 amdgpu_device_mem_scratch_fini(adev); 2855 amdgpu_ib_pool_fini(adev); 2856 } 2857 2858 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2859 /* XXX handle errors */ 2860 if (r) { 2861 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2862 adev->ip_blocks[i].version->funcs->name, r); 2863 } 2864 adev->ip_blocks[i].status.sw = false; 2865 adev->ip_blocks[i].status.valid = false; 2866 } 2867 2868 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2869 if (!adev->ip_blocks[i].status.late_initialized) 2870 continue; 2871 if (adev->ip_blocks[i].version->funcs->late_fini) 2872 adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2873 adev->ip_blocks[i].status.late_initialized = false; 2874 } 2875 2876 amdgpu_ras_fini(adev); 2877 2878 return 0; 2879 } 2880 2881 /** 2882 * amdgpu_device_delayed_init_work_handler - work handler for IB tests 2883 * 2884 * @work: work_struct. 2885 */ 2886 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2887 { 2888 struct amdgpu_device *adev = 2889 container_of(work, struct amdgpu_device, delayed_init_work.work); 2890 int r; 2891 2892 r = amdgpu_ib_ring_tests(adev); 2893 if (r) 2894 DRM_ERROR("ib ring test failed (%d).\n", r); 2895 } 2896 2897 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2898 { 2899 struct amdgpu_device *adev = 2900 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2901 2902 WARN_ON_ONCE(adev->gfx.gfx_off_state); 2903 WARN_ON_ONCE(adev->gfx.gfx_off_req_count); 2904 2905 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2906 adev->gfx.gfx_off_state = true; 2907 } 2908 2909 /** 2910 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2911 * 2912 * @adev: amdgpu_device pointer 2913 * 2914 * Main suspend function for hardware IPs. The list of all the hardware 2915 * IPs that make up the asic is walked, clockgating is disabled and the 2916 * suspend callbacks are run. suspend puts the hardware and software state 2917 * in each IP into a state suitable for suspend. 2918 * Returns 0 on success, negative error code on failure. 2919 */ 2920 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2921 { 2922 int i, r; 2923 2924 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2925 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2926 2927 /* 2928 * Per PMFW team's suggestion, driver needs to handle gfxoff 2929 * and df cstate features disablement for gpu reset(e.g. Mode1Reset) 2930 * scenario. Add the missing df cstate disablement here. 
2931 */ 2932 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) 2933 dev_warn(adev->dev, "Failed to disallow df cstate"); 2934 2935 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2936 if (!adev->ip_blocks[i].status.valid) 2937 continue; 2938 2939 /* displays are handled separately */ 2940 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) 2941 continue; 2942 2943 /* XXX handle errors */ 2944 r = adev->ip_blocks[i].version->funcs->suspend(adev); 2945 /* XXX handle errors */ 2946 if (r) { 2947 DRM_ERROR("suspend of IP block <%s> failed %d\n", 2948 adev->ip_blocks[i].version->funcs->name, r); 2949 return r; 2950 } 2951 2952 adev->ip_blocks[i].status.hw = false; 2953 } 2954 2955 return 0; 2956 } 2957 2958 /** 2959 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2960 * 2961 * @adev: amdgpu_device pointer 2962 * 2963 * Main suspend function for hardware IPs. The list of all the hardware 2964 * IPs that make up the asic is walked, clockgating is disabled and the 2965 * suspend callbacks are run. suspend puts the hardware and software state 2966 * in each IP into a state suitable for suspend. 2967 * Returns 0 on success, negative error code on failure. 2968 */ 2969 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2970 { 2971 int i, r; 2972 2973 if (adev->in_s0ix) 2974 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry); 2975 2976 for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2977 if (!adev->ip_blocks[i].status.valid) 2978 continue; 2979 /* displays are handled in phase1 */ 2980 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2981 continue; 2982 /* PSP lost connection when err_event_athub occurs */ 2983 if (amdgpu_ras_intr_triggered() && 2984 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2985 adev->ip_blocks[i].status.hw = false; 2986 continue; 2987 } 2988 2989 /* skip unnecessary suspend if we do not initialize them yet */ 2990 if (adev->gmc.xgmi.pending_reset && 2991 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 2992 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC || 2993 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2994 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) { 2995 adev->ip_blocks[i].status.hw = false; 2996 continue; 2997 } 2998 2999 /* skip suspend of gfx/mes and psp for S0ix 3000 * gfx is in gfxoff state, so on resume it will exit gfxoff just 3001 * like at runtime. PSP is also part of the always on hardware 3002 * so no need to suspend it. 3003 */ 3004 if (adev->in_s0ix && 3005 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || 3006 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 3007 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) 3008 continue; 3009 3010 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ 3011 if (adev->in_s0ix && 3012 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) && 3013 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 3014 continue; 3015 3016 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot. 3017 * These are in TMR, hence are expected to be reused by PSP-TOS to reload 3018 * from this location and RLC Autoload automatically also gets loaded 3019 * from here based on PMFW -> PSP message during re-init sequence. 3020 * Therefore, the psp suspend & resume should be skipped to avoid destroy 3021 * the TMR and reload FWs again for IMU enabled APU ASICs. 
3022 */ 3023 if (amdgpu_in_reset(adev) && 3024 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs && 3025 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3026 continue; 3027 3028 /* XXX handle errors */ 3029 r = adev->ip_blocks[i].version->funcs->suspend(adev); 3030 /* XXX handle errors */ 3031 if (r) { 3032 DRM_ERROR("suspend of IP block <%s> failed %d\n", 3033 adev->ip_blocks[i].version->funcs->name, r); 3034 } 3035 adev->ip_blocks[i].status.hw = false; 3036 /* handle putting the SMC in the appropriate state */ 3037 if (!amdgpu_sriov_vf(adev)) { 3038 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 3039 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 3040 if (r) { 3041 DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 3042 adev->mp1_state, r); 3043 return r; 3044 } 3045 } 3046 } 3047 } 3048 3049 return 0; 3050 } 3051 3052 /** 3053 * amdgpu_device_ip_suspend - run suspend for hardware IPs 3054 * 3055 * @adev: amdgpu_device pointer 3056 * 3057 * Main suspend function for hardware IPs. The list of all the hardware 3058 * IPs that make up the asic is walked, clockgating is disabled and the 3059 * suspend callbacks are run. suspend puts the hardware and software state 3060 * in each IP into a state suitable for suspend. 3061 * Returns 0 on success, negative error code on failure. 3062 */ 3063 int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 3064 { 3065 int r; 3066 3067 if (amdgpu_sriov_vf(adev)) { 3068 amdgpu_virt_fini_data_exchange(adev); 3069 amdgpu_virt_request_full_gpu(adev, false); 3070 } 3071 3072 r = amdgpu_device_ip_suspend_phase1(adev); 3073 if (r) 3074 return r; 3075 r = amdgpu_device_ip_suspend_phase2(adev); 3076 3077 if (amdgpu_sriov_vf(adev)) 3078 amdgpu_virt_release_full_gpu(adev, false); 3079 3080 return r; 3081 } 3082 3083 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 3084 { 3085 int i, r; 3086 3087 static enum amd_ip_block_type ip_order[] = { 3088 AMD_IP_BLOCK_TYPE_COMMON, 3089 AMD_IP_BLOCK_TYPE_GMC, 3090 AMD_IP_BLOCK_TYPE_PSP, 3091 AMD_IP_BLOCK_TYPE_IH, 3092 }; 3093 3094 for (i = 0; i < adev->num_ip_blocks; i++) { 3095 int j; 3096 struct amdgpu_ip_block *block; 3097 3098 block = &adev->ip_blocks[i]; 3099 block->status.hw = false; 3100 3101 for (j = 0; j < ARRAY_SIZE(ip_order); j++) { 3102 3103 if (block->version->type != ip_order[j] || 3104 !block->status.valid) 3105 continue; 3106 3107 r = block->version->funcs->hw_init(adev); 3108 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3109 if (r) 3110 return r; 3111 block->status.hw = true; 3112 } 3113 } 3114 3115 return 0; 3116 } 3117 3118 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 3119 { 3120 int i, r; 3121 3122 static enum amd_ip_block_type ip_order[] = { 3123 AMD_IP_BLOCK_TYPE_SMC, 3124 AMD_IP_BLOCK_TYPE_DCE, 3125 AMD_IP_BLOCK_TYPE_GFX, 3126 AMD_IP_BLOCK_TYPE_SDMA, 3127 AMD_IP_BLOCK_TYPE_MES, 3128 AMD_IP_BLOCK_TYPE_UVD, 3129 AMD_IP_BLOCK_TYPE_VCE, 3130 AMD_IP_BLOCK_TYPE_VCN, 3131 AMD_IP_BLOCK_TYPE_JPEG 3132 }; 3133 3134 for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 3135 int j; 3136 struct amdgpu_ip_block *block; 3137 3138 for (j = 0; j < adev->num_ip_blocks; j++) { 3139 block = &adev->ip_blocks[j]; 3140 3141 if (block->version->type != ip_order[i] || 3142 !block->status.valid || 3143 block->status.hw) 3144 continue; 3145 3146 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 3147 r = block->version->funcs->resume(adev); 3148 else 3149 r = block->version->funcs->hw_init(adev); 3150 3151 
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3152 if (r) 3153 return r; 3154 block->status.hw = true; 3155 } 3156 } 3157 3158 return 0; 3159 } 3160 3161 /** 3162 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 3163 * 3164 * @adev: amdgpu_device pointer 3165 * 3166 * First resume function for hardware IPs. The list of all the hardware 3167 * IPs that make up the asic is walked and the resume callbacks are run for 3168 * COMMON, GMC, and IH. resume puts the hardware into a functional state 3169 * after a suspend and updates the software state as necessary. This 3170 * function is also used for restoring the GPU after a GPU reset. 3171 * Returns 0 on success, negative error code on failure. 3172 */ 3173 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 3174 { 3175 int i, r; 3176 3177 for (i = 0; i < adev->num_ip_blocks; i++) { 3178 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3179 continue; 3180 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3181 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3182 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3183 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { 3184 3185 r = adev->ip_blocks[i].version->funcs->resume(adev); 3186 if (r) { 3187 DRM_ERROR("resume of IP block <%s> failed %d\n", 3188 adev->ip_blocks[i].version->funcs->name, r); 3189 return r; 3190 } 3191 adev->ip_blocks[i].status.hw = true; 3192 } 3193 } 3194 3195 return 0; 3196 } 3197 3198 /** 3199 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 3200 * 3201 * @adev: amdgpu_device pointer 3202 * 3203 * First resume function for hardware IPs. The list of all the hardware 3204 * IPs that make up the asic is walked and the resume callbacks are run for 3205 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 3206 * functional state after a suspend and updates the software state as 3207 * necessary. This function is also used for restoring the GPU after a GPU 3208 * reset. 3209 * Returns 0 on success, negative error code on failure. 3210 */ 3211 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 3212 { 3213 int i, r; 3214 3215 for (i = 0; i < adev->num_ip_blocks; i++) { 3216 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3217 continue; 3218 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3219 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3220 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3221 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3222 continue; 3223 r = adev->ip_blocks[i].version->funcs->resume(adev); 3224 if (r) { 3225 DRM_ERROR("resume of IP block <%s> failed %d\n", 3226 adev->ip_blocks[i].version->funcs->name, r); 3227 return r; 3228 } 3229 adev->ip_blocks[i].status.hw = true; 3230 } 3231 3232 return 0; 3233 } 3234 3235 /** 3236 * amdgpu_device_ip_resume - run resume for hardware IPs 3237 * 3238 * @adev: amdgpu_device pointer 3239 * 3240 * Main resume function for hardware IPs. The hardware IPs 3241 * are split into two resume functions because they are 3242 * also used in recovering from a GPU reset and some additional 3243 * steps need to be take between them. In this case (S3/S4) they are 3244 * run sequentially. 3245 * Returns 0 on success, negative error code on failure. 
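 *
 * Concretely, amdgpu_device_ip_resume_phase1() (COMMON, GMC, IH and, under
 * SR-IOV, PSP) is run first, firmware is reloaded via
 * amdgpu_device_fw_loading(), and the remaining blocks are then resumed in
 * amdgpu_device_ip_resume_phase2().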
3246 */ 3247 static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 3248 { 3249 int r; 3250 3251 r = amdgpu_device_ip_resume_phase1(adev); 3252 if (r) 3253 return r; 3254 3255 r = amdgpu_device_fw_loading(adev); 3256 if (r) 3257 return r; 3258 3259 r = amdgpu_device_ip_resume_phase2(adev); 3260 3261 return r; 3262 } 3263 3264 /** 3265 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 3266 * 3267 * @adev: amdgpu_device pointer 3268 * 3269 * Query the VBIOS data tables to determine if the board supports SR-IOV. 3270 */ 3271 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 3272 { 3273 if (amdgpu_sriov_vf(adev)) { 3274 if (adev->is_atom_fw) { 3275 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev)) 3276 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3277 } else { 3278 if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 3279 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3280 } 3281 3282 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 3283 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 3284 } 3285 } 3286 3287 /** 3288 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 3289 * 3290 * @asic_type: AMD asic type 3291 * 3292 * Check if there is DC (new modesetting infrastructre) support for an asic. 3293 * returns true if DC has support, false if not. 3294 */ 3295 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 3296 { 3297 switch (asic_type) { 3298 #ifdef CONFIG_DRM_AMDGPU_SI 3299 case CHIP_HAINAN: 3300 #endif 3301 case CHIP_TOPAZ: 3302 /* chips with no display hardware */ 3303 return false; 3304 #if defined(CONFIG_DRM_AMD_DC) 3305 case CHIP_TAHITI: 3306 case CHIP_PITCAIRN: 3307 case CHIP_VERDE: 3308 case CHIP_OLAND: 3309 /* 3310 * We have systems in the wild with these ASICs that require 3311 * LVDS and VGA support which is not supported with DC. 3312 * 3313 * Fallback to the non-DC driver here by default so as not to 3314 * cause regressions. 3315 */ 3316 #if defined(CONFIG_DRM_AMD_DC_SI) 3317 return amdgpu_dc > 0; 3318 #else 3319 return false; 3320 #endif 3321 case CHIP_BONAIRE: 3322 case CHIP_KAVERI: 3323 case CHIP_KABINI: 3324 case CHIP_MULLINS: 3325 /* 3326 * We have systems in the wild with these ASICs that require 3327 * VGA support which is not supported with DC. 3328 * 3329 * Fallback to the non-DC driver here by default so as not to 3330 * cause regressions. 
3331 */ 3332 return amdgpu_dc > 0; 3333 default: 3334 return amdgpu_dc != 0; 3335 #else 3336 default: 3337 if (amdgpu_dc > 0) 3338 DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); 3339 return false; 3340 #endif 3341 } 3342 } 3343 3344 /** 3345 * amdgpu_device_has_dc_support - check if dc is supported 3346 * 3347 * @adev: amdgpu_device pointer 3348 * 3349 * Returns true for supported, false for not supported 3350 */ 3351 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 3352 { 3353 if (adev->enable_virtual_display || 3354 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) 3355 return false; 3356 3357 return amdgpu_device_asic_has_dc_support(adev->asic_type); 3358 } 3359 3360 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 3361 { 3362 struct amdgpu_device *adev = 3363 container_of(__work, struct amdgpu_device, xgmi_reset_work); 3364 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 3365 3366 /* It's a bug to not have a hive within this function */ 3367 if (WARN_ON(!hive)) 3368 return; 3369 3370 /* 3371 * Use task barrier to synchronize all xgmi reset works across the 3372 * hive. task_barrier_enter and task_barrier_exit will block 3373 * until all the threads running the xgmi reset works reach 3374 * those points. task_barrier_full will do both blocks. 3375 */ 3376 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 3377 3378 task_barrier_enter(&hive->tb); 3379 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); 3380 3381 if (adev->asic_reset_res) 3382 goto fail; 3383 3384 task_barrier_exit(&hive->tb); 3385 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); 3386 3387 if (adev->asic_reset_res) 3388 goto fail; 3389 3390 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && 3391 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 3392 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); 3393 } else { 3394 3395 task_barrier_full(&hive->tb); 3396 adev->asic_reset_res = amdgpu_asic_reset(adev); 3397 } 3398 3399 fail: 3400 if (adev->asic_reset_res) 3401 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 3402 adev->asic_reset_res, adev_to_drm(adev)->unique); 3403 amdgpu_put_xgmi_hive(hive); 3404 } 3405 3406 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 3407 { 3408 char *input = amdgpu_lockup_timeout; 3409 char *timeout_setting = NULL; 3410 int index = 0; 3411 long timeout; 3412 int ret = 0; 3413 3414 /* 3415 * By default timeout for non compute jobs is 10000 3416 * and 60000 for compute jobs. 3417 * In SR-IOV or passthrough mode, timeout for compute 3418 * jobs are 60000 by default. 3419 */ 3420 adev->gfx_timeout = msecs_to_jiffies(10000); 3421 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3422 if (amdgpu_sriov_vf(adev)) 3423 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? 
3424 msecs_to_jiffies(60000) : msecs_to_jiffies(10000); 3425 else 3426 adev->compute_timeout = msecs_to_jiffies(60000); 3427 3428 #ifdef notyet 3429 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3430 while ((timeout_setting = strsep(&input, ",")) && 3431 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3432 ret = kstrtol(timeout_setting, 0, &timeout); 3433 if (ret) 3434 return ret; 3435 3436 if (timeout == 0) { 3437 index++; 3438 continue; 3439 } else if (timeout < 0) { 3440 timeout = MAX_SCHEDULE_TIMEOUT; 3441 dev_warn(adev->dev, "lockup timeout disabled"); 3442 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); 3443 } else { 3444 timeout = msecs_to_jiffies(timeout); 3445 } 3446 3447 switch (index++) { 3448 case 0: 3449 adev->gfx_timeout = timeout; 3450 break; 3451 case 1: 3452 adev->compute_timeout = timeout; 3453 break; 3454 case 2: 3455 adev->sdma_timeout = timeout; 3456 break; 3457 case 3: 3458 adev->video_timeout = timeout; 3459 break; 3460 default: 3461 break; 3462 } 3463 } 3464 /* 3465 * There is only one value specified and 3466 * it should apply to all non-compute jobs. 3467 */ 3468 if (index == 1) { 3469 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3470 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 3471 adev->compute_timeout = adev->gfx_timeout; 3472 } 3473 } 3474 #endif 3475 3476 return ret; 3477 } 3478 3479 /** 3480 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU 3481 * 3482 * @adev: amdgpu_device pointer 3483 * 3484 * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode 3485 */ 3486 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) 3487 { 3488 #ifdef notyet 3489 struct iommu_domain *domain; 3490 3491 domain = iommu_get_domain_for_dev(adev->dev); 3492 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) 3493 #endif 3494 adev->ram_is_direct_mapped = true; 3495 } 3496 3497 static const struct attribute *amdgpu_dev_attributes[] = { 3498 &dev_attr_pcie_replay_count.attr, 3499 NULL 3500 }; 3501 3502 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) 3503 { 3504 if (amdgpu_mcbp == 1) 3505 adev->gfx.mcbp = true; 3506 else if (amdgpu_mcbp == 0) 3507 adev->gfx.mcbp = false; 3508 3509 if (amdgpu_sriov_vf(adev)) 3510 adev->gfx.mcbp = true; 3511 3512 if (adev->gfx.mcbp) 3513 DRM_INFO("MCBP is enabled\n"); 3514 } 3515 3516 /** 3517 * amdgpu_device_init - initialize the driver 3518 * 3519 * @adev: amdgpu_device pointer 3520 * @flags: driver flags 3521 * 3522 * Initializes the driver info and hw (all asics). 3523 * Returns 0 for success or an error on failure. 3524 * Called at driver startup. 
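 *
 * Roughly: set up register accessors, locks and work items, map the MMIO
 * registers, create the reset domain, run early IP init, post or reset the
 * ASIC if required, initialize clocks and the fence driver, then run the
 * main IP init followed by the sysfs and late-init steps.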
3525 */ 3526 int amdgpu_device_init(struct amdgpu_device *adev, 3527 uint32_t flags) 3528 { 3529 struct drm_device *ddev = adev_to_drm(adev); 3530 struct pci_dev *pdev = adev->pdev; 3531 int r, i; 3532 bool px = false; 3533 u32 max_MBps; 3534 int tmp; 3535 3536 adev->shutdown = false; 3537 adev->flags = flags; 3538 3539 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 3540 adev->asic_type = amdgpu_force_asic_type; 3541 else 3542 adev->asic_type = flags & AMD_ASIC_MASK; 3543 3544 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 3545 if (amdgpu_emu_mode == 1) 3546 adev->usec_timeout *= 10; 3547 adev->gmc.gart_size = 512 * 1024 * 1024; 3548 adev->accel_working = false; 3549 adev->num_rings = 0; 3550 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); 3551 adev->mman.buffer_funcs = NULL; 3552 adev->mman.buffer_funcs_ring = NULL; 3553 adev->vm_manager.vm_pte_funcs = NULL; 3554 adev->vm_manager.vm_pte_num_scheds = 0; 3555 adev->gmc.gmc_funcs = NULL; 3556 adev->harvest_ip_mask = 0x0; 3557 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 3558 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 3559 3560 adev->smc_rreg = &amdgpu_invalid_rreg; 3561 adev->smc_wreg = &amdgpu_invalid_wreg; 3562 adev->pcie_rreg = &amdgpu_invalid_rreg; 3563 adev->pcie_wreg = &amdgpu_invalid_wreg; 3564 adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; 3565 adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext; 3566 adev->pciep_rreg = &amdgpu_invalid_rreg; 3567 adev->pciep_wreg = &amdgpu_invalid_wreg; 3568 adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 3569 adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 3570 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 3571 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 3572 adev->didt_rreg = &amdgpu_invalid_rreg; 3573 adev->didt_wreg = &amdgpu_invalid_wreg; 3574 adev->gc_cac_rreg = &amdgpu_invalid_rreg; 3575 adev->gc_cac_wreg = &amdgpu_invalid_wreg; 3576 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 3577 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 3578 3579 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 3580 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 3581 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 3582 3583 /* mutex initialization are all done here so we 3584 * can recall function without having locking issues 3585 */ 3586 rw_init(&adev->firmware.mutex, "agfw"); 3587 rw_init(&adev->pm.mutex, "agpm"); 3588 rw_init(&adev->gfx.gpu_clock_mutex, "gfxclk"); 3589 rw_init(&adev->srbm_mutex, "srbm"); 3590 rw_init(&adev->gfx.pipe_reserve_mutex, "pipers"); 3591 rw_init(&adev->gfx.gfx_off_mutex, "gfxoff"); 3592 rw_init(&adev->gfx.partition_mutex, "gfxpar"); 3593 rw_init(&adev->grbm_idx_mutex, "grbmidx"); 3594 rw_init(&adev->mn_lock, "agpumn"); 3595 rw_init(&adev->virt.vf_errors.lock, "vferr"); 3596 rw_init(&adev->virt.rlcg_reg_lock, "vrlcg"); 3597 hash_init(adev->mn_hash); 3598 rw_init(&adev->psp.mutex, "agpsp"); 3599 rw_init(&adev->notifier_lock, "agnf"); 3600 rw_init(&adev->pm.stable_pstate_ctx_lock, "agps"); 3601 rw_init(&adev->benchmark_mutex, "agbm"); 3602 3603 amdgpu_device_init_apu_flags(adev); 3604 3605 r = amdgpu_device_check_arguments(adev); 3606 if (r) 3607 return r; 3608 3609 mtx_init(&adev->mmio_idx_lock, IPL_TTY); 3610 mtx_init(&adev->smc_idx_lock, IPL_TTY); 3611 mtx_init(&adev->pcie_idx_lock, IPL_TTY); 3612 mtx_init(&adev->uvd_ctx_idx_lock, IPL_TTY); 3613 mtx_init(&adev->didt_idx_lock, IPL_TTY); 3614 mtx_init(&adev->gc_cac_idx_lock, IPL_TTY); 3615 
mtx_init(&adev->se_cac_idx_lock, IPL_TTY);
3616 mtx_init(&adev->audio_endpt_idx_lock, IPL_TTY);
3617 mtx_init(&adev->mm_stats.lock, IPL_NONE);
3618
3619 INIT_LIST_HEAD(&adev->shadow_list);
3620 rw_init(&adev->shadow_list_lock, "sdwlst");
3621
3622 INIT_LIST_HEAD(&adev->reset_list);
3623
3624 INIT_LIST_HEAD(&adev->ras_list);
3625
3626 INIT_DELAYED_WORK(&adev->delayed_init_work,
3627 amdgpu_device_delayed_init_work_handler);
3628 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3629 amdgpu_device_delay_enable_gfx_off);
3630
3631 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3632
3633 adev->gfx.gfx_off_req_count = 1;
3634 adev->gfx.gfx_off_residency = 0;
3635 adev->gfx.gfx_off_entrycount = 0;
3636 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3637
3638 atomic_set(&adev->throttling_logging_enabled, 1);
3639 /*
3640 * If throttling continues, logging will be performed every minute
3641 * to avoid log flooding. "-1" is subtracted since the thermal
3642 * throttling interrupt comes every second. Thus, the total logging
3643 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3644 * for the throttling interrupt) = 60 seconds.
3645 */
3646 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3647 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3648
3649 #ifdef __linux__
3650 /* Registers mapping */
3651 /* TODO: block userspace mapping of io register */
3652 if (adev->asic_type >= CHIP_BONAIRE) {
3653 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3654 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3655 } else {
3656 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3657 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3658 }
3659 #endif
3660
3661 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3662 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3663
3664 #ifdef __linux__
3665 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3666 if (!adev->rmmio)
3667 return -ENOMEM;
3668 #endif
3669 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3670 DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
3671
3672 /*
3673 * The reset domain needs to be present early, before the XGMI hive is
3674 * discovered (if any) and initialized, in order to use the reset sem and
3675 * in_gpu reset flag early on during init and before calling RREG32.
3676 */ 3677 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); 3678 if (!adev->reset_domain) 3679 return -ENOMEM; 3680 3681 /* detect hw virtualization here */ 3682 amdgpu_detect_virtualization(adev); 3683 3684 amdgpu_device_get_pcie_info(adev); 3685 3686 r = amdgpu_device_get_job_timeout_settings(adev); 3687 if (r) { 3688 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 3689 return r; 3690 } 3691 3692 /* early init functions */ 3693 r = amdgpu_device_ip_early_init(adev); 3694 if (r) 3695 return r; 3696 3697 amdgpu_device_set_mcbp(adev); 3698 3699 /* Get rid of things like offb */ 3700 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); 3701 if (r) 3702 return r; 3703 3704 /* Enable TMZ based on IP_VERSION */ 3705 amdgpu_gmc_tmz_set(adev); 3706 3707 amdgpu_gmc_noretry_set(adev); 3708 /* Need to get xgmi info early to decide the reset behavior*/ 3709 if (adev->gmc.xgmi.supported) { 3710 r = adev->gfxhub.funcs->get_xgmi_info(adev); 3711 if (r) 3712 return r; 3713 } 3714 3715 /* enable PCIE atomic ops */ 3716 #ifdef notyet 3717 if (amdgpu_sriov_vf(adev)) { 3718 if (adev->virt.fw_reserve.p_pf2vf) 3719 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) 3720 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == 3721 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3722 /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a 3723 * internal path natively support atomics, set have_atomics_support to true. 3724 */ 3725 } else if ((adev->flags & AMD_IS_APU) && 3726 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) { 3727 adev->have_atomics_support = true; 3728 } else { 3729 adev->have_atomics_support = 3730 !pci_enable_atomic_ops_to_root(adev->pdev, 3731 PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 3732 PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3733 } 3734 3735 if (!adev->have_atomics_support) 3736 dev_info(adev->dev, "PCIE atomic ops is not supported\n"); 3737 #else 3738 /* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a 3739 * internal path natively support atomics, set have_atomics_support to true. 3740 */ 3741 if ((adev->flags & AMD_IS_APU) && 3742 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) 3743 adev->have_atomics_support = true; 3744 else 3745 adev->have_atomics_support = false; 3746 #endif 3747 3748 /* doorbell bar mapping and doorbell index init*/ 3749 amdgpu_doorbell_init(adev); 3750 3751 if (amdgpu_emu_mode == 1) { 3752 /* post the asic on emulation mode */ 3753 emu_soc_asic_init(adev); 3754 goto fence_driver_init; 3755 } 3756 3757 amdgpu_reset_init(adev); 3758 3759 /* detect if we are with an SRIOV vbios */ 3760 if (adev->bios) 3761 amdgpu_device_detect_sriov_bios(adev); 3762 3763 /* check if we need to reset the asic 3764 * E.g., driver was not cleanly unloaded previously, etc. 
3765 */ 3766 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 3767 if (adev->gmc.xgmi.num_physical_nodes) { 3768 dev_info(adev->dev, "Pending hive reset.\n"); 3769 adev->gmc.xgmi.pending_reset = true; 3770 /* Only need to init necessary block for SMU to handle the reset */ 3771 for (i = 0; i < adev->num_ip_blocks; i++) { 3772 if (!adev->ip_blocks[i].status.valid) 3773 continue; 3774 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3775 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3776 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3777 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) { 3778 DRM_DEBUG("IP %s disabled for hw_init.\n", 3779 adev->ip_blocks[i].version->funcs->name); 3780 adev->ip_blocks[i].status.hw = true; 3781 } 3782 } 3783 } else { 3784 tmp = amdgpu_reset_method; 3785 /* It should do a default reset when loading or reloading the driver, 3786 * regardless of the module parameter reset_method. 3787 */ 3788 amdgpu_reset_method = AMD_RESET_METHOD_NONE; 3789 r = amdgpu_asic_reset(adev); 3790 amdgpu_reset_method = tmp; 3791 if (r) { 3792 dev_err(adev->dev, "asic reset on init failed\n"); 3793 goto failed; 3794 } 3795 } 3796 } 3797 3798 /* Post card if necessary */ 3799 if (amdgpu_device_need_post(adev)) { 3800 if (!adev->bios) { 3801 dev_err(adev->dev, "no vBIOS found\n"); 3802 r = -EINVAL; 3803 goto failed; 3804 } 3805 DRM_INFO("GPU posting now...\n"); 3806 r = amdgpu_device_asic_init(adev); 3807 if (r) { 3808 dev_err(adev->dev, "gpu post error!\n"); 3809 goto failed; 3810 } 3811 } 3812 3813 if (adev->bios) { 3814 if (adev->is_atom_fw) { 3815 /* Initialize clocks */ 3816 r = amdgpu_atomfirmware_get_clock_info(adev); 3817 if (r) { 3818 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3819 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3820 goto failed; 3821 } 3822 } else { 3823 /* Initialize clocks */ 3824 r = amdgpu_atombios_get_clock_info(adev); 3825 if (r) { 3826 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3827 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3828 goto failed; 3829 } 3830 /* init i2c buses */ 3831 if (!amdgpu_device_has_dc_support(adev)) 3832 amdgpu_atombios_i2c_init(adev); 3833 } 3834 } 3835 3836 fence_driver_init: 3837 /* Fence driver */ 3838 r = amdgpu_fence_driver_sw_init(adev); 3839 if (r) { 3840 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n"); 3841 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3842 goto failed; 3843 } 3844 3845 /* init the mode config */ 3846 drm_mode_config_init(adev_to_drm(adev)); 3847 3848 r = amdgpu_device_ip_init(adev); 3849 if (r) { 3850 dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3851 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 3852 goto release_ras_con; 3853 } 3854 3855 amdgpu_fence_driver_hw_init(adev); 3856 3857 dev_info(adev->dev, 3858 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3859 adev->gfx.config.max_shader_engines, 3860 adev->gfx.config.max_sh_per_se, 3861 adev->gfx.config.max_cu_per_sh, 3862 adev->gfx.cu_info.number); 3863 3864 #ifdef __OpenBSD__ 3865 { 3866 const char *chip_name; 3867 uint32_t version = adev->ip_versions[GC_HWIP][0]; 3868 int maj, min, rev; 3869 3870 switch (adev->asic_type) { 3871 case CHIP_RAVEN: 3872 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 3873 chip_name = "RAVEN2"; 3874 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 3875 chip_name = "PICASSO"; 3876 
else 3877 chip_name = "RAVEN"; 3878 break; 3879 case CHIP_RENOIR: 3880 if (adev->apu_flags & AMD_APU_IS_RENOIR) 3881 chip_name = "RENOIR"; 3882 else 3883 chip_name = "GREEN_SARDINE"; 3884 break; 3885 default: 3886 chip_name = amdgpu_asic_name[adev->asic_type]; 3887 } 3888 3889 printf("%s: %s", adev->self.dv_xname, chip_name); 3890 /* show graphics/compute ip block version, not set on < GFX9 */ 3891 if (version) { 3892 maj = IP_VERSION_MAJ(version); 3893 min = IP_VERSION_MIN(version); 3894 rev = IP_VERSION_REV(version); 3895 printf(" GC %d.%d.%d", maj, min, rev); 3896 } 3897 printf(" %d CU rev 0x%02x\n", adev->gfx.cu_info.number, adev->rev_id); 3898 } 3899 #endif 3900 3901 adev->accel_working = true; 3902 3903 amdgpu_vm_check_compute_bug(adev); 3904 3905 /* Initialize the buffer migration limit. */ 3906 if (amdgpu_moverate >= 0) 3907 max_MBps = amdgpu_moverate; 3908 else 3909 max_MBps = 8; /* Allow 8 MB/s. */ 3910 /* Get a log2 for easy divisions. */ 3911 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3912 3913 r = amdgpu_atombios_sysfs_init(adev); 3914 if (r) 3915 drm_err(&adev->ddev, 3916 "registering atombios sysfs failed (%d).\n", r); 3917 3918 r = amdgpu_pm_sysfs_init(adev); 3919 if (r) 3920 DRM_ERROR("registering pm sysfs failed (%d).\n", r); 3921 3922 r = amdgpu_ucode_sysfs_init(adev); 3923 if (r) { 3924 adev->ucode_sysfs_en = false; 3925 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3926 } else 3927 adev->ucode_sysfs_en = true; 3928 3929 /* 3930 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3931 * Otherwise the mgpu fan boost feature will be skipped due to the 3932 * gpu instance is counted less. 3933 */ 3934 amdgpu_register_gpu_instance(adev); 3935 3936 /* enable clockgating, etc. after ib tests, etc. since some blocks require 3937 * explicit gating rather than handling it automatically. 3938 */ 3939 if (!adev->gmc.xgmi.pending_reset) { 3940 r = amdgpu_device_ip_late_init(adev); 3941 if (r) { 3942 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3943 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 3944 goto release_ras_con; 3945 } 3946 /* must succeed. 
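* RAS is brought up here and the remaining init steps are finished asynchronously by the delayed_init_work queued right below.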
*/ 3947 amdgpu_ras_resume(adev); 3948 queue_delayed_work(system_wq, &adev->delayed_init_work, 3949 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3950 } 3951 3952 if (amdgpu_sriov_vf(adev)) { 3953 amdgpu_virt_release_full_gpu(adev, true); 3954 flush_delayed_work(&adev->delayed_init_work); 3955 } 3956 3957 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 3958 if (r) 3959 dev_err(adev->dev, "Could not create amdgpu device attr\n"); 3960 3961 amdgpu_fru_sysfs_init(adev); 3962 3963 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 3964 r = amdgpu_pmu_init(adev); 3965 if (r) 3966 dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 3967 3968 /* Have stored pci confspace at hand for restore in sudden PCI error */ 3969 if (amdgpu_device_cache_pci_state(adev->pdev)) 3970 pci_restore_state(pdev); 3971 3972 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 3973 /* this will fail for cards that aren't VGA class devices, just 3974 * ignore it 3975 */ 3976 #ifdef notyet 3977 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 3978 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); 3979 #endif 3980 3981 px = amdgpu_device_supports_px(ddev); 3982 3983 if (px || (!dev_is_removable(&adev->pdev->dev) && 3984 apple_gmux_detect(NULL, NULL))) 3985 vga_switcheroo_register_client(adev->pdev, 3986 &amdgpu_switcheroo_ops, px); 3987 3988 if (px) 3989 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 3990 3991 if (adev->gmc.xgmi.pending_reset) 3992 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, 3993 msecs_to_jiffies(AMDGPU_RESUME_MS)); 3994 3995 amdgpu_device_check_iommu_direct_map(adev); 3996 3997 return 0; 3998 3999 release_ras_con: 4000 if (amdgpu_sriov_vf(adev)) 4001 amdgpu_virt_release_full_gpu(adev, true); 4002 4003 /* failed in exclusive mode due to timeout */ 4004 if (amdgpu_sriov_vf(adev) && 4005 !amdgpu_sriov_runtime(adev) && 4006 amdgpu_virt_mmio_blocked(adev) && 4007 !amdgpu_virt_wait_reset(adev)) { 4008 dev_err(adev->dev, "VF exclusive mode timeout\n"); 4009 /* Don't send request since VF is inactive. 
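* Clearing the runtime cap and virt.ops below prevents any further messages to the inactive host before bailing out with -EAGAIN.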
*/ 4010 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 4011 adev->virt.ops = NULL; 4012 r = -EAGAIN; 4013 } 4014 amdgpu_release_ras_context(adev); 4015 4016 failed: 4017 amdgpu_vf_error_trans_all(adev); 4018 4019 return r; 4020 } 4021 4022 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) 4023 { 4024 STUB(); 4025 #ifdef notyet 4026 4027 /* Clear all CPU mappings pointing to this device */ 4028 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); 4029 #endif 4030 4031 /* Unmap all mapped bars - Doorbell, registers and VRAM */ 4032 amdgpu_doorbell_fini(adev); 4033 4034 #ifdef __linux__ 4035 iounmap(adev->rmmio); 4036 adev->rmmio = NULL; 4037 if (adev->mman.aper_base_kaddr) 4038 iounmap(adev->mman.aper_base_kaddr); 4039 adev->mman.aper_base_kaddr = NULL; 4040 #else 4041 if (adev->rmmio_size > 0) 4042 bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh, 4043 adev->rmmio_size); 4044 adev->rmmio_size = 0; 4045 adev->rmmio = NULL; 4046 if (adev->mman.aper_base_kaddr) 4047 bus_space_unmap(adev->memt, adev->mman.aper_bsh, 4048 adev->gmc.visible_vram_size); 4049 adev->mman.aper_base_kaddr = NULL; 4050 #endif 4051 4052 /* Memory manager related */ 4053 if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { 4054 #ifdef __linux__ 4055 arch_phys_wc_del(adev->gmc.vram_mtrr); 4056 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); 4057 #else 4058 drm_mtrr_del(0, adev->gmc.aper_base, adev->gmc.aper_size, DRM_MTRR_WC); 4059 #endif 4060 } 4061 } 4062 4063 /** 4064 * amdgpu_device_fini_hw - tear down the driver 4065 * 4066 * @adev: amdgpu_device pointer 4067 * 4068 * Tear down the driver info (all asics). 4069 * Called at driver shutdown. 4070 */ 4071 void amdgpu_device_fini_hw(struct amdgpu_device *adev) 4072 { 4073 dev_info(adev->dev, "amdgpu: finishing device.\n"); 4074 flush_delayed_work(&adev->delayed_init_work); 4075 adev->shutdown = true; 4076 4077 /* make sure IB test finished before entering exclusive mode 4078 * to avoid preemption on IB test 4079 */ 4080 if (amdgpu_sriov_vf(adev)) { 4081 amdgpu_virt_request_full_gpu(adev, false); 4082 amdgpu_virt_fini_data_exchange(adev); 4083 } 4084 4085 /* disable all interrupts */ 4086 amdgpu_irq_disable_all(adev); 4087 if (adev->mode_info.mode_config_initialized) { 4088 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) 4089 drm_helper_force_disable_all(adev_to_drm(adev)); 4090 else 4091 drm_atomic_helper_shutdown(adev_to_drm(adev)); 4092 } 4093 amdgpu_fence_driver_hw_fini(adev); 4094 4095 if (adev->mman.initialized) 4096 drain_workqueue(adev->mman.bdev.wq); 4097 4098 if (adev->pm.sysfs_initialized) 4099 amdgpu_pm_sysfs_fini(adev); 4100 if (adev->ucode_sysfs_en) 4101 amdgpu_ucode_sysfs_fini(adev); 4102 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 4103 amdgpu_fru_sysfs_fini(adev); 4104 4105 /* disable ras feature must before hw fini */ 4106 amdgpu_ras_pre_fini(adev); 4107 4108 amdgpu_device_ip_fini_early(adev); 4109 4110 amdgpu_irq_fini_hw(adev); 4111 4112 if (adev->mman.initialized) 4113 ttm_device_clear_dma_mappings(&adev->mman.bdev); 4114 4115 amdgpu_gart_dummy_page_fini(adev); 4116 4117 if (drm_dev_is_unplugged(adev_to_drm(adev))) 4118 amdgpu_device_unmap_mmio(adev); 4119 4120 } 4121 4122 void amdgpu_device_fini_sw(struct amdgpu_device *adev) 4123 { 4124 int idx; 4125 bool px; 4126 4127 amdgpu_fence_driver_sw_fini(adev); 4128 amdgpu_device_ip_fini(adev); 4129 amdgpu_ucode_release(&adev->firmware.gpu_info_fw); 4130 adev->accel_working = false; 4131 
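/* Drop the cached gang-submit fence; new submissions are already blocked because accel_working was cleared above. */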
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); 4132 4133 amdgpu_reset_fini(adev); 4134 4135 /* free i2c buses */ 4136 if (!amdgpu_device_has_dc_support(adev)) 4137 amdgpu_i2c_fini(adev); 4138 4139 if (amdgpu_emu_mode != 1) 4140 amdgpu_atombios_fini(adev); 4141 4142 kfree(adev->bios); 4143 adev->bios = NULL; 4144 4145 px = amdgpu_device_supports_px(adev_to_drm(adev)); 4146 4147 if (px || (!dev_is_removable(&adev->pdev->dev) && 4148 apple_gmux_detect(NULL, NULL))) 4149 vga_switcheroo_unregister_client(adev->pdev); 4150 4151 if (px) 4152 vga_switcheroo_fini_domain_pm_ops(adev->dev); 4153 4154 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 4155 vga_client_unregister(adev->pdev); 4156 4157 if (drm_dev_enter(adev_to_drm(adev), &idx)) { 4158 #ifdef __linux__ 4159 iounmap(adev->rmmio); 4160 adev->rmmio = NULL; 4161 #else 4162 if (adev->rmmio_size > 0) 4163 bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh, 4164 adev->rmmio_size); 4165 adev->rmmio_size = 0; 4166 adev->rmmio = NULL; 4167 #endif 4168 amdgpu_doorbell_fini(adev); 4169 drm_dev_exit(idx); 4170 } 4171 4172 if (IS_ENABLED(CONFIG_PERF_EVENTS)) 4173 amdgpu_pmu_fini(adev); 4174 if (adev->mman.discovery_bin) 4175 amdgpu_discovery_fini(adev); 4176 4177 amdgpu_reset_put_reset_domain(adev->reset_domain); 4178 adev->reset_domain = NULL; 4179 4180 kfree(adev->pci_state); 4181 4182 } 4183 4184 /** 4185 * amdgpu_device_evict_resources - evict device resources 4186 * @adev: amdgpu device object 4187 * 4188 * Evicts all ttm device resources(vram BOs, gart table) from the lru list 4189 * of the vram memory type. Mainly used for evicting device resources 4190 * at suspend time. 4191 * 4192 */ 4193 static int amdgpu_device_evict_resources(struct amdgpu_device *adev) 4194 { 4195 int ret; 4196 4197 /* No need to evict vram on APUs for suspend to ram or s2idle */ 4198 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) 4199 return 0; 4200 4201 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); 4202 if (ret) 4203 DRM_WARN("evicting device resources failed\n"); 4204 return ret; 4205 } 4206 4207 /* 4208 * Suspend & resume. 4209 */ 4210 /** 4211 * amdgpu_device_prepare - prepare for device suspend 4212 * 4213 * @dev: drm dev pointer 4214 * 4215 * Prepare to put the hw in the suspend state (all asics). 4216 * Returns 0 for success or an error on failure. 4217 * Called at driver suspend. 4218 */ 4219 int amdgpu_device_prepare(struct drm_device *dev) 4220 { 4221 struct amdgpu_device *adev = drm_to_adev(dev); 4222 int i, r; 4223 4224 amdgpu_choose_low_power_state(adev); 4225 4226 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4227 return 0; 4228 4229 /* Evict the majority of BOs before starting suspend sequence */ 4230 r = amdgpu_device_evict_resources(adev); 4231 if (r) 4232 goto unprepare; 4233 4234 flush_delayed_work(&adev->gfx.gfx_off_delay_work); 4235 4236 for (i = 0; i < adev->num_ip_blocks; i++) { 4237 if (!adev->ip_blocks[i].status.valid) 4238 continue; 4239 if (!adev->ip_blocks[i].version->funcs->prepare_suspend) 4240 continue; 4241 r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); 4242 if (r) 4243 goto unprepare; 4244 } 4245 4246 return 0; 4247 4248 unprepare: 4249 adev->in_s0ix = adev->in_s3 = false; 4250 4251 return r; 4252 } 4253 4254 /** 4255 * amdgpu_device_suspend - initiate device suspend 4256 * 4257 * @dev: drm dev pointer 4258 * @fbcon : notify the fbdev of suspend 4259 * 4260 * Puts the hw in the suspend state (all asics). 4261 * Returns 0 for success or an error on failure. 
4262 * Called at driver suspend. 4263 */ 4264 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 4265 { 4266 struct amdgpu_device *adev = drm_to_adev(dev); 4267 int r = 0; 4268 4269 if (adev->shutdown) 4270 return 0; 4271 4272 #ifdef notyet 4273 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4274 return 0; 4275 #endif 4276 4277 adev->in_suspend = true; 4278 4279 if (amdgpu_sriov_vf(adev)) { 4280 amdgpu_virt_fini_data_exchange(adev); 4281 r = amdgpu_virt_request_full_gpu(adev, false); 4282 if (r) 4283 return r; 4284 } 4285 4286 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) 4287 DRM_WARN("smart shift update failed\n"); 4288 4289 if (fbcon) 4290 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); 4291 4292 cancel_delayed_work_sync(&adev->delayed_init_work); 4293 4294 amdgpu_ras_suspend(adev); 4295 4296 amdgpu_device_ip_suspend_phase1(adev); 4297 4298 if (!adev->in_s0ix) 4299 amdgpu_amdkfd_suspend(adev, adev->in_runpm); 4300 4301 r = amdgpu_device_evict_resources(adev); 4302 if (r) 4303 return r; 4304 4305 amdgpu_fence_driver_hw_fini(adev); 4306 4307 amdgpu_device_ip_suspend_phase2(adev); 4308 4309 if (amdgpu_sriov_vf(adev)) 4310 amdgpu_virt_release_full_gpu(adev, false); 4311 4312 return 0; 4313 } 4314 4315 /** 4316 * amdgpu_device_resume - initiate device resume 4317 * 4318 * @dev: drm dev pointer 4319 * @fbcon : notify the fbdev of resume 4320 * 4321 * Bring the hw back to operating state (all asics). 4322 * Returns 0 for success or an error on failure. 4323 * Called at driver resume. 4324 */ 4325 int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 4326 { 4327 struct amdgpu_device *adev = drm_to_adev(dev); 4328 int r = 0; 4329 4330 if (amdgpu_sriov_vf(adev)) { 4331 r = amdgpu_virt_request_full_gpu(adev, true); 4332 if (r) 4333 return r; 4334 } 4335 4336 #ifdef notyet 4337 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4338 return 0; 4339 #endif 4340 4341 if (adev->in_s0ix) 4342 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry); 4343 4344 /* post card */ 4345 if (amdgpu_device_need_post(adev)) { 4346 r = amdgpu_device_asic_init(adev); 4347 if (r) 4348 dev_err(adev->dev, "amdgpu asic init failed\n"); 4349 } 4350 4351 r = amdgpu_device_ip_resume(adev); 4352 4353 if (r) { 4354 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); 4355 goto exit; 4356 } 4357 amdgpu_fence_driver_hw_init(adev); 4358 4359 r = amdgpu_device_ip_late_init(adev); 4360 if (r) 4361 goto exit; 4362 4363 queue_delayed_work(system_wq, &adev->delayed_init_work, 4364 msecs_to_jiffies(AMDGPU_RESUME_MS)); 4365 4366 if (!adev->in_s0ix) { 4367 r = amdgpu_amdkfd_resume(adev, adev->in_runpm); 4368 if (r) 4369 goto exit; 4370 } 4371 4372 exit: 4373 if (amdgpu_sriov_vf(adev)) { 4374 amdgpu_virt_init_data_exchange(adev); 4375 amdgpu_virt_release_full_gpu(adev, true); 4376 } 4377 4378 if (r) 4379 return r; 4380 4381 /* Make sure IB tests flushed */ 4382 flush_delayed_work(&adev->delayed_init_work); 4383 4384 if (fbcon) 4385 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); 4386 4387 amdgpu_ras_resume(adev); 4388 4389 if (adev->mode_info.num_crtc) { 4390 /* 4391 * Most of the connector probing functions try to acquire runtime pm 4392 * refs to ensure that the GPU is powered on when connector polling is 4393 * performed. Since we're calling this from a runtime PM callback, 4394 * trying to acquire rpm refs will cause us to deadlock. 
4395 * 4396 * Since we're guaranteed to be holding the rpm lock, it's safe to 4397 * temporarily disable the rpm helpers so this doesn't deadlock us. 4398 */ 4399 #if defined(CONFIG_PM) && defined(__linux__) 4400 dev->dev->power.disable_depth++; 4401 #endif 4402 if (!adev->dc_enabled) 4403 drm_helper_hpd_irq_event(dev); 4404 else 4405 drm_kms_helper_hotplug_event(dev); 4406 #if defined(CONFIG_PM) && defined(__linux__) 4407 dev->dev->power.disable_depth--; 4408 #endif 4409 } 4410 adev->in_suspend = false; 4411 4412 if (adev->enable_mes) 4413 amdgpu_mes_self_test(adev); 4414 4415 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) 4416 DRM_WARN("smart shift update failed\n"); 4417 4418 return 0; 4419 } 4420 4421 /** 4422 * amdgpu_device_ip_check_soft_reset - did soft reset succeed 4423 * 4424 * @adev: amdgpu_device pointer 4425 * 4426 * The list of all the hardware IPs that make up the asic is walked and 4427 * the check_soft_reset callbacks are run. check_soft_reset determines 4428 * if the asic is still hung or not. 4429 * Returns true if any of the IPs are still in a hung state, false if not. 4430 */ 4431 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 4432 { 4433 int i; 4434 bool asic_hang = false; 4435 4436 if (amdgpu_sriov_vf(adev)) 4437 return true; 4438 4439 if (amdgpu_asic_need_full_reset(adev)) 4440 return true; 4441 4442 for (i = 0; i < adev->num_ip_blocks; i++) { 4443 if (!adev->ip_blocks[i].status.valid) 4444 continue; 4445 if (adev->ip_blocks[i].version->funcs->check_soft_reset) 4446 adev->ip_blocks[i].status.hang = 4447 adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 4448 if (adev->ip_blocks[i].status.hang) { 4449 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 4450 asic_hang = true; 4451 } 4452 } 4453 return asic_hang; 4454 } 4455 4456 /** 4457 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 4458 * 4459 * @adev: amdgpu_device pointer 4460 * 4461 * The list of all the hardware IPs that make up the asic is walked and the 4462 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 4463 * handles any IP specific hardware or software state changes that are 4464 * necessary for a soft reset to succeed. 4465 * Returns 0 on success, negative error code on failure. 4466 */ 4467 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 4468 { 4469 int i, r = 0; 4470 4471 for (i = 0; i < adev->num_ip_blocks; i++) { 4472 if (!adev->ip_blocks[i].status.valid) 4473 continue; 4474 if (adev->ip_blocks[i].status.hang && 4475 adev->ip_blocks[i].version->funcs->pre_soft_reset) { 4476 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 4477 if (r) 4478 return r; 4479 } 4480 } 4481 4482 return 0; 4483 } 4484 4485 /** 4486 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 4487 * 4488 * @adev: amdgpu_device pointer 4489 * 4490 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 4491 * reset is necessary to recover. 4492 * Returns true if a full asic reset is required, false if not. 
4493 */ 4494 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 4495 { 4496 int i; 4497 4498 if (amdgpu_asic_need_full_reset(adev)) 4499 return true; 4500 4501 for (i = 0; i < adev->num_ip_blocks; i++) { 4502 if (!adev->ip_blocks[i].status.valid) 4503 continue; 4504 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 4505 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 4506 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 4507 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 4508 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 4509 if (adev->ip_blocks[i].status.hang) { 4510 dev_info(adev->dev, "Some block need full reset!\n"); 4511 return true; 4512 } 4513 } 4514 } 4515 return false; 4516 } 4517 4518 /** 4519 * amdgpu_device_ip_soft_reset - do a soft reset 4520 * 4521 * @adev: amdgpu_device pointer 4522 * 4523 * The list of all the hardware IPs that make up the asic is walked and the 4524 * soft_reset callbacks are run if the block is hung. soft_reset handles any 4525 * IP specific hardware or software state changes that are necessary to soft 4526 * reset the IP. 4527 * Returns 0 on success, negative error code on failure. 4528 */ 4529 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 4530 { 4531 int i, r = 0; 4532 4533 for (i = 0; i < adev->num_ip_blocks; i++) { 4534 if (!adev->ip_blocks[i].status.valid) 4535 continue; 4536 if (adev->ip_blocks[i].status.hang && 4537 adev->ip_blocks[i].version->funcs->soft_reset) { 4538 r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 4539 if (r) 4540 return r; 4541 } 4542 } 4543 4544 return 0; 4545 } 4546 4547 /** 4548 * amdgpu_device_ip_post_soft_reset - clean up from soft reset 4549 * 4550 * @adev: amdgpu_device pointer 4551 * 4552 * The list of all the hardware IPs that make up the asic is walked and the 4553 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 4554 * handles any IP specific hardware or software state changes that are 4555 * necessary after the IP has been soft reset. 4556 * Returns 0 on success, negative error code on failure. 4557 */ 4558 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 4559 { 4560 int i, r = 0; 4561 4562 for (i = 0; i < adev->num_ip_blocks; i++) { 4563 if (!adev->ip_blocks[i].status.valid) 4564 continue; 4565 if (adev->ip_blocks[i].status.hang && 4566 adev->ip_blocks[i].version->funcs->post_soft_reset) 4567 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 4568 if (r) 4569 return r; 4570 } 4571 4572 return 0; 4573 } 4574 4575 /** 4576 * amdgpu_device_recover_vram - Recover some VRAM contents 4577 * 4578 * @adev: amdgpu_device pointer 4579 * 4580 * Restores the contents of VRAM buffers from the shadows in GTT. Used to 4581 * restore things like GPUVM page tables after a GPU reset where 4582 * the contents of VRAM might be lost. 4583 * 4584 * Returns: 4585 * 0 on success, negative error code on failure. 
4586 */ 4587 static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 4588 { 4589 struct dma_fence *fence = NULL, *next = NULL; 4590 struct amdgpu_bo *shadow; 4591 struct amdgpu_bo_vm *vmbo; 4592 long r = 1, tmo; 4593 4594 if (amdgpu_sriov_runtime(adev)) 4595 tmo = msecs_to_jiffies(8000); 4596 else 4597 tmo = msecs_to_jiffies(100); 4598 4599 dev_info(adev->dev, "recover vram bo from shadow start\n"); 4600 mutex_lock(&adev->shadow_list_lock); 4601 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { 4602 /* If vm is compute context or adev is APU, shadow will be NULL */ 4603 if (!vmbo->shadow) 4604 continue; 4605 shadow = vmbo->shadow; 4606 4607 /* No need to recover an evicted BO */ 4608 if (shadow->tbo.resource->mem_type != TTM_PL_TT || 4609 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || 4610 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) 4611 continue; 4612 4613 r = amdgpu_bo_restore_shadow(shadow, &next); 4614 if (r) 4615 break; 4616 4617 if (fence) { 4618 tmo = dma_fence_wait_timeout(fence, false, tmo); 4619 dma_fence_put(fence); 4620 fence = next; 4621 if (tmo == 0) { 4622 r = -ETIMEDOUT; 4623 break; 4624 } else if (tmo < 0) { 4625 r = tmo; 4626 break; 4627 } 4628 } else { 4629 fence = next; 4630 } 4631 } 4632 mutex_unlock(&adev->shadow_list_lock); 4633 4634 if (fence) 4635 tmo = dma_fence_wait_timeout(fence, false, tmo); 4636 dma_fence_put(fence); 4637 4638 if (r < 0 || tmo <= 0) { 4639 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 4640 return -EIO; 4641 } 4642 4643 dev_info(adev->dev, "recover vram bo from shadow done\n"); 4644 return 0; 4645 } 4646 4647 4648 /** 4649 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 4650 * 4651 * @adev: amdgpu_device pointer 4652 * @from_hypervisor: request from hypervisor 4653 * 4654 * do VF FLR and reinitialize Asic 4655 * return 0 means succeeded otherwise failed 4656 */ 4657 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 4658 bool from_hypervisor) 4659 { 4660 int r; 4661 struct amdgpu_hive_info *hive = NULL; 4662 int retry_limit = 0; 4663 4664 retry: 4665 amdgpu_amdkfd_pre_reset(adev); 4666 4667 if (from_hypervisor) 4668 r = amdgpu_virt_request_full_gpu(adev, true); 4669 else 4670 r = amdgpu_virt_reset_gpu(adev); 4671 if (r) 4672 return r; 4673 amdgpu_irq_gpu_reset_resume_helper(adev); 4674 4675 /* some sw clean up VF needs to do before recover */ 4676 amdgpu_virt_post_reset(adev); 4677 4678 /* Resume IP prior to SMC */ 4679 r = amdgpu_device_ip_reinit_early_sriov(adev); 4680 if (r) 4681 goto error; 4682 4683 amdgpu_virt_init_data_exchange(adev); 4684 4685 r = amdgpu_device_fw_loading(adev); 4686 if (r) 4687 return r; 4688 4689 /* now we are okay to resume SMC/CP/SDMA */ 4690 r = amdgpu_device_ip_reinit_late_sriov(adev); 4691 if (r) 4692 goto error; 4693 4694 hive = amdgpu_get_xgmi_hive(adev); 4695 /* Update PSP FW topology after reset */ 4696 if (hive && adev->gmc.xgmi.num_physical_nodes > 1) 4697 r = amdgpu_xgmi_update_topology(hive, adev); 4698 4699 if (hive) 4700 amdgpu_put_xgmi_hive(hive); 4701 4702 if (!r) { 4703 r = amdgpu_ib_ring_tests(adev); 4704 4705 amdgpu_amdkfd_post_reset(adev); 4706 } 4707 4708 error: 4709 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 4710 amdgpu_inc_vram_lost(adev); 4711 r = amdgpu_device_recover_vram(adev); 4712 } 4713 amdgpu_virt_release_full_gpu(adev, true); 4714 4715 if (AMDGPU_RETRY_SRIOV_RESET(r)) { 4716 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { 4717 retry_limit++; 4718 goto 
retry; 4719 } else 4720 DRM_ERROR("GPU reset retry is beyond the retry limit\n"); 4721 } 4722 4723 return r; 4724 } 4725 4726 /** 4727 * amdgpu_device_has_job_running - check if there is any job in mirror list 4728 * 4729 * @adev: amdgpu_device pointer 4730 * 4731 * check if there is any job in mirror list 4732 */ 4733 bool amdgpu_device_has_job_running(struct amdgpu_device *adev) 4734 { 4735 int i; 4736 struct drm_sched_job *job; 4737 4738 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4739 struct amdgpu_ring *ring = adev->rings[i]; 4740 4741 if (!ring || !ring->sched.thread) 4742 continue; 4743 4744 spin_lock(&ring->sched.job_list_lock); 4745 job = list_first_entry_or_null(&ring->sched.pending_list, 4746 struct drm_sched_job, list); 4747 spin_unlock(&ring->sched.job_list_lock); 4748 if (job) 4749 return true; 4750 } 4751 return false; 4752 } 4753 4754 /** 4755 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 4756 * 4757 * @adev: amdgpu_device pointer 4758 * 4759 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 4760 * a hung GPU. 4761 */ 4762 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 4763 { 4764 4765 if (amdgpu_gpu_recovery == 0) 4766 goto disabled; 4767 4768 /* Skip soft reset check in fatal error mode */ 4769 if (!amdgpu_ras_is_poison_mode_supported(adev)) 4770 return true; 4771 4772 if (amdgpu_sriov_vf(adev)) 4773 return true; 4774 4775 if (amdgpu_gpu_recovery == -1) { 4776 switch (adev->asic_type) { 4777 #ifdef CONFIG_DRM_AMDGPU_SI 4778 case CHIP_VERDE: 4779 case CHIP_TAHITI: 4780 case CHIP_PITCAIRN: 4781 case CHIP_OLAND: 4782 case CHIP_HAINAN: 4783 #endif 4784 #ifdef CONFIG_DRM_AMDGPU_CIK 4785 case CHIP_KAVERI: 4786 case CHIP_KABINI: 4787 case CHIP_MULLINS: 4788 #endif 4789 case CHIP_CARRIZO: 4790 case CHIP_STONEY: 4791 case CHIP_CYAN_SKILLFISH: 4792 goto disabled; 4793 default: 4794 break; 4795 } 4796 } 4797 4798 return true; 4799 4800 disabled: 4801 dev_info(adev->dev, "GPU recovery disabled.\n"); 4802 return false; 4803 } 4804 4805 int amdgpu_device_mode1_reset(struct amdgpu_device *adev) 4806 { 4807 u32 i; 4808 int ret = 0; 4809 4810 amdgpu_atombios_scratch_regs_engine_hung(adev, true); 4811 4812 dev_info(adev->dev, "GPU mode1 reset\n"); 4813 4814 /* Cache the state before bus master disable. The saved config space 4815 * values are used in other cases like restore after mode-2 reset. 
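* The cached state is written back with amdgpu_device_load_pci_state() once the reset below has completed.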
4816 */ 4817 amdgpu_device_cache_pci_state(adev->pdev); 4818 4819 /* disable BM */ 4820 pci_clear_master(adev->pdev); 4821 4822 if (amdgpu_dpm_is_mode1_reset_supported(adev)) { 4823 dev_info(adev->dev, "GPU smu mode1 reset\n"); 4824 ret = amdgpu_dpm_mode1_reset(adev); 4825 } else { 4826 dev_info(adev->dev, "GPU psp mode1 reset\n"); 4827 ret = psp_gpu_reset(adev); 4828 } 4829 4830 if (ret) 4831 goto mode1_reset_failed; 4832 4833 amdgpu_device_load_pci_state(adev->pdev); 4834 ret = amdgpu_psp_wait_for_bootloader(adev); 4835 if (ret) 4836 goto mode1_reset_failed; 4837 4838 /* wait for asic to come out of reset */ 4839 for (i = 0; i < adev->usec_timeout; i++) { 4840 u32 memsize = adev->nbio.funcs->get_memsize(adev); 4841 4842 if (memsize != 0xffffffff) 4843 break; 4844 udelay(1); 4845 } 4846 4847 if (i >= adev->usec_timeout) { 4848 ret = -ETIMEDOUT; 4849 goto mode1_reset_failed; 4850 } 4851 4852 amdgpu_atombios_scratch_regs_engine_hung(adev, false); 4853 4854 return 0; 4855 4856 mode1_reset_failed: 4857 dev_err(adev->dev, "GPU mode1 reset failed\n"); 4858 return ret; 4859 } 4860 4861 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 4862 struct amdgpu_reset_context *reset_context) 4863 { 4864 int i, r = 0; 4865 struct amdgpu_job *job = NULL; 4866 bool need_full_reset = 4867 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4868 4869 if (reset_context->reset_req_dev == adev) 4870 job = reset_context->job; 4871 4872 if (amdgpu_sriov_vf(adev)) { 4873 /* stop the data exchange thread */ 4874 amdgpu_virt_fini_data_exchange(adev); 4875 } 4876 4877 amdgpu_fence_driver_isr_toggle(adev, true); 4878 4879 /* block all schedulers and reset given job's ring */ 4880 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4881 struct amdgpu_ring *ring = adev->rings[i]; 4882 4883 if (!ring || !ring->sched.thread) 4884 continue; 4885 4886 /* Clear job fence from fence drv to avoid force_completion 4887 * leave NULL and vm flush fence in fence drv 4888 */ 4889 amdgpu_fence_driver_clear_job_fences(ring); 4890 4891 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 4892 amdgpu_fence_driver_force_completion(ring); 4893 } 4894 4895 amdgpu_fence_driver_isr_toggle(adev, false); 4896 4897 if (job && job->vm) 4898 drm_sched_increase_karma(&job->base); 4899 4900 r = amdgpu_reset_prepare_hwcontext(adev, reset_context); 4901 /* If reset handler not implemented, continue; otherwise return */ 4902 if (r == -EOPNOTSUPP) 4903 r = 0; 4904 else 4905 return r; 4906 4907 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 4908 if (!amdgpu_sriov_vf(adev)) { 4909 4910 if (!need_full_reset) 4911 need_full_reset = amdgpu_device_ip_need_full_reset(adev); 4912 4913 if (!need_full_reset && amdgpu_gpu_recovery && 4914 amdgpu_device_ip_check_soft_reset(adev)) { 4915 amdgpu_device_ip_pre_soft_reset(adev); 4916 r = amdgpu_device_ip_soft_reset(adev); 4917 amdgpu_device_ip_post_soft_reset(adev); 4918 if (r || amdgpu_device_ip_check_soft_reset(adev)) { 4919 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n"); 4920 need_full_reset = true; 4921 } 4922 } 4923 4924 if (need_full_reset) 4925 r = amdgpu_device_ip_suspend(adev); 4926 if (need_full_reset) 4927 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 4928 else 4929 clear_bit(AMDGPU_NEED_FULL_RESET, 4930 &reset_context->flags); 4931 } 4932 4933 return r; 4934 } 4935 4936 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) 4937 { 4938 int i; 4939 4940 lockdep_assert_held(&adev->reset_domain->sem); 4941 
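/* Snapshot the user-selected registers; the values are reported in the devcoredump below. */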
4942 for (i = 0; i < adev->num_regs; i++) { 4943 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); 4944 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], 4945 adev->reset_dump_reg_value[i]); 4946 } 4947 4948 return 0; 4949 } 4950 4951 #ifdef CONFIG_DEV_COREDUMP 4952 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, 4953 size_t count, void *data, size_t datalen) 4954 { 4955 struct drm_printer p; 4956 struct amdgpu_device *adev = data; 4957 struct drm_print_iterator iter; 4958 int i; 4959 4960 iter.data = buffer; 4961 iter.offset = 0; 4962 iter.start = offset; 4963 iter.remain = count; 4964 4965 p = drm_coredump_printer(&iter); 4966 4967 drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); 4968 drm_printf(&p, "kernel: " UTS_RELEASE "\n"); 4969 drm_printf(&p, "module: " KBUILD_MODNAME "\n"); 4970 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec); 4971 if (adev->reset_task_info.pid) 4972 drm_printf(&p, "process_name: %s PID: %d\n", 4973 adev->reset_task_info.process_name, 4974 adev->reset_task_info.pid); 4975 4976 if (adev->reset_vram_lost) 4977 drm_printf(&p, "VRAM is lost due to GPU reset!\n"); 4978 if (adev->num_regs) { 4979 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); 4980 4981 for (i = 0; i < adev->num_regs; i++) 4982 drm_printf(&p, "0x%08x: 0x%08x\n", 4983 adev->reset_dump_reg_list[i], 4984 adev->reset_dump_reg_value[i]); 4985 } 4986 4987 return count - iter.remain; 4988 } 4989 4990 static void amdgpu_devcoredump_free(void *data) 4991 { 4992 } 4993 4994 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev) 4995 { 4996 struct drm_device *dev = adev_to_drm(adev); 4997 4998 ktime_get_ts64(&adev->reset_time); 4999 dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT, 5000 amdgpu_devcoredump_read, amdgpu_devcoredump_free); 5001 } 5002 #endif 5003 5004 int amdgpu_do_asic_reset(struct list_head *device_list_handle, 5005 struct amdgpu_reset_context *reset_context) 5006 { 5007 struct amdgpu_device *tmp_adev = NULL; 5008 bool need_full_reset, skip_hw_reset, vram_lost = false; 5009 int r = 0; 5010 bool gpu_reset_for_dev_remove = 0; 5011 5012 /* Try reset handler method first */ 5013 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5014 reset_list); 5015 amdgpu_reset_reg_dumps(tmp_adev); 5016 5017 reset_context->reset_device_list = device_list_handle; 5018 r = amdgpu_reset_perform_reset(tmp_adev, reset_context); 5019 /* If reset handler not implemented, continue; otherwise return */ 5020 if (r == -EOPNOTSUPP) 5021 r = 0; 5022 else 5023 return r; 5024 5025 /* Reset handler not implemented, use the default method */ 5026 need_full_reset = 5027 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5028 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); 5029 5030 gpu_reset_for_dev_remove = 5031 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 5032 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5033 5034 /* 5035 * ASIC reset has to be done on all XGMI hive nodes ASAP 5036 * to allow proper links negotiation in FW (within 1 sec) 5037 */ 5038 if (!skip_hw_reset && need_full_reset) { 5039 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5040 /* For XGMI run all resets in parallel to speed up the process */ 5041 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 5042 tmp_adev->gmc.xgmi.pending_reset = false; 5043 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) 5044 r = -EALREADY; 5045 } 
else 5046 r = amdgpu_asic_reset(tmp_adev); 5047 5048 if (r) { 5049 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s", 5050 r, adev_to_drm(tmp_adev)->unique); 5051 break; 5052 } 5053 } 5054 5055 /* For XGMI wait for all resets to complete before proceed */ 5056 if (!r) { 5057 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5058 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 5059 flush_work(&tmp_adev->xgmi_reset_work); 5060 r = tmp_adev->asic_reset_res; 5061 if (r) 5062 break; 5063 } 5064 } 5065 } 5066 } 5067 5068 if (!r && amdgpu_ras_intr_triggered()) { 5069 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5070 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && 5071 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 5072 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); 5073 } 5074 5075 amdgpu_ras_intr_cleared(); 5076 } 5077 5078 /* Since the mode1 reset affects base ip blocks, the 5079 * phase1 ip blocks need to be resumed. Otherwise there 5080 * will be a BIOS signature error and the psp bootloader 5081 * can't load kdb on the next amdgpu install. 5082 */ 5083 if (gpu_reset_for_dev_remove) { 5084 list_for_each_entry(tmp_adev, device_list_handle, reset_list) 5085 amdgpu_device_ip_resume_phase1(tmp_adev); 5086 5087 goto end; 5088 } 5089 5090 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5091 if (need_full_reset) { 5092 /* post card */ 5093 r = amdgpu_device_asic_init(tmp_adev); 5094 if (r) { 5095 dev_warn(tmp_adev->dev, "asic atom init failed!"); 5096 } else { 5097 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 5098 5099 r = amdgpu_device_ip_resume_phase1(tmp_adev); 5100 if (r) 5101 goto out; 5102 5103 vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 5104 #ifdef CONFIG_DEV_COREDUMP 5105 tmp_adev->reset_vram_lost = vram_lost; 5106 memset(&tmp_adev->reset_task_info, 0, 5107 sizeof(tmp_adev->reset_task_info)); 5108 if (reset_context->job && reset_context->job->vm) 5109 tmp_adev->reset_task_info = 5110 reset_context->job->vm->task_info; 5111 amdgpu_reset_capture_coredumpm(tmp_adev); 5112 #endif 5113 if (vram_lost) { 5114 DRM_INFO("VRAM is lost due to GPU reset!\n"); 5115 amdgpu_inc_vram_lost(tmp_adev); 5116 } 5117 5118 r = amdgpu_device_fw_loading(tmp_adev); 5119 if (r) 5120 return r; 5121 5122 r = amdgpu_device_ip_resume_phase2(tmp_adev); 5123 if (r) 5124 goto out; 5125 5126 if (vram_lost) 5127 amdgpu_device_fill_reset_magic(tmp_adev); 5128 5129 /* 5130 * Add this ASIC as tracked as reset was already 5131 * complete successfully. 5132 */ 5133 amdgpu_register_gpu_instance(tmp_adev); 5134 5135 if (!reset_context->hive && 5136 tmp_adev->gmc.xgmi.num_physical_nodes > 1) 5137 amdgpu_xgmi_add_device(tmp_adev); 5138 5139 r = amdgpu_device_ip_late_init(tmp_adev); 5140 if (r) 5141 goto out; 5142 5143 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false); 5144 5145 /* 5146 * The GPU enters bad state once faulty pages 5147 * by ECC has reached the threshold, and ras 5148 * recovery is scheduled next. So add one check 5149 * here to break recovery if it indeed exceeds 5150 * bad page threshold, and remind user to 5151 * retire this GPU or setting one bigger 5152 * bad_page_threshold value to fix this once 5153 * probing driver again. 5154 */ 5155 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) { 5156 /* must succeed. 
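* The bad-page threshold was just checked, so resuming RAS here is expected to succeed.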
*/ 5157 amdgpu_ras_resume(tmp_adev); 5158 } else { 5159 r = -EINVAL; 5160 goto out; 5161 } 5162 5163 /* Update PSP FW topology after reset */ 5164 if (reset_context->hive && 5165 tmp_adev->gmc.xgmi.num_physical_nodes > 1) 5166 r = amdgpu_xgmi_update_topology( 5167 reset_context->hive, tmp_adev); 5168 } 5169 } 5170 5171 out: 5172 if (!r) { 5173 amdgpu_irq_gpu_reset_resume_helper(tmp_adev); 5174 r = amdgpu_ib_ring_tests(tmp_adev); 5175 if (r) { 5176 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); 5177 need_full_reset = true; 5178 r = -EAGAIN; 5179 goto end; 5180 } 5181 } 5182 5183 if (!r) 5184 r = amdgpu_device_recover_vram(tmp_adev); 5185 else 5186 tmp_adev->asic_reset_res = r; 5187 } 5188 5189 end: 5190 if (need_full_reset) 5191 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5192 else 5193 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5194 return r; 5195 } 5196 5197 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) 5198 { 5199 5200 switch (amdgpu_asic_reset_method(adev)) { 5201 case AMD_RESET_METHOD_MODE1: 5202 adev->mp1_state = PP_MP1_STATE_SHUTDOWN; 5203 break; 5204 case AMD_RESET_METHOD_MODE2: 5205 adev->mp1_state = PP_MP1_STATE_RESET; 5206 break; 5207 default: 5208 adev->mp1_state = PP_MP1_STATE_NONE; 5209 break; 5210 } 5211 5212 pci_dev_put(p); 5213 } 5214 5215 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev) 5216 { 5217 amdgpu_vf_error_trans_all(adev); 5218 adev->mp1_state = PP_MP1_STATE_NONE; 5219 } 5220 5221 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) 5222 { 5223 STUB(); 5224 #ifdef notyet 5225 struct pci_dev *p = NULL; 5226 5227 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 5228 adev->pdev->bus->number, 1); 5229 if (p) { 5230 pm_runtime_enable(&(p->dev)); 5231 pm_runtime_resume(&(p->dev)); 5232 } 5233 #endif 5234 } 5235 5236 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) 5237 { 5238 enum amd_reset_method reset_method; 5239 struct pci_dev *p = NULL; 5240 u64 expires; 5241 5242 /* 5243 * For now, only BACO and mode1 reset are confirmed 5244 * to suffer the audio issue without proper suspended. 5245 */ 5246 reset_method = amdgpu_asic_reset_method(adev); 5247 if ((reset_method != AMD_RESET_METHOD_BACO) && 5248 (reset_method != AMD_RESET_METHOD_MODE1)) 5249 return -EINVAL; 5250 5251 STUB(); 5252 return -ENOSYS; 5253 #ifdef notyet 5254 5255 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 5256 adev->pdev->bus->number, 1); 5257 if (!p) 5258 return -ENODEV; 5259 5260 expires = pm_runtime_autosuspend_expiration(&(p->dev)); 5261 if (!expires) 5262 /* 5263 * If we cannot get the audio device autosuspend delay, 5264 * a fixed 4S interval will be used. Considering 3S is 5265 * the audio controller default autosuspend delay setting. 5266 * 4S used here is guaranteed to cover that. 5267 */ 5268 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; 5269 5270 while (!pm_runtime_status_suspended(&(p->dev))) { 5271 if (!pm_runtime_suspend(&(p->dev))) 5272 break; 5273 5274 if (expires < ktime_get_mono_fast_ns()) { 5275 dev_warn(adev->dev, "failed to suspend display audio\n"); 5276 pci_dev_put(p); 5277 /* TODO: abort the succeeding gpu reset? 
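* For now the reset continues; the caller simply treats the audio codec as not suspended.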
*/ 5278 return -ETIMEDOUT; 5279 } 5280 } 5281 5282 pm_runtime_disable(&(p->dev)); 5283 5284 pci_dev_put(p); 5285 return 0; 5286 #endif 5287 } 5288 5289 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) 5290 { 5291 struct amdgpu_ras *con = amdgpu_ras_get_context(adev); 5292 5293 #if defined(CONFIG_DEBUG_FS) 5294 if (!amdgpu_sriov_vf(adev)) 5295 cancel_work(&adev->reset_work); 5296 #endif 5297 5298 if (adev->kfd.dev) 5299 cancel_work(&adev->kfd.reset_work); 5300 5301 if (amdgpu_sriov_vf(adev)) 5302 cancel_work(&adev->virt.flr_work); 5303 5304 if (con && adev->ras_enabled) 5305 cancel_work(&con->recovery_work); 5306 5307 } 5308 5309 /** 5310 * amdgpu_device_gpu_recover - reset the asic and recover scheduler 5311 * 5312 * @adev: amdgpu_device pointer 5313 * @job: which job trigger hang 5314 * @reset_context: amdgpu reset context pointer 5315 * 5316 * Attempt to reset the GPU if it has hung (all asics). 5317 * Attempt to do soft-reset or full-reset and reinitialize Asic 5318 * Returns 0 for success or an error on failure. 5319 */ 5320 5321 int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 5322 struct amdgpu_job *job, 5323 struct amdgpu_reset_context *reset_context) 5324 { 5325 struct list_head device_list, *device_list_handle = NULL; 5326 bool job_signaled = false; 5327 struct amdgpu_hive_info *hive = NULL; 5328 struct amdgpu_device *tmp_adev = NULL; 5329 int i, r = 0; 5330 bool need_emergency_restart = false; 5331 bool audio_suspended = false; 5332 bool gpu_reset_for_dev_remove = false; 5333 5334 gpu_reset_for_dev_remove = 5335 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 5336 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5337 5338 /* 5339 * Special case: RAS triggered and full reset isn't supported 5340 */ 5341 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); 5342 5343 /* 5344 * Flush RAM to disk so that after reboot 5345 * the user can read log and see why the system rebooted. 5346 */ 5347 if (need_emergency_restart && amdgpu_ras_get_context(adev) && 5348 amdgpu_ras_get_context(adev)->reboot) { 5349 DRM_WARN("Emergency reboot."); 5350 5351 #ifdef notyet 5352 ksys_sync_helper(); 5353 emergency_restart(); 5354 #else 5355 panic("emergency_restart"); 5356 #endif 5357 } 5358 5359 dev_info(adev->dev, "GPU %s begin!\n", 5360 need_emergency_restart ? "jobs stop":"reset"); 5361 5362 if (!amdgpu_sriov_vf(adev)) 5363 hive = amdgpu_get_xgmi_hive(adev); 5364 if (hive) 5365 mutex_lock(&hive->hive_lock); 5366 5367 reset_context->job = job; 5368 reset_context->hive = hive; 5369 /* 5370 * Build list of devices to reset. 5371 * In case we are in XGMI hive mode, resort the device list 5372 * to put adev in the 1st position. 
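* The first entry is also the device whose reset domain gets locked for the whole recovery sequence.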
5373 */ 5374 INIT_LIST_HEAD(&device_list); 5375 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { 5376 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 5377 list_add_tail(&tmp_adev->reset_list, &device_list); 5378 if (gpu_reset_for_dev_remove && adev->shutdown) 5379 tmp_adev->shutdown = true; 5380 } 5381 if (!list_is_first(&adev->reset_list, &device_list)) 5382 list_rotate_to_front(&adev->reset_list, &device_list); 5383 device_list_handle = &device_list; 5384 } else { 5385 list_add_tail(&adev->reset_list, &device_list); 5386 device_list_handle = &device_list; 5387 } 5388 5389 /* We need to lock reset domain only once both for XGMI and single device */ 5390 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5391 reset_list); 5392 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); 5393 5394 /* block all schedulers and reset given job's ring */ 5395 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5396 5397 amdgpu_device_set_mp1_state(tmp_adev); 5398 5399 /* 5400 * Try to put the audio codec into suspend state 5401 * before gpu reset started. 5402 * 5403 * Due to the power domain of the graphics device 5404 * is shared with AZ power domain. Without this, 5405 * we may change the audio hardware from behind 5406 * the audio driver's back. That will trigger 5407 * some audio codec errors. 5408 */ 5409 if (!amdgpu_device_suspend_display_audio(tmp_adev)) 5410 audio_suspended = true; 5411 5412 amdgpu_ras_set_error_query_ready(tmp_adev, false); 5413 5414 cancel_delayed_work_sync(&tmp_adev->delayed_init_work); 5415 5416 if (!amdgpu_sriov_vf(tmp_adev)) 5417 amdgpu_amdkfd_pre_reset(tmp_adev); 5418 5419 /* 5420 * Mark these ASICs to be reseted as untracked first 5421 * And add them back after reset completed 5422 */ 5423 amdgpu_unregister_gpu_instance(tmp_adev); 5424 5425 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); 5426 5427 /* disable ras on ALL IPs */ 5428 if (!need_emergency_restart && 5429 amdgpu_device_ip_need_full_reset(tmp_adev)) 5430 amdgpu_ras_suspend(tmp_adev); 5431 5432 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5433 struct amdgpu_ring *ring = tmp_adev->rings[i]; 5434 5435 if (!ring || !ring->sched.thread) 5436 continue; 5437 5438 drm_sched_stop(&ring->sched, job ? &job->base : NULL); 5439 5440 if (need_emergency_restart) 5441 amdgpu_job_stop_all_jobs_on_sched(&ring->sched); 5442 } 5443 atomic_inc(&tmp_adev->gpu_reset_counter); 5444 } 5445 5446 if (need_emergency_restart) 5447 goto skip_sched_resume; 5448 5449 /* 5450 * Must check guilty signal here since after this point all old 5451 * HW fences are force signaled. 5452 * 5453 * job->base holds a reference to parent fence 5454 */ 5455 if (job && dma_fence_is_signaled(&job->hw_fence)) { 5456 job_signaled = true; 5457 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 5458 goto skip_hw_reset; 5459 } 5460 5461 retry: /* Rest of adevs pre asic reset from XGMI hive. */ 5462 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5463 if (gpu_reset_for_dev_remove) { 5464 /* Workaroud for ASICs need to disable SMC first */ 5465 amdgpu_device_smu_fini_early(tmp_adev); 5466 } 5467 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); 5468 /*TODO Should we stop ?*/ 5469 if (r) { 5470 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", 5471 r, adev_to_drm(tmp_adev)->unique); 5472 tmp_adev->asic_reset_res = r; 5473 } 5474 5475 /* 5476 * Drop all pending non scheduler resets. 
Scheduler resets 5477 * were already dropped during drm_sched_stop 5478 */ 5479 amdgpu_device_stop_pending_resets(tmp_adev); 5480 } 5481 5482 /* Actual ASIC resets if needed.*/ 5483 /* Host driver will handle XGMI hive reset for SRIOV */ 5484 if (amdgpu_sriov_vf(adev)) { 5485 r = amdgpu_device_reset_sriov(adev, job ? false : true); 5486 if (r) 5487 adev->asic_reset_res = r; 5488 5489 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ 5490 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || 5491 adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) 5492 amdgpu_ras_resume(adev); 5493 } else { 5494 r = amdgpu_do_asic_reset(device_list_handle, reset_context); 5495 if (r && r == -EAGAIN) 5496 goto retry; 5497 5498 if (!r && gpu_reset_for_dev_remove) 5499 goto recover_end; 5500 } 5501 5502 skip_hw_reset: 5503 5504 /* Post ASIC reset for all devs .*/ 5505 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5506 5507 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5508 struct amdgpu_ring *ring = tmp_adev->rings[i]; 5509 5510 if (!ring || !ring->sched.thread) 5511 continue; 5512 5513 drm_sched_start(&ring->sched, true); 5514 } 5515 5516 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) 5517 amdgpu_mes_self_test(tmp_adev); 5518 5519 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) 5520 drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); 5521 5522 if (tmp_adev->asic_reset_res) 5523 r = tmp_adev->asic_reset_res; 5524 5525 tmp_adev->asic_reset_res = 0; 5526 5527 if (r) { 5528 /* bad news, how to tell it to userspace ? */ 5529 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter)); 5530 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); 5531 } else { 5532 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); 5533 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) 5534 DRM_WARN("smart shift update failed\n"); 5535 } 5536 } 5537 5538 skip_sched_resume: 5539 list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5540 /* unlock kfd: SRIOV would do it separately */ 5541 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) 5542 amdgpu_amdkfd_post_reset(tmp_adev); 5543 5544 /* kfd_post_reset will do nothing if kfd device is not initialized, 5545 * need to bring up kfd here if it's not be initialized before 5546 */ 5547 if (!adev->kfd.init_complete) 5548 amdgpu_amdkfd_device_init(adev); 5549 5550 if (audio_suspended) 5551 amdgpu_device_resume_display_audio(tmp_adev); 5552 5553 amdgpu_device_unset_mp1_state(tmp_adev); 5554 5555 amdgpu_ras_set_error_query_ready(tmp_adev, true); 5556 } 5557 5558 recover_end: 5559 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 5560 reset_list); 5561 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); 5562 5563 if (hive) { 5564 mutex_unlock(&hive->hive_lock); 5565 amdgpu_put_xgmi_hive(hive); 5566 } 5567 5568 if (r) 5569 dev_info(adev->dev, "GPU reset end with ret = %d\n", r); 5570 5571 atomic_set(&adev->reset_domain->reset_res, r); 5572 return r; 5573 } 5574 5575 /** 5576 * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot 5577 * 5578 * @adev: amdgpu_device pointer 5579 * 5580 * Fetchs and stores in the driver the PCIE capabilities (gen speed 5581 * and lanes) of the slot the device is in. Handles APUs and 5582 * virtualized environments where PCIE config space may not be available. 
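* The amdgpu_pcie_gen_cap/amdgpu_pcie_lane_cap module parameters override the probed values, and devices on a root bus (e.g. APUs) fall back to the default masks.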
 */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
	struct pci_dev *pdev;
	enum pci_bus_speed speed_cap, platform_speed_cap;
	enum pcie_link_width platform_link_width;

	if (amdgpu_pcie_gen_cap)
		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;

	if (amdgpu_pcie_lane_cap)
		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;

	/* covers APUs as well */
	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
		if (adev->pm.pcie_gen_mask == 0)
			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
		if (adev->pm.pcie_mlw_mask == 0)
			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
		return;
	}

	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
		return;

	pcie_bandwidth_available(adev->pdev, NULL,
				 &platform_speed_cap, &platform_link_width);

	if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
		pdev = adev->pdev;
		speed_cap = pcie_get_speed_cap(pdev);
		if (speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
						   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
		} else {
			if (speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
		/* platform caps */
		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
		} else {
			if (platform_speed_cap == PCIE_SPEED_32_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
			else
				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
		}
	}
	if (adev->pm.pcie_mlw_mask == 0) {
		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
		} else {
			switch (platform_link_width) {
			case PCIE_LNK_X32:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X16:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X12:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X8:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X4:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X2:
				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
				break;
			case PCIE_LNK_X1:
				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
				break;
			default:
				break;
			}
		}
	}
}

/**
 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
 *
 * @adev: amdgpu_device pointer
 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
 *
 * Return true if @peer_adev can access (DMA) @adev through the PCIe
 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
 * @peer_adev.
 */
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
				      struct amdgpu_device *peer_adev)
{
#ifdef CONFIG_HSA_AMD_P2P
	uint64_t address_mask = peer_adev->dev->dma_mask ?
		~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
	resource_size_t aper_limit =
		adev->gmc.aper_base + adev->gmc.aper_size - 1;
	bool p2p_access =
		!adev->gmc.xgmi.connected_to_cpu &&
		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);

	return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
		adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
		!(adev->gmc.aper_base & address_mask ||
		  aper_limit & address_mask));
#else
	return false;
#endif
}

int amdgpu_device_baco_enter(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	if (!amdgpu_device_supports_baco(dev))
		return -ENOTSUPP;

	if (ras && adev->ras_enabled &&
	    adev->nbio.funcs->enable_doorbell_interrupt)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);

	return amdgpu_dpm_baco_enter(adev);
}

int amdgpu_device_baco_exit(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int ret = 0;

	if (!amdgpu_device_supports_baco(dev))
		return -ENOTSUPP;

	ret = amdgpu_dpm_baco_exit(adev);
	if (ret)
		return ret;

	if (ras && adev->ras_enabled &&
	    adev->nbio.funcs->enable_doorbell_interrupt)
		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);

	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
	    adev->nbio.funcs->clear_doorbell_interrupt)
		adev->nbio.funcs->clear_doorbell_interrupt(adev);

	return 0;
}

/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected.
 *
 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
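 *
 * These error-recovery callbacks are normally wired up through a
 * struct pci_error_handlers instance referenced from the PCI driver
 * registration code. An illustrative sketch only (the actual object
 * lives in the driver entry code, not in this file):
 *
 *   static const struct pci_error_handlers amdgpu_pci_err_handler = {
 *           .error_detected = amdgpu_pci_error_detected,
 *           .mmio_enabled   = amdgpu_pci_mmio_enabled,
 *           .slot_reset     = amdgpu_pci_slot_reset,
 *           .resume         = amdgpu_pci_resume,
 *   };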
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	STUB();
	return 0;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		DRM_WARN("No support for XGMI hive yet...\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	/* Fatal error, prepare for slot reset */
	case pci_channel_io_frozen:
		/*
		 * Locking adev->reset_domain->sem will prevent any external access
		 * to GPU during PCI error recovery
		 */
		amdgpu_device_lock_reset_domain(adev->reset_domain);
		amdgpu_device_set_mp1_state(adev);

		/*
		 * Block any work scheduling as we do for regular GPU reset
		 * for the duration of the recovery
		 */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, NULL);
		}
		atomic_inc(&adev->gpu_reset_counter);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
#endif
}

/**
 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
 * @pdev: pointer to PCI device
 */
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{
	DRM_INFO("PCI error: mmio enabled callback!!\n");

	/* TODO - dump whatever for debugging purposes */

	/* This is called only if amdgpu_pci_error_detected returns
	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
	 * works, no need to reset slot.
	 */

	return PCI_ERS_RESULT_RECOVERED;
}

/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the PCI error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	STUB();
	return PCI_ERS_RESULT_RECOVERED;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r, i;
	struct amdgpu_reset_context reset_context;
	u32 memsize;
	struct list_head device_list;

	DRM_INFO("PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));

	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	/* wait for asic to come out of reset */
	drm_msleep(500);

	/* Restore PCI config space */
	amdgpu_device_load_pci_state(pdev);

	/* confirm ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);

	adev->no_hw_access = true;
	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
	adev->no_hw_access = false;
	if (r)
		goto out;

	r = amdgpu_do_asic_reset(&device_list, &reset_context);

out:
	if (!r) {
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);

		DRM_INFO("PCIe error recovery succeeded\n");
	} else {
		DRM_ERROR("PCIe error recovery failed, err:%d\n", r);
		amdgpu_device_unset_mp1_state(adev);
		amdgpu_device_unlock_reset_domain(adev->reset_domain);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
#endif
}

/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that it's
 * OK to resume normal operation.
 */
void amdgpu_pci_resume(struct pci_dev *pdev)
{
	STUB();
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: resume callback!!\n");

	/* Only continue execution for the case of pci_channel_io_frozen */
	if (adev->pci_channel_state != pci_channel_io_frozen)
		return;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;

		drm_sched_start(&ring->sched, true);
	}

	amdgpu_device_unset_mp1_state(adev);
	amdgpu_device_unlock_reset_domain(adev->reset_domain);
#endif
}

bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
{
	return false;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	r = pci_save_state(pdev);
	if (!r) {
		kfree(adev->pci_state);

		adev->pci_state = pci_store_saved_state(pdev);

		if (!adev->pci_state) {
			DRM_ERROR("Failed to store PCI saved state\n");
			return false;
		}
	} else {
		DRM_WARN("Failed to save PCI state, err:%d\n", r);
		return false;
	}

	return true;
#endif
}

bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
{
	STUB();
	return false;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	if (!adev->pci_state)
		return false;

	r = pci_load_saved_state(pdev, adev->pci_state);

	if (!r) {
		pci_restore_state(pdev);
	} else {
		DRM_WARN("Failed to load PCI state, err:%d\n", r);
		return false;
	}

	return true;
#endif
}

void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
		struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
		return;
#endif
	if (adev->gmc.xgmi.connected_to_cpu)
		return;

	if (ring && ring->funcs->emit_hdp_flush)
		amdgpu_ring_emit_hdp_flush(ring);
	else
		amdgpu_asic_flush_hdp(adev, ring);
}

void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
		struct amdgpu_ring *ring)
{
#ifdef CONFIG_X86_64
	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
		return;
#endif
	if (adev->gmc.xgmi.connected_to_cpu)
		return;

	amdgpu_asic_invalidate_hdp(adev, ring);
}

int amdgpu_in_reset(struct amdgpu_device *adev)
{
	return atomic_read(&adev->reset_domain->in_gpu_reset);
}

/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It helps to maintain the error context when an error occurs.
 * Compared to a simple hang, the system stays stable at least for SSH
 * access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as follows:
 *
 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs,
 *    etc.), clears all CPU mappings to the device, disallows remappings
 *    through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
 *    flush any in-flight DMA operations
 */
void amdgpu_device_halt(struct amdgpu_device *adev)
{
	struct pci_dev *pdev = adev->pdev;
	struct drm_device *ddev = adev_to_drm(adev);

	amdgpu_xcp_dev_unplug(adev);
	drm_dev_unplug(ddev);

	amdgpu_irq_disable_all(adev);

	amdgpu_fence_driver_hw_fini(adev);

	adev->no_hw_access = true;

	amdgpu_device_unmap_mmio(adev);

	pci_disable_device(pdev);
	pci_wait_for_pending_transaction(pdev);
}

u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
				 u32 reg)
{
	unsigned long flags, address, data;
	u32 r;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	r = RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
	return r;
}

void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				  u32 reg, u32 v)
{
	unsigned long flags, address, data;

	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	WREG32(address, reg * 4);
	(void)RREG32(address);
	WREG32(data, v);
	(void)RREG32(data);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_switch_gang - switch to a new gang
 * @adev: amdgpu_device pointer
 * @gang: the gang to switch to
 *
 * Try to switch to a new gang.
 * Returns: NULL if we switched to the new gang or a reference to the current
 * gang leader.
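 *
 * A non-NULL return value is meant to be treated as a dependency: wait for
 * (or schedule against) the returned fence, drop the reference and retry.
 * A minimal sketch of that pattern, illustrative only and not a caller in
 * this file:
 *
 *   struct dma_fence *leader;
 *
 *   while ((leader = amdgpu_device_switch_gang(adev, gang))) {
 *           dma_fence_wait(leader, false);
 *           dma_fence_put(leader);
 *   }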
 */
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang)
{
	struct dma_fence *old = NULL;

	do {
		dma_fence_put(old);
		rcu_read_lock();
		old = dma_fence_get_rcu_safe(&adev->gang_submit);
		rcu_read_unlock();

		if (old == gang)
			break;

		if (!dma_fence_is_signaled(old))
			return old;

	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
			 old, gang) != old);

	dma_fence_put(old);
	return NULL;
}

bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_BONAIRE:
	case CHIP_HAWAII:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
#endif
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		/* chips with display hardware */
		return true;
	default:
		/* IP discovery */
		if (!adev->ip_versions[DCE_HWIP][0] ||
		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
			return false;
		return true;
	}
}

uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
		uint32_t inst, uint32_t reg_addr, char reg_name[],
		uint32_t expected_value, uint32_t mask)
{
	uint32_t ret = 0;
	uint32_t old_ = 0;
	uint32_t tmp_ = RREG32(reg_addr);
	uint32_t loop = adev->usec_timeout;

	while ((tmp_ & (mask)) != (expected_value)) {
		if (old_ != tmp_) {
			loop = adev->usec_timeout;
			old_ = tmp_;
		} else
			udelay(1);
		tmp_ = RREG32(reg_addr);
		loop--;
		if (!loop) {
			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
				  inst, reg_name, (uint32_t)expected_value,
				  (uint32_t)(tmp_ & (mask)));
			ret = -ETIMEDOUT;
			break;
		}
	}
	return ret;
}
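/*
 * Example (illustrative only, not used by the driver): polling a register
 * until a status bit is set with amdgpu_device_wait_on_rreg(). The register
 * offset and bit below are made up for the sake of the sketch; real callers
 * pass SOC-specific values.
 *
 *   if (amdgpu_device_wait_on_rreg(adev, 0, reg_offset, "MY_STATUS",
 *                                  STATUS_READY_BIT, STATUS_READY_BIT))
 *           DRM_ERROR("IP block did not become ready\n");
 *
 * The helper re-arms its timeout whenever the register value changes, so it
 * only gives up after adev->usec_timeout polls without any progress and then
 * returns a non-zero value.
 */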