1fb4d8502Sjsg /* 2fb4d8502Sjsg * Copyright 2008 Advanced Micro Devices, Inc. 3fb4d8502Sjsg * Copyright 2008 Red Hat Inc. 4fb4d8502Sjsg * Copyright 2009 Jerome Glisse. 5fb4d8502Sjsg * 6fb4d8502Sjsg * Permission is hereby granted, free of charge, to any person obtaining a 7fb4d8502Sjsg * copy of this software and associated documentation files (the "Software"), 8fb4d8502Sjsg * to deal in the Software without restriction, including without limitation 9fb4d8502Sjsg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10fb4d8502Sjsg * and/or sell copies of the Software, and to permit persons to whom the 11fb4d8502Sjsg * Software is furnished to do so, subject to the following conditions: 12fb4d8502Sjsg * 13fb4d8502Sjsg * The above copyright notice and this permission notice shall be included in 14fb4d8502Sjsg * all copies or substantial portions of the Software. 15fb4d8502Sjsg * 16fb4d8502Sjsg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17fb4d8502Sjsg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18fb4d8502Sjsg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19fb4d8502Sjsg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20fb4d8502Sjsg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21fb4d8502Sjsg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22fb4d8502Sjsg * OTHER DEALINGS IN THE SOFTWARE. 23fb4d8502Sjsg * 24fb4d8502Sjsg * Authors: Dave Airlie 25fb4d8502Sjsg * Alex Deucher 26fb4d8502Sjsg * Jerome Glisse 27fb4d8502Sjsg */ 28fb4d8502Sjsg #include <linux/power_supply.h> 29fb4d8502Sjsg #include <linux/kthread.h> 30c349dbc7Sjsg #include <linux/module.h> 31fb4d8502Sjsg #include <linux/console.h> 32fb4d8502Sjsg #include <linux/slab.h> 331bb76ff1Sjsg #include <linux/iommu.h> 34f9c49ec7Sjsg #include <linux/pci.h> 351bb76ff1Sjsg #include <linux/devcoredump.h> 361bb76ff1Sjsg #include <generated/utsrelease.h> 371bb76ff1Sjsg #include <linux/pci-p2pdma.h> 3878c2b773Sjsg #include <linux/apple-gmux.h> 39c349dbc7Sjsg 40269b8745Sjsg #include <drm/drm_aperture.h> 41fb4d8502Sjsg #include <drm/drm_atomic_helper.h> 42f005ef32Sjsg #include <drm/drm_crtc_helper.h> 43f005ef32Sjsg #include <drm/drm_fb_helper.h> 44c349dbc7Sjsg #include <drm/drm_probe_helper.h> 45fb4d8502Sjsg #include <drm/amdgpu_drm.h> 46997286d4Sjsg #include <linux/device.h> 47fb4d8502Sjsg #include <linux/vgaarb.h> 48fb4d8502Sjsg #include <linux/vga_switcheroo.h> 49fb4d8502Sjsg #include <linux/efi.h> 50fb4d8502Sjsg #include "amdgpu.h" 51fb4d8502Sjsg #include "amdgpu_trace.h" 52fb4d8502Sjsg #include "amdgpu_i2c.h" 53fb4d8502Sjsg #include "atom.h" 54fb4d8502Sjsg #include "amdgpu_atombios.h" 55fb4d8502Sjsg #include "amdgpu_atomfirmware.h" 56fb4d8502Sjsg #include "amd_pcie.h" 57fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_SI 58fb4d8502Sjsg #include "si.h" 59fb4d8502Sjsg #endif 60fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK 61fb4d8502Sjsg #include "cik.h" 62fb4d8502Sjsg #endif 63fb4d8502Sjsg #include "vi.h" 64fb4d8502Sjsg #include "soc15.h" 65c349dbc7Sjsg #include "nv.h" 66fb4d8502Sjsg #include "bif/bif_4_1_d.h" 67fb4d8502Sjsg #include <linux/firmware.h> 68fb4d8502Sjsg #include "amdgpu_vf_error.h" 69fb4d8502Sjsg 70fb4d8502Sjsg #include "amdgpu_amdkfd.h" 71fb4d8502Sjsg #include "amdgpu_pm.h" 72fb4d8502Sjsg 73c349dbc7Sjsg #include "amdgpu_xgmi.h" 74c349dbc7Sjsg #include "amdgpu_ras.h" 75c349dbc7Sjsg #include "amdgpu_pmu.h" 76ad8b1aafSjsg #include "amdgpu_fru_eeprom.h" 775ca02815Sjsg 
#include "amdgpu_reset.h" 78c349dbc7Sjsg 79c349dbc7Sjsg #include <linux/suspend.h> 80c349dbc7Sjsg #include <drm/task_barrier.h> 81ad8b1aafSjsg #include <linux/pm_runtime.h> 82c349dbc7Sjsg 835ca02815Sjsg #include <drm/drm_drv.h> 845ca02815Sjsg 85e73b7337Sjsg #if IS_ENABLED(CONFIG_X86) && defined(__linux__) 86e73b7337Sjsg #include <asm/intel-family.h> 87e73b7337Sjsg #endif 88e73b7337Sjsg 89fb4d8502Sjsg MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); 90fb4d8502Sjsg MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); 91fb4d8502Sjsg MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); 924fe6e3f4Sjsg MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); 93c349dbc7Sjsg MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); 94c349dbc7Sjsg MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); 95c349dbc7Sjsg MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); 96fb4d8502Sjsg 97fb4d8502Sjsg #define AMDGPU_RESUME_MS 2000 981bb76ff1Sjsg #define AMDGPU_MAX_RETRY_LIMIT 2 991bb76ff1Sjsg #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) 100fb4d8502Sjsg 101269b8745Sjsg static const struct drm_driver amdgpu_kms_driver; 102269b8745Sjsg 103c349dbc7Sjsg const char *amdgpu_asic_name[] = { 104fb4d8502Sjsg "TAHITI", 105fb4d8502Sjsg "PITCAIRN", 106fb4d8502Sjsg "VERDE", 107fb4d8502Sjsg "OLAND", 108fb4d8502Sjsg "HAINAN", 109fb4d8502Sjsg "BONAIRE", 110fb4d8502Sjsg "KAVERI", 111fb4d8502Sjsg "KABINI", 112fb4d8502Sjsg "HAWAII", 113fb4d8502Sjsg "MULLINS", 114fb4d8502Sjsg "TOPAZ", 115fb4d8502Sjsg "TONGA", 116fb4d8502Sjsg "FIJI", 117fb4d8502Sjsg "CARRIZO", 118fb4d8502Sjsg "STONEY", 119fb4d8502Sjsg "POLARIS10", 120fb4d8502Sjsg "POLARIS11", 121fb4d8502Sjsg "POLARIS12", 122fb4d8502Sjsg "VEGAM", 123fb4d8502Sjsg "VEGA10", 124fb4d8502Sjsg "VEGA12", 125fb4d8502Sjsg "VEGA20", 126fb4d8502Sjsg "RAVEN", 127c349dbc7Sjsg "ARCTURUS", 128c349dbc7Sjsg "RENOIR", 1295ca02815Sjsg "ALDEBARAN", 130c349dbc7Sjsg "NAVI10", 1315ca02815Sjsg "CYAN_SKILLFISH", 132c349dbc7Sjsg "NAVI14", 133c349dbc7Sjsg "NAVI12", 134ad8b1aafSjsg "SIENNA_CICHLID", 135ad8b1aafSjsg "NAVY_FLOUNDER", 1365ca02815Sjsg "VANGOGH", 1375ca02815Sjsg "DIMGREY_CAVEFISH", 1385ca02815Sjsg "BEIGE_GOBY", 1395ca02815Sjsg "YELLOW_CARP", 1401bb76ff1Sjsg "IP DISCOVERY", 141fb4d8502Sjsg "LAST", 142fb4d8502Sjsg }; 143fb4d8502Sjsg 144c349dbc7Sjsg /** 145c349dbc7Sjsg * DOC: pcie_replay_count 146c349dbc7Sjsg * 147c349dbc7Sjsg * The amdgpu driver provides a sysfs API for reporting the total number 148c349dbc7Sjsg * of PCIe replays (NAKs) 149c349dbc7Sjsg * The file pcie_replay_count is used for this and returns the total 150c349dbc7Sjsg * number of replays as a sum of the NAKs generated and NAKs received 151c349dbc7Sjsg */ 152c349dbc7Sjsg 153c349dbc7Sjsg static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, 154c349dbc7Sjsg struct device_attribute *attr, char *buf) 155c349dbc7Sjsg { 156c349dbc7Sjsg struct drm_device *ddev = dev_get_drvdata(dev); 157ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(ddev); 158c349dbc7Sjsg uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev); 159c349dbc7Sjsg 1605ca02815Sjsg return sysfs_emit(buf, "%llu\n", cnt); 161c349dbc7Sjsg } 162c349dbc7Sjsg 163f005ef32Sjsg static DEVICE_ATTR(pcie_replay_count, 0444, 164c349dbc7Sjsg amdgpu_device_get_pcie_replay_count, NULL); 165c349dbc7Sjsg 166fb4d8502Sjsg static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); 167fb4d8502Sjsg 168ad8b1aafSjsg 169ad8b1aafSjsg /** 1705ca02815Sjsg * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control 171fb4d8502Sjsg * 172fb4d8502Sjsg * @dev: 
drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise returns false.
 */
bool amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}
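/*
 * Illustrative sketch (not part of the upstream file): one way a runtime
 * power-management path could use the helpers above to pick a dGPU
 * power-down mechanism.  The enum and function names below are made up
 * for this example only.
 */
enum example_dgpu_pm_mode {
	EXAMPLE_DGPU_PM_NONE,	/* no runtime power-down support */
	EXAMPLE_DGPU_PM_PX,	/* ATPX-controlled power (PX) */
	EXAMPLE_DGPU_PM_BOCO,	/* ACPI power resources (BOCO/PR3) */
	EXAMPLE_DGPU_PM_BACO,	/* Bus Active, Chip Off */
};

static enum example_dgpu_pm_mode __maybe_unused
example_pick_dgpu_pm_mode(struct drm_device *dev)
{
	if (amdgpu_device_supports_px(dev))
		return EXAMPLE_DGPU_PM_PX;
	if (amdgpu_device_supports_boco(dev))
		return EXAMPLE_DGPU_PM_BOCO;
	if (amdgpu_device_supports_baco(dev))
		return EXAMPLE_DGPU_PM_BACO;
	return EXAMPLE_DGPU_PM_NONE;
}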
/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be >= @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the vram aperture to access vram first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM_INDEX/MM_DATA to access the rest of vram */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}
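/*
 * Illustrative sketch (not part of the upstream file): copying a few
 * dwords out of VRAM and back with the helper above.  The 0x1000 offset
 * is an arbitrary example value; @pos and @size must stay dword aligned
 * or the MM_INDEX/MM_DATA fallback path will BUG().
 */
static void __maybe_unused example_vram_roundtrip(struct amdgpu_device *adev)
{
	uint32_t data[4];

	/* read 16 bytes from VRAM offset 0x1000 into system memory */
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), false);

	/* ... patch the buffer ..., then write it back to the same offset */
	amdgpu_device_vram_access(adev, 0x1000, data, sizeof(data), true);
}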
/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: byte offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}

/*
 * MMIO register write with bytes helper functions
 * @offset: byte offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
460fb4d8502Sjsg */ 461ad8b1aafSjsg void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) 462ad8b1aafSjsg { 4635ca02815Sjsg if (amdgpu_device_skip_hw_access(adev)) 464ad8b1aafSjsg return; 465ad8b1aafSjsg 466fb4d8502Sjsg if (offset < adev->rmmio_size) 467e54dbfe7Sjsg writeb(value, adev->rmmio + offset); 468fb4d8502Sjsg else 469fb4d8502Sjsg BUG(); 470fb4d8502Sjsg } 471fb4d8502Sjsg 472fb4d8502Sjsg /** 473ad8b1aafSjsg * amdgpu_device_wreg - write to a memory mapped IO or indirect register 474c349dbc7Sjsg * 475c349dbc7Sjsg * @adev: amdgpu_device pointer 476c349dbc7Sjsg * @reg: dword aligned register offset 477c349dbc7Sjsg * @v: 32 bit value to write to the register 478c349dbc7Sjsg * @acc_flags: access flags which require special behavior 479c349dbc7Sjsg * 480c349dbc7Sjsg * Writes the value specified to the offset specified. 481c349dbc7Sjsg */ 482ad8b1aafSjsg void amdgpu_device_wreg(struct amdgpu_device *adev, 483ad8b1aafSjsg uint32_t reg, uint32_t v, 484c349dbc7Sjsg uint32_t acc_flags) 485c349dbc7Sjsg { 4865ca02815Sjsg if (amdgpu_device_skip_hw_access(adev)) 487ad8b1aafSjsg return; 488ad8b1aafSjsg 489ad8b1aafSjsg if ((reg * 4) < adev->rmmio_size) { 490ad8b1aafSjsg if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && 491ad8b1aafSjsg amdgpu_sriov_runtime(adev) && 4921bb76ff1Sjsg down_read_trylock(&adev->reset_domain->sem)) { 493ad8b1aafSjsg amdgpu_kiq_wreg(adev, reg, v); 4941bb76ff1Sjsg up_read(&adev->reset_domain->sem); 495ad8b1aafSjsg } else { 496ad8b1aafSjsg writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 497ad8b1aafSjsg } 498ad8b1aafSjsg } else { 499ad8b1aafSjsg adev->pcie_wreg(adev, reg * 4, v); 500c349dbc7Sjsg } 501c349dbc7Sjsg 502ad8b1aafSjsg trace_amdgpu_device_wreg(adev->pdev->device, reg, v); 503c349dbc7Sjsg } 504c349dbc7Sjsg 5051bb76ff1Sjsg /** 5061bb76ff1Sjsg * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range 507c349dbc7Sjsg * 5081bb76ff1Sjsg * @adev: amdgpu_device pointer 5091bb76ff1Sjsg * @reg: mmio/rlc register 5101bb76ff1Sjsg * @v: value to write 5111bb76ff1Sjsg * 5121bb76ff1Sjsg * this function is invoked only for the debugfs register access 5131bb76ff1Sjsg */ 514ad8b1aafSjsg void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, 515f005ef32Sjsg uint32_t reg, uint32_t v, 516f005ef32Sjsg uint32_t xcc_id) 517c349dbc7Sjsg { 5185ca02815Sjsg if (amdgpu_device_skip_hw_access(adev)) 519ad8b1aafSjsg return; 520ad8b1aafSjsg 521c349dbc7Sjsg if (amdgpu_sriov_fullaccess(adev) && 522c349dbc7Sjsg adev->gfx.rlc.funcs && 523c349dbc7Sjsg adev->gfx.rlc.funcs->is_rlcg_access_range) { 524c349dbc7Sjsg if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) 525f005ef32Sjsg return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id); 5261bb76ff1Sjsg } else if ((reg * 4) >= adev->rmmio_size) { 5271bb76ff1Sjsg adev->pcie_wreg(adev, reg * 4, v); 528ad8b1aafSjsg } else { 529ad8b1aafSjsg writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); 530c349dbc7Sjsg } 531c349dbc7Sjsg } 532c349dbc7Sjsg 533c349dbc7Sjsg /** 534ad8b1aafSjsg * amdgpu_device_indirect_rreg - read an indirect register 535ad8b1aafSjsg * 536ad8b1aafSjsg * @adev: amdgpu_device pointer 5375ca02815Sjsg * @reg_addr: indirect register address to read from 538ad8b1aafSjsg * 539ad8b1aafSjsg * Returns the value of indirect register @reg_addr 540ad8b1aafSjsg */ 541ad8b1aafSjsg u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, 542ad8b1aafSjsg u32 reg_addr) 543ad8b1aafSjsg { 544f005ef32Sjsg unsigned long flags, pcie_index, pcie_data; 545ad8b1aafSjsg void __iomem 
*pcie_index_offset; 546ad8b1aafSjsg void __iomem *pcie_data_offset; 547f005ef32Sjsg u32 r; 548f005ef32Sjsg 549f005ef32Sjsg pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 550f005ef32Sjsg pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 551ad8b1aafSjsg 552ad8b1aafSjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 553ad8b1aafSjsg pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 554ad8b1aafSjsg pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 555ad8b1aafSjsg 556ad8b1aafSjsg writel(reg_addr, pcie_index_offset); 557ad8b1aafSjsg readl(pcie_index_offset); 558ad8b1aafSjsg r = readl(pcie_data_offset); 559ad8b1aafSjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 560ad8b1aafSjsg 561ad8b1aafSjsg return r; 562ad8b1aafSjsg } 563ad8b1aafSjsg 564f005ef32Sjsg u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, 565f005ef32Sjsg u64 reg_addr) 566f005ef32Sjsg { 567f005ef32Sjsg unsigned long flags, pcie_index, pcie_index_hi, pcie_data; 568f005ef32Sjsg u32 r; 569f005ef32Sjsg void __iomem *pcie_index_offset; 570f005ef32Sjsg void __iomem *pcie_index_hi_offset; 571f005ef32Sjsg void __iomem *pcie_data_offset; 572f005ef32Sjsg 573f005ef32Sjsg pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 574f005ef32Sjsg pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 575f005ef32Sjsg if (adev->nbio.funcs->get_pcie_index_hi_offset) 576f005ef32Sjsg pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); 577f005ef32Sjsg else 578f005ef32Sjsg pcie_index_hi = 0; 579f005ef32Sjsg 580f005ef32Sjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 581f005ef32Sjsg pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 582f005ef32Sjsg pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 583f005ef32Sjsg if (pcie_index_hi != 0) 584f005ef32Sjsg pcie_index_hi_offset = (void __iomem *)adev->rmmio + 585f005ef32Sjsg pcie_index_hi * 4; 586f005ef32Sjsg 587f005ef32Sjsg writel(reg_addr, pcie_index_offset); 588f005ef32Sjsg readl(pcie_index_offset); 589f005ef32Sjsg if (pcie_index_hi != 0) { 590f005ef32Sjsg writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); 591f005ef32Sjsg readl(pcie_index_hi_offset); 592f005ef32Sjsg } 593f005ef32Sjsg r = readl(pcie_data_offset); 594f005ef32Sjsg 595f005ef32Sjsg /* clear the high bits */ 596f005ef32Sjsg if (pcie_index_hi != 0) { 597f005ef32Sjsg writel(0, pcie_index_hi_offset); 598f005ef32Sjsg readl(pcie_index_hi_offset); 599f005ef32Sjsg } 600f005ef32Sjsg 601f005ef32Sjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 602f005ef32Sjsg 603f005ef32Sjsg return r; 604f005ef32Sjsg } 605f005ef32Sjsg 606ad8b1aafSjsg /** 607ad8b1aafSjsg * amdgpu_device_indirect_rreg64 - read a 64bits indirect register 608ad8b1aafSjsg * 609ad8b1aafSjsg * @adev: amdgpu_device pointer 6105ca02815Sjsg * @reg_addr: indirect register address to read from 611ad8b1aafSjsg * 612ad8b1aafSjsg * Returns the value of indirect register @reg_addr 613ad8b1aafSjsg */ 614ad8b1aafSjsg u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, 615ad8b1aafSjsg u32 reg_addr) 616ad8b1aafSjsg { 617f005ef32Sjsg unsigned long flags, pcie_index, pcie_data; 618ad8b1aafSjsg void __iomem *pcie_index_offset; 619ad8b1aafSjsg void __iomem *pcie_data_offset; 620f005ef32Sjsg u64 r; 621f005ef32Sjsg 622f005ef32Sjsg pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 623f005ef32Sjsg pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 624ad8b1aafSjsg 625ad8b1aafSjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 626ad8b1aafSjsg 
pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 627ad8b1aafSjsg pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 628ad8b1aafSjsg 629ad8b1aafSjsg /* read low 32 bits */ 630ad8b1aafSjsg writel(reg_addr, pcie_index_offset); 631ad8b1aafSjsg readl(pcie_index_offset); 632ad8b1aafSjsg r = readl(pcie_data_offset); 633ad8b1aafSjsg /* read high 32 bits */ 634ad8b1aafSjsg writel(reg_addr + 4, pcie_index_offset); 635ad8b1aafSjsg readl(pcie_index_offset); 636ad8b1aafSjsg r |= ((u64)readl(pcie_data_offset) << 32); 637ad8b1aafSjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 638ad8b1aafSjsg 639ad8b1aafSjsg return r; 640ad8b1aafSjsg } 641ad8b1aafSjsg 642ad8b1aafSjsg /** 643ad8b1aafSjsg * amdgpu_device_indirect_wreg - write an indirect register address 644ad8b1aafSjsg * 645ad8b1aafSjsg * @adev: amdgpu_device pointer 646ad8b1aafSjsg * @reg_addr: indirect register offset 647ad8b1aafSjsg * @reg_data: indirect register data 648ad8b1aafSjsg * 649ad8b1aafSjsg */ 650ad8b1aafSjsg void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, 651ad8b1aafSjsg u32 reg_addr, u32 reg_data) 652ad8b1aafSjsg { 653f005ef32Sjsg unsigned long flags, pcie_index, pcie_data; 654ad8b1aafSjsg void __iomem *pcie_index_offset; 655ad8b1aafSjsg void __iomem *pcie_data_offset; 656ad8b1aafSjsg 657f005ef32Sjsg pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 658f005ef32Sjsg pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 659f005ef32Sjsg 660ad8b1aafSjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 661ad8b1aafSjsg pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 662ad8b1aafSjsg pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 663ad8b1aafSjsg 664ad8b1aafSjsg writel(reg_addr, pcie_index_offset); 665ad8b1aafSjsg readl(pcie_index_offset); 666ad8b1aafSjsg writel(reg_data, pcie_data_offset); 667ad8b1aafSjsg readl(pcie_data_offset); 668ad8b1aafSjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 669ad8b1aafSjsg } 670ad8b1aafSjsg 671f005ef32Sjsg void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, 672f005ef32Sjsg u64 reg_addr, u32 reg_data) 673f005ef32Sjsg { 674f005ef32Sjsg unsigned long flags, pcie_index, pcie_index_hi, pcie_data; 675f005ef32Sjsg void __iomem *pcie_index_offset; 676f005ef32Sjsg void __iomem *pcie_index_hi_offset; 677f005ef32Sjsg void __iomem *pcie_data_offset; 678f005ef32Sjsg 679f005ef32Sjsg pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 680f005ef32Sjsg pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 681f005ef32Sjsg if (adev->nbio.funcs->get_pcie_index_hi_offset) 682f005ef32Sjsg pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); 683f005ef32Sjsg else 684f005ef32Sjsg pcie_index_hi = 0; 685f005ef32Sjsg 686f005ef32Sjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 687f005ef32Sjsg pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 688f005ef32Sjsg pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 689f005ef32Sjsg if (pcie_index_hi != 0) 690f005ef32Sjsg pcie_index_hi_offset = (void __iomem *)adev->rmmio + 691f005ef32Sjsg pcie_index_hi * 4; 692f005ef32Sjsg 693f005ef32Sjsg writel(reg_addr, pcie_index_offset); 694f005ef32Sjsg readl(pcie_index_offset); 695f005ef32Sjsg if (pcie_index_hi != 0) { 696f005ef32Sjsg writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); 697f005ef32Sjsg readl(pcie_index_hi_offset); 698f005ef32Sjsg } 699f005ef32Sjsg writel(reg_data, pcie_data_offset); 700f005ef32Sjsg readl(pcie_data_offset); 701f005ef32Sjsg 702f005ef32Sjsg /* clear the 
high bits */ 703f005ef32Sjsg if (pcie_index_hi != 0) { 704f005ef32Sjsg writel(0, pcie_index_hi_offset); 705f005ef32Sjsg readl(pcie_index_hi_offset); 706f005ef32Sjsg } 707f005ef32Sjsg 708f005ef32Sjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 709f005ef32Sjsg } 710f005ef32Sjsg 711ad8b1aafSjsg /** 712ad8b1aafSjsg * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address 713ad8b1aafSjsg * 714ad8b1aafSjsg * @adev: amdgpu_device pointer 715ad8b1aafSjsg * @reg_addr: indirect register offset 716ad8b1aafSjsg * @reg_data: indirect register data 717ad8b1aafSjsg * 718ad8b1aafSjsg */ 719ad8b1aafSjsg void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, 720ad8b1aafSjsg u32 reg_addr, u64 reg_data) 721ad8b1aafSjsg { 722f005ef32Sjsg unsigned long flags, pcie_index, pcie_data; 723ad8b1aafSjsg void __iomem *pcie_index_offset; 724ad8b1aafSjsg void __iomem *pcie_data_offset; 725ad8b1aafSjsg 726f005ef32Sjsg pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); 727f005ef32Sjsg pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); 728f005ef32Sjsg 729ad8b1aafSjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 730ad8b1aafSjsg pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; 731ad8b1aafSjsg pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; 732ad8b1aafSjsg 733ad8b1aafSjsg /* write low 32 bits */ 734ad8b1aafSjsg writel(reg_addr, pcie_index_offset); 735ad8b1aafSjsg readl(pcie_index_offset); 736ad8b1aafSjsg writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); 737ad8b1aafSjsg readl(pcie_data_offset); 738ad8b1aafSjsg /* write high 32 bits */ 739ad8b1aafSjsg writel(reg_addr + 4, pcie_index_offset); 740ad8b1aafSjsg readl(pcie_index_offset); 741ad8b1aafSjsg writel((u32)(reg_data >> 32), pcie_data_offset); 742ad8b1aafSjsg readl(pcie_data_offset); 743ad8b1aafSjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 744ad8b1aafSjsg } 745ad8b1aafSjsg 746ad8b1aafSjsg /** 747f005ef32Sjsg * amdgpu_device_get_rev_id - query device rev_id 748f005ef32Sjsg * 749f005ef32Sjsg * @adev: amdgpu_device pointer 750f005ef32Sjsg * 751f005ef32Sjsg * Return device rev_id 752f005ef32Sjsg */ 753f005ef32Sjsg u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev) 754f005ef32Sjsg { 755f005ef32Sjsg return adev->nbio.funcs->get_rev_id(adev); 756f005ef32Sjsg } 757f005ef32Sjsg 758f005ef32Sjsg /** 759fb4d8502Sjsg * amdgpu_invalid_rreg - dummy reg read function 760fb4d8502Sjsg * 761ad8b1aafSjsg * @adev: amdgpu_device pointer 762fb4d8502Sjsg * @reg: offset of register 763fb4d8502Sjsg * 764fb4d8502Sjsg * Dummy register read function. Used for register blocks 765fb4d8502Sjsg * that certain asics don't have (all asics). 766fb4d8502Sjsg * Returns the value in the register. 
767fb4d8502Sjsg */ 768fb4d8502Sjsg static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg) 769fb4d8502Sjsg { 770fb4d8502Sjsg DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); 771fb4d8502Sjsg BUG(); 772fb4d8502Sjsg return 0; 773fb4d8502Sjsg } 774fb4d8502Sjsg 775f005ef32Sjsg static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) 776f005ef32Sjsg { 777f005ef32Sjsg DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); 778f005ef32Sjsg BUG(); 779f005ef32Sjsg return 0; 780f005ef32Sjsg } 781f005ef32Sjsg 782fb4d8502Sjsg /** 783fb4d8502Sjsg * amdgpu_invalid_wreg - dummy reg write function 784fb4d8502Sjsg * 785ad8b1aafSjsg * @adev: amdgpu_device pointer 786fb4d8502Sjsg * @reg: offset of register 787fb4d8502Sjsg * @v: value to write to the register 788fb4d8502Sjsg * 789fb4d8502Sjsg * Dummy register read function. Used for register blocks 790fb4d8502Sjsg * that certain asics don't have (all asics). 791fb4d8502Sjsg */ 792fb4d8502Sjsg static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) 793fb4d8502Sjsg { 794fb4d8502Sjsg DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", 795fb4d8502Sjsg reg, v); 796fb4d8502Sjsg BUG(); 797fb4d8502Sjsg } 798fb4d8502Sjsg 799f005ef32Sjsg static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) 800f005ef32Sjsg { 801f005ef32Sjsg DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n", 802f005ef32Sjsg reg, v); 803f005ef32Sjsg BUG(); 804f005ef32Sjsg } 805f005ef32Sjsg 806fb4d8502Sjsg /** 807c349dbc7Sjsg * amdgpu_invalid_rreg64 - dummy 64 bit reg read function 808c349dbc7Sjsg * 809ad8b1aafSjsg * @adev: amdgpu_device pointer 810c349dbc7Sjsg * @reg: offset of register 811c349dbc7Sjsg * 812c349dbc7Sjsg * Dummy register read function. Used for register blocks 813c349dbc7Sjsg * that certain asics don't have (all asics). 814c349dbc7Sjsg * Returns the value in the register. 815c349dbc7Sjsg */ 816c349dbc7Sjsg static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) 817c349dbc7Sjsg { 818c349dbc7Sjsg DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); 819c349dbc7Sjsg BUG(); 820c349dbc7Sjsg return 0; 821c349dbc7Sjsg } 822c349dbc7Sjsg 823c349dbc7Sjsg /** 824c349dbc7Sjsg * amdgpu_invalid_wreg64 - dummy reg write function 825c349dbc7Sjsg * 826ad8b1aafSjsg * @adev: amdgpu_device pointer 827c349dbc7Sjsg * @reg: offset of register 828c349dbc7Sjsg * @v: value to write to the register 829c349dbc7Sjsg * 830c349dbc7Sjsg * Dummy register read function. Used for register blocks 831c349dbc7Sjsg * that certain asics don't have (all asics). 832c349dbc7Sjsg */ 833c349dbc7Sjsg static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) 834c349dbc7Sjsg { 835c349dbc7Sjsg DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", 836c349dbc7Sjsg reg, v); 837c349dbc7Sjsg BUG(); 838c349dbc7Sjsg } 839c349dbc7Sjsg 840c349dbc7Sjsg /** 841fb4d8502Sjsg * amdgpu_block_invalid_rreg - dummy reg read function 842fb4d8502Sjsg * 843ad8b1aafSjsg * @adev: amdgpu_device pointer 844fb4d8502Sjsg * @block: offset of instance 845fb4d8502Sjsg * @reg: offset of register 846fb4d8502Sjsg * 847fb4d8502Sjsg * Dummy register read function. Used for register blocks 848fb4d8502Sjsg * that certain asics don't have (all asics). 849fb4d8502Sjsg * Returns the value in the register. 
850fb4d8502Sjsg */ 851fb4d8502Sjsg static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev, 852fb4d8502Sjsg uint32_t block, uint32_t reg) 853fb4d8502Sjsg { 854fb4d8502Sjsg DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n", 855fb4d8502Sjsg reg, block); 856fb4d8502Sjsg BUG(); 857fb4d8502Sjsg return 0; 858fb4d8502Sjsg } 859fb4d8502Sjsg 860fb4d8502Sjsg /** 861fb4d8502Sjsg * amdgpu_block_invalid_wreg - dummy reg write function 862fb4d8502Sjsg * 863ad8b1aafSjsg * @adev: amdgpu_device pointer 864fb4d8502Sjsg * @block: offset of instance 865fb4d8502Sjsg * @reg: offset of register 866fb4d8502Sjsg * @v: value to write to the register 867fb4d8502Sjsg * 868fb4d8502Sjsg * Dummy register read function. Used for register blocks 869fb4d8502Sjsg * that certain asics don't have (all asics). 870fb4d8502Sjsg */ 871fb4d8502Sjsg static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, 872fb4d8502Sjsg uint32_t block, 873fb4d8502Sjsg uint32_t reg, uint32_t v) 874fb4d8502Sjsg { 875fb4d8502Sjsg DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n", 876fb4d8502Sjsg reg, block, v); 877fb4d8502Sjsg BUG(); 878fb4d8502Sjsg } 879fb4d8502Sjsg 880fb4d8502Sjsg /** 881ad8b1aafSjsg * amdgpu_device_asic_init - Wrapper for atom asic_init 882ad8b1aafSjsg * 883ad8b1aafSjsg * @adev: amdgpu_device pointer 884ad8b1aafSjsg * 885ad8b1aafSjsg * Does any asic specific work and then calls atom asic init. 886ad8b1aafSjsg */ 887ad8b1aafSjsg static int amdgpu_device_asic_init(struct amdgpu_device *adev) 888ad8b1aafSjsg { 889f005ef32Sjsg int ret; 890f005ef32Sjsg 891ad8b1aafSjsg amdgpu_asic_pre_asic_init(adev); 892ad8b1aafSjsg 893f005ef32Sjsg if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) || 894f005ef32Sjsg adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { 895f005ef32Sjsg amdgpu_psp_wait_for_bootloader(adev); 896f005ef32Sjsg ret = amdgpu_atomfirmware_asic_init(adev, true); 897f005ef32Sjsg return ret; 898f005ef32Sjsg } else { 899ad8b1aafSjsg return amdgpu_atom_asic_init(adev->mode_info.atom_context); 900ad8b1aafSjsg } 901ad8b1aafSjsg 902f005ef32Sjsg return 0; 903f005ef32Sjsg } 904f005ef32Sjsg 905ad8b1aafSjsg /** 906f005ef32Sjsg * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page 907fb4d8502Sjsg * 908ad8b1aafSjsg * @adev: amdgpu_device pointer 909fb4d8502Sjsg * 910fb4d8502Sjsg * Allocates a scratch page of VRAM for use by various things in the 911fb4d8502Sjsg * driver. 912fb4d8502Sjsg */ 913f005ef32Sjsg static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev) 914fb4d8502Sjsg { 915f005ef32Sjsg return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, 916f005ef32Sjsg AMDGPU_GEM_DOMAIN_VRAM | 917f005ef32Sjsg AMDGPU_GEM_DOMAIN_GTT, 918f005ef32Sjsg &adev->mem_scratch.robj, 919f005ef32Sjsg &adev->mem_scratch.gpu_addr, 920f005ef32Sjsg (void **)&adev->mem_scratch.ptr); 921fb4d8502Sjsg } 922fb4d8502Sjsg 923fb4d8502Sjsg /** 924f005ef32Sjsg * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page 925fb4d8502Sjsg * 926ad8b1aafSjsg * @adev: amdgpu_device pointer 927fb4d8502Sjsg * 928fb4d8502Sjsg * Frees the VRAM scratch page. 929fb4d8502Sjsg */ 930f005ef32Sjsg static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev) 931fb4d8502Sjsg { 932f005ef32Sjsg amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL); 933fb4d8502Sjsg } 934fb4d8502Sjsg 935fb4d8502Sjsg /** 936fb4d8502Sjsg * amdgpu_device_program_register_sequence - program an array of registers. 
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND/OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}
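/*
 * Illustrative sketch (not part of the upstream file): the register array
 * is consumed as {offset, and_mask, or_mask} triplets, so a "golden
 * settings" table and its application look like this.  The register
 * offsets below are arbitrary example values, not real golden settings.
 */
static void __maybe_unused example_program_golden_regs(struct amdgpu_device *adev)
{
	static const u32 example_golden_settings[] = {
		/* reg offset, and_mask,  or_mask */
		0x0000215c, 0xffffffff, 0x00000100,	/* and_mask 0xffffffff: or_mask is written as-is */
		0x000021c0, 0x0000ff00, 0x00003200,	/* otherwise: masked bits are cleared, then or_mask applied */
	};

	amdgpu_device_program_register_sequence(adev, example_golden_settings,
						ARRAY_SIZE(example_golden_settings));
}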
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	STUB();
	return -ENOSYS;
#ifdef notyet
	return pci_reset_function(adev->pdev);
#endif
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
{
	if (adev->wb.wb_obj) {
		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
				      &adev->wb.gpu_addr,
				      (void **)&adev->wb.wb);
		adev->wb.wb_obj = NULL;
	}
}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
static int amdgpu_device_wb_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->wb.wb_obj == NULL) {
		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
					    (void **)&adev->wb.wb);
		if (r) {
			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
			return r;
		}

		adev->wb.num_wb = AMDGPU_MAX_WB;
		memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
	}

	return 0;
}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
	unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);

	if (offset < adev->wb.num_wb) {
		__set_bit(offset, adev->wb.used);
		*wb = offset << 3; /* convert to dw offset */
		return 0;
	} else {
		return -EINVAL;
	}
}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
	wb >>= 3;
	if (wb < adev->wb.num_wb)
		__clear_bit(wb, adev->wb.used);
}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize FB BAR to make all VRAM CPU accessible.
We try very hard not 1104fb4d8502Sjsg * to fail, but if any of the BARs is not accessible after the size we abort 1105fb4d8502Sjsg * driver loading by returning -ENODEV. 1106fb4d8502Sjsg */ 1107fb4d8502Sjsg int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) 1108fb4d8502Sjsg { 110950f19d19Skettenis #ifdef __linux__ 11105ca02815Sjsg int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size); 1111fb4d8502Sjsg struct pci_bus *root; 1112fb4d8502Sjsg struct resource *res; 1113f005ef32Sjsg unsigned int i; 1114fb4d8502Sjsg u16 cmd; 1115fb4d8502Sjsg int r; 1116fb4d8502Sjsg 11177e1de2c2Sjsg if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) 11187e1de2c2Sjsg return 0; 11197e1de2c2Sjsg 1120fb4d8502Sjsg /* Bypass for VF */ 1121fb4d8502Sjsg if (amdgpu_sriov_vf(adev)) 1122fb4d8502Sjsg return 0; 1123fb4d8502Sjsg 1124ad8b1aafSjsg /* skip if the bios has already enabled large BAR */ 1125ad8b1aafSjsg if (adev->gmc.real_vram_size && 1126ad8b1aafSjsg (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) 1127ad8b1aafSjsg return 0; 1128ad8b1aafSjsg 1129fb4d8502Sjsg /* Check if the root BUS has 64bit memory resources */ 1130fb4d8502Sjsg root = adev->pdev->bus; 1131fb4d8502Sjsg while (root->parent) 1132fb4d8502Sjsg root = root->parent; 1133fb4d8502Sjsg 1134fb4d8502Sjsg pci_bus_for_each_resource(root, res, i) { 1135fb4d8502Sjsg if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 1136fb4d8502Sjsg res->start > 0x100000000ull) 1137fb4d8502Sjsg break; 1138fb4d8502Sjsg } 1139fb4d8502Sjsg 1140fb4d8502Sjsg /* Trying to resize is pointless without a root hub window above 4GB */ 1141fb4d8502Sjsg if (!res) 1142fb4d8502Sjsg return 0; 1143fb4d8502Sjsg 11445ca02815Sjsg /* Limit the BAR size to what is available */ 11455ca02815Sjsg rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1, 11465ca02815Sjsg rbar_size); 11475ca02815Sjsg 1148fb4d8502Sjsg /* Disable memory decoding while we change the BAR addresses and size */ 1149fb4d8502Sjsg pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd); 1150fb4d8502Sjsg pci_write_config_word(adev->pdev, PCI_COMMAND, 1151fb4d8502Sjsg cmd & ~PCI_COMMAND_MEMORY); 1152fb4d8502Sjsg 1153fb4d8502Sjsg /* Free the VRAM and doorbell BAR, we most likely need to move both. */ 1154f005ef32Sjsg amdgpu_doorbell_fini(adev); 1155fb4d8502Sjsg if (adev->asic_type >= CHIP_BONAIRE) 1156fb4d8502Sjsg pci_release_resource(adev->pdev, 2); 1157fb4d8502Sjsg 1158fb4d8502Sjsg pci_release_resource(adev->pdev, 0); 1159fb4d8502Sjsg 1160fb4d8502Sjsg r = pci_resize_resource(adev->pdev, 0, rbar_size); 1161fb4d8502Sjsg if (r == -ENOSPC) 1162fb4d8502Sjsg DRM_INFO("Not enough PCI address space for a large BAR."); 1163fb4d8502Sjsg else if (r && r != -ENOTSUPP) 1164fb4d8502Sjsg DRM_ERROR("Problem resizing BAR0 (%d).", r); 1165fb4d8502Sjsg 1166fb4d8502Sjsg pci_assign_unassigned_bus_resources(adev->pdev->bus); 1167fb4d8502Sjsg 1168fb4d8502Sjsg /* When the doorbell or fb BAR isn't available we have no chance of 1169fb4d8502Sjsg * using the device. 
1170fb4d8502Sjsg */ 1171f005ef32Sjsg r = amdgpu_doorbell_init(adev); 1172fb4d8502Sjsg if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) 1173fb4d8502Sjsg return -ENODEV; 1174fb4d8502Sjsg 1175fb4d8502Sjsg pci_write_config_word(adev->pdev, PCI_COMMAND, cmd); 117650f19d19Skettenis #endif /* __linux__ */ 1177fb4d8502Sjsg 1178fb4d8502Sjsg return 0; 1179fb4d8502Sjsg } 1180fb4d8502Sjsg 1181f005ef32Sjsg static bool amdgpu_device_read_bios(struct amdgpu_device *adev) 1182f005ef32Sjsg { 1183f005ef32Sjsg if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) 1184f005ef32Sjsg return false; 1185f005ef32Sjsg 1186f005ef32Sjsg return true; 1187f005ef32Sjsg } 1188f005ef32Sjsg 1189fb4d8502Sjsg /* 1190fb4d8502Sjsg * GPU helpers function. 1191fb4d8502Sjsg */ 1192fb4d8502Sjsg /** 1193fb4d8502Sjsg * amdgpu_device_need_post - check if the hw need post or not 1194fb4d8502Sjsg * 1195fb4d8502Sjsg * @adev: amdgpu_device pointer 1196fb4d8502Sjsg * 1197fb4d8502Sjsg * Check if the asic has been initialized (all asics) at driver startup 1198fb4d8502Sjsg * or post is needed if hw reset is performed. 1199fb4d8502Sjsg * Returns true if need or false if not. 1200fb4d8502Sjsg */ 1201fb4d8502Sjsg bool amdgpu_device_need_post(struct amdgpu_device *adev) 1202fb4d8502Sjsg { 1203fb4d8502Sjsg uint32_t reg; 1204fb4d8502Sjsg 1205fb4d8502Sjsg if (amdgpu_sriov_vf(adev)) 1206fb4d8502Sjsg return false; 1207fb4d8502Sjsg 1208f005ef32Sjsg if (!amdgpu_device_read_bios(adev)) 1209f005ef32Sjsg return false; 1210f005ef32Sjsg 1211fb4d8502Sjsg if (amdgpu_passthrough(adev)) { 1212fb4d8502Sjsg /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot 1213fb4d8502Sjsg * some old smc fw still need driver do vPost otherwise gpu hang, while 1214fb4d8502Sjsg * those smc fw version above 22.15 doesn't have this flaw, so we force 1215fb4d8502Sjsg * vpost executed for smc version below 22.15 1216fb4d8502Sjsg */ 1217fb4d8502Sjsg if (adev->asic_type == CHIP_FIJI) { 1218fb4d8502Sjsg int err; 1219fb4d8502Sjsg uint32_t fw_ver; 1220f005ef32Sjsg 1221fb4d8502Sjsg err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); 1222fb4d8502Sjsg /* force vPost if error occured */ 1223fb4d8502Sjsg if (err) 1224fb4d8502Sjsg return true; 1225fb4d8502Sjsg 1226fb4d8502Sjsg fw_ver = *((uint32_t *)adev->pm.fw->data + 69); 12275a307a65Sjsg release_firmware(adev->pm.fw); 1228fb4d8502Sjsg if (fw_ver < 0x00160e00) 1229fb4d8502Sjsg return true; 1230fb4d8502Sjsg } 1231fb4d8502Sjsg } 1232fb4d8502Sjsg 12335ca02815Sjsg /* Don't post if we need to reset whole hive on init */ 12345ca02815Sjsg if (adev->gmc.xgmi.pending_reset) 12355ca02815Sjsg return false; 12365ca02815Sjsg 1237fb4d8502Sjsg if (adev->has_hw_reset) { 1238fb4d8502Sjsg adev->has_hw_reset = false; 1239fb4d8502Sjsg return true; 1240fb4d8502Sjsg } 1241fb4d8502Sjsg 1242fb4d8502Sjsg /* bios scratch used on CIK+ */ 1243fb4d8502Sjsg if (adev->asic_type >= CHIP_BONAIRE) 1244fb4d8502Sjsg return amdgpu_atombios_scratch_need_asic_init(adev); 1245fb4d8502Sjsg 1246fb4d8502Sjsg /* check MEM_SIZE for older asics */ 1247fb4d8502Sjsg reg = amdgpu_asic_get_config_memsize(adev); 1248fb4d8502Sjsg 1249fb4d8502Sjsg if ((reg != 0) && (reg != 0xffffffff)) 1250fb4d8502Sjsg return false; 1251fb4d8502Sjsg 1252fb4d8502Sjsg return true; 1253fb4d8502Sjsg } 1254fb4d8502Sjsg 12559da60799Sjsg /* 12569da60799Sjsg * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic 12579da60799Sjsg * speed switching. 
Until we have confirmation from Intel that a specific host 12589da60799Sjsg * supports it, it's safer that we keep it disabled for all. 12599da60799Sjsg * 12609da60799Sjsg * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ 12619da60799Sjsg * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 12629da60799Sjsg */ 12639da60799Sjsg bool amdgpu_device_pcie_dynamic_switching_supported(void) 12649da60799Sjsg { 12659da60799Sjsg #if IS_ENABLED(CONFIG_X86) 12669da60799Sjsg #ifdef __linux__ 12679da60799Sjsg struct cpuinfo_x86 *c = &cpu_data(0); 12689da60799Sjsg 12699da60799Sjsg if (c->x86_vendor == X86_VENDOR_INTEL) 12709da60799Sjsg #else 12719da60799Sjsg if (strcmp(cpu_vendor, "GenuineIntel") == 0) 12729da60799Sjsg #endif 12739da60799Sjsg return false; 12749da60799Sjsg #endif 12759da60799Sjsg return true; 12769da60799Sjsg } 12779da60799Sjsg 1278a9d9cd9cSjsg /** 1279a9d9cd9cSjsg * amdgpu_device_should_use_aspm - check if the device should program ASPM 1280a9d9cd9cSjsg * 1281a9d9cd9cSjsg * @adev: amdgpu_device pointer 1282a9d9cd9cSjsg * 1283a9d9cd9cSjsg * Confirm whether the module parameter and pcie bridge agree that ASPM should 1284a9d9cd9cSjsg * be set for this device. 1285a9d9cd9cSjsg * 1286a9d9cd9cSjsg * Returns true if it should be used or false if not. 1287a9d9cd9cSjsg */ 1288a9d9cd9cSjsg bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) 1289a9d9cd9cSjsg { 1290a9d9cd9cSjsg switch (amdgpu_aspm) { 1291a9d9cd9cSjsg case -1: 1292a9d9cd9cSjsg break; 1293a9d9cd9cSjsg case 0: 1294a9d9cd9cSjsg return false; 1295a9d9cd9cSjsg case 1: 1296a9d9cd9cSjsg return true; 1297a9d9cd9cSjsg default: 1298a9d9cd9cSjsg return false; 1299a9d9cd9cSjsg } 1300a9d9cd9cSjsg return pcie_aspm_enabled(adev->pdev); 1301a9d9cd9cSjsg } 1302a9d9cd9cSjsg 1303e73b7337Sjsg bool amdgpu_device_aspm_support_quirk(void) 1304e73b7337Sjsg { 1305e73b7337Sjsg #if IS_ENABLED(CONFIG_X86) 1306e73b7337Sjsg struct cpu_info *ci = curcpu(); 1307e73b7337Sjsg 1308e73b7337Sjsg return !(ci->ci_family == 6 && ci->ci_model == 0x97); 1309e73b7337Sjsg #else 1310e73b7337Sjsg return true; 1311e73b7337Sjsg #endif 1312e73b7337Sjsg } 1313e73b7337Sjsg 1314fb4d8502Sjsg /* if we get transitioned to only one device, take VGA back */ 1315fb4d8502Sjsg /** 1316fb4d8502Sjsg * amdgpu_device_vga_set_decode - enable/disable vga decode 1317fb4d8502Sjsg * 13185ca02815Sjsg * @pdev: PCI device pointer 1319fb4d8502Sjsg * @state: enable/disable vga decode 1320fb4d8502Sjsg * 1321fb4d8502Sjsg * Enable/disable vga decode (all asics). 1322fb4d8502Sjsg * Returns VGA resource flags. 
1323fb4d8502Sjsg */ 1324fb4d8502Sjsg #ifdef notyet 13255ca02815Sjsg static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, 13265ca02815Sjsg bool state) 1327fb4d8502Sjsg { 13285ca02815Sjsg struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); 1329f005ef32Sjsg 1330fb4d8502Sjsg amdgpu_asic_set_vga_state(adev, state); 1331fb4d8502Sjsg if (state) 1332fb4d8502Sjsg return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | 1333fb4d8502Sjsg VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1334fb4d8502Sjsg else 1335fb4d8502Sjsg return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; 1336fb4d8502Sjsg } 1337fb4d8502Sjsg #endif 1338fb4d8502Sjsg 1339fb4d8502Sjsg /** 1340fb4d8502Sjsg * amdgpu_device_check_block_size - validate the vm block size 1341fb4d8502Sjsg * 1342fb4d8502Sjsg * @adev: amdgpu_device pointer 1343fb4d8502Sjsg * 1344fb4d8502Sjsg * Validates the vm block size specified via module parameter. 1345fb4d8502Sjsg * The vm block size defines number of bits in page table versus page directory, 1346fb4d8502Sjsg * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1347fb4d8502Sjsg * page table and the remaining bits are in the page directory. 1348fb4d8502Sjsg */ 1349fb4d8502Sjsg static void amdgpu_device_check_block_size(struct amdgpu_device *adev) 1350fb4d8502Sjsg { 1351fb4d8502Sjsg /* defines number of bits in page table versus page directory, 1352fb4d8502Sjsg * a page is 4KB so we have 12 bits offset, minimum 9 bits in the 1353f005ef32Sjsg * page table and the remaining bits are in the page directory 1354f005ef32Sjsg */ 1355fb4d8502Sjsg if (amdgpu_vm_block_size == -1) 1356fb4d8502Sjsg return; 1357fb4d8502Sjsg 1358fb4d8502Sjsg if (amdgpu_vm_block_size < 9) { 1359fb4d8502Sjsg dev_warn(adev->dev, "VM page table size (%d) too small\n", 1360fb4d8502Sjsg amdgpu_vm_block_size); 1361fb4d8502Sjsg amdgpu_vm_block_size = -1; 1362fb4d8502Sjsg } 1363fb4d8502Sjsg } 1364fb4d8502Sjsg 1365fb4d8502Sjsg /** 1366fb4d8502Sjsg * amdgpu_device_check_vm_size - validate the vm size 1367fb4d8502Sjsg * 1368fb4d8502Sjsg * @adev: amdgpu_device pointer 1369fb4d8502Sjsg * 1370fb4d8502Sjsg * Validates the vm size in GB specified via module parameter. 1371fb4d8502Sjsg * The VM size is the size of the GPU virtual memory space in GB. 
1372fb4d8502Sjsg */ 1373fb4d8502Sjsg static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) 1374fb4d8502Sjsg { 1375fb4d8502Sjsg /* no need to check the default value */ 1376fb4d8502Sjsg if (amdgpu_vm_size == -1) 1377fb4d8502Sjsg return; 1378fb4d8502Sjsg 1379fb4d8502Sjsg if (amdgpu_vm_size < 1) { 1380fb4d8502Sjsg dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", 1381fb4d8502Sjsg amdgpu_vm_size); 1382fb4d8502Sjsg amdgpu_vm_size = -1; 1383fb4d8502Sjsg } 1384fb4d8502Sjsg } 1385fb4d8502Sjsg 1386fb4d8502Sjsg static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) 1387fb4d8502Sjsg { 1388fb4d8502Sjsg #ifdef __linux__ 1389fb4d8502Sjsg struct sysinfo si; 1390fb4d8502Sjsg #endif 1391c349dbc7Sjsg bool is_os_64 = (sizeof(void *) == 8); 1392fb4d8502Sjsg uint64_t total_memory; 1393fb4d8502Sjsg uint64_t dram_size_seven_GB = 0x1B8000000; 1394fb4d8502Sjsg uint64_t dram_size_three_GB = 0xB8000000; 1395fb4d8502Sjsg 1396fb4d8502Sjsg if (amdgpu_smu_memory_pool_size == 0) 1397fb4d8502Sjsg return; 1398fb4d8502Sjsg 1399fb4d8502Sjsg if (!is_os_64) { 1400fb4d8502Sjsg DRM_WARN("Not 64-bit OS, feature not supported\n"); 1401fb4d8502Sjsg goto def_value; 1402fb4d8502Sjsg } 1403fb4d8502Sjsg #ifdef __linux__ 1404fb4d8502Sjsg si_meminfo(&si); 1405fb4d8502Sjsg total_memory = (uint64_t)si.totalram * si.mem_unit; 1406fb4d8502Sjsg #else 1407fb4d8502Sjsg total_memory = ptoa(physmem); 1408fb4d8502Sjsg #endif 1409fb4d8502Sjsg 1410fb4d8502Sjsg if ((amdgpu_smu_memory_pool_size == 1) || 1411fb4d8502Sjsg (amdgpu_smu_memory_pool_size == 2)) { 1412fb4d8502Sjsg if (total_memory < dram_size_three_GB) 1413fb4d8502Sjsg goto def_value1; 1414fb4d8502Sjsg } else if ((amdgpu_smu_memory_pool_size == 4) || 1415fb4d8502Sjsg (amdgpu_smu_memory_pool_size == 8)) { 1416fb4d8502Sjsg if (total_memory < dram_size_seven_GB) 1417fb4d8502Sjsg goto def_value1; 1418fb4d8502Sjsg } else { 1419fb4d8502Sjsg DRM_WARN("Smu memory pool size not supported\n"); 1420fb4d8502Sjsg goto def_value; 1421fb4d8502Sjsg } 1422fb4d8502Sjsg adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; 1423fb4d8502Sjsg 1424fb4d8502Sjsg return; 1425fb4d8502Sjsg 1426fb4d8502Sjsg def_value1: 1427fb4d8502Sjsg DRM_WARN("No enough system memory\n"); 1428fb4d8502Sjsg def_value: 1429fb4d8502Sjsg adev->pm.smu_prv_buffer_size = 0; 1430fb4d8502Sjsg } 1431fb4d8502Sjsg 14325ca02815Sjsg static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) 14335ca02815Sjsg { 14345ca02815Sjsg if (!(adev->flags & AMD_IS_APU) || 14355ca02815Sjsg adev->asic_type < CHIP_RAVEN) 14365ca02815Sjsg return 0; 14375ca02815Sjsg 14385ca02815Sjsg switch (adev->asic_type) { 14395ca02815Sjsg case CHIP_RAVEN: 14405ca02815Sjsg if (adev->pdev->device == 0x15dd) 14415ca02815Sjsg adev->apu_flags |= AMD_APU_IS_RAVEN; 14425ca02815Sjsg if (adev->pdev->device == 0x15d8) 14435ca02815Sjsg adev->apu_flags |= AMD_APU_IS_PICASSO; 14445ca02815Sjsg break; 14455ca02815Sjsg case CHIP_RENOIR: 14465ca02815Sjsg if ((adev->pdev->device == 0x1636) || 14475ca02815Sjsg (adev->pdev->device == 0x164c)) 14485ca02815Sjsg adev->apu_flags |= AMD_APU_IS_RENOIR; 14495ca02815Sjsg else 14505ca02815Sjsg adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; 14515ca02815Sjsg break; 14525ca02815Sjsg case CHIP_VANGOGH: 14535ca02815Sjsg adev->apu_flags |= AMD_APU_IS_VANGOGH; 14545ca02815Sjsg break; 14555ca02815Sjsg case CHIP_YELLOW_CARP: 14565ca02815Sjsg break; 14575ca02815Sjsg case CHIP_CYAN_SKILLFISH: 14581bb76ff1Sjsg if ((adev->pdev->device == 0x13FE) || 14591bb76ff1Sjsg (adev->pdev->device == 0x143F)) 
14605ca02815Sjsg adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; 14615ca02815Sjsg break; 14625ca02815Sjsg default: 14631bb76ff1Sjsg break; 14645ca02815Sjsg } 14655ca02815Sjsg 14665ca02815Sjsg return 0; 14675ca02815Sjsg } 14685ca02815Sjsg 1469fb4d8502Sjsg /** 1470fb4d8502Sjsg * amdgpu_device_check_arguments - validate module params 1471fb4d8502Sjsg * 1472fb4d8502Sjsg * @adev: amdgpu_device pointer 1473fb4d8502Sjsg * 1474fb4d8502Sjsg * Validates certain module parameters and updates 1475fb4d8502Sjsg * the associated values used by the driver (all asics). 1476fb4d8502Sjsg */ 1477c349dbc7Sjsg static int amdgpu_device_check_arguments(struct amdgpu_device *adev) 1478fb4d8502Sjsg { 1479fb4d8502Sjsg if (amdgpu_sched_jobs < 4) { 1480fb4d8502Sjsg dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", 1481fb4d8502Sjsg amdgpu_sched_jobs); 1482fb4d8502Sjsg amdgpu_sched_jobs = 4; 1483fb4d8502Sjsg } else if (!is_power_of_2(amdgpu_sched_jobs)) { 1484fb4d8502Sjsg dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", 1485fb4d8502Sjsg amdgpu_sched_jobs); 1486fb4d8502Sjsg amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); 1487fb4d8502Sjsg } 1488fb4d8502Sjsg 1489fb4d8502Sjsg if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { 1490fb4d8502Sjsg /* gart size must be greater or equal to 32M */ 1491fb4d8502Sjsg dev_warn(adev->dev, "gart size (%d) too small\n", 1492fb4d8502Sjsg amdgpu_gart_size); 1493fb4d8502Sjsg amdgpu_gart_size = -1; 1494fb4d8502Sjsg } 1495fb4d8502Sjsg 1496fb4d8502Sjsg if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { 1497fb4d8502Sjsg /* gtt size must be greater or equal to 32M */ 1498fb4d8502Sjsg dev_warn(adev->dev, "gtt size (%d) too small\n", 1499fb4d8502Sjsg amdgpu_gtt_size); 1500fb4d8502Sjsg amdgpu_gtt_size = -1; 1501fb4d8502Sjsg } 1502fb4d8502Sjsg 1503fb4d8502Sjsg /* valid range is between 4 and 9 inclusive */ 1504fb4d8502Sjsg if (amdgpu_vm_fragment_size != -1 && 1505fb4d8502Sjsg (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { 1506fb4d8502Sjsg dev_warn(adev->dev, "valid range is between 4 and 9\n"); 1507fb4d8502Sjsg amdgpu_vm_fragment_size = -1; 1508fb4d8502Sjsg } 1509fb4d8502Sjsg 1510ad8b1aafSjsg if (amdgpu_sched_hw_submission < 2) { 1511ad8b1aafSjsg dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n", 1512ad8b1aafSjsg amdgpu_sched_hw_submission); 1513ad8b1aafSjsg amdgpu_sched_hw_submission = 2; 1514ad8b1aafSjsg } else if (!is_power_of_2(amdgpu_sched_hw_submission)) { 1515ad8b1aafSjsg dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n", 1516ad8b1aafSjsg amdgpu_sched_hw_submission); 1517ad8b1aafSjsg amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission); 1518ad8b1aafSjsg } 1519ad8b1aafSjsg 15201bb76ff1Sjsg if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) { 15211bb76ff1Sjsg dev_warn(adev->dev, "invalid option for reset method, reverting to default\n"); 15221bb76ff1Sjsg amdgpu_reset_method = -1; 15231bb76ff1Sjsg } 15241bb76ff1Sjsg 1525fb4d8502Sjsg amdgpu_device_check_smu_prv_buffer_size(adev); 1526fb4d8502Sjsg 1527fb4d8502Sjsg amdgpu_device_check_vm_size(adev); 1528fb4d8502Sjsg 1529fb4d8502Sjsg amdgpu_device_check_block_size(adev); 1530fb4d8502Sjsg 1531fb4d8502Sjsg adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); 1532c349dbc7Sjsg 1533c349dbc7Sjsg return 0; 1534fb4d8502Sjsg } 1535fb4d8502Sjsg 1536fb4d8502Sjsg #ifdef __linux__ 1537fb4d8502Sjsg /** 1538fb4d8502Sjsg * amdgpu_switcheroo_set_state - set switcheroo state 1539fb4d8502Sjsg * 
1540fb4d8502Sjsg * @pdev: pci dev pointer 1541fb4d8502Sjsg * @state: vga_switcheroo state 1542fb4d8502Sjsg * 1543f005ef32Sjsg * Callback for the switcheroo driver. Suspends or resumes 1544fb4d8502Sjsg * the asics before or after it is powered up using ACPI methods. 1545fb4d8502Sjsg */ 1546ad8b1aafSjsg static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, 1547ad8b1aafSjsg enum vga_switcheroo_state state) 1548fb4d8502Sjsg { 1549fb4d8502Sjsg struct drm_device *dev = pci_get_drvdata(pdev); 1550c349dbc7Sjsg int r; 1551fb4d8502Sjsg 15525ca02815Sjsg if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) 1553fb4d8502Sjsg return; 1554fb4d8502Sjsg 1555fb4d8502Sjsg if (state == VGA_SWITCHEROO_ON) { 1556ad8b1aafSjsg pr_info("switched on\n"); 1557fb4d8502Sjsg /* don't suspend or resume card normally */ 1558fb4d8502Sjsg dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 1559fb4d8502Sjsg 15605ca02815Sjsg pci_set_power_state(pdev, PCI_D0); 15615ca02815Sjsg amdgpu_device_load_pci_state(pdev); 15625ca02815Sjsg r = pci_enable_device(pdev); 1563c349dbc7Sjsg if (r) 1564c349dbc7Sjsg DRM_WARN("pci_enable_device failed (%d)\n", r); 1565c349dbc7Sjsg amdgpu_device_resume(dev, true); 1566fb4d8502Sjsg 1567fb4d8502Sjsg dev->switch_power_state = DRM_SWITCH_POWER_ON; 1568fb4d8502Sjsg } else { 1569ad8b1aafSjsg pr_info("switched off\n"); 1570fb4d8502Sjsg dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; 157136668b15Sjsg amdgpu_device_prepare(dev); 1572c349dbc7Sjsg amdgpu_device_suspend(dev, true); 15735ca02815Sjsg amdgpu_device_cache_pci_state(pdev); 1574c349dbc7Sjsg /* Shut down the device */ 15755ca02815Sjsg pci_disable_device(pdev); 15765ca02815Sjsg pci_set_power_state(pdev, PCI_D3cold); 1577fb4d8502Sjsg dev->switch_power_state = DRM_SWITCH_POWER_OFF; 1578fb4d8502Sjsg } 1579fb4d8502Sjsg } 1580fb4d8502Sjsg 1581fb4d8502Sjsg /** 1582fb4d8502Sjsg * amdgpu_switcheroo_can_switch - see if switcheroo state can change 1583fb4d8502Sjsg * 1584fb4d8502Sjsg * @pdev: pci dev pointer 1585fb4d8502Sjsg * 1586fb4d8502Sjsg * Callback for the switcheroo driver. Check of the switcheroo 1587fb4d8502Sjsg * state can be changed. 1588fb4d8502Sjsg * Returns true if the state can be changed, false if not. 1589fb4d8502Sjsg */ 1590fb4d8502Sjsg static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) 1591fb4d8502Sjsg { 1592fb4d8502Sjsg struct drm_device *dev = pci_get_drvdata(pdev); 1593fb4d8502Sjsg 1594fb4d8502Sjsg /* 1595fb4d8502Sjsg * FIXME: open_count is protected by drm_global_mutex but that would lead to 1596fb4d8502Sjsg * locking inversion with the driver load path. And the access here is 1597fb4d8502Sjsg * completely racy anyway. So don't bother with locking for now. 1598fb4d8502Sjsg */ 1599c349dbc7Sjsg return atomic_read(&dev->open_count) == 0; 1600fb4d8502Sjsg } 160149261a46Sjsg #endif /* __linux__ */ 1602fb4d8502Sjsg 1603fb4d8502Sjsg static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { 160449261a46Sjsg #ifdef notyet 1605fb4d8502Sjsg .set_gpu_state = amdgpu_switcheroo_set_state, 1606fb4d8502Sjsg .reprobe = NULL, 1607fb4d8502Sjsg .can_switch = amdgpu_switcheroo_can_switch, 160849261a46Sjsg #endif 1609fb4d8502Sjsg }; 1610fb4d8502Sjsg 1611fb4d8502Sjsg /** 1612fb4d8502Sjsg * amdgpu_device_ip_set_clockgating_state - set the CG state 1613fb4d8502Sjsg * 1614fb4d8502Sjsg * @dev: amdgpu_device pointer 1615fb4d8502Sjsg * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 
1616fb4d8502Sjsg * @state: clockgating state (gate or ungate) 1617fb4d8502Sjsg * 1618fb4d8502Sjsg * Sets the requested clockgating state for all instances of 1619fb4d8502Sjsg * the hardware IP specified. 1620fb4d8502Sjsg * Returns the error code from the last instance. 1621fb4d8502Sjsg */ 1622fb4d8502Sjsg int amdgpu_device_ip_set_clockgating_state(void *dev, 1623fb4d8502Sjsg enum amd_ip_block_type block_type, 1624fb4d8502Sjsg enum amd_clockgating_state state) 1625fb4d8502Sjsg { 1626fb4d8502Sjsg struct amdgpu_device *adev = dev; 1627fb4d8502Sjsg int i, r = 0; 1628fb4d8502Sjsg 1629fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 1630fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 1631fb4d8502Sjsg continue; 1632fb4d8502Sjsg if (adev->ip_blocks[i].version->type != block_type) 1633fb4d8502Sjsg continue; 1634fb4d8502Sjsg if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) 1635fb4d8502Sjsg continue; 1636fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->set_clockgating_state( 1637fb4d8502Sjsg (void *)adev, state); 1638fb4d8502Sjsg if (r) 1639fb4d8502Sjsg DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", 1640fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 1641fb4d8502Sjsg } 1642fb4d8502Sjsg return r; 1643fb4d8502Sjsg } 1644fb4d8502Sjsg 1645fb4d8502Sjsg /** 1646fb4d8502Sjsg * amdgpu_device_ip_set_powergating_state - set the PG state 1647fb4d8502Sjsg * 1648fb4d8502Sjsg * @dev: amdgpu_device pointer 1649fb4d8502Sjsg * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) 1650fb4d8502Sjsg * @state: powergating state (gate or ungate) 1651fb4d8502Sjsg * 1652fb4d8502Sjsg * Sets the requested powergating state for all instances of 1653fb4d8502Sjsg * the hardware IP specified. 1654fb4d8502Sjsg * Returns the error code from the last instance. 1655fb4d8502Sjsg */ 1656fb4d8502Sjsg int amdgpu_device_ip_set_powergating_state(void *dev, 1657fb4d8502Sjsg enum amd_ip_block_type block_type, 1658fb4d8502Sjsg enum amd_powergating_state state) 1659fb4d8502Sjsg { 1660fb4d8502Sjsg struct amdgpu_device *adev = dev; 1661fb4d8502Sjsg int i, r = 0; 1662fb4d8502Sjsg 1663fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 1664fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 1665fb4d8502Sjsg continue; 1666fb4d8502Sjsg if (adev->ip_blocks[i].version->type != block_type) 1667fb4d8502Sjsg continue; 1668fb4d8502Sjsg if (!adev->ip_blocks[i].version->funcs->set_powergating_state) 1669fb4d8502Sjsg continue; 1670fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->set_powergating_state( 1671fb4d8502Sjsg (void *)adev, state); 1672fb4d8502Sjsg if (r) 1673fb4d8502Sjsg DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", 1674fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 1675fb4d8502Sjsg } 1676fb4d8502Sjsg return r; 1677fb4d8502Sjsg } 1678fb4d8502Sjsg 1679fb4d8502Sjsg /** 1680fb4d8502Sjsg * amdgpu_device_ip_get_clockgating_state - get the CG state 1681fb4d8502Sjsg * 1682fb4d8502Sjsg * @adev: amdgpu_device pointer 1683fb4d8502Sjsg * @flags: clockgating feature flags 1684fb4d8502Sjsg * 1685fb4d8502Sjsg * Walks the list of IPs on the device and updates the clockgating 1686fb4d8502Sjsg * flags for each IP. 1687fb4d8502Sjsg * Updates @flags with the feature flags for each hardware IP where 1688fb4d8502Sjsg * clockgating is enabled. 
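 *
 * Typical caller sketch (illustrative; AMD_CG_SUPPORT_GFX_MGCG stands in for
 * any of the AMD_CG_SUPPORT_* feature bits):
 *
 *   u64 flags = 0;
 *
 *   amdgpu_device_ip_get_clockgating_state(adev, &flags);
 *   if (flags & AMD_CG_SUPPORT_GFX_MGCG)
 *           DRM_INFO("GFX MGCG is enabled\n");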
1689fb4d8502Sjsg */
1690fb4d8502Sjsg void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
16911bb76ff1Sjsg u64 *flags)
1692fb4d8502Sjsg {
1693fb4d8502Sjsg int i;
1694fb4d8502Sjsg
1695fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) {
1696fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid)
1697fb4d8502Sjsg continue;
1698fb4d8502Sjsg if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1699fb4d8502Sjsg adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1700fb4d8502Sjsg }
1701fb4d8502Sjsg }
1702fb4d8502Sjsg
1703fb4d8502Sjsg /**
1704fb4d8502Sjsg * amdgpu_device_ip_wait_for_idle - wait for idle
1705fb4d8502Sjsg *
1706fb4d8502Sjsg * @adev: amdgpu_device pointer
1707fb4d8502Sjsg * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1708fb4d8502Sjsg *
1709fb4d8502Sjsg * Waits for the requested hardware IP to be idle.
1710fb4d8502Sjsg * Returns 0 for success or a negative error code on failure.
1711fb4d8502Sjsg */
1712fb4d8502Sjsg int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1713fb4d8502Sjsg enum amd_ip_block_type block_type)
1714fb4d8502Sjsg {
1715fb4d8502Sjsg int i, r;
1716fb4d8502Sjsg
1717fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) {
1718fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid)
1719fb4d8502Sjsg continue;
1720fb4d8502Sjsg if (adev->ip_blocks[i].version->type == block_type) {
1721fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1722fb4d8502Sjsg if (r)
1723fb4d8502Sjsg return r;
1724fb4d8502Sjsg break;
1725fb4d8502Sjsg }
1726fb4d8502Sjsg }
1727fb4d8502Sjsg return 0;
1728fb4d8502Sjsg
1729fb4d8502Sjsg }
1730fb4d8502Sjsg
1731fb4d8502Sjsg /**
1732fb4d8502Sjsg * amdgpu_device_ip_is_idle - is the hardware IP idle
1733fb4d8502Sjsg *
1734fb4d8502Sjsg * @adev: amdgpu_device pointer
1735fb4d8502Sjsg * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1736fb4d8502Sjsg *
1737fb4d8502Sjsg * Check if the hardware IP is idle or not.
1738fb4d8502Sjsg * Returns true if the IP is idle, false if not.
1739fb4d8502Sjsg */
1740fb4d8502Sjsg bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1741fb4d8502Sjsg enum amd_ip_block_type block_type)
1742fb4d8502Sjsg {
1743fb4d8502Sjsg int i;
1744fb4d8502Sjsg
1745fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) {
1746fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid)
1747fb4d8502Sjsg continue;
1748fb4d8502Sjsg if (adev->ip_blocks[i].version->type == block_type)
1749fb4d8502Sjsg return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1750fb4d8502Sjsg }
1751fb4d8502Sjsg return true;
1752fb4d8502Sjsg
1753fb4d8502Sjsg }
1754fb4d8502Sjsg
1755fb4d8502Sjsg /**
1756fb4d8502Sjsg * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1757fb4d8502Sjsg *
1758fb4d8502Sjsg * @adev: amdgpu_device pointer
1759fb4d8502Sjsg * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1760fb4d8502Sjsg *
1761fb4d8502Sjsg * Returns a pointer to the hardware IP block structure
1762fb4d8502Sjsg * if it exists for the asic, otherwise NULL.
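 *
 * Example lookup (illustrative only):
 *
 *   struct amdgpu_ip_block *gmc_block =
 *           amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GMC);
 *
 *   if (gmc_block)
 *           DRM_INFO("GMC IP v%u.%u\n", gmc_block->version->major,
 *                    gmc_block->version->minor);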
1763fb4d8502Sjsg */ 1764fb4d8502Sjsg struct amdgpu_ip_block * 1765fb4d8502Sjsg amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, 1766fb4d8502Sjsg enum amd_ip_block_type type) 1767fb4d8502Sjsg { 1768fb4d8502Sjsg int i; 1769fb4d8502Sjsg 1770fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) 1771fb4d8502Sjsg if (adev->ip_blocks[i].version->type == type) 1772fb4d8502Sjsg return &adev->ip_blocks[i]; 1773fb4d8502Sjsg 1774fb4d8502Sjsg return NULL; 1775fb4d8502Sjsg } 1776fb4d8502Sjsg 1777fb4d8502Sjsg /** 1778fb4d8502Sjsg * amdgpu_device_ip_block_version_cmp 1779fb4d8502Sjsg * 1780fb4d8502Sjsg * @adev: amdgpu_device pointer 1781fb4d8502Sjsg * @type: enum amd_ip_block_type 1782fb4d8502Sjsg * @major: major version 1783fb4d8502Sjsg * @minor: minor version 1784fb4d8502Sjsg * 1785fb4d8502Sjsg * return 0 if equal or greater 1786fb4d8502Sjsg * return 1 if smaller or the ip_block doesn't exist 1787fb4d8502Sjsg */ 1788fb4d8502Sjsg int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, 1789fb4d8502Sjsg enum amd_ip_block_type type, 1790fb4d8502Sjsg u32 major, u32 minor) 1791fb4d8502Sjsg { 1792fb4d8502Sjsg struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); 1793fb4d8502Sjsg 1794fb4d8502Sjsg if (ip_block && ((ip_block->version->major > major) || 1795fb4d8502Sjsg ((ip_block->version->major == major) && 1796fb4d8502Sjsg (ip_block->version->minor >= minor)))) 1797fb4d8502Sjsg return 0; 1798fb4d8502Sjsg 1799fb4d8502Sjsg return 1; 1800fb4d8502Sjsg } 1801fb4d8502Sjsg 1802fb4d8502Sjsg /** 1803fb4d8502Sjsg * amdgpu_device_ip_block_add 1804fb4d8502Sjsg * 1805fb4d8502Sjsg * @adev: amdgpu_device pointer 1806fb4d8502Sjsg * @ip_block_version: pointer to the IP to add 1807fb4d8502Sjsg * 1808fb4d8502Sjsg * Adds the IP block driver information to the collection of IPs 1809fb4d8502Sjsg * on the asic. 1810fb4d8502Sjsg */ 1811fb4d8502Sjsg int amdgpu_device_ip_block_add(struct amdgpu_device *adev, 1812fb4d8502Sjsg const struct amdgpu_ip_block_version *ip_block_version) 1813fb4d8502Sjsg { 1814fb4d8502Sjsg if (!ip_block_version) 1815fb4d8502Sjsg return -EINVAL; 1816fb4d8502Sjsg 18175ca02815Sjsg switch (ip_block_version->type) { 18185ca02815Sjsg case AMD_IP_BLOCK_TYPE_VCN: 18195ca02815Sjsg if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) 18205ca02815Sjsg return 0; 18215ca02815Sjsg break; 18225ca02815Sjsg case AMD_IP_BLOCK_TYPE_JPEG: 18235ca02815Sjsg if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) 18245ca02815Sjsg return 0; 18255ca02815Sjsg break; 18265ca02815Sjsg default: 18275ca02815Sjsg break; 18285ca02815Sjsg } 18295ca02815Sjsg 1830fb4d8502Sjsg DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, 1831fb4d8502Sjsg ip_block_version->funcs->name); 1832fb4d8502Sjsg 1833fb4d8502Sjsg adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; 1834fb4d8502Sjsg 1835fb4d8502Sjsg return 0; 1836fb4d8502Sjsg } 1837fb4d8502Sjsg 1838fb4d8502Sjsg /** 1839fb4d8502Sjsg * amdgpu_device_enable_virtual_display - enable virtual display feature 1840fb4d8502Sjsg * 1841fb4d8502Sjsg * @adev: amdgpu_device pointer 1842fb4d8502Sjsg * 1843fb4d8502Sjsg * Enabled the virtual display feature if the user has enabled it via 1844fb4d8502Sjsg * the module parameter virtual_display. This feature provides a virtual 1845fb4d8502Sjsg * display hardware on headless boards or in virtualized environments. 
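 * The string is a semicolon separated list of <PCI address>,<number of CRTCs>
 * entries, with "all" accepted in place of a PCI address; for example,
 * amdgpu.virtual_display=0000:04:00.0,2 exposes two virtual CRTCs on that
 * device.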
1846fb4d8502Sjsg * This function parses and validates the configuration string specified by 1847fb4d8502Sjsg * the user and configues the virtual display configuration (number of 1848fb4d8502Sjsg * virtual connectors, crtcs, etc.) specified. 1849fb4d8502Sjsg */ 1850fb4d8502Sjsg static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) 1851fb4d8502Sjsg { 1852fb4d8502Sjsg adev->enable_virtual_display = false; 1853fb4d8502Sjsg 1854fb4d8502Sjsg #ifdef notyet 1855fb4d8502Sjsg if (amdgpu_virtual_display) { 18565ca02815Sjsg const char *pci_address_name = pci_name(adev->pdev); 1857fb4d8502Sjsg char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname; 1858fb4d8502Sjsg 1859fb4d8502Sjsg pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL); 1860fb4d8502Sjsg pciaddstr_tmp = pciaddstr; 1861fb4d8502Sjsg while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) { 1862fb4d8502Sjsg pciaddname = strsep(&pciaddname_tmp, ","); 1863fb4d8502Sjsg if (!strcmp("all", pciaddname) 1864fb4d8502Sjsg || !strcmp(pci_address_name, pciaddname)) { 1865fb4d8502Sjsg long num_crtc; 1866fb4d8502Sjsg int res = -1; 1867fb4d8502Sjsg 1868fb4d8502Sjsg adev->enable_virtual_display = true; 1869fb4d8502Sjsg 1870fb4d8502Sjsg if (pciaddname_tmp) 1871fb4d8502Sjsg res = kstrtol(pciaddname_tmp, 10, 1872fb4d8502Sjsg &num_crtc); 1873fb4d8502Sjsg 1874fb4d8502Sjsg if (!res) { 1875fb4d8502Sjsg if (num_crtc < 1) 1876fb4d8502Sjsg num_crtc = 1; 1877fb4d8502Sjsg if (num_crtc > 6) 1878fb4d8502Sjsg num_crtc = 6; 1879fb4d8502Sjsg adev->mode_info.num_crtc = num_crtc; 1880fb4d8502Sjsg } else { 1881fb4d8502Sjsg adev->mode_info.num_crtc = 1; 1882fb4d8502Sjsg } 1883fb4d8502Sjsg break; 1884fb4d8502Sjsg } 1885fb4d8502Sjsg } 1886fb4d8502Sjsg 1887fb4d8502Sjsg DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", 1888fb4d8502Sjsg amdgpu_virtual_display, pci_address_name, 1889fb4d8502Sjsg adev->enable_virtual_display, adev->mode_info.num_crtc); 1890fb4d8502Sjsg 1891fb4d8502Sjsg kfree(pciaddstr); 1892fb4d8502Sjsg } 1893fb4d8502Sjsg #endif 1894fb4d8502Sjsg } 1895fb4d8502Sjsg 1896f005ef32Sjsg void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) 1897f005ef32Sjsg { 1898f005ef32Sjsg if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { 1899f005ef32Sjsg adev->mode_info.num_crtc = 1; 1900f005ef32Sjsg adev->enable_virtual_display = true; 1901f005ef32Sjsg DRM_INFO("virtual_display:%d, num_crtc:%d\n", 1902f005ef32Sjsg adev->enable_virtual_display, adev->mode_info.num_crtc); 1903f005ef32Sjsg } 1904f005ef32Sjsg } 1905f005ef32Sjsg 1906fb4d8502Sjsg /** 1907fb4d8502Sjsg * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware 1908fb4d8502Sjsg * 1909fb4d8502Sjsg * @adev: amdgpu_device pointer 1910fb4d8502Sjsg * 1911fb4d8502Sjsg * Parses the asic configuration parameters specified in the gpu info 1912fb4d8502Sjsg * firmware and makes them availale to the driver for use in configuring 1913fb4d8502Sjsg * the asic. 1914fb4d8502Sjsg * Returns 0 on success, -EINVAL on failure. 
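 *
 * On ASICs that provide an IP discovery table (adev->mman.discovery_bin is
 * set), this firmware is not requested at all; the configuration is taken
 * from the discovery data instead.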
1915fb4d8502Sjsg */ 1916fb4d8502Sjsg static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) 1917fb4d8502Sjsg { 1918fb4d8502Sjsg const char *chip_name; 1919ad8b1aafSjsg char fw_name[40]; 1920fb4d8502Sjsg int err; 1921fb4d8502Sjsg const struct gpu_info_firmware_header_v1_0 *hdr; 1922fb4d8502Sjsg 1923fb4d8502Sjsg adev->firmware.gpu_info_fw = NULL; 1924fb4d8502Sjsg 1925f005ef32Sjsg if (adev->mman.discovery_bin) 1926ad8b1aafSjsg return 0; 1927ad8b1aafSjsg 1928fb4d8502Sjsg switch (adev->asic_type) { 1929fb4d8502Sjsg default: 1930fb4d8502Sjsg return 0; 1931fb4d8502Sjsg case CHIP_VEGA10: 1932fb4d8502Sjsg chip_name = "vega10"; 1933fb4d8502Sjsg break; 1934fb4d8502Sjsg case CHIP_VEGA12: 1935fb4d8502Sjsg chip_name = "vega12"; 1936fb4d8502Sjsg break; 1937fb4d8502Sjsg case CHIP_RAVEN: 1938ad8b1aafSjsg if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1939c349dbc7Sjsg chip_name = "raven2"; 1940ad8b1aafSjsg else if (adev->apu_flags & AMD_APU_IS_PICASSO) 19414fe6e3f4Sjsg chip_name = "picasso"; 19423ee1c80bSjsg else 19433ee1c80bSjsg chip_name = "raven"; 19444fe6e3f4Sjsg break; 1945c349dbc7Sjsg case CHIP_ARCTURUS: 1946c349dbc7Sjsg chip_name = "arcturus"; 1947c349dbc7Sjsg break; 1948c349dbc7Sjsg case CHIP_NAVI12: 1949c349dbc7Sjsg chip_name = "navi12"; 1950c349dbc7Sjsg break; 1951fb4d8502Sjsg } 1952fb4d8502Sjsg 1953fb4d8502Sjsg snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); 1954f005ef32Sjsg err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name); 1955fb4d8502Sjsg if (err) { 1956fb4d8502Sjsg dev_err(adev->dev, 1957f005ef32Sjsg "Failed to get gpu_info firmware \"%s\"\n", 1958fb4d8502Sjsg fw_name); 1959fb4d8502Sjsg goto out; 1960fb4d8502Sjsg } 1961fb4d8502Sjsg 1962fb4d8502Sjsg hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; 1963fb4d8502Sjsg amdgpu_ucode_print_gpu_info_hdr(&hdr->header); 1964fb4d8502Sjsg 1965fb4d8502Sjsg switch (hdr->version_major) { 1966fb4d8502Sjsg case 1: 1967fb4d8502Sjsg { 1968fb4d8502Sjsg const struct gpu_info_firmware_v1_0 *gpu_info_fw = 1969fb4d8502Sjsg (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + 1970fb4d8502Sjsg le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1971fb4d8502Sjsg 1972ad8b1aafSjsg /* 1973ad8b1aafSjsg * Should be droped when DAL no longer needs it. 
1974ad8b1aafSjsg */ 1975ad8b1aafSjsg if (adev->asic_type == CHIP_NAVI12) 1976c349dbc7Sjsg goto parse_soc_bounding_box; 1977c349dbc7Sjsg 1978fb4d8502Sjsg adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); 1979fb4d8502Sjsg adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); 1980fb4d8502Sjsg adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); 1981fb4d8502Sjsg adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); 1982fb4d8502Sjsg adev->gfx.config.max_texture_channel_caches = 1983fb4d8502Sjsg le32_to_cpu(gpu_info_fw->gc_num_tccs); 1984fb4d8502Sjsg adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); 1985fb4d8502Sjsg adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); 1986fb4d8502Sjsg adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); 1987fb4d8502Sjsg adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); 1988fb4d8502Sjsg adev->gfx.config.double_offchip_lds_buf = 1989fb4d8502Sjsg le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); 1990fb4d8502Sjsg adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); 1991fb4d8502Sjsg adev->gfx.cu_info.max_waves_per_simd = 1992fb4d8502Sjsg le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); 1993fb4d8502Sjsg adev->gfx.cu_info.max_scratch_slots_per_cu = 1994fb4d8502Sjsg le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); 1995fb4d8502Sjsg adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); 1996c349dbc7Sjsg if (hdr->version_minor >= 1) { 1997c349dbc7Sjsg const struct gpu_info_firmware_v1_1 *gpu_info_fw = 1998c349dbc7Sjsg (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + 1999c349dbc7Sjsg le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2000c349dbc7Sjsg adev->gfx.config.num_sc_per_sh = 2001c349dbc7Sjsg le32_to_cpu(gpu_info_fw->num_sc_per_sh); 2002c349dbc7Sjsg adev->gfx.config.num_packer_per_sc = 2003c349dbc7Sjsg le32_to_cpu(gpu_info_fw->num_packer_per_sc); 2004c349dbc7Sjsg } 2005c349dbc7Sjsg 2006c349dbc7Sjsg parse_soc_bounding_box: 2007c349dbc7Sjsg /* 2008c349dbc7Sjsg * soc bounding box info is not integrated in disocovery table, 2009ad8b1aafSjsg * we always need to parse it from gpu info firmware if needed. 2010c349dbc7Sjsg */ 2011c349dbc7Sjsg if (hdr->version_minor == 2) { 2012c349dbc7Sjsg const struct gpu_info_firmware_v1_2 *gpu_info_fw = 2013c349dbc7Sjsg (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + 2014c349dbc7Sjsg le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2015c349dbc7Sjsg adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; 2016c349dbc7Sjsg } 2017fb4d8502Sjsg break; 2018fb4d8502Sjsg } 2019fb4d8502Sjsg default: 2020fb4d8502Sjsg dev_err(adev->dev, 2021fb4d8502Sjsg "Unsupported gpu_info table %d\n", hdr->header.ucode_version); 2022fb4d8502Sjsg err = -EINVAL; 2023fb4d8502Sjsg goto out; 2024fb4d8502Sjsg } 2025fb4d8502Sjsg out: 2026fb4d8502Sjsg return err; 2027fb4d8502Sjsg } 2028fb4d8502Sjsg 2029fb4d8502Sjsg /** 2030fb4d8502Sjsg * amdgpu_device_ip_early_init - run early init for hardware IPs 2031fb4d8502Sjsg * 2032fb4d8502Sjsg * @adev: amdgpu_device pointer 2033fb4d8502Sjsg * 2034fb4d8502Sjsg * Early initialization pass for hardware IPs. The hardware IPs that make 2035fb4d8502Sjsg * up each asic are discovered each IP's early_init callback is run. This 2036fb4d8502Sjsg * is the first stage in initializing the asic. 
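 * Individual blocks can be masked off with the amdgpu.ip_block_mask module
 * parameter; a cleared bit i marks block i invalid before its early_init
 * callback runs, and the later sw_init/hw_init passes skip invalid blocks.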
2037fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 2038fb4d8502Sjsg */ 2039fb4d8502Sjsg static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) 2040fb4d8502Sjsg { 2041f9c49ec7Sjsg struct pci_dev *parent; 2042fb4d8502Sjsg int i, r; 2043f005ef32Sjsg bool total; 2044fb4d8502Sjsg 2045fb4d8502Sjsg amdgpu_device_enable_virtual_display(adev); 2046fb4d8502Sjsg 2047ad8b1aafSjsg if (amdgpu_sriov_vf(adev)) { 2048ad8b1aafSjsg r = amdgpu_virt_request_full_gpu(adev, true); 2049fb4d8502Sjsg if (r) 2050fb4d8502Sjsg return r; 2051ad8b1aafSjsg } 2052ad8b1aafSjsg 2053ad8b1aafSjsg switch (adev->asic_type) { 2054fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_SI 2055fb4d8502Sjsg case CHIP_VERDE: 2056fb4d8502Sjsg case CHIP_TAHITI: 2057fb4d8502Sjsg case CHIP_PITCAIRN: 2058fb4d8502Sjsg case CHIP_OLAND: 2059fb4d8502Sjsg case CHIP_HAINAN: 2060fb4d8502Sjsg adev->family = AMDGPU_FAMILY_SI; 2061fb4d8502Sjsg r = si_set_ip_blocks(adev); 2062fb4d8502Sjsg if (r) 2063fb4d8502Sjsg return r; 2064fb4d8502Sjsg break; 2065fb4d8502Sjsg #endif 2066fb4d8502Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK 2067fb4d8502Sjsg case CHIP_BONAIRE: 2068fb4d8502Sjsg case CHIP_HAWAII: 2069fb4d8502Sjsg case CHIP_KAVERI: 2070fb4d8502Sjsg case CHIP_KABINI: 2071fb4d8502Sjsg case CHIP_MULLINS: 2072ad8b1aafSjsg if (adev->flags & AMD_IS_APU) 2073fb4d8502Sjsg adev->family = AMDGPU_FAMILY_KV; 2074ad8b1aafSjsg else 2075ad8b1aafSjsg adev->family = AMDGPU_FAMILY_CI; 2076fb4d8502Sjsg 2077fb4d8502Sjsg r = cik_set_ip_blocks(adev); 2078fb4d8502Sjsg if (r) 2079fb4d8502Sjsg return r; 2080fb4d8502Sjsg break; 2081fb4d8502Sjsg #endif 2082ad8b1aafSjsg case CHIP_TOPAZ: 2083ad8b1aafSjsg case CHIP_TONGA: 2084ad8b1aafSjsg case CHIP_FIJI: 2085ad8b1aafSjsg case CHIP_POLARIS10: 2086ad8b1aafSjsg case CHIP_POLARIS11: 2087ad8b1aafSjsg case CHIP_POLARIS12: 2088ad8b1aafSjsg case CHIP_VEGAM: 2089ad8b1aafSjsg case CHIP_CARRIZO: 2090ad8b1aafSjsg case CHIP_STONEY: 2091ad8b1aafSjsg if (adev->flags & AMD_IS_APU) 2092ad8b1aafSjsg adev->family = AMDGPU_FAMILY_CZ; 2093ad8b1aafSjsg else 2094ad8b1aafSjsg adev->family = AMDGPU_FAMILY_VI; 2095ad8b1aafSjsg 2096ad8b1aafSjsg r = vi_set_ip_blocks(adev); 2097ad8b1aafSjsg if (r) 2098ad8b1aafSjsg return r; 2099ad8b1aafSjsg break; 2100fb4d8502Sjsg default: 21011bb76ff1Sjsg r = amdgpu_discovery_set_ip_blocks(adev); 21021bb76ff1Sjsg if (r) 21031bb76ff1Sjsg return r; 21041bb76ff1Sjsg break; 2105fb4d8502Sjsg } 2106fb4d8502Sjsg 2107f9c49ec7Sjsg if (amdgpu_has_atpx() && 2108f9c49ec7Sjsg (amdgpu_is_atpx_hybrid() || 2109f9c49ec7Sjsg amdgpu_has_atpx_dgpu_power_cntl()) && 2110f9c49ec7Sjsg ((adev->flags & AMD_IS_APU) == 0) && 2111997286d4Sjsg !dev_is_removable(&adev->pdev->dev)) 2112f9c49ec7Sjsg adev->flags |= AMD_IS_PX; 2113f9c49ec7Sjsg 2114b9d500ebSjsg if (!(adev->flags & AMD_IS_APU)) { 2115c9d1c6fcSjsg #ifdef notyet 2116c9d1c6fcSjsg parent = pcie_find_root_port(adev->pdev); 2117f9c49ec7Sjsg adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; 2118c9d1c6fcSjsg #else 2119c9d1c6fcSjsg adev->has_pr3 = false; 2120c9d1c6fcSjsg #endif 2121b9d500ebSjsg } 2122f9c49ec7Sjsg 2123fb4d8502Sjsg 2124c349dbc7Sjsg adev->pm.pp_feature = amdgpu_pp_feature_mask; 2125c349dbc7Sjsg if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) 2126c349dbc7Sjsg adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 21275ca02815Sjsg if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) 21285ca02815Sjsg adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; 2129f005ef32Sjsg if (!amdgpu_device_pcie_dynamic_switching_supported()) 2130f005ef32Sjsg adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; 2131fb4d8502Sjsg 2132f005ef32Sjsg total = true; 2133fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 2134fb4d8502Sjsg if ((amdgpu_ip_block_mask & (1 << i)) == 0) { 2135f005ef32Sjsg DRM_WARN("disabled ip block: %d <%s>\n", 2136fb4d8502Sjsg i, adev->ip_blocks[i].version->funcs->name); 2137fb4d8502Sjsg adev->ip_blocks[i].status.valid = false; 2138fb4d8502Sjsg } else { 2139fb4d8502Sjsg if (adev->ip_blocks[i].version->funcs->early_init) { 2140fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->early_init((void *)adev); 2141fb4d8502Sjsg if (r == -ENOENT) { 2142fb4d8502Sjsg adev->ip_blocks[i].status.valid = false; 2143fb4d8502Sjsg } else if (r) { 2144fb4d8502Sjsg DRM_ERROR("early_init of IP block <%s> failed %d\n", 2145fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2146f005ef32Sjsg total = false; 2147fb4d8502Sjsg } else { 2148fb4d8502Sjsg adev->ip_blocks[i].status.valid = true; 2149fb4d8502Sjsg } 2150fb4d8502Sjsg } else { 2151fb4d8502Sjsg adev->ip_blocks[i].status.valid = true; 2152fb4d8502Sjsg } 2153fb4d8502Sjsg } 2154c349dbc7Sjsg /* get the vbios after the asic_funcs are set up */ 2155c349dbc7Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2156a89df194Sjsg r = amdgpu_device_parse_gpu_info_fw(adev); 2157a89df194Sjsg if (r) 2158a89df194Sjsg return r; 2159a89df194Sjsg 2160c349dbc7Sjsg /* Read BIOS */ 2161f005ef32Sjsg if (amdgpu_device_read_bios(adev)) { 2162c349dbc7Sjsg if (!amdgpu_get_bios(adev)) 2163c349dbc7Sjsg return -EINVAL; 2164c349dbc7Sjsg 2165c349dbc7Sjsg r = amdgpu_atombios_init(adev); 2166c349dbc7Sjsg if (r) { 2167c349dbc7Sjsg dev_err(adev->dev, "amdgpu_atombios_init failed\n"); 2168c349dbc7Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); 2169c349dbc7Sjsg return r; 2170c349dbc7Sjsg } 2171f005ef32Sjsg } 21725ca02815Sjsg 21735ca02815Sjsg /*get pf2vf msg info at it's earliest time*/ 21745ca02815Sjsg if (amdgpu_sriov_vf(adev)) 21755ca02815Sjsg amdgpu_virt_init_data_exchange(adev); 21765ca02815Sjsg 2177c349dbc7Sjsg } 2178fb4d8502Sjsg } 2179f005ef32Sjsg if (!total) 2180f005ef32Sjsg return -ENODEV; 2181fb4d8502Sjsg 2182f005ef32Sjsg amdgpu_amdkfd_device_probe(adev); 2183fb4d8502Sjsg adev->cg_flags &= amdgpu_cg_mask; 2184fb4d8502Sjsg adev->pg_flags &= amdgpu_pg_mask; 2185fb4d8502Sjsg 2186fb4d8502Sjsg return 0; 2187fb4d8502Sjsg } 2188fb4d8502Sjsg 2189c349dbc7Sjsg static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) 2190c349dbc7Sjsg { 2191c349dbc7Sjsg int i, r; 2192c349dbc7Sjsg 2193c349dbc7Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 2194c349dbc7Sjsg if (!adev->ip_blocks[i].status.sw) 2195c349dbc7Sjsg continue; 2196c349dbc7Sjsg if (adev->ip_blocks[i].status.hw) 2197c349dbc7Sjsg continue; 2198c349dbc7Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 2199c349dbc7Sjsg (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == 
AMD_IP_BLOCK_TYPE_PSP)) || 2200c349dbc7Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { 2201c349dbc7Sjsg r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2202c349dbc7Sjsg if (r) { 2203c349dbc7Sjsg DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2204c349dbc7Sjsg adev->ip_blocks[i].version->funcs->name, r); 2205c349dbc7Sjsg return r; 2206c349dbc7Sjsg } 2207c349dbc7Sjsg adev->ip_blocks[i].status.hw = true; 2208c349dbc7Sjsg } 2209c349dbc7Sjsg } 2210c349dbc7Sjsg 2211c349dbc7Sjsg return 0; 2212c349dbc7Sjsg } 2213c349dbc7Sjsg 2214c349dbc7Sjsg static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) 2215c349dbc7Sjsg { 2216c349dbc7Sjsg int i, r; 2217c349dbc7Sjsg 2218c349dbc7Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 2219c349dbc7Sjsg if (!adev->ip_blocks[i].status.sw) 2220c349dbc7Sjsg continue; 2221c349dbc7Sjsg if (adev->ip_blocks[i].status.hw) 2222c349dbc7Sjsg continue; 2223c349dbc7Sjsg r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2224c349dbc7Sjsg if (r) { 2225c349dbc7Sjsg DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2226c349dbc7Sjsg adev->ip_blocks[i].version->funcs->name, r); 2227c349dbc7Sjsg return r; 2228c349dbc7Sjsg } 2229c349dbc7Sjsg adev->ip_blocks[i].status.hw = true; 2230c349dbc7Sjsg } 2231c349dbc7Sjsg 2232c349dbc7Sjsg return 0; 2233c349dbc7Sjsg } 2234c349dbc7Sjsg 2235c349dbc7Sjsg static int amdgpu_device_fw_loading(struct amdgpu_device *adev) 2236c349dbc7Sjsg { 2237c349dbc7Sjsg int r = 0; 2238c349dbc7Sjsg int i; 2239c349dbc7Sjsg uint32_t smu_version; 2240c349dbc7Sjsg 2241c349dbc7Sjsg if (adev->asic_type >= CHIP_VEGA10) { 2242c349dbc7Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 2243c349dbc7Sjsg if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) 2244c349dbc7Sjsg continue; 2245c349dbc7Sjsg 22465ca02815Sjsg if (!adev->ip_blocks[i].status.sw) 22475ca02815Sjsg continue; 22485ca02815Sjsg 2249c349dbc7Sjsg /* no need to do the fw loading again if already done*/ 2250c349dbc7Sjsg if (adev->ip_blocks[i].status.hw == true) 2251c349dbc7Sjsg break; 2252c349dbc7Sjsg 2253ad8b1aafSjsg if (amdgpu_in_reset(adev) || adev->in_suspend) { 2254c349dbc7Sjsg r = adev->ip_blocks[i].version->funcs->resume(adev); 2255c349dbc7Sjsg if (r) { 2256c349dbc7Sjsg DRM_ERROR("resume of IP block <%s> failed %d\n", 2257c349dbc7Sjsg adev->ip_blocks[i].version->funcs->name, r); 2258c349dbc7Sjsg return r; 2259c349dbc7Sjsg } 2260c349dbc7Sjsg } else { 2261c349dbc7Sjsg r = adev->ip_blocks[i].version->funcs->hw_init(adev); 2262c349dbc7Sjsg if (r) { 2263c349dbc7Sjsg DRM_ERROR("hw_init of IP block <%s> failed %d\n", 2264c349dbc7Sjsg adev->ip_blocks[i].version->funcs->name, r); 2265c349dbc7Sjsg return r; 2266c349dbc7Sjsg } 2267c349dbc7Sjsg } 2268c349dbc7Sjsg 2269c349dbc7Sjsg adev->ip_blocks[i].status.hw = true; 2270c349dbc7Sjsg break; 2271c349dbc7Sjsg } 2272c349dbc7Sjsg } 2273c349dbc7Sjsg 2274c349dbc7Sjsg if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) 2275c349dbc7Sjsg r = amdgpu_pm_load_smu_firmware(adev, &smu_version); 2276c349dbc7Sjsg 2277c349dbc7Sjsg return r; 2278c349dbc7Sjsg } 2279c349dbc7Sjsg 22801bb76ff1Sjsg static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) 22811bb76ff1Sjsg { 22821bb76ff1Sjsg long timeout; 22831bb76ff1Sjsg int r, i; 22841bb76ff1Sjsg 22851bb76ff1Sjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 22861bb76ff1Sjsg struct amdgpu_ring *ring = adev->rings[i]; 22871bb76ff1Sjsg 22881bb76ff1Sjsg /* No need to setup the GPU scheduler for rings that don't need it */ 22891bb76ff1Sjsg if (!ring || ring->no_scheduler) 
22901bb76ff1Sjsg continue; 22911bb76ff1Sjsg 22921bb76ff1Sjsg switch (ring->funcs->type) { 22931bb76ff1Sjsg case AMDGPU_RING_TYPE_GFX: 22941bb76ff1Sjsg timeout = adev->gfx_timeout; 22951bb76ff1Sjsg break; 22961bb76ff1Sjsg case AMDGPU_RING_TYPE_COMPUTE: 22971bb76ff1Sjsg timeout = adev->compute_timeout; 22981bb76ff1Sjsg break; 22991bb76ff1Sjsg case AMDGPU_RING_TYPE_SDMA: 23001bb76ff1Sjsg timeout = adev->sdma_timeout; 23011bb76ff1Sjsg break; 23021bb76ff1Sjsg default: 23031bb76ff1Sjsg timeout = adev->video_timeout; 23041bb76ff1Sjsg break; 23051bb76ff1Sjsg } 23061bb76ff1Sjsg 23071bb76ff1Sjsg r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, 2308f005ef32Sjsg ring->num_hw_submission, 0, 23091bb76ff1Sjsg timeout, adev->reset_domain->wq, 23101bb76ff1Sjsg ring->sched_score, ring->name, 23111bb76ff1Sjsg adev->dev); 23121bb76ff1Sjsg if (r) { 23131bb76ff1Sjsg DRM_ERROR("Failed to create scheduler on ring %s.\n", 23141bb76ff1Sjsg ring->name); 23151bb76ff1Sjsg return r; 23161bb76ff1Sjsg } 23171bb76ff1Sjsg } 23181bb76ff1Sjsg 2319f005ef32Sjsg amdgpu_xcp_update_partition_sched_list(adev); 2320f005ef32Sjsg 23211bb76ff1Sjsg return 0; 23221bb76ff1Sjsg } 23231bb76ff1Sjsg 23241bb76ff1Sjsg 2325fb4d8502Sjsg /** 2326fb4d8502Sjsg * amdgpu_device_ip_init - run init for hardware IPs 2327fb4d8502Sjsg * 2328fb4d8502Sjsg * @adev: amdgpu_device pointer 2329fb4d8502Sjsg * 2330fb4d8502Sjsg * Main initialization pass for hardware IPs. The list of all the hardware 2331fb4d8502Sjsg * IPs that make up the asic is walked and the sw_init and hw_init callbacks 2332fb4d8502Sjsg * are run. sw_init initializes the software state associated with each IP 2333fb4d8502Sjsg * and hw_init initializes the hardware associated with each IP. 2334fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 
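 *
 * Hardware init itself is split: amdgpu_device_ip_hw_init_phase1() brings up
 * the COMMON and IH blocks (plus PSP when running SR-IOV) first,
 * amdgpu_device_fw_loading() then loads the SMU/PSP managed firmware, and
 * amdgpu_device_ip_hw_init_phase2() initializes the remaining blocks.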
2335fb4d8502Sjsg */ 2336fb4d8502Sjsg static int amdgpu_device_ip_init(struct amdgpu_device *adev) 2337fb4d8502Sjsg { 2338fb4d8502Sjsg int i, r; 2339fb4d8502Sjsg 2340c349dbc7Sjsg r = amdgpu_ras_init(adev); 2341c349dbc7Sjsg if (r) 2342c349dbc7Sjsg return r; 2343c349dbc7Sjsg 2344fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 2345fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 2346fb4d8502Sjsg continue; 2347fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev); 2348fb4d8502Sjsg if (r) { 2349fb4d8502Sjsg DRM_ERROR("sw_init of IP block <%s> failed %d\n", 2350fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2351c349dbc7Sjsg goto init_failed; 2352fb4d8502Sjsg } 2353fb4d8502Sjsg adev->ip_blocks[i].status.sw = true; 2354fb4d8502Sjsg 2355bdc47e44Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { 2356bdc47e44Sjsg /* need to do common hw init early so everything is set up for gmc */ 2357bdc47e44Sjsg r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2358bdc47e44Sjsg if (r) { 2359bdc47e44Sjsg DRM_ERROR("hw_init %d failed %d\n", i, r); 2360bdc47e44Sjsg goto init_failed; 2361bdc47e44Sjsg } 2362bdc47e44Sjsg adev->ip_blocks[i].status.hw = true; 2363bdc47e44Sjsg } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2364fb4d8502Sjsg /* need to do gmc hw init early so we can allocate gpu mem */ 236515f9b5f9Sjsg /* Try to reserve bad pages early */ 236615f9b5f9Sjsg if (amdgpu_sriov_vf(adev)) 236715f9b5f9Sjsg amdgpu_virt_exchange_data(adev); 236815f9b5f9Sjsg 2369f005ef32Sjsg r = amdgpu_device_mem_scratch_init(adev); 2370fb4d8502Sjsg if (r) { 2371f005ef32Sjsg DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); 2372c349dbc7Sjsg goto init_failed; 2373fb4d8502Sjsg } 2374fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); 2375fb4d8502Sjsg if (r) { 2376fb4d8502Sjsg DRM_ERROR("hw_init %d failed %d\n", i, r); 2377c349dbc7Sjsg goto init_failed; 2378fb4d8502Sjsg } 2379fb4d8502Sjsg r = amdgpu_device_wb_init(adev); 2380fb4d8502Sjsg if (r) { 2381fb4d8502Sjsg DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); 2382c349dbc7Sjsg goto init_failed; 2383fb4d8502Sjsg } 2384fb4d8502Sjsg adev->ip_blocks[i].status.hw = true; 2385fb4d8502Sjsg 2386fb4d8502Sjsg /* right after GMC hw init, we create CSA */ 2387f005ef32Sjsg if (adev->gfx.mcbp) { 2388c349dbc7Sjsg r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, 2389f005ef32Sjsg AMDGPU_GEM_DOMAIN_VRAM | 2390f005ef32Sjsg AMDGPU_GEM_DOMAIN_GTT, 2391c349dbc7Sjsg AMDGPU_CSA_SIZE); 2392fb4d8502Sjsg if (r) { 2393fb4d8502Sjsg DRM_ERROR("allocate CSA failed %d\n", r); 2394c349dbc7Sjsg goto init_failed; 2395fb4d8502Sjsg } 2396fb4d8502Sjsg } 2397fb4d8502Sjsg } 2398fb4d8502Sjsg } 2399fb4d8502Sjsg 2400c349dbc7Sjsg if (amdgpu_sriov_vf(adev)) 240133331580Sjsg amdgpu_virt_init_data_exchange(adev); 2402c349dbc7Sjsg 2403c349dbc7Sjsg r = amdgpu_ib_pool_init(adev); 2404fb4d8502Sjsg if (r) { 2405c349dbc7Sjsg dev_err(adev->dev, "IB initialization failed (%d).\n", r); 2406c349dbc7Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); 2407c349dbc7Sjsg goto init_failed; 2408fb4d8502Sjsg } 2409fb4d8502Sjsg 2410c349dbc7Sjsg r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ 2411c349dbc7Sjsg if (r) 2412c349dbc7Sjsg goto init_failed; 2413c349dbc7Sjsg 2414c349dbc7Sjsg r = amdgpu_device_ip_hw_init_phase1(adev); 2415c349dbc7Sjsg if (r) 2416c349dbc7Sjsg goto init_failed; 2417c349dbc7Sjsg 2418c349dbc7Sjsg r = amdgpu_device_fw_loading(adev); 2419c349dbc7Sjsg if 
(r) 2420c349dbc7Sjsg goto init_failed; 2421c349dbc7Sjsg 2422c349dbc7Sjsg r = amdgpu_device_ip_hw_init_phase2(adev); 2423c349dbc7Sjsg if (r) 2424c349dbc7Sjsg goto init_failed; 2425c349dbc7Sjsg 2426c349dbc7Sjsg /* 2427c349dbc7Sjsg * retired pages will be loaded from eeprom and reserved here, 2428c349dbc7Sjsg * it should be called after amdgpu_device_ip_hw_init_phase2 since 2429c349dbc7Sjsg * for some ASICs the RAS EEPROM code relies on SMU fully functioning 2430c349dbc7Sjsg * for I2C communication which only true at this point. 2431ad8b1aafSjsg * 2432ad8b1aafSjsg * amdgpu_ras_recovery_init may fail, but the upper only cares the 2433ad8b1aafSjsg * failure from bad gpu situation and stop amdgpu init process 2434ad8b1aafSjsg * accordingly. For other failed cases, it will still release all 2435ad8b1aafSjsg * the resource and print error message, rather than returning one 2436ad8b1aafSjsg * negative value to upper level. 2437c349dbc7Sjsg * 2438c349dbc7Sjsg * Note: theoretically, this should be called before all vram allocations 2439c349dbc7Sjsg * to protect retired page from abusing 2440c349dbc7Sjsg */ 2441ad8b1aafSjsg r = amdgpu_ras_recovery_init(adev); 2442ad8b1aafSjsg if (r) 2443ad8b1aafSjsg goto init_failed; 2444c349dbc7Sjsg 24451bb76ff1Sjsg /** 24461bb76ff1Sjsg * In case of XGMI grab extra reference for reset domain for this device 24471bb76ff1Sjsg */ 24481bb76ff1Sjsg if (adev->gmc.xgmi.num_physical_nodes > 1) { 24491bb76ff1Sjsg if (amdgpu_xgmi_add_device(adev) == 0) { 24501bb76ff1Sjsg if (!amdgpu_sriov_vf(adev)) { 24511bb76ff1Sjsg struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 24521bb76ff1Sjsg 24531bb76ff1Sjsg if (WARN_ON(!hive)) { 24541bb76ff1Sjsg r = -ENOENT; 24551bb76ff1Sjsg goto init_failed; 24561bb76ff1Sjsg } 24571bb76ff1Sjsg 24581bb76ff1Sjsg if (!hive->reset_domain || 24591bb76ff1Sjsg !amdgpu_reset_get_reset_domain(hive->reset_domain)) { 24601bb76ff1Sjsg r = -ENOENT; 24611bb76ff1Sjsg amdgpu_put_xgmi_hive(hive); 24621bb76ff1Sjsg goto init_failed; 24631bb76ff1Sjsg } 24641bb76ff1Sjsg 24651bb76ff1Sjsg /* Drop the early temporary reset domain we created for device */ 24661bb76ff1Sjsg amdgpu_reset_put_reset_domain(adev->reset_domain); 24671bb76ff1Sjsg adev->reset_domain = hive->reset_domain; 24681bb76ff1Sjsg amdgpu_put_xgmi_hive(hive); 24691bb76ff1Sjsg } 24701bb76ff1Sjsg } 24711bb76ff1Sjsg } 24721bb76ff1Sjsg 24731bb76ff1Sjsg r = amdgpu_device_init_schedulers(adev); 24741bb76ff1Sjsg if (r) 24751bb76ff1Sjsg goto init_failed; 24765ca02815Sjsg 24775ca02815Sjsg /* Don't init kfd if whole hive need to be reset during init */ 2478f005ef32Sjsg if (!adev->gmc.xgmi.pending_reset) { 2479f005ef32Sjsg kgd2kfd_init_zone_device(adev); 2480fb4d8502Sjsg amdgpu_amdkfd_device_init(adev); 2481f005ef32Sjsg } 2482fb4d8502Sjsg 2483ad8b1aafSjsg amdgpu_fru_get_product_info(adev); 2484ad8b1aafSjsg 2485c349dbc7Sjsg init_failed: 2486fb4d8502Sjsg 2487c349dbc7Sjsg return r; 2488fb4d8502Sjsg } 2489fb4d8502Sjsg 2490fb4d8502Sjsg /** 2491fb4d8502Sjsg * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer 2492fb4d8502Sjsg * 2493fb4d8502Sjsg * @adev: amdgpu_device pointer 2494fb4d8502Sjsg * 2495fb4d8502Sjsg * Writes a reset magic value to the gart pointer in VRAM. The driver calls 2496fb4d8502Sjsg * this function before a GPU reset. If the value is retained after a 2497fb4d8502Sjsg * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. 
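 *
 * The counterpart is amdgpu_device_check_vram_lost(), which compares the
 * magic after a reset and also treats BACO and mode1 resets as always losing
 * VRAM contents.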
2498fb4d8502Sjsg */ 2499fb4d8502Sjsg static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) 2500fb4d8502Sjsg { 2501fb4d8502Sjsg memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); 2502fb4d8502Sjsg } 2503fb4d8502Sjsg 2504fb4d8502Sjsg /** 2505fb4d8502Sjsg * amdgpu_device_check_vram_lost - check if vram is valid 2506fb4d8502Sjsg * 2507fb4d8502Sjsg * @adev: amdgpu_device pointer 2508fb4d8502Sjsg * 2509fb4d8502Sjsg * Checks the reset magic value written to the gart pointer in VRAM. 2510fb4d8502Sjsg * The driver calls this after a GPU reset to see if the contents of 2511fb4d8502Sjsg * VRAM is lost or now. 2512fb4d8502Sjsg * returns true if vram is lost, false if not. 2513fb4d8502Sjsg */ 2514fb4d8502Sjsg static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) 2515fb4d8502Sjsg { 2516c349dbc7Sjsg if (memcmp(adev->gart.ptr, adev->reset_magic, 2517c349dbc7Sjsg AMDGPU_RESET_MAGIC_NUM)) 2518c349dbc7Sjsg return true; 2519c349dbc7Sjsg 2520ad8b1aafSjsg if (!amdgpu_in_reset(adev)) 2521c349dbc7Sjsg return false; 2522c349dbc7Sjsg 2523c349dbc7Sjsg /* 2524c349dbc7Sjsg * For all ASICs with baco/mode1 reset, the VRAM is 2525c349dbc7Sjsg * always assumed to be lost. 2526c349dbc7Sjsg */ 2527c349dbc7Sjsg switch (amdgpu_asic_reset_method(adev)) { 2528c349dbc7Sjsg case AMD_RESET_METHOD_BACO: 2529c349dbc7Sjsg case AMD_RESET_METHOD_MODE1: 2530c349dbc7Sjsg return true; 2531c349dbc7Sjsg default: 2532c349dbc7Sjsg return false; 2533c349dbc7Sjsg } 2534fb4d8502Sjsg } 2535fb4d8502Sjsg 2536fb4d8502Sjsg /** 2537c349dbc7Sjsg * amdgpu_device_set_cg_state - set clockgating for amdgpu device 2538fb4d8502Sjsg * 2539fb4d8502Sjsg * @adev: amdgpu_device pointer 2540c349dbc7Sjsg * @state: clockgating state (gate or ungate) 2541fb4d8502Sjsg * 2542fb4d8502Sjsg * The list of all the hardware IPs that make up the asic is walked and the 2543c349dbc7Sjsg * set_clockgating_state callbacks are run. 2544c349dbc7Sjsg * Late initialization pass enabling clockgating for hardware IPs. 2545c349dbc7Sjsg * Fini or suspend, pass disabling clockgating for hardware IPs. 2546fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 2547fb4d8502Sjsg */ 2548c349dbc7Sjsg 25495ca02815Sjsg int amdgpu_device_set_cg_state(struct amdgpu_device *adev, 2550c349dbc7Sjsg enum amd_clockgating_state state) 2551fb4d8502Sjsg { 2552c349dbc7Sjsg int i, j, r; 2553fb4d8502Sjsg 2554fb4d8502Sjsg if (amdgpu_emu_mode == 1) 2555fb4d8502Sjsg return 0; 2556fb4d8502Sjsg 2557c349dbc7Sjsg for (j = 0; j < adev->num_ip_blocks; j++) { 2558c349dbc7Sjsg i = state == AMD_CG_STATE_GATE ? 
j : adev->num_ip_blocks - j - 1; 2559c349dbc7Sjsg if (!adev->ip_blocks[i].status.late_initialized) 2560fb4d8502Sjsg continue; 2561f005ef32Sjsg /* skip CG for GFX, SDMA on S0ix */ 25625ca02815Sjsg if (adev->in_s0ix && 2563f005ef32Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 2564f005ef32Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 25655ca02815Sjsg continue; 2566fb4d8502Sjsg /* skip CG for VCE/UVD, it's handled specially */ 2567fb4d8502Sjsg if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2568fb4d8502Sjsg adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2569fb4d8502Sjsg adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2570c349dbc7Sjsg adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2571fb4d8502Sjsg adev->ip_blocks[i].version->funcs->set_clockgating_state) { 2572fb4d8502Sjsg /* enable clockgating to save power */ 2573fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, 2574c349dbc7Sjsg state); 2575fb4d8502Sjsg if (r) { 2576fb4d8502Sjsg DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", 2577fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2578fb4d8502Sjsg return r; 2579fb4d8502Sjsg } 2580fb4d8502Sjsg } 2581fb4d8502Sjsg } 2582fb4d8502Sjsg 2583fb4d8502Sjsg return 0; 2584fb4d8502Sjsg } 2585fb4d8502Sjsg 25865ca02815Sjsg int amdgpu_device_set_pg_state(struct amdgpu_device *adev, 25875ca02815Sjsg enum amd_powergating_state state) 2588fb4d8502Sjsg { 2589c349dbc7Sjsg int i, j, r; 2590fb4d8502Sjsg 2591fb4d8502Sjsg if (amdgpu_emu_mode == 1) 2592fb4d8502Sjsg return 0; 2593fb4d8502Sjsg 2594c349dbc7Sjsg for (j = 0; j < adev->num_ip_blocks; j++) { 2595c349dbc7Sjsg i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; 2596c349dbc7Sjsg if (!adev->ip_blocks[i].status.late_initialized) 2597fb4d8502Sjsg continue; 2598f005ef32Sjsg /* skip PG for GFX, SDMA on S0ix */ 25995ca02815Sjsg if (adev->in_s0ix && 2600f005ef32Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 2601f005ef32Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 26025ca02815Sjsg continue; 2603fb4d8502Sjsg /* skip CG for VCE/UVD, it's handled specially */ 2604fb4d8502Sjsg if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && 2605fb4d8502Sjsg adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && 2606fb4d8502Sjsg adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && 2607c349dbc7Sjsg adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && 2608fb4d8502Sjsg adev->ip_blocks[i].version->funcs->set_powergating_state) { 2609fb4d8502Sjsg /* enable powergating to save power */ 2610fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, 2611c349dbc7Sjsg state); 2612fb4d8502Sjsg if (r) { 2613fb4d8502Sjsg DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", 2614fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2615fb4d8502Sjsg return r; 2616fb4d8502Sjsg } 2617fb4d8502Sjsg } 2618fb4d8502Sjsg } 2619fb4d8502Sjsg return 0; 2620fb4d8502Sjsg } 2621fb4d8502Sjsg 2622c349dbc7Sjsg static int amdgpu_device_enable_mgpu_fan_boost(void) 2623c349dbc7Sjsg { 2624c349dbc7Sjsg struct amdgpu_gpu_instance *gpu_ins; 2625c349dbc7Sjsg struct amdgpu_device *adev; 2626c349dbc7Sjsg int i, ret = 0; 2627c349dbc7Sjsg 2628c349dbc7Sjsg mutex_lock(&mgpu_info.mutex); 2629c349dbc7Sjsg 2630c349dbc7Sjsg /* 2631c349dbc7Sjsg * MGPU fan boost feature should be enabled 2632c349dbc7Sjsg * only when there are two or more 
dGPUs in 2633c349dbc7Sjsg * the system 2634c349dbc7Sjsg */ 2635c349dbc7Sjsg if (mgpu_info.num_dgpu < 2) 2636c349dbc7Sjsg goto out; 2637c349dbc7Sjsg 2638c349dbc7Sjsg for (i = 0; i < mgpu_info.num_dgpu; i++) { 2639c349dbc7Sjsg gpu_ins = &(mgpu_info.gpu_ins[i]); 2640c349dbc7Sjsg adev = gpu_ins->adev; 2641c349dbc7Sjsg if (!(adev->flags & AMD_IS_APU) && 2642ad8b1aafSjsg !gpu_ins->mgpu_fan_enabled) { 2643c349dbc7Sjsg ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); 2644c349dbc7Sjsg if (ret) 2645c349dbc7Sjsg break; 2646c349dbc7Sjsg 2647c349dbc7Sjsg gpu_ins->mgpu_fan_enabled = 1; 2648c349dbc7Sjsg } 2649c349dbc7Sjsg } 2650c349dbc7Sjsg 2651c349dbc7Sjsg out: 2652c349dbc7Sjsg mutex_unlock(&mgpu_info.mutex); 2653c349dbc7Sjsg 2654c349dbc7Sjsg return ret; 2655c349dbc7Sjsg } 2656c349dbc7Sjsg 2657fb4d8502Sjsg /** 2658fb4d8502Sjsg * amdgpu_device_ip_late_init - run late init for hardware IPs 2659fb4d8502Sjsg * 2660fb4d8502Sjsg * @adev: amdgpu_device pointer 2661fb4d8502Sjsg * 2662fb4d8502Sjsg * Late initialization pass for hardware IPs. The list of all the hardware 2663fb4d8502Sjsg * IPs that make up the asic is walked and the late_init callbacks are run. 2664fb4d8502Sjsg * late_init covers any special initialization that an IP requires 2665fb4d8502Sjsg * after all of the have been initialized or something that needs to happen 2666fb4d8502Sjsg * late in the init process. 2667fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 2668fb4d8502Sjsg */ 2669fb4d8502Sjsg static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) 2670fb4d8502Sjsg { 2671c349dbc7Sjsg struct amdgpu_gpu_instance *gpu_instance; 2672fb4d8502Sjsg int i = 0, r; 2673fb4d8502Sjsg 2674fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 2675c349dbc7Sjsg if (!adev->ip_blocks[i].status.hw) 2676fb4d8502Sjsg continue; 2677fb4d8502Sjsg if (adev->ip_blocks[i].version->funcs->late_init) { 2678fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); 2679fb4d8502Sjsg if (r) { 2680fb4d8502Sjsg DRM_ERROR("late_init of IP block <%s> failed %d\n", 2681fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2682fb4d8502Sjsg return r; 2683fb4d8502Sjsg } 2684c349dbc7Sjsg } 2685fb4d8502Sjsg adev->ip_blocks[i].status.late_initialized = true; 2686fb4d8502Sjsg } 2687fb4d8502Sjsg 26881bb76ff1Sjsg r = amdgpu_ras_late_init(adev); 26891bb76ff1Sjsg if (r) { 26901bb76ff1Sjsg DRM_ERROR("amdgpu_ras_late_init failed %d", r); 26911bb76ff1Sjsg return r; 26921bb76ff1Sjsg } 26931bb76ff1Sjsg 2694ad8b1aafSjsg amdgpu_ras_set_error_query_ready(adev, true); 2695ad8b1aafSjsg 2696c349dbc7Sjsg amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); 2697c349dbc7Sjsg amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); 2698fb4d8502Sjsg 2699fb4d8502Sjsg amdgpu_device_fill_reset_magic(adev); 2700fb4d8502Sjsg 2701c349dbc7Sjsg r = amdgpu_device_enable_mgpu_fan_boost(); 2702c349dbc7Sjsg if (r) 2703c349dbc7Sjsg DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); 2704c349dbc7Sjsg 27051bb76ff1Sjsg /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ 2706f005ef32Sjsg if (amdgpu_passthrough(adev) && 2707f005ef32Sjsg ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) || 27081bb76ff1Sjsg adev->asic_type == CHIP_ALDEBARAN)) 27091bb76ff1Sjsg amdgpu_dpm_handle_passthrough_sbr(adev, true); 2710c349dbc7Sjsg 2711c349dbc7Sjsg if (adev->gmc.xgmi.num_physical_nodes > 1) { 2712c349dbc7Sjsg mutex_lock(&mgpu_info.mutex); 2713c349dbc7Sjsg 2714c349dbc7Sjsg /* 2715c349dbc7Sjsg * Reset device p-state to 
low as this was booted with high. 2716c349dbc7Sjsg * 2717c349dbc7Sjsg * This should be performed only after all devices from the same 2718c349dbc7Sjsg * hive get initialized. 2719c349dbc7Sjsg * 2720c349dbc7Sjsg * However, it's unknown how many device in the hive in advance. 2721c349dbc7Sjsg * As this is counted one by one during devices initializations. 2722c349dbc7Sjsg * 2723c349dbc7Sjsg * So, we wait for all XGMI interlinked devices initialized. 2724c349dbc7Sjsg * This may bring some delays as those devices may come from 2725c349dbc7Sjsg * different hives. But that should be OK. 2726c349dbc7Sjsg */ 2727c349dbc7Sjsg if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { 2728c349dbc7Sjsg for (i = 0; i < mgpu_info.num_gpu; i++) { 2729c349dbc7Sjsg gpu_instance = &(mgpu_info.gpu_ins[i]); 2730c349dbc7Sjsg if (gpu_instance->adev->flags & AMD_IS_APU) 2731c349dbc7Sjsg continue; 2732c349dbc7Sjsg 2733ad8b1aafSjsg r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 2734ad8b1aafSjsg AMDGPU_XGMI_PSTATE_MIN); 2735c349dbc7Sjsg if (r) { 2736c349dbc7Sjsg DRM_ERROR("pstate setting failed (%d).\n", r); 2737c349dbc7Sjsg break; 2738c349dbc7Sjsg } 2739c349dbc7Sjsg } 2740c349dbc7Sjsg } 2741c349dbc7Sjsg 2742c349dbc7Sjsg mutex_unlock(&mgpu_info.mutex); 2743c349dbc7Sjsg } 2744c349dbc7Sjsg 2745fb4d8502Sjsg return 0; 2746fb4d8502Sjsg } 2747fb4d8502Sjsg 27481bb76ff1Sjsg /** 27491bb76ff1Sjsg * amdgpu_device_smu_fini_early - smu hw_fini wrapper 27501bb76ff1Sjsg * 27511bb76ff1Sjsg * @adev: amdgpu_device pointer 27521bb76ff1Sjsg * 27531bb76ff1Sjsg * For ASICs need to disable SMC first 27541bb76ff1Sjsg */ 27551bb76ff1Sjsg static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) 27561bb76ff1Sjsg { 27571bb76ff1Sjsg int i, r; 27581bb76ff1Sjsg 27591bb76ff1Sjsg if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) 27601bb76ff1Sjsg return; 27611bb76ff1Sjsg 27621bb76ff1Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 27631bb76ff1Sjsg if (!adev->ip_blocks[i].status.hw) 27641bb76ff1Sjsg continue; 27651bb76ff1Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 27661bb76ff1Sjsg r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 27671bb76ff1Sjsg /* XXX handle errors */ 27681bb76ff1Sjsg if (r) { 27691bb76ff1Sjsg DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 27701bb76ff1Sjsg adev->ip_blocks[i].version->funcs->name, r); 27711bb76ff1Sjsg } 27721bb76ff1Sjsg adev->ip_blocks[i].status.hw = false; 27731bb76ff1Sjsg break; 27741bb76ff1Sjsg } 27751bb76ff1Sjsg } 27761bb76ff1Sjsg } 27771bb76ff1Sjsg 27785ca02815Sjsg static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) 2779fb4d8502Sjsg { 2780fb4d8502Sjsg int i, r; 2781fb4d8502Sjsg 27825ca02815Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 27835ca02815Sjsg if (!adev->ip_blocks[i].version->funcs->early_fini) 27845ca02815Sjsg continue; 2785ad8b1aafSjsg 27865ca02815Sjsg r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev); 27875ca02815Sjsg if (r) { 27885ca02815Sjsg DRM_DEBUG("early_fini of IP block <%s> failed %d\n", 27895ca02815Sjsg adev->ip_blocks[i].version->funcs->name, r); 27905ca02815Sjsg } 27915ca02815Sjsg } 2792c349dbc7Sjsg 2793c349dbc7Sjsg amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2794c349dbc7Sjsg amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2795c349dbc7Sjsg 27961bb76ff1Sjsg amdgpu_amdkfd_suspend(adev, false); 27971bb76ff1Sjsg 27981bb76ff1Sjsg /* Workaroud for ASICs need to disable SMC first */ 27991bb76ff1Sjsg amdgpu_device_smu_fini_early(adev); 2800fb4d8502Sjsg 2801fb4d8502Sjsg for (i = 
adev->num_ip_blocks - 1; i >= 0; i--) { 2802fb4d8502Sjsg if (!adev->ip_blocks[i].status.hw) 2803fb4d8502Sjsg continue; 2804fb4d8502Sjsg 2805fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); 2806fb4d8502Sjsg /* XXX handle errors */ 2807fb4d8502Sjsg if (r) { 2808fb4d8502Sjsg DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", 2809fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2810fb4d8502Sjsg } 2811fb4d8502Sjsg 2812fb4d8502Sjsg adev->ip_blocks[i].status.hw = false; 2813fb4d8502Sjsg } 2814fb4d8502Sjsg 28155ca02815Sjsg if (amdgpu_sriov_vf(adev)) { 28165ca02815Sjsg if (amdgpu_virt_release_full_gpu(adev, false)) 28175ca02815Sjsg DRM_ERROR("failed to release exclusive mode on fini\n"); 28185ca02815Sjsg } 28195ca02815Sjsg 28205ca02815Sjsg return 0; 28215ca02815Sjsg } 28225ca02815Sjsg 28235ca02815Sjsg /** 28245ca02815Sjsg * amdgpu_device_ip_fini - run fini for hardware IPs 28255ca02815Sjsg * 28265ca02815Sjsg * @adev: amdgpu_device pointer 28275ca02815Sjsg * 28285ca02815Sjsg * Main teardown pass for hardware IPs. The list of all the hardware 28295ca02815Sjsg * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks 28305ca02815Sjsg * are run. hw_fini tears down the hardware associated with each IP 28315ca02815Sjsg * and sw_fini tears down any software state associated with each IP. 28325ca02815Sjsg * Returns 0 on success, negative error code on failure. 28335ca02815Sjsg */ 28345ca02815Sjsg static int amdgpu_device_ip_fini(struct amdgpu_device *adev) 28355ca02815Sjsg { 28365ca02815Sjsg int i, r; 28375ca02815Sjsg 28385ca02815Sjsg if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) 28395ca02815Sjsg amdgpu_virt_release_ras_err_handler_data(adev); 28405ca02815Sjsg 28415ca02815Sjsg if (adev->gmc.xgmi.num_physical_nodes > 1) 28425ca02815Sjsg amdgpu_xgmi_remove_device(adev); 28435ca02815Sjsg 28445ca02815Sjsg amdgpu_amdkfd_device_fini_sw(adev); 2845fb4d8502Sjsg 2846fb4d8502Sjsg for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2847fb4d8502Sjsg if (!adev->ip_blocks[i].status.sw) 2848fb4d8502Sjsg continue; 2849fb4d8502Sjsg 2850fb4d8502Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 2851c349dbc7Sjsg amdgpu_ucode_free_bo(adev); 2852c349dbc7Sjsg amdgpu_free_static_csa(&adev->virt.csa_obj); 2853fb4d8502Sjsg amdgpu_device_wb_fini(adev); 2854f005ef32Sjsg amdgpu_device_mem_scratch_fini(adev); 2855c349dbc7Sjsg amdgpu_ib_pool_fini(adev); 2856fb4d8502Sjsg } 2857fb4d8502Sjsg 2858fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); 2859fb4d8502Sjsg /* XXX handle errors */ 2860fb4d8502Sjsg if (r) { 2861fb4d8502Sjsg DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", 2862fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2863fb4d8502Sjsg } 2864fb4d8502Sjsg adev->ip_blocks[i].status.sw = false; 2865fb4d8502Sjsg adev->ip_blocks[i].status.valid = false; 2866fb4d8502Sjsg } 2867fb4d8502Sjsg 2868fb4d8502Sjsg for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2869fb4d8502Sjsg if (!adev->ip_blocks[i].status.late_initialized) 2870fb4d8502Sjsg continue; 2871fb4d8502Sjsg if (adev->ip_blocks[i].version->funcs->late_fini) 2872fb4d8502Sjsg adev->ip_blocks[i].version->funcs->late_fini((void *)adev); 2873fb4d8502Sjsg adev->ip_blocks[i].status.late_initialized = false; 2874fb4d8502Sjsg } 2875fb4d8502Sjsg 2876c349dbc7Sjsg amdgpu_ras_fini(adev); 2877c349dbc7Sjsg 2878fb4d8502Sjsg return 0; 2879fb4d8502Sjsg } 2880fb4d8502Sjsg 2881fb4d8502Sjsg /** 2882c349dbc7Sjsg * amdgpu_device_delayed_init_work_handler - work handler for IB tests 
2883fb4d8502Sjsg * 2884c349dbc7Sjsg * @work: work_struct. 2885fb4d8502Sjsg */ 2886c349dbc7Sjsg static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) 2887fb4d8502Sjsg { 2888fb4d8502Sjsg struct amdgpu_device *adev = 2889c349dbc7Sjsg container_of(work, struct amdgpu_device, delayed_init_work.work); 2890fb4d8502Sjsg int r; 2891fb4d8502Sjsg 2892fb4d8502Sjsg r = amdgpu_ib_ring_tests(adev); 2893fb4d8502Sjsg if (r) 2894fb4d8502Sjsg DRM_ERROR("ib ring test failed (%d).\n", r); 2895fb4d8502Sjsg } 2896fb4d8502Sjsg 2897c349dbc7Sjsg static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) 2898c349dbc7Sjsg { 2899c349dbc7Sjsg struct amdgpu_device *adev = 2900c349dbc7Sjsg container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); 2901c349dbc7Sjsg 29028b172e32Sjsg WARN_ON_ONCE(adev->gfx.gfx_off_state); 29038b172e32Sjsg WARN_ON_ONCE(adev->gfx.gfx_off_req_count); 29048b172e32Sjsg 2905c349dbc7Sjsg if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) 2906c349dbc7Sjsg adev->gfx.gfx_off_state = true; 2907c349dbc7Sjsg } 2908c349dbc7Sjsg 2909fb4d8502Sjsg /** 2910fb4d8502Sjsg * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) 2911fb4d8502Sjsg * 2912fb4d8502Sjsg * @adev: amdgpu_device pointer 2913fb4d8502Sjsg * 2914fb4d8502Sjsg * Main suspend function for hardware IPs. The list of all the hardware 2915fb4d8502Sjsg * IPs that make up the asic is walked, clockgating is disabled and the 2916fb4d8502Sjsg * suspend callbacks are run. suspend puts the hardware and software state 2917fb4d8502Sjsg * in each IP into a state suitable for suspend. 2918fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 2919fb4d8502Sjsg */ 2920fb4d8502Sjsg static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) 2921fb4d8502Sjsg { 2922fb4d8502Sjsg int i, r; 2923fb4d8502Sjsg 2924c349dbc7Sjsg amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 2925c349dbc7Sjsg amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); 2926fb4d8502Sjsg 29271bb76ff1Sjsg /* 29281bb76ff1Sjsg * Per PMFW team's suggestion, driver needs to handle gfxoff 29291bb76ff1Sjsg * and df cstate features disablement for gpu reset(e.g. Mode1Reset) 29301bb76ff1Sjsg * scenario. Add the missing df cstate disablement here. 
29311bb76ff1Sjsg */ 29321bb76ff1Sjsg if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) 29331bb76ff1Sjsg dev_warn(adev->dev, "Failed to disallow df cstate"); 29341bb76ff1Sjsg 2935fb4d8502Sjsg for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2936fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 2937fb4d8502Sjsg continue; 2938ad8b1aafSjsg 2939fb4d8502Sjsg /* displays are handled separately */ 2940ad8b1aafSjsg if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) 2941ad8b1aafSjsg continue; 2942ad8b1aafSjsg 2943fb4d8502Sjsg /* XXX handle errors */ 2944fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->suspend(adev); 2945fb4d8502Sjsg /* XXX handle errors */ 2946fb4d8502Sjsg if (r) { 2947fb4d8502Sjsg DRM_ERROR("suspend of IP block <%s> failed %d\n", 2948fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 2949c349dbc7Sjsg return r; 2950c349dbc7Sjsg } 2951ad8b1aafSjsg 2952c349dbc7Sjsg adev->ip_blocks[i].status.hw = false; 2953fb4d8502Sjsg } 2954fb4d8502Sjsg 2955fb4d8502Sjsg return 0; 2956fb4d8502Sjsg } 2957fb4d8502Sjsg 2958fb4d8502Sjsg /** 2959fb4d8502Sjsg * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) 2960fb4d8502Sjsg * 2961fb4d8502Sjsg * @adev: amdgpu_device pointer 2962fb4d8502Sjsg * 2963fb4d8502Sjsg * Main suspend function for hardware IPs. The list of all the hardware 2964fb4d8502Sjsg * IPs that make up the asic is walked, clockgating is disabled and the 2965fb4d8502Sjsg * suspend callbacks are run. suspend puts the hardware and software state 2966fb4d8502Sjsg * in each IP into a state suitable for suspend. 2967fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 2968fb4d8502Sjsg */ 2969fb4d8502Sjsg static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) 2970fb4d8502Sjsg { 2971fb4d8502Sjsg int i, r; 2972fb4d8502Sjsg 29735ca02815Sjsg if (adev->in_s0ix) 29741bb76ff1Sjsg amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry); 29755ca02815Sjsg 2976fb4d8502Sjsg for (i = adev->num_ip_blocks - 1; i >= 0; i--) { 2977fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 2978fb4d8502Sjsg continue; 2979fb4d8502Sjsg /* displays are handled in phase1 */ 2980fb4d8502Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) 2981fb4d8502Sjsg continue; 2982c349dbc7Sjsg /* PSP lost connection when err_event_athub occurs */ 2983c349dbc7Sjsg if (amdgpu_ras_intr_triggered() && 2984c349dbc7Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 2985c349dbc7Sjsg adev->ip_blocks[i].status.hw = false; 2986c349dbc7Sjsg continue; 2987fb4d8502Sjsg } 29885ca02815Sjsg 29895ca02815Sjsg /* skip unnecessary suspend if we do not initialize them yet */ 29905ca02815Sjsg if (adev->gmc.xgmi.pending_reset && 29915ca02815Sjsg !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 29925ca02815Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC || 29935ca02815Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 29945ca02815Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) { 29955ca02815Sjsg adev->ip_blocks[i].status.hw = false; 29965ca02815Sjsg continue; 29975ca02815Sjsg } 29985ca02815Sjsg 29997a1b9fa9Sjsg /* skip suspend of gfx/mes and psp for S0ix 30005ca02815Sjsg * gfx is in gfxoff state, so on resume it will exit gfxoff just 30015ca02815Sjsg * like at runtime. PSP is also part of the always on hardware 30025ca02815Sjsg * so no need to suspend it. 
30035ca02815Sjsg */ 30045ca02815Sjsg if (adev->in_s0ix && 30055ca02815Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || 30067a1b9fa9Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || 30077a1b9fa9Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) 30085ca02815Sjsg continue; 30095ca02815Sjsg 3010955b8040Sjsg /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ 3011955b8040Sjsg if (adev->in_s0ix && 3012955b8040Sjsg (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) && 3013955b8040Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) 3014955b8040Sjsg continue; 3015955b8040Sjsg 3016582979dcSjsg /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot. 3017582979dcSjsg * These are in TMR, hence are expected to be reused by PSP-TOS to reload 3018582979dcSjsg * from this location and RLC Autoload automatically also gets loaded 3019582979dcSjsg * from here based on PMFW -> PSP message during re-init sequence. 3020582979dcSjsg * Therefore, the psp suspend & resume should be skipped to avoid destroy 3021582979dcSjsg * the TMR and reload FWs again for IMU enabled APU ASICs. 3022582979dcSjsg */ 3023582979dcSjsg if (amdgpu_in_reset(adev) && 3024582979dcSjsg (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs && 3025582979dcSjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3026582979dcSjsg continue; 3027582979dcSjsg 3028fb4d8502Sjsg /* XXX handle errors */ 3029fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->suspend(adev); 3030fb4d8502Sjsg /* XXX handle errors */ 3031fb4d8502Sjsg if (r) { 3032fb4d8502Sjsg DRM_ERROR("suspend of IP block <%s> failed %d\n", 3033fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 3034fb4d8502Sjsg } 3035c349dbc7Sjsg adev->ip_blocks[i].status.hw = false; 3036c349dbc7Sjsg /* handle putting the SMC in the appropriate state */ 3037c349dbc7Sjsg if (!amdgpu_sriov_vf(adev)) { 3038c349dbc7Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { 3039c349dbc7Sjsg r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); 3040c349dbc7Sjsg if (r) { 3041c349dbc7Sjsg DRM_ERROR("SMC failed to set mp1 state %d, %d\n", 3042c349dbc7Sjsg adev->mp1_state, r); 3043c349dbc7Sjsg return r; 3044fb4d8502Sjsg } 3045c349dbc7Sjsg } 3046c349dbc7Sjsg } 3047c349dbc7Sjsg } 3048fb4d8502Sjsg 3049fb4d8502Sjsg return 0; 3050fb4d8502Sjsg } 3051fb4d8502Sjsg 3052fb4d8502Sjsg /** 3053fb4d8502Sjsg * amdgpu_device_ip_suspend - run suspend for hardware IPs 3054fb4d8502Sjsg * 3055fb4d8502Sjsg * @adev: amdgpu_device pointer 3056fb4d8502Sjsg * 3057fb4d8502Sjsg * Main suspend function for hardware IPs. The list of all the hardware 3058fb4d8502Sjsg * IPs that make up the asic is walked, clockgating is disabled and the 3059fb4d8502Sjsg * suspend callbacks are run. suspend puts the hardware and software state 3060fb4d8502Sjsg * in each IP into a state suitable for suspend. 3061fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 
3062fb4d8502Sjsg */ 3063fb4d8502Sjsg int amdgpu_device_ip_suspend(struct amdgpu_device *adev) 3064fb4d8502Sjsg { 3065fb4d8502Sjsg int r; 3066fb4d8502Sjsg 30675ca02815Sjsg if (amdgpu_sriov_vf(adev)) { 30685ca02815Sjsg amdgpu_virt_fini_data_exchange(adev); 3069c349dbc7Sjsg amdgpu_virt_request_full_gpu(adev, false); 30705ca02815Sjsg } 3071c349dbc7Sjsg 3072fb4d8502Sjsg r = amdgpu_device_ip_suspend_phase1(adev); 3073fb4d8502Sjsg if (r) 3074fb4d8502Sjsg return r; 3075fb4d8502Sjsg r = amdgpu_device_ip_suspend_phase2(adev); 3076fb4d8502Sjsg 3077c349dbc7Sjsg if (amdgpu_sriov_vf(adev)) 3078c349dbc7Sjsg amdgpu_virt_release_full_gpu(adev, false); 3079c349dbc7Sjsg 3080fb4d8502Sjsg return r; 3081fb4d8502Sjsg } 3082fb4d8502Sjsg 3083fb4d8502Sjsg static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) 3084fb4d8502Sjsg { 3085fb4d8502Sjsg int i, r; 3086fb4d8502Sjsg 3087fb4d8502Sjsg static enum amd_ip_block_type ip_order[] = { 3088fb4d8502Sjsg AMD_IP_BLOCK_TYPE_COMMON, 3089bdc47e44Sjsg AMD_IP_BLOCK_TYPE_GMC, 3090fb4d8502Sjsg AMD_IP_BLOCK_TYPE_PSP, 3091fb4d8502Sjsg AMD_IP_BLOCK_TYPE_IH, 3092fb4d8502Sjsg }; 3093fb4d8502Sjsg 3094a30ccb29Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 3095fb4d8502Sjsg int j; 3096fb4d8502Sjsg struct amdgpu_ip_block *block; 3097fb4d8502Sjsg 3098ad8b1aafSjsg block = &adev->ip_blocks[i]; 3099c349dbc7Sjsg block->status.hw = false; 3100ad8b1aafSjsg 3101ad8b1aafSjsg for (j = 0; j < ARRAY_SIZE(ip_order); j++) { 3102ad8b1aafSjsg 3103ad8b1aafSjsg if (block->version->type != ip_order[j] || 3104fb4d8502Sjsg !block->status.valid) 3105fb4d8502Sjsg continue; 3106fb4d8502Sjsg 3107fb4d8502Sjsg r = block->version->funcs->hw_init(adev); 3108c349dbc7Sjsg DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3109fb4d8502Sjsg if (r) 3110fb4d8502Sjsg return r; 3111c349dbc7Sjsg block->status.hw = true; 3112fb4d8502Sjsg } 3113fb4d8502Sjsg } 3114fb4d8502Sjsg 3115fb4d8502Sjsg return 0; 3116fb4d8502Sjsg } 3117fb4d8502Sjsg 3118fb4d8502Sjsg static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) 3119fb4d8502Sjsg { 3120fb4d8502Sjsg int i, r; 3121fb4d8502Sjsg 3122fb4d8502Sjsg static enum amd_ip_block_type ip_order[] = { 3123fb4d8502Sjsg AMD_IP_BLOCK_TYPE_SMC, 3124fb4d8502Sjsg AMD_IP_BLOCK_TYPE_DCE, 3125fb4d8502Sjsg AMD_IP_BLOCK_TYPE_GFX, 3126fb4d8502Sjsg AMD_IP_BLOCK_TYPE_SDMA, 3127f005ef32Sjsg AMD_IP_BLOCK_TYPE_MES, 3128fb4d8502Sjsg AMD_IP_BLOCK_TYPE_UVD, 3129c349dbc7Sjsg AMD_IP_BLOCK_TYPE_VCE, 3130f005ef32Sjsg AMD_IP_BLOCK_TYPE_VCN, 3131f005ef32Sjsg AMD_IP_BLOCK_TYPE_JPEG 3132fb4d8502Sjsg }; 3133fb4d8502Sjsg 3134fb4d8502Sjsg for (i = 0; i < ARRAY_SIZE(ip_order); i++) { 3135fb4d8502Sjsg int j; 3136fb4d8502Sjsg struct amdgpu_ip_block *block; 3137fb4d8502Sjsg 3138fb4d8502Sjsg for (j = 0; j < adev->num_ip_blocks; j++) { 3139fb4d8502Sjsg block = &adev->ip_blocks[j]; 3140fb4d8502Sjsg 3141fb4d8502Sjsg if (block->version->type != ip_order[i] || 3142c349dbc7Sjsg !block->status.valid || 3143c349dbc7Sjsg block->status.hw) 3144fb4d8502Sjsg continue; 3145fb4d8502Sjsg 3146c349dbc7Sjsg if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) 3147c349dbc7Sjsg r = block->version->funcs->resume(adev); 3148c349dbc7Sjsg else 3149fb4d8502Sjsg r = block->version->funcs->hw_init(adev); 3150c349dbc7Sjsg 3151c349dbc7Sjsg DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); 3152fb4d8502Sjsg if (r) 3153fb4d8502Sjsg return r; 3154c349dbc7Sjsg block->status.hw = true; 3155fb4d8502Sjsg } 3156fb4d8502Sjsg } 3157fb4d8502Sjsg 
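/* All valid blocks listed in ip_order above that were not already active have now been
 * resumed (SMC) or re-initialized (all other types) and marked as hw enabled, so the
 * late SR-IOV re-init is complete at this point.
 */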
3158fb4d8502Sjsg return 0; 3159fb4d8502Sjsg } 3160fb4d8502Sjsg 3161fb4d8502Sjsg /** 3162fb4d8502Sjsg * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs 3163fb4d8502Sjsg * 3164fb4d8502Sjsg * @adev: amdgpu_device pointer 3165fb4d8502Sjsg * 3166fb4d8502Sjsg * First resume function for hardware IPs. The list of all the hardware 3167fb4d8502Sjsg * IPs that make up the asic is walked and the resume callbacks are run for 3168fb4d8502Sjsg * COMMON, GMC, and IH. resume puts the hardware into a functional state 3169fb4d8502Sjsg * after a suspend and updates the software state as necessary. This 3170fb4d8502Sjsg * function is also used for restoring the GPU after a GPU reset. 3171fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 3172fb4d8502Sjsg */ 3173fb4d8502Sjsg static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) 3174fb4d8502Sjsg { 3175fb4d8502Sjsg int i, r; 3176fb4d8502Sjsg 3177fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 3178c349dbc7Sjsg if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3179fb4d8502Sjsg continue; 3180fb4d8502Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3181fb4d8502Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 31821bb76ff1Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 31831bb76ff1Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { 3184c349dbc7Sjsg 3185fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->resume(adev); 3186fb4d8502Sjsg if (r) { 3187fb4d8502Sjsg DRM_ERROR("resume of IP block <%s> failed %d\n", 3188fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 3189fb4d8502Sjsg return r; 3190fb4d8502Sjsg } 3191c349dbc7Sjsg adev->ip_blocks[i].status.hw = true; 3192fb4d8502Sjsg } 3193fb4d8502Sjsg } 3194fb4d8502Sjsg 3195fb4d8502Sjsg return 0; 3196fb4d8502Sjsg } 3197fb4d8502Sjsg 3198fb4d8502Sjsg /** 3199fb4d8502Sjsg * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs 3200fb4d8502Sjsg * 3201fb4d8502Sjsg * @adev: amdgpu_device pointer 3202fb4d8502Sjsg * 3203f4ab5340Sjsg * Second resume function for hardware IPs. The list of all the hardware 3204fb4d8502Sjsg * IPs that make up the asic is walked and the resume callbacks are run for 3205fb4d8502Sjsg * all blocks except COMMON, GMC, and IH. resume puts the hardware into a 3206fb4d8502Sjsg * functional state after a suspend and updates the software state as 3207fb4d8502Sjsg * necessary. This function is also used for restoring the GPU after a GPU 3208fb4d8502Sjsg * reset. 3209fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 
3210fb4d8502Sjsg */ 3211fb4d8502Sjsg static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) 3212fb4d8502Sjsg { 3213fb4d8502Sjsg int i, r; 3214fb4d8502Sjsg 3215fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 3216c349dbc7Sjsg if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3217fb4d8502Sjsg continue; 3218fb4d8502Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 3219fb4d8502Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 3220c349dbc7Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 3221f4ab5340Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || 3222c349dbc7Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) 3223fb4d8502Sjsg continue; 3224fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->resume(adev); 3225fb4d8502Sjsg if (r) { 3226fb4d8502Sjsg DRM_ERROR("resume of IP block <%s> failed %d\n", 3227fb4d8502Sjsg adev->ip_blocks[i].version->funcs->name, r); 3228fb4d8502Sjsg return r; 3229fb4d8502Sjsg } 3230c349dbc7Sjsg adev->ip_blocks[i].status.hw = true; 3231fb4d8502Sjsg } 3232fb4d8502Sjsg 3233fb4d8502Sjsg return 0; 3234fb4d8502Sjsg } 3235fb4d8502Sjsg 3236fb4d8502Sjsg /** 3237f4ab5340Sjsg * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs 3238f4ab5340Sjsg * 3239f4ab5340Sjsg * @adev: amdgpu_device pointer 3240f4ab5340Sjsg * 3241f4ab5340Sjsg * Third resume function for hardware IPs. The list of all the hardware 3242f4ab5340Sjsg * IPs that make up the asic is walked and the resume callbacks are run for 3243f4ab5340Sjsg * all DCE. resume puts the hardware into a functional state after a suspend 3244f4ab5340Sjsg * and updates the software state as necessary. This function is also used 3245f4ab5340Sjsg * for restoring the GPU after a GPU reset. 3246f4ab5340Sjsg * 3247f4ab5340Sjsg * Returns 0 on success, negative error code on failure. 3248f4ab5340Sjsg */ 3249f4ab5340Sjsg static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) 3250f4ab5340Sjsg { 3251f4ab5340Sjsg int i, r; 3252f4ab5340Sjsg 3253f4ab5340Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 3254f4ab5340Sjsg if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) 3255f4ab5340Sjsg continue; 3256f4ab5340Sjsg if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { 3257f4ab5340Sjsg r = adev->ip_blocks[i].version->funcs->resume(adev); 3258*ac374fd8Sjsg if (r) { 3259*ac374fd8Sjsg DRM_ERROR("resume of IP block <%s> failed %d\n", 3260*ac374fd8Sjsg adev->ip_blocks[i].version->funcs->name, r); 3261f4ab5340Sjsg return r; 3262f4ab5340Sjsg } 3263*ac374fd8Sjsg adev->ip_blocks[i].status.hw = true; 3264*ac374fd8Sjsg } 3265f4ab5340Sjsg } 3266f4ab5340Sjsg 3267f4ab5340Sjsg return 0; 3268f4ab5340Sjsg } 3269f4ab5340Sjsg 3270f4ab5340Sjsg /** 3271fb4d8502Sjsg * amdgpu_device_ip_resume - run resume for hardware IPs 3272fb4d8502Sjsg * 3273fb4d8502Sjsg * @adev: amdgpu_device pointer 3274fb4d8502Sjsg * 3275fb4d8502Sjsg * Main resume function for hardware IPs. The hardware IPs 3276fb4d8502Sjsg * are split into multiple resume functions because they are 3277f005ef32Sjsg * also used in recovering from a GPU reset and some additional 3278fb4d8502Sjsg * steps need to be taken between them. In this case (S3/S4) they are 3279fb4d8502Sjsg * run sequentially. 3280fb4d8502Sjsg * Returns 0 on success, negative error code on failure.
3281fb4d8502Sjsg */ 3282fb4d8502Sjsg static int amdgpu_device_ip_resume(struct amdgpu_device *adev) 3283fb4d8502Sjsg { 3284fb4d8502Sjsg int r; 3285fb4d8502Sjsg 3286fb4d8502Sjsg r = amdgpu_device_ip_resume_phase1(adev); 3287fb4d8502Sjsg if (r) 3288fb4d8502Sjsg return r; 3289c349dbc7Sjsg 3290c349dbc7Sjsg r = amdgpu_device_fw_loading(adev); 3291c349dbc7Sjsg if (r) 3292c349dbc7Sjsg return r; 3293c349dbc7Sjsg 3294fb4d8502Sjsg r = amdgpu_device_ip_resume_phase2(adev); 3295fb4d8502Sjsg 3296f4ab5340Sjsg if (r) 3297f4ab5340Sjsg return r; 3298f4ab5340Sjsg 3299f4ab5340Sjsg amdgpu_fence_driver_hw_init(adev); 3300f4ab5340Sjsg 3301f4ab5340Sjsg r = amdgpu_device_ip_resume_phase3(adev); 3302f4ab5340Sjsg 3303fb4d8502Sjsg return r; 3304fb4d8502Sjsg } 3305fb4d8502Sjsg 3306fb4d8502Sjsg /** 3307fb4d8502Sjsg * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV 3308fb4d8502Sjsg * 3309fb4d8502Sjsg * @adev: amdgpu_device pointer 3310fb4d8502Sjsg * 3311fb4d8502Sjsg * Query the VBIOS data tables to determine if the board supports SR-IOV. 3312fb4d8502Sjsg */ 3313fb4d8502Sjsg static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) 3314fb4d8502Sjsg { 3315fb4d8502Sjsg if (amdgpu_sriov_vf(adev)) { 3316fb4d8502Sjsg if (adev->is_atom_fw) { 33175ca02815Sjsg if (amdgpu_atomfirmware_gpu_virtualization_supported(adev)) 3318fb4d8502Sjsg adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3319fb4d8502Sjsg } else { 3320fb4d8502Sjsg if (amdgpu_atombios_has_gpu_virtualization_table(adev)) 3321fb4d8502Sjsg adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; 3322fb4d8502Sjsg } 3323fb4d8502Sjsg 3324fb4d8502Sjsg if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) 3325fb4d8502Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); 3326fb4d8502Sjsg } 3327fb4d8502Sjsg } 3328fb4d8502Sjsg 3329fb4d8502Sjsg /** 3330fb4d8502Sjsg * amdgpu_device_asic_has_dc_support - determine if DC supports the asic 3331fb4d8502Sjsg * 3332fb4d8502Sjsg * @asic_type: AMD asic type 3333fb4d8502Sjsg * 3334fb4d8502Sjsg * Check if there is DC (new modesetting infrastructre) support for an asic. 3335fb4d8502Sjsg * returns true if DC has support, false if not. 3336fb4d8502Sjsg */ 3337fb4d8502Sjsg bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) 3338fb4d8502Sjsg { 3339fb4d8502Sjsg switch (asic_type) { 33401bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI 33411bb76ff1Sjsg case CHIP_HAINAN: 33421bb76ff1Sjsg #endif 33431bb76ff1Sjsg case CHIP_TOPAZ: 33441bb76ff1Sjsg /* chips with no display hardware */ 33451bb76ff1Sjsg return false; 3346fb4d8502Sjsg #if defined(CONFIG_DRM_AMD_DC) 3347ad8b1aafSjsg case CHIP_TAHITI: 3348ad8b1aafSjsg case CHIP_PITCAIRN: 3349ad8b1aafSjsg case CHIP_VERDE: 3350ad8b1aafSjsg case CHIP_OLAND: 3351fb4d8502Sjsg /* 3352fb4d8502Sjsg * We have systems in the wild with these ASICs that require 3353fb4d8502Sjsg * LVDS and VGA support which is not supported with DC. 3354fb4d8502Sjsg * 3355fb4d8502Sjsg * Fallback to the non-DC driver here by default so as not to 3356fb4d8502Sjsg * cause regressions. 3357fb4d8502Sjsg */ 33581bb76ff1Sjsg #if defined(CONFIG_DRM_AMD_DC_SI) 3359fb4d8502Sjsg return amdgpu_dc > 0; 33601bb76ff1Sjsg #else 33611bb76ff1Sjsg return false; 3362ad8b1aafSjsg #endif 33631bb76ff1Sjsg case CHIP_BONAIRE: 33641bb76ff1Sjsg case CHIP_KAVERI: 33651bb76ff1Sjsg case CHIP_KABINI: 33661bb76ff1Sjsg case CHIP_MULLINS: 33671bb76ff1Sjsg /* 33681bb76ff1Sjsg * We have systems in the wild with these ASICs that require 33691bb76ff1Sjsg * VGA support which is not supported with DC. 
33701bb76ff1Sjsg * 33711bb76ff1Sjsg * Fallback to the non-DC driver here by default so as not to 33721bb76ff1Sjsg * cause regressions. 33731bb76ff1Sjsg */ 33741bb76ff1Sjsg return amdgpu_dc > 0; 33751bb76ff1Sjsg default: 3376fb4d8502Sjsg return amdgpu_dc != 0; 33771bb76ff1Sjsg #else 3378fb4d8502Sjsg default: 3379c349dbc7Sjsg if (amdgpu_dc > 0) 3380f005ef32Sjsg DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); 3381fb4d8502Sjsg return false; 33821bb76ff1Sjsg #endif 3383fb4d8502Sjsg } 3384fb4d8502Sjsg } 3385fb4d8502Sjsg 3386fb4d8502Sjsg /** 3387fb4d8502Sjsg * amdgpu_device_has_dc_support - check if dc is supported 3388fb4d8502Sjsg * 3389ad8b1aafSjsg * @adev: amdgpu_device pointer 3390fb4d8502Sjsg * 3391fb4d8502Sjsg * Returns true for supported, false for not supported 3392fb4d8502Sjsg */ 3393fb4d8502Sjsg bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) 3394fb4d8502Sjsg { 3395f005ef32Sjsg if (adev->enable_virtual_display || 33965ca02815Sjsg (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) 3397fb4d8502Sjsg return false; 3398fb4d8502Sjsg 3399fb4d8502Sjsg return amdgpu_device_asic_has_dc_support(adev->asic_type); 3400fb4d8502Sjsg } 3401fb4d8502Sjsg 3402c349dbc7Sjsg static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) 3403c349dbc7Sjsg { 3404c349dbc7Sjsg struct amdgpu_device *adev = 3405c349dbc7Sjsg container_of(__work, struct amdgpu_device, xgmi_reset_work); 3406ad8b1aafSjsg struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); 3407c349dbc7Sjsg 3408c349dbc7Sjsg /* It's a bug to not have a hive within this function */ 3409c349dbc7Sjsg if (WARN_ON(!hive)) 3410c349dbc7Sjsg return; 3411c349dbc7Sjsg 3412c349dbc7Sjsg /* 3413c349dbc7Sjsg * Use task barrier to synchronize all xgmi reset works across the 3414c349dbc7Sjsg * hive. task_barrier_enter and task_barrier_exit will block 3415c349dbc7Sjsg * until all the threads running the xgmi reset works reach 3416c349dbc7Sjsg * those points. task_barrier_full will do both blocks. 
3417c349dbc7Sjsg */ 3418c349dbc7Sjsg if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { 3419c349dbc7Sjsg 3420c349dbc7Sjsg task_barrier_enter(&hive->tb); 3421ad8b1aafSjsg adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); 3422c349dbc7Sjsg 3423c349dbc7Sjsg if (adev->asic_reset_res) 3424c349dbc7Sjsg goto fail; 3425c349dbc7Sjsg 3426c349dbc7Sjsg task_barrier_exit(&hive->tb); 3427ad8b1aafSjsg adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); 3428c349dbc7Sjsg 3429c349dbc7Sjsg if (adev->asic_reset_res) 3430c349dbc7Sjsg goto fail; 3431c349dbc7Sjsg 34321bb76ff1Sjsg if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && 34331bb76ff1Sjsg adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 34341bb76ff1Sjsg adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); 3435c349dbc7Sjsg } else { 3436c349dbc7Sjsg 3437c349dbc7Sjsg task_barrier_full(&hive->tb); 3438c349dbc7Sjsg adev->asic_reset_res = amdgpu_asic_reset(adev); 3439c349dbc7Sjsg } 3440c349dbc7Sjsg 3441c349dbc7Sjsg fail: 3442c349dbc7Sjsg if (adev->asic_reset_res) 3443c349dbc7Sjsg DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", 3444ad8b1aafSjsg adev->asic_reset_res, adev_to_drm(adev)->unique); 3445ad8b1aafSjsg amdgpu_put_xgmi_hive(hive); 3446c349dbc7Sjsg } 3447c349dbc7Sjsg 3448c349dbc7Sjsg static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) 3449c349dbc7Sjsg { 3450c349dbc7Sjsg char *input = amdgpu_lockup_timeout; 3451c349dbc7Sjsg char *timeout_setting = NULL; 3452c349dbc7Sjsg int index = 0; 3453c349dbc7Sjsg long timeout; 3454c349dbc7Sjsg int ret = 0; 3455c349dbc7Sjsg 3456c349dbc7Sjsg /* 34575ca02815Sjsg * By default timeout for non compute jobs is 10000 34585ca02815Sjsg * and 60000 for compute jobs. 3459c349dbc7Sjsg * In SR-IOV or passthrough mode, timeout for compute 3460ad8b1aafSjsg * jobs are 60000 by default. 3461c349dbc7Sjsg */ 3462c349dbc7Sjsg adev->gfx_timeout = msecs_to_jiffies(10000); 3463c349dbc7Sjsg adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 34645ca02815Sjsg if (amdgpu_sriov_vf(adev)) 34655ca02815Sjsg adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? 
34665ca02815Sjsg msecs_to_jiffies(60000) : msecs_to_jiffies(10000); 3467c349dbc7Sjsg else 34685ca02815Sjsg adev->compute_timeout = msecs_to_jiffies(60000); 3469c349dbc7Sjsg 3470c349dbc7Sjsg #ifdef notyet 3471c349dbc7Sjsg if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3472c349dbc7Sjsg while ((timeout_setting = strsep(&input, ",")) && 3473c349dbc7Sjsg strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { 3474c349dbc7Sjsg ret = kstrtol(timeout_setting, 0, &timeout); 3475c349dbc7Sjsg if (ret) 3476c349dbc7Sjsg return ret; 3477c349dbc7Sjsg 3478c349dbc7Sjsg if (timeout == 0) { 3479c349dbc7Sjsg index++; 3480c349dbc7Sjsg continue; 3481c349dbc7Sjsg } else if (timeout < 0) { 3482c349dbc7Sjsg timeout = MAX_SCHEDULE_TIMEOUT; 34831bb76ff1Sjsg dev_warn(adev->dev, "lockup timeout disabled"); 34841bb76ff1Sjsg add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); 3485c349dbc7Sjsg } else { 3486c349dbc7Sjsg timeout = msecs_to_jiffies(timeout); 3487c349dbc7Sjsg } 3488c349dbc7Sjsg 3489c349dbc7Sjsg switch (index++) { 3490c349dbc7Sjsg case 0: 3491c349dbc7Sjsg adev->gfx_timeout = timeout; 3492c349dbc7Sjsg break; 3493c349dbc7Sjsg case 1: 3494c349dbc7Sjsg adev->compute_timeout = timeout; 3495c349dbc7Sjsg break; 3496c349dbc7Sjsg case 2: 3497c349dbc7Sjsg adev->sdma_timeout = timeout; 3498c349dbc7Sjsg break; 3499c349dbc7Sjsg case 3: 3500c349dbc7Sjsg adev->video_timeout = timeout; 3501c349dbc7Sjsg break; 3502c349dbc7Sjsg default: 3503c349dbc7Sjsg break; 3504c349dbc7Sjsg } 3505c349dbc7Sjsg } 3506c349dbc7Sjsg /* 3507c349dbc7Sjsg * There is only one value specified and 3508c349dbc7Sjsg * it should apply to all non-compute jobs. 3509c349dbc7Sjsg */ 3510c349dbc7Sjsg if (index == 1) { 3511c349dbc7Sjsg adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; 3512c349dbc7Sjsg if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) 3513c349dbc7Sjsg adev->compute_timeout = adev->gfx_timeout; 3514c349dbc7Sjsg } 3515c349dbc7Sjsg } 3516c349dbc7Sjsg #endif 3517c349dbc7Sjsg 3518c349dbc7Sjsg return ret; 3519c349dbc7Sjsg } 3520c349dbc7Sjsg 35211bb76ff1Sjsg /** 35221bb76ff1Sjsg * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU 35231bb76ff1Sjsg * 35241bb76ff1Sjsg * @adev: amdgpu_device pointer 35251bb76ff1Sjsg * 35261bb76ff1Sjsg * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode 35271bb76ff1Sjsg */ 35281bb76ff1Sjsg static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) 35291bb76ff1Sjsg { 35301bb76ff1Sjsg #ifdef notyet 35311bb76ff1Sjsg struct iommu_domain *domain; 35321bb76ff1Sjsg 35331bb76ff1Sjsg domain = iommu_get_domain_for_dev(adev->dev); 35341bb76ff1Sjsg if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) 35351bb76ff1Sjsg #endif 35361bb76ff1Sjsg adev->ram_is_direct_mapped = true; 35371bb76ff1Sjsg } 35381bb76ff1Sjsg 3539ad8b1aafSjsg static const struct attribute *amdgpu_dev_attributes[] = { 3540ad8b1aafSjsg &dev_attr_pcie_replay_count.attr, 3541ad8b1aafSjsg NULL 3542ad8b1aafSjsg }; 3543ad8b1aafSjsg 3544f005ef32Sjsg static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) 3545f005ef32Sjsg { 3546f005ef32Sjsg if (amdgpu_mcbp == 1) 3547f005ef32Sjsg adev->gfx.mcbp = true; 3548f005ef32Sjsg else if (amdgpu_mcbp == 0) 3549f005ef32Sjsg adev->gfx.mcbp = false; 3550f005ef32Sjsg 3551f005ef32Sjsg if (amdgpu_sriov_vf(adev)) 3552f005ef32Sjsg adev->gfx.mcbp = true; 3553f005ef32Sjsg 3554f005ef32Sjsg if (adev->gfx.mcbp) 3555f005ef32Sjsg DRM_INFO("MCBP is enabled\n"); 3556f005ef32Sjsg } 3557f005ef32Sjsg 3558fb4d8502Sjsg /** 3559fb4d8502Sjsg * amdgpu_device_init - 
initialize the driver 3560fb4d8502Sjsg * 3561fb4d8502Sjsg * @adev: amdgpu_device pointer 3562fb4d8502Sjsg * @flags: driver flags 3563fb4d8502Sjsg * 3564fb4d8502Sjsg * Initializes the driver info and hw (all asics). 3565fb4d8502Sjsg * Returns 0 for success or an error on failure. 3566fb4d8502Sjsg * Called at driver startup. 3567fb4d8502Sjsg */ 3568fb4d8502Sjsg int amdgpu_device_init(struct amdgpu_device *adev, 3569fb4d8502Sjsg uint32_t flags) 3570fb4d8502Sjsg { 3571ad8b1aafSjsg struct drm_device *ddev = adev_to_drm(adev); 3572ad8b1aafSjsg struct pci_dev *pdev = adev->pdev; 3573fb4d8502Sjsg int r, i; 35745ca02815Sjsg bool px = false; 3575fb4d8502Sjsg u32 max_MBps; 35763abae83eSjsg int tmp; 3577fb4d8502Sjsg 3578fb4d8502Sjsg adev->shutdown = false; 3579fb4d8502Sjsg adev->flags = flags; 3580c349dbc7Sjsg 3581c349dbc7Sjsg if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) 3582c349dbc7Sjsg adev->asic_type = amdgpu_force_asic_type; 3583c349dbc7Sjsg else 3584fb4d8502Sjsg adev->asic_type = flags & AMD_ASIC_MASK; 3585c349dbc7Sjsg 3586fb4d8502Sjsg adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; 3587fb4d8502Sjsg if (amdgpu_emu_mode == 1) 3588c349dbc7Sjsg adev->usec_timeout *= 10; 3589fb4d8502Sjsg adev->gmc.gart_size = 512 * 1024 * 1024; 3590fb4d8502Sjsg adev->accel_working = false; 3591fb4d8502Sjsg adev->num_rings = 0; 35921bb76ff1Sjsg RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); 3593fb4d8502Sjsg adev->mman.buffer_funcs = NULL; 3594fb4d8502Sjsg adev->mman.buffer_funcs_ring = NULL; 3595fb4d8502Sjsg adev->vm_manager.vm_pte_funcs = NULL; 3596c349dbc7Sjsg adev->vm_manager.vm_pte_num_scheds = 0; 3597fb4d8502Sjsg adev->gmc.gmc_funcs = NULL; 35985ca02815Sjsg adev->harvest_ip_mask = 0x0; 3599fb4d8502Sjsg adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); 3600fb4d8502Sjsg bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 3601fb4d8502Sjsg 3602fb4d8502Sjsg adev->smc_rreg = &amdgpu_invalid_rreg; 3603fb4d8502Sjsg adev->smc_wreg = &amdgpu_invalid_wreg; 3604fb4d8502Sjsg adev->pcie_rreg = &amdgpu_invalid_rreg; 3605fb4d8502Sjsg adev->pcie_wreg = &amdgpu_invalid_wreg; 3606f005ef32Sjsg adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; 3607f005ef32Sjsg adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext; 3608fb4d8502Sjsg adev->pciep_rreg = &amdgpu_invalid_rreg; 3609fb4d8502Sjsg adev->pciep_wreg = &amdgpu_invalid_wreg; 3610c349dbc7Sjsg adev->pcie_rreg64 = &amdgpu_invalid_rreg64; 3611c349dbc7Sjsg adev->pcie_wreg64 = &amdgpu_invalid_wreg64; 3612fb4d8502Sjsg adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; 3613fb4d8502Sjsg adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; 3614fb4d8502Sjsg adev->didt_rreg = &amdgpu_invalid_rreg; 3615fb4d8502Sjsg adev->didt_wreg = &amdgpu_invalid_wreg; 3616fb4d8502Sjsg adev->gc_cac_rreg = &amdgpu_invalid_rreg; 3617fb4d8502Sjsg adev->gc_cac_wreg = &amdgpu_invalid_wreg; 3618fb4d8502Sjsg adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; 3619fb4d8502Sjsg adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; 3620fb4d8502Sjsg 362146154ddeSjsg DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 3622fb4d8502Sjsg amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, 3623fb4d8502Sjsg pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); 3624fb4d8502Sjsg 3625fb4d8502Sjsg /* mutex initialization are all done here so we 3626f005ef32Sjsg * can recall function without having locking issues 3627f005ef32Sjsg */ 3628fb4d8502Sjsg rw_init(&adev->firmware.mutex, "agfw"); 3629fb4d8502Sjsg rw_init(&adev->pm.mutex, 
"agpm"); 3630fb4d8502Sjsg rw_init(&adev->gfx.gpu_clock_mutex, "gfxclk"); 3631fb4d8502Sjsg rw_init(&adev->srbm_mutex, "srbm"); 3632fb4d8502Sjsg rw_init(&adev->gfx.pipe_reserve_mutex, "pipers"); 3633c349dbc7Sjsg rw_init(&adev->gfx.gfx_off_mutex, "gfxoff"); 3634f005ef32Sjsg rw_init(&adev->gfx.partition_mutex, "gfxpar"); 3635fb4d8502Sjsg rw_init(&adev->grbm_idx_mutex, "grbmidx"); 3636fb4d8502Sjsg rw_init(&adev->mn_lock, "agpumn"); 3637fb4d8502Sjsg rw_init(&adev->virt.vf_errors.lock, "vferr"); 36386f63516cSjsg rw_init(&adev->virt.rlcg_reg_lock, "vrlcg"); 3639fb4d8502Sjsg hash_init(adev->mn_hash); 3640c349dbc7Sjsg rw_init(&adev->psp.mutex, "agpsp"); 3641c349dbc7Sjsg rw_init(&adev->notifier_lock, "agnf"); 36421bb76ff1Sjsg rw_init(&adev->pm.stable_pstate_ctx_lock, "agps"); 36431bb76ff1Sjsg rw_init(&adev->benchmark_mutex, "agbm"); 3644fb4d8502Sjsg 36451bb76ff1Sjsg amdgpu_device_init_apu_flags(adev); 36465ca02815Sjsg 3647c349dbc7Sjsg r = amdgpu_device_check_arguments(adev); 3648c349dbc7Sjsg if (r) 3649c349dbc7Sjsg return r; 3650fb4d8502Sjsg 3651fb4d8502Sjsg mtx_init(&adev->mmio_idx_lock, IPL_TTY); 3652fb4d8502Sjsg mtx_init(&adev->smc_idx_lock, IPL_TTY); 3653fb4d8502Sjsg mtx_init(&adev->pcie_idx_lock, IPL_TTY); 3654fb4d8502Sjsg mtx_init(&adev->uvd_ctx_idx_lock, IPL_TTY); 3655fb4d8502Sjsg mtx_init(&adev->didt_idx_lock, IPL_TTY); 3656fb4d8502Sjsg mtx_init(&adev->gc_cac_idx_lock, IPL_TTY); 3657fb4d8502Sjsg mtx_init(&adev->se_cac_idx_lock, IPL_TTY); 3658fb4d8502Sjsg mtx_init(&adev->audio_endpt_idx_lock, IPL_TTY); 365963b35fb2Sjsg mtx_init(&adev->mm_stats.lock, IPL_NONE); 3660fb4d8502Sjsg 3661fb4d8502Sjsg INIT_LIST_HEAD(&adev->shadow_list); 3662fb4d8502Sjsg rw_init(&adev->shadow_list_lock, "sdwlst"); 3663fb4d8502Sjsg 36645ca02815Sjsg INIT_LIST_HEAD(&adev->reset_list); 36655ca02815Sjsg 36661bb76ff1Sjsg INIT_LIST_HEAD(&adev->ras_list); 36671bb76ff1Sjsg 3668c349dbc7Sjsg INIT_DELAYED_WORK(&adev->delayed_init_work, 3669c349dbc7Sjsg amdgpu_device_delayed_init_work_handler); 3670c349dbc7Sjsg INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 3671c349dbc7Sjsg amdgpu_device_delay_enable_gfx_off); 3672fb4d8502Sjsg 3673c349dbc7Sjsg INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3674c349dbc7Sjsg 3675c349dbc7Sjsg adev->gfx.gfx_off_req_count = 1; 36761bb76ff1Sjsg adev->gfx.gfx_off_residency = 0; 36771bb76ff1Sjsg adev->gfx.gfx_off_entrycount = 0; 3678ad8b1aafSjsg adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3679ad8b1aafSjsg 3680ad8b1aafSjsg atomic_set(&adev->throttling_logging_enabled, 1); 3681ad8b1aafSjsg /* 3682ad8b1aafSjsg * If throttling continues, logging will be performed every minute 3683ad8b1aafSjsg * to avoid log flooding. "-1" is subtracted since the thermal 3684ad8b1aafSjsg * throttling interrupt comes every second. Thus, the total logging 3685ad8b1aafSjsg * interval is 59 seconds(retelimited printk interval) + 1(waiting 3686ad8b1aafSjsg * for throttling interrupt) = 60 seconds. 
3687ad8b1aafSjsg */ 3688ad8b1aafSjsg ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); 3689ad8b1aafSjsg ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); 3690fb4d8502Sjsg 3691fb4d8502Sjsg #ifdef __linux__ 3692fb4d8502Sjsg /* Registers mapping */ 3693fb4d8502Sjsg /* TODO: block userspace mapping of io register */ 3694fb4d8502Sjsg if (adev->asic_type >= CHIP_BONAIRE) { 3695fb4d8502Sjsg adev->rmmio_base = pci_resource_start(adev->pdev, 5); 3696fb4d8502Sjsg adev->rmmio_size = pci_resource_len(adev->pdev, 5); 3697fb4d8502Sjsg } else { 3698fb4d8502Sjsg adev->rmmio_base = pci_resource_start(adev->pdev, 2); 3699fb4d8502Sjsg adev->rmmio_size = pci_resource_len(adev->pdev, 2); 3700fb4d8502Sjsg } 370133b7da62Sjsg #endif 3702fb4d8502Sjsg 37035ca02815Sjsg for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++) 37045ca02815Sjsg atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN); 37055ca02815Sjsg 370633b7da62Sjsg #ifdef __linux__ 3707fb4d8502Sjsg adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); 3708f005ef32Sjsg if (!adev->rmmio) 3709fb4d8502Sjsg return -ENOMEM; 3710fb4d8502Sjsg #endif 3711fb4d8502Sjsg DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); 3712f005ef32Sjsg DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size); 3713c349dbc7Sjsg 37141bb76ff1Sjsg /* 37151bb76ff1Sjsg * Reset domain needs to be present early, before XGMI hive discovered 37161bb76ff1Sjsg * (if any) and intitialized to use reset sem and in_gpu reset flag 37171bb76ff1Sjsg * early on during init and before calling to RREG32. 37181bb76ff1Sjsg */ 37191bb76ff1Sjsg adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); 37201bb76ff1Sjsg if (!adev->reset_domain) 37211bb76ff1Sjsg return -ENOMEM; 3722c349dbc7Sjsg 3723ad8b1aafSjsg /* detect hw virtualization here */ 3724ad8b1aafSjsg amdgpu_detect_virtualization(adev); 3725ad8b1aafSjsg 3726f005ef32Sjsg amdgpu_device_get_pcie_info(adev); 3727f005ef32Sjsg 3728ad8b1aafSjsg r = amdgpu_device_get_job_timeout_settings(adev); 3729c349dbc7Sjsg if (r) { 3730ad8b1aafSjsg dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); 373143dd1d00Sjsg return r; 3732c349dbc7Sjsg } 3733c349dbc7Sjsg 3734fb4d8502Sjsg /* early init functions */ 3735fb4d8502Sjsg r = amdgpu_device_ip_early_init(adev); 3736fb4d8502Sjsg if (r) 373743dd1d00Sjsg return r; 3738c349dbc7Sjsg 3739f005ef32Sjsg amdgpu_device_set_mcbp(adev); 3740f005ef32Sjsg 3741269b8745Sjsg /* Get rid of things like offb */ 3742269b8745Sjsg r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); 3743269b8745Sjsg if (r) 3744269b8745Sjsg return r; 3745269b8745Sjsg 37461bb76ff1Sjsg /* Enable TMZ based on IP_VERSION */ 37471bb76ff1Sjsg amdgpu_gmc_tmz_set(adev); 37481bb76ff1Sjsg 37491bb76ff1Sjsg amdgpu_gmc_noretry_set(adev); 37501bb76ff1Sjsg /* Need to get xgmi info early to decide the reset behavior*/ 37511bb76ff1Sjsg if (adev->gmc.xgmi.supported) { 37521bb76ff1Sjsg r = adev->gfxhub.funcs->get_xgmi_info(adev); 37531bb76ff1Sjsg if (r) 37541bb76ff1Sjsg return r; 37551bb76ff1Sjsg } 37561bb76ff1Sjsg 37571bb76ff1Sjsg /* enable PCIE atomic ops */ 37581bb76ff1Sjsg #ifdef notyet 3759f005ef32Sjsg if (amdgpu_sriov_vf(adev)) { 3760f005ef32Sjsg if (adev->virt.fw_reserve.p_pf2vf) 37611bb76ff1Sjsg adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) 37621bb76ff1Sjsg adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == 37631bb76ff1Sjsg (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); 
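/* Under SR-IOV the atomic capability comes from the host via the pf2vf info block;
 * have_atomics_support is only set when both 32-bit and 64-bit atomic completer
 * support are advertised there.
 */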
3764e8543b3dSjsg /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, the 3765e8543b3dSjsg * internal path natively supports atomics, so set have_atomics_support to true. 3766e8543b3dSjsg */ 3767f005ef32Sjsg } else if ((adev->flags & AMD_IS_APU) && 3768f005ef32Sjsg (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) { 3769e8543b3dSjsg adev->have_atomics_support = true; 3770f005ef32Sjsg } else { 37711bb76ff1Sjsg adev->have_atomics_support = 37721bb76ff1Sjsg !pci_enable_atomic_ops_to_root(adev->pdev, 37731bb76ff1Sjsg PCI_EXP_DEVCAP2_ATOMIC_COMP32 | 37741bb76ff1Sjsg PCI_EXP_DEVCAP2_ATOMIC_COMP64); 3775f005ef32Sjsg } 3776f005ef32Sjsg 37771bb76ff1Sjsg if (!adev->have_atomics_support) 37781bb76ff1Sjsg dev_info(adev->dev, "PCIE atomic ops is not supported\n"); 37791bb76ff1Sjsg #else 3780e8543b3dSjsg /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, the 3781e8543b3dSjsg * internal path natively supports atomics, so set have_atomics_support to true. 3782e8543b3dSjsg */ 3783e8543b3dSjsg if ((adev->flags & AMD_IS_APU) && 3784e8543b3dSjsg (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) 3785e8543b3dSjsg adev->have_atomics_support = true; 3786e8543b3dSjsg else 37871bb76ff1Sjsg adev->have_atomics_support = false; 37881bb76ff1Sjsg #endif 37891bb76ff1Sjsg 3790c349dbc7Sjsg /* doorbell bar mapping and doorbell index init*/ 3791f005ef32Sjsg amdgpu_doorbell_init(adev); 3792c349dbc7Sjsg 3793fb4d8502Sjsg if (amdgpu_emu_mode == 1) { 3794fb4d8502Sjsg /* post the asic on emulation mode */ 3795fb4d8502Sjsg emu_soc_asic_init(adev); 3796fb4d8502Sjsg goto fence_driver_init; 3797fb4d8502Sjsg } 3798fb4d8502Sjsg 37995ca02815Sjsg amdgpu_reset_init(adev); 38005ca02815Sjsg 3801fb4d8502Sjsg /* detect if we are with an SRIOV vbios */ 3802f005ef32Sjsg if (adev->bios) 3803fb4d8502Sjsg amdgpu_device_detect_sriov_bios(adev); 3804fb4d8502Sjsg 3805c349dbc7Sjsg /* check if we need to reset the asic 3806c349dbc7Sjsg * E.g., driver was not cleanly unloaded previously, etc. 3807c349dbc7Sjsg */ 3808c349dbc7Sjsg if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { 38095ca02815Sjsg if (adev->gmc.xgmi.num_physical_nodes) { 38105ca02815Sjsg dev_info(adev->dev, "Pending hive reset.\n"); 38115ca02815Sjsg adev->gmc.xgmi.pending_reset = true; 38125ca02815Sjsg /* Only need to init necessary block for SMU to handle the reset */ 38135ca02815Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 38145ca02815Sjsg if (!adev->ip_blocks[i].status.valid) 38155ca02815Sjsg continue; 38165ca02815Sjsg if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || 38175ca02815Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || 38185ca02815Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || 38195ca02815Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) { 38205ca02815Sjsg DRM_DEBUG("IP %s disabled for hw_init.\n", 38215ca02815Sjsg adev->ip_blocks[i].version->funcs->name); 38225ca02815Sjsg adev->ip_blocks[i].status.hw = true; 38235ca02815Sjsg } 38245ca02815Sjsg } 38255ca02815Sjsg } else { 38263abae83eSjsg tmp = amdgpu_reset_method; 38273abae83eSjsg /* It should do a default reset when loading or reloading the driver, 38283abae83eSjsg * regardless of the module parameter reset_method.
38293abae83eSjsg */ 38303abae83eSjsg amdgpu_reset_method = AMD_RESET_METHOD_NONE; 3831c349dbc7Sjsg r = amdgpu_asic_reset(adev); 38323abae83eSjsg amdgpu_reset_method = tmp; 3833c349dbc7Sjsg if (r) { 3834c349dbc7Sjsg dev_err(adev->dev, "asic reset on init failed\n"); 3835c349dbc7Sjsg goto failed; 3836c349dbc7Sjsg } 3837c349dbc7Sjsg } 38385ca02815Sjsg } 3839c349dbc7Sjsg 3840fb4d8502Sjsg /* Post card if necessary */ 3841fb4d8502Sjsg if (amdgpu_device_need_post(adev)) { 3842fb4d8502Sjsg if (!adev->bios) { 3843fb4d8502Sjsg dev_err(adev->dev, "no vBIOS found\n"); 3844fb4d8502Sjsg r = -EINVAL; 3845fb4d8502Sjsg goto failed; 3846fb4d8502Sjsg } 3847fb4d8502Sjsg DRM_INFO("GPU posting now...\n"); 3848ad8b1aafSjsg r = amdgpu_device_asic_init(adev); 3849fb4d8502Sjsg if (r) { 3850fb4d8502Sjsg dev_err(adev->dev, "gpu post error!\n"); 3851fb4d8502Sjsg goto failed; 3852fb4d8502Sjsg } 3853fb4d8502Sjsg } 3854fb4d8502Sjsg 3855f005ef32Sjsg if (adev->bios) { 3856fb4d8502Sjsg if (adev->is_atom_fw) { 3857fb4d8502Sjsg /* Initialize clocks */ 3858fb4d8502Sjsg r = amdgpu_atomfirmware_get_clock_info(adev); 3859fb4d8502Sjsg if (r) { 3860fb4d8502Sjsg dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); 3861fb4d8502Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3862fb4d8502Sjsg goto failed; 3863fb4d8502Sjsg } 3864fb4d8502Sjsg } else { 3865fb4d8502Sjsg /* Initialize clocks */ 3866fb4d8502Sjsg r = amdgpu_atombios_get_clock_info(adev); 3867fb4d8502Sjsg if (r) { 3868fb4d8502Sjsg dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); 3869fb4d8502Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); 3870fb4d8502Sjsg goto failed; 3871fb4d8502Sjsg } 3872fb4d8502Sjsg /* init i2c buses */ 3873fb4d8502Sjsg if (!amdgpu_device_has_dc_support(adev)) 3874fb4d8502Sjsg amdgpu_atombios_i2c_init(adev); 3875fb4d8502Sjsg } 3876f005ef32Sjsg } 3877fb4d8502Sjsg 3878fb4d8502Sjsg fence_driver_init: 3879fb4d8502Sjsg /* Fence driver */ 38805ca02815Sjsg r = amdgpu_fence_driver_sw_init(adev); 3881fb4d8502Sjsg if (r) { 38825ca02815Sjsg dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n"); 3883fb4d8502Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); 3884fb4d8502Sjsg goto failed; 3885fb4d8502Sjsg } 3886fb4d8502Sjsg 3887fb4d8502Sjsg /* init the mode config */ 3888ad8b1aafSjsg drm_mode_config_init(adev_to_drm(adev)); 3889fb4d8502Sjsg 3890fb4d8502Sjsg r = amdgpu_device_ip_init(adev); 3891fb4d8502Sjsg if (r) { 3892fb4d8502Sjsg dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); 3893fb4d8502Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); 38945ca02815Sjsg goto release_ras_con; 3895fb4d8502Sjsg } 3896fb4d8502Sjsg 38975ca02815Sjsg amdgpu_fence_driver_hw_init(adev); 38985ca02815Sjsg 3899ad8b1aafSjsg dev_info(adev->dev, 3900ad8b1aafSjsg "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", 3901c349dbc7Sjsg adev->gfx.config.max_shader_engines, 3902c349dbc7Sjsg adev->gfx.config.max_sh_per_se, 3903c349dbc7Sjsg adev->gfx.config.max_cu_per_sh, 3904c349dbc7Sjsg adev->gfx.cu_info.number); 3905c349dbc7Sjsg 390646154ddeSjsg #ifdef __OpenBSD__ 390746154ddeSjsg { 390846154ddeSjsg const char *chip_name; 39091bb76ff1Sjsg uint32_t version = adev->ip_versions[GC_HWIP][0]; 39101bb76ff1Sjsg int maj, min, rev; 391146154ddeSjsg 391246154ddeSjsg switch (adev->asic_type) { 391346154ddeSjsg case CHIP_RAVEN: 3914ad8b1aafSjsg if (adev->apu_flags & AMD_APU_IS_RAVEN2) 391546154ddeSjsg chip_name = "RAVEN2"; 3916ad8b1aafSjsg else if (adev->apu_flags & 
AMD_APU_IS_PICASSO) 391746154ddeSjsg chip_name = "PICASSO"; 391846154ddeSjsg else 391946154ddeSjsg chip_name = "RAVEN"; 392046154ddeSjsg break; 3921ad8b1aafSjsg case CHIP_RENOIR: 3922ad8b1aafSjsg if (adev->apu_flags & AMD_APU_IS_RENOIR) 3923ad8b1aafSjsg chip_name = "RENOIR"; 3924ad8b1aafSjsg else 3925ad8b1aafSjsg chip_name = "GREEN_SARDINE"; 3926ad8b1aafSjsg break; 392746154ddeSjsg default: 392846154ddeSjsg chip_name = amdgpu_asic_name[adev->asic_type]; 392946154ddeSjsg } 39301bb76ff1Sjsg 39311bb76ff1Sjsg printf("%s: %s", adev->self.dv_xname, chip_name); 39321bb76ff1Sjsg /* show graphics/compute ip block version, not set on < GFX9 */ 39331bb76ff1Sjsg if (version) { 39341bb76ff1Sjsg maj = IP_VERSION_MAJ(version); 39351bb76ff1Sjsg min = IP_VERSION_MIN(version); 39361bb76ff1Sjsg rev = IP_VERSION_REV(version); 39371bb76ff1Sjsg printf(" GC %d.%d.%d", maj, min, rev); 39381bb76ff1Sjsg } 39391bb76ff1Sjsg printf(" %d CU rev 0x%02x\n", adev->gfx.cu_info.number, adev->rev_id); 394046154ddeSjsg } 394146154ddeSjsg #endif 394246154ddeSjsg 3943fb4d8502Sjsg adev->accel_working = true; 3944fb4d8502Sjsg 3945fb4d8502Sjsg amdgpu_vm_check_compute_bug(adev); 3946fb4d8502Sjsg 3947fb4d8502Sjsg /* Initialize the buffer migration limit. */ 3948fb4d8502Sjsg if (amdgpu_moverate >= 0) 3949fb4d8502Sjsg max_MBps = amdgpu_moverate; 3950fb4d8502Sjsg else 3951fb4d8502Sjsg max_MBps = 8; /* Allow 8 MB/s. */ 3952fb4d8502Sjsg /* Get a log2 for easy divisions. */ 3953fb4d8502Sjsg adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); 3954fb4d8502Sjsg 3955f005ef32Sjsg r = amdgpu_atombios_sysfs_init(adev); 3956f005ef32Sjsg if (r) 3957f005ef32Sjsg drm_err(&adev->ddev, 3958f005ef32Sjsg "registering atombios sysfs failed (%d).\n", r); 3959f005ef32Sjsg 3960fb4d8502Sjsg r = amdgpu_pm_sysfs_init(adev); 3961f005ef32Sjsg if (r) 3962f005ef32Sjsg DRM_ERROR("registering pm sysfs failed (%d).\n", r); 3963fb4d8502Sjsg 3964c349dbc7Sjsg r = amdgpu_ucode_sysfs_init(adev); 3965c349dbc7Sjsg if (r) { 3966c349dbc7Sjsg adev->ucode_sysfs_en = false; 3967c349dbc7Sjsg DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); 3968c349dbc7Sjsg } else 3969c349dbc7Sjsg adev->ucode_sysfs_en = true; 3970fb4d8502Sjsg 3971c349dbc7Sjsg /* 3972c349dbc7Sjsg * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 3973c349dbc7Sjsg * Otherwise the mgpu fan boost feature will be skipped due to the 3974c349dbc7Sjsg * gpu instance is counted less. 3975c349dbc7Sjsg */ 3976c349dbc7Sjsg amdgpu_register_gpu_instance(adev); 3977c349dbc7Sjsg 3978fb4d8502Sjsg /* enable clockgating, etc. after ib tests, etc. since some blocks require 3979fb4d8502Sjsg * explicit gating rather than handling it automatically. 3980fb4d8502Sjsg */ 39815ca02815Sjsg if (!adev->gmc.xgmi.pending_reset) { 3982fb4d8502Sjsg r = amdgpu_device_ip_late_init(adev); 3983fb4d8502Sjsg if (r) { 3984fb4d8502Sjsg dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); 3985fb4d8502Sjsg amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); 39865ca02815Sjsg goto release_ras_con; 3987fb4d8502Sjsg } 3988c349dbc7Sjsg /* must succeed. 
*/ 3989c349dbc7Sjsg amdgpu_ras_resume(adev); 3990c349dbc7Sjsg queue_delayed_work(system_wq, &adev->delayed_init_work, 3991c349dbc7Sjsg msecs_to_jiffies(AMDGPU_RESUME_MS)); 39925ca02815Sjsg } 3993c349dbc7Sjsg 399434683186Sjsg if (amdgpu_sriov_vf(adev)) { 399534683186Sjsg amdgpu_virt_release_full_gpu(adev, true); 3996ad8b1aafSjsg flush_delayed_work(&adev->delayed_init_work); 399734683186Sjsg } 3998ad8b1aafSjsg 3999ad8b1aafSjsg r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); 4000ad8b1aafSjsg if (r) 4001ad8b1aafSjsg dev_err(adev->dev, "Could not create amdgpu device attr\n"); 4002c349dbc7Sjsg 4003f005ef32Sjsg amdgpu_fru_sysfs_init(adev); 4004f005ef32Sjsg 4005c349dbc7Sjsg if (IS_ENABLED(CONFIG_PERF_EVENTS)) 4006c349dbc7Sjsg r = amdgpu_pmu_init(adev); 4007c349dbc7Sjsg if (r) 4008c349dbc7Sjsg dev_err(adev->dev, "amdgpu_pmu_init failed\n"); 4009c349dbc7Sjsg 4010ad8b1aafSjsg /* Have stored pci confspace at hand for restore in sudden PCI error */ 4011ad8b1aafSjsg if (amdgpu_device_cache_pci_state(adev->pdev)) 4012ad8b1aafSjsg pci_restore_state(pdev); 4013ad8b1aafSjsg 40145ca02815Sjsg /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ 40155ca02815Sjsg /* this will fail for cards that aren't VGA class devices, just 4016f005ef32Sjsg * ignore it 4017f005ef32Sjsg */ 40185ca02815Sjsg #ifdef notyet 40195ca02815Sjsg if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 40205ca02815Sjsg vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); 40215ca02815Sjsg #endif 40225ca02815Sjsg 402378c2b773Sjsg px = amdgpu_device_supports_px(ddev); 402478c2b773Sjsg 4025997286d4Sjsg if (px || (!dev_is_removable(&adev->pdev->dev) && 402678c2b773Sjsg apple_gmux_detect(NULL, NULL))) 40275ca02815Sjsg vga_switcheroo_register_client(adev->pdev, 40285ca02815Sjsg &amdgpu_switcheroo_ops, px); 402978c2b773Sjsg 403078c2b773Sjsg if (px) 40315ca02815Sjsg vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); 40325ca02815Sjsg 40335ca02815Sjsg if (adev->gmc.xgmi.pending_reset) 40345ca02815Sjsg queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work, 40355ca02815Sjsg msecs_to_jiffies(AMDGPU_RESUME_MS)); 40365ca02815Sjsg 40371bb76ff1Sjsg amdgpu_device_check_iommu_direct_map(adev); 40381bb76ff1Sjsg 4039fb4d8502Sjsg return 0; 4040fb4d8502Sjsg 40415ca02815Sjsg release_ras_con: 404234683186Sjsg if (amdgpu_sriov_vf(adev)) 404334683186Sjsg amdgpu_virt_release_full_gpu(adev, true); 404434683186Sjsg 404534683186Sjsg /* failed in exclusive mode due to timeout */ 404634683186Sjsg if (amdgpu_sriov_vf(adev) && 404734683186Sjsg !amdgpu_sriov_runtime(adev) && 404834683186Sjsg amdgpu_virt_mmio_blocked(adev) && 404934683186Sjsg !amdgpu_virt_wait_reset(adev)) { 405034683186Sjsg dev_err(adev->dev, "VF exclusive mode timeout\n"); 405134683186Sjsg /* Don't send request since VF is inactive. 
*/ 405234683186Sjsg adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; 405334683186Sjsg adev->virt.ops = NULL; 405434683186Sjsg r = -EAGAIN; 405534683186Sjsg } 40565ca02815Sjsg amdgpu_release_ras_context(adev); 40575ca02815Sjsg 4058fb4d8502Sjsg failed: 4059fb4d8502Sjsg amdgpu_vf_error_trans_all(adev); 4060fb4d8502Sjsg 4061fb4d8502Sjsg return r; 4062fb4d8502Sjsg } 4063fb4d8502Sjsg 40645ca02815Sjsg static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) 40655ca02815Sjsg { 40665ca02815Sjsg STUB(); 40675ca02815Sjsg #ifdef notyet 4068f005ef32Sjsg 40695ca02815Sjsg /* Clear all CPU mappings pointing to this device */ 40705ca02815Sjsg unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); 40715ca02815Sjsg #endif 40725ca02815Sjsg 40735ca02815Sjsg /* Unmap all mapped bars - Doorbell, registers and VRAM */ 4074f005ef32Sjsg amdgpu_doorbell_fini(adev); 40755ca02815Sjsg 40765ca02815Sjsg #ifdef __linux__ 40775ca02815Sjsg iounmap(adev->rmmio); 40785ca02815Sjsg adev->rmmio = NULL; 40795ca02815Sjsg if (adev->mman.aper_base_kaddr) 40805ca02815Sjsg iounmap(adev->mman.aper_base_kaddr); 40815ca02815Sjsg adev->mman.aper_base_kaddr = NULL; 40825ca02815Sjsg #else 40835ca02815Sjsg if (adev->rmmio_size > 0) 40845ca02815Sjsg bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh, 40855ca02815Sjsg adev->rmmio_size); 40865ca02815Sjsg adev->rmmio_size = 0; 40875ca02815Sjsg adev->rmmio = NULL; 40885ca02815Sjsg if (adev->mman.aper_base_kaddr) 40895ca02815Sjsg bus_space_unmap(adev->memt, adev->mman.aper_bsh, 40905ca02815Sjsg adev->gmc.visible_vram_size); 40915ca02815Sjsg adev->mman.aper_base_kaddr = NULL; 40925ca02815Sjsg #endif 40935ca02815Sjsg 40945ca02815Sjsg /* Memory manager related */ 4095f005ef32Sjsg if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { 40965ca02815Sjsg #ifdef __linux__ 40975ca02815Sjsg arch_phys_wc_del(adev->gmc.vram_mtrr); 40985ca02815Sjsg arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); 40995ca02815Sjsg #else 41005ca02815Sjsg drm_mtrr_del(0, adev->gmc.aper_base, adev->gmc.aper_size, DRM_MTRR_WC); 41015ca02815Sjsg #endif 41025ca02815Sjsg } 41035ca02815Sjsg } 41045ca02815Sjsg 4105fb4d8502Sjsg /** 41061bb76ff1Sjsg * amdgpu_device_fini_hw - tear down the driver 4107fb4d8502Sjsg * 4108fb4d8502Sjsg * @adev: amdgpu_device pointer 4109fb4d8502Sjsg * 4110fb4d8502Sjsg * Tear down the driver info (all asics). 4111fb4d8502Sjsg * Called at driver shutdown. 
4112fb4d8502Sjsg */ 41135ca02815Sjsg void amdgpu_device_fini_hw(struct amdgpu_device *adev) 4114fb4d8502Sjsg { 4115ad8b1aafSjsg dev_info(adev->dev, "amdgpu: finishing device.\n"); 4116c349dbc7Sjsg flush_delayed_work(&adev->delayed_init_work); 4117fb4d8502Sjsg adev->shutdown = true; 4118c349dbc7Sjsg 4119c349dbc7Sjsg /* make sure IB test finished before entering exclusive mode 4120c349dbc7Sjsg * to avoid preemption on IB test 4121f005ef32Sjsg */ 4122ad8b1aafSjsg if (amdgpu_sriov_vf(adev)) { 4123c349dbc7Sjsg amdgpu_virt_request_full_gpu(adev, false); 4124ad8b1aafSjsg amdgpu_virt_fini_data_exchange(adev); 4125ad8b1aafSjsg } 4126c349dbc7Sjsg 4127fb4d8502Sjsg /* disable all interrupts */ 4128fb4d8502Sjsg amdgpu_irq_disable_all(adev); 4129fb4d8502Sjsg if (adev->mode_info.mode_config_initialized) { 41305ca02815Sjsg if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) 4131ad8b1aafSjsg drm_helper_force_disable_all(adev_to_drm(adev)); 4132fb4d8502Sjsg else 4133ad8b1aafSjsg drm_atomic_helper_shutdown(adev_to_drm(adev)); 4134fb4d8502Sjsg } 41355ca02815Sjsg amdgpu_fence_driver_hw_fini(adev); 41365ca02815Sjsg 4137f005ef32Sjsg if (adev->mman.initialized) 4138f005ef32Sjsg drain_workqueue(adev->mman.bdev.wq); 41391bb76ff1Sjsg 4140f005ef32Sjsg if (adev->pm.sysfs_initialized) 4141fb4d8502Sjsg amdgpu_pm_sysfs_fini(adev); 41425ca02815Sjsg if (adev->ucode_sysfs_en) 41435ca02815Sjsg amdgpu_ucode_sysfs_fini(adev); 41445ca02815Sjsg sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); 4145f005ef32Sjsg amdgpu_fru_sysfs_fini(adev); 41465ca02815Sjsg 41471bb76ff1Sjsg /* disable ras feature must before hw fini */ 41481bb76ff1Sjsg amdgpu_ras_pre_fini(adev); 41491bb76ff1Sjsg 41501bb76ff1Sjsg amdgpu_device_ip_fini_early(adev); 41515ca02815Sjsg 41525ca02815Sjsg amdgpu_irq_fini_hw(adev); 41535ca02815Sjsg 41541bb76ff1Sjsg if (adev->mman.initialized) 41551bb76ff1Sjsg ttm_device_clear_dma_mappings(&adev->mman.bdev); 41565ca02815Sjsg 41575ca02815Sjsg amdgpu_gart_dummy_page_fini(adev); 41585ca02815Sjsg 4159e63de9fbSjsg if (drm_dev_is_unplugged(adev_to_drm(adev))) 41605ca02815Sjsg amdgpu_device_unmap_mmio(adev); 41611bb76ff1Sjsg 41625ca02815Sjsg } 41635ca02815Sjsg 41645ca02815Sjsg void amdgpu_device_fini_sw(struct amdgpu_device *adev) 41655ca02815Sjsg { 41661bb76ff1Sjsg int idx; 416778c2b773Sjsg bool px; 41681bb76ff1Sjsg 4169ad8b1aafSjsg amdgpu_device_ip_fini(adev); 4170a0d73938Sjsg amdgpu_fence_driver_sw_fini(adev); 4171f005ef32Sjsg amdgpu_ucode_release(&adev->firmware.gpu_info_fw); 4172fb4d8502Sjsg adev->accel_working = false; 41731bb76ff1Sjsg dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); 41745ca02815Sjsg 41755ca02815Sjsg amdgpu_reset_fini(adev); 41765ca02815Sjsg 4177fb4d8502Sjsg /* free i2c buses */ 4178fb4d8502Sjsg if (!amdgpu_device_has_dc_support(adev)) 4179fb4d8502Sjsg amdgpu_i2c_fini(adev); 4180fb4d8502Sjsg 4181fb4d8502Sjsg if (amdgpu_emu_mode != 1) 4182fb4d8502Sjsg amdgpu_atombios_fini(adev); 4183fb4d8502Sjsg 4184fb4d8502Sjsg kfree(adev->bios); 4185fb4d8502Sjsg adev->bios = NULL; 418678c2b773Sjsg 418778c2b773Sjsg px = amdgpu_device_supports_px(adev_to_drm(adev)); 418878c2b773Sjsg 4189997286d4Sjsg if (px || (!dev_is_removable(&adev->pdev->dev) && 419078c2b773Sjsg apple_gmux_detect(NULL, NULL))) 4191fb4d8502Sjsg vga_switcheroo_unregister_client(adev->pdev); 419278c2b773Sjsg 419378c2b773Sjsg if (px) 4194fb4d8502Sjsg vga_switcheroo_fini_domain_pm_ops(adev->dev); 419578c2b773Sjsg 41965ca02815Sjsg if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) 41975ca02815Sjsg 
vga_client_unregister(adev->pdev); 4198fb4d8502Sjsg 41991bb76ff1Sjsg if (drm_dev_enter(adev_to_drm(adev), &idx)) { 42001bb76ff1Sjsg #ifdef __linux__ 42011bb76ff1Sjsg iounmap(adev->rmmio); 42021bb76ff1Sjsg adev->rmmio = NULL; 42031bb76ff1Sjsg #else 42041bb76ff1Sjsg if (adev->rmmio_size > 0) 42051bb76ff1Sjsg bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh, 42061bb76ff1Sjsg adev->rmmio_size); 42071bb76ff1Sjsg adev->rmmio_size = 0; 42081bb76ff1Sjsg adev->rmmio = NULL; 42091bb76ff1Sjsg #endif 4210f005ef32Sjsg amdgpu_doorbell_fini(adev); 42111bb76ff1Sjsg drm_dev_exit(idx); 42121bb76ff1Sjsg } 42131bb76ff1Sjsg 4214c349dbc7Sjsg if (IS_ENABLED(CONFIG_PERF_EVENTS)) 4215c349dbc7Sjsg amdgpu_pmu_fini(adev); 4216ad8b1aafSjsg if (adev->mman.discovery_bin) 4217c349dbc7Sjsg amdgpu_discovery_fini(adev); 42185ca02815Sjsg 42191bb76ff1Sjsg amdgpu_reset_put_reset_domain(adev->reset_domain); 42201bb76ff1Sjsg adev->reset_domain = NULL; 42211bb76ff1Sjsg 42225ca02815Sjsg kfree(adev->pci_state); 42235ca02815Sjsg 4224fb4d8502Sjsg } 4225fb4d8502Sjsg 42268e01f7deSjsg /** 42278e01f7deSjsg * amdgpu_device_evict_resources - evict device resources 42288e01f7deSjsg * @adev: amdgpu device object 42298e01f7deSjsg * 42308e01f7deSjsg * Evicts all ttm device resources(vram BOs, gart table) from the lru list 42318e01f7deSjsg * of the vram memory type. Mainly used for evicting device resources 42328e01f7deSjsg * at suspend time. 42338e01f7deSjsg * 42348e01f7deSjsg */ 42351bb76ff1Sjsg static int amdgpu_device_evict_resources(struct amdgpu_device *adev) 42368e01f7deSjsg { 42371bb76ff1Sjsg int ret; 42381bb76ff1Sjsg 42393a693bf3Sjsg /* No need to evict vram on APUs for suspend to ram or s2idle */ 42403a693bf3Sjsg if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) 42411bb76ff1Sjsg return 0; 42428e01f7deSjsg 42431bb76ff1Sjsg ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); 42441bb76ff1Sjsg if (ret) 42458e01f7deSjsg DRM_WARN("evicting device resources failed\n"); 42461bb76ff1Sjsg return ret; 42478e01f7deSjsg } 4248fb4d8502Sjsg 4249fb4d8502Sjsg /* 4250fb4d8502Sjsg * Suspend & resume. 4251fb4d8502Sjsg */ 4252fb4d8502Sjsg /** 425336668b15Sjsg * amdgpu_device_prepare - prepare for device suspend 425436668b15Sjsg * 425536668b15Sjsg * @dev: drm dev pointer 425636668b15Sjsg * 425736668b15Sjsg * Prepare to put the hw in the suspend state (all asics). 425836668b15Sjsg * Returns 0 for success or an error on failure. 425936668b15Sjsg * Called at driver suspend. 
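 * Runs before amdgpu_device_suspend(): it selects a low power state, evicts
 * the bulk of the BOs and gives each IP block a prepare_suspend() callback.
 * The overall flow driven by the PM callbacks is roughly:
 *
 *   amdgpu_device_prepare(dev);
 *   amdgpu_device_suspend(dev, fbcon);
 *   ... system sleep ...
 *   amdgpu_device_resume(dev, fbcon);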
426036668b15Sjsg */ 426136668b15Sjsg int amdgpu_device_prepare(struct drm_device *dev) 426236668b15Sjsg { 426336668b15Sjsg struct amdgpu_device *adev = drm_to_adev(dev); 4264064f5254Sjsg int i, r; 426536668b15Sjsg 42668571a5a7Skettenis amdgpu_choose_low_power_state(adev); 42678571a5a7Skettenis 426836668b15Sjsg if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 426936668b15Sjsg return 0; 427036668b15Sjsg 427136668b15Sjsg /* Evict the majority of BOs before starting suspend sequence */ 427236668b15Sjsg r = amdgpu_device_evict_resources(adev); 427336668b15Sjsg if (r) 42748571a5a7Skettenis goto unprepare; 427536668b15Sjsg 42764a4ef11eSjsg flush_delayed_work(&adev->gfx.gfx_off_delay_work); 42774a4ef11eSjsg 4278064f5254Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 4279064f5254Sjsg if (!adev->ip_blocks[i].status.valid) 4280064f5254Sjsg continue; 4281064f5254Sjsg if (!adev->ip_blocks[i].version->funcs->prepare_suspend) 4282064f5254Sjsg continue; 4283064f5254Sjsg r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); 4284064f5254Sjsg if (r) 42858571a5a7Skettenis goto unprepare; 4286064f5254Sjsg } 4287064f5254Sjsg 428836668b15Sjsg return 0; 42898571a5a7Skettenis 42908571a5a7Skettenis unprepare: 42918571a5a7Skettenis adev->in_s0ix = adev->in_s3 = false; 42928571a5a7Skettenis 42938571a5a7Skettenis return r; 429436668b15Sjsg } 429536668b15Sjsg 429636668b15Sjsg /** 4297fb4d8502Sjsg * amdgpu_device_suspend - initiate device suspend 4298fb4d8502Sjsg * 4299fb4d8502Sjsg * @dev: drm dev pointer 4300fb4d8502Sjsg * @fbcon : notify the fbdev of suspend 4301fb4d8502Sjsg * 4302fb4d8502Sjsg * Puts the hw in the suspend state (all asics). 4303fb4d8502Sjsg * Returns 0 for success or an error on failure. 4304fb4d8502Sjsg * Called at driver suspend. 4305fb4d8502Sjsg */ 4306c349dbc7Sjsg int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) 4307fb4d8502Sjsg { 43085ca02815Sjsg struct amdgpu_device *adev = drm_to_adev(dev); 43091bb76ff1Sjsg int r = 0; 4310fb4d8502Sjsg 43116c3f7e80Sjsg if (adev->shutdown) 43126c3f7e80Sjsg return 0; 4313fb4d8502Sjsg 4314fb4d8502Sjsg #ifdef notyet 4315fb4d8502Sjsg if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4316fb4d8502Sjsg return 0; 4317fb4d8502Sjsg #endif 4318fb4d8502Sjsg 4319c349dbc7Sjsg adev->in_suspend = true; 43205ca02815Sjsg 43211bb76ff1Sjsg if (amdgpu_sriov_vf(adev)) { 43221bb76ff1Sjsg amdgpu_virt_fini_data_exchange(adev); 43231bb76ff1Sjsg r = amdgpu_virt_request_full_gpu(adev, false); 43241bb76ff1Sjsg if (r) 43251bb76ff1Sjsg return r; 43261bb76ff1Sjsg } 43271bb76ff1Sjsg 43285ca02815Sjsg if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) 43295ca02815Sjsg DRM_WARN("smart shift update failed\n"); 43305ca02815Sjsg 4331fb4d8502Sjsg if (fbcon) 43321bb76ff1Sjsg drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); 4333fb4d8502Sjsg 4334c349dbc7Sjsg cancel_delayed_work_sync(&adev->delayed_init_work); 4335c349dbc7Sjsg 4336c349dbc7Sjsg amdgpu_ras_suspend(adev); 4337fb4d8502Sjsg 43385ca02815Sjsg amdgpu_device_ip_suspend_phase1(adev); 4339fb4d8502Sjsg 43405ca02815Sjsg if (!adev->in_s0ix) 43415ca02815Sjsg amdgpu_amdkfd_suspend(adev, adev->in_runpm); 4342c349dbc7Sjsg 43431bb76ff1Sjsg r = amdgpu_device_evict_resources(adev); 43441bb76ff1Sjsg if (r) 43451bb76ff1Sjsg return r; 4346fb4d8502Sjsg 43475ca02815Sjsg amdgpu_fence_driver_hw_fini(adev); 4348fb4d8502Sjsg 43495ca02815Sjsg amdgpu_device_ip_suspend_phase2(adev); 43501bb76ff1Sjsg 43511bb76ff1Sjsg if (amdgpu_sriov_vf(adev)) 43521bb76ff1Sjsg amdgpu_virt_release_full_gpu(adev, false); 
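	/* Both suspend phases have completed and, for SR-IOV, full GPU access
	 * has been released back to the host.
	 */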
4353fb4d8502Sjsg 4354fb4d8502Sjsg return 0; 4355fb4d8502Sjsg } 4356fb4d8502Sjsg 4357fb4d8502Sjsg /** 4358fb4d8502Sjsg * amdgpu_device_resume - initiate device resume 4359fb4d8502Sjsg * 4360fb4d8502Sjsg * @dev: drm dev pointer 4361fb4d8502Sjsg * @fbcon : notify the fbdev of resume 4362fb4d8502Sjsg * 4363fb4d8502Sjsg * Bring the hw back to operating state (all asics). 4364fb4d8502Sjsg * Returns 0 for success or an error on failure. 4365fb4d8502Sjsg * Called at driver resume. 4366fb4d8502Sjsg */ 4367c349dbc7Sjsg int amdgpu_device_resume(struct drm_device *dev, bool fbcon) 4368fb4d8502Sjsg { 4369ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 4370fb4d8502Sjsg int r = 0; 4371fb4d8502Sjsg 43721bb76ff1Sjsg if (amdgpu_sriov_vf(adev)) { 43731bb76ff1Sjsg r = amdgpu_virt_request_full_gpu(adev, true); 43741bb76ff1Sjsg if (r) 43751bb76ff1Sjsg return r; 43761bb76ff1Sjsg } 43771bb76ff1Sjsg 4378fb4d8502Sjsg #ifdef notyet 4379fb4d8502Sjsg if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) 4380fb4d8502Sjsg return 0; 4381fb4d8502Sjsg #endif 4382fb4d8502Sjsg 43835ca02815Sjsg if (adev->in_s0ix) 43841bb76ff1Sjsg amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry); 43855ca02815Sjsg 4386fb4d8502Sjsg /* post card */ 4387fb4d8502Sjsg if (amdgpu_device_need_post(adev)) { 4388ad8b1aafSjsg r = amdgpu_device_asic_init(adev); 4389fb4d8502Sjsg if (r) 4390ad8b1aafSjsg dev_err(adev->dev, "amdgpu asic init failed\n"); 4391fb4d8502Sjsg } 4392fb4d8502Sjsg 4393fb4d8502Sjsg r = amdgpu_device_ip_resume(adev); 43941bb76ff1Sjsg 4395fb4d8502Sjsg if (r) { 4396ad8b1aafSjsg dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); 4397f005ef32Sjsg goto exit; 4398fb4d8502Sjsg } 4399fb4d8502Sjsg 4400fb4d8502Sjsg r = amdgpu_device_ip_late_init(adev); 4401fb4d8502Sjsg if (r) 4402f005ef32Sjsg goto exit; 4403fb4d8502Sjsg 4404c349dbc7Sjsg queue_delayed_work(system_wq, &adev->delayed_init_work, 4405c349dbc7Sjsg msecs_to_jiffies(AMDGPU_RESUME_MS)); 4406c349dbc7Sjsg 44075ca02815Sjsg if (!adev->in_s0ix) { 44085ca02815Sjsg r = amdgpu_amdkfd_resume(adev, adev->in_runpm); 4409fb4d8502Sjsg if (r) 4410f005ef32Sjsg goto exit; 44115ca02815Sjsg } 4412fb4d8502Sjsg 4413f005ef32Sjsg exit: 4414f005ef32Sjsg if (amdgpu_sriov_vf(adev)) { 4415f005ef32Sjsg amdgpu_virt_init_data_exchange(adev); 4416f005ef32Sjsg amdgpu_virt_release_full_gpu(adev, true); 4417f005ef32Sjsg } 4418f005ef32Sjsg 4419f005ef32Sjsg if (r) 4420f005ef32Sjsg return r; 4421f005ef32Sjsg 4422fb4d8502Sjsg /* Make sure IB tests flushed */ 4423c349dbc7Sjsg flush_delayed_work(&adev->delayed_init_work); 4424fb4d8502Sjsg 44255ca02815Sjsg if (fbcon) 44261bb76ff1Sjsg drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false); 4427fb4d8502Sjsg 4428c349dbc7Sjsg amdgpu_ras_resume(adev); 4429c349dbc7Sjsg 4430f005ef32Sjsg if (adev->mode_info.num_crtc) { 4431fb4d8502Sjsg /* 4432fb4d8502Sjsg * Most of the connector probing functions try to acquire runtime pm 4433fb4d8502Sjsg * refs to ensure that the GPU is powered on when connector polling is 4434fb4d8502Sjsg * performed. Since we're calling this from a runtime PM callback, 4435fb4d8502Sjsg * trying to acquire rpm refs will cause us to deadlock. 4436fb4d8502Sjsg * 4437fb4d8502Sjsg * Since we're guaranteed to be holding the rpm lock, it's safe to 4438fb4d8502Sjsg * temporarily disable the rpm helpers so this doesn't deadlock us. 
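	 * The extra disable_depth reference is dropped again right after the
	 * hotplug event below has been delivered.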
4439fb4d8502Sjsg */ 4440fb4d8502Sjsg #if defined(CONFIG_PM) && defined(__linux__) 4441fb4d8502Sjsg dev->dev->power.disable_depth++; 4442fb4d8502Sjsg #endif 4443f005ef32Sjsg if (!adev->dc_enabled) 4444fb4d8502Sjsg drm_helper_hpd_irq_event(dev); 4445fb4d8502Sjsg else 4446fb4d8502Sjsg drm_kms_helper_hotplug_event(dev); 4447fb4d8502Sjsg #if defined(CONFIG_PM) && defined(__linux__) 4448fb4d8502Sjsg dev->dev->power.disable_depth--; 4449fb4d8502Sjsg #endif 4450f005ef32Sjsg } 4451c349dbc7Sjsg adev->in_suspend = false; 4452c349dbc7Sjsg 445373029064Sjsg if (adev->enable_mes) 445473029064Sjsg amdgpu_mes_self_test(adev); 445573029064Sjsg 44565ca02815Sjsg if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) 44575ca02815Sjsg DRM_WARN("smart shift update failed\n"); 44585ca02815Sjsg 4459fb4d8502Sjsg return 0; 4460fb4d8502Sjsg } 4461fb4d8502Sjsg 4462fb4d8502Sjsg /** 4463fb4d8502Sjsg * amdgpu_device_ip_check_soft_reset - did soft reset succeed 4464fb4d8502Sjsg * 4465fb4d8502Sjsg * @adev: amdgpu_device pointer 4466fb4d8502Sjsg * 4467fb4d8502Sjsg * The list of all the hardware IPs that make up the asic is walked and 4468fb4d8502Sjsg * the check_soft_reset callbacks are run. check_soft_reset determines 4469fb4d8502Sjsg * if the asic is still hung or not. 4470fb4d8502Sjsg * Returns true if any of the IPs are still in a hung state, false if not. 4471fb4d8502Sjsg */ 4472fb4d8502Sjsg static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) 4473fb4d8502Sjsg { 4474fb4d8502Sjsg int i; 4475fb4d8502Sjsg bool asic_hang = false; 4476fb4d8502Sjsg 4477fb4d8502Sjsg if (amdgpu_sriov_vf(adev)) 4478fb4d8502Sjsg return true; 4479fb4d8502Sjsg 4480fb4d8502Sjsg if (amdgpu_asic_need_full_reset(adev)) 4481fb4d8502Sjsg return true; 4482fb4d8502Sjsg 4483fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 4484fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 4485fb4d8502Sjsg continue; 4486fb4d8502Sjsg if (adev->ip_blocks[i].version->funcs->check_soft_reset) 4487fb4d8502Sjsg adev->ip_blocks[i].status.hang = 4488fb4d8502Sjsg adev->ip_blocks[i].version->funcs->check_soft_reset(adev); 4489fb4d8502Sjsg if (adev->ip_blocks[i].status.hang) { 4490ad8b1aafSjsg dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); 4491fb4d8502Sjsg asic_hang = true; 4492fb4d8502Sjsg } 4493fb4d8502Sjsg } 4494fb4d8502Sjsg return asic_hang; 4495fb4d8502Sjsg } 4496fb4d8502Sjsg 4497fb4d8502Sjsg /** 4498fb4d8502Sjsg * amdgpu_device_ip_pre_soft_reset - prepare for soft reset 4499fb4d8502Sjsg * 4500fb4d8502Sjsg * @adev: amdgpu_device pointer 4501fb4d8502Sjsg * 4502fb4d8502Sjsg * The list of all the hardware IPs that make up the asic is walked and the 4503fb4d8502Sjsg * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset 4504fb4d8502Sjsg * handles any IP specific hardware or software state changes that are 4505fb4d8502Sjsg * necessary for a soft reset to succeed. 4506fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 
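 * Only IP blocks that a prior check_soft_reset() pass flagged as hung are
 * visited here.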
4507fb4d8502Sjsg */ 4508fb4d8502Sjsg static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) 4509fb4d8502Sjsg { 4510fb4d8502Sjsg int i, r = 0; 4511fb4d8502Sjsg 4512fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 4513fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 4514fb4d8502Sjsg continue; 4515fb4d8502Sjsg if (adev->ip_blocks[i].status.hang && 4516fb4d8502Sjsg adev->ip_blocks[i].version->funcs->pre_soft_reset) { 4517fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev); 4518fb4d8502Sjsg if (r) 4519fb4d8502Sjsg return r; 4520fb4d8502Sjsg } 4521fb4d8502Sjsg } 4522fb4d8502Sjsg 4523fb4d8502Sjsg return 0; 4524fb4d8502Sjsg } 4525fb4d8502Sjsg 4526fb4d8502Sjsg /** 4527fb4d8502Sjsg * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed 4528fb4d8502Sjsg * 4529fb4d8502Sjsg * @adev: amdgpu_device pointer 4530fb4d8502Sjsg * 4531fb4d8502Sjsg * Some hardware IPs cannot be soft reset. If they are hung, a full gpu 4532fb4d8502Sjsg * reset is necessary to recover. 4533fb4d8502Sjsg * Returns true if a full asic reset is required, false if not. 4534fb4d8502Sjsg */ 4535fb4d8502Sjsg static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) 4536fb4d8502Sjsg { 4537fb4d8502Sjsg int i; 4538fb4d8502Sjsg 4539fb4d8502Sjsg if (amdgpu_asic_need_full_reset(adev)) 4540fb4d8502Sjsg return true; 4541fb4d8502Sjsg 4542fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 4543fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 4544fb4d8502Sjsg continue; 4545fb4d8502Sjsg if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || 4546fb4d8502Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || 4547fb4d8502Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || 4548fb4d8502Sjsg (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || 4549fb4d8502Sjsg adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { 4550fb4d8502Sjsg if (adev->ip_blocks[i].status.hang) { 4551ad8b1aafSjsg dev_info(adev->dev, "Some block need full reset!\n"); 4552fb4d8502Sjsg return true; 4553fb4d8502Sjsg } 4554fb4d8502Sjsg } 4555fb4d8502Sjsg } 4556fb4d8502Sjsg return false; 4557fb4d8502Sjsg } 4558fb4d8502Sjsg 4559fb4d8502Sjsg /** 4560fb4d8502Sjsg * amdgpu_device_ip_soft_reset - do a soft reset 4561fb4d8502Sjsg * 4562fb4d8502Sjsg * @adev: amdgpu_device pointer 4563fb4d8502Sjsg * 4564fb4d8502Sjsg * The list of all the hardware IPs that make up the asic is walked and the 4565fb4d8502Sjsg * soft_reset callbacks are run if the block is hung. soft_reset handles any 4566fb4d8502Sjsg * IP specific hardware or software state changes that are necessary to soft 4567fb4d8502Sjsg * reset the IP. 4568fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 
4569fb4d8502Sjsg */ 4570fb4d8502Sjsg static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) 4571fb4d8502Sjsg { 4572fb4d8502Sjsg int i, r = 0; 4573fb4d8502Sjsg 4574fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 4575fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 4576fb4d8502Sjsg continue; 4577fb4d8502Sjsg if (adev->ip_blocks[i].status.hang && 4578fb4d8502Sjsg adev->ip_blocks[i].version->funcs->soft_reset) { 4579fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->soft_reset(adev); 4580fb4d8502Sjsg if (r) 4581fb4d8502Sjsg return r; 4582fb4d8502Sjsg } 4583fb4d8502Sjsg } 4584fb4d8502Sjsg 4585fb4d8502Sjsg return 0; 4586fb4d8502Sjsg } 4587fb4d8502Sjsg 4588fb4d8502Sjsg /** 4589fb4d8502Sjsg * amdgpu_device_ip_post_soft_reset - clean up from soft reset 4590fb4d8502Sjsg * 4591fb4d8502Sjsg * @adev: amdgpu_device pointer 4592fb4d8502Sjsg * 4593fb4d8502Sjsg * The list of all the hardware IPs that make up the asic is walked and the 4594fb4d8502Sjsg * post_soft_reset callbacks are run if the asic was hung. post_soft_reset 4595fb4d8502Sjsg * handles any IP specific hardware or software state changes that are 4596fb4d8502Sjsg * necessary after the IP has been soft reset. 4597fb4d8502Sjsg * Returns 0 on success, negative error code on failure. 4598fb4d8502Sjsg */ 4599fb4d8502Sjsg static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) 4600fb4d8502Sjsg { 4601fb4d8502Sjsg int i, r = 0; 4602fb4d8502Sjsg 4603fb4d8502Sjsg for (i = 0; i < adev->num_ip_blocks; i++) { 4604fb4d8502Sjsg if (!adev->ip_blocks[i].status.valid) 4605fb4d8502Sjsg continue; 4606fb4d8502Sjsg if (adev->ip_blocks[i].status.hang && 4607fb4d8502Sjsg adev->ip_blocks[i].version->funcs->post_soft_reset) 4608fb4d8502Sjsg r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev); 4609fb4d8502Sjsg if (r) 4610fb4d8502Sjsg return r; 4611fb4d8502Sjsg } 4612fb4d8502Sjsg 4613fb4d8502Sjsg return 0; 4614fb4d8502Sjsg } 4615fb4d8502Sjsg 4616fb4d8502Sjsg /** 4617c349dbc7Sjsg * amdgpu_device_recover_vram - Recover some VRAM contents 4618fb4d8502Sjsg * 4619fb4d8502Sjsg * @adev: amdgpu_device pointer 4620fb4d8502Sjsg * 4621fb4d8502Sjsg * Restores the contents of VRAM buffers from the shadows in GTT. Used to 4622fb4d8502Sjsg * restore things like GPUVM page tables after a GPU reset where 4623fb4d8502Sjsg * the contents of VRAM might be lost. 4624c349dbc7Sjsg * 4625c349dbc7Sjsg * Returns: 4626c349dbc7Sjsg * 0 on success, negative error code on failure. 
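 * Only VM BOs whose shadow still resides in GTT and whose parent BO sits in
 * VRAM are restored; evicted shadows are skipped.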
4627fb4d8502Sjsg */ 4628c349dbc7Sjsg static int amdgpu_device_recover_vram(struct amdgpu_device *adev) 4629fb4d8502Sjsg { 4630fb4d8502Sjsg struct dma_fence *fence = NULL, *next = NULL; 4631c349dbc7Sjsg struct amdgpu_bo *shadow; 46325ca02815Sjsg struct amdgpu_bo_vm *vmbo; 4633c349dbc7Sjsg long r = 1, tmo; 4634fb4d8502Sjsg 4635fb4d8502Sjsg if (amdgpu_sriov_runtime(adev)) 4636fb4d8502Sjsg tmo = msecs_to_jiffies(8000); 4637fb4d8502Sjsg else 4638fb4d8502Sjsg tmo = msecs_to_jiffies(100); 4639fb4d8502Sjsg 4640ad8b1aafSjsg dev_info(adev->dev, "recover vram bo from shadow start\n"); 4641fb4d8502Sjsg mutex_lock(&adev->shadow_list_lock); 46425ca02815Sjsg list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { 46435f70b624Sjsg /* If vm is compute context or adev is APU, shadow will be NULL */ 46445f70b624Sjsg if (!vmbo->shadow) 46455f70b624Sjsg continue; 46465f70b624Sjsg shadow = vmbo->shadow; 46475f70b624Sjsg 4648c349dbc7Sjsg /* No need to recover an evicted BO */ 46499be2969aSjsg if (!shadow->tbo.resource || 46509be2969aSjsg shadow->tbo.resource->mem_type != TTM_PL_TT || 46515ca02815Sjsg shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || 46525ca02815Sjsg shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM) 4653c349dbc7Sjsg continue; 4654c349dbc7Sjsg 4655c349dbc7Sjsg r = amdgpu_bo_restore_shadow(shadow, &next); 4656c349dbc7Sjsg if (r) 4657c349dbc7Sjsg break; 4658c349dbc7Sjsg 4659fb4d8502Sjsg if (fence) { 4660c349dbc7Sjsg tmo = dma_fence_wait_timeout(fence, false, tmo); 4661fb4d8502Sjsg dma_fence_put(fence); 4662fb4d8502Sjsg fence = next; 4663c349dbc7Sjsg if (tmo == 0) { 4664c349dbc7Sjsg r = -ETIMEDOUT; 4665c349dbc7Sjsg break; 4666c349dbc7Sjsg } else if (tmo < 0) { 4667c349dbc7Sjsg r = tmo; 4668fb4d8502Sjsg break; 4669fb4d8502Sjsg } 4670c349dbc7Sjsg } else { 4671fb4d8502Sjsg fence = next; 4672fb4d8502Sjsg } 4673c349dbc7Sjsg } 4674fb4d8502Sjsg mutex_unlock(&adev->shadow_list_lock); 4675fb4d8502Sjsg 4676c349dbc7Sjsg if (fence) 4677c349dbc7Sjsg tmo = dma_fence_wait_timeout(fence, false, tmo); 4678fb4d8502Sjsg dma_fence_put(fence); 4679fb4d8502Sjsg 4680c349dbc7Sjsg if (r < 0 || tmo <= 0) { 4681ad8b1aafSjsg dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo); 4682c349dbc7Sjsg return -EIO; 4683c349dbc7Sjsg } 4684c349dbc7Sjsg 4685ad8b1aafSjsg dev_info(adev->dev, "recover vram bo from shadow done\n"); 4686c349dbc7Sjsg return 0; 4687fb4d8502Sjsg } 4688fb4d8502Sjsg 4689fb4d8502Sjsg 4690fb4d8502Sjsg /** 4691fb4d8502Sjsg * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf 4692fb4d8502Sjsg * 4693ad8b1aafSjsg * @adev: amdgpu_device pointer 4694fb4d8502Sjsg * @from_hypervisor: request from hypervisor 4695fb4d8502Sjsg * 4696fb4d8502Sjsg * do VF FLR and reinitialize Asic 4697fb4d8502Sjsg * return 0 means succeeded otherwise failed 4698fb4d8502Sjsg */ 4699fb4d8502Sjsg static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, 4700fb4d8502Sjsg bool from_hypervisor) 4701fb4d8502Sjsg { 4702fb4d8502Sjsg int r; 47031bb76ff1Sjsg struct amdgpu_hive_info *hive = NULL; 47041bb76ff1Sjsg int retry_limit = 0; 47051bb76ff1Sjsg 47061bb76ff1Sjsg retry: 47071bb76ff1Sjsg amdgpu_amdkfd_pre_reset(adev); 4708fb4d8502Sjsg 4709fb4d8502Sjsg if (from_hypervisor) 4710fb4d8502Sjsg r = amdgpu_virt_request_full_gpu(adev, true); 4711fb4d8502Sjsg else 4712fb4d8502Sjsg r = amdgpu_virt_reset_gpu(adev); 4713fb4d8502Sjsg if (r) 4714fb4d8502Sjsg return r; 4715f005ef32Sjsg amdgpu_irq_gpu_reset_resume_helper(adev); 4716f005ef32Sjsg 4717f005ef32Sjsg /* some sw clean up VF needs to do before 
recover */ 4718f005ef32Sjsg amdgpu_virt_post_reset(adev); 4719fb4d8502Sjsg 4720fb4d8502Sjsg /* Resume IP prior to SMC */ 4721fb4d8502Sjsg r = amdgpu_device_ip_reinit_early_sriov(adev); 4722fb4d8502Sjsg if (r) 4723fb4d8502Sjsg goto error; 4724fb4d8502Sjsg 4725c349dbc7Sjsg amdgpu_virt_init_data_exchange(adev); 4726fb4d8502Sjsg 4727c349dbc7Sjsg r = amdgpu_device_fw_loading(adev); 4728c349dbc7Sjsg if (r) 4729c349dbc7Sjsg return r; 4730c349dbc7Sjsg 4731fb4d8502Sjsg /* now we are okay to resume SMC/CP/SDMA */ 4732fb4d8502Sjsg r = amdgpu_device_ip_reinit_late_sriov(adev); 4733fb4d8502Sjsg if (r) 4734fb4d8502Sjsg goto error; 4735fb4d8502Sjsg 47361bb76ff1Sjsg hive = amdgpu_get_xgmi_hive(adev); 47371bb76ff1Sjsg /* Update PSP FW topology after reset */ 47381bb76ff1Sjsg if (hive && adev->gmc.xgmi.num_physical_nodes > 1) 47391bb76ff1Sjsg r = amdgpu_xgmi_update_topology(hive, adev); 47401bb76ff1Sjsg 47411bb76ff1Sjsg if (hive) 47421bb76ff1Sjsg amdgpu_put_xgmi_hive(hive); 47431bb76ff1Sjsg 47441bb76ff1Sjsg if (!r) { 4745fb4d8502Sjsg r = amdgpu_ib_ring_tests(adev); 47461bb76ff1Sjsg 4747c349dbc7Sjsg amdgpu_amdkfd_post_reset(adev); 47481bb76ff1Sjsg } 4749fb4d8502Sjsg 4750fb4d8502Sjsg error: 4751fb4d8502Sjsg if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { 4752c349dbc7Sjsg amdgpu_inc_vram_lost(adev); 4753c349dbc7Sjsg r = amdgpu_device_recover_vram(adev); 4754fb4d8502Sjsg } 47555ca02815Sjsg amdgpu_virt_release_full_gpu(adev, true); 4756fb4d8502Sjsg 47571bb76ff1Sjsg if (AMDGPU_RETRY_SRIOV_RESET(r)) { 47581bb76ff1Sjsg if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { 47591bb76ff1Sjsg retry_limit++; 47601bb76ff1Sjsg goto retry; 47611bb76ff1Sjsg } else 47621bb76ff1Sjsg DRM_ERROR("GPU reset retry is beyond the retry limit\n"); 47631bb76ff1Sjsg } 47641bb76ff1Sjsg 4765fb4d8502Sjsg return r; 4766fb4d8502Sjsg } 4767fb4d8502Sjsg 4768fb4d8502Sjsg /** 4769ad8b1aafSjsg * amdgpu_device_has_job_running - check if there is any job in mirror list 4770ad8b1aafSjsg * 4771ad8b1aafSjsg * @adev: amdgpu_device pointer 4772ad8b1aafSjsg * 4773ad8b1aafSjsg * check if there is any job in mirror list 4774ad8b1aafSjsg */ 4775ad8b1aafSjsg bool amdgpu_device_has_job_running(struct amdgpu_device *adev) 4776ad8b1aafSjsg { 4777ad8b1aafSjsg int i; 4778ad8b1aafSjsg struct drm_sched_job *job; 4779ad8b1aafSjsg 4780ad8b1aafSjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4781ad8b1aafSjsg struct amdgpu_ring *ring = adev->rings[i]; 4782ad8b1aafSjsg 4783ad8b1aafSjsg if (!ring || !ring->sched.thread) 4784ad8b1aafSjsg continue; 4785ad8b1aafSjsg 4786ad8b1aafSjsg spin_lock(&ring->sched.job_list_lock); 47875ca02815Sjsg job = list_first_entry_or_null(&ring->sched.pending_list, 47885ca02815Sjsg struct drm_sched_job, list); 4789ad8b1aafSjsg spin_unlock(&ring->sched.job_list_lock); 4790ad8b1aafSjsg if (job) 4791ad8b1aafSjsg return true; 4792ad8b1aafSjsg } 4793ad8b1aafSjsg return false; 4794ad8b1aafSjsg } 4795ad8b1aafSjsg 4796ad8b1aafSjsg /** 4797c349dbc7Sjsg * amdgpu_device_should_recover_gpu - check if we should try GPU recovery 4798fb4d8502Sjsg * 4799ad8b1aafSjsg * @adev: amdgpu_device pointer 4800fb4d8502Sjsg * 4801c349dbc7Sjsg * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover 4802c349dbc7Sjsg * a hung GPU. 
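 * With amdgpu_gpu_recovery left at -1 (auto), recovery is enabled for SR-IOV
 * VFs and most bare-metal ASICs; a small list of older parts keeps it off.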
4803fb4d8502Sjsg */ 4804c349dbc7Sjsg bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) 4805fb4d8502Sjsg { 48061bb76ff1Sjsg 48071bb76ff1Sjsg if (amdgpu_gpu_recovery == 0) 48081bb76ff1Sjsg goto disabled; 48091bb76ff1Sjsg 4810f005ef32Sjsg /* Skip soft reset check in fatal error mode */ 4811f005ef32Sjsg if (!amdgpu_ras_is_poison_mode_supported(adev)) 4812f005ef32Sjsg return true; 4813fb4d8502Sjsg 4814c349dbc7Sjsg if (amdgpu_sriov_vf(adev)) 4815c349dbc7Sjsg return true; 4816c349dbc7Sjsg 4817c349dbc7Sjsg if (amdgpu_gpu_recovery == -1) { 4818c349dbc7Sjsg switch (adev->asic_type) { 48191bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI 48201bb76ff1Sjsg case CHIP_VERDE: 48211bb76ff1Sjsg case CHIP_TAHITI: 48221bb76ff1Sjsg case CHIP_PITCAIRN: 48231bb76ff1Sjsg case CHIP_OLAND: 48241bb76ff1Sjsg case CHIP_HAINAN: 48251bb76ff1Sjsg #endif 48261bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK 48271bb76ff1Sjsg case CHIP_KAVERI: 48281bb76ff1Sjsg case CHIP_KABINI: 48291bb76ff1Sjsg case CHIP_MULLINS: 48301bb76ff1Sjsg #endif 48311bb76ff1Sjsg case CHIP_CARRIZO: 48321bb76ff1Sjsg case CHIP_STONEY: 48331bb76ff1Sjsg case CHIP_CYAN_SKILLFISH: 4834c349dbc7Sjsg goto disabled; 48351bb76ff1Sjsg default: 48361bb76ff1Sjsg break; 4837c349dbc7Sjsg } 4838c349dbc7Sjsg } 4839c349dbc7Sjsg 4840c349dbc7Sjsg return true; 4841c349dbc7Sjsg 4842c349dbc7Sjsg disabled: 4843ad8b1aafSjsg dev_info(adev->dev, "GPU recovery disabled.\n"); 4844c349dbc7Sjsg return false; 4845fb4d8502Sjsg } 4846fb4d8502Sjsg 48475ca02815Sjsg int amdgpu_device_mode1_reset(struct amdgpu_device *adev) 4848c349dbc7Sjsg { 48495ca02815Sjsg u32 i; 48505ca02815Sjsg int ret = 0; 4851fb4d8502Sjsg 48525ca02815Sjsg amdgpu_atombios_scratch_regs_engine_hung(adev, true); 48535ca02815Sjsg 48545ca02815Sjsg dev_info(adev->dev, "GPU mode1 reset\n"); 48555ca02815Sjsg 48563b3f1b1cSjsg /* Cache the state before bus master disable. The saved config space 48573b3f1b1cSjsg * values are used in other cases like restore after mode-2 reset. 
48583b3f1b1cSjsg */ 48593b3f1b1cSjsg amdgpu_device_cache_pci_state(adev->pdev); 48603b3f1b1cSjsg 48615ca02815Sjsg /* disable BM */ 48625ca02815Sjsg pci_clear_master(adev->pdev); 48635ca02815Sjsg 48645ca02815Sjsg if (amdgpu_dpm_is_mode1_reset_supported(adev)) { 48655ca02815Sjsg dev_info(adev->dev, "GPU smu mode1 reset\n"); 48665ca02815Sjsg ret = amdgpu_dpm_mode1_reset(adev); 48675ca02815Sjsg } else { 48685ca02815Sjsg dev_info(adev->dev, "GPU psp mode1 reset\n"); 48695ca02815Sjsg ret = psp_gpu_reset(adev); 48705ca02815Sjsg } 48715ca02815Sjsg 48725ca02815Sjsg if (ret) 4873f005ef32Sjsg goto mode1_reset_failed; 48745ca02815Sjsg 48755ca02815Sjsg amdgpu_device_load_pci_state(adev->pdev); 4876f005ef32Sjsg ret = amdgpu_psp_wait_for_bootloader(adev); 4877f005ef32Sjsg if (ret) 4878f005ef32Sjsg goto mode1_reset_failed; 48795ca02815Sjsg 48805ca02815Sjsg /* wait for asic to come out of reset */ 48815ca02815Sjsg for (i = 0; i < adev->usec_timeout; i++) { 48825ca02815Sjsg u32 memsize = adev->nbio.funcs->get_memsize(adev); 48835ca02815Sjsg 48845ca02815Sjsg if (memsize != 0xffffffff) 48855ca02815Sjsg break; 48865ca02815Sjsg udelay(1); 48875ca02815Sjsg } 48885ca02815Sjsg 4889f005ef32Sjsg if (i >= adev->usec_timeout) { 4890f005ef32Sjsg ret = -ETIMEDOUT; 4891f005ef32Sjsg goto mode1_reset_failed; 4892f005ef32Sjsg } 4893f005ef32Sjsg 48945ca02815Sjsg amdgpu_atombios_scratch_regs_engine_hung(adev, false); 4895f005ef32Sjsg 4896f005ef32Sjsg return 0; 4897f005ef32Sjsg 4898f005ef32Sjsg mode1_reset_failed: 4899f005ef32Sjsg dev_err(adev->dev, "GPU mode1 reset failed\n"); 49005ca02815Sjsg return ret; 49015ca02815Sjsg } 49025ca02815Sjsg 49035ca02815Sjsg int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, 49045ca02815Sjsg struct amdgpu_reset_context *reset_context) 49055ca02815Sjsg { 49061bb76ff1Sjsg int i, r = 0; 49075ca02815Sjsg struct amdgpu_job *job = NULL; 49085ca02815Sjsg bool need_full_reset = 49095ca02815Sjsg test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 49105ca02815Sjsg 49115ca02815Sjsg if (reset_context->reset_req_dev == adev) 49125ca02815Sjsg job = reset_context->job; 4913ad8b1aafSjsg 4914ad8b1aafSjsg if (amdgpu_sriov_vf(adev)) { 4915ad8b1aafSjsg /* stop the data exchange thread */ 4916ad8b1aafSjsg amdgpu_virt_fini_data_exchange(adev); 4917ad8b1aafSjsg } 4918ad8b1aafSjsg 49191bb76ff1Sjsg amdgpu_fence_driver_isr_toggle(adev, true); 49201bb76ff1Sjsg 4921fb4d8502Sjsg /* block all schedulers and reset given job's ring */ 4922fb4d8502Sjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 4923fb4d8502Sjsg struct amdgpu_ring *ring = adev->rings[i]; 4924fb4d8502Sjsg 4925fb4d8502Sjsg if (!ring || !ring->sched.thread) 4926fb4d8502Sjsg continue; 4927fb4d8502Sjsg 4928f005ef32Sjsg /* Clear job fence from fence drv to avoid force_completion 4929f005ef32Sjsg * leave NULL and vm flush fence in fence drv 4930f005ef32Sjsg */ 49311bb76ff1Sjsg amdgpu_fence_driver_clear_job_fences(ring); 49325ca02815Sjsg 4933fb4d8502Sjsg /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ 4934fb4d8502Sjsg amdgpu_fence_driver_force_completion(ring); 4935fb4d8502Sjsg } 4936fb4d8502Sjsg 49371bb76ff1Sjsg amdgpu_fence_driver_isr_toggle(adev, false); 49381bb76ff1Sjsg 49395ca02815Sjsg if (job && job->vm) 4940c349dbc7Sjsg drm_sched_increase_karma(&job->base); 4941c349dbc7Sjsg 49425ca02815Sjsg r = amdgpu_reset_prepare_hwcontext(adev, reset_context); 49435ca02815Sjsg /* If reset handler not implemented, continue; otherwise return */ 4944f005ef32Sjsg if (r == -EOPNOTSUPP) 49455ca02815Sjsg r = 0; 49465ca02815Sjsg else 
49475ca02815Sjsg return r; 49485ca02815Sjsg 4949c349dbc7Sjsg /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ 4950c349dbc7Sjsg if (!amdgpu_sriov_vf(adev)) { 4951c349dbc7Sjsg 4952c349dbc7Sjsg if (!need_full_reset) 4953c349dbc7Sjsg need_full_reset = amdgpu_device_ip_need_full_reset(adev); 4954c349dbc7Sjsg 4955f005ef32Sjsg if (!need_full_reset && amdgpu_gpu_recovery && 4956f005ef32Sjsg amdgpu_device_ip_check_soft_reset(adev)) { 4957c349dbc7Sjsg amdgpu_device_ip_pre_soft_reset(adev); 4958c349dbc7Sjsg r = amdgpu_device_ip_soft_reset(adev); 4959c349dbc7Sjsg amdgpu_device_ip_post_soft_reset(adev); 4960c349dbc7Sjsg if (r || amdgpu_device_ip_check_soft_reset(adev)) { 4961ad8b1aafSjsg dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n"); 4962c349dbc7Sjsg need_full_reset = true; 4963c349dbc7Sjsg } 4964c349dbc7Sjsg } 4965c349dbc7Sjsg 4966c349dbc7Sjsg if (need_full_reset) 4967c349dbc7Sjsg r = amdgpu_device_ip_suspend(adev); 49685ca02815Sjsg if (need_full_reset) 49695ca02815Sjsg set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 49705ca02815Sjsg else 49715ca02815Sjsg clear_bit(AMDGPU_NEED_FULL_RESET, 49725ca02815Sjsg &reset_context->flags); 4973c349dbc7Sjsg } 4974c349dbc7Sjsg 4975c349dbc7Sjsg return r; 4976c349dbc7Sjsg } 4977c349dbc7Sjsg 49781bb76ff1Sjsg static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) 49791bb76ff1Sjsg { 49801bb76ff1Sjsg int i; 49811bb76ff1Sjsg 49821bb76ff1Sjsg lockdep_assert_held(&adev->reset_domain->sem); 49831bb76ff1Sjsg 49841bb76ff1Sjsg for (i = 0; i < adev->num_regs; i++) { 49851bb76ff1Sjsg adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); 49861bb76ff1Sjsg trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], 49871bb76ff1Sjsg adev->reset_dump_reg_value[i]); 49881bb76ff1Sjsg } 49891bb76ff1Sjsg 49901bb76ff1Sjsg return 0; 49911bb76ff1Sjsg } 49921bb76ff1Sjsg 49931bb76ff1Sjsg #ifdef CONFIG_DEV_COREDUMP 49941bb76ff1Sjsg static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, 49951bb76ff1Sjsg size_t count, void *data, size_t datalen) 49961bb76ff1Sjsg { 49971bb76ff1Sjsg struct drm_printer p; 49981bb76ff1Sjsg struct amdgpu_device *adev = data; 49991bb76ff1Sjsg struct drm_print_iterator iter; 50001bb76ff1Sjsg int i; 50011bb76ff1Sjsg 50021bb76ff1Sjsg iter.data = buffer; 50031bb76ff1Sjsg iter.offset = 0; 50041bb76ff1Sjsg iter.start = offset; 50051bb76ff1Sjsg iter.remain = count; 50061bb76ff1Sjsg 50071bb76ff1Sjsg p = drm_coredump_printer(&iter); 50081bb76ff1Sjsg 50091bb76ff1Sjsg drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); 50101bb76ff1Sjsg drm_printf(&p, "kernel: " UTS_RELEASE "\n"); 50111bb76ff1Sjsg drm_printf(&p, "module: " KBUILD_MODNAME "\n"); 50121bb76ff1Sjsg drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec); 50131bb76ff1Sjsg if (adev->reset_task_info.pid) 50141bb76ff1Sjsg drm_printf(&p, "process_name: %s PID: %d\n", 50151bb76ff1Sjsg adev->reset_task_info.process_name, 50161bb76ff1Sjsg adev->reset_task_info.pid); 50171bb76ff1Sjsg 50181bb76ff1Sjsg if (adev->reset_vram_lost) 50191bb76ff1Sjsg drm_printf(&p, "VRAM is lost due to GPU reset!\n"); 50201bb76ff1Sjsg if (adev->num_regs) { 50211bb76ff1Sjsg drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); 50221bb76ff1Sjsg 50231bb76ff1Sjsg for (i = 0; i < adev->num_regs; i++) 50241bb76ff1Sjsg drm_printf(&p, "0x%08x: 0x%08x\n", 50251bb76ff1Sjsg adev->reset_dump_reg_list[i], 50261bb76ff1Sjsg adev->reset_dump_reg_value[i]); 50271bb76ff1Sjsg } 50281bb76ff1Sjsg 50291bb76ff1Sjsg return count - 
iter.remain; 50301bb76ff1Sjsg } 50311bb76ff1Sjsg 50321bb76ff1Sjsg static void amdgpu_devcoredump_free(void *data) 50331bb76ff1Sjsg { 50341bb76ff1Sjsg } 50351bb76ff1Sjsg 50361bb76ff1Sjsg static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev) 50371bb76ff1Sjsg { 50381bb76ff1Sjsg struct drm_device *dev = adev_to_drm(adev); 50391bb76ff1Sjsg 50401bb76ff1Sjsg ktime_get_ts64(&adev->reset_time); 5041f005ef32Sjsg dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT, 50421bb76ff1Sjsg amdgpu_devcoredump_read, amdgpu_devcoredump_free); 50431bb76ff1Sjsg } 50441bb76ff1Sjsg #endif 50451bb76ff1Sjsg 50465ca02815Sjsg int amdgpu_do_asic_reset(struct list_head *device_list_handle, 50475ca02815Sjsg struct amdgpu_reset_context *reset_context) 5048c349dbc7Sjsg { 5049c349dbc7Sjsg struct amdgpu_device *tmp_adev = NULL; 50505ca02815Sjsg bool need_full_reset, skip_hw_reset, vram_lost = false; 5051c349dbc7Sjsg int r = 0; 50521bb76ff1Sjsg bool gpu_reset_for_dev_remove = 0; 5053c349dbc7Sjsg 50545ca02815Sjsg /* Try reset handler method first */ 50555ca02815Sjsg tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 50565ca02815Sjsg reset_list); 50571bb76ff1Sjsg amdgpu_reset_reg_dumps(tmp_adev); 50581bb76ff1Sjsg 50591bb76ff1Sjsg reset_context->reset_device_list = device_list_handle; 50605ca02815Sjsg r = amdgpu_reset_perform_reset(tmp_adev, reset_context); 50615ca02815Sjsg /* If reset handler not implemented, continue; otherwise return */ 5062f005ef32Sjsg if (r == -EOPNOTSUPP) 50635ca02815Sjsg r = 0; 50645ca02815Sjsg else 50655ca02815Sjsg return r; 50665ca02815Sjsg 50675ca02815Sjsg /* Reset handler not implemented, use the default method */ 50685ca02815Sjsg need_full_reset = 50695ca02815Sjsg test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 50705ca02815Sjsg skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); 50715ca02815Sjsg 50721bb76ff1Sjsg gpu_reset_for_dev_remove = 50731bb76ff1Sjsg test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 50741bb76ff1Sjsg test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 50751bb76ff1Sjsg 5076c349dbc7Sjsg /* 50775ca02815Sjsg * ASIC reset has to be done on all XGMI hive nodes ASAP 5078c349dbc7Sjsg * to allow proper links negotiation in FW (within 1 sec) 5079c349dbc7Sjsg */ 5080ad8b1aafSjsg if (!skip_hw_reset && need_full_reset) { 50815ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5082c349dbc7Sjsg /* For XGMI run all resets in parallel to speed up the process */ 5083c349dbc7Sjsg if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 50845ca02815Sjsg tmp_adev->gmc.xgmi.pending_reset = false; 5085c349dbc7Sjsg if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work)) 5086c349dbc7Sjsg r = -EALREADY; 5087c349dbc7Sjsg } else 5088c349dbc7Sjsg r = amdgpu_asic_reset(tmp_adev); 5089c349dbc7Sjsg 5090c349dbc7Sjsg if (r) { 5091ad8b1aafSjsg dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s", 5092ad8b1aafSjsg r, adev_to_drm(tmp_adev)->unique); 5093c349dbc7Sjsg break; 5094c349dbc7Sjsg } 5095c349dbc7Sjsg } 5096c349dbc7Sjsg 5097c349dbc7Sjsg /* For XGMI wait for all resets to complete before proceed */ 5098c349dbc7Sjsg if (!r) { 50995ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5100c349dbc7Sjsg if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { 5101c349dbc7Sjsg flush_work(&tmp_adev->xgmi_reset_work); 5102c349dbc7Sjsg r = tmp_adev->asic_reset_res; 5103c349dbc7Sjsg if (r) 5104c349dbc7Sjsg break; 5105c349dbc7Sjsg } 5106c349dbc7Sjsg } 5107c349dbc7Sjsg } 
5108c349dbc7Sjsg } 5109c349dbc7Sjsg 5110c349dbc7Sjsg if (!r && amdgpu_ras_intr_triggered()) { 51115ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 51121bb76ff1Sjsg if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && 51131bb76ff1Sjsg tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) 51141bb76ff1Sjsg tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); 5115c349dbc7Sjsg } 5116c349dbc7Sjsg 5117c349dbc7Sjsg amdgpu_ras_intr_cleared(); 5118c349dbc7Sjsg } 5119c349dbc7Sjsg 51201bb76ff1Sjsg /* Since the mode1 reset affects base ip blocks, the 51211bb76ff1Sjsg * phase1 ip blocks need to be resumed. Otherwise there 51221bb76ff1Sjsg * will be a BIOS signature error and the psp bootloader 51231bb76ff1Sjsg * can't load kdb on the next amdgpu install. 51241bb76ff1Sjsg */ 51251bb76ff1Sjsg if (gpu_reset_for_dev_remove) { 51261bb76ff1Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) 51271bb76ff1Sjsg amdgpu_device_ip_resume_phase1(tmp_adev); 51281bb76ff1Sjsg 51291bb76ff1Sjsg goto end; 51301bb76ff1Sjsg } 51311bb76ff1Sjsg 51325ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 5133c349dbc7Sjsg if (need_full_reset) { 5134c349dbc7Sjsg /* post card */ 51355ca02815Sjsg r = amdgpu_device_asic_init(tmp_adev); 51365ca02815Sjsg if (r) { 5137ad8b1aafSjsg dev_warn(tmp_adev->dev, "asic atom init failed!"); 51385ca02815Sjsg } else { 5139c349dbc7Sjsg dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); 5140e7ede0e6Sjsg 5141c349dbc7Sjsg r = amdgpu_device_ip_resume_phase1(tmp_adev); 5142c349dbc7Sjsg if (r) 5143c349dbc7Sjsg goto out; 5144c349dbc7Sjsg 5145c349dbc7Sjsg vram_lost = amdgpu_device_check_vram_lost(tmp_adev); 51461bb76ff1Sjsg #ifdef CONFIG_DEV_COREDUMP 51471bb76ff1Sjsg tmp_adev->reset_vram_lost = vram_lost; 51481bb76ff1Sjsg memset(&tmp_adev->reset_task_info, 0, 51491bb76ff1Sjsg sizeof(tmp_adev->reset_task_info)); 51501bb76ff1Sjsg if (reset_context->job && reset_context->job->vm) 51511bb76ff1Sjsg tmp_adev->reset_task_info = 51521bb76ff1Sjsg reset_context->job->vm->task_info; 51531bb76ff1Sjsg amdgpu_reset_capture_coredumpm(tmp_adev); 51541bb76ff1Sjsg #endif 5155c349dbc7Sjsg if (vram_lost) { 5156c349dbc7Sjsg DRM_INFO("VRAM is lost due to GPU reset!\n"); 5157c349dbc7Sjsg amdgpu_inc_vram_lost(tmp_adev); 5158c349dbc7Sjsg } 5159c349dbc7Sjsg 5160c349dbc7Sjsg r = amdgpu_device_fw_loading(tmp_adev); 5161c349dbc7Sjsg if (r) 5162c349dbc7Sjsg return r; 5163c349dbc7Sjsg 5164c349dbc7Sjsg r = amdgpu_device_ip_resume_phase2(tmp_adev); 5165c349dbc7Sjsg if (r) 5166c349dbc7Sjsg goto out; 5167c349dbc7Sjsg 5168f4ab5340Sjsg r = amdgpu_device_ip_resume_phase3(tmp_adev); 5169f4ab5340Sjsg if (r) 5170f4ab5340Sjsg goto out; 5171f4ab5340Sjsg 5172c349dbc7Sjsg if (vram_lost) 5173c349dbc7Sjsg amdgpu_device_fill_reset_magic(tmp_adev); 5174c349dbc7Sjsg 5175c349dbc7Sjsg /* 5176c349dbc7Sjsg * Add this ASIC as tracked as reset was already 5177c349dbc7Sjsg * complete successfully. 
5178c349dbc7Sjsg */
5179c349dbc7Sjsg amdgpu_register_gpu_instance(tmp_adev);
5180c349dbc7Sjsg
51815ca02815Sjsg if (!reset_context->hive &&
51825ca02815Sjsg tmp_adev->gmc.xgmi.num_physical_nodes > 1)
51835ca02815Sjsg amdgpu_xgmi_add_device(tmp_adev);
51845ca02815Sjsg
5185c349dbc7Sjsg r = amdgpu_device_ip_late_init(tmp_adev);
5186c349dbc7Sjsg if (r)
5187c349dbc7Sjsg goto out;
5188c349dbc7Sjsg
51891bb76ff1Sjsg drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5190c349dbc7Sjsg
5191ad8b1aafSjsg /*
5192ad8b1aafSjsg * The GPU enters bad state once faulty pages
5193ad8b1aafSjsg * by ECC has reached the threshold, and ras
5194ad8b1aafSjsg * recovery is scheduled next. So add one check
5195ad8b1aafSjsg * here to break recovery if it indeed exceeds
5196ad8b1aafSjsg * bad page threshold, and remind user to
5197ad8b1aafSjsg * retire this GPU or setting one bigger
5198ad8b1aafSjsg * bad_page_threshold value to fix this once
5199ad8b1aafSjsg * probing driver again.
5200ad8b1aafSjsg */
52015ca02815Sjsg if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5202c349dbc7Sjsg /* must succeed. */
5203c349dbc7Sjsg amdgpu_ras_resume(tmp_adev);
5204ad8b1aafSjsg } else {
5205ad8b1aafSjsg r = -EINVAL;
5206ad8b1aafSjsg goto out;
5207ad8b1aafSjsg }
5208c349dbc7Sjsg
5209c349dbc7Sjsg /* Update PSP FW topology after reset */
52105ca02815Sjsg if (reset_context->hive &&
52115ca02815Sjsg tmp_adev->gmc.xgmi.num_physical_nodes > 1)
52125ca02815Sjsg r = amdgpu_xgmi_update_topology(
52135ca02815Sjsg reset_context->hive, tmp_adev);
5214c349dbc7Sjsg }
5215c349dbc7Sjsg }
5216c349dbc7Sjsg
5217c349dbc7Sjsg out:
5218c349dbc7Sjsg if (!r) {
5219c349dbc7Sjsg amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5220c349dbc7Sjsg r = amdgpu_ib_ring_tests(tmp_adev);
5221c349dbc7Sjsg if (r) {
5222c349dbc7Sjsg dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5223c349dbc7Sjsg need_full_reset = true;
5224c349dbc7Sjsg r = -EAGAIN;
5225c349dbc7Sjsg goto end;
5226c349dbc7Sjsg }
5227c349dbc7Sjsg }
5228c349dbc7Sjsg
5229c349dbc7Sjsg if (!r)
5230c349dbc7Sjsg r = amdgpu_device_recover_vram(tmp_adev);
5231fb4d8502Sjsg else
5232c349dbc7Sjsg tmp_adev->asic_reset_res = r;
5233c349dbc7Sjsg }
5234c349dbc7Sjsg
5235c349dbc7Sjsg end:
52365ca02815Sjsg if (need_full_reset)
52375ca02815Sjsg set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
52385ca02815Sjsg else
52395ca02815Sjsg clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5240c349dbc7Sjsg return r;
5241c349dbc7Sjsg }
5242c349dbc7Sjsg
52431bb76ff1Sjsg static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5244c349dbc7Sjsg {
5245c349dbc7Sjsg
5246c349dbc7Sjsg switch (amdgpu_asic_reset_method(adev)) {
5247c349dbc7Sjsg case AMD_RESET_METHOD_MODE1:
5248c349dbc7Sjsg adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5249c349dbc7Sjsg break;
5250c349dbc7Sjsg case AMD_RESET_METHOD_MODE2:
5251c349dbc7Sjsg adev->mp1_state = PP_MP1_STATE_RESET;
5252c349dbc7Sjsg break;
5253c349dbc7Sjsg default:
5254c349dbc7Sjsg adev->mp1_state = PP_MP1_STATE_NONE;
5255c349dbc7Sjsg break;
5256c349dbc7Sjsg }
5257c349dbc7Sjsg
5259c349dbc7Sjsg }
5260c349dbc7Sjsg
52611bb76ff1Sjsg static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5262c349dbc7Sjsg {
5263c349dbc7Sjsg amdgpu_vf_error_trans_all(adev);
5264c349dbc7Sjsg adev->mp1_state = PP_MP1_STATE_NONE;
52655ca02815Sjsg }
52665ca02815Sjsg
5267ad8b1aafSjsg static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5268ad8b1aafSjsg {
5269ad8b1aafSjsg STUB();
5270ad8b1aafSjsg #ifdef 
notyet 5271ad8b1aafSjsg struct pci_dev *p = NULL; 5272ad8b1aafSjsg 5273ad8b1aafSjsg p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 5274ad8b1aafSjsg adev->pdev->bus->number, 1); 5275ad8b1aafSjsg if (p) { 5276ad8b1aafSjsg pm_runtime_enable(&(p->dev)); 5277ad8b1aafSjsg pm_runtime_resume(&(p->dev)); 5278ad8b1aafSjsg } 5279ad8b1aafSjsg #endif 5280ad8b1aafSjsg } 5281ad8b1aafSjsg 5282ad8b1aafSjsg static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) 5283ad8b1aafSjsg { 5284ad8b1aafSjsg enum amd_reset_method reset_method; 5285ad8b1aafSjsg struct pci_dev *p = NULL; 5286ad8b1aafSjsg u64 expires; 5287ad8b1aafSjsg 5288ad8b1aafSjsg /* 5289ad8b1aafSjsg * For now, only BACO and mode1 reset are confirmed 5290ad8b1aafSjsg * to suffer the audio issue without proper suspended. 5291ad8b1aafSjsg */ 5292ad8b1aafSjsg reset_method = amdgpu_asic_reset_method(adev); 5293ad8b1aafSjsg if ((reset_method != AMD_RESET_METHOD_BACO) && 5294ad8b1aafSjsg (reset_method != AMD_RESET_METHOD_MODE1)) 5295ad8b1aafSjsg return -EINVAL; 5296ad8b1aafSjsg 5297ad8b1aafSjsg STUB(); 5298ad8b1aafSjsg return -ENOSYS; 5299ad8b1aafSjsg #ifdef notyet 5300ad8b1aafSjsg 5301ad8b1aafSjsg p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), 5302ad8b1aafSjsg adev->pdev->bus->number, 1); 5303ad8b1aafSjsg if (!p) 5304ad8b1aafSjsg return -ENODEV; 5305ad8b1aafSjsg 5306ad8b1aafSjsg expires = pm_runtime_autosuspend_expiration(&(p->dev)); 5307ad8b1aafSjsg if (!expires) 5308ad8b1aafSjsg /* 5309ad8b1aafSjsg * If we cannot get the audio device autosuspend delay, 5310ad8b1aafSjsg * a fixed 4S interval will be used. Considering 3S is 5311ad8b1aafSjsg * the audio controller default autosuspend delay setting. 5312ad8b1aafSjsg * 4S used here is guaranteed to cover that. 5313ad8b1aafSjsg */ 5314ad8b1aafSjsg expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; 5315ad8b1aafSjsg 5316ad8b1aafSjsg while (!pm_runtime_status_suspended(&(p->dev))) { 5317ad8b1aafSjsg if (!pm_runtime_suspend(&(p->dev))) 5318ad8b1aafSjsg break; 5319ad8b1aafSjsg 5320ad8b1aafSjsg if (expires < ktime_get_mono_fast_ns()) { 5321ad8b1aafSjsg dev_warn(adev->dev, "failed to suspend display audio\n"); 53221bb76ff1Sjsg pci_dev_put(p); 5323ad8b1aafSjsg /* TODO: abort the succeeding gpu reset? 
*/ 5324ad8b1aafSjsg return -ETIMEDOUT; 5325ad8b1aafSjsg } 5326ad8b1aafSjsg } 5327ad8b1aafSjsg 5328ad8b1aafSjsg pm_runtime_disable(&(p->dev)); 5329ad8b1aafSjsg 53301bb76ff1Sjsg pci_dev_put(p); 5331ad8b1aafSjsg return 0; 5332ad8b1aafSjsg #endif 5333c349dbc7Sjsg } 5334c349dbc7Sjsg 53351bb76ff1Sjsg static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) 53361bb76ff1Sjsg { 53371bb76ff1Sjsg struct amdgpu_ras *con = amdgpu_ras_get_context(adev); 53381bb76ff1Sjsg 53391bb76ff1Sjsg #if defined(CONFIG_DEBUG_FS) 53401bb76ff1Sjsg if (!amdgpu_sriov_vf(adev)) 53411bb76ff1Sjsg cancel_work(&adev->reset_work); 53421bb76ff1Sjsg #endif 53431bb76ff1Sjsg 53441bb76ff1Sjsg if (adev->kfd.dev) 53451bb76ff1Sjsg cancel_work(&adev->kfd.reset_work); 53461bb76ff1Sjsg 53471bb76ff1Sjsg if (amdgpu_sriov_vf(adev)) 53481bb76ff1Sjsg cancel_work(&adev->virt.flr_work); 53491bb76ff1Sjsg 53501bb76ff1Sjsg if (con && adev->ras_enabled) 53511bb76ff1Sjsg cancel_work(&con->recovery_work); 53521bb76ff1Sjsg 53531bb76ff1Sjsg } 53541bb76ff1Sjsg 5355c349dbc7Sjsg /** 5356c349dbc7Sjsg * amdgpu_device_gpu_recover - reset the asic and recover scheduler 5357c349dbc7Sjsg * 5358ad8b1aafSjsg * @adev: amdgpu_device pointer 5359c349dbc7Sjsg * @job: which job trigger hang 5360f005ef32Sjsg * @reset_context: amdgpu reset context pointer 5361c349dbc7Sjsg * 5362c349dbc7Sjsg * Attempt to reset the GPU if it has hung (all asics). 5363c349dbc7Sjsg * Attempt to do soft-reset or full-reset and reinitialize Asic 5364c349dbc7Sjsg * Returns 0 for success or an error on failure. 5365c349dbc7Sjsg */ 5366c349dbc7Sjsg 5367c349dbc7Sjsg int amdgpu_device_gpu_recover(struct amdgpu_device *adev, 53681bb76ff1Sjsg struct amdgpu_job *job, 53691bb76ff1Sjsg struct amdgpu_reset_context *reset_context) 5370c349dbc7Sjsg { 5371c349dbc7Sjsg struct list_head device_list, *device_list_handle = NULL; 5372ad8b1aafSjsg bool job_signaled = false; 5373c349dbc7Sjsg struct amdgpu_hive_info *hive = NULL; 5374c349dbc7Sjsg struct amdgpu_device *tmp_adev = NULL; 5375c349dbc7Sjsg int i, r = 0; 5376ad8b1aafSjsg bool need_emergency_restart = false; 5377ad8b1aafSjsg bool audio_suspended = false; 53781bb76ff1Sjsg bool gpu_reset_for_dev_remove = false; 53795ca02815Sjsg 53801bb76ff1Sjsg gpu_reset_for_dev_remove = 53811bb76ff1Sjsg test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && 53821bb76ff1Sjsg test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); 5383ad8b1aafSjsg 5384ad8b1aafSjsg /* 5385ad8b1aafSjsg * Special case: RAS triggered and full reset isn't supported 5386ad8b1aafSjsg */ 5387ad8b1aafSjsg need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); 5388c349dbc7Sjsg 5389c349dbc7Sjsg /* 5390c349dbc7Sjsg * Flush RAM to disk so that after reboot 5391c349dbc7Sjsg * the user can read log and see why the system rebooted. 5392c349dbc7Sjsg */ 5393aa9b4d72Sjsg if (need_emergency_restart && amdgpu_ras_get_context(adev) && 5394aa9b4d72Sjsg amdgpu_ras_get_context(adev)->reboot) { 5395c349dbc7Sjsg DRM_WARN("Emergency reboot."); 5396c349dbc7Sjsg 5397c349dbc7Sjsg #ifdef notyet 5398c349dbc7Sjsg ksys_sync_helper(); 5399c349dbc7Sjsg emergency_restart(); 5400c349dbc7Sjsg #else 5401c349dbc7Sjsg panic("emergency_restart"); 5402c349dbc7Sjsg #endif 5403c349dbc7Sjsg } 5404c349dbc7Sjsg 5405c349dbc7Sjsg dev_info(adev->dev, "GPU %s begin!\n", 5406ad8b1aafSjsg need_emergency_restart ? 
"jobs stop":"reset"); 5407c349dbc7Sjsg 54081bb76ff1Sjsg if (!amdgpu_sriov_vf(adev)) 5409ad8b1aafSjsg hive = amdgpu_get_xgmi_hive(adev); 54101bb76ff1Sjsg if (hive) 5411ad8b1aafSjsg mutex_lock(&hive->hive_lock); 5412c349dbc7Sjsg 54131bb76ff1Sjsg reset_context->job = job; 54141bb76ff1Sjsg reset_context->hive = hive; 5415c349dbc7Sjsg /* 5416ad8b1aafSjsg * Build list of devices to reset. 5417ad8b1aafSjsg * In case we are in XGMI hive mode, resort the device list 5418ad8b1aafSjsg * to put adev in the 1st position. 5419c349dbc7Sjsg */ 5420ad8b1aafSjsg INIT_LIST_HEAD(&device_list); 5421b2e9d6baSjsg if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { 54221bb76ff1Sjsg list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { 54235ca02815Sjsg list_add_tail(&tmp_adev->reset_list, &device_list); 54241bb76ff1Sjsg if (gpu_reset_for_dev_remove && adev->shutdown) 54251bb76ff1Sjsg tmp_adev->shutdown = true; 54261bb76ff1Sjsg } 54275ca02815Sjsg if (!list_is_first(&adev->reset_list, &device_list)) 54285ca02815Sjsg list_rotate_to_front(&adev->reset_list, &device_list); 54295ca02815Sjsg device_list_handle = &device_list; 5430c349dbc7Sjsg } else { 54315ca02815Sjsg list_add_tail(&adev->reset_list, &device_list); 5432c349dbc7Sjsg device_list_handle = &device_list; 5433c349dbc7Sjsg } 5434c349dbc7Sjsg 54351bb76ff1Sjsg /* We need to lock reset domain only once both for XGMI and single device */ 54361bb76ff1Sjsg tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, 54371bb76ff1Sjsg reset_list); 54381bb76ff1Sjsg amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); 54391bb76ff1Sjsg 5440c349dbc7Sjsg /* block all schedulers and reset given job's ring */ 54415ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 54421bb76ff1Sjsg 54431bb76ff1Sjsg amdgpu_device_set_mp1_state(tmp_adev); 54441bb76ff1Sjsg 5445ad8b1aafSjsg /* 5446ad8b1aafSjsg * Try to put the audio codec into suspend state 5447ad8b1aafSjsg * before gpu reset started. 5448ad8b1aafSjsg * 5449ad8b1aafSjsg * Due to the power domain of the graphics device 5450ad8b1aafSjsg * is shared with AZ power domain. Without this, 5451ad8b1aafSjsg * we may change the audio hardware from behind 5452ad8b1aafSjsg * the audio driver's back. That will trigger 5453ad8b1aafSjsg * some audio codec errors. 
5454ad8b1aafSjsg */ 5455ad8b1aafSjsg if (!amdgpu_device_suspend_display_audio(tmp_adev)) 5456ad8b1aafSjsg audio_suspended = true; 5457ad8b1aafSjsg 5458af8ed3f7Sjsg amdgpu_ras_set_error_query_ready(tmp_adev, false); 5459ad8b1aafSjsg 5460ad8b1aafSjsg cancel_delayed_work_sync(&tmp_adev->delayed_init_work); 5461ad8b1aafSjsg 5462c349dbc7Sjsg if (!amdgpu_sriov_vf(tmp_adev)) 5463c349dbc7Sjsg amdgpu_amdkfd_pre_reset(tmp_adev); 5464c349dbc7Sjsg 5465c349dbc7Sjsg /* 5466c349dbc7Sjsg * Mark these ASICs to be reseted as untracked first 5467c349dbc7Sjsg * And add them back after reset completed 5468c349dbc7Sjsg */ 5469c349dbc7Sjsg amdgpu_unregister_gpu_instance(tmp_adev); 5470c349dbc7Sjsg 54711bb76ff1Sjsg drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); 5472c349dbc7Sjsg 5473c349dbc7Sjsg /* disable ras on ALL IPs */ 5474ad8b1aafSjsg if (!need_emergency_restart && 5475c349dbc7Sjsg amdgpu_device_ip_need_full_reset(tmp_adev)) 5476c349dbc7Sjsg amdgpu_ras_suspend(tmp_adev); 5477fb4d8502Sjsg 5478fb4d8502Sjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5479c349dbc7Sjsg struct amdgpu_ring *ring = tmp_adev->rings[i]; 5480fb4d8502Sjsg 5481fb4d8502Sjsg if (!ring || !ring->sched.thread) 5482fb4d8502Sjsg continue; 5483fb4d8502Sjsg 5484c349dbc7Sjsg drm_sched_stop(&ring->sched, job ? &job->base : NULL); 5485c349dbc7Sjsg 5486ad8b1aafSjsg if (need_emergency_restart) 5487c349dbc7Sjsg amdgpu_job_stop_all_jobs_on_sched(&ring->sched); 5488c349dbc7Sjsg } 54895ca02815Sjsg atomic_inc(&tmp_adev->gpu_reset_counter); 5490c349dbc7Sjsg } 5491c349dbc7Sjsg 5492ad8b1aafSjsg if (need_emergency_restart) 5493c349dbc7Sjsg goto skip_sched_resume; 5494c349dbc7Sjsg 5495c349dbc7Sjsg /* 5496c349dbc7Sjsg * Must check guilty signal here since after this point all old 5497c349dbc7Sjsg * HW fences are force signaled. 5498c349dbc7Sjsg * 5499c349dbc7Sjsg * job->base holds a reference to parent fence 5500fb4d8502Sjsg */ 55011bb76ff1Sjsg if (job && dma_fence_is_signaled(&job->hw_fence)) { 5502c349dbc7Sjsg job_signaled = true; 5503c349dbc7Sjsg dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); 5504c349dbc7Sjsg goto skip_hw_reset; 5505fb4d8502Sjsg } 5506fb4d8502Sjsg 5507c349dbc7Sjsg retry: /* Rest of adevs pre asic reset from XGMI hive. */ 55085ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 55091bb76ff1Sjsg if (gpu_reset_for_dev_remove) { 55101bb76ff1Sjsg /* Workaroud for ASICs need to disable SMC first */ 55111bb76ff1Sjsg amdgpu_device_smu_fini_early(tmp_adev); 55121bb76ff1Sjsg } 55131bb76ff1Sjsg r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); 5514c349dbc7Sjsg /*TODO Should we stop ?*/ 5515c349dbc7Sjsg if (r) { 5516ad8b1aafSjsg dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", 5517ad8b1aafSjsg r, adev_to_drm(tmp_adev)->unique); 5518c349dbc7Sjsg tmp_adev->asic_reset_res = r; 5519c349dbc7Sjsg } 55201bb76ff1Sjsg 55211bb76ff1Sjsg /* 55221bb76ff1Sjsg * Drop all pending non scheduler resets. Scheduler resets 55231bb76ff1Sjsg * were already dropped during drm_sched_stop 55241bb76ff1Sjsg */ 55251bb76ff1Sjsg amdgpu_device_stop_pending_resets(tmp_adev); 5526c349dbc7Sjsg } 5527c349dbc7Sjsg 5528c349dbc7Sjsg /* Actual ASIC resets if needed.*/ 55291bb76ff1Sjsg /* Host driver will handle XGMI hive reset for SRIOV */ 5530c349dbc7Sjsg if (amdgpu_sriov_vf(adev)) { 5531c349dbc7Sjsg r = amdgpu_device_reset_sriov(adev, job ? 
false : true); 5532c349dbc7Sjsg if (r) 5533c349dbc7Sjsg adev->asic_reset_res = r; 55341bb76ff1Sjsg 5535f005ef32Sjsg /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ 5536f005ef32Sjsg if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || 5537f005ef32Sjsg adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) 55381bb76ff1Sjsg amdgpu_ras_resume(adev); 5539c349dbc7Sjsg } else { 55401bb76ff1Sjsg r = amdgpu_do_asic_reset(device_list_handle, reset_context); 5541c349dbc7Sjsg if (r && r == -EAGAIN) 5542c349dbc7Sjsg goto retry; 55431bb76ff1Sjsg 55441bb76ff1Sjsg if (!r && gpu_reset_for_dev_remove) 55451bb76ff1Sjsg goto recover_end; 5546c349dbc7Sjsg } 5547c349dbc7Sjsg 5548c349dbc7Sjsg skip_hw_reset: 5549c349dbc7Sjsg 5550c349dbc7Sjsg /* Post ASIC reset for all devs .*/ 55515ca02815Sjsg list_for_each_entry(tmp_adev, device_list_handle, reset_list) { 55525ca02815Sjsg 5553c349dbc7Sjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5554c349dbc7Sjsg struct amdgpu_ring *ring = tmp_adev->rings[i]; 5555c349dbc7Sjsg 5556c349dbc7Sjsg if (!ring || !ring->sched.thread) 5557c349dbc7Sjsg continue; 5558c349dbc7Sjsg 5559f005ef32Sjsg drm_sched_start(&ring->sched, true); 5560c349dbc7Sjsg } 5561c349dbc7Sjsg 55621bb76ff1Sjsg if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) 55631bb76ff1Sjsg amdgpu_mes_self_test(tmp_adev); 55641bb76ff1Sjsg 5565f005ef32Sjsg if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) 5566ad8b1aafSjsg drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); 5567c349dbc7Sjsg 55681bb76ff1Sjsg if (tmp_adev->asic_reset_res) 55691bb76ff1Sjsg r = tmp_adev->asic_reset_res; 55701bb76ff1Sjsg 5571c349dbc7Sjsg tmp_adev->asic_reset_res = 0; 5572fb4d8502Sjsg 5573fb4d8502Sjsg if (r) { 5574fb4d8502Sjsg /* bad news, how to tell it to userspace ? 
			 */
5575c349dbc7Sjsg 			dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5576c349dbc7Sjsg 			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5577fb4d8502Sjsg 		} else {
5578c349dbc7Sjsg 			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
55795ca02815Sjsg 			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
55805ca02815Sjsg 				DRM_WARN("smart shift update failed\n");
5581c349dbc7Sjsg 		}
5582fb4d8502Sjsg 	}
5583fb4d8502Sjsg 
5584c349dbc7Sjsg skip_sched_resume:
55855ca02815Sjsg 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5586c349dbc7Sjsg 		/* unlock kfd: SRIOV would do it separately */
5587ad8b1aafSjsg 		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5588c349dbc7Sjsg 			amdgpu_amdkfd_post_reset(tmp_adev);
55895ca02815Sjsg 
55905ca02815Sjsg 		/* kfd_post_reset will do nothing if the kfd device is not initialized;
55915ca02815Sjsg 		 * bring up kfd here if it has not been initialized before
55925ca02815Sjsg 		 */
55935ca02815Sjsg 		if (!adev->kfd.init_complete)
55945ca02815Sjsg 			amdgpu_amdkfd_device_init(adev);
55955ca02815Sjsg 
5596ad8b1aafSjsg 		if (audio_suspended)
5597ad8b1aafSjsg 			amdgpu_device_resume_display_audio(tmp_adev);
55981bb76ff1Sjsg 
55991bb76ff1Sjsg 		amdgpu_device_unset_mp1_state(tmp_adev);
5600f005ef32Sjsg 
5601f005ef32Sjsg 		amdgpu_ras_set_error_query_ready(tmp_adev, true);
5602c349dbc7Sjsg 	}
5603c349dbc7Sjsg 
56041bb76ff1Sjsg recover_end:
56051bb76ff1Sjsg 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
56061bb76ff1Sjsg 				    reset_list);
56071bb76ff1Sjsg 	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
56081bb76ff1Sjsg 
5609ad8b1aafSjsg 	if (hive) {
5610ad8b1aafSjsg 		mutex_unlock(&hive->hive_lock);
5611ad8b1aafSjsg 		amdgpu_put_xgmi_hive(hive);
5612ad8b1aafSjsg 	}
5613c349dbc7Sjsg 
56141bb76ff1Sjsg 	if (r)
5615c349dbc7Sjsg 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
56161bb76ff1Sjsg 
56171bb76ff1Sjsg 	atomic_set(&adev->reset_domain->reset_res, r);
5618fb4d8502Sjsg 	return r;
5619fb4d8502Sjsg }
5620fb4d8502Sjsg 
5621fb4d8502Sjsg /**
5622fb4d8502Sjsg  * amdgpu_device_get_pcie_info - fetch PCIe info about the PCIe slot
5623fb4d8502Sjsg  *
5624fb4d8502Sjsg  * @adev: amdgpu_device pointer
5625fb4d8502Sjsg  *
5626fb4d8502Sjsg  * Fetches and stores in the driver the PCIe capabilities (gen speed
5627fb4d8502Sjsg  * and lanes) of the slot the device is in. Handles APUs and
5628fb4d8502Sjsg  * virtualized environments where PCIe config space may not be available.
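 *
 * Editor's note (not part of the original comment): when the
 * amdgpu_pcie_gen_cap / amdgpu_pcie_lane_cap module options are set,
 * the code below uses them verbatim as adev->pm.pcie_gen_mask and
 * adev->pm.pcie_mlw_mask instead of the probed capabilities.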
5629fb4d8502Sjsg */ 5630fb4d8502Sjsg static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) 5631fb4d8502Sjsg { 5632fb4d8502Sjsg struct pci_dev *pdev; 5633c349dbc7Sjsg enum pci_bus_speed speed_cap, platform_speed_cap; 5634c349dbc7Sjsg enum pcie_link_width platform_link_width; 5635fb4d8502Sjsg 5636fb4d8502Sjsg if (amdgpu_pcie_gen_cap) 5637fb4d8502Sjsg adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; 5638fb4d8502Sjsg 5639fb4d8502Sjsg if (amdgpu_pcie_lane_cap) 5640fb4d8502Sjsg adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; 5641fb4d8502Sjsg 5642fb4d8502Sjsg /* covers APUs as well */ 5643f005ef32Sjsg if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) { 5644fb4d8502Sjsg if (adev->pm.pcie_gen_mask == 0) 5645fb4d8502Sjsg adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; 5646fb4d8502Sjsg if (adev->pm.pcie_mlw_mask == 0) 5647fb4d8502Sjsg adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; 5648fb4d8502Sjsg return; 5649fb4d8502Sjsg } 5650fb4d8502Sjsg 5651c349dbc7Sjsg if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) 5652c349dbc7Sjsg return; 5653c349dbc7Sjsg 5654c349dbc7Sjsg pcie_bandwidth_available(adev->pdev, NULL, 5655c349dbc7Sjsg &platform_speed_cap, &platform_link_width); 5656c349dbc7Sjsg 5657fb4d8502Sjsg if (adev->pm.pcie_gen_mask == 0) { 5658fb4d8502Sjsg /* asic caps */ 5659fb4d8502Sjsg pdev = adev->pdev; 5660fb4d8502Sjsg speed_cap = pcie_get_speed_cap(pdev); 5661fb4d8502Sjsg if (speed_cap == PCI_SPEED_UNKNOWN) { 5662fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5663fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5664fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 5665fb4d8502Sjsg } else { 56665ca02815Sjsg if (speed_cap == PCIE_SPEED_32_0GT) 56675ca02815Sjsg adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 56685ca02815Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 56695ca02815Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 56705ca02815Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 | 56715ca02815Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5); 56725ca02815Sjsg else if (speed_cap == PCIE_SPEED_16_0GT) 5673fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5674fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5675fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5676fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); 5677fb4d8502Sjsg else if (speed_cap == PCIE_SPEED_8_0GT) 5678fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5679fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5680fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); 5681fb4d8502Sjsg else if (speed_cap == PCIE_SPEED_5_0GT) 5682fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5683fb4d8502Sjsg CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); 5684fb4d8502Sjsg else 5685fb4d8502Sjsg adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; 5686fb4d8502Sjsg } 5687fb4d8502Sjsg /* platform caps */ 5688c349dbc7Sjsg if (platform_speed_cap == PCI_SPEED_UNKNOWN) { 5689fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5690fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 5691fb4d8502Sjsg } else { 56925ca02815Sjsg if (platform_speed_cap == PCIE_SPEED_32_0GT) 56935ca02815Sjsg adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 56945ca02815Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 56955ca02815Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 56965ca02815Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 | 56975ca02815Sjsg 
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5); 56985ca02815Sjsg else if (platform_speed_cap == PCIE_SPEED_16_0GT) 5699fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5700fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5701fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | 5702fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); 5703c349dbc7Sjsg else if (platform_speed_cap == PCIE_SPEED_8_0GT) 5704fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5705fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | 5706fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); 5707c349dbc7Sjsg else if (platform_speed_cap == PCIE_SPEED_5_0GT) 5708fb4d8502Sjsg adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | 5709fb4d8502Sjsg CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); 5710fb4d8502Sjsg else 5711fb4d8502Sjsg adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; 5712fb4d8502Sjsg 5713fb4d8502Sjsg } 5714fb4d8502Sjsg } 5715fb4d8502Sjsg if (adev->pm.pcie_mlw_mask == 0) { 5716c349dbc7Sjsg if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { 5717fb4d8502Sjsg adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; 5718fb4d8502Sjsg } else { 5719c349dbc7Sjsg switch (platform_link_width) { 5720fb4d8502Sjsg case PCIE_LNK_X32: 5721fb4d8502Sjsg adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | 5722fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 5723fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5724fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5725fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5726fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5727fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5728fb4d8502Sjsg break; 5729fb4d8502Sjsg case PCIE_LNK_X16: 5730fb4d8502Sjsg adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | 5731fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5732fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5733fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5734fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5735fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5736fb4d8502Sjsg break; 5737fb4d8502Sjsg case PCIE_LNK_X12: 5738fb4d8502Sjsg adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | 5739fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5740fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5741fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5742fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5743fb4d8502Sjsg break; 5744fb4d8502Sjsg case PCIE_LNK_X8: 5745fb4d8502Sjsg adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | 5746fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5747fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5748fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5749fb4d8502Sjsg break; 5750fb4d8502Sjsg case PCIE_LNK_X4: 5751fb4d8502Sjsg adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | 5752fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5753fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5754fb4d8502Sjsg break; 5755fb4d8502Sjsg case PCIE_LNK_X2: 5756fb4d8502Sjsg adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | 5757fb4d8502Sjsg CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); 5758fb4d8502Sjsg break; 5759fb4d8502Sjsg case PCIE_LNK_X1: 5760fb4d8502Sjsg adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; 5761fb4d8502Sjsg break; 5762fb4d8502Sjsg default: 5763fb4d8502Sjsg break; 5764fb4d8502Sjsg } 5765fb4d8502Sjsg } 5766fb4d8502Sjsg } 5767fb4d8502Sjsg } 5768fb4d8502Sjsg 57691bb76ff1Sjsg /** 57701bb76ff1Sjsg * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR 57711bb76ff1Sjsg * 57721bb76ff1Sjsg * @adev: 
amdgpu_device pointer 57731bb76ff1Sjsg * @peer_adev: amdgpu_device pointer for peer device trying to access @adev 57741bb76ff1Sjsg * 57751bb76ff1Sjsg * Return true if @peer_adev can access (DMA) @adev through the PCIe 57761bb76ff1Sjsg * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of 57771bb76ff1Sjsg * @peer_adev. 57781bb76ff1Sjsg */ 57791bb76ff1Sjsg bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, 57801bb76ff1Sjsg struct amdgpu_device *peer_adev) 57811bb76ff1Sjsg { 57821bb76ff1Sjsg #ifdef CONFIG_HSA_AMD_P2P 57831bb76ff1Sjsg uint64_t address_mask = peer_adev->dev->dma_mask ? 57841bb76ff1Sjsg ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); 57851bb76ff1Sjsg resource_size_t aper_limit = 57861bb76ff1Sjsg adev->gmc.aper_base + adev->gmc.aper_size - 1; 57871bb76ff1Sjsg bool p2p_access = 57881bb76ff1Sjsg !adev->gmc.xgmi.connected_to_cpu && 57891bb76ff1Sjsg !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); 57901bb76ff1Sjsg 57911bb76ff1Sjsg return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && 57921bb76ff1Sjsg adev->gmc.real_vram_size == adev->gmc.visible_vram_size && 57931bb76ff1Sjsg !(adev->gmc.aper_base & address_mask || 57941bb76ff1Sjsg aper_limit & address_mask)); 57951bb76ff1Sjsg #else 57961bb76ff1Sjsg return false; 57971bb76ff1Sjsg #endif 57981bb76ff1Sjsg } 57991bb76ff1Sjsg 5800c349dbc7Sjsg int amdgpu_device_baco_enter(struct drm_device *dev) 5801c349dbc7Sjsg { 5802ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 5803c349dbc7Sjsg struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 5804c349dbc7Sjsg 5805f005ef32Sjsg if (!amdgpu_device_supports_baco(dev)) 5806c349dbc7Sjsg return -ENOTSUPP; 5807c349dbc7Sjsg 58085ca02815Sjsg if (ras && adev->ras_enabled && 58095ca02815Sjsg adev->nbio.funcs->enable_doorbell_interrupt) 5810c349dbc7Sjsg adev->nbio.funcs->enable_doorbell_interrupt(adev, false); 5811c349dbc7Sjsg 5812c349dbc7Sjsg return amdgpu_dpm_baco_enter(adev); 5813c349dbc7Sjsg } 5814c349dbc7Sjsg 5815c349dbc7Sjsg int amdgpu_device_baco_exit(struct drm_device *dev) 5816c349dbc7Sjsg { 5817ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 5818c349dbc7Sjsg struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); 5819c349dbc7Sjsg int ret = 0; 5820c349dbc7Sjsg 5821f005ef32Sjsg if (!amdgpu_device_supports_baco(dev)) 5822c349dbc7Sjsg return -ENOTSUPP; 5823c349dbc7Sjsg 5824c349dbc7Sjsg ret = amdgpu_dpm_baco_exit(adev); 5825c349dbc7Sjsg if (ret) 5826c349dbc7Sjsg return ret; 5827c349dbc7Sjsg 58285ca02815Sjsg if (ras && adev->ras_enabled && 58295ca02815Sjsg adev->nbio.funcs->enable_doorbell_interrupt) 5830c349dbc7Sjsg adev->nbio.funcs->enable_doorbell_interrupt(adev, true); 5831c349dbc7Sjsg 5832d98eb4daSjsg if (amdgpu_passthrough(adev) && adev->nbio.funcs && 58335ca02815Sjsg adev->nbio.funcs->clear_doorbell_interrupt) 58345ca02815Sjsg adev->nbio.funcs->clear_doorbell_interrupt(adev); 58355ca02815Sjsg 5836c349dbc7Sjsg return 0; 5837c349dbc7Sjsg } 5838ad8b1aafSjsg 5839ad8b1aafSjsg /** 5840ad8b1aafSjsg * amdgpu_pci_error_detected - Called when a PCI error is detected. 5841ad8b1aafSjsg * @pdev: PCI device struct 5842ad8b1aafSjsg * @state: PCI channel state 5843ad8b1aafSjsg * 5844ad8b1aafSjsg * Description: Called when a PCI error is detected. 5845ad8b1aafSjsg * 5846ad8b1aafSjsg * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT. 
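 *
 * Editor's note (summary of the handling below, not part of the original
 * comment): pci_channel_io_normal returns PCI_ERS_RESULT_CAN_RECOVER,
 * pci_channel_io_frozen locks the reset domain and stops all schedulers
 * before returning PCI_ERS_RESULT_NEED_RESET, and
 * pci_channel_io_perm_failure returns PCI_ERS_RESULT_DISCONNECT.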
5847ad8b1aafSjsg */ 5848ad8b1aafSjsg pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) 5849ad8b1aafSjsg { 5850ad8b1aafSjsg STUB(); 5851ad8b1aafSjsg return 0; 5852ad8b1aafSjsg #ifdef notyet 5853ad8b1aafSjsg struct drm_device *dev = pci_get_drvdata(pdev); 5854ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 5855ad8b1aafSjsg int i; 5856ad8b1aafSjsg 5857ad8b1aafSjsg DRM_INFO("PCI error: detected callback, state(%d)!!\n", state); 5858ad8b1aafSjsg 5859ad8b1aafSjsg if (adev->gmc.xgmi.num_physical_nodes > 1) { 5860ad8b1aafSjsg DRM_WARN("No support for XGMI hive yet..."); 5861ad8b1aafSjsg return PCI_ERS_RESULT_DISCONNECT; 5862ad8b1aafSjsg } 5863ad8b1aafSjsg 58645ca02815Sjsg adev->pci_channel_state = state; 58655ca02815Sjsg 5866ad8b1aafSjsg switch (state) { 5867ad8b1aafSjsg case pci_channel_io_normal: 5868ad8b1aafSjsg return PCI_ERS_RESULT_CAN_RECOVER; 5869ad8b1aafSjsg /* Fatal error, prepare for slot reset */ 5870ad8b1aafSjsg case pci_channel_io_frozen: 5871ad8b1aafSjsg /* 58721bb76ff1Sjsg * Locking adev->reset_domain->sem will prevent any external access 5873ad8b1aafSjsg * to GPU during PCI error recovery 5874ad8b1aafSjsg */ 58751bb76ff1Sjsg amdgpu_device_lock_reset_domain(adev->reset_domain); 58761bb76ff1Sjsg amdgpu_device_set_mp1_state(adev); 5877ad8b1aafSjsg 5878ad8b1aafSjsg /* 5879ad8b1aafSjsg * Block any work scheduling as we do for regular GPU reset 5880ad8b1aafSjsg * for the duration of the recovery 5881ad8b1aafSjsg */ 5882ad8b1aafSjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 5883ad8b1aafSjsg struct amdgpu_ring *ring = adev->rings[i]; 5884ad8b1aafSjsg 5885ad8b1aafSjsg if (!ring || !ring->sched.thread) 5886ad8b1aafSjsg continue; 5887ad8b1aafSjsg 5888ad8b1aafSjsg drm_sched_stop(&ring->sched, NULL); 5889ad8b1aafSjsg } 58905ca02815Sjsg atomic_inc(&adev->gpu_reset_counter); 5891ad8b1aafSjsg return PCI_ERS_RESULT_NEED_RESET; 5892ad8b1aafSjsg case pci_channel_io_perm_failure: 5893ad8b1aafSjsg /* Permanent error, prepare for device removal */ 5894ad8b1aafSjsg return PCI_ERS_RESULT_DISCONNECT; 5895ad8b1aafSjsg } 5896ad8b1aafSjsg 5897ad8b1aafSjsg return PCI_ERS_RESULT_NEED_RESET; 5898ad8b1aafSjsg #endif 5899ad8b1aafSjsg } 5900ad8b1aafSjsg 5901ad8b1aafSjsg /** 5902ad8b1aafSjsg * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers 5903ad8b1aafSjsg * @pdev: pointer to PCI device 5904ad8b1aafSjsg */ 5905ad8b1aafSjsg pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) 5906ad8b1aafSjsg { 5907ad8b1aafSjsg 5908ad8b1aafSjsg DRM_INFO("PCI error: mmio enabled callback!!\n"); 5909ad8b1aafSjsg 5910ad8b1aafSjsg /* TODO - dump whatever for debugging purposes */ 5911ad8b1aafSjsg 5912ad8b1aafSjsg /* This called only if amdgpu_pci_error_detected returns 5913ad8b1aafSjsg * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still 5914ad8b1aafSjsg * works, no need to reset slot. 5915ad8b1aafSjsg */ 5916ad8b1aafSjsg 5917ad8b1aafSjsg return PCI_ERS_RESULT_RECOVERED; 5918ad8b1aafSjsg } 5919ad8b1aafSjsg 5920ad8b1aafSjsg /** 5921ad8b1aafSjsg * amdgpu_pci_slot_reset - Called when PCI slot has been reset. 5922ad8b1aafSjsg * @pdev: PCI device struct 5923ad8b1aafSjsg * 5924ad8b1aafSjsg * Description: This routine is called by the pci error recovery 5925ad8b1aafSjsg * code after the PCI slot has been reset, just before we 5926ad8b1aafSjsg * should resume normal operations. 
5927ad8b1aafSjsg */ 5928ad8b1aafSjsg pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) 5929ad8b1aafSjsg { 5930ad8b1aafSjsg STUB(); 5931ad8b1aafSjsg return PCI_ERS_RESULT_RECOVERED; 5932ad8b1aafSjsg #ifdef notyet 5933ad8b1aafSjsg struct drm_device *dev = pci_get_drvdata(pdev); 5934ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 5935ad8b1aafSjsg int r, i; 59365ca02815Sjsg struct amdgpu_reset_context reset_context; 5937ad8b1aafSjsg u32 memsize; 5938ad8b1aafSjsg struct list_head device_list; 5939ad8b1aafSjsg 5940ad8b1aafSjsg DRM_INFO("PCI error: slot reset callback!!\n"); 5941ad8b1aafSjsg 59425ca02815Sjsg memset(&reset_context, 0, sizeof(reset_context)); 59435ca02815Sjsg 5944ad8b1aafSjsg INIT_LIST_HEAD(&device_list); 59455ca02815Sjsg list_add_tail(&adev->reset_list, &device_list); 5946ad8b1aafSjsg 5947ad8b1aafSjsg /* wait for asic to come out of reset */ 5948ad8b1aafSjsg drm_msleep(500); 5949ad8b1aafSjsg 5950ad8b1aafSjsg /* Restore PCI confspace */ 5951ad8b1aafSjsg amdgpu_device_load_pci_state(pdev); 5952ad8b1aafSjsg 5953ad8b1aafSjsg /* confirm ASIC came out of reset */ 5954ad8b1aafSjsg for (i = 0; i < adev->usec_timeout; i++) { 5955ad8b1aafSjsg memsize = amdgpu_asic_get_config_memsize(adev); 5956ad8b1aafSjsg 5957ad8b1aafSjsg if (memsize != 0xffffffff) 5958ad8b1aafSjsg break; 5959ad8b1aafSjsg udelay(1); 5960ad8b1aafSjsg } 5961ad8b1aafSjsg if (memsize == 0xffffffff) { 5962ad8b1aafSjsg r = -ETIME; 5963ad8b1aafSjsg goto out; 5964ad8b1aafSjsg } 5965ad8b1aafSjsg 59665ca02815Sjsg reset_context.method = AMD_RESET_METHOD_NONE; 59675ca02815Sjsg reset_context.reset_req_dev = adev; 59685ca02815Sjsg set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); 59695ca02815Sjsg set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); 59705ca02815Sjsg 59715ca02815Sjsg adev->no_hw_access = true; 59725ca02815Sjsg r = amdgpu_device_pre_asic_reset(adev, &reset_context); 59735ca02815Sjsg adev->no_hw_access = false; 5974ad8b1aafSjsg if (r) 5975ad8b1aafSjsg goto out; 5976ad8b1aafSjsg 59775ca02815Sjsg r = amdgpu_do_asic_reset(&device_list, &reset_context); 5978ad8b1aafSjsg 5979ad8b1aafSjsg out: 5980ad8b1aafSjsg if (!r) { 5981ad8b1aafSjsg if (amdgpu_device_cache_pci_state(adev->pdev)) 5982ad8b1aafSjsg pci_restore_state(adev->pdev); 5983ad8b1aafSjsg 5984ad8b1aafSjsg DRM_INFO("PCIe error recovery succeeded\n"); 5985ad8b1aafSjsg } else { 5986ad8b1aafSjsg DRM_ERROR("PCIe error recovery failed, err:%d", r); 59871bb76ff1Sjsg amdgpu_device_unset_mp1_state(adev); 59881bb76ff1Sjsg amdgpu_device_unlock_reset_domain(adev->reset_domain); 5989ad8b1aafSjsg } 5990ad8b1aafSjsg 5991ad8b1aafSjsg return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 5992ad8b1aafSjsg #endif 5993ad8b1aafSjsg } 5994ad8b1aafSjsg 5995ad8b1aafSjsg /** 5996ad8b1aafSjsg * amdgpu_pci_resume() - resume normal ops after PCI reset 5997ad8b1aafSjsg * @pdev: pointer to PCI device 5998ad8b1aafSjsg * 5999ad8b1aafSjsg * Called when the error recovery driver tells us that its 60005ca02815Sjsg * OK to resume normal operation. 
6001ad8b1aafSjsg */ 6002ad8b1aafSjsg void amdgpu_pci_resume(struct pci_dev *pdev) 6003ad8b1aafSjsg { 6004ad8b1aafSjsg STUB(); 6005ad8b1aafSjsg #ifdef notyet 6006ad8b1aafSjsg struct drm_device *dev = pci_get_drvdata(pdev); 6007ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 6008ad8b1aafSjsg int i; 6009ad8b1aafSjsg 6010ad8b1aafSjsg 6011ad8b1aafSjsg DRM_INFO("PCI error: resume callback!!\n"); 6012ad8b1aafSjsg 60135ca02815Sjsg /* Only continue execution for the case of pci_channel_io_frozen */ 60145ca02815Sjsg if (adev->pci_channel_state != pci_channel_io_frozen) 60155ca02815Sjsg return; 60165ca02815Sjsg 6017ad8b1aafSjsg for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 6018ad8b1aafSjsg struct amdgpu_ring *ring = adev->rings[i]; 6019ad8b1aafSjsg 6020ad8b1aafSjsg if (!ring || !ring->sched.thread) 6021ad8b1aafSjsg continue; 6022ad8b1aafSjsg 6023ad8b1aafSjsg drm_sched_start(&ring->sched, true); 6024ad8b1aafSjsg } 6025ad8b1aafSjsg 60261bb76ff1Sjsg amdgpu_device_unset_mp1_state(adev); 60271bb76ff1Sjsg amdgpu_device_unlock_reset_domain(adev->reset_domain); 6028ad8b1aafSjsg #endif 6029ad8b1aafSjsg } 6030ad8b1aafSjsg 6031ad8b1aafSjsg bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) 6032ad8b1aafSjsg { 6033ad8b1aafSjsg return false; 6034ad8b1aafSjsg #ifdef notyet 6035ad8b1aafSjsg struct drm_device *dev = pci_get_drvdata(pdev); 6036ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 6037ad8b1aafSjsg int r; 6038ad8b1aafSjsg 603998802368Sjsg if (amdgpu_sriov_vf(adev)) 604098802368Sjsg return false; 604198802368Sjsg 6042ad8b1aafSjsg r = pci_save_state(pdev); 6043ad8b1aafSjsg if (!r) { 6044ad8b1aafSjsg kfree(adev->pci_state); 6045ad8b1aafSjsg 6046ad8b1aafSjsg adev->pci_state = pci_store_saved_state(pdev); 6047ad8b1aafSjsg 6048ad8b1aafSjsg if (!adev->pci_state) { 6049ad8b1aafSjsg DRM_ERROR("Failed to store PCI saved state"); 6050ad8b1aafSjsg return false; 6051ad8b1aafSjsg } 6052ad8b1aafSjsg } else { 6053ad8b1aafSjsg DRM_WARN("Failed to save PCI state, err:%d\n", r); 6054ad8b1aafSjsg return false; 6055ad8b1aafSjsg } 6056ad8b1aafSjsg 6057ad8b1aafSjsg return true; 6058ad8b1aafSjsg #endif 6059ad8b1aafSjsg } 6060ad8b1aafSjsg 6061ad8b1aafSjsg bool amdgpu_device_load_pci_state(struct pci_dev *pdev) 6062ad8b1aafSjsg { 6063ad8b1aafSjsg STUB(); 6064ad8b1aafSjsg return false; 6065ad8b1aafSjsg #ifdef notyet 6066ad8b1aafSjsg struct drm_device *dev = pci_get_drvdata(pdev); 6067ad8b1aafSjsg struct amdgpu_device *adev = drm_to_adev(dev); 6068ad8b1aafSjsg int r; 6069ad8b1aafSjsg 6070ad8b1aafSjsg if (!adev->pci_state) 6071ad8b1aafSjsg return false; 6072ad8b1aafSjsg 6073ad8b1aafSjsg r = pci_load_saved_state(pdev, adev->pci_state); 6074ad8b1aafSjsg 6075ad8b1aafSjsg if (!r) { 6076ad8b1aafSjsg pci_restore_state(pdev); 6077ad8b1aafSjsg } else { 6078ad8b1aafSjsg DRM_WARN("Failed to load PCI state, err:%d\n", r); 6079ad8b1aafSjsg return false; 6080ad8b1aafSjsg } 6081ad8b1aafSjsg 6082ad8b1aafSjsg return true; 6083ad8b1aafSjsg #endif 6084ad8b1aafSjsg } 6085ad8b1aafSjsg 60865ca02815Sjsg void amdgpu_device_flush_hdp(struct amdgpu_device *adev, 60875ca02815Sjsg struct amdgpu_ring *ring) 60885ca02815Sjsg { 60895ca02815Sjsg #ifdef CONFIG_X86_64 609016c07370Sjsg if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) 60915ca02815Sjsg return; 60925ca02815Sjsg #endif 60935ca02815Sjsg if (adev->gmc.xgmi.connected_to_cpu) 60945ca02815Sjsg return; 6095ad8b1aafSjsg 60965ca02815Sjsg if (ring && ring->funcs->emit_hdp_flush) 60975ca02815Sjsg amdgpu_ring_emit_hdp_flush(ring); 60985ca02815Sjsg else 60995ca02815Sjsg 
		amdgpu_asic_flush_hdp(adev, ring);
61005ca02815Sjsg }
61015ca02815Sjsg 
61025ca02815Sjsg void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
61035ca02815Sjsg 		struct amdgpu_ring *ring)
61045ca02815Sjsg {
61055ca02815Sjsg #ifdef CONFIG_X86_64
610616c07370Sjsg 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
61075ca02815Sjsg 		return;
61085ca02815Sjsg #endif
61095ca02815Sjsg 	if (adev->gmc.xgmi.connected_to_cpu)
61105ca02815Sjsg 		return;
61115ca02815Sjsg 
61125ca02815Sjsg 	amdgpu_asic_invalidate_hdp(adev, ring);
61135ca02815Sjsg }
61141bb76ff1Sjsg 
61151bb76ff1Sjsg int amdgpu_in_reset(struct amdgpu_device *adev)
61161bb76ff1Sjsg {
61171bb76ff1Sjsg 	return atomic_read(&adev->reset_domain->in_gpu_reset);
61181bb76ff1Sjsg }
61191bb76ff1Sjsg 
61201bb76ff1Sjsg /**
61211bb76ff1Sjsg  * amdgpu_device_halt() - bring hardware to some kind of halt state
61221bb76ff1Sjsg  *
61231bb76ff1Sjsg  * @adev: amdgpu_device pointer
61241bb76ff1Sjsg  *
61251bb76ff1Sjsg  * Bring hardware to some kind of halt state so that no one can touch it
61261bb76ff1Sjsg  * any more. It helps to maintain the error context when an error occurs.
61271bb76ff1Sjsg  * Compared to a simple hang, the system will stay stable at least for SSH
61281bb76ff1Sjsg  * access. Then it should be trivial to inspect the hardware state and
61291bb76ff1Sjsg  * see what's going on. Implemented as follows:
61301bb76ff1Sjsg  *
61311bb76ff1Sjsg  * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc),
61321bb76ff1Sjsg  *    clears all CPU mappings to the device, disallows remappings through page faults
61331bb76ff1Sjsg  * 2. amdgpu_irq_disable_all() disables all interrupts
61341bb76ff1Sjsg  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
61351bb76ff1Sjsg  * 4. set adev->no_hw_access to avoid potential crashes after step 5
61361bb76ff1Sjsg  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
61371bb76ff1Sjsg  * 6. 
pci_disable_device() and pci_wait_for_pending_transaction() 61381bb76ff1Sjsg * flush any in flight DMA operations 61391bb76ff1Sjsg */ 61401bb76ff1Sjsg void amdgpu_device_halt(struct amdgpu_device *adev) 61411bb76ff1Sjsg { 61421bb76ff1Sjsg struct pci_dev *pdev = adev->pdev; 61431bb76ff1Sjsg struct drm_device *ddev = adev_to_drm(adev); 61441bb76ff1Sjsg 6145f005ef32Sjsg amdgpu_xcp_dev_unplug(adev); 61461bb76ff1Sjsg drm_dev_unplug(ddev); 61471bb76ff1Sjsg 61481bb76ff1Sjsg amdgpu_irq_disable_all(adev); 61491bb76ff1Sjsg 61501bb76ff1Sjsg amdgpu_fence_driver_hw_fini(adev); 61511bb76ff1Sjsg 61521bb76ff1Sjsg adev->no_hw_access = true; 61531bb76ff1Sjsg 61541bb76ff1Sjsg amdgpu_device_unmap_mmio(adev); 61551bb76ff1Sjsg 61561bb76ff1Sjsg pci_disable_device(pdev); 61571bb76ff1Sjsg pci_wait_for_pending_transaction(pdev); 61581bb76ff1Sjsg } 61591bb76ff1Sjsg 61601bb76ff1Sjsg u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, 61611bb76ff1Sjsg u32 reg) 61621bb76ff1Sjsg { 61631bb76ff1Sjsg unsigned long flags, address, data; 61641bb76ff1Sjsg u32 r; 61651bb76ff1Sjsg 61661bb76ff1Sjsg address = adev->nbio.funcs->get_pcie_port_index_offset(adev); 61671bb76ff1Sjsg data = adev->nbio.funcs->get_pcie_port_data_offset(adev); 61681bb76ff1Sjsg 61691bb76ff1Sjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 61701bb76ff1Sjsg WREG32(address, reg * 4); 61711bb76ff1Sjsg (void)RREG32(address); 61721bb76ff1Sjsg r = RREG32(data); 61731bb76ff1Sjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 61741bb76ff1Sjsg return r; 61751bb76ff1Sjsg } 61761bb76ff1Sjsg 61771bb76ff1Sjsg void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, 61781bb76ff1Sjsg u32 reg, u32 v) 61791bb76ff1Sjsg { 61801bb76ff1Sjsg unsigned long flags, address, data; 61811bb76ff1Sjsg 61821bb76ff1Sjsg address = adev->nbio.funcs->get_pcie_port_index_offset(adev); 61831bb76ff1Sjsg data = adev->nbio.funcs->get_pcie_port_data_offset(adev); 61841bb76ff1Sjsg 61851bb76ff1Sjsg spin_lock_irqsave(&adev->pcie_idx_lock, flags); 61861bb76ff1Sjsg WREG32(address, reg * 4); 61871bb76ff1Sjsg (void)RREG32(address); 61881bb76ff1Sjsg WREG32(data, v); 61891bb76ff1Sjsg (void)RREG32(data); 61901bb76ff1Sjsg spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); 61911bb76ff1Sjsg } 61921bb76ff1Sjsg 61931bb76ff1Sjsg /** 61941bb76ff1Sjsg * amdgpu_device_switch_gang - switch to a new gang 61951bb76ff1Sjsg * @adev: amdgpu_device pointer 61961bb76ff1Sjsg * @gang: the gang to switch to 61971bb76ff1Sjsg * 61981bb76ff1Sjsg * Try to switch to a new gang. 61991bb76ff1Sjsg * Returns: NULL if we switched to the new gang or a reference to the current 62001bb76ff1Sjsg * gang leader. 
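 *
 * Editor's note (usage sketch, not part of the original comment): a typical
 * caller treats a non-NULL return as the old gang leader that must signal
 * before the switch is retried, e.g.:
 *
 *	fence = amdgpu_device_switch_gang(adev, gang);
 *	if (fence)
 *		return fence;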
62011bb76ff1Sjsg  */
62021bb76ff1Sjsg struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
62031bb76ff1Sjsg 					    struct dma_fence *gang)
62041bb76ff1Sjsg {
62051bb76ff1Sjsg 	struct dma_fence *old = NULL;
62061bb76ff1Sjsg 
62071bb76ff1Sjsg 	do {
62081bb76ff1Sjsg 		dma_fence_put(old);
62091bb76ff1Sjsg 		rcu_read_lock();
62101bb76ff1Sjsg 		old = dma_fence_get_rcu_safe(&adev->gang_submit);
62111bb76ff1Sjsg 		rcu_read_unlock();
62121bb76ff1Sjsg 
62131bb76ff1Sjsg 		if (old == gang)
62141bb76ff1Sjsg 			break;
62151bb76ff1Sjsg 
62161bb76ff1Sjsg 		if (!dma_fence_is_signaled(old))
62171bb76ff1Sjsg 			return old;
62181bb76ff1Sjsg 
62191bb76ff1Sjsg 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
62201bb76ff1Sjsg 			 old, gang) != old);
62211bb76ff1Sjsg 
62221bb76ff1Sjsg 	dma_fence_put(old);
62231bb76ff1Sjsg 	return NULL;
62241bb76ff1Sjsg }
62251bb76ff1Sjsg 
62261bb76ff1Sjsg bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
62271bb76ff1Sjsg {
62281bb76ff1Sjsg 	switch (adev->asic_type) {
62291bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
62301bb76ff1Sjsg 	case CHIP_HAINAN:
62311bb76ff1Sjsg #endif
62321bb76ff1Sjsg 	case CHIP_TOPAZ:
62331bb76ff1Sjsg 		/* chips with no display hardware */
62341bb76ff1Sjsg 		return false;
62351bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_SI
62361bb76ff1Sjsg 	case CHIP_TAHITI:
62371bb76ff1Sjsg 	case CHIP_PITCAIRN:
62381bb76ff1Sjsg 	case CHIP_VERDE:
62391bb76ff1Sjsg 	case CHIP_OLAND:
62401bb76ff1Sjsg #endif
62411bb76ff1Sjsg #ifdef CONFIG_DRM_AMDGPU_CIK
62421bb76ff1Sjsg 	case CHIP_BONAIRE:
62431bb76ff1Sjsg 	case CHIP_HAWAII:
62441bb76ff1Sjsg 	case CHIP_KAVERI:
62451bb76ff1Sjsg 	case CHIP_KABINI:
62461bb76ff1Sjsg 	case CHIP_MULLINS:
62471bb76ff1Sjsg #endif
62481bb76ff1Sjsg 	case CHIP_TONGA:
62491bb76ff1Sjsg 	case CHIP_FIJI:
62501bb76ff1Sjsg 	case CHIP_POLARIS10:
62511bb76ff1Sjsg 	case CHIP_POLARIS11:
62521bb76ff1Sjsg 	case CHIP_POLARIS12:
62531bb76ff1Sjsg 	case CHIP_VEGAM:
62541bb76ff1Sjsg 	case CHIP_CARRIZO:
62551bb76ff1Sjsg 	case CHIP_STONEY:
62561bb76ff1Sjsg 		/* chips with display hardware */
62571bb76ff1Sjsg 		return true;
62581bb76ff1Sjsg 	default:
62591bb76ff1Sjsg 		/* IP discovery */
62601bb76ff1Sjsg 		if (!adev->ip_versions[DCE_HWIP][0] ||
62611bb76ff1Sjsg 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
62621bb76ff1Sjsg 			return false;
62631bb76ff1Sjsg 		return true;
62641bb76ff1Sjsg 	}
62651bb76ff1Sjsg }
6266f005ef32Sjsg 
6267f005ef32Sjsg uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6268f005ef32Sjsg 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6269f005ef32Sjsg 		uint32_t expected_value, uint32_t mask)
6270f005ef32Sjsg {
6271f005ef32Sjsg 	uint32_t ret = 0;
6272f005ef32Sjsg 	uint32_t old_ = 0;
6273f005ef32Sjsg 	uint32_t tmp_ = RREG32(reg_addr);
6274f005ef32Sjsg 	uint32_t loop = adev->usec_timeout;
6275f005ef32Sjsg 
6276f005ef32Sjsg 	while ((tmp_ & (mask)) != (expected_value)) {
6277f005ef32Sjsg 		if (old_ != tmp_) {
6278f005ef32Sjsg 			loop = adev->usec_timeout;
6279f005ef32Sjsg 			old_ = tmp_;
6280f005ef32Sjsg 		} else
6281f005ef32Sjsg 			udelay(1);
6282f005ef32Sjsg 		tmp_ = RREG32(reg_addr);
6283f005ef32Sjsg 		loop--;
6284f005ef32Sjsg 		if (!loop) {
6285f005ef32Sjsg 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n",
6286f005ef32Sjsg 				 inst, reg_name, (uint32_t)expected_value,
6287f005ef32Sjsg 				 (uint32_t)(tmp_ & (mask)));
6288f005ef32Sjsg 			ret = -ETIMEDOUT;
6289f005ef32Sjsg 			break;
6290f005ef32Sjsg 		}
6291f005ef32Sjsg 	}
6292f005ef32Sjsg 	return ret;
6293f005ef32Sjsg }
6294
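
/*
 * Editor's note: amdgpu_device_wait_on_rreg() above polls @reg_addr until
 * (value & @mask) == @expected_value or adev->usec_timeout iterations have
 * elapsed, restarting the timeout whenever the register value changes.  It
 * returns 0 on success and -ETIMEDOUT (stored in a uint32_t) on timeout.
 * A usage sketch follows; the register offset and mask are hypothetical:
 *
 *	if (amdgpu_device_wait_on_rreg(adev, 0, mmFOO_STATUS, "FOO_STATUS",
 *				       FOO_STATUS__IDLE_MASK,
 *				       FOO_STATUS__IDLE_MASK))
 *		dev_err(adev->dev, "FOO block did not report idle\n");
 */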