1fb4d8502Sjsg /*
2fb4d8502Sjsg * Copyright 2014 Advanced Micro Devices, Inc.
3fb4d8502Sjsg * Copyright 2008 Red Hat Inc.
4fb4d8502Sjsg * Copyright 2009 Jerome Glisse.
5fb4d8502Sjsg *
6fb4d8502Sjsg * Permission is hereby granted, free of charge, to any person obtaining a
7fb4d8502Sjsg * copy of this software and associated documentation files (the "Software"),
8fb4d8502Sjsg * to deal in the Software without restriction, including without limitation
9fb4d8502Sjsg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10fb4d8502Sjsg * and/or sell copies of the Software, and to permit persons to whom the
11fb4d8502Sjsg * Software is furnished to do so, subject to the following conditions:
12fb4d8502Sjsg *
13fb4d8502Sjsg * The above copyright notice and this permission notice shall be included in
14fb4d8502Sjsg * all copies or substantial portions of the Software.
15fb4d8502Sjsg *
16fb4d8502Sjsg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17fb4d8502Sjsg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18fb4d8502Sjsg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19fb4d8502Sjsg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20fb4d8502Sjsg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21fb4d8502Sjsg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22fb4d8502Sjsg * OTHER DEALINGS IN THE SOFTWARE.
23fb4d8502Sjsg *
24fb4d8502Sjsg */
25c349dbc7Sjsg 
261bb76ff1Sjsg #include <linux/firmware.h>
27fb4d8502Sjsg #include "amdgpu.h"
28fb4d8502Sjsg #include "amdgpu_gfx.h"
29c349dbc7Sjsg #include "amdgpu_rlc.h"
30c349dbc7Sjsg #include "amdgpu_ras.h"
31f005ef32Sjsg #include "amdgpu_xcp.h"
32c349dbc7Sjsg 
33c349dbc7Sjsg /* delay 0.1 second to enable gfx off feature */
34c349dbc7Sjsg #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
35fb4d8502Sjsg 
365ca02815Sjsg #define GFX_OFF_NO_DELAY 0
375ca02815Sjsg 
38fb4d8502Sjsg /*
39c349dbc7Sjsg * GPU GFX IP block helper functions.
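 *
 * The helpers below linearize a (mec, pipe, queue) triple into a single
 * bit index and back:
 *
 *   bit = mec * num_pipe_per_mec * num_queue_per_pipe
 *       + pipe * num_queue_per_pipe
 *       + queue
 *
 * As an illustrative example only (the per-ASIC values differ), assuming
 * num_pipe_per_mec = 4 and num_queue_per_pipe = 8, MEC 1 / pipe 2 /
 * queue 3 maps to bit 1*32 + 2*8 + 3 = 51, and
 * amdgpu_queue_mask_bit_to_mec_queue() recovers the same triple from
 * bit 51.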
40fb4d8502Sjsg */ 41c349dbc7Sjsg 42c349dbc7Sjsg int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec, 43c349dbc7Sjsg int pipe, int queue) 44c349dbc7Sjsg { 45c349dbc7Sjsg int bit = 0; 46c349dbc7Sjsg 47c349dbc7Sjsg bit += mec * adev->gfx.mec.num_pipe_per_mec 48c349dbc7Sjsg * adev->gfx.mec.num_queue_per_pipe; 49c349dbc7Sjsg bit += pipe * adev->gfx.mec.num_queue_per_pipe; 50c349dbc7Sjsg bit += queue; 51c349dbc7Sjsg 52c349dbc7Sjsg return bit; 53c349dbc7Sjsg } 54c349dbc7Sjsg 55ad8b1aafSjsg void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, 56c349dbc7Sjsg int *mec, int *pipe, int *queue) 57c349dbc7Sjsg { 58c349dbc7Sjsg *queue = bit % adev->gfx.mec.num_queue_per_pipe; 59c349dbc7Sjsg *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) 60c349dbc7Sjsg % adev->gfx.mec.num_pipe_per_mec; 61c349dbc7Sjsg *mec = (bit / adev->gfx.mec.num_queue_per_pipe) 62c349dbc7Sjsg / adev->gfx.mec.num_pipe_per_mec; 63c349dbc7Sjsg 64c349dbc7Sjsg } 65c349dbc7Sjsg 66c349dbc7Sjsg bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, 67f005ef32Sjsg int xcc_id, int mec, int pipe, int queue) 68c349dbc7Sjsg { 69c349dbc7Sjsg return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue), 70f005ef32Sjsg adev->gfx.mec_bitmap[xcc_id].queue_bitmap); 71c349dbc7Sjsg } 72c349dbc7Sjsg 73c349dbc7Sjsg int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, 74c349dbc7Sjsg int me, int pipe, int queue) 75c349dbc7Sjsg { 76c349dbc7Sjsg int bit = 0; 77c349dbc7Sjsg 78c349dbc7Sjsg bit += me * adev->gfx.me.num_pipe_per_me 79c349dbc7Sjsg * adev->gfx.me.num_queue_per_pipe; 80c349dbc7Sjsg bit += pipe * adev->gfx.me.num_queue_per_pipe; 81c349dbc7Sjsg bit += queue; 82c349dbc7Sjsg 83c349dbc7Sjsg return bit; 84c349dbc7Sjsg } 85c349dbc7Sjsg 86c349dbc7Sjsg void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, 87c349dbc7Sjsg int *me, int *pipe, int *queue) 88c349dbc7Sjsg { 89c349dbc7Sjsg *queue = bit % adev->gfx.me.num_queue_per_pipe; 90c349dbc7Sjsg *pipe = (bit / adev->gfx.me.num_queue_per_pipe) 91c349dbc7Sjsg % adev->gfx.me.num_pipe_per_me; 92c349dbc7Sjsg *me = (bit / adev->gfx.me.num_queue_per_pipe) 93c349dbc7Sjsg / adev->gfx.me.num_pipe_per_me; 94c349dbc7Sjsg } 95c349dbc7Sjsg 96c349dbc7Sjsg bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, 97c349dbc7Sjsg int me, int pipe, int queue) 98c349dbc7Sjsg { 99c349dbc7Sjsg return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue), 100c349dbc7Sjsg adev->gfx.me.queue_bitmap); 101c349dbc7Sjsg } 102c349dbc7Sjsg 103fb4d8502Sjsg /** 104fb4d8502Sjsg * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter 105fb4d8502Sjsg * 106fb4d8502Sjsg * @mask: array in which the per-shader array disable masks will be stored 107fb4d8502Sjsg * @max_se: number of SEs 108fb4d8502Sjsg * @max_sh: number of SHs 109fb4d8502Sjsg * 110fb4d8502Sjsg * The bitmask of CUs to be disabled in the shader array determined by se and 111fb4d8502Sjsg * sh is stored in mask[se * max_sh + sh]. 
112fb4d8502Sjsg */ 113f005ef32Sjsg void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh) 114fb4d8502Sjsg { 115f005ef32Sjsg unsigned int se, sh, cu; 116fb4d8502Sjsg const char *p; 117fb4d8502Sjsg 118fb4d8502Sjsg memset(mask, 0, sizeof(*mask) * max_se * max_sh); 119fb4d8502Sjsg 120fb4d8502Sjsg if (!amdgpu_disable_cu || !*amdgpu_disable_cu) 121fb4d8502Sjsg return; 122fb4d8502Sjsg 123fb4d8502Sjsg #ifdef notyet 124fb4d8502Sjsg p = amdgpu_disable_cu; 125fb4d8502Sjsg for (;;) { 126fb4d8502Sjsg char *next; 127fb4d8502Sjsg int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu); 128f005ef32Sjsg 129fb4d8502Sjsg if (ret < 3) { 130fb4d8502Sjsg DRM_ERROR("amdgpu: could not parse disable_cu\n"); 131fb4d8502Sjsg return; 132fb4d8502Sjsg } 133fb4d8502Sjsg 134fb4d8502Sjsg if (se < max_se && sh < max_sh && cu < 16) { 135fb4d8502Sjsg DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu); 136fb4d8502Sjsg mask[se * max_sh + sh] |= 1u << cu; 137fb4d8502Sjsg } else { 138fb4d8502Sjsg DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n", 139fb4d8502Sjsg se, sh, cu); 140fb4d8502Sjsg } 141fb4d8502Sjsg 142fb4d8502Sjsg next = strchr(p, ','); 143fb4d8502Sjsg if (!next) 144fb4d8502Sjsg break; 145fb4d8502Sjsg p = next + 1; 146fb4d8502Sjsg } 147fb4d8502Sjsg #endif 148fb4d8502Sjsg } 149fb4d8502Sjsg 1501bb76ff1Sjsg static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) 1511bb76ff1Sjsg { 1521bb76ff1Sjsg return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1; 1531bb76ff1Sjsg } 1541bb76ff1Sjsg 1551bb76ff1Sjsg static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) 156fb4d8502Sjsg { 157fb4d8502Sjsg if (amdgpu_compute_multipipe != -1) { 158fb4d8502Sjsg DRM_INFO("amdgpu: forcing compute pipe policy %d\n", 159fb4d8502Sjsg amdgpu_compute_multipipe); 160fb4d8502Sjsg return amdgpu_compute_multipipe == 1; 161fb4d8502Sjsg } 162fb4d8502Sjsg 1638c68227eSjsg if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) 1648c68227eSjsg return true; 1658c68227eSjsg 166fb4d8502Sjsg /* FIXME: spreading the queues across pipes causes perf regressions 167fb4d8502Sjsg * on POLARIS11 compute workloads */ 168fb4d8502Sjsg if (adev->asic_type == CHIP_POLARIS11) 169fb4d8502Sjsg return false; 170fb4d8502Sjsg 171fb4d8502Sjsg return adev->gfx.mec.num_mec > 1; 172fb4d8502Sjsg } 173fb4d8502Sjsg 1741bb76ff1Sjsg bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, 1751bb76ff1Sjsg struct amdgpu_ring *ring) 1761bb76ff1Sjsg { 1771bb76ff1Sjsg int queue = ring->queue; 1781bb76ff1Sjsg int pipe = ring->pipe; 1791bb76ff1Sjsg 1801bb76ff1Sjsg /* Policy: use pipe1 queue0 as high priority graphics queue if we 1811bb76ff1Sjsg * have more than one gfx pipe. 1821bb76ff1Sjsg */ 1831bb76ff1Sjsg if (amdgpu_gfx_is_graphics_multipipe_capable(adev) && 1841bb76ff1Sjsg adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) { 1851bb76ff1Sjsg int me = ring->me; 1861bb76ff1Sjsg int bit; 1871bb76ff1Sjsg 1881bb76ff1Sjsg bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue); 1891bb76ff1Sjsg if (ring == &adev->gfx.gfx_ring[bit]) 1901bb76ff1Sjsg return true; 1911bb76ff1Sjsg } 1921bb76ff1Sjsg 1931bb76ff1Sjsg return false; 1941bb76ff1Sjsg } 1951bb76ff1Sjsg 196c349dbc7Sjsg bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, 1975ca02815Sjsg struct amdgpu_ring *ring) 198c349dbc7Sjsg { 1995ca02815Sjsg /* Policy: use 1st queue as high priority compute queue if we 2005ca02815Sjsg * have more than one compute queue. 
2015ca02815Sjsg */
2025ca02815Sjsg if (adev->gfx.num_compute_rings > 1 &&
2035ca02815Sjsg ring == &adev->gfx.compute_ring[0])
2045ca02815Sjsg return true;
205ad8b1aafSjsg 
2065ca02815Sjsg return false;
207c349dbc7Sjsg }
208c349dbc7Sjsg 
209fb4d8502Sjsg void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
210fb4d8502Sjsg {
211f005ef32Sjsg int i, j, queue, pipe;
2121bb76ff1Sjsg bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
213ad8b1aafSjsg int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
214ad8b1aafSjsg adev->gfx.mec.num_queue_per_pipe,
215ad8b1aafSjsg adev->gfx.num_compute_rings);
216f005ef32Sjsg int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
217fb4d8502Sjsg 
218fb4d8502Sjsg if (multipipe_policy) {
219f005ef32Sjsg /* policy: spread the queues evenly across all pipes on MEC1 only;
220f005ef32Sjsg * for multiple XCCs, just reuse the original policy per XCC for simplicity */
221f005ef32Sjsg for (j = 0; j < num_xcc; j++) {
222ad8b1aafSjsg for (i = 0; i < max_queues_per_mec; i++) {
223ad8b1aafSjsg pipe = i % adev->gfx.mec.num_pipe_per_mec;
224ad8b1aafSjsg queue = (i / adev->gfx.mec.num_pipe_per_mec) %
225ad8b1aafSjsg adev->gfx.mec.num_queue_per_pipe;
226ad8b1aafSjsg 
227ad8b1aafSjsg set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
228f005ef32Sjsg adev->gfx.mec_bitmap[j].queue_bitmap);
229f005ef32Sjsg }
230ad8b1aafSjsg }
231fb4d8502Sjsg } else {
232ad8b1aafSjsg /* policy: amdgpu owns all queues in the given pipe */
233f005ef32Sjsg for (j = 0; j < num_xcc; j++) {
234ad8b1aafSjsg for (i = 0; i < max_queues_per_mec; ++i)
235f005ef32Sjsg set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
236f005ef32Sjsg }
237fb4d8502Sjsg }
238fb4d8502Sjsg 
239f005ef32Sjsg for (j = 0; j < num_xcc; j++) {
240f005ef32Sjsg dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
241f005ef32Sjsg bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
242f005ef32Sjsg }
243fb4d8502Sjsg }
244fb4d8502Sjsg 
245c349dbc7Sjsg void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
246c349dbc7Sjsg {
2471bb76ff1Sjsg int i, queue, pipe;
2481bb76ff1Sjsg bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
2491bb76ff1Sjsg int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
2501bb76ff1Sjsg adev->gfx.me.num_queue_per_pipe;
251c349dbc7Sjsg 
2521bb76ff1Sjsg if (multipipe_policy) {
253c349dbc7Sjsg /* policy: amdgpu owns the first queue per pipe at this stage;
254c349dbc7Sjsg * this will be extended to multiple queues per pipe later */
2551bb76ff1Sjsg for (i = 0; i < max_queues_per_me; i++) {
2561bb76ff1Sjsg pipe = i % adev->gfx.me.num_pipe_per_me;
2571bb76ff1Sjsg queue = (i / adev->gfx.me.num_pipe_per_me) %
2581bb76ff1Sjsg adev->gfx.me.num_queue_per_pipe;
2591bb76ff1Sjsg 
2601bb76ff1Sjsg set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
2611bb76ff1Sjsg adev->gfx.me.queue_bitmap);
2621bb76ff1Sjsg }
2631bb76ff1Sjsg } else {
2641bb76ff1Sjsg for (i = 0; i < max_queues_per_me; ++i)
265c349dbc7Sjsg set_bit(i, adev->gfx.me.queue_bitmap);
266c349dbc7Sjsg }
267c349dbc7Sjsg 
268c349dbc7Sjsg /* update the number of active graphics rings */
269c349dbc7Sjsg adev->gfx.num_gfx_rings =
270c349dbc7Sjsg bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
271c349dbc7Sjsg }
272c349dbc7Sjsg 
273fb4d8502Sjsg static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
274f005ef32Sjsg struct amdgpu_ring *ring, int xcc_id)
275fb4d8502Sjsg {
276fb4d8502Sjsg int queue_bit;
277fb4d8502Sjsg int mec, pipe, queue;
278fb4d8502Sjsg 
279fb4d8502Sjsg queue_bit = adev->gfx.mec.num_mec
280fb4d8502Sjsg * adev->gfx.mec.num_pipe_per_mec
281fb4d8502Sjsg * adev->gfx.mec.num_queue_per_pipe;
282fb4d8502Sjsg 
283ffd4d835Sjsg while (--queue_bit >= 0) {
284f005ef32Sjsg if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
285fb4d8502Sjsg continue;
286fb4d8502Sjsg 
287ad8b1aafSjsg amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
288fb4d8502Sjsg 
289fb4d8502Sjsg /*
290fb4d8502Sjsg * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
291fb4d8502Sjsg * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
292fb4d8502Sjsg * can only be issued on queue 0.
293fb4d8502Sjsg */
294fb4d8502Sjsg if ((mec == 1 && pipe > 1) || queue != 0)
295fb4d8502Sjsg continue;
296fb4d8502Sjsg 
297fb4d8502Sjsg ring->me = mec + 1;
298fb4d8502Sjsg ring->pipe = pipe;
299fb4d8502Sjsg ring->queue = queue;
300fb4d8502Sjsg 
301fb4d8502Sjsg return 0;
302fb4d8502Sjsg }
303fb4d8502Sjsg 
304fb4d8502Sjsg dev_err(adev->dev, "Failed to find a queue for KIQ\n");
305fb4d8502Sjsg return -EINVAL;
306fb4d8502Sjsg }
307fb4d8502Sjsg 
308fb4d8502Sjsg int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
309fb4d8502Sjsg struct amdgpu_ring *ring,
310f005ef32Sjsg struct amdgpu_irq_src *irq, int xcc_id)
311fb4d8502Sjsg {
312f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
313fb4d8502Sjsg int r = 0;
314fb4d8502Sjsg 
315fb4d8502Sjsg mtx_init(&kiq->ring_lock, IPL_TTY);
316fb4d8502Sjsg 
317fb4d8502Sjsg ring->adev = NULL;
318fb4d8502Sjsg ring->ring_obj = NULL;
319fb4d8502Sjsg ring->use_doorbell = true;
320f005ef32Sjsg ring->xcc_id = xcc_id;
321f005ef32Sjsg ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
322f005ef32Sjsg ring->doorbell_index =
323f005ef32Sjsg (adev->doorbell_index.kiq +
324f005ef32Sjsg xcc_id * adev->doorbell_index.xcc_doorbell_range)
325f005ef32Sjsg << 1;
326fb4d8502Sjsg 
327f005ef32Sjsg r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
328fb4d8502Sjsg if (r)
329fb4d8502Sjsg return r;
330fb4d8502Sjsg 
331fb4d8502Sjsg ring->eop_gpu_addr = kiq->eop_gpu_addr;
332ad8b1aafSjsg ring->no_scheduler = true;
333f005ef32Sjsg snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue);
3345ca02815Sjsg r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
3355ca02815Sjsg AMDGPU_RING_PRIO_DEFAULT, NULL);
336fb4d8502Sjsg if (r)
337fb4d8502Sjsg dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
338fb4d8502Sjsg 
339fb4d8502Sjsg return r;
340fb4d8502Sjsg }
341fb4d8502Sjsg 
342c349dbc7Sjsg void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
343fb4d8502Sjsg {
344fb4d8502Sjsg amdgpu_ring_fini(ring);
345fb4d8502Sjsg }
346fb4d8502Sjsg 
347f005ef32Sjsg void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
348fb4d8502Sjsg {
349f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
350fb4d8502Sjsg 
351fb4d8502Sjsg amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
352fb4d8502Sjsg }
353fb4d8502Sjsg 
354fb4d8502Sjsg int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
355f005ef32Sjsg unsigned int hpd_size, int xcc_id)
356fb4d8502Sjsg {
357fb4d8502Sjsg int r;
358fb4d8502Sjsg u32 *hpd;
359f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
360fb4d8502Sjsg 
361fb4d8502Sjsg r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
362fb4d8502Sjsg AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
363fb4d8502Sjsg &kiq->eop_gpu_addr, (void **)&hpd);
364fb4d8502Sjsg if (r) {
365fb4d8502Sjsg dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
366fb4d8502Sjsg return r;
367fb4d8502Sjsg }
368fb4d8502Sjsg 
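/*
 * Zero-fill the newly created KIQ EOP buffer, then drop the CPU
 * mapping; only the GPU address (kiq->eop_gpu_addr) is used from
 * here on.
 */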
369fb4d8502Sjsg memset(hpd, 0, hpd_size);
370fb4d8502Sjsg 
371fb4d8502Sjsg r = amdgpu_bo_reserve(kiq->eop_obj, true);
372fb4d8502Sjsg if (unlikely(r != 0))
373fb4d8502Sjsg dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
374fb4d8502Sjsg amdgpu_bo_kunmap(kiq->eop_obj);
375fb4d8502Sjsg amdgpu_bo_unreserve(kiq->eop_obj);
376fb4d8502Sjsg 
377fb4d8502Sjsg return 0;
378fb4d8502Sjsg }
379fb4d8502Sjsg 
380c349dbc7Sjsg /* create MQD for each compute/gfx queue */
381c349dbc7Sjsg int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
382f005ef32Sjsg unsigned int mqd_size, int xcc_id)
383fb4d8502Sjsg {
384f005ef32Sjsg int r, i, j;
385f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
386f005ef32Sjsg struct amdgpu_ring *ring = &kiq->ring;
387f005ef32Sjsg u32 domain = AMDGPU_GEM_DOMAIN_GTT;
388f005ef32Sjsg 
389f005ef32Sjsg #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
390f005ef32Sjsg /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
391f005ef32Sjsg if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
392f005ef32Sjsg domain |= AMDGPU_GEM_DOMAIN_VRAM;
393f005ef32Sjsg #endif
394fb4d8502Sjsg 
395fb4d8502Sjsg /* create MQD for KIQ */
3961bb76ff1Sjsg if (!adev->enable_mes_kiq && !ring->mqd_obj) {
397fb4d8502Sjsg /* Originally the KIQ MQD was put in the GTT domain, but for SRIOV the
398fb4d8502Sjsg * VRAM domain is a must: otherwise the hypervisor triggers SAVE_VF
399fb4d8502Sjsg * failures after the driver is unloaded (the MQD has been deallocated
400fb4d8502Sjsg * and gart_unbind called). To avoid diverging, use the VRAM domain for
401fb4d8502Sjsg * the KIQ MQD on both SRIOV and bare metal. */
402fb4d8502Sjsg r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
403f005ef32Sjsg AMDGPU_GEM_DOMAIN_VRAM |
404f005ef32Sjsg AMDGPU_GEM_DOMAIN_GTT,
405f005ef32Sjsg &ring->mqd_obj,
406f005ef32Sjsg &ring->mqd_gpu_addr,
407f005ef32Sjsg &ring->mqd_ptr);
408fb4d8502Sjsg if (r) {
409fb4d8502Sjsg dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
410fb4d8502Sjsg return r;
411fb4d8502Sjsg }
412fb4d8502Sjsg 
413fb4d8502Sjsg /* prepare MQD backup */
414f005ef32Sjsg kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
415f005ef32Sjsg if (!kiq->mqd_backup) {
416f005ef32Sjsg dev_warn(adev->dev,
417f005ef32Sjsg "no memory to create MQD backup for ring %s\n", ring->name);
418f005ef32Sjsg return -ENOMEM;
419f005ef32Sjsg }
420fb4d8502Sjsg }
421fb4d8502Sjsg 
422c349dbc7Sjsg if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
423c349dbc7Sjsg /* create MQD for each KGQ */
424c349dbc7Sjsg for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
425c349dbc7Sjsg ring = &adev->gfx.gfx_ring[i];
426c349dbc7Sjsg if (!ring->mqd_obj) {
427c349dbc7Sjsg r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
428f005ef32Sjsg domain, &ring->mqd_obj,
429c349dbc7Sjsg &ring->mqd_gpu_addr, &ring->mqd_ptr);
430c349dbc7Sjsg if (r) {
431c349dbc7Sjsg dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
432c349dbc7Sjsg return r;
433c349dbc7Sjsg }
434c349dbc7Sjsg 
435f005ef32Sjsg ring->mqd_size = mqd_size;
436c349dbc7Sjsg /* prepare MQD backup */
437c349dbc7Sjsg adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
438f005ef32Sjsg if (!adev->gfx.me.mqd_backup[i]) {
439c349dbc7Sjsg dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
440f005ef32Sjsg return -ENOMEM;
441f005ef32Sjsg }
442c349dbc7Sjsg }
443c349dbc7Sjsg }
444c349dbc7Sjsg }
445c349dbc7Sjsg 
446fb4d8502Sjsg /* create MQD for each KCQ */
447fb4d8502Sjsg for (i = 0; i < adev->gfx.num_compute_rings; i++) {
448f005ef32Sjsg j = i +
xcc_id * adev->gfx.num_compute_rings; 449f005ef32Sjsg ring = &adev->gfx.compute_ring[j]; 450fb4d8502Sjsg if (!ring->mqd_obj) { 451fb4d8502Sjsg r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, 452f005ef32Sjsg domain, &ring->mqd_obj, 453fb4d8502Sjsg &ring->mqd_gpu_addr, &ring->mqd_ptr); 454fb4d8502Sjsg if (r) { 455c349dbc7Sjsg dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); 456fb4d8502Sjsg return r; 457fb4d8502Sjsg } 458fb4d8502Sjsg 459f005ef32Sjsg ring->mqd_size = mqd_size; 460fb4d8502Sjsg /* prepare MQD backup */ 461f005ef32Sjsg adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL); 462f005ef32Sjsg if (!adev->gfx.mec.mqd_backup[j]) { 463fb4d8502Sjsg dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 464f005ef32Sjsg return -ENOMEM; 465f005ef32Sjsg } 466fb4d8502Sjsg } 467fb4d8502Sjsg } 468fb4d8502Sjsg 469fb4d8502Sjsg return 0; 470fb4d8502Sjsg } 471fb4d8502Sjsg 472f005ef32Sjsg void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id) 473fb4d8502Sjsg { 474fb4d8502Sjsg struct amdgpu_ring *ring = NULL; 475f005ef32Sjsg int i, j; 476f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 477fb4d8502Sjsg 478c349dbc7Sjsg if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { 479c349dbc7Sjsg for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 480c349dbc7Sjsg ring = &adev->gfx.gfx_ring[i]; 481c349dbc7Sjsg kfree(adev->gfx.me.mqd_backup[i]); 482c349dbc7Sjsg amdgpu_bo_free_kernel(&ring->mqd_obj, 483c349dbc7Sjsg &ring->mqd_gpu_addr, 484c349dbc7Sjsg &ring->mqd_ptr); 485c349dbc7Sjsg } 486c349dbc7Sjsg } 487c349dbc7Sjsg 488fb4d8502Sjsg for (i = 0; i < adev->gfx.num_compute_rings; i++) { 489f005ef32Sjsg j = i + xcc_id * adev->gfx.num_compute_rings; 490f005ef32Sjsg ring = &adev->gfx.compute_ring[j]; 491f005ef32Sjsg kfree(adev->gfx.mec.mqd_backup[j]); 492fb4d8502Sjsg amdgpu_bo_free_kernel(&ring->mqd_obj, 493fb4d8502Sjsg &ring->mqd_gpu_addr, 494fb4d8502Sjsg &ring->mqd_ptr); 495fb4d8502Sjsg } 496fb4d8502Sjsg 497f005ef32Sjsg ring = &kiq->ring; 498f005ef32Sjsg kfree(kiq->mqd_backup); 499fb4d8502Sjsg amdgpu_bo_free_kernel(&ring->mqd_obj, 500fb4d8502Sjsg &ring->mqd_gpu_addr, 501fb4d8502Sjsg &ring->mqd_ptr); 502fb4d8502Sjsg } 503c349dbc7Sjsg 504f005ef32Sjsg int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) 505c349dbc7Sjsg { 506f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 507c349dbc7Sjsg struct amdgpu_ring *kiq_ring = &kiq->ring; 5081bb76ff1Sjsg int i, r = 0; 509f005ef32Sjsg int j; 510c349dbc7Sjsg 511c349dbc7Sjsg if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 512c349dbc7Sjsg return -EINVAL; 513c349dbc7Sjsg 514f005ef32Sjsg spin_lock(&kiq->ring_lock); 515c349dbc7Sjsg if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * 5165ca02815Sjsg adev->gfx.num_compute_rings)) { 517f005ef32Sjsg spin_unlock(&kiq->ring_lock); 518c349dbc7Sjsg return -ENOMEM; 5195ca02815Sjsg } 520c349dbc7Sjsg 521f005ef32Sjsg for (i = 0; i < adev->gfx.num_compute_rings; i++) { 522f005ef32Sjsg j = i + xcc_id * adev->gfx.num_compute_rings; 523f005ef32Sjsg kiq->pmf->kiq_unmap_queues(kiq_ring, 524f005ef32Sjsg &adev->gfx.compute_ring[j], 525c349dbc7Sjsg RESET_QUEUES, 0, 0); 526f005ef32Sjsg } 5271bb76ff1Sjsg 528f005ef32Sjsg if (kiq_ring->sched.ready && !adev->job_hang) 5295ca02815Sjsg r = amdgpu_ring_test_helper(kiq_ring); 530f005ef32Sjsg spin_unlock(&kiq->ring_lock); 531f005ef32Sjsg 532f005ef32Sjsg return r; 533f005ef32Sjsg } 534f005ef32Sjsg 535f005ef32Sjsg int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) 536f005ef32Sjsg { 
537f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 538f005ef32Sjsg struct amdgpu_ring *kiq_ring = &kiq->ring; 539f005ef32Sjsg int i, r = 0; 540f005ef32Sjsg int j; 541f005ef32Sjsg 542f005ef32Sjsg if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 543f005ef32Sjsg return -EINVAL; 544f005ef32Sjsg 545f005ef32Sjsg spin_lock(&kiq->ring_lock); 546f005ef32Sjsg if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { 547f005ef32Sjsg if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * 548f005ef32Sjsg adev->gfx.num_gfx_rings)) { 549f005ef32Sjsg spin_unlock(&kiq->ring_lock); 550f005ef32Sjsg return -ENOMEM; 551f005ef32Sjsg } 552f005ef32Sjsg 553f005ef32Sjsg for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 554f005ef32Sjsg j = i + xcc_id * adev->gfx.num_gfx_rings; 555f005ef32Sjsg kiq->pmf->kiq_unmap_queues(kiq_ring, 556f005ef32Sjsg &adev->gfx.gfx_ring[j], 557f005ef32Sjsg PREEMPT_QUEUES, 0, 0); 558f005ef32Sjsg } 559f005ef32Sjsg } 560f005ef32Sjsg 561f005ef32Sjsg if (adev->gfx.kiq[0].ring.sched.ready && !adev->job_hang) 562f005ef32Sjsg r = amdgpu_ring_test_helper(kiq_ring); 563f005ef32Sjsg spin_unlock(&kiq->ring_lock); 564c349dbc7Sjsg 5655ca02815Sjsg return r; 566c349dbc7Sjsg } 567c349dbc7Sjsg 568ad8b1aafSjsg int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, 569ad8b1aafSjsg int queue_bit) 570ad8b1aafSjsg { 571ad8b1aafSjsg int mec, pipe, queue; 572ad8b1aafSjsg int set_resource_bit = 0; 573ad8b1aafSjsg 574ad8b1aafSjsg amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue); 575ad8b1aafSjsg 576ad8b1aafSjsg set_resource_bit = mec * 4 * 8 + pipe * 8 + queue; 577ad8b1aafSjsg 578ad8b1aafSjsg return set_resource_bit; 579ad8b1aafSjsg } 580ad8b1aafSjsg 581f005ef32Sjsg int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) 582c349dbc7Sjsg { 583f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 584f005ef32Sjsg struct amdgpu_ring *kiq_ring = &kiq->ring; 585c349dbc7Sjsg uint64_t queue_mask = 0; 586f005ef32Sjsg int r, i, j; 587c349dbc7Sjsg 588c349dbc7Sjsg if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources) 589c349dbc7Sjsg return -EINVAL; 590c349dbc7Sjsg 591c349dbc7Sjsg for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 592f005ef32Sjsg if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap)) 593c349dbc7Sjsg continue; 594c349dbc7Sjsg 595c349dbc7Sjsg /* This situation may be hit in the future if a new HW 596c349dbc7Sjsg * generation exposes more than 64 queues. 
If so, the 597c349dbc7Sjsg * definition of queue_mask needs updating */ 598c349dbc7Sjsg if (WARN_ON(i > (sizeof(queue_mask)*8))) { 599c349dbc7Sjsg DRM_ERROR("Invalid KCQ enabled: %d\n", i); 600c349dbc7Sjsg break; 601c349dbc7Sjsg } 602c349dbc7Sjsg 603ad8b1aafSjsg queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i)); 604c349dbc7Sjsg } 605c349dbc7Sjsg 606c349dbc7Sjsg DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe, 607c349dbc7Sjsg kiq_ring->queue); 608f005ef32Sjsg amdgpu_device_flush_hdp(adev, NULL); 609f005ef32Sjsg 610f005ef32Sjsg spin_lock(&kiq->ring_lock); 611c349dbc7Sjsg r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 612c349dbc7Sjsg adev->gfx.num_compute_rings + 613c349dbc7Sjsg kiq->pmf->set_resources_size); 614c349dbc7Sjsg if (r) { 615c349dbc7Sjsg DRM_ERROR("Failed to lock KIQ (%d).\n", r); 616f005ef32Sjsg spin_unlock(&kiq->ring_lock); 617c349dbc7Sjsg return r; 618c349dbc7Sjsg } 619c349dbc7Sjsg 6201bb76ff1Sjsg if (adev->enable_mes) 6211bb76ff1Sjsg queue_mask = ~0ULL; 6221bb76ff1Sjsg 623c349dbc7Sjsg kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); 624f005ef32Sjsg for (i = 0; i < adev->gfx.num_compute_rings; i++) { 625f005ef32Sjsg j = i + xcc_id * adev->gfx.num_compute_rings; 626f005ef32Sjsg kiq->pmf->kiq_map_queues(kiq_ring, 627f005ef32Sjsg &adev->gfx.compute_ring[j]); 628f005ef32Sjsg } 629c349dbc7Sjsg 630c349dbc7Sjsg r = amdgpu_ring_test_helper(kiq_ring); 631f005ef32Sjsg spin_unlock(&kiq->ring_lock); 632f005ef32Sjsg if (r) 633f005ef32Sjsg DRM_ERROR("KCQ enable failed\n"); 634f005ef32Sjsg 635f005ef32Sjsg return r; 636f005ef32Sjsg } 637f005ef32Sjsg 638f005ef32Sjsg int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) 639f005ef32Sjsg { 640f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; 641f005ef32Sjsg struct amdgpu_ring *kiq_ring = &kiq->ring; 642f005ef32Sjsg int r, i, j; 643f005ef32Sjsg 644f005ef32Sjsg if (!kiq->pmf || !kiq->pmf->kiq_map_queues) 645f005ef32Sjsg return -EINVAL; 646f005ef32Sjsg 647f005ef32Sjsg amdgpu_device_flush_hdp(adev, NULL); 648f005ef32Sjsg 649f005ef32Sjsg spin_lock(&kiq->ring_lock); 650f005ef32Sjsg /* No need to map kcq on the slave */ 651f005ef32Sjsg if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { 652f005ef32Sjsg r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size * 653f005ef32Sjsg adev->gfx.num_gfx_rings); 654f005ef32Sjsg if (r) { 655f005ef32Sjsg DRM_ERROR("Failed to lock KIQ (%d).\n", r); 656f005ef32Sjsg spin_unlock(&kiq->ring_lock); 657f005ef32Sjsg return r; 658f005ef32Sjsg } 659f005ef32Sjsg 660f005ef32Sjsg for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 661f005ef32Sjsg j = i + xcc_id * adev->gfx.num_gfx_rings; 662f005ef32Sjsg kiq->pmf->kiq_map_queues(kiq_ring, 663f005ef32Sjsg &adev->gfx.gfx_ring[j]); 664f005ef32Sjsg } 665f005ef32Sjsg } 666f005ef32Sjsg 667f005ef32Sjsg r = amdgpu_ring_test_helper(kiq_ring); 668f005ef32Sjsg spin_unlock(&kiq->ring_lock); 669c349dbc7Sjsg if (r) 670c349dbc7Sjsg DRM_ERROR("KCQ enable failed\n"); 671c349dbc7Sjsg 672c349dbc7Sjsg return r; 673c349dbc7Sjsg } 674c349dbc7Sjsg 675c349dbc7Sjsg /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable 676c349dbc7Sjsg * 677c349dbc7Sjsg * @adev: amdgpu_device pointer 678c349dbc7Sjsg * @bool enable true: enable gfx off feature, false: disable gfx off feature 679c349dbc7Sjsg * 680c349dbc7Sjsg * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. 681c349dbc7Sjsg * 2. other client can send request to disable gfx off feature, the request should be honored. 682c349dbc7Sjsg * 3. 
other client can cancel their request of disable gfx off feature 683c349dbc7Sjsg * 4. other client should not send request to enable gfx off feature before disable gfx off feature. 684c349dbc7Sjsg */ 685c349dbc7Sjsg 686c349dbc7Sjsg void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) 687c349dbc7Sjsg { 6885ca02815Sjsg unsigned long delay = GFX_OFF_DELAY_ENABLE; 6895ca02815Sjsg 690c349dbc7Sjsg if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) 691c349dbc7Sjsg return; 692c349dbc7Sjsg 693c349dbc7Sjsg mutex_lock(&adev->gfx.gfx_off_mutex); 694c349dbc7Sjsg 6958b172e32Sjsg if (enable) { 6968b172e32Sjsg /* If the count is already 0, it means there's an imbalance bug somewhere. 6978b172e32Sjsg * Note that the bug may be in a different caller than the one which triggers the 6988b172e32Sjsg * WARN_ON_ONCE. 6998b172e32Sjsg */ 7008b172e32Sjsg if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)) 7018b172e32Sjsg goto unlock; 7028b172e32Sjsg 703c349dbc7Sjsg adev->gfx.gfx_off_req_count--; 704c349dbc7Sjsg 7055ca02815Sjsg if (adev->gfx.gfx_off_req_count == 0 && 7065ca02815Sjsg !adev->gfx.gfx_off_state) { 707abed8ff9Sjsg /* If going to s2idle, no need to wait */ 708abed8ff9Sjsg if (adev->in_s0ix) { 709abed8ff9Sjsg if (!amdgpu_dpm_set_powergating_by_smu(adev, 710abed8ff9Sjsg AMD_IP_BLOCK_TYPE_GFX, true)) 711abed8ff9Sjsg adev->gfx.gfx_off_state = true; 712abed8ff9Sjsg } else { 7135ca02815Sjsg schedule_delayed_work(&adev->gfx.gfx_off_delay_work, 7145ca02815Sjsg delay); 7155ca02815Sjsg } 716abed8ff9Sjsg } 7178b172e32Sjsg } else { 7188b172e32Sjsg if (adev->gfx.gfx_off_req_count == 0) { 7198b172e32Sjsg cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 7208b172e32Sjsg 7218b172e32Sjsg if (adev->gfx.gfx_off_state && 7228b172e32Sjsg !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { 723c349dbc7Sjsg adev->gfx.gfx_off_state = false; 724ad8b1aafSjsg 725ad8b1aafSjsg if (adev->gfx.funcs->init_spm_golden) { 7268b172e32Sjsg dev_dbg(adev->dev, 7278b172e32Sjsg "GFXOFF is disabled, re-init SPM golden settings\n"); 728ad8b1aafSjsg amdgpu_gfx_init_spm_golden(adev); 729ad8b1aafSjsg } 730ad8b1aafSjsg } 731c349dbc7Sjsg } 732c349dbc7Sjsg 7338b172e32Sjsg adev->gfx.gfx_off_req_count++; 7348b172e32Sjsg } 7358b172e32Sjsg 7368b172e32Sjsg unlock: 737c349dbc7Sjsg mutex_unlock(&adev->gfx.gfx_off_mutex); 738c349dbc7Sjsg } 739c349dbc7Sjsg 7401bb76ff1Sjsg int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) 7411bb76ff1Sjsg { 7421bb76ff1Sjsg int r = 0; 7431bb76ff1Sjsg 7441bb76ff1Sjsg mutex_lock(&adev->gfx.gfx_off_mutex); 7451bb76ff1Sjsg 7461bb76ff1Sjsg r = amdgpu_dpm_set_residency_gfxoff(adev, value); 7471bb76ff1Sjsg 7481bb76ff1Sjsg mutex_unlock(&adev->gfx.gfx_off_mutex); 7491bb76ff1Sjsg 7501bb76ff1Sjsg return r; 7511bb76ff1Sjsg } 7521bb76ff1Sjsg 7531bb76ff1Sjsg int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) 7541bb76ff1Sjsg { 7551bb76ff1Sjsg int r = 0; 7561bb76ff1Sjsg 7571bb76ff1Sjsg mutex_lock(&adev->gfx.gfx_off_mutex); 7581bb76ff1Sjsg 7591bb76ff1Sjsg r = amdgpu_dpm_get_residency_gfxoff(adev, value); 7601bb76ff1Sjsg 7611bb76ff1Sjsg mutex_unlock(&adev->gfx.gfx_off_mutex); 7621bb76ff1Sjsg 7631bb76ff1Sjsg return r; 7641bb76ff1Sjsg } 7651bb76ff1Sjsg 7661bb76ff1Sjsg int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) 7671bb76ff1Sjsg { 7681bb76ff1Sjsg int r = 0; 7691bb76ff1Sjsg 7701bb76ff1Sjsg mutex_lock(&adev->gfx.gfx_off_mutex); 7711bb76ff1Sjsg 7721bb76ff1Sjsg r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); 7731bb76ff1Sjsg 7741bb76ff1Sjsg 
mutex_unlock(&adev->gfx.gfx_off_mutex); 7751bb76ff1Sjsg 7761bb76ff1Sjsg return r; 7771bb76ff1Sjsg } 7781bb76ff1Sjsg 779ad8b1aafSjsg int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) 780ad8b1aafSjsg { 781ad8b1aafSjsg 782ad8b1aafSjsg int r = 0; 783ad8b1aafSjsg 784ad8b1aafSjsg mutex_lock(&adev->gfx.gfx_off_mutex); 785ad8b1aafSjsg 7861bb76ff1Sjsg r = amdgpu_dpm_get_status_gfxoff(adev, value); 787ad8b1aafSjsg 788ad8b1aafSjsg mutex_unlock(&adev->gfx.gfx_off_mutex); 789ad8b1aafSjsg 790ad8b1aafSjsg return r; 791ad8b1aafSjsg } 792ad8b1aafSjsg 7931bb76ff1Sjsg int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) 794c349dbc7Sjsg { 795c349dbc7Sjsg int r; 796c349dbc7Sjsg 7971bb76ff1Sjsg if (amdgpu_ras_is_supported(adev, ras_block->block)) { 798*bbf030e1Sjsg if (!amdgpu_persistent_edc_harvesting_supported(adev)) { 799*bbf030e1Sjsg r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); 800*bbf030e1Sjsg if (r) 801*bbf030e1Sjsg return r; 802*bbf030e1Sjsg } 8035ca02815Sjsg 8041bb76ff1Sjsg r = amdgpu_ras_block_late_init(adev, ras_block); 8051bb76ff1Sjsg if (r) 8061bb76ff1Sjsg return r; 8071bb76ff1Sjsg 80858659cb5Sjsg if (adev->gfx.cp_ecc_error_irq.funcs) { 809c349dbc7Sjsg r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 810c349dbc7Sjsg if (r) 811c349dbc7Sjsg goto late_fini; 81258659cb5Sjsg } 813c349dbc7Sjsg } else { 8141bb76ff1Sjsg amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); 815c349dbc7Sjsg } 816c349dbc7Sjsg 817c349dbc7Sjsg return 0; 818c349dbc7Sjsg late_fini: 8191bb76ff1Sjsg amdgpu_ras_block_late_fini(adev, ras_block); 820c349dbc7Sjsg return r; 821c349dbc7Sjsg } 822c349dbc7Sjsg 823f005ef32Sjsg int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev) 824f005ef32Sjsg { 825f005ef32Sjsg int err = 0; 826f005ef32Sjsg struct amdgpu_gfx_ras *ras = NULL; 827f005ef32Sjsg 828f005ef32Sjsg /* adev->gfx.ras is NULL, which means gfx does not 829f005ef32Sjsg * support ras function, then do nothing here. 
830f005ef32Sjsg */ 831f005ef32Sjsg if (!adev->gfx.ras) 832f005ef32Sjsg return 0; 833f005ef32Sjsg 834f005ef32Sjsg ras = adev->gfx.ras; 835f005ef32Sjsg 836f005ef32Sjsg err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 837f005ef32Sjsg if (err) { 838f005ef32Sjsg dev_err(adev->dev, "Failed to register gfx ras block!\n"); 839f005ef32Sjsg return err; 840f005ef32Sjsg } 841f005ef32Sjsg 842f005ef32Sjsg strlcpy(ras->ras_block.ras_comm.name, "gfx", 843f005ef32Sjsg sizeof(ras->ras_block.ras_comm.name)); 844f005ef32Sjsg ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; 845f005ef32Sjsg ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 846f005ef32Sjsg adev->gfx.ras_if = &ras->ras_block.ras_comm; 847f005ef32Sjsg 848f005ef32Sjsg /* If not define special ras_late_init function, use gfx default ras_late_init */ 849f005ef32Sjsg if (!ras->ras_block.ras_late_init) 850f005ef32Sjsg ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init; 851f005ef32Sjsg 852f005ef32Sjsg /* If not defined special ras_cb function, use default ras_cb */ 853f005ef32Sjsg if (!ras->ras_block.ras_cb) 854f005ef32Sjsg ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; 855f005ef32Sjsg 856f005ef32Sjsg return 0; 857f005ef32Sjsg } 858f005ef32Sjsg 859f005ef32Sjsg int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev, 860f005ef32Sjsg struct amdgpu_iv_entry *entry) 861f005ef32Sjsg { 862f005ef32Sjsg if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler) 863f005ef32Sjsg return adev->gfx.ras->poison_consumption_handler(adev, entry); 864f005ef32Sjsg 865f005ef32Sjsg return 0; 866f005ef32Sjsg } 867f005ef32Sjsg 868c349dbc7Sjsg int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, 869c349dbc7Sjsg void *err_data, 870c349dbc7Sjsg struct amdgpu_iv_entry *entry) 871c349dbc7Sjsg { 872c349dbc7Sjsg /* TODO ue will trigger an interrupt. 
873c349dbc7Sjsg * 874c349dbc7Sjsg * When “Full RAS” is enabled, the per-IP interrupt sources should 875c349dbc7Sjsg * be disabled and the driver should only look for the aggregated 876c349dbc7Sjsg * interrupt via sync flood 877c349dbc7Sjsg */ 878c349dbc7Sjsg if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 879c349dbc7Sjsg kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 8801bb76ff1Sjsg if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops && 8811bb76ff1Sjsg adev->gfx.ras->ras_block.hw_ops->query_ras_error_count) 8821bb76ff1Sjsg adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); 883c349dbc7Sjsg amdgpu_ras_reset_gpu(adev); 884c349dbc7Sjsg } 885c349dbc7Sjsg return AMDGPU_RAS_SUCCESS; 886c349dbc7Sjsg } 887c349dbc7Sjsg 888c349dbc7Sjsg int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, 889c349dbc7Sjsg struct amdgpu_irq_src *source, 890c349dbc7Sjsg struct amdgpu_iv_entry *entry) 891c349dbc7Sjsg { 892c349dbc7Sjsg struct ras_common_if *ras_if = adev->gfx.ras_if; 893c349dbc7Sjsg struct ras_dispatch_if ih_data = { 894c349dbc7Sjsg .entry = entry, 895c349dbc7Sjsg }; 896c349dbc7Sjsg 897c349dbc7Sjsg if (!ras_if) 898c349dbc7Sjsg return 0; 899c349dbc7Sjsg 900c349dbc7Sjsg ih_data.head = *ras_if; 901c349dbc7Sjsg 902c349dbc7Sjsg DRM_ERROR("CP ECC ERROR IRQ\n"); 903c349dbc7Sjsg amdgpu_ras_interrupt_dispatch(adev, &ih_data); 904c349dbc7Sjsg return 0; 905c349dbc7Sjsg } 906c349dbc7Sjsg 907f005ef32Sjsg void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, 908f005ef32Sjsg void *ras_error_status, 909f005ef32Sjsg void (*func)(struct amdgpu_device *adev, void *ras_error_status, 910f005ef32Sjsg int xcc_id)) 911f005ef32Sjsg { 912f005ef32Sjsg int i; 913f005ef32Sjsg int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; 914f005ef32Sjsg uint32_t xcc_mask = GENMASK(num_xcc - 1, 0); 915f005ef32Sjsg struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 916f005ef32Sjsg 917f005ef32Sjsg if (err_data) { 918f005ef32Sjsg err_data->ue_count = 0; 919f005ef32Sjsg err_data->ce_count = 0; 920f005ef32Sjsg } 921f005ef32Sjsg 922f005ef32Sjsg for_each_inst(i, xcc_mask) 923f005ef32Sjsg func(adev, ras_error_status, i); 924f005ef32Sjsg } 925f005ef32Sjsg 926c349dbc7Sjsg uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) 927c349dbc7Sjsg { 928c349dbc7Sjsg signed long r, cnt = 0; 929c349dbc7Sjsg unsigned long flags; 930ad8b1aafSjsg uint32_t seq, reg_val_offs = 0, value = 0; 931f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 932c349dbc7Sjsg struct amdgpu_ring *ring = &kiq->ring; 933c349dbc7Sjsg 9345ca02815Sjsg if (amdgpu_device_skip_hw_access(adev)) 935ad8b1aafSjsg return 0; 936ad8b1aafSjsg 9371bb76ff1Sjsg if (adev->mes.ring.sched.ready) 9381bb76ff1Sjsg return amdgpu_mes_rreg(adev, reg); 9391bb76ff1Sjsg 940c349dbc7Sjsg BUG_ON(!ring->funcs->emit_rreg); 941c349dbc7Sjsg 942c349dbc7Sjsg spin_lock_irqsave(&kiq->ring_lock, flags); 943ad8b1aafSjsg if (amdgpu_device_wb_get(adev, ®_val_offs)) { 944ad8b1aafSjsg pr_err("critical bug! 
too many kiq readers\n"); 945ad8b1aafSjsg goto failed_unlock; 946ad8b1aafSjsg } 947*bbf030e1Sjsg r = amdgpu_ring_alloc(ring, 32); 948*bbf030e1Sjsg if (r) 949*bbf030e1Sjsg goto failed_unlock; 950*bbf030e1Sjsg 951ad8b1aafSjsg amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); 952ad8b1aafSjsg r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 953ad8b1aafSjsg if (r) 954ad8b1aafSjsg goto failed_undo; 955ad8b1aafSjsg 956c349dbc7Sjsg amdgpu_ring_commit(ring); 957c349dbc7Sjsg spin_unlock_irqrestore(&kiq->ring_lock, flags); 958c349dbc7Sjsg 959c349dbc7Sjsg r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 960c349dbc7Sjsg 961c349dbc7Sjsg /* don't wait anymore for gpu reset case because this way may 962c349dbc7Sjsg * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 963c349dbc7Sjsg * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 964c349dbc7Sjsg * never return if we keep waiting in virt_kiq_rreg, which cause 965c349dbc7Sjsg * gpu_recover() hang there. 966c349dbc7Sjsg * 967c349dbc7Sjsg * also don't wait anymore for IRQ context 968c349dbc7Sjsg * */ 969ad8b1aafSjsg if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) 970c349dbc7Sjsg goto failed_kiq_read; 971c349dbc7Sjsg 972c349dbc7Sjsg might_sleep(); 973c349dbc7Sjsg while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 974c349dbc7Sjsg drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 975c349dbc7Sjsg r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 976c349dbc7Sjsg } 977c349dbc7Sjsg 978c349dbc7Sjsg if (cnt > MAX_KIQ_REG_TRY) 979c349dbc7Sjsg goto failed_kiq_read; 980c349dbc7Sjsg 981ad8b1aafSjsg mb(); 982ad8b1aafSjsg value = adev->wb.wb[reg_val_offs]; 983ad8b1aafSjsg amdgpu_device_wb_free(adev, reg_val_offs); 984ad8b1aafSjsg return value; 985c349dbc7Sjsg 986ad8b1aafSjsg failed_undo: 987ad8b1aafSjsg amdgpu_ring_undo(ring); 988ad8b1aafSjsg failed_unlock: 989ad8b1aafSjsg spin_unlock_irqrestore(&kiq->ring_lock, flags); 990c349dbc7Sjsg failed_kiq_read: 991ad8b1aafSjsg if (reg_val_offs) 992ad8b1aafSjsg amdgpu_device_wb_free(adev, reg_val_offs); 993ad8b1aafSjsg dev_err(adev->dev, "failed to read reg:%x\n", reg); 994c349dbc7Sjsg return ~0; 995c349dbc7Sjsg } 996c349dbc7Sjsg 997c349dbc7Sjsg void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) 998c349dbc7Sjsg { 999c349dbc7Sjsg signed long r, cnt = 0; 1000c349dbc7Sjsg unsigned long flags; 1001c349dbc7Sjsg uint32_t seq; 1002f005ef32Sjsg struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 1003c349dbc7Sjsg struct amdgpu_ring *ring = &kiq->ring; 1004c349dbc7Sjsg 1005c349dbc7Sjsg BUG_ON(!ring->funcs->emit_wreg); 1006c349dbc7Sjsg 10075ca02815Sjsg if (amdgpu_device_skip_hw_access(adev)) 1008ad8b1aafSjsg return; 1009ad8b1aafSjsg 10101bb76ff1Sjsg if (adev->mes.ring.sched.ready) { 10111bb76ff1Sjsg amdgpu_mes_wreg(adev, reg, v); 10121bb76ff1Sjsg return; 10131bb76ff1Sjsg } 10141bb76ff1Sjsg 1015c349dbc7Sjsg spin_lock_irqsave(&kiq->ring_lock, flags); 1016*bbf030e1Sjsg r = amdgpu_ring_alloc(ring, 32); 1017*bbf030e1Sjsg if (r) 1018*bbf030e1Sjsg goto failed_unlock; 1019*bbf030e1Sjsg 1020c349dbc7Sjsg amdgpu_ring_emit_wreg(ring, reg, v); 1021ad8b1aafSjsg r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 1022ad8b1aafSjsg if (r) 1023ad8b1aafSjsg goto failed_undo; 1024ad8b1aafSjsg 1025c349dbc7Sjsg amdgpu_ring_commit(ring); 1026c349dbc7Sjsg spin_unlock_irqrestore(&kiq->ring_lock, flags); 1027c349dbc7Sjsg 1028c349dbc7Sjsg r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1029c349dbc7Sjsg 1030c349dbc7Sjsg /* don't wait anymore for gpu reset case because this way may 
1031c349dbc7Sjsg * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 1032c349dbc7Sjsg * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 1033c349dbc7Sjsg * never return if we keep waiting in virt_kiq_rreg, which cause 1034c349dbc7Sjsg * gpu_recover() hang there. 1035c349dbc7Sjsg * 1036c349dbc7Sjsg * also don't wait anymore for IRQ context 1037c349dbc7Sjsg * */ 1038ad8b1aafSjsg if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt())) 1039c349dbc7Sjsg goto failed_kiq_write; 1040c349dbc7Sjsg 1041c349dbc7Sjsg might_sleep(); 1042c349dbc7Sjsg while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 1043c349dbc7Sjsg 1044c349dbc7Sjsg drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 1045c349dbc7Sjsg r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 1046c349dbc7Sjsg } 1047c349dbc7Sjsg 1048c349dbc7Sjsg if (cnt > MAX_KIQ_REG_TRY) 1049c349dbc7Sjsg goto failed_kiq_write; 1050c349dbc7Sjsg 1051c349dbc7Sjsg return; 1052c349dbc7Sjsg 1053ad8b1aafSjsg failed_undo: 1054ad8b1aafSjsg amdgpu_ring_undo(ring); 1055*bbf030e1Sjsg failed_unlock: 1056ad8b1aafSjsg spin_unlock_irqrestore(&kiq->ring_lock, flags); 1057c349dbc7Sjsg failed_kiq_write: 1058ad8b1aafSjsg dev_err(adev->dev, "failed to write reg:%x\n", reg); 1059c349dbc7Sjsg } 10605ca02815Sjsg 10615ca02815Sjsg int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) 10625ca02815Sjsg { 10635ca02815Sjsg if (amdgpu_num_kcq == -1) { 10645ca02815Sjsg return 8; 10655ca02815Sjsg } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { 10665ca02815Sjsg dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); 10675ca02815Sjsg return 8; 10685ca02815Sjsg } 10695ca02815Sjsg return amdgpu_num_kcq; 10705ca02815Sjsg } 10715ca02815Sjsg 10721bb76ff1Sjsg void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, 10731bb76ff1Sjsg uint32_t ucode_id) 10745ca02815Sjsg { 10751bb76ff1Sjsg const struct gfx_firmware_header_v1_0 *cp_hdr; 10761bb76ff1Sjsg const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0; 10771bb76ff1Sjsg struct amdgpu_firmware_info *info = NULL; 10781bb76ff1Sjsg const struct firmware *ucode_fw; 10791bb76ff1Sjsg unsigned int fw_size; 10801bb76ff1Sjsg 10811bb76ff1Sjsg switch (ucode_id) { 10821bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_PFP: 10831bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 10841bb76ff1Sjsg adev->gfx.pfp_fw->data; 10851bb76ff1Sjsg adev->gfx.pfp_fw_version = 10861bb76ff1Sjsg le32_to_cpu(cp_hdr->header.ucode_version); 10871bb76ff1Sjsg adev->gfx.pfp_feature_version = 10881bb76ff1Sjsg le32_to_cpu(cp_hdr->ucode_feature_version); 10891bb76ff1Sjsg ucode_fw = adev->gfx.pfp_fw; 10901bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 10911bb76ff1Sjsg break; 10921bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_PFP: 10931bb76ff1Sjsg cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 10941bb76ff1Sjsg adev->gfx.pfp_fw->data; 10951bb76ff1Sjsg adev->gfx.pfp_fw_version = 10961bb76ff1Sjsg le32_to_cpu(cp_hdr_v2_0->header.ucode_version); 10971bb76ff1Sjsg adev->gfx.pfp_feature_version = 10981bb76ff1Sjsg le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); 10991bb76ff1Sjsg ucode_fw = adev->gfx.pfp_fw; 11001bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); 11011bb76ff1Sjsg break; 11021bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK: 11031bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK: 11041bb76ff1Sjsg cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 11051bb76ff1Sjsg adev->gfx.pfp_fw->data; 11061bb76ff1Sjsg ucode_fw = adev->gfx.pfp_fw; 11071bb76ff1Sjsg fw_size = 
le32_to_cpu(cp_hdr_v2_0->data_size_bytes); 11081bb76ff1Sjsg break; 11091bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_ME: 11101bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 11111bb76ff1Sjsg adev->gfx.me_fw->data; 11121bb76ff1Sjsg adev->gfx.me_fw_version = 11131bb76ff1Sjsg le32_to_cpu(cp_hdr->header.ucode_version); 11141bb76ff1Sjsg adev->gfx.me_feature_version = 11151bb76ff1Sjsg le32_to_cpu(cp_hdr->ucode_feature_version); 11161bb76ff1Sjsg ucode_fw = adev->gfx.me_fw; 11171bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 11181bb76ff1Sjsg break; 11191bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_ME: 11201bb76ff1Sjsg cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 11211bb76ff1Sjsg adev->gfx.me_fw->data; 11221bb76ff1Sjsg adev->gfx.me_fw_version = 11231bb76ff1Sjsg le32_to_cpu(cp_hdr_v2_0->header.ucode_version); 11241bb76ff1Sjsg adev->gfx.me_feature_version = 11251bb76ff1Sjsg le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); 11261bb76ff1Sjsg ucode_fw = adev->gfx.me_fw; 11271bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); 11281bb76ff1Sjsg break; 11291bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK: 11301bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK: 11311bb76ff1Sjsg cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 11321bb76ff1Sjsg adev->gfx.me_fw->data; 11331bb76ff1Sjsg ucode_fw = adev->gfx.me_fw; 11341bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); 11351bb76ff1Sjsg break; 11361bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_CE: 11371bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 11381bb76ff1Sjsg adev->gfx.ce_fw->data; 11391bb76ff1Sjsg adev->gfx.ce_fw_version = 11401bb76ff1Sjsg le32_to_cpu(cp_hdr->header.ucode_version); 11411bb76ff1Sjsg adev->gfx.ce_feature_version = 11421bb76ff1Sjsg le32_to_cpu(cp_hdr->ucode_feature_version); 11431bb76ff1Sjsg ucode_fw = adev->gfx.ce_fw; 11441bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 11451bb76ff1Sjsg break; 11461bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_MEC1: 11471bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 11481bb76ff1Sjsg adev->gfx.mec_fw->data; 11491bb76ff1Sjsg adev->gfx.mec_fw_version = 11501bb76ff1Sjsg le32_to_cpu(cp_hdr->header.ucode_version); 11511bb76ff1Sjsg adev->gfx.mec_feature_version = 11521bb76ff1Sjsg le32_to_cpu(cp_hdr->ucode_feature_version); 11531bb76ff1Sjsg ucode_fw = adev->gfx.mec_fw; 11541bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 11551bb76ff1Sjsg le32_to_cpu(cp_hdr->jt_size) * 4; 11561bb76ff1Sjsg break; 11571bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_MEC1_JT: 11581bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 11591bb76ff1Sjsg adev->gfx.mec_fw->data; 11601bb76ff1Sjsg ucode_fw = adev->gfx.mec_fw; 11611bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; 11621bb76ff1Sjsg break; 11631bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_MEC2: 11641bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 11651bb76ff1Sjsg adev->gfx.mec2_fw->data; 11661bb76ff1Sjsg adev->gfx.mec2_fw_version = 11671bb76ff1Sjsg le32_to_cpu(cp_hdr->header.ucode_version); 11681bb76ff1Sjsg adev->gfx.mec2_feature_version = 11691bb76ff1Sjsg le32_to_cpu(cp_hdr->ucode_feature_version); 11701bb76ff1Sjsg ucode_fw = adev->gfx.mec2_fw; 11711bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 11721bb76ff1Sjsg le32_to_cpu(cp_hdr->jt_size) * 4; 11731bb76ff1Sjsg break; 11741bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_MEC2_JT: 11751bb76ff1Sjsg cp_hdr = (const struct gfx_firmware_header_v1_0 *) 11761bb76ff1Sjsg 
adev->gfx.mec2_fw->data; 11771bb76ff1Sjsg ucode_fw = adev->gfx.mec2_fw; 11781bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr->jt_size) * 4; 11791bb76ff1Sjsg break; 11801bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_MEC: 11811bb76ff1Sjsg cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 11821bb76ff1Sjsg adev->gfx.mec_fw->data; 11831bb76ff1Sjsg adev->gfx.mec_fw_version = 11841bb76ff1Sjsg le32_to_cpu(cp_hdr_v2_0->header.ucode_version); 11851bb76ff1Sjsg adev->gfx.mec_feature_version = 11861bb76ff1Sjsg le32_to_cpu(cp_hdr_v2_0->ucode_feature_version); 11871bb76ff1Sjsg ucode_fw = adev->gfx.mec_fw; 11881bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes); 11891bb76ff1Sjsg break; 11901bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK: 11911bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK: 11921bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK: 11931bb76ff1Sjsg case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: 11941bb76ff1Sjsg cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *) 11951bb76ff1Sjsg adev->gfx.mec_fw->data; 11961bb76ff1Sjsg ucode_fw = adev->gfx.mec_fw; 11971bb76ff1Sjsg fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); 11981bb76ff1Sjsg break; 11991bb76ff1Sjsg default: 12005c27c7d4Sjsg dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id); 12015c27c7d4Sjsg return; 12021bb76ff1Sjsg } 12031bb76ff1Sjsg 12041bb76ff1Sjsg if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 12051bb76ff1Sjsg info = &adev->firmware.ucode[ucode_id]; 12061bb76ff1Sjsg info->ucode_id = ucode_id; 12071bb76ff1Sjsg info->fw = ucode_fw; 1208f005ef32Sjsg adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE); 12091bb76ff1Sjsg } 12105ca02815Sjsg } 1211f005ef32Sjsg 1212f005ef32Sjsg bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id) 1213f005ef32Sjsg { 1214f005ef32Sjsg return !(xcc_id % (adev->gfx.num_xcc_per_xcp ? 1215f005ef32Sjsg adev->gfx.num_xcc_per_xcp : 1)); 1216f005ef32Sjsg } 1217f005ef32Sjsg 1218f005ef32Sjsg static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, 1219f005ef32Sjsg struct device_attribute *addr, 1220f005ef32Sjsg char *buf) 1221f005ef32Sjsg { 1222f005ef32Sjsg struct drm_device *ddev = dev_get_drvdata(dev); 1223f005ef32Sjsg struct amdgpu_device *adev = drm_to_adev(ddev); 1224f005ef32Sjsg int mode; 1225f005ef32Sjsg 1226f005ef32Sjsg mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, 1227f005ef32Sjsg AMDGPU_XCP_FL_NONE); 1228f005ef32Sjsg 1229f005ef32Sjsg return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode)); 1230f005ef32Sjsg } 1231f005ef32Sjsg 1232f005ef32Sjsg static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, 1233f005ef32Sjsg struct device_attribute *addr, 1234f005ef32Sjsg const char *buf, size_t count) 1235f005ef32Sjsg { 1236f005ef32Sjsg struct drm_device *ddev = dev_get_drvdata(dev); 1237f005ef32Sjsg struct amdgpu_device *adev = drm_to_adev(ddev); 1238f005ef32Sjsg enum amdgpu_gfx_partition mode; 1239f005ef32Sjsg int ret = 0, num_xcc; 1240f005ef32Sjsg 1241f005ef32Sjsg num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1242f005ef32Sjsg if (num_xcc % 2 != 0) 1243f005ef32Sjsg return -EINVAL; 1244f005ef32Sjsg 1245f005ef32Sjsg if (!strncasecmp("SPX", buf, strlen("SPX"))) { 1246f005ef32Sjsg mode = AMDGPU_SPX_PARTITION_MODE; 1247f005ef32Sjsg } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { 1248f005ef32Sjsg /* 1249f005ef32Sjsg * DPX mode needs AIDs to be in multiple of 2. 1250f005ef32Sjsg * Each AID connects 2 XCCs. 
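* With 8 XCCs (4 AIDs), for example, DPX yields two partitions of
* 4 XCCs (2 AIDs) each; an XCC count that is not a multiple of 4
* cannot be split evenly that way, hence the num_xcc % 4 check below.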
1251f005ef32Sjsg */ 1252f005ef32Sjsg if (num_xcc%4) 1253f005ef32Sjsg return -EINVAL; 1254f005ef32Sjsg mode = AMDGPU_DPX_PARTITION_MODE; 1255f005ef32Sjsg } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { 1256f005ef32Sjsg if (num_xcc != 6) 1257f005ef32Sjsg return -EINVAL; 1258f005ef32Sjsg mode = AMDGPU_TPX_PARTITION_MODE; 1259f005ef32Sjsg } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { 1260f005ef32Sjsg if (num_xcc != 8) 1261f005ef32Sjsg return -EINVAL; 1262f005ef32Sjsg mode = AMDGPU_QPX_PARTITION_MODE; 1263f005ef32Sjsg } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { 1264f005ef32Sjsg mode = AMDGPU_CPX_PARTITION_MODE; 1265f005ef32Sjsg } else { 1266f005ef32Sjsg return -EINVAL; 1267f005ef32Sjsg } 1268f005ef32Sjsg 1269f005ef32Sjsg ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode); 1270f005ef32Sjsg 1271f005ef32Sjsg if (ret) 1272f005ef32Sjsg return ret; 1273f005ef32Sjsg 1274f005ef32Sjsg return count; 1275f005ef32Sjsg } 1276f005ef32Sjsg 1277f005ef32Sjsg static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, 1278f005ef32Sjsg struct device_attribute *addr, 1279f005ef32Sjsg char *buf) 1280f005ef32Sjsg { 1281f005ef32Sjsg struct drm_device *ddev = dev_get_drvdata(dev); 1282f005ef32Sjsg struct amdgpu_device *adev = drm_to_adev(ddev); 1283f005ef32Sjsg char *supported_partition; 1284f005ef32Sjsg 1285f005ef32Sjsg /* TBD */ 1286f005ef32Sjsg switch (NUM_XCC(adev->gfx.xcc_mask)) { 1287f005ef32Sjsg case 8: 1288f005ef32Sjsg supported_partition = "SPX, DPX, QPX, CPX"; 1289f005ef32Sjsg break; 1290f005ef32Sjsg case 6: 1291f005ef32Sjsg supported_partition = "SPX, TPX, CPX"; 1292f005ef32Sjsg break; 1293f005ef32Sjsg case 4: 1294f005ef32Sjsg supported_partition = "SPX, DPX, CPX"; 1295f005ef32Sjsg break; 1296f005ef32Sjsg /* this seems only existing in emulation phase */ 1297f005ef32Sjsg case 2: 1298f005ef32Sjsg supported_partition = "SPX, CPX"; 1299f005ef32Sjsg break; 1300f005ef32Sjsg default: 1301f005ef32Sjsg supported_partition = "Not supported"; 1302f005ef32Sjsg break; 1303f005ef32Sjsg } 1304f005ef32Sjsg 1305f005ef32Sjsg return sysfs_emit(buf, "%s\n", supported_partition); 1306f005ef32Sjsg } 1307f005ef32Sjsg 1308f005ef32Sjsg static DEVICE_ATTR(current_compute_partition, 0644, 1309f005ef32Sjsg amdgpu_gfx_get_current_compute_partition, 1310f005ef32Sjsg amdgpu_gfx_set_compute_partition); 1311f005ef32Sjsg 1312f005ef32Sjsg static DEVICE_ATTR(available_compute_partition, 0444, 1313f005ef32Sjsg amdgpu_gfx_get_available_compute_partition, NULL); 1314f005ef32Sjsg 1315f005ef32Sjsg int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) 1316f005ef32Sjsg { 1317f005ef32Sjsg int r; 1318f005ef32Sjsg 1319f005ef32Sjsg r = device_create_file(adev->dev, &dev_attr_current_compute_partition); 1320f005ef32Sjsg if (r) 1321f005ef32Sjsg return r; 1322f005ef32Sjsg 1323f005ef32Sjsg r = device_create_file(adev->dev, &dev_attr_available_compute_partition); 1324f005ef32Sjsg 1325f005ef32Sjsg return r; 1326f005ef32Sjsg } 1327f005ef32Sjsg 1328f005ef32Sjsg void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) 1329f005ef32Sjsg { 1330f005ef32Sjsg device_remove_file(adev->dev, &dev_attr_current_compute_partition); 1331f005ef32Sjsg device_remove_file(adev->dev, &dev_attr_available_compute_partition); 1332f005ef32Sjsg } 1333