/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 *          Christian König
 */
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>

#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"

/*
 * Rings
 * Most engines on the GPU are fed via ring buffers.  Ring
 * buffers are areas of GPU accessible memory that the host
 * writes commands into and the GPU reads commands out of.
 * There is a rptr (read pointer) that determines where the
 * GPU is currently reading, and a wptr (write pointer)
 * which determines where the host has written.  When the
 * pointers are equal, the ring is idle.  When the host
 * writes commands to the ring buffer, it increments the
 * wptr.  The GPU then starts fetching commands and executes
 * them until the pointers are equal again.
 */

/**
 * amdgpu_ring_max_ibs - Return max IBs that fit in a single submission.
 *
 * @type: ring type for which to return the limit.
 */
unsigned int amdgpu_ring_max_ibs(enum amdgpu_ring_type type)
{
	switch (type) {
	case AMDGPU_RING_TYPE_GFX:
		/* Need to keep at least 192 on GFX7+ for old radv. */
		return 192;
	case AMDGPU_RING_TYPE_COMPUTE:
		return 125;
	case AMDGPU_RING_TYPE_VCN_JPEG:
		return 16;
	default:
		return 49;
	}
}

/**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ndw: number of dwords to allocate in the ring buffer
 *
 * Allocate @ndw dwords in the ring buffer (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
{
	/* Align the requested size so amdgpu_ring_commit() can pad safely */
	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

	/* Make sure we aren't trying to allocate more space
	 * than the maximum for one submission
	 */
	if (WARN_ON_ONCE(ndw > ring->max_dw))
		return -ENOMEM;

	ring->count_dw = ndw;
	ring->wptr_old = ring->wptr;

	if (ring->funcs->begin_use)
		ring->funcs->begin_use(ring);

	return 0;
}

/**
 * amdgpu_ring_insert_nop - insert NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @count: the number of NOP packets to insert
 *
 * This is the generic insert_nop function for rings except SDMA
 */
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * amdgpu_ring_generic_pad_ib - pad IB with NOP packets
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ib: IB to add NOP packets to
 *
 * This is the generic pad_ib function for rings except SDMA
 */
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & ring->funcs->align_mask)
		ib->ptr[ib->length_dw++] = ring->funcs->nop;
}
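
/*
 * Illustrative sketch only (not a helper defined in this file): callers
 * that feed a ring directly pair amdgpu_ring_alloc() with
 * amdgpu_ring_commit() below, and roll back with amdgpu_ring_undo() if
 * building the packets fails.  amdgpu_ring_write() stands for the dword
 * write helper declared alongside the other ring helpers.
 *
 *	r = amdgpu_ring_alloc(ring, ndw);
 *	if (r)
 *		return r;
 *	... emit at most ndw dwords with amdgpu_ring_write() ...
 *	amdgpu_ring_commit(ring);
 *	(or amdgpu_ring_undo(ring) if packet emission failed)
 */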

/**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Update the wptr (write pointer) to tell the GPU to
 * execute new commands on the ring buffer (all asics).
 */
void amdgpu_ring_commit(struct amdgpu_ring *ring)
{
	uint32_t count;

	/* We pad to match fetch size */
	count = ring->funcs->align_mask + 1 -
		(ring->wptr & ring->funcs->align_mask);
	count %= ring->funcs->align_mask + 1;
	ring->funcs->insert_nop(ring, count);

	mb();
	amdgpu_ring_set_wptr(ring);

	if (ring->funcs->end_use)
		ring->funcs->end_use(ring);
}

/**
 * amdgpu_ring_undo - reset the wptr
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Reset the driver's copy of the wptr (all asics).
 */
void amdgpu_ring_undo(struct amdgpu_ring *ring)
{
	ring->wptr = ring->wptr_old;

	if (ring->funcs->end_use)
		ring->funcs->end_use(ring);
}

#define amdgpu_ring_get_gpu_addr(ring, offset)				\
	(ring->is_mes_queue ?						\
	 (ring->mes_ctx->meta_data_gpu_addr + offset) :			\
	 (ring->adev->wb.gpu_addr + offset * 4))

#define amdgpu_ring_get_cpu_addr(ring, offset)				\
	(ring->is_mes_queue ?						\
	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) :	\
	 (&ring->adev->wb.wb[offset]))

/**
 * amdgpu_ring_init - init driver ring struct.
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring structure holding ring information
 * @max_dw: maximum number of dw for ring alloc
 * @irq_src: interrupt source to use for this ring
 * @irq_type: interrupt type to use for this ring
 * @hw_prio: ring priority (NORMAL/HIGH)
 * @sched_score: optional score atomic shared with other schedulers
 *
 * Initialize the driver information for the selected ring (all asics).
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
		     unsigned int irq_type, unsigned int hw_prio,
		     atomic_t *sched_score)
{
	int r;
	int sched_hw_submission = amdgpu_sched_hw_submission;
	u32 *num_sched;
	u32 hw_ip;
	unsigned int max_ibs_dw;

	/* Set the hw submission limit higher for KIQ because
	 * it's used for a number of gfx/compute tasks by both
	 * KFD and KGD which may have outstanding fences and
	 * it doesn't really use the gpu scheduler anyway;
	 * KIQ tasks get submitted directly to the ring.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		sched_hw_submission = max(sched_hw_submission, 256);
	else if (ring == &adev->sdma.instance[0].page)
		sched_hw_submission = 256;

	if (ring->adev == NULL) {
		if (adev->num_rings >= AMDGPU_MAX_RINGS)
			return -EINVAL;

		ring->adev = adev;
		ring->num_hw_submission = sched_hw_submission;
		ring->sched_score = sched_score;
		ring->vmid_wait = dma_fence_get_stub();

		if (!ring->is_mes_queue) {
			ring->idx = adev->num_rings++;
			adev->rings[ring->idx] = ring;
		}

		r = amdgpu_fence_driver_init_ring(ring);
		if (r)
			return r;
	}

	if (ring->is_mes_queue) {
		ring->rptr_offs = amdgpu_mes_ctx_get_offs(ring,
					AMDGPU_MES_CTX_RPTR_OFFS);
		ring->wptr_offs = amdgpu_mes_ctx_get_offs(ring,
					AMDGPU_MES_CTX_WPTR_OFFS);
		ring->fence_offs = amdgpu_mes_ctx_get_offs(ring,
					AMDGPU_MES_CTX_FENCE_OFFS);
		ring->trail_fence_offs = amdgpu_mes_ctx_get_offs(ring,
					AMDGPU_MES_CTX_TRAIL_FENCE_OFFS);
		ring->cond_exe_offs = amdgpu_mes_ctx_get_offs(ring,
					AMDGPU_MES_CTX_COND_EXE_OFFS);
	} else {
		r = amdgpu_device_wb_get(adev, &ring->rptr_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->wptr_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->fence_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->trail_fence_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring trail_fence_offs wb alloc failed\n", r);
			return r;
		}

		r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs);
		if (r) {
			dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
			return r;
		}
	}

	ring->fence_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->fence_offs);
	ring->fence_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->fence_offs);

	ring->rptr_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->rptr_offs);
	ring->rptr_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->rptr_offs);

	ring->wptr_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->wptr_offs);
	ring->wptr_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->wptr_offs);

	ring->trail_fence_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->trail_fence_offs);
	ring->trail_fence_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->trail_fence_offs);

	ring->cond_exe_gpu_addr =
		amdgpu_ring_get_gpu_addr(ring, ring->cond_exe_offs);
	ring->cond_exe_cpu_addr =
		amdgpu_ring_get_cpu_addr(ring, ring->cond_exe_offs);

	/* always set cond_exec_polling to CONTINUE */
	*ring->cond_exe_cpu_addr = 1;

	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
	if (r) {
		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}

	max_ibs_dw = ring->funcs->emit_frame_size +
		     amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size;
	max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask;

	if (WARN_ON(max_ibs_dw > max_dw))
		max_dw = max_ibs_dw;

	ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission);

	ring->buf_mask = (ring->ring_size / 4) - 1;
	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
		0xffffffffffffffff : ring->buf_mask;

	/* Allocate ring buffer */
	if (ring->is_mes_queue) {
		int offset = 0;

		BUG_ON(ring->ring_size > PAGE_SIZE*4);

		offset = amdgpu_mes_ctx_get_offs(ring,
						 AMDGPU_MES_CTX_RING_OFFS);
		ring->gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ring->ring = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
		amdgpu_ring_clear_ring(ring);

	} else if (ring->ring_obj == NULL) {
		r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
					    AMDGPU_GEM_DOMAIN_GTT,
					    &ring->ring_obj,
					    &ring->gpu_addr,
					    (void **)&ring->ring);
		if (r) {
			dev_err(adev->dev, "(%d) ring create failed\n", r);
			return r;
		}
		amdgpu_ring_clear_ring(ring);
	}

	ring->max_dw = max_dw;
	ring->hw_prio = hw_prio;

	if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) {
		hw_ip = ring->funcs->type;
		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
			&ring->sched;
	}

	return 0;
}

/**
 * amdgpu_ring_fini - tear down the driver ring struct.
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Tear down the driver information for the selected ring (all asics).
 */
void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
	/* Don't tear down a ring that was never initialized */
	if (!(ring->adev) ||
	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
		return;

	ring->sched.ready = false;

	if (!ring->is_mes_queue) {
		amdgpu_device_wb_free(ring->adev, ring->rptr_offs);
		amdgpu_device_wb_free(ring->adev, ring->wptr_offs);

		amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs);
		amdgpu_device_wb_free(ring->adev, ring->fence_offs);

		amdgpu_bo_free_kernel(&ring->ring_obj,
				      &ring->gpu_addr,
				      (void **)&ring->ring);
	} else {
		kfree(ring->fence_drv.fences);
	}

	dma_fence_put(ring->vmid_wait);
	ring->vmid_wait = NULL;
	ring->me = 0;

	if (!ring->is_mes_queue)
		ring->adev->rings[ring->idx] = NULL;
}

/**
 * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
 *
 * @ring: ring to write to
 * @reg0: register to write
 * @reg1: register to wait on
 * @ref: reference value to write/wait on
 * @mask: mask to wait on
 *
 * Helper for rings that don't support write and wait in a
 * single oneshot packet.
 */
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
						uint32_t reg0, uint32_t reg1,
						uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}

/**
 * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
 *
 * @ring: ring to try the recovery on
 * @vmid: VMID we try to get going again
 * @fence: timed out fence
 *
 * Tries to get a ring proceeding again when it is stuck.
 */
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
			       struct dma_fence *fence)
{
	unsigned long flags;
	ktime_t deadline = ktime_add_us(ktime_get(), 10000);

	if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
		return false;

	spin_lock_irqsave(fence->lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		dma_fence_set_error(fence, -ENODATA);
	spin_unlock_irqrestore(fence->lock, flags);

	atomic_inc(&ring->adev->gpu_reset_counter);
	while (!dma_fence_is_signaled(fence) &&
	       ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
		ring->funcs->soft_recovery(ring, vmid);

	return dma_fence_is_signaled(fence);
}

/*
 * Debugfs info
 */
#if defined(CONFIG_DEBUG_FS)

/* Layout of file is 12 bytes consisting of
 * - rptr
 * - wptr
 * - driver's copy of wptr
 *
 * followed by n-words of ring data
 */
static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	uint32_t value, result, early[3];
	loff_t i;
	int r;

	if (*pos & 3 || size & 3)
		return -EINVAL;

	result = 0;

	if (*pos < 12) {
		early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
		early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
		early[2] = ring->wptr & ring->buf_mask;
		for (i = *pos / 4; i < 3 && size; i++) {
			r = put_user(early[i], (uint32_t *)buf);
			if (r)
				return r;
			buf += 4;
			result += 4;
			size -= 4;
			*pos += 4;
		}
	}

	while (size) {
		if (*pos >= (ring->ring_size + 12))
			return result;

		value = ring->ring[(*pos - 12)/4];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;
		buf += 4;
		result += 4;
		size -= 4;
		*pos += 4;
	}

	return result;
}

static const struct file_operations amdgpu_debugfs_ring_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_ring_read,
	.llseek = default_llseek
};

static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
				       size_t size, loff_t *pos)
{
	struct amdgpu_ring *ring = file_inode(f)->i_private;
	volatile u32 *mqd;
	u32 *kbuf;
	int r, i;
	uint32_t value, result;

	if (*pos & 3 || size & 3)
		return -EINVAL;

	kbuf = kmalloc(ring->mqd_size, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto err_free;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
	if (r)
		goto err_unreserve;

	/*
	 * Copy to local buffer to avoid put_user(), which might fault
	 * and acquire mmap_sem, under reservation_ww_class_mutex.
	 */
	for (i = 0; i < ring->mqd_size/sizeof(u32); i++)
		kbuf[i] = mqd[i];

	amdgpu_bo_kunmap(ring->mqd_obj);
	amdgpu_bo_unreserve(ring->mqd_obj);

	result = 0;
	while (size) {
		if (*pos >= ring->mqd_size)
			break;

		value = kbuf[*pos/4];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			goto err_free;
		buf += 4;
		result += 4;
		size -= 4;
		*pos += 4;
	}

	kfree(kbuf);
	return result;

err_unreserve:
	amdgpu_bo_unreserve(ring->mqd_obj);
err_free:
	kfree(kbuf);
	return r;
}

static const struct file_operations amdgpu_debugfs_mqd_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_mqd_read,
	.llseek = default_llseek
};

static int amdgpu_debugfs_ring_error(void *data, u64 val)
{
	struct amdgpu_ring *ring = data;

	amdgpu_fence_driver_set_error(ring, val);
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(amdgpu_debugfs_error_fops, NULL,
				amdgpu_debugfs_ring_error, "%lld\n");

#endif

void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;
	char name[32];

	sprintf(name, "amdgpu_ring_%s", ring->name);
	debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
				 &amdgpu_debugfs_ring_fops,
				 ring->ring_size + 12);

	if (ring->mqd_obj) {
		sprintf(name, "amdgpu_mqd_%s", ring->name);
		debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
					 &amdgpu_debugfs_mqd_fops,
					 ring->mqd_size);
	}

	sprintf(name, "amdgpu_error_%s", ring->name);
	debugfs_create_file(name, 0200, root, ring,
			    &amdgpu_debugfs_error_fops);

#endif
}

/**
 * amdgpu_ring_test_helper - tests a ring and sets the sched readiness status
 *
 * @ring: ring to test
 *
 * Tests the ring and sets the scheduler readiness status
 *
 * Returns 0 on success, error on failure.
 */
int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	r = amdgpu_ring_test_ring(ring);
	if (r)
		DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n",
			      ring->name, r);
	else
		DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n",
			      ring->name);

	ring->sched.ready = !r;
	return r;
}

/* Fill @prop with the MQD properties derived from the ring state for the
 * IP-specific MQD manager (see amdgpu_ring_init_mqd() below).
 */
static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
				    struct amdgpu_mqd_prop *prop)
{
	struct amdgpu_device *adev = ring->adev;

	memset(prop, 0, sizeof(*prop));

	prop->mqd_gpu_addr = ring->mqd_gpu_addr;
	prop->hqd_base_gpu_addr = ring->gpu_addr;
	prop->rptr_gpu_addr = ring->rptr_gpu_addr;
	prop->wptr_gpu_addr = ring->wptr_gpu_addr;
	prop->queue_size = ring->ring_size;
	prop->eop_gpu_addr = ring->eop_gpu_addr;
	prop->use_doorbell = ring->use_doorbell;
	prop->doorbell_index = ring->doorbell_index;

	/* The map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs to set this field.
	 */
	prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
	     amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
	     amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
		prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
		prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
	}
}

int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_mqd *mqd_mgr;
	struct amdgpu_mqd_prop prop;

	amdgpu_ring_to_mqd_prop(ring, &prop);

	ring->wptr = 0;

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd_mgr = &adev->mqds[AMDGPU_HW_IP_COMPUTE];
	else
		mqd_mgr = &adev->mqds[ring->funcs->type];

	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
}
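
/*
 * The wrappers below are only meaningful for software rings: they forward
 * IB begin/end notifications and offset markers to the software ring mux.
 * On a regular hardware ring (!ring->is_sw_ring) they are no-ops.
 */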

void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_begin(ring);
}

void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_end(ring);
}

void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL);
}

void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE);
}

void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
{
	if (ring->is_sw_ring)
		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
}