11bb76ff1Sjsg // SPDX-License-Identifier: GPL-2.0 OR MIT 2fb4d8502Sjsg /* 31bb76ff1Sjsg * Copyright 2014-2022 Advanced Micro Devices, Inc. 4fb4d8502Sjsg * 5fb4d8502Sjsg * Permission is hereby granted, free of charge, to any person obtaining a 6fb4d8502Sjsg * copy of this software and associated documentation files (the "Software"), 7fb4d8502Sjsg * to deal in the Software without restriction, including without limitation 8fb4d8502Sjsg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9fb4d8502Sjsg * and/or sell copies of the Software, and to permit persons to whom the 10fb4d8502Sjsg * Software is furnished to do so, subject to the following conditions: 11fb4d8502Sjsg * 12fb4d8502Sjsg * The above copyright notice and this permission notice shall be included in 13fb4d8502Sjsg * all copies or substantial portions of the Software. 14fb4d8502Sjsg * 15fb4d8502Sjsg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16fb4d8502Sjsg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17fb4d8502Sjsg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18fb4d8502Sjsg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19fb4d8502Sjsg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20fb4d8502Sjsg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21fb4d8502Sjsg * OTHER DEALINGS IN THE SOFTWARE. 22fb4d8502Sjsg * 23fb4d8502Sjsg */ 24fb4d8502Sjsg 25fb4d8502Sjsg #include <linux/ratelimit.h> 26fb4d8502Sjsg #include <linux/printk.h> 27fb4d8502Sjsg #include <linux/slab.h> 28fb4d8502Sjsg #include <linux/list.h> 29fb4d8502Sjsg #include <linux/types.h> 30fb4d8502Sjsg #include <linux/bitops.h> 31fb4d8502Sjsg #include <linux/sched.h> 32fb4d8502Sjsg #include "kfd_priv.h" 33fb4d8502Sjsg #include "kfd_device_queue_manager.h" 34fb4d8502Sjsg #include "kfd_mqd_manager.h" 35fb4d8502Sjsg #include "cik_regs.h" 36fb4d8502Sjsg #include "kfd_kernel_queue.h" 37c349dbc7Sjsg #include "amdgpu_amdkfd.h" 381bb76ff1Sjsg #include "mes_api_def.h" 39f005ef32Sjsg #include "kfd_debug.h" 40fb4d8502Sjsg 41fb4d8502Sjsg /* Size of the per-pipe EOP queue */ 42fb4d8502Sjsg #define CIK_HPD_EOP_BYTES_LOG2 11 43fb4d8502Sjsg #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 44fb4d8502Sjsg 45fb4d8502Sjsg static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 46ad8b1aafSjsg u32 pasid, unsigned int vmid); 47fb4d8502Sjsg 48fb4d8502Sjsg static int execute_queues_cpsch(struct device_queue_manager *dqm, 49fb4d8502Sjsg enum kfd_unmap_queues_filter filter, 50f005ef32Sjsg uint32_t filter_param, 51f005ef32Sjsg uint32_t grace_period); 52fb4d8502Sjsg static int unmap_queues_cpsch(struct device_queue_manager *dqm, 53fb4d8502Sjsg enum kfd_unmap_queues_filter filter, 54f005ef32Sjsg uint32_t filter_param, 55f005ef32Sjsg uint32_t grace_period, 56f005ef32Sjsg bool reset); 57fb4d8502Sjsg 58fb4d8502Sjsg static int map_queues_cpsch(struct device_queue_manager *dqm); 59fb4d8502Sjsg 60fb4d8502Sjsg static void deallocate_sdma_queue(struct device_queue_manager *dqm, 61c349dbc7Sjsg struct queue *q); 62fb4d8502Sjsg 63c349dbc7Sjsg static inline void deallocate_hqd(struct device_queue_manager *dqm, 64c349dbc7Sjsg struct queue *q); 65c349dbc7Sjsg static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 66c349dbc7Sjsg static int allocate_sdma_queue(struct device_queue_manager *dqm, 671bb76ff1Sjsg struct queue *q, const uint32_t *restore_sdma_id); 68fb4d8502Sjsg static void 
kfd_process_hw_exception(struct work_struct *work); 69fb4d8502Sjsg 70fb4d8502Sjsg static inline 71fb4d8502Sjsg enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 72fb4d8502Sjsg { 73c349dbc7Sjsg if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 74fb4d8502Sjsg return KFD_MQD_TYPE_SDMA; 75fb4d8502Sjsg return KFD_MQD_TYPE_CP; 76fb4d8502Sjsg } 77fb4d8502Sjsg 78fb4d8502Sjsg static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 79fb4d8502Sjsg { 80fb4d8502Sjsg int i; 81f005ef32Sjsg int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 82f005ef32Sjsg + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 83fb4d8502Sjsg 84fb4d8502Sjsg /* queue is available for KFD usage if bit is 1 */ 85f005ef32Sjsg for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 86fb4d8502Sjsg if (test_bit(pipe_offset + i, 87f005ef32Sjsg dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 88fb4d8502Sjsg return true; 89fb4d8502Sjsg return false; 90fb4d8502Sjsg } 91fb4d8502Sjsg 92c349dbc7Sjsg unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 93fb4d8502Sjsg { 94f005ef32Sjsg return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 95fb4d8502Sjsg KGD_MAX_QUEUES); 96fb4d8502Sjsg } 97fb4d8502Sjsg 98fb4d8502Sjsg unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 99fb4d8502Sjsg { 100f005ef32Sjsg return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 101fb4d8502Sjsg } 102fb4d8502Sjsg 103fb4d8502Sjsg unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 104fb4d8502Sjsg { 105f005ef32Sjsg return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 106fb4d8502Sjsg } 107fb4d8502Sjsg 108c349dbc7Sjsg static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 109c349dbc7Sjsg { 1101bb76ff1Sjsg return kfd_get_num_sdma_engines(dqm->dev) + 1111bb76ff1Sjsg kfd_get_num_xgmi_sdma_engines(dqm->dev); 112c349dbc7Sjsg } 113c349dbc7Sjsg 114fb4d8502Sjsg unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 115fb4d8502Sjsg { 1161bb76ff1Sjsg return kfd_get_num_sdma_engines(dqm->dev) * 117f005ef32Sjsg dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 118c349dbc7Sjsg } 119c349dbc7Sjsg 120c349dbc7Sjsg unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 121c349dbc7Sjsg { 1221bb76ff1Sjsg return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 123f005ef32Sjsg dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 1241bb76ff1Sjsg } 1251bb76ff1Sjsg 126f005ef32Sjsg static void init_sdma_bitmaps(struct device_queue_manager *dqm) 1271bb76ff1Sjsg { 128f005ef32Sjsg bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 129f005ef32Sjsg bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 130f005ef32Sjsg 131f005ef32Sjsg bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 132f005ef32Sjsg bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 133f005ef32Sjsg 134f005ef32Sjsg /* Mask out the reserved queues */ 135f005ef32Sjsg bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap, 136f005ef32Sjsg dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap, 137f005ef32Sjsg KFD_MAX_SDMA_QUEUES); 138fb4d8502Sjsg } 139fb4d8502Sjsg 140fb4d8502Sjsg void program_sh_mem_settings(struct device_queue_manager *dqm, 141fb4d8502Sjsg struct qcm_process_device *qpd) 142fb4d8502Sjsg { 143f005ef32Sjsg uint32_t xcc_mask = dqm->dev->xcc_mask; 144f005ef32Sjsg int xcc_id; 145f005ef32Sjsg 146f005ef32Sjsg for_each_inst(xcc_id, xcc_mask) 147f005ef32Sjsg 
dqm->dev->kfd2kgd->program_sh_mem_settings( 148f005ef32Sjsg dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 149f005ef32Sjsg qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 150f005ef32Sjsg qpd->sh_mem_bases, xcc_id); 151fb4d8502Sjsg } 152fb4d8502Sjsg 1531bb76ff1Sjsg static void kfd_hws_hang(struct device_queue_manager *dqm) 1541bb76ff1Sjsg { 1551bb76ff1Sjsg /* 1561bb76ff1Sjsg * Issue a GPU reset if HWS is unresponsive 1571bb76ff1Sjsg */ 1581bb76ff1Sjsg dqm->is_hws_hang = true; 1591bb76ff1Sjsg 1601bb76ff1Sjsg /* It's possible we're detecting a HWS hang in the 1611bb76ff1Sjsg * middle of a GPU reset. No need to schedule another 1621bb76ff1Sjsg * reset in this case. 1631bb76ff1Sjsg */ 1641bb76ff1Sjsg if (!dqm->is_resetting) 1651bb76ff1Sjsg schedule_work(&dqm->hw_exception_work); 1661bb76ff1Sjsg } 1671bb76ff1Sjsg 1681bb76ff1Sjsg static int convert_to_mes_queue_type(int queue_type) 1691bb76ff1Sjsg { 1701bb76ff1Sjsg int mes_queue_type; 1711bb76ff1Sjsg 1721bb76ff1Sjsg switch (queue_type) { 1731bb76ff1Sjsg case KFD_QUEUE_TYPE_COMPUTE: 1741bb76ff1Sjsg mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 1751bb76ff1Sjsg break; 1761bb76ff1Sjsg case KFD_QUEUE_TYPE_SDMA: 1771bb76ff1Sjsg mes_queue_type = MES_QUEUE_TYPE_SDMA; 1781bb76ff1Sjsg break; 1791bb76ff1Sjsg default: 1801bb76ff1Sjsg WARN(1, "Invalid queue type %d", queue_type); 1811bb76ff1Sjsg mes_queue_type = -EINVAL; 1821bb76ff1Sjsg break; 1831bb76ff1Sjsg } 1841bb76ff1Sjsg 1851bb76ff1Sjsg return mes_queue_type; 1861bb76ff1Sjsg } 1871bb76ff1Sjsg 1881bb76ff1Sjsg static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 1891bb76ff1Sjsg struct qcm_process_device *qpd) 1901bb76ff1Sjsg { 1911bb76ff1Sjsg struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 1921bb76ff1Sjsg struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1931bb76ff1Sjsg struct mes_add_queue_input queue_input; 1941bb76ff1Sjsg int r, queue_type; 1951bb76ff1Sjsg uint64_t wptr_addr_off; 1961bb76ff1Sjsg 1971bb76ff1Sjsg if (dqm->is_hws_hang) 1981bb76ff1Sjsg return -EIO; 1991bb76ff1Sjsg 2001bb76ff1Sjsg memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 2011bb76ff1Sjsg queue_input.process_id = qpd->pqm->process->pasid; 2021bb76ff1Sjsg queue_input.page_table_base_addr = qpd->page_table_base; 2031bb76ff1Sjsg queue_input.process_va_start = 0; 2041bb76ff1Sjsg queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 2051bb76ff1Sjsg /* MES unit for quantum is 100ns */ 2061bb76ff1Sjsg queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. 
*/ 2071bb76ff1Sjsg queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 2081bb76ff1Sjsg queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 2091bb76ff1Sjsg queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 2101bb76ff1Sjsg queue_input.inprocess_gang_priority = q->properties.priority; 2111bb76ff1Sjsg queue_input.gang_global_priority_level = 2121bb76ff1Sjsg AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 2131bb76ff1Sjsg queue_input.doorbell_offset = q->properties.doorbell_off; 2141bb76ff1Sjsg queue_input.mqd_addr = q->gart_mqd_addr; 2151bb76ff1Sjsg queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 2161bb76ff1Sjsg 2171bb76ff1Sjsg if (q->wptr_bo) { 218486281faSjsg wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 219de35ea82Sjsg queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off; 2201bb76ff1Sjsg } 2211bb76ff1Sjsg 2221bb76ff1Sjsg queue_input.is_kfd_process = 1; 2231bb76ff1Sjsg queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 2241bb76ff1Sjsg queue_input.queue_size = q->properties.queue_size >> 2; 2251bb76ff1Sjsg 2261bb76ff1Sjsg queue_input.paging = false; 2271bb76ff1Sjsg queue_input.tba_addr = qpd->tba_addr; 2281bb76ff1Sjsg queue_input.tma_addr = qpd->tma_addr; 229f005ef32Sjsg queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); 230f005ef32Sjsg queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled || 231f005ef32Sjsg kfd_dbg_has_ttmps_always_setup(q->device); 2321bb76ff1Sjsg 2331bb76ff1Sjsg queue_type = convert_to_mes_queue_type(q->properties.type); 2341bb76ff1Sjsg if (queue_type < 0) { 2351bb76ff1Sjsg pr_err("Queue type not supported with MES, queue:%d\n", 2361bb76ff1Sjsg q->properties.type); 2371bb76ff1Sjsg return -EINVAL; 2381bb76ff1Sjsg } 2391bb76ff1Sjsg queue_input.queue_type = (uint32_t)queue_type; 2401bb76ff1Sjsg 241f005ef32Sjsg queue_input.exclusively_scheduled = q->properties.is_gws; 2421bb76ff1Sjsg 2431bb76ff1Sjsg amdgpu_mes_lock(&adev->mes); 2441bb76ff1Sjsg r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 2451bb76ff1Sjsg amdgpu_mes_unlock(&adev->mes); 2461bb76ff1Sjsg if (r) { 2471bb76ff1Sjsg pr_err("failed to add hardware queue to MES, doorbell=0x%x\n", 2481bb76ff1Sjsg q->properties.doorbell_off); 2491bb76ff1Sjsg pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 2501bb76ff1Sjsg kfd_hws_hang(dqm); 2511bb76ff1Sjsg } 2521bb76ff1Sjsg 2531bb76ff1Sjsg return r; 2541bb76ff1Sjsg } 2551bb76ff1Sjsg 2561bb76ff1Sjsg static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 2571bb76ff1Sjsg struct qcm_process_device *qpd) 2581bb76ff1Sjsg { 2591bb76ff1Sjsg struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 2601bb76ff1Sjsg int r; 2611bb76ff1Sjsg struct mes_remove_queue_input queue_input; 2621bb76ff1Sjsg 2631bb76ff1Sjsg if (dqm->is_hws_hang) 2641bb76ff1Sjsg return -EIO; 2651bb76ff1Sjsg 2661bb76ff1Sjsg memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 2671bb76ff1Sjsg queue_input.doorbell_offset = q->properties.doorbell_off; 2681bb76ff1Sjsg queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 2691bb76ff1Sjsg 2701bb76ff1Sjsg amdgpu_mes_lock(&adev->mes); 2711bb76ff1Sjsg r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 2721bb76ff1Sjsg amdgpu_mes_unlock(&adev->mes); 2731bb76ff1Sjsg 2741bb76ff1Sjsg if (r) { 2751bb76ff1Sjsg pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n", 2761bb76ff1Sjsg q->properties.doorbell_off); 2771bb76ff1Sjsg pr_err("MES might be in unrecoverable 
state, issue a GPU reset\n"); 2781bb76ff1Sjsg kfd_hws_hang(dqm); 2791bb76ff1Sjsg } 2801bb76ff1Sjsg 2811bb76ff1Sjsg return r; 2821bb76ff1Sjsg } 2831bb76ff1Sjsg 2841bb76ff1Sjsg static int remove_all_queues_mes(struct device_queue_manager *dqm) 2851bb76ff1Sjsg { 2861bb76ff1Sjsg struct device_process_node *cur; 2871bb76ff1Sjsg struct qcm_process_device *qpd; 2881bb76ff1Sjsg struct queue *q; 2891bb76ff1Sjsg int retval = 0; 2901bb76ff1Sjsg 2911bb76ff1Sjsg list_for_each_entry(cur, &dqm->queues, list) { 2921bb76ff1Sjsg qpd = cur->qpd; 2931bb76ff1Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 2941bb76ff1Sjsg if (q->properties.is_active) { 2951bb76ff1Sjsg retval = remove_queue_mes(dqm, q, qpd); 2961bb76ff1Sjsg if (retval) { 2971bb76ff1Sjsg pr_err("%s: Failed to remove queue %d for dev %d", 2981bb76ff1Sjsg __func__, 2991bb76ff1Sjsg q->properties.queue_id, 3001bb76ff1Sjsg dqm->dev->id); 3011bb76ff1Sjsg return retval; 3021bb76ff1Sjsg } 3031bb76ff1Sjsg } 3041bb76ff1Sjsg } 3051bb76ff1Sjsg } 3061bb76ff1Sjsg 3071bb76ff1Sjsg return retval; 3081bb76ff1Sjsg } 3091bb76ff1Sjsg 310ad8b1aafSjsg static void increment_queue_count(struct device_queue_manager *dqm, 3111285848aSjsg struct qcm_process_device *qpd, 3121285848aSjsg struct queue *q) 313c349dbc7Sjsg { 314c349dbc7Sjsg dqm->active_queue_count++; 3151285848aSjsg if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 3161285848aSjsg q->properties.type == KFD_QUEUE_TYPE_DIQ) 317c349dbc7Sjsg dqm->active_cp_queue_count++; 3181285848aSjsg 3191285848aSjsg if (q->properties.is_gws) { 3201285848aSjsg dqm->gws_queue_count++; 3211285848aSjsg qpd->mapped_gws_queue = true; 3221285848aSjsg } 323c349dbc7Sjsg } 324c349dbc7Sjsg 325ad8b1aafSjsg static void decrement_queue_count(struct device_queue_manager *dqm, 3261285848aSjsg struct qcm_process_device *qpd, 3271285848aSjsg struct queue *q) 328c349dbc7Sjsg { 329c349dbc7Sjsg dqm->active_queue_count--; 3301285848aSjsg if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 3311285848aSjsg q->properties.type == KFD_QUEUE_TYPE_DIQ) 332c349dbc7Sjsg dqm->active_cp_queue_count--; 3331285848aSjsg 3341285848aSjsg if (q->properties.is_gws) { 3351285848aSjsg dqm->gws_queue_count--; 3361285848aSjsg qpd->mapped_gws_queue = false; 3371285848aSjsg } 338c349dbc7Sjsg } 339c349dbc7Sjsg 3401bb76ff1Sjsg /* 3411bb76ff1Sjsg * Allocate a doorbell ID to this queue. 3421bb76ff1Sjsg * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 3431bb76ff1Sjsg */ 3441bb76ff1Sjsg static int allocate_doorbell(struct qcm_process_device *qpd, 3451bb76ff1Sjsg struct queue *q, 3461bb76ff1Sjsg uint32_t const *restore_id) 347fb4d8502Sjsg { 348f005ef32Sjsg struct kfd_node *dev = qpd->dqm->dev; 349fb4d8502Sjsg 3501bb76ff1Sjsg if (!KFD_IS_SOC15(dev)) { 351fb4d8502Sjsg /* On pre-SOC15 chips we need to use the queue ID to 352fb4d8502Sjsg * preserve the user mode ABI. 353fb4d8502Sjsg */ 3541bb76ff1Sjsg 3551bb76ff1Sjsg if (restore_id && *restore_id != q->properties.queue_id) 3561bb76ff1Sjsg return -EINVAL; 3571bb76ff1Sjsg 358fb4d8502Sjsg q->doorbell_id = q->properties.queue_id; 359c349dbc7Sjsg } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 360c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 361c349dbc7Sjsg /* For SDMA queues on SOC15 with 8-byte doorbell, use static 362c349dbc7Sjsg * doorbell assignments based on the engine and queue id. 363c349dbc7Sjsg * The doorbell index distance between RLC (2*i) and (2*i+1) 364c349dbc7Sjsg * for a SDMA engine is 512.
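 * As an illustrative example of the calculation below (B stands for the
 * engine's base index taken from sdma_doorbell_idx[], not a real symbol):
 * sdma_queue_id 0, 1, 2, 3 map to doorbell indices B, B + 512, B + 1,
 * B + 513, i.e. even queue ids land in the lower block and odd queue ids
 * in the mirrored block KFD_QUEUE_DOORBELL_MIRROR_OFFSET (512) above it.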
365fb4d8502Sjsg */ 366c349dbc7Sjsg 367f005ef32Sjsg uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 368f005ef32Sjsg 369f005ef32Sjsg /* 370f005ef32Sjsg * q->properties.sdma_engine_id corresponds to the virtual 371f005ef32Sjsg * sdma engine number. However, for doorbell allocation, 372f005ef32Sjsg * we need the physical sdma engine id in order to get the 373f005ef32Sjsg * correct doorbell offset. 374f005ef32Sjsg */ 375f005ef32Sjsg uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 376f005ef32Sjsg get_num_all_sdma_engines(qpd->dqm) + 377f005ef32Sjsg q->properties.sdma_engine_id] 378c349dbc7Sjsg + (q->properties.sdma_queue_id & 1) 379c349dbc7Sjsg * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 380c349dbc7Sjsg + (q->properties.sdma_queue_id >> 1); 3811bb76ff1Sjsg 3821bb76ff1Sjsg if (restore_id && *restore_id != valid_id) 3831bb76ff1Sjsg return -EINVAL; 3841bb76ff1Sjsg q->doorbell_id = valid_id; 385fb4d8502Sjsg } else { 3861bb76ff1Sjsg /* For CP queues on SOC15 */ 3871bb76ff1Sjsg if (restore_id) { 3881bb76ff1Sjsg /* make sure that ID is free */ 3891bb76ff1Sjsg if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 3901bb76ff1Sjsg return -EINVAL; 3911bb76ff1Sjsg 3921bb76ff1Sjsg q->doorbell_id = *restore_id; 3931bb76ff1Sjsg } else { 3941bb76ff1Sjsg /* or reserve a free doorbell ID */ 395fb4d8502Sjsg unsigned int found; 396fb4d8502Sjsg 397fb4d8502Sjsg found = find_first_zero_bit(qpd->doorbell_bitmap, 398fb4d8502Sjsg KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 399fb4d8502Sjsg if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 400fb4d8502Sjsg pr_debug("No doorbells available"); 401fb4d8502Sjsg return -EBUSY; 402fb4d8502Sjsg } 403fb4d8502Sjsg set_bit(found, qpd->doorbell_bitmap); 404fb4d8502Sjsg q->doorbell_id = found; 405fb4d8502Sjsg } 4061bb76ff1Sjsg } 407fb4d8502Sjsg 408f005ef32Sjsg q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, 409f005ef32Sjsg qpd->proc_doorbells, 410f005ef32Sjsg q->doorbell_id, 411f005ef32Sjsg dev->kfd->device_info.doorbell_size); 412fb4d8502Sjsg return 0; 413fb4d8502Sjsg } 414fb4d8502Sjsg 415fb4d8502Sjsg static void deallocate_doorbell(struct qcm_process_device *qpd, 416fb4d8502Sjsg struct queue *q) 417fb4d8502Sjsg { 418fb4d8502Sjsg unsigned int old; 419f005ef32Sjsg struct kfd_node *dev = qpd->dqm->dev; 420fb4d8502Sjsg 4211bb76ff1Sjsg if (!KFD_IS_SOC15(dev) || 422c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA || 423c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 424fb4d8502Sjsg return; 425fb4d8502Sjsg 426fb4d8502Sjsg old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 427fb4d8502Sjsg WARN_ON(!old); 428fb4d8502Sjsg } 429fb4d8502Sjsg 4305ca02815Sjsg static void program_trap_handler_settings(struct device_queue_manager *dqm, 4315ca02815Sjsg struct qcm_process_device *qpd) 4325ca02815Sjsg { 433f005ef32Sjsg uint32_t xcc_mask = dqm->dev->xcc_mask; 434f005ef32Sjsg int xcc_id; 435f005ef32Sjsg 4365ca02815Sjsg if (dqm->dev->kfd2kgd->program_trap_handler_settings) 437f005ef32Sjsg for_each_inst(xcc_id, xcc_mask) 4385ca02815Sjsg dqm->dev->kfd2kgd->program_trap_handler_settings( 439f005ef32Sjsg dqm->dev->adev, qpd->vmid, qpd->tba_addr, 440f005ef32Sjsg qpd->tma_addr, xcc_id); 4415ca02815Sjsg } 4425ca02815Sjsg 443fb4d8502Sjsg static int allocate_vmid(struct device_queue_manager *dqm, 444fb4d8502Sjsg struct qcm_process_device *qpd, 445fb4d8502Sjsg struct queue *q) 446fb4d8502Sjsg { 447c349dbc7Sjsg int allocated_vmid = -1, i; 448fb4d8502Sjsg 449c349dbc7Sjsg for (i = dqm->dev->vm_info.first_vmid_kfd; 450c349dbc7Sjsg i <= 
dqm->dev->vm_info.last_vmid_kfd; i++) { 451c349dbc7Sjsg if (!dqm->vmid_pasid[i]) { 452c349dbc7Sjsg allocated_vmid = i; 453c349dbc7Sjsg break; 454c349dbc7Sjsg } 455c349dbc7Sjsg } 456fb4d8502Sjsg 457c349dbc7Sjsg if (allocated_vmid < 0) { 458c349dbc7Sjsg pr_err("no more vmid to allocate\n"); 459c349dbc7Sjsg return -ENOSPC; 460c349dbc7Sjsg } 461fb4d8502Sjsg 462c349dbc7Sjsg pr_debug("vmid allocated: %d\n", allocated_vmid); 463c349dbc7Sjsg 464c349dbc7Sjsg dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 465c349dbc7Sjsg 466c349dbc7Sjsg set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 467c349dbc7Sjsg 468fb4d8502Sjsg qpd->vmid = allocated_vmid; 469fb4d8502Sjsg q->properties.vmid = allocated_vmid; 470fb4d8502Sjsg 471fb4d8502Sjsg program_sh_mem_settings(dqm, qpd); 472fb4d8502Sjsg 473f005ef32Sjsg if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 4745ca02815Sjsg program_trap_handler_settings(dqm, qpd); 4755ca02815Sjsg 476fb4d8502Sjsg /* qpd->page_table_base is set earlier when register_process() 477fb4d8502Sjsg * is called, i.e. when the first queue is created. 478fb4d8502Sjsg */ 4791bb76ff1Sjsg dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 480fb4d8502Sjsg qpd->vmid, 481fb4d8502Sjsg qpd->page_table_base); 482fb4d8502Sjsg /* invalidate the VM context after pasid and vmid mapping is set up */ 4835ca02815Sjsg kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 484fb4d8502Sjsg 485c349dbc7Sjsg if (dqm->dev->kfd2kgd->set_scratch_backing_va) 4861bb76ff1Sjsg dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 487c349dbc7Sjsg qpd->sh_hidden_private_base, qpd->vmid); 488c349dbc7Sjsg 489fb4d8502Sjsg return 0; 490fb4d8502Sjsg } 491fb4d8502Sjsg 492f005ef32Sjsg static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 493fb4d8502Sjsg struct qcm_process_device *qpd) 494fb4d8502Sjsg { 4955ca02815Sjsg const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 496fb4d8502Sjsg int ret; 497fb4d8502Sjsg 498fb4d8502Sjsg if (!qpd->ib_kaddr) 499fb4d8502Sjsg return -ENOMEM; 500fb4d8502Sjsg 501fb4d8502Sjsg ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 502fb4d8502Sjsg if (ret) 503fb4d8502Sjsg return ret; 504fb4d8502Sjsg 5051bb76ff1Sjsg return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 506fb4d8502Sjsg qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 507fb4d8502Sjsg pmf->release_mem_size / sizeof(uint32_t)); 508fb4d8502Sjsg } 509fb4d8502Sjsg 510fb4d8502Sjsg static void deallocate_vmid(struct device_queue_manager *dqm, 511fb4d8502Sjsg struct qcm_process_device *qpd, 512fb4d8502Sjsg struct queue *q) 513fb4d8502Sjsg { 514fb4d8502Sjsg /* On GFX v7, CP doesn't flush TC at dequeue */ 5151bb76ff1Sjsg if (q->device->adev->asic_type == CHIP_HAWAII) 516fb4d8502Sjsg if (flush_texture_cache_nocpsch(q->device, qpd)) 517fb4d8502Sjsg pr_err("Failed to flush TC\n"); 518fb4d8502Sjsg 5195ca02815Sjsg kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 520fb4d8502Sjsg 521fb4d8502Sjsg /* Release the vmid mapping */ 522fb4d8502Sjsg set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 523c349dbc7Sjsg dqm->vmid_pasid[qpd->vmid] = 0; 524fb4d8502Sjsg 525fb4d8502Sjsg qpd->vmid = 0; 526fb4d8502Sjsg q->properties.vmid = 0; 527fb4d8502Sjsg } 528fb4d8502Sjsg 529fb4d8502Sjsg static int create_queue_nocpsch(struct device_queue_manager *dqm, 530fb4d8502Sjsg struct queue *q, 5311bb76ff1Sjsg struct qcm_process_device *qpd, 5321bb76ff1Sjsg const struct kfd_criu_queue_priv_data *qd, 5331bb76ff1Sjsg const void *restore_mqd, const void *restore_ctl_stack) 534fb4d8502Sjsg { 
535c349dbc7Sjsg struct mqd_manager *mqd_mgr; 536fb4d8502Sjsg int retval; 537fb4d8502Sjsg 538fb4d8502Sjsg dqm_lock(dqm); 539fb4d8502Sjsg 540fb4d8502Sjsg if (dqm->total_queue_count >= max_num_of_queues_per_device) { 541fb4d8502Sjsg pr_warn("Can't create new usermode queue because %d queues were already created\n", 542fb4d8502Sjsg dqm->total_queue_count); 543fb4d8502Sjsg retval = -EPERM; 544fb4d8502Sjsg goto out_unlock; 545fb4d8502Sjsg } 546fb4d8502Sjsg 547fb4d8502Sjsg if (list_empty(&qpd->queues_list)) { 548fb4d8502Sjsg retval = allocate_vmid(dqm, qpd, q); 549fb4d8502Sjsg if (retval) 550fb4d8502Sjsg goto out_unlock; 551fb4d8502Sjsg } 552fb4d8502Sjsg q->properties.vmid = qpd->vmid; 553fb4d8502Sjsg /* 554c349dbc7Sjsg * Eviction state logic: mark all queues as evicted, even ones 555c349dbc7Sjsg * not currently active. Restoring inactive queues later only 556c349dbc7Sjsg * updates the is_evicted flag but is a no-op otherwise. 557fb4d8502Sjsg */ 558c349dbc7Sjsg q->properties.is_evicted = !!qpd->evicted; 559fb4d8502Sjsg 560fb4d8502Sjsg q->properties.tba_addr = qpd->tba_addr; 561fb4d8502Sjsg q->properties.tma_addr = qpd->tma_addr; 562fb4d8502Sjsg 563c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 564c349dbc7Sjsg q->properties.type)]; 565c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 566c349dbc7Sjsg retval = allocate_hqd(dqm, q); 567c349dbc7Sjsg if (retval) 568c349dbc7Sjsg goto deallocate_vmid; 569c349dbc7Sjsg pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 570c349dbc7Sjsg q->pipe, q->queue); 571c349dbc7Sjsg } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 572c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 5731bb76ff1Sjsg retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 574c349dbc7Sjsg if (retval) 575c349dbc7Sjsg goto deallocate_vmid; 576c349dbc7Sjsg dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 577fb4d8502Sjsg } 578fb4d8502Sjsg 5791bb76ff1Sjsg retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 580c349dbc7Sjsg if (retval) 581c349dbc7Sjsg goto out_deallocate_hqd; 582c349dbc7Sjsg 583c349dbc7Sjsg /* Temporarily release dqm lock to avoid a circular lock dependency */ 584c349dbc7Sjsg dqm_unlock(dqm); 585c349dbc7Sjsg q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 586c349dbc7Sjsg dqm_lock(dqm); 587c349dbc7Sjsg 588c349dbc7Sjsg if (!q->mqd_mem_obj) { 589c349dbc7Sjsg retval = -ENOMEM; 590c349dbc7Sjsg goto out_deallocate_doorbell; 591c349dbc7Sjsg } 5921bb76ff1Sjsg 5931bb76ff1Sjsg if (qd) 5941bb76ff1Sjsg mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 5951bb76ff1Sjsg &q->properties, restore_mqd, restore_ctl_stack, 5961bb76ff1Sjsg qd->ctl_stack_size); 5971bb76ff1Sjsg else 598c349dbc7Sjsg mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 599c349dbc7Sjsg &q->gart_mqd_addr, &q->properties); 6001bb76ff1Sjsg 601c349dbc7Sjsg if (q->properties.is_active) { 602c349dbc7Sjsg if (!dqm->sched_running) { 603c349dbc7Sjsg WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 604c349dbc7Sjsg goto add_queue_to_list; 605c349dbc7Sjsg } 606c349dbc7Sjsg 607c349dbc7Sjsg if (WARN(q->process->mm != current->mm, 608c349dbc7Sjsg "should only run in user thread")) 609c349dbc7Sjsg retval = -EFAULT; 610c349dbc7Sjsg else 611c349dbc7Sjsg retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 612c349dbc7Sjsg q->queue, &q->properties, current->mm); 613c349dbc7Sjsg if (retval) 614c349dbc7Sjsg goto out_free_mqd; 615c349dbc7Sjsg } 616c349dbc7Sjsg 617c349dbc7Sjsg add_queue_to_list: 618fb4d8502Sjsg list_add(&q->list, &qpd->queues_list); 619fb4d8502Sjsg qpd->queue_count++; 620fb4d8502Sjsg if (q->properties.is_active) 6211285848aSjsg increment_queue_count(dqm, qpd, q); 622fb4d8502Sjsg 623fb4d8502Sjsg /* 624fb4d8502Sjsg * Unconditionally increment this counter, regardless of the queue's 625fb4d8502Sjsg * type or whether the queue is active. 
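 * (This running total is the value compared against
 * max_num_of_queues_per_device at the top of this function.)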
626fb4d8502Sjsg */ 627fb4d8502Sjsg dqm->total_queue_count++; 628fb4d8502Sjsg pr_debug("Total of %d queues are accountable so far\n", 629fb4d8502Sjsg dqm->total_queue_count); 630c349dbc7Sjsg goto out_unlock; 631fb4d8502Sjsg 632c349dbc7Sjsg out_free_mqd: 633c349dbc7Sjsg mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 634c349dbc7Sjsg out_deallocate_doorbell: 635c349dbc7Sjsg deallocate_doorbell(qpd, q); 636c349dbc7Sjsg out_deallocate_hqd: 637c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 638c349dbc7Sjsg deallocate_hqd(dqm, q); 639c349dbc7Sjsg else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 640c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 641c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 642c349dbc7Sjsg deallocate_vmid: 643c349dbc7Sjsg if (list_empty(&qpd->queues_list)) 644c349dbc7Sjsg deallocate_vmid(dqm, qpd, q); 645fb4d8502Sjsg out_unlock: 646fb4d8502Sjsg dqm_unlock(dqm); 647fb4d8502Sjsg return retval; 648fb4d8502Sjsg } 649fb4d8502Sjsg 650fb4d8502Sjsg static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 651fb4d8502Sjsg { 652fb4d8502Sjsg bool set; 653fb4d8502Sjsg int pipe, bit, i; 654fb4d8502Sjsg 655fb4d8502Sjsg set = false; 656fb4d8502Sjsg 657fb4d8502Sjsg for (pipe = dqm->next_pipe_to_allocate, i = 0; 658fb4d8502Sjsg i < get_pipes_per_mec(dqm); 659fb4d8502Sjsg pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 660fb4d8502Sjsg 661fb4d8502Sjsg if (!is_pipe_enabled(dqm, 0, pipe)) 662fb4d8502Sjsg continue; 663fb4d8502Sjsg 664fb4d8502Sjsg if (dqm->allocated_queues[pipe] != 0) { 665fb4d8502Sjsg bit = ffs(dqm->allocated_queues[pipe]) - 1; 666fb4d8502Sjsg dqm->allocated_queues[pipe] &= ~(1 << bit); 667fb4d8502Sjsg q->pipe = pipe; 668fb4d8502Sjsg q->queue = bit; 669fb4d8502Sjsg set = true; 670fb4d8502Sjsg break; 671fb4d8502Sjsg } 672fb4d8502Sjsg } 673fb4d8502Sjsg 674fb4d8502Sjsg if (!set) 675fb4d8502Sjsg return -EBUSY; 676fb4d8502Sjsg 677fb4d8502Sjsg pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 678fb4d8502Sjsg /* horizontal hqd allocation */ 679fb4d8502Sjsg dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 680fb4d8502Sjsg 681fb4d8502Sjsg return 0; 682fb4d8502Sjsg } 683fb4d8502Sjsg 684fb4d8502Sjsg static inline void deallocate_hqd(struct device_queue_manager *dqm, 685fb4d8502Sjsg struct queue *q) 686fb4d8502Sjsg { 687fb4d8502Sjsg dqm->allocated_queues[q->pipe] |= (1 << q->queue); 688fb4d8502Sjsg } 689fb4d8502Sjsg 6901bb76ff1Sjsg #define SQ_IND_CMD_CMD_KILL 0x00000003 6911bb76ff1Sjsg #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 6921bb76ff1Sjsg 693f005ef32Sjsg static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 6941bb76ff1Sjsg { 6951bb76ff1Sjsg int status = 0; 6961bb76ff1Sjsg unsigned int vmid; 6971bb76ff1Sjsg uint16_t queried_pasid; 6981bb76ff1Sjsg union SQ_CMD_BITS reg_sq_cmd; 6991bb76ff1Sjsg union GRBM_GFX_INDEX_BITS reg_gfx_index; 7001bb76ff1Sjsg struct kfd_process_device *pdd; 7011bb76ff1Sjsg int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 7021bb76ff1Sjsg int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 703f005ef32Sjsg uint32_t xcc_mask = dev->xcc_mask; 704f005ef32Sjsg int xcc_id; 7051bb76ff1Sjsg 7061bb76ff1Sjsg reg_sq_cmd.u32All = 0; 7071bb76ff1Sjsg reg_gfx_index.u32All = 0; 7081bb76ff1Sjsg 7091bb76ff1Sjsg pr_debug("Killing all process wavefronts\n"); 7101bb76ff1Sjsg 7111bb76ff1Sjsg if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { 7121bb76ff1Sjsg pr_err("no vmid pasid mapping supported \n"); 7131bb76ff1Sjsg return -EOPNOTSUPP; 7141bb76ff1Sjsg } 
7151bb76ff1Sjsg 7161bb76ff1Sjsg /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 7171bb76ff1Sjsg * ATC_VMID15_PASID_MAPPING 7181bb76ff1Sjsg * to check which VMID the current process is mapped to. 7191bb76ff1Sjsg */ 7201bb76ff1Sjsg 7211bb76ff1Sjsg for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 7221bb76ff1Sjsg status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 7231bb76ff1Sjsg (dev->adev, vmid, &queried_pasid); 7241bb76ff1Sjsg 7251bb76ff1Sjsg if (status && queried_pasid == p->pasid) { 7261bb76ff1Sjsg pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 7271bb76ff1Sjsg vmid, p->pasid); 7281bb76ff1Sjsg break; 7291bb76ff1Sjsg } 7301bb76ff1Sjsg } 7311bb76ff1Sjsg 7321bb76ff1Sjsg if (vmid > last_vmid_to_scan) { 7331bb76ff1Sjsg pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); 7341bb76ff1Sjsg return -EFAULT; 7351bb76ff1Sjsg } 7361bb76ff1Sjsg 7371bb76ff1Sjsg /* taking the VMID for that process the safe way using PDD */ 7381bb76ff1Sjsg pdd = kfd_get_process_device_data(dev, p); 7391bb76ff1Sjsg if (!pdd) 7401bb76ff1Sjsg return -EFAULT; 7411bb76ff1Sjsg 7421bb76ff1Sjsg reg_gfx_index.bits.sh_broadcast_writes = 1; 7431bb76ff1Sjsg reg_gfx_index.bits.se_broadcast_writes = 1; 7441bb76ff1Sjsg reg_gfx_index.bits.instance_broadcast_writes = 1; 7451bb76ff1Sjsg reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 7461bb76ff1Sjsg reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 7471bb76ff1Sjsg reg_sq_cmd.bits.vm_id = vmid; 7481bb76ff1Sjsg 749f005ef32Sjsg for_each_inst(xcc_id, xcc_mask) 750f005ef32Sjsg dev->kfd2kgd->wave_control_execute( 751f005ef32Sjsg dev->adev, reg_gfx_index.u32All, 752f005ef32Sjsg reg_sq_cmd.u32All, xcc_id); 7531bb76ff1Sjsg 7541bb76ff1Sjsg return 0; 7551bb76ff1Sjsg } 7561bb76ff1Sjsg 757fb4d8502Sjsg /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 758fb4d8502Sjsg * to avoid unsynchronized access 759fb4d8502Sjsg */ 760fb4d8502Sjsg static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 761fb4d8502Sjsg struct qcm_process_device *qpd, 762fb4d8502Sjsg struct queue *q) 763fb4d8502Sjsg { 764fb4d8502Sjsg int retval; 765fb4d8502Sjsg struct mqd_manager *mqd_mgr; 766fb4d8502Sjsg 767c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 768c349dbc7Sjsg q->properties.type)]; 769fb4d8502Sjsg 770c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 771fb4d8502Sjsg deallocate_hqd(dqm, q); 772c349dbc7Sjsg else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 773c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 774c349dbc7Sjsg else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 775c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 776c349dbc7Sjsg else { 777fb4d8502Sjsg pr_debug("q->properties.type %d is invalid\n", 778fb4d8502Sjsg q->properties.type); 779fb4d8502Sjsg return -EINVAL; 780fb4d8502Sjsg } 781fb4d8502Sjsg dqm->total_queue_count--; 782fb4d8502Sjsg 783fb4d8502Sjsg deallocate_doorbell(qpd, q); 784fb4d8502Sjsg 785c349dbc7Sjsg if (!dqm->sched_running) { 786c349dbc7Sjsg WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 787c349dbc7Sjsg return 0; 788c349dbc7Sjsg } 789c349dbc7Sjsg 790fb4d8502Sjsg retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 791fb4d8502Sjsg KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 792fb4d8502Sjsg KFD_UNMAP_LATENCY_MS, 793fb4d8502Sjsg q->pipe, q->queue); 794fb4d8502Sjsg if (retval == -ETIME) 795fb4d8502Sjsg qpd->reset_wavefronts = true; 796fb4d8502Sjsg 797fb4d8502Sjsg list_del(&q->list); 798fb4d8502Sjsg if (list_empty(&qpd->queues_list)) { 799fb4d8502Sjsg if (qpd->reset_wavefronts) {
800fb4d8502Sjsg pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 801fb4d8502Sjsg dqm->dev); 802fb4d8502Sjsg /* dbgdev_wave_reset_wavefronts has to be called before 803fb4d8502Sjsg * deallocate_vmid(), i.e. when vmid is still in use. 804fb4d8502Sjsg */ 805fb4d8502Sjsg dbgdev_wave_reset_wavefronts(dqm->dev, 806fb4d8502Sjsg qpd->pqm->process); 807fb4d8502Sjsg qpd->reset_wavefronts = false; 808fb4d8502Sjsg } 809fb4d8502Sjsg 810fb4d8502Sjsg deallocate_vmid(dqm, qpd, q); 811fb4d8502Sjsg } 812fb4d8502Sjsg qpd->queue_count--; 8131285848aSjsg if (q->properties.is_active) 8141285848aSjsg decrement_queue_count(dqm, qpd, q); 815fb4d8502Sjsg 816fb4d8502Sjsg return retval; 817fb4d8502Sjsg } 818fb4d8502Sjsg 819fb4d8502Sjsg static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 820fb4d8502Sjsg struct qcm_process_device *qpd, 821fb4d8502Sjsg struct queue *q) 822fb4d8502Sjsg { 823fb4d8502Sjsg int retval; 824ad8b1aafSjsg uint64_t sdma_val = 0; 825ad8b1aafSjsg struct kfd_process_device *pdd = qpd_to_pdd(qpd); 82639437df3Sjsg struct mqd_manager *mqd_mgr = 82739437df3Sjsg dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 828ad8b1aafSjsg 829ad8b1aafSjsg /* Get the SDMA queue stats */ 830ad8b1aafSjsg if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 831ad8b1aafSjsg (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 832ad8b1aafSjsg retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 833ad8b1aafSjsg &sdma_val); 834ad8b1aafSjsg if (retval) 835ad8b1aafSjsg pr_err("Failed to read SDMA queue counter for queue: %d\n", 836ad8b1aafSjsg q->properties.queue_id); 837ad8b1aafSjsg } 838fb4d8502Sjsg 839fb4d8502Sjsg dqm_lock(dqm); 840fb4d8502Sjsg retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 841ad8b1aafSjsg if (!retval) 842ad8b1aafSjsg pdd->sdma_past_activity_counter += sdma_val; 843fb4d8502Sjsg dqm_unlock(dqm); 844fb4d8502Sjsg 84539437df3Sjsg mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 84639437df3Sjsg 847fb4d8502Sjsg return retval; 848fb4d8502Sjsg } 849fb4d8502Sjsg 8501bb76ff1Sjsg static int update_queue(struct device_queue_manager *dqm, struct queue *q, 8511bb76ff1Sjsg struct mqd_update_info *minfo) 852fb4d8502Sjsg { 853c349dbc7Sjsg int retval = 0; 854fb4d8502Sjsg struct mqd_manager *mqd_mgr; 855fb4d8502Sjsg struct kfd_process_device *pdd; 856fb4d8502Sjsg bool prev_active = false; 857fb4d8502Sjsg 858fb4d8502Sjsg dqm_lock(dqm); 859fb4d8502Sjsg pdd = kfd_get_process_device_data(q->device, q->process); 860fb4d8502Sjsg if (!pdd) { 861fb4d8502Sjsg retval = -ENODEV; 862fb4d8502Sjsg goto out_unlock; 863fb4d8502Sjsg } 864c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 865c349dbc7Sjsg q->properties.type)]; 866fb4d8502Sjsg 867fb4d8502Sjsg /* Save previous activity state for counters */ 868fb4d8502Sjsg prev_active = q->properties.is_active; 869fb4d8502Sjsg 870fb4d8502Sjsg /* Make sure the queue is unmapped before updating the MQD */ 871fb4d8502Sjsg if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 872f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 873fb4d8502Sjsg retval = unmap_queues_cpsch(dqm, 874f005ef32Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); 8751bb76ff1Sjsg else if (prev_active) 8761bb76ff1Sjsg retval = remove_queue_mes(dqm, q, &pdd->qpd); 8771bb76ff1Sjsg 878fb4d8502Sjsg if (retval) { 879fb4d8502Sjsg pr_err("unmap queue failed\n"); 880fb4d8502Sjsg goto out_unlock; 881fb4d8502Sjsg } 882fb4d8502Sjsg } else if (prev_active && 883fb4d8502Sjsg (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 
884c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA || 885c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 886c349dbc7Sjsg 887c349dbc7Sjsg if (!dqm->sched_running) { 888c349dbc7Sjsg WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 889c349dbc7Sjsg goto out_unlock; 890c349dbc7Sjsg } 891c349dbc7Sjsg 892fb4d8502Sjsg retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 893f005ef32Sjsg (dqm->dev->kfd->cwsr_enabled ? 8945ca02815Sjsg KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 8955ca02815Sjsg KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 896fb4d8502Sjsg KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 897fb4d8502Sjsg if (retval) { 898fb4d8502Sjsg pr_err("destroy mqd failed\n"); 899fb4d8502Sjsg goto out_unlock; 900fb4d8502Sjsg } 901fb4d8502Sjsg } 902fb4d8502Sjsg 9031bb76ff1Sjsg mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 904fb4d8502Sjsg 905fb4d8502Sjsg /* 906fb4d8502Sjsg * check active state vs. the previous state and modify 907fb4d8502Sjsg * counter accordingly. map_queues_cpsch uses the 908c349dbc7Sjsg * dqm->active_queue_count to determine whether a new runlist must be 909fb4d8502Sjsg * uploaded. 910fb4d8502Sjsg */ 9111285848aSjsg if (q->properties.is_active && !prev_active) { 9121285848aSjsg increment_queue_count(dqm, &pdd->qpd, q); 9131285848aSjsg } else if (!q->properties.is_active && prev_active) { 9141285848aSjsg decrement_queue_count(dqm, &pdd->qpd, q); 9151285848aSjsg } else if (q->gws && !q->properties.is_gws) { 916ad8b1aafSjsg if (q->properties.is_active) { 917ad8b1aafSjsg dqm->gws_queue_count++; 918ad8b1aafSjsg pdd->qpd.mapped_gws_queue = true; 919ad8b1aafSjsg } 920ad8b1aafSjsg q->properties.is_gws = true; 921ad8b1aafSjsg } else if (!q->gws && q->properties.is_gws) { 922ad8b1aafSjsg if (q->properties.is_active) { 923ad8b1aafSjsg dqm->gws_queue_count--; 924ad8b1aafSjsg pdd->qpd.mapped_gws_queue = false; 925ad8b1aafSjsg } 926ad8b1aafSjsg q->properties.is_gws = false; 927ad8b1aafSjsg } 928ad8b1aafSjsg 9291bb76ff1Sjsg if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 930f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 931fb4d8502Sjsg retval = map_queues_cpsch(dqm); 9321bb76ff1Sjsg else if (q->properties.is_active) 9331bb76ff1Sjsg retval = add_queue_mes(dqm, q, &pdd->qpd); 9341bb76ff1Sjsg } else if (q->properties.is_active && 935fb4d8502Sjsg (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 936c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA || 937c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 938fb4d8502Sjsg if (WARN(q->process->mm != current->mm, 939fb4d8502Sjsg "should only run in user thread")) 940fb4d8502Sjsg retval = -EFAULT; 941fb4d8502Sjsg else 942fb4d8502Sjsg retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 943fb4d8502Sjsg q->pipe, q->queue, 944fb4d8502Sjsg &q->properties, current->mm); 945fb4d8502Sjsg } 946fb4d8502Sjsg 947fb4d8502Sjsg out_unlock: 948fb4d8502Sjsg dqm_unlock(dqm); 949fb4d8502Sjsg return retval; 950fb4d8502Sjsg } 951fb4d8502Sjsg 952f005ef32Sjsg /* suspend_single_queue does not lock the dqm like the 953f005ef32Sjsg * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should 954f005ef32Sjsg * lock the dqm before calling, and unlock after calling. 955f005ef32Sjsg * 956f005ef32Sjsg * The reason we don't lock the dqm is because this function may be 957f005ef32Sjsg * called on multiple queues in a loop, so rather than locking/unlocking 958f005ef32Sjsg * multiple times, we will just keep the dqm locked for all of the calls. 
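 *
 * Illustrative caller-side sketch only (the loop shape is assumed, not
 * taken from an actual caller):
 *
 *   dqm_lock(dqm);
 *   list_for_each_entry(q, &pdd->qpd.queues_list, list)
 *           suspend_single_queue(dqm, pdd, q);
 *   dqm_unlock(dqm);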
959f005ef32Sjsg */ 960f005ef32Sjsg static int suspend_single_queue(struct device_queue_manager *dqm, 961f005ef32Sjsg struct kfd_process_device *pdd, 962f005ef32Sjsg struct queue *q) 963f005ef32Sjsg { 964f005ef32Sjsg bool is_new; 965f005ef32Sjsg 966f005ef32Sjsg if (q->properties.is_suspended) 967f005ef32Sjsg return 0; 968f005ef32Sjsg 969f005ef32Sjsg pr_debug("Suspending PASID %u queue [%i]\n", 970f005ef32Sjsg pdd->process->pasid, 971f005ef32Sjsg q->properties.queue_id); 972f005ef32Sjsg 973f005ef32Sjsg is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); 974f005ef32Sjsg 975f005ef32Sjsg if (is_new || q->properties.is_being_destroyed) { 976f005ef32Sjsg pr_debug("Suspend: skip %s queue id %i\n", 977f005ef32Sjsg is_new ? "new" : "destroyed", 978f005ef32Sjsg q->properties.queue_id); 979f005ef32Sjsg return -EBUSY; 980f005ef32Sjsg } 981f005ef32Sjsg 982f005ef32Sjsg q->properties.is_suspended = true; 983f005ef32Sjsg if (q->properties.is_active) { 984f005ef32Sjsg if (dqm->dev->kfd->shared_resources.enable_mes) { 985f005ef32Sjsg int r = remove_queue_mes(dqm, q, &pdd->qpd); 986f005ef32Sjsg 987f005ef32Sjsg if (r) 988f005ef32Sjsg return r; 989f005ef32Sjsg } 990f005ef32Sjsg 991f005ef32Sjsg decrement_queue_count(dqm, &pdd->qpd, q); 992f005ef32Sjsg q->properties.is_active = false; 993f005ef32Sjsg } 994f005ef32Sjsg 995f005ef32Sjsg return 0; 996f005ef32Sjsg } 997f005ef32Sjsg 998f005ef32Sjsg /* resume_single_queue does not lock the dqm like the functions 999f005ef32Sjsg * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should 1000f005ef32Sjsg * lock the dqm before calling, and unlock after calling. 1001f005ef32Sjsg * 1002f005ef32Sjsg * The reason we don't lock the dqm is because this function may be 1003f005ef32Sjsg * called on multiple queues in a loop, so rather than locking/unlocking 1004f005ef32Sjsg * multiple times, we will just keep the dqm locked for all of the calls. 
1005f005ef32Sjsg */ 1006f005ef32Sjsg static int resume_single_queue(struct device_queue_manager *dqm, 1007f005ef32Sjsg struct qcm_process_device *qpd, 1008f005ef32Sjsg struct queue *q) 1009f005ef32Sjsg { 1010f005ef32Sjsg struct kfd_process_device *pdd; 1011f005ef32Sjsg 1012f005ef32Sjsg if (!q->properties.is_suspended) 1013f005ef32Sjsg return 0; 1014f005ef32Sjsg 1015f005ef32Sjsg pdd = qpd_to_pdd(qpd); 1016f005ef32Sjsg 1017f005ef32Sjsg pr_debug("Restoring from suspend PASID %u queue [%i]\n", 1018f005ef32Sjsg pdd->process->pasid, 1019f005ef32Sjsg q->properties.queue_id); 1020f005ef32Sjsg 1021f005ef32Sjsg q->properties.is_suspended = false; 1022f005ef32Sjsg 1023f005ef32Sjsg if (QUEUE_IS_ACTIVE(q->properties)) { 1024f005ef32Sjsg if (dqm->dev->kfd->shared_resources.enable_mes) { 1025f005ef32Sjsg int r = add_queue_mes(dqm, q, &pdd->qpd); 1026f005ef32Sjsg 1027f005ef32Sjsg if (r) 1028f005ef32Sjsg return r; 1029f005ef32Sjsg } 1030f005ef32Sjsg 1031f005ef32Sjsg q->properties.is_active = true; 1032f005ef32Sjsg increment_queue_count(dqm, qpd, q); 1033f005ef32Sjsg } 1034f005ef32Sjsg 1035f005ef32Sjsg return 0; 1036f005ef32Sjsg } 1037f005ef32Sjsg 1038fb4d8502Sjsg static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1039fb4d8502Sjsg struct qcm_process_device *qpd) 1040fb4d8502Sjsg { 1041fb4d8502Sjsg struct queue *q; 1042fb4d8502Sjsg struct mqd_manager *mqd_mgr; 1043fb4d8502Sjsg struct kfd_process_device *pdd; 1044c349dbc7Sjsg int retval, ret = 0; 1045fb4d8502Sjsg 1046fb4d8502Sjsg dqm_lock(dqm); 1047fb4d8502Sjsg if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1048fb4d8502Sjsg goto out; 1049fb4d8502Sjsg 1050fb4d8502Sjsg pdd = qpd_to_pdd(qpd); 1051ad8b1aafSjsg pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1052fb4d8502Sjsg pdd->process->pasid); 1053fb4d8502Sjsg 1054ad8b1aafSjsg pdd->last_evict_timestamp = get_jiffies_64(); 1055c349dbc7Sjsg /* Mark all queues as evicted. Deactivate all active queues on 1056c349dbc7Sjsg * the qpd. 1057c349dbc7Sjsg */ 1058fb4d8502Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 1059c349dbc7Sjsg q->properties.is_evicted = true; 1060fb4d8502Sjsg if (!q->properties.is_active) 1061fb4d8502Sjsg continue; 1062c349dbc7Sjsg 1063c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1064c349dbc7Sjsg q->properties.type)]; 1065fb4d8502Sjsg q->properties.is_active = false; 10661285848aSjsg decrement_queue_count(dqm, qpd, q); 1067c349dbc7Sjsg 1068c349dbc7Sjsg if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1069c349dbc7Sjsg continue; 1070c349dbc7Sjsg 1071fb4d8502Sjsg retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1072f005ef32Sjsg (dqm->dev->kfd->cwsr_enabled ? 
10735ca02815Sjsg KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 10745ca02815Sjsg KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1075fb4d8502Sjsg KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1076c349dbc7Sjsg if (retval && !ret) 1077c349dbc7Sjsg /* Return the first error, but keep going to 1078c349dbc7Sjsg * maintain a consistent eviction state 1079c349dbc7Sjsg */ 1080c349dbc7Sjsg ret = retval; 1081fb4d8502Sjsg } 1082fb4d8502Sjsg 1083fb4d8502Sjsg out: 1084fb4d8502Sjsg dqm_unlock(dqm); 1085c349dbc7Sjsg return ret; 1086fb4d8502Sjsg } 1087fb4d8502Sjsg 1088fb4d8502Sjsg static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1089fb4d8502Sjsg struct qcm_process_device *qpd) 1090fb4d8502Sjsg { 1091fb4d8502Sjsg struct queue *q; 1092fb4d8502Sjsg struct kfd_process_device *pdd; 1093fb4d8502Sjsg int retval = 0; 1094fb4d8502Sjsg 1095fb4d8502Sjsg dqm_lock(dqm); 1096fb4d8502Sjsg if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1097fb4d8502Sjsg goto out; 1098fb4d8502Sjsg 1099fb4d8502Sjsg pdd = qpd_to_pdd(qpd); 1100f005ef32Sjsg 1101f005ef32Sjsg /* The debugger creates processes that temporarily have not acquired 1102f005ef32Sjsg * all VMs for all devices and has no VMs itself. 1103f005ef32Sjsg * Skip queue eviction on process eviction. 1104f005ef32Sjsg */ 1105f005ef32Sjsg if (!pdd->drm_priv) 1106f005ef32Sjsg goto out; 1107f005ef32Sjsg 1108ad8b1aafSjsg pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1109fb4d8502Sjsg pdd->process->pasid); 1110fb4d8502Sjsg 1111c349dbc7Sjsg /* Mark all queues as evicted. Deactivate all active queues on 1112c349dbc7Sjsg * the qpd. 1113c349dbc7Sjsg */ 1114fb4d8502Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 1115c349dbc7Sjsg q->properties.is_evicted = true; 1116fb4d8502Sjsg if (!q->properties.is_active) 1117fb4d8502Sjsg continue; 1118c349dbc7Sjsg 1119fb4d8502Sjsg q->properties.is_active = false; 11201285848aSjsg decrement_queue_count(dqm, qpd, q); 11211bb76ff1Sjsg 1122f005ef32Sjsg if (dqm->dev->kfd->shared_resources.enable_mes) { 11231bb76ff1Sjsg retval = remove_queue_mes(dqm, q, qpd); 11241bb76ff1Sjsg if (retval) { 11251bb76ff1Sjsg pr_err("Failed to evict queue %d\n", 11261bb76ff1Sjsg q->properties.queue_id); 11271bb76ff1Sjsg goto out; 11281bb76ff1Sjsg } 11291bb76ff1Sjsg } 1130fb4d8502Sjsg } 1131ad8b1aafSjsg pdd->last_evict_timestamp = get_jiffies_64(); 1132f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 1133fb4d8502Sjsg retval = execute_queues_cpsch(dqm, 1134fb4d8502Sjsg qpd->is_debug ? 
1135fb4d8502Sjsg KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1136f005ef32Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1137f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD); 1138fb4d8502Sjsg 1139fb4d8502Sjsg out: 1140fb4d8502Sjsg dqm_unlock(dqm); 1141fb4d8502Sjsg return retval; 1142fb4d8502Sjsg } 1143fb4d8502Sjsg 1144fb4d8502Sjsg static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1145fb4d8502Sjsg struct qcm_process_device *qpd) 1146fb4d8502Sjsg { 1147fb4d8502Sjsg struct mm_struct *mm = NULL; 1148fb4d8502Sjsg struct queue *q; 1149fb4d8502Sjsg struct mqd_manager *mqd_mgr; 1150fb4d8502Sjsg struct kfd_process_device *pdd; 1151c349dbc7Sjsg uint64_t pd_base; 1152ad8b1aafSjsg uint64_t eviction_duration; 1153c349dbc7Sjsg int retval, ret = 0; 1154fb4d8502Sjsg 1155fb4d8502Sjsg pdd = qpd_to_pdd(qpd); 1156fb4d8502Sjsg /* Retrieve PD base */ 11575ca02815Sjsg pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1158fb4d8502Sjsg 1159fb4d8502Sjsg dqm_lock(dqm); 1160fb4d8502Sjsg if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1161fb4d8502Sjsg goto out; 1162fb4d8502Sjsg if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1163fb4d8502Sjsg qpd->evicted--; 1164fb4d8502Sjsg goto out; 1165fb4d8502Sjsg } 1166fb4d8502Sjsg 1167ad8b1aafSjsg pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1168fb4d8502Sjsg pdd->process->pasid); 1169fb4d8502Sjsg 1170fb4d8502Sjsg /* Update PD Base in QPD */ 1171fb4d8502Sjsg qpd->page_table_base = pd_base; 1172c349dbc7Sjsg pr_debug("Updated PD address to 0x%llx\n", pd_base); 1173fb4d8502Sjsg 1174fb4d8502Sjsg if (!list_empty(&qpd->queues_list)) { 1175fb4d8502Sjsg dqm->dev->kfd2kgd->set_vm_context_page_table_base( 11761bb76ff1Sjsg dqm->dev->adev, 1177fb4d8502Sjsg qpd->vmid, 1178fb4d8502Sjsg qpd->page_table_base); 11795ca02815Sjsg kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1180fb4d8502Sjsg } 1181fb4d8502Sjsg 1182fb4d8502Sjsg /* Take a safe reference to the mm_struct, which may otherwise 1183fb4d8502Sjsg * disappear even while the kfd_process is still referenced. 1184fb4d8502Sjsg */ 1185fb4d8502Sjsg mm = get_task_mm(pdd->process->lead_thread); 1186fb4d8502Sjsg if (!mm) { 1187c349dbc7Sjsg ret = -EFAULT; 1188fb4d8502Sjsg goto out; 1189fb4d8502Sjsg } 1190fb4d8502Sjsg 1191c349dbc7Sjsg /* Remove the eviction flags. Activate queues that are not 1192c349dbc7Sjsg * inactive for other reasons. 
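 * ("Inactive for other reasons" means queues that still fail
 * QUEUE_IS_ACTIVE(), e.g. a queue with a zero size, address or
 * percentage stays inactive even with the eviction flag cleared.)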
1193c349dbc7Sjsg */ 1194fb4d8502Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 1195fb4d8502Sjsg q->properties.is_evicted = false; 1196c349dbc7Sjsg if (!QUEUE_IS_ACTIVE(q->properties)) 1197c349dbc7Sjsg continue; 1198c349dbc7Sjsg 1199c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1200c349dbc7Sjsg q->properties.type)]; 1201fb4d8502Sjsg q->properties.is_active = true; 12021285848aSjsg increment_queue_count(dqm, qpd, q); 1203c349dbc7Sjsg 1204c349dbc7Sjsg if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1205c349dbc7Sjsg continue; 1206c349dbc7Sjsg 1207fb4d8502Sjsg retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1208fb4d8502Sjsg q->queue, &q->properties, mm); 1209c349dbc7Sjsg if (retval && !ret) 1210c349dbc7Sjsg /* Return the first error, but keep going to 1211c349dbc7Sjsg * maintain a consistent eviction state 1212c349dbc7Sjsg */ 1213c349dbc7Sjsg ret = retval; 1214fb4d8502Sjsg } 1215fb4d8502Sjsg qpd->evicted = 0; 1216ad8b1aafSjsg eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1217ad8b1aafSjsg atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1218fb4d8502Sjsg out: 1219fb4d8502Sjsg if (mm) 1220fb4d8502Sjsg mmput(mm); 1221fb4d8502Sjsg dqm_unlock(dqm); 1222c349dbc7Sjsg return ret; 1223fb4d8502Sjsg } 1224fb4d8502Sjsg 1225fb4d8502Sjsg static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1226fb4d8502Sjsg struct qcm_process_device *qpd) 1227fb4d8502Sjsg { 1228fb4d8502Sjsg struct queue *q; 1229fb4d8502Sjsg struct kfd_process_device *pdd; 1230ad8b1aafSjsg uint64_t eviction_duration; 1231fb4d8502Sjsg int retval = 0; 1232fb4d8502Sjsg 1233fb4d8502Sjsg pdd = qpd_to_pdd(qpd); 1234fb4d8502Sjsg 1235fb4d8502Sjsg dqm_lock(dqm); 1236fb4d8502Sjsg if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1237fb4d8502Sjsg goto out; 1238fb4d8502Sjsg if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1239fb4d8502Sjsg qpd->evicted--; 1240fb4d8502Sjsg goto out; 1241fb4d8502Sjsg } 1242fb4d8502Sjsg 1243f005ef32Sjsg /* The debugger creates processes that temporarily have not acquired 1244f005ef32Sjsg * all VMs for all devices and has no VMs itself. 1245f005ef32Sjsg * Skip queue restore on process restore. 
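 * (Such a process has not acquired a drm_priv for this device, which is
 * what the !pdd->drm_priv check below tests.)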
1246f005ef32Sjsg */ 1247f005ef32Sjsg if (!pdd->drm_priv) 1248f005ef32Sjsg goto vm_not_acquired; 1249f005ef32Sjsg 1250ad8b1aafSjsg pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1251fb4d8502Sjsg pdd->process->pasid); 1252fb4d8502Sjsg 1253fb4d8502Sjsg /* Update PD Base in QPD */ 1254f005ef32Sjsg qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1255f005ef32Sjsg pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1256fb4d8502Sjsg 1257fb4d8502Sjsg /* activate all active queues on the qpd */ 1258fb4d8502Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 1259fb4d8502Sjsg q->properties.is_evicted = false; 1260c349dbc7Sjsg if (!QUEUE_IS_ACTIVE(q->properties)) 1261c349dbc7Sjsg continue; 1262c349dbc7Sjsg 1263fb4d8502Sjsg q->properties.is_active = true; 12641285848aSjsg increment_queue_count(dqm, &pdd->qpd, q); 12651bb76ff1Sjsg 1266f005ef32Sjsg if (dqm->dev->kfd->shared_resources.enable_mes) { 12671bb76ff1Sjsg retval = add_queue_mes(dqm, q, qpd); 12681bb76ff1Sjsg if (retval) { 12691bb76ff1Sjsg pr_err("Failed to restore queue %d\n", 12701bb76ff1Sjsg q->properties.queue_id); 12711bb76ff1Sjsg goto out; 1272fb4d8502Sjsg } 12731bb76ff1Sjsg } 12741bb76ff1Sjsg } 1275f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 1276fb4d8502Sjsg retval = execute_queues_cpsch(dqm, 1277f005ef32Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1278ad8b1aafSjsg eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1279ad8b1aafSjsg atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1280f005ef32Sjsg vm_not_acquired: 1281f005ef32Sjsg qpd->evicted = 0; 1282fb4d8502Sjsg out: 1283fb4d8502Sjsg dqm_unlock(dqm); 1284fb4d8502Sjsg return retval; 1285fb4d8502Sjsg } 1286fb4d8502Sjsg 1287fb4d8502Sjsg static int register_process(struct device_queue_manager *dqm, 1288fb4d8502Sjsg struct qcm_process_device *qpd) 1289fb4d8502Sjsg { 1290fb4d8502Sjsg struct device_process_node *n; 1291fb4d8502Sjsg struct kfd_process_device *pdd; 1292c349dbc7Sjsg uint64_t pd_base; 1293fb4d8502Sjsg int retval; 1294fb4d8502Sjsg 1295fb4d8502Sjsg n = kzalloc(sizeof(*n), GFP_KERNEL); 1296fb4d8502Sjsg if (!n) 1297fb4d8502Sjsg return -ENOMEM; 1298fb4d8502Sjsg 1299fb4d8502Sjsg n->qpd = qpd; 1300fb4d8502Sjsg 1301fb4d8502Sjsg pdd = qpd_to_pdd(qpd); 1302fb4d8502Sjsg /* Retrieve PD base */ 13035ca02815Sjsg pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1304fb4d8502Sjsg 1305fb4d8502Sjsg dqm_lock(dqm); 1306fb4d8502Sjsg list_add(&n->list, &dqm->queues); 1307fb4d8502Sjsg 1308fb4d8502Sjsg /* Update PD Base in QPD */ 1309fb4d8502Sjsg qpd->page_table_base = pd_base; 1310c349dbc7Sjsg pr_debug("Updated PD address to 0x%llx\n", pd_base); 1311fb4d8502Sjsg 1312fb4d8502Sjsg retval = dqm->asic_ops.update_qpd(dqm, qpd); 1313fb4d8502Sjsg 1314c349dbc7Sjsg dqm->processes_count++; 1315fb4d8502Sjsg 1316fb4d8502Sjsg dqm_unlock(dqm); 1317fb4d8502Sjsg 1318c349dbc7Sjsg /* Outside the DQM lock because under the DQM lock we can't do 1319c349dbc7Sjsg * reclaim or take other locks that others hold while reclaiming. 
1320c349dbc7Sjsg */ 1321c349dbc7Sjsg kfd_inc_compute_active(dqm->dev); 1322c349dbc7Sjsg 1323fb4d8502Sjsg return retval; 1324fb4d8502Sjsg } 1325fb4d8502Sjsg 1326fb4d8502Sjsg static int unregister_process(struct device_queue_manager *dqm, 1327fb4d8502Sjsg struct qcm_process_device *qpd) 1328fb4d8502Sjsg { 1329fb4d8502Sjsg int retval; 1330fb4d8502Sjsg struct device_process_node *cur, *next; 1331fb4d8502Sjsg 1332fb4d8502Sjsg pr_debug("qpd->queues_list is %s\n", 1333fb4d8502Sjsg list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1334fb4d8502Sjsg 1335fb4d8502Sjsg retval = 0; 1336fb4d8502Sjsg dqm_lock(dqm); 1337fb4d8502Sjsg 1338fb4d8502Sjsg list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1339fb4d8502Sjsg if (qpd == cur->qpd) { 1340fb4d8502Sjsg list_del(&cur->list); 1341fb4d8502Sjsg kfree(cur); 1342c349dbc7Sjsg dqm->processes_count--; 1343fb4d8502Sjsg goto out; 1344fb4d8502Sjsg } 1345fb4d8502Sjsg } 1346fb4d8502Sjsg /* qpd not found in dqm list */ 1347fb4d8502Sjsg retval = 1; 1348fb4d8502Sjsg out: 1349fb4d8502Sjsg dqm_unlock(dqm); 1350c349dbc7Sjsg 1351c349dbc7Sjsg /* Outside the DQM lock because under the DQM lock we can't do 1352c349dbc7Sjsg * reclaim or take other locks that others hold while reclaiming. 1353c349dbc7Sjsg */ 1354c349dbc7Sjsg if (!retval) 1355c349dbc7Sjsg kfd_dec_compute_active(dqm->dev); 1356c349dbc7Sjsg 1357fb4d8502Sjsg return retval; 1358fb4d8502Sjsg } 1359fb4d8502Sjsg 1360fb4d8502Sjsg static int 1361ad8b1aafSjsg set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1362fb4d8502Sjsg unsigned int vmid) 1363fb4d8502Sjsg { 1364f005ef32Sjsg uint32_t xcc_mask = dqm->dev->xcc_mask; 1365f005ef32Sjsg int xcc_id, ret; 1366f005ef32Sjsg 1367f005ef32Sjsg for_each_inst(xcc_id, xcc_mask) { 1368f005ef32Sjsg ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1369f005ef32Sjsg dqm->dev->adev, pasid, vmid, xcc_id); 1370f005ef32Sjsg if (ret) 1371f005ef32Sjsg break; 1372f005ef32Sjsg } 1373f005ef32Sjsg 1374f005ef32Sjsg return ret; 1375fb4d8502Sjsg } 1376fb4d8502Sjsg 1377fb4d8502Sjsg static void init_interrupts(struct device_queue_manager *dqm) 1378fb4d8502Sjsg { 1379f005ef32Sjsg uint32_t xcc_mask = dqm->dev->xcc_mask; 1380f005ef32Sjsg unsigned int i, xcc_id; 1381fb4d8502Sjsg 1382f005ef32Sjsg for_each_inst(xcc_id, xcc_mask) { 1383f005ef32Sjsg for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1384f005ef32Sjsg if (is_pipe_enabled(dqm, 0, i)) { 1385f005ef32Sjsg dqm->dev->kfd2kgd->init_interrupts( 1386f005ef32Sjsg dqm->dev->adev, i, xcc_id); 13871bb76ff1Sjsg } 1388f005ef32Sjsg } 1389f005ef32Sjsg } 1390fb4d8502Sjsg } 1391fb4d8502Sjsg 1392fb4d8502Sjsg static int initialize_nocpsch(struct device_queue_manager *dqm) 1393fb4d8502Sjsg { 1394fb4d8502Sjsg int pipe, queue; 1395fb4d8502Sjsg 1396fb4d8502Sjsg pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1397fb4d8502Sjsg 1398fb4d8502Sjsg dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1399fb4d8502Sjsg sizeof(unsigned int), GFP_KERNEL); 1400fb4d8502Sjsg if (!dqm->allocated_queues) 1401fb4d8502Sjsg return -ENOMEM; 1402fb4d8502Sjsg 1403ad8b1aafSjsg mutex_init(&dqm->lock_hidden); 1404fb4d8502Sjsg INIT_LIST_HEAD(&dqm->queues); 1405c349dbc7Sjsg dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1406c349dbc7Sjsg dqm->active_cp_queue_count = 0; 1407ad8b1aafSjsg dqm->gws_queue_count = 0; 1408fb4d8502Sjsg 1409fb4d8502Sjsg for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1410fb4d8502Sjsg int pipe_offset = pipe * get_queues_per_pipe(dqm); 1411fb4d8502Sjsg 1412fb4d8502Sjsg for (queue = 0; queue < get_queues_per_pipe(dqm); 
queue++) 1413fb4d8502Sjsg if (test_bit(pipe_offset + queue, 1414f005ef32Sjsg dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1415fb4d8502Sjsg dqm->allocated_queues[pipe] |= 1 << queue; 1416fb4d8502Sjsg } 1417fb4d8502Sjsg 1418c349dbc7Sjsg memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1419c349dbc7Sjsg 14201bb76ff1Sjsg init_sdma_bitmaps(dqm); 1421fb4d8502Sjsg 1422fb4d8502Sjsg return 0; 1423fb4d8502Sjsg } 1424fb4d8502Sjsg 1425fb4d8502Sjsg static void uninitialize(struct device_queue_manager *dqm) 1426fb4d8502Sjsg { 1427fb4d8502Sjsg int i; 1428fb4d8502Sjsg 1429c349dbc7Sjsg WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1430fb4d8502Sjsg 1431fb4d8502Sjsg kfree(dqm->allocated_queues); 1432fb4d8502Sjsg for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1433fb4d8502Sjsg kfree(dqm->mqd_mgrs[i]); 1434fb4d8502Sjsg mutex_destroy(&dqm->lock_hidden); 1435fb4d8502Sjsg } 1436fb4d8502Sjsg 1437fb4d8502Sjsg static int start_nocpsch(struct device_queue_manager *dqm) 1438fb4d8502Sjsg { 14391bb76ff1Sjsg int r = 0; 14401bb76ff1Sjsg 1441c349dbc7Sjsg pr_info("SW scheduler is used"); 1442fb4d8502Sjsg init_interrupts(dqm); 1443c349dbc7Sjsg 14441bb76ff1Sjsg if (dqm->dev->adev->asic_type == CHIP_HAWAII) 14451bb76ff1Sjsg r = pm_init(&dqm->packet_mgr, dqm); 14461bb76ff1Sjsg if (!r) 1447c349dbc7Sjsg dqm->sched_running = true; 1448c349dbc7Sjsg 14491bb76ff1Sjsg return r; 1450fb4d8502Sjsg } 1451fb4d8502Sjsg 1452fb4d8502Sjsg static int stop_nocpsch(struct device_queue_manager *dqm) 1453fb4d8502Sjsg { 1454f005ef32Sjsg dqm_lock(dqm); 1455f005ef32Sjsg if (!dqm->sched_running) { 1456f005ef32Sjsg dqm_unlock(dqm); 1457f005ef32Sjsg return 0; 1458f005ef32Sjsg } 1459f005ef32Sjsg 14601bb76ff1Sjsg if (dqm->dev->adev->asic_type == CHIP_HAWAII) 14615ca02815Sjsg pm_uninit(&dqm->packet_mgr, false); 1462c349dbc7Sjsg dqm->sched_running = false; 1463f005ef32Sjsg dqm_unlock(dqm); 1464c349dbc7Sjsg 1465fb4d8502Sjsg return 0; 1466fb4d8502Sjsg } 1467fb4d8502Sjsg 1468c349dbc7Sjsg static void pre_reset(struct device_queue_manager *dqm) 1469c349dbc7Sjsg { 1470c349dbc7Sjsg dqm_lock(dqm); 1471c349dbc7Sjsg dqm->is_resetting = true; 1472c349dbc7Sjsg dqm_unlock(dqm); 1473c349dbc7Sjsg } 1474c349dbc7Sjsg 1475fb4d8502Sjsg static int allocate_sdma_queue(struct device_queue_manager *dqm, 14761bb76ff1Sjsg struct queue *q, const uint32_t *restore_sdma_id) 1477fb4d8502Sjsg { 1478fb4d8502Sjsg int bit; 1479fb4d8502Sjsg 1480c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1481f005ef32Sjsg if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1482c349dbc7Sjsg pr_err("No more SDMA queue to allocate\n"); 1483fb4d8502Sjsg return -ENOMEM; 1484c349dbc7Sjsg } 1485fb4d8502Sjsg 14861bb76ff1Sjsg if (restore_sdma_id) { 14871bb76ff1Sjsg /* Re-use existing sdma_id */ 1488f005ef32Sjsg if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 14891bb76ff1Sjsg pr_err("SDMA queue already in use\n"); 14901bb76ff1Sjsg return -EBUSY; 14911bb76ff1Sjsg } 1492f005ef32Sjsg clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 14931bb76ff1Sjsg q->sdma_id = *restore_sdma_id; 14941bb76ff1Sjsg } else { 14951bb76ff1Sjsg /* Find first available sdma_id */ 1496f005ef32Sjsg bit = find_first_bit(dqm->sdma_bitmap, 1497f005ef32Sjsg get_num_sdma_queues(dqm)); 1498f005ef32Sjsg clear_bit(bit, dqm->sdma_bitmap); 1499c349dbc7Sjsg q->sdma_id = bit; 15001bb76ff1Sjsg } 15011bb76ff1Sjsg 1502f005ef32Sjsg q->properties.sdma_engine_id = 1503f005ef32Sjsg q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1504c349dbc7Sjsg q->properties.sdma_queue_id = q->sdma_id / 15051bb76ff1Sjsg 
kfd_get_num_sdma_engines(dqm->dev); 1506c349dbc7Sjsg } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1507f005ef32Sjsg if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1508c349dbc7Sjsg pr_err("No more XGMI SDMA queue to allocate\n"); 1509c349dbc7Sjsg return -ENOMEM; 1510c349dbc7Sjsg } 15111bb76ff1Sjsg if (restore_sdma_id) { 15121bb76ff1Sjsg /* Re-use existing sdma_id */ 1513f005ef32Sjsg if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 15141bb76ff1Sjsg pr_err("SDMA queue already in use\n"); 15151bb76ff1Sjsg return -EBUSY; 15161bb76ff1Sjsg } 1517f005ef32Sjsg clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 15181bb76ff1Sjsg q->sdma_id = *restore_sdma_id; 15191bb76ff1Sjsg } else { 1520f005ef32Sjsg bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1521f005ef32Sjsg get_num_xgmi_sdma_queues(dqm)); 1522f005ef32Sjsg clear_bit(bit, dqm->xgmi_sdma_bitmap); 1523c349dbc7Sjsg q->sdma_id = bit; 15241bb76ff1Sjsg } 1525c349dbc7Sjsg /* sdma_engine_id is sdma id including 1526c349dbc7Sjsg * both PCIe-optimized SDMAs and XGMI- 1527c349dbc7Sjsg * optimized SDMAs. The calculation below 1528c349dbc7Sjsg * assumes the first N engines are always 1529c349dbc7Sjsg * PCIe-optimized ones 1530c349dbc7Sjsg */ 15311bb76ff1Sjsg q->properties.sdma_engine_id = 15321bb76ff1Sjsg kfd_get_num_sdma_engines(dqm->dev) + 15331bb76ff1Sjsg q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1534c349dbc7Sjsg q->properties.sdma_queue_id = q->sdma_id / 15351bb76ff1Sjsg kfd_get_num_xgmi_sdma_engines(dqm->dev); 1536c349dbc7Sjsg } 1537c349dbc7Sjsg 1538c349dbc7Sjsg pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1539c349dbc7Sjsg pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1540fb4d8502Sjsg 1541fb4d8502Sjsg return 0; 1542fb4d8502Sjsg } 1543fb4d8502Sjsg 1544fb4d8502Sjsg static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1545c349dbc7Sjsg struct queue *q) 1546fb4d8502Sjsg { 1547c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1548c349dbc7Sjsg if (q->sdma_id >= get_num_sdma_queues(dqm)) 1549fb4d8502Sjsg return; 1550f005ef32Sjsg set_bit(q->sdma_id, dqm->sdma_bitmap); 1551c349dbc7Sjsg } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1552c349dbc7Sjsg if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1553c349dbc7Sjsg return; 1554f005ef32Sjsg set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1555fb4d8502Sjsg } 1556fb4d8502Sjsg } 1557fb4d8502Sjsg 1558fb4d8502Sjsg /* 1559fb4d8502Sjsg * Device Queue Manager implementation for cp scheduler 1560fb4d8502Sjsg */ 1561fb4d8502Sjsg 1562fb4d8502Sjsg static int set_sched_resources(struct device_queue_manager *dqm) 1563fb4d8502Sjsg { 1564fb4d8502Sjsg int i, mec; 1565fb4d8502Sjsg struct scheduling_resources res; 1566fb4d8502Sjsg 1567f005ef32Sjsg res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1568fb4d8502Sjsg 1569fb4d8502Sjsg res.queue_mask = 0; 1570fb4d8502Sjsg for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1571f005ef32Sjsg mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1572f005ef32Sjsg / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1573fb4d8502Sjsg 1574f005ef32Sjsg if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1575fb4d8502Sjsg continue; 1576fb4d8502Sjsg 1577fb4d8502Sjsg /* only acquire queues from the first MEC */ 1578fb4d8502Sjsg if (mec > 0) 1579fb4d8502Sjsg continue; 1580fb4d8502Sjsg 1581fb4d8502Sjsg /* This situation may be hit in the future if a new HW 1582fb4d8502Sjsg * generation exposes more than 64 queues. 
If so, the 1583fb4d8502Sjsg * definition of res.queue_mask needs updating 1584fb4d8502Sjsg */ 1585fb4d8502Sjsg if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1586fb4d8502Sjsg pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1587fb4d8502Sjsg break; 1588fb4d8502Sjsg } 1589fb4d8502Sjsg 1590ad8b1aafSjsg res.queue_mask |= 1ull 1591ad8b1aafSjsg << amdgpu_queue_mask_bit_to_set_resource_bit( 15921bb76ff1Sjsg dqm->dev->adev, i); 1593fb4d8502Sjsg } 1594c349dbc7Sjsg res.gws_mask = ~0ull; 1595c349dbc7Sjsg res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1596fb4d8502Sjsg 1597fb4d8502Sjsg pr_debug("Scheduling resources:\n" 1598fb4d8502Sjsg "vmid mask: 0x%8X\n" 1599fb4d8502Sjsg "queue mask: 0x%8llX\n", 1600fb4d8502Sjsg res.vmid_mask, res.queue_mask); 1601fb4d8502Sjsg 16025ca02815Sjsg return pm_send_set_resources(&dqm->packet_mgr, &res); 1603fb4d8502Sjsg } 1604fb4d8502Sjsg 1605fb4d8502Sjsg static int initialize_cpsch(struct device_queue_manager *dqm) 1606fb4d8502Sjsg { 1607fb4d8502Sjsg pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1608fb4d8502Sjsg 1609ad8b1aafSjsg mutex_init(&dqm->lock_hidden); 1610fb4d8502Sjsg INIT_LIST_HEAD(&dqm->queues); 1611c349dbc7Sjsg dqm->active_queue_count = dqm->processes_count = 0; 1612c349dbc7Sjsg dqm->active_cp_queue_count = 0; 1613ad8b1aafSjsg dqm->gws_queue_count = 0; 1614fb4d8502Sjsg dqm->active_runlist = false; 1615fb4d8502Sjsg INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1616f005ef32Sjsg dqm->trap_debug_vmid = 0; 1617fb4d8502Sjsg 16181bb76ff1Sjsg init_sdma_bitmaps(dqm); 16191bb76ff1Sjsg 1620f005ef32Sjsg if (dqm->dev->kfd2kgd->get_iq_wait_times) 1621f005ef32Sjsg dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, 1622f005ef32Sjsg &dqm->wait_times, 1623f005ef32Sjsg ffs(dqm->dev->xcc_mask) - 1); 1624fb4d8502Sjsg return 0; 1625fb4d8502Sjsg } 1626fb4d8502Sjsg 1627fb4d8502Sjsg static int start_cpsch(struct device_queue_manager *dqm) 1628fb4d8502Sjsg { 1629fb4d8502Sjsg int retval; 1630fb4d8502Sjsg 1631fb4d8502Sjsg retval = 0; 1632fb4d8502Sjsg 16335ca02815Sjsg dqm_lock(dqm); 16341bb76ff1Sjsg 1635f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) { 16365ca02815Sjsg retval = pm_init(&dqm->packet_mgr, dqm); 1637fb4d8502Sjsg if (retval) 1638fb4d8502Sjsg goto fail_packet_manager_init; 1639fb4d8502Sjsg 1640fb4d8502Sjsg retval = set_sched_resources(dqm); 1641fb4d8502Sjsg if (retval) 1642fb4d8502Sjsg goto fail_set_sched_resources; 16431bb76ff1Sjsg } 1644fb4d8502Sjsg pr_debug("Allocating fence memory\n"); 1645fb4d8502Sjsg 1646fb4d8502Sjsg /* allocate fence memory on the gart */ 1647fb4d8502Sjsg retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1648fb4d8502Sjsg &dqm->fence_mem); 1649fb4d8502Sjsg 1650fb4d8502Sjsg if (retval) 1651fb4d8502Sjsg goto fail_allocate_vidmem; 1652fb4d8502Sjsg 1653ad8b1aafSjsg dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1654fb4d8502Sjsg dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1655fb4d8502Sjsg 1656fb4d8502Sjsg init_interrupts(dqm); 1657fb4d8502Sjsg 1658fb4d8502Sjsg /* clear hang status when driver try to start the hw scheduler */ 1659fb4d8502Sjsg dqm->is_hws_hang = false; 1660c349dbc7Sjsg dqm->is_resetting = false; 1661c349dbc7Sjsg dqm->sched_running = true; 1662f005ef32Sjsg 1663f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 1664f005ef32Sjsg execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1665f005ef32Sjsg 1666f005ef32Sjsg /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ 1667f005ef32Sjsg if (amdgpu_emu_mode == 
0 && dqm->dev->adev->gmc.is_app_apu &&
1668f005ef32Sjsg (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
1669f005ef32Sjsg uint32_t reg_offset = 0;
1670f005ef32Sjsg uint32_t grace_period = 1;
1671f005ef32Sjsg 
1672f005ef32Sjsg retval = pm_update_grace_period(&dqm->packet_mgr,
1673f005ef32Sjsg grace_period);
1674f005ef32Sjsg if (retval)
1675f005ef32Sjsg pr_err("Setting grace timeout failed\n");
1676f005ef32Sjsg else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
1677f005ef32Sjsg /* Update dqm->wait_times maintained in software */
1678f005ef32Sjsg dqm->dev->kfd2kgd->build_grace_period_packet_info(
1679f005ef32Sjsg dqm->dev->adev, dqm->wait_times,
1680f005ef32Sjsg grace_period, &reg_offset,
1681f005ef32Sjsg &dqm->wait_times);
1682f005ef32Sjsg }
1683f005ef32Sjsg 
1684fb4d8502Sjsg dqm_unlock(dqm);
1685fb4d8502Sjsg 
1686fb4d8502Sjsg return 0;
1687fb4d8502Sjsg fail_allocate_vidmem:
1688fb4d8502Sjsg fail_set_sched_resources:
1689f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes)
16905ca02815Sjsg pm_uninit(&dqm->packet_mgr, false);
1691fb4d8502Sjsg fail_packet_manager_init:
16925ca02815Sjsg dqm_unlock(dqm);
1693fb4d8502Sjsg return retval;
1694fb4d8502Sjsg }
1695fb4d8502Sjsg 
1696fb4d8502Sjsg static int stop_cpsch(struct device_queue_manager *dqm)
1697fb4d8502Sjsg {
1698c349dbc7Sjsg bool hanging;
1699c349dbc7Sjsg 
1700fb4d8502Sjsg dqm_lock(dqm);
170138326f19Sjsg if (!dqm->sched_running) {
170238326f19Sjsg dqm_unlock(dqm);
170338326f19Sjsg return 0;
170438326f19Sjsg }
170538326f19Sjsg 
17061bb76ff1Sjsg if (!dqm->is_hws_hang) {
1707f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes)
1708f005ef32Sjsg unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
17091bb76ff1Sjsg else
17101bb76ff1Sjsg remove_all_queues_mes(dqm);
17111bb76ff1Sjsg }
17121bb76ff1Sjsg 
1713c349dbc7Sjsg hanging = dqm->is_hws_hang || dqm->is_resetting;
1714c349dbc7Sjsg dqm->sched_running = false;
1715fb4d8502Sjsg 
1716f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes)
17175ca02815Sjsg pm_release_ib(&dqm->packet_mgr);
1718ad8b1aafSjsg 
1719fb4d8502Sjsg kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1720f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes)
17215ca02815Sjsg pm_uninit(&dqm->packet_mgr, hanging);
17225ca02815Sjsg dqm_unlock(dqm);
1723fb4d8502Sjsg 
1724fb4d8502Sjsg return 0;
1725fb4d8502Sjsg }
1726fb4d8502Sjsg 
1727fb4d8502Sjsg static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1728fb4d8502Sjsg struct kernel_queue *kq,
1729fb4d8502Sjsg struct qcm_process_device *qpd)
1730fb4d8502Sjsg {
1731fb4d8502Sjsg dqm_lock(dqm);
1732fb4d8502Sjsg if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1733fb4d8502Sjsg pr_warn("Can't create new kernel queue because %d queues were already created\n",
1734fb4d8502Sjsg dqm->total_queue_count);
1735fb4d8502Sjsg dqm_unlock(dqm);
1736fb4d8502Sjsg return -EPERM;
1737fb4d8502Sjsg }
1738fb4d8502Sjsg 
1739fb4d8502Sjsg /*
1740fb4d8502Sjsg * Unconditionally increment this counter, regardless of the queue's
1741fb4d8502Sjsg * type or whether the queue is active.
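 * This running total is what the max_num_of_queues_per_device check above
 * compares against when another queue is created.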
1742fb4d8502Sjsg */ 1743fb4d8502Sjsg dqm->total_queue_count++; 1744fb4d8502Sjsg pr_debug("Total of %d queues are accountable so far\n", 1745fb4d8502Sjsg dqm->total_queue_count); 1746fb4d8502Sjsg 1747fb4d8502Sjsg list_add(&kq->list, &qpd->priv_queue_list); 17481285848aSjsg increment_queue_count(dqm, qpd, kq->queue); 1749fb4d8502Sjsg qpd->is_debug = true; 1750f005ef32Sjsg execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1751f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD); 1752fb4d8502Sjsg dqm_unlock(dqm); 1753fb4d8502Sjsg 1754fb4d8502Sjsg return 0; 1755fb4d8502Sjsg } 1756fb4d8502Sjsg 1757fb4d8502Sjsg static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1758fb4d8502Sjsg struct kernel_queue *kq, 1759fb4d8502Sjsg struct qcm_process_device *qpd) 1760fb4d8502Sjsg { 1761fb4d8502Sjsg dqm_lock(dqm); 1762fb4d8502Sjsg list_del(&kq->list); 17631285848aSjsg decrement_queue_count(dqm, qpd, kq->queue); 1764fb4d8502Sjsg qpd->is_debug = false; 1765f005ef32Sjsg execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1766f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD); 1767fb4d8502Sjsg /* 1768fb4d8502Sjsg * Unconditionally decrement this counter, regardless of the queue's 1769fb4d8502Sjsg * type. 1770fb4d8502Sjsg */ 1771fb4d8502Sjsg dqm->total_queue_count--; 1772fb4d8502Sjsg pr_debug("Total of %d queues are accountable so far\n", 1773fb4d8502Sjsg dqm->total_queue_count); 1774fb4d8502Sjsg dqm_unlock(dqm); 1775fb4d8502Sjsg } 1776fb4d8502Sjsg 1777fb4d8502Sjsg static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 17781bb76ff1Sjsg struct qcm_process_device *qpd, 17791bb76ff1Sjsg const struct kfd_criu_queue_priv_data *qd, 17801bb76ff1Sjsg const void *restore_mqd, const void *restore_ctl_stack) 1781fb4d8502Sjsg { 1782fb4d8502Sjsg int retval; 1783fb4d8502Sjsg struct mqd_manager *mqd_mgr; 1784fb4d8502Sjsg 1785fb4d8502Sjsg if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1786fb4d8502Sjsg pr_warn("Can't create new usermode queue because %d queues were already created\n", 1787fb4d8502Sjsg dqm->total_queue_count); 1788fb4d8502Sjsg retval = -EPERM; 1789c349dbc7Sjsg goto out; 1790fb4d8502Sjsg } 1791fb4d8502Sjsg 1792c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1793c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1794c349dbc7Sjsg dqm_lock(dqm); 17951bb76ff1Sjsg retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1796c349dbc7Sjsg dqm_unlock(dqm); 1797fb4d8502Sjsg if (retval) 1798c349dbc7Sjsg goto out; 1799fb4d8502Sjsg } 1800fb4d8502Sjsg 18011bb76ff1Sjsg retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 1802fb4d8502Sjsg if (retval) 1803fb4d8502Sjsg goto out_deallocate_sdma_queue; 1804fb4d8502Sjsg 1805c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1806c349dbc7Sjsg q->properties.type)]; 1807fb4d8502Sjsg 1808c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1809c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1810c349dbc7Sjsg dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1811c349dbc7Sjsg q->properties.tba_addr = qpd->tba_addr; 1812c349dbc7Sjsg q->properties.tma_addr = qpd->tma_addr; 1813c349dbc7Sjsg q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1814c349dbc7Sjsg if (!q->mqd_mem_obj) { 1815fb4d8502Sjsg retval = -ENOMEM; 1816fb4d8502Sjsg goto out_deallocate_doorbell; 1817fb4d8502Sjsg } 1818c349dbc7Sjsg 1819c349dbc7Sjsg dqm_lock(dqm); 1820fb4d8502Sjsg /* 1821c349dbc7Sjsg * Eviction state logic: mark all queues as evicted, even ones 1822c349dbc7Sjsg * not currently active. Restoring inactive queues later only 1823c349dbc7Sjsg * updates the is_evicted flag but is a no-op otherwise. 1824fb4d8502Sjsg */ 1825c349dbc7Sjsg q->properties.is_evicted = !!qpd->evicted; 1826f005ef32Sjsg q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 1827f005ef32Sjsg kfd_dbg_has_cwsr_workaround(q->device); 18281bb76ff1Sjsg 18291bb76ff1Sjsg if (qd) 18301bb76ff1Sjsg mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 18311bb76ff1Sjsg &q->properties, restore_mqd, restore_ctl_stack, 18321bb76ff1Sjsg qd->ctl_stack_size); 18331bb76ff1Sjsg else 1834c349dbc7Sjsg mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1835fb4d8502Sjsg &q->gart_mqd_addr, &q->properties); 1836fb4d8502Sjsg 1837fb4d8502Sjsg list_add(&q->list, &qpd->queues_list); 1838fb4d8502Sjsg qpd->queue_count++; 1839c349dbc7Sjsg 1840fb4d8502Sjsg if (q->properties.is_active) { 18411285848aSjsg increment_queue_count(dqm, qpd, q); 1842c349dbc7Sjsg 1843f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 18441bb76ff1Sjsg retval = execute_queues_cpsch(dqm, 1845f005ef32Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 18461bb76ff1Sjsg else 18471bb76ff1Sjsg retval = add_queue_mes(dqm, q, qpd); 18481bb76ff1Sjsg if (retval) 18491bb76ff1Sjsg goto cleanup_queue; 1850fb4d8502Sjsg } 1851fb4d8502Sjsg 1852fb4d8502Sjsg /* 1853fb4d8502Sjsg * Unconditionally increment this counter, regardless of the queue's 1854fb4d8502Sjsg * type or whether the queue is active. 
1855fb4d8502Sjsg */ 1856fb4d8502Sjsg dqm->total_queue_count++; 1857fb4d8502Sjsg 1858fb4d8502Sjsg pr_debug("Total of %d queues are accountable so far\n", 1859fb4d8502Sjsg dqm->total_queue_count); 1860fb4d8502Sjsg 1861fb4d8502Sjsg dqm_unlock(dqm); 1862fb4d8502Sjsg return retval; 1863fb4d8502Sjsg 18641bb76ff1Sjsg cleanup_queue: 18651bb76ff1Sjsg qpd->queue_count--; 18661bb76ff1Sjsg list_del(&q->list); 18671bb76ff1Sjsg if (q->properties.is_active) 18681bb76ff1Sjsg decrement_queue_count(dqm, qpd, q); 18691bb76ff1Sjsg mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 18701bb76ff1Sjsg dqm_unlock(dqm); 1871fb4d8502Sjsg out_deallocate_doorbell: 1872fb4d8502Sjsg deallocate_doorbell(qpd, q); 1873fb4d8502Sjsg out_deallocate_sdma_queue: 1874c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1875c349dbc7Sjsg q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1876c349dbc7Sjsg dqm_lock(dqm); 1877c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 1878fb4d8502Sjsg dqm_unlock(dqm); 1879c349dbc7Sjsg } 1880c349dbc7Sjsg out: 1881fb4d8502Sjsg return retval; 1882fb4d8502Sjsg } 1883fb4d8502Sjsg 1884ad8b1aafSjsg int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1885ad8b1aafSjsg uint64_t fence_value, 1886fb4d8502Sjsg unsigned int timeout_ms) 1887fb4d8502Sjsg { 1888fb4d8502Sjsg unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1889fb4d8502Sjsg 1890fb4d8502Sjsg while (*fence_addr != fence_value) { 1891fb4d8502Sjsg if (time_after(jiffies, end_jiffies)) { 1892fb4d8502Sjsg pr_err("qcm fence wait loop timeout expired\n"); 1893fb4d8502Sjsg /* In HWS case, this is used to halt the driver thread 1894fb4d8502Sjsg * in order not to mess up CP states before doing 1895fb4d8502Sjsg * scandumps for FW debugging. 1896fb4d8502Sjsg */ 1897fb4d8502Sjsg while (halt_if_hws_hang) 1898fb4d8502Sjsg schedule(); 1899fb4d8502Sjsg 1900fb4d8502Sjsg return -ETIME; 1901fb4d8502Sjsg } 1902fb4d8502Sjsg schedule(); 1903fb4d8502Sjsg } 1904fb4d8502Sjsg 1905fb4d8502Sjsg return 0; 1906fb4d8502Sjsg } 1907fb4d8502Sjsg 1908fb4d8502Sjsg /* dqm->lock mutex has to be locked before calling this function */ 1909fb4d8502Sjsg static int map_queues_cpsch(struct device_queue_manager *dqm) 1910fb4d8502Sjsg { 1911fb4d8502Sjsg int retval; 1912fb4d8502Sjsg 1913c349dbc7Sjsg if (!dqm->sched_running) 1914fb4d8502Sjsg return 0; 1915c349dbc7Sjsg if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1916c349dbc7Sjsg return 0; 1917fb4d8502Sjsg if (dqm->active_runlist) 1918fb4d8502Sjsg return 0; 1919fb4d8502Sjsg 19205ca02815Sjsg retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1921c349dbc7Sjsg pr_debug("%s sent runlist\n", __func__); 1922fb4d8502Sjsg if (retval) { 1923fb4d8502Sjsg pr_err("failed to execute runlist\n"); 1924fb4d8502Sjsg return retval; 1925fb4d8502Sjsg } 1926fb4d8502Sjsg dqm->active_runlist = true; 1927fb4d8502Sjsg 1928fb4d8502Sjsg return retval; 1929fb4d8502Sjsg } 1930fb4d8502Sjsg 1931fb4d8502Sjsg /* dqm->lock mutex has to be locked before calling this function */ 1932fb4d8502Sjsg static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1933fb4d8502Sjsg enum kfd_unmap_queues_filter filter, 1934f005ef32Sjsg uint32_t filter_param, 1935f005ef32Sjsg uint32_t grace_period, 1936f005ef32Sjsg bool reset) 1937fb4d8502Sjsg { 1938fb4d8502Sjsg int retval = 0; 19395ca02815Sjsg struct mqd_manager *mqd_mgr; 1940fb4d8502Sjsg 1941c349dbc7Sjsg if (!dqm->sched_running) 1942c349dbc7Sjsg return 0; 19431bb76ff1Sjsg if (dqm->is_hws_hang || dqm->is_resetting) 1944fb4d8502Sjsg return -EIO; 1945fb4d8502Sjsg if (!dqm->active_runlist) 
1946fb4d8502Sjsg return retval;
1947fb4d8502Sjsg 
1948f005ef32Sjsg if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
1949f005ef32Sjsg retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
1950f005ef32Sjsg if (retval)
1951f005ef32Sjsg return retval;
1952f005ef32Sjsg }
1953f005ef32Sjsg 
19541bb76ff1Sjsg retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1955fb4d8502Sjsg if (retval)
1956fb4d8502Sjsg return retval;
1957fb4d8502Sjsg 
1958fb4d8502Sjsg *dqm->fence_addr = KFD_FENCE_INIT;
19595ca02815Sjsg pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1960fb4d8502Sjsg KFD_FENCE_COMPLETED);
1961fb4d8502Sjsg /* should be timed out */
1962fb4d8502Sjsg retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1963c349dbc7Sjsg queue_preemption_timeout_ms);
1964c349dbc7Sjsg if (retval) {
1965c349dbc7Sjsg pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
19661bb76ff1Sjsg kfd_hws_hang(dqm);
1967fb4d8502Sjsg return retval;
1968c349dbc7Sjsg }
1969fb4d8502Sjsg 
19705ca02815Sjsg /* In the current MEC firmware implementation, if a compute queue
19715ca02815Sjsg * doesn't respond to the preemption request in time, the HIQ will
19725ca02815Sjsg * abandon the unmap request without returning any timeout error
19735ca02815Sjsg * to the driver. Instead, MEC firmware will log the doorbell of the
19745ca02815Sjsg * unresponsive compute queue to the HIQ.MQD.queue_doorbell_id fields.
19755ca02815Sjsg * To make sure the queue unmap was successful, the driver needs to
19765ca02815Sjsg * check those fields.
19775ca02815Sjsg */
19785ca02815Sjsg mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
19795ca02815Sjsg if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
19805ca02815Sjsg pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
19815ca02815Sjsg while (halt_if_hws_hang)
19825ca02815Sjsg schedule();
1983afba13e9Sjsg kfd_hws_hang(dqm);
19845ca02815Sjsg return -ETIME;
19855ca02815Sjsg }
19865ca02815Sjsg 
1987f005ef32Sjsg /* We need to reset the grace period value for this device */
1988f005ef32Sjsg if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
1989f005ef32Sjsg if (pm_update_grace_period(&dqm->packet_mgr,
1990f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD))
1991f005ef32Sjsg pr_err("Failed to reset grace period\n");
1992f005ef32Sjsg }
1993f005ef32Sjsg 
19945ca02815Sjsg pm_release_ib(&dqm->packet_mgr);
1995fb4d8502Sjsg dqm->active_runlist = false;
1996fb4d8502Sjsg 
1997fb4d8502Sjsg return retval;
1998fb4d8502Sjsg }
1999fb4d8502Sjsg 
20001bb76ff1Sjsg /* only for compute queues */
20011bb76ff1Sjsg static int reset_queues_cpsch(struct device_queue_manager *dqm,
20021bb76ff1Sjsg uint16_t pasid)
20031bb76ff1Sjsg {
20041bb76ff1Sjsg int retval;
20051bb76ff1Sjsg 
20061bb76ff1Sjsg dqm_lock(dqm);
20071bb76ff1Sjsg 
20081bb76ff1Sjsg retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
2009f005ef32Sjsg pasid, USE_DEFAULT_GRACE_PERIOD, true);
20101bb76ff1Sjsg 
20111bb76ff1Sjsg dqm_unlock(dqm);
20121bb76ff1Sjsg return retval;
20131bb76ff1Sjsg }
20141bb76ff1Sjsg 
2015fb4d8502Sjsg /* dqm->lock mutex has to be locked before calling this function */
2016fb4d8502Sjsg static int execute_queues_cpsch(struct device_queue_manager *dqm,
2017fb4d8502Sjsg enum kfd_unmap_queues_filter filter,
2018f005ef32Sjsg uint32_t filter_param,
2019f005ef32Sjsg uint32_t grace_period)
2020fb4d8502Sjsg {
2021fb4d8502Sjsg int retval;
2022fb4d8502Sjsg 
2023fb4d8502Sjsg if (dqm->is_hws_hang)
2024fb4d8502Sjsg return -EIO;
2025f005ef32Sjsg retval =
unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); 2026c349dbc7Sjsg if (retval) 2027fb4d8502Sjsg return retval; 2028fb4d8502Sjsg 2029fb4d8502Sjsg return map_queues_cpsch(dqm); 2030fb4d8502Sjsg } 2031fb4d8502Sjsg 2032f005ef32Sjsg static int wait_on_destroy_queue(struct device_queue_manager *dqm, 2033f005ef32Sjsg struct queue *q) 2034f005ef32Sjsg { 2035f005ef32Sjsg struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, 2036f005ef32Sjsg q->process); 2037f005ef32Sjsg int ret = 0; 2038f005ef32Sjsg 2039f005ef32Sjsg if (pdd->qpd.is_debug) 2040f005ef32Sjsg return ret; 2041f005ef32Sjsg 2042f005ef32Sjsg q->properties.is_being_destroyed = true; 2043f005ef32Sjsg 2044f005ef32Sjsg if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { 2045f005ef32Sjsg dqm_unlock(dqm); 2046f005ef32Sjsg mutex_unlock(&q->process->mutex); 2047f005ef32Sjsg ret = wait_event_interruptible(dqm->destroy_wait, 2048f005ef32Sjsg !q->properties.is_suspended); 2049f005ef32Sjsg 2050f005ef32Sjsg mutex_lock(&q->process->mutex); 2051f005ef32Sjsg dqm_lock(dqm); 2052f005ef32Sjsg } 2053f005ef32Sjsg 2054f005ef32Sjsg return ret; 2055f005ef32Sjsg } 2056f005ef32Sjsg 2057fb4d8502Sjsg static int destroy_queue_cpsch(struct device_queue_manager *dqm, 2058fb4d8502Sjsg struct qcm_process_device *qpd, 2059fb4d8502Sjsg struct queue *q) 2060fb4d8502Sjsg { 2061fb4d8502Sjsg int retval; 2062fb4d8502Sjsg struct mqd_manager *mqd_mgr; 2063ad8b1aafSjsg uint64_t sdma_val = 0; 2064ad8b1aafSjsg struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2065ad8b1aafSjsg 2066ad8b1aafSjsg /* Get the SDMA queue stats */ 2067ad8b1aafSjsg if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2068ad8b1aafSjsg (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2069ad8b1aafSjsg retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 2070ad8b1aafSjsg &sdma_val); 2071ad8b1aafSjsg if (retval) 2072ad8b1aafSjsg pr_err("Failed to read SDMA queue counter for queue: %d\n", 2073ad8b1aafSjsg q->properties.queue_id); 2074ad8b1aafSjsg } 2075fb4d8502Sjsg 2076fb4d8502Sjsg /* remove queue from list to prevent rescheduling after preemption */ 2077fb4d8502Sjsg dqm_lock(dqm); 2078fb4d8502Sjsg 2079f005ef32Sjsg retval = wait_on_destroy_queue(dqm, q); 2080f005ef32Sjsg 2081f005ef32Sjsg if (retval) { 2082f005ef32Sjsg dqm_unlock(dqm); 2083f005ef32Sjsg return retval; 2084f005ef32Sjsg } 2085f005ef32Sjsg 2086fb4d8502Sjsg if (qpd->is_debug) { 2087fb4d8502Sjsg /* 2088fb4d8502Sjsg * error, currently we do not allow to destroy a queue 2089fb4d8502Sjsg * of a currently debugged process 2090fb4d8502Sjsg */ 2091fb4d8502Sjsg retval = -EBUSY; 2092fb4d8502Sjsg goto failed_try_destroy_debugged_queue; 2093fb4d8502Sjsg 2094fb4d8502Sjsg } 2095fb4d8502Sjsg 2096c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2097c349dbc7Sjsg q->properties.type)]; 2098fb4d8502Sjsg 2099fb4d8502Sjsg deallocate_doorbell(qpd, q); 2100fb4d8502Sjsg 2101ad8b1aafSjsg if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2102ad8b1aafSjsg (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2103c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 2104ad8b1aafSjsg pdd->sdma_past_activity_counter += sdma_val; 2105ad8b1aafSjsg } 2106fb4d8502Sjsg 2107fb4d8502Sjsg list_del(&q->list); 2108fb4d8502Sjsg qpd->queue_count--; 2109fb4d8502Sjsg if (q->properties.is_active) { 21101285848aSjsg decrement_queue_count(dqm, qpd, q); 2111f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) { 2112fb4d8502Sjsg retval = execute_queues_cpsch(dqm, 2113f005ef32Sjsg 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2114f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD); 2115fb4d8502Sjsg if (retval == -ETIME) 2116fb4d8502Sjsg qpd->reset_wavefronts = true; 21171bb76ff1Sjsg } else { 21181bb76ff1Sjsg retval = remove_queue_mes(dqm, q, qpd); 21191bb76ff1Sjsg } 2120fb4d8502Sjsg } 2121fb4d8502Sjsg 2122fb4d8502Sjsg /* 2123fb4d8502Sjsg * Unconditionally decrement this counter, regardless of the queue's 2124fb4d8502Sjsg * type 2125fb4d8502Sjsg */ 2126fb4d8502Sjsg dqm->total_queue_count--; 2127fb4d8502Sjsg pr_debug("Total of %d queues are accountable so far\n", 2128fb4d8502Sjsg dqm->total_queue_count); 2129fb4d8502Sjsg 2130fb4d8502Sjsg dqm_unlock(dqm); 2131fb4d8502Sjsg 2132f005ef32Sjsg /* 2133f005ef32Sjsg * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid 2134f005ef32Sjsg * circular locking 2135f005ef32Sjsg */ 2136f005ef32Sjsg kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), 2137f005ef32Sjsg qpd->pqm->process, q->device, 2138f005ef32Sjsg -1, false, NULL, 0); 2139f005ef32Sjsg 2140c349dbc7Sjsg mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2141c349dbc7Sjsg 2142fb4d8502Sjsg return retval; 2143fb4d8502Sjsg 2144fb4d8502Sjsg failed_try_destroy_debugged_queue: 2145fb4d8502Sjsg 2146fb4d8502Sjsg dqm_unlock(dqm); 2147fb4d8502Sjsg return retval; 2148fb4d8502Sjsg } 2149fb4d8502Sjsg 2150fb4d8502Sjsg /* 2151fb4d8502Sjsg * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 2152fb4d8502Sjsg * stay in user mode. 2153fb4d8502Sjsg */ 2154fb4d8502Sjsg #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 2155fb4d8502Sjsg /* APE1 limit is inclusive and 64K aligned. */ 2156fb4d8502Sjsg #define APE1_LIMIT_ALIGNMENT 0xFFFF 2157fb4d8502Sjsg 2158fb4d8502Sjsg static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2159fb4d8502Sjsg struct qcm_process_device *qpd, 2160fb4d8502Sjsg enum cache_policy default_policy, 2161fb4d8502Sjsg enum cache_policy alternate_policy, 2162fb4d8502Sjsg void __user *alternate_aperture_base, 2163fb4d8502Sjsg uint64_t alternate_aperture_size) 2164fb4d8502Sjsg { 2165fb4d8502Sjsg bool retval = true; 2166fb4d8502Sjsg 2167fb4d8502Sjsg if (!dqm->asic_ops.set_cache_memory_policy) 2168fb4d8502Sjsg return retval; 2169fb4d8502Sjsg 2170fb4d8502Sjsg dqm_lock(dqm); 2171fb4d8502Sjsg 2172fb4d8502Sjsg if (alternate_aperture_size == 0) { 2173fb4d8502Sjsg /* base > limit disables APE1 */ 2174fb4d8502Sjsg qpd->sh_mem_ape1_base = 1; 2175fb4d8502Sjsg qpd->sh_mem_ape1_limit = 0; 2176fb4d8502Sjsg } else { 2177fb4d8502Sjsg /* 2178fb4d8502Sjsg * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 2179fb4d8502Sjsg * SH_MEM_APE1_BASE[31:0], 0x0000 } 2180fb4d8502Sjsg * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 2181fb4d8502Sjsg * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 2182fb4d8502Sjsg * Verify that the base and size parameters can be 2183fb4d8502Sjsg * represented in this format and convert them. 2184fb4d8502Sjsg * Additionally restrict APE1 to user-mode addresses. 
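 * Illustrative example (values chosen for this note, not taken from the
 * driver): base = 0x100000000 with size = 0x10000 gives limit = 0x10000FFFF;
 * both satisfy the checks below, and sh_mem_ape1_base and sh_mem_ape1_limit
 * are each programmed with 0x10000.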
2185fb4d8502Sjsg */ 2186fb4d8502Sjsg 2187fb4d8502Sjsg uint64_t base = (uintptr_t)alternate_aperture_base; 2188fb4d8502Sjsg uint64_t limit = base + alternate_aperture_size - 1; 2189fb4d8502Sjsg 2190fb4d8502Sjsg if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 2191fb4d8502Sjsg (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 2192fb4d8502Sjsg retval = false; 2193fb4d8502Sjsg goto out; 2194fb4d8502Sjsg } 2195fb4d8502Sjsg 2196fb4d8502Sjsg qpd->sh_mem_ape1_base = base >> 16; 2197fb4d8502Sjsg qpd->sh_mem_ape1_limit = limit >> 16; 2198fb4d8502Sjsg } 2199fb4d8502Sjsg 2200fb4d8502Sjsg retval = dqm->asic_ops.set_cache_memory_policy( 2201fb4d8502Sjsg dqm, 2202fb4d8502Sjsg qpd, 2203fb4d8502Sjsg default_policy, 2204fb4d8502Sjsg alternate_policy, 2205fb4d8502Sjsg alternate_aperture_base, 2206fb4d8502Sjsg alternate_aperture_size); 2207fb4d8502Sjsg 2208fb4d8502Sjsg if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2209fb4d8502Sjsg program_sh_mem_settings(dqm, qpd); 2210fb4d8502Sjsg 2211fb4d8502Sjsg pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2212fb4d8502Sjsg qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2213fb4d8502Sjsg qpd->sh_mem_ape1_limit); 2214fb4d8502Sjsg 2215fb4d8502Sjsg out: 2216fb4d8502Sjsg dqm_unlock(dqm); 2217fb4d8502Sjsg return retval; 2218fb4d8502Sjsg } 2219fb4d8502Sjsg 2220fb4d8502Sjsg static int process_termination_nocpsch(struct device_queue_manager *dqm, 2221fb4d8502Sjsg struct qcm_process_device *qpd) 2222fb4d8502Sjsg { 222339437df3Sjsg struct queue *q; 2224fb4d8502Sjsg struct device_process_node *cur, *next_dpn; 2225fb4d8502Sjsg int retval = 0; 2226c349dbc7Sjsg bool found = false; 2227fb4d8502Sjsg 2228fb4d8502Sjsg dqm_lock(dqm); 2229fb4d8502Sjsg 2230fb4d8502Sjsg /* Clear all user mode queues */ 223139437df3Sjsg while (!list_empty(&qpd->queues_list)) { 223239437df3Sjsg struct mqd_manager *mqd_mgr; 2233fb4d8502Sjsg int ret; 2234fb4d8502Sjsg 223539437df3Sjsg q = list_first_entry(&qpd->queues_list, struct queue, list); 223639437df3Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 223739437df3Sjsg q->properties.type)]; 2238fb4d8502Sjsg ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2239fb4d8502Sjsg if (ret) 2240fb4d8502Sjsg retval = ret; 224139437df3Sjsg dqm_unlock(dqm); 224239437df3Sjsg mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 224339437df3Sjsg dqm_lock(dqm); 2244fb4d8502Sjsg } 2245fb4d8502Sjsg 2246fb4d8502Sjsg /* Unregister process */ 2247fb4d8502Sjsg list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2248fb4d8502Sjsg if (qpd == cur->qpd) { 2249fb4d8502Sjsg list_del(&cur->list); 2250fb4d8502Sjsg kfree(cur); 2251fb4d8502Sjsg dqm->processes_count--; 2252c349dbc7Sjsg found = true; 2253fb4d8502Sjsg break; 2254fb4d8502Sjsg } 2255fb4d8502Sjsg } 2256fb4d8502Sjsg 2257fb4d8502Sjsg dqm_unlock(dqm); 2258c349dbc7Sjsg 2259c349dbc7Sjsg /* Outside the DQM lock because under the DQM lock we can't do 2260c349dbc7Sjsg * reclaim or take other locks that others hold while reclaiming. 
2261c349dbc7Sjsg */ 2262c349dbc7Sjsg if (found) 2263c349dbc7Sjsg kfd_dec_compute_active(dqm->dev); 2264c349dbc7Sjsg 2265fb4d8502Sjsg return retval; 2266fb4d8502Sjsg } 2267fb4d8502Sjsg 2268c349dbc7Sjsg static int get_wave_state(struct device_queue_manager *dqm, 2269c349dbc7Sjsg struct queue *q, 2270c349dbc7Sjsg void __user *ctl_stack, 2271c349dbc7Sjsg u32 *ctl_stack_used_size, 2272c349dbc7Sjsg u32 *save_area_used_size) 2273c349dbc7Sjsg { 2274c349dbc7Sjsg struct mqd_manager *mqd_mgr; 2275c349dbc7Sjsg 2276c349dbc7Sjsg dqm_lock(dqm); 2277c349dbc7Sjsg 2278c349dbc7Sjsg mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2279c349dbc7Sjsg 2280babb1d53Sjsg if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2281f005ef32Sjsg q->properties.is_active || !q->device->kfd->cwsr_enabled || 2282babb1d53Sjsg !mqd_mgr->get_wave_state) { 2283babb1d53Sjsg dqm_unlock(dqm); 2284babb1d53Sjsg return -EINVAL; 2285c349dbc7Sjsg } 2286c349dbc7Sjsg 2287c349dbc7Sjsg dqm_unlock(dqm); 2288babb1d53Sjsg 2289babb1d53Sjsg /* 2290babb1d53Sjsg * get_wave_state is outside the dqm lock to prevent circular locking 2291babb1d53Sjsg * and the queue should be protected against destruction by the process 2292babb1d53Sjsg * lock. 2293babb1d53Sjsg */ 2294f005ef32Sjsg return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2295f005ef32Sjsg ctl_stack, ctl_stack_used_size, save_area_used_size); 2296c349dbc7Sjsg } 2297fb4d8502Sjsg 22981bb76ff1Sjsg static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 22991bb76ff1Sjsg const struct queue *q, 23001bb76ff1Sjsg u32 *mqd_size, 23011bb76ff1Sjsg u32 *ctl_stack_size) 23021bb76ff1Sjsg { 23031bb76ff1Sjsg struct mqd_manager *mqd_mgr; 23041bb76ff1Sjsg enum KFD_MQD_TYPE mqd_type = 23051bb76ff1Sjsg get_mqd_type_from_queue_type(q->properties.type); 23061bb76ff1Sjsg 23071bb76ff1Sjsg dqm_lock(dqm); 23081bb76ff1Sjsg mqd_mgr = dqm->mqd_mgrs[mqd_type]; 23091bb76ff1Sjsg *mqd_size = mqd_mgr->mqd_size; 23101bb76ff1Sjsg *ctl_stack_size = 0; 23111bb76ff1Sjsg 23121bb76ff1Sjsg if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 23131bb76ff1Sjsg mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 23141bb76ff1Sjsg 23151bb76ff1Sjsg dqm_unlock(dqm); 23161bb76ff1Sjsg } 23171bb76ff1Sjsg 23181bb76ff1Sjsg static int checkpoint_mqd(struct device_queue_manager *dqm, 23191bb76ff1Sjsg const struct queue *q, 23201bb76ff1Sjsg void *mqd, 23211bb76ff1Sjsg void *ctl_stack) 23221bb76ff1Sjsg { 23231bb76ff1Sjsg struct mqd_manager *mqd_mgr; 23241bb76ff1Sjsg int r = 0; 23251bb76ff1Sjsg enum KFD_MQD_TYPE mqd_type = 23261bb76ff1Sjsg get_mqd_type_from_queue_type(q->properties.type); 23271bb76ff1Sjsg 23281bb76ff1Sjsg dqm_lock(dqm); 23291bb76ff1Sjsg 2330f005ef32Sjsg if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { 23311bb76ff1Sjsg r = -EINVAL; 23321bb76ff1Sjsg goto dqm_unlock; 23331bb76ff1Sjsg } 23341bb76ff1Sjsg 23351bb76ff1Sjsg mqd_mgr = dqm->mqd_mgrs[mqd_type]; 23361bb76ff1Sjsg if (!mqd_mgr->checkpoint_mqd) { 23371bb76ff1Sjsg r = -EOPNOTSUPP; 23381bb76ff1Sjsg goto dqm_unlock; 23391bb76ff1Sjsg } 23401bb76ff1Sjsg 23411bb76ff1Sjsg mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 23421bb76ff1Sjsg 23431bb76ff1Sjsg dqm_unlock: 23441bb76ff1Sjsg dqm_unlock(dqm); 23451bb76ff1Sjsg return r; 23461bb76ff1Sjsg } 23471bb76ff1Sjsg 2348fb4d8502Sjsg static int process_termination_cpsch(struct device_queue_manager *dqm, 2349fb4d8502Sjsg struct qcm_process_device *qpd) 2350fb4d8502Sjsg { 2351fb4d8502Sjsg int retval; 2352682c5519Sjsg struct queue *q; 2353fb4d8502Sjsg struct 
kernel_queue *kq, *kq_next; 2354fb4d8502Sjsg struct mqd_manager *mqd_mgr; 2355fb4d8502Sjsg struct device_process_node *cur, *next_dpn; 2356fb4d8502Sjsg enum kfd_unmap_queues_filter filter = 2357fb4d8502Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2358c349dbc7Sjsg bool found = false; 2359fb4d8502Sjsg 2360fb4d8502Sjsg retval = 0; 2361fb4d8502Sjsg 2362fb4d8502Sjsg dqm_lock(dqm); 2363fb4d8502Sjsg 2364fb4d8502Sjsg /* Clean all kernel queues */ 2365fb4d8502Sjsg list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2366fb4d8502Sjsg list_del(&kq->list); 23671285848aSjsg decrement_queue_count(dqm, qpd, kq->queue); 2368fb4d8502Sjsg qpd->is_debug = false; 2369fb4d8502Sjsg dqm->total_queue_count--; 2370fb4d8502Sjsg filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2371fb4d8502Sjsg } 2372fb4d8502Sjsg 2373fb4d8502Sjsg /* Clear all user mode queues */ 2374fb4d8502Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 2375c349dbc7Sjsg if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2376c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 2377c349dbc7Sjsg else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2378c349dbc7Sjsg deallocate_sdma_queue(dqm, q); 2379fb4d8502Sjsg 23801bb76ff1Sjsg if (q->properties.is_active) { 23811285848aSjsg decrement_queue_count(dqm, qpd, q); 2382fb4d8502Sjsg 2383f005ef32Sjsg if (dqm->dev->kfd->shared_resources.enable_mes) { 23841bb76ff1Sjsg retval = remove_queue_mes(dqm, q, qpd); 23851bb76ff1Sjsg if (retval) 23861bb76ff1Sjsg pr_err("Failed to remove queue %d\n", 23871bb76ff1Sjsg q->properties.queue_id); 23881bb76ff1Sjsg } 23891bb76ff1Sjsg } 23901bb76ff1Sjsg 2391fb4d8502Sjsg dqm->total_queue_count--; 2392fb4d8502Sjsg } 2393fb4d8502Sjsg 2394fb4d8502Sjsg /* Unregister process */ 2395fb4d8502Sjsg list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2396fb4d8502Sjsg if (qpd == cur->qpd) { 2397fb4d8502Sjsg list_del(&cur->list); 2398fb4d8502Sjsg kfree(cur); 2399fb4d8502Sjsg dqm->processes_count--; 2400c349dbc7Sjsg found = true; 2401fb4d8502Sjsg break; 2402fb4d8502Sjsg } 2403fb4d8502Sjsg } 2404fb4d8502Sjsg 2405f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 2406f005ef32Sjsg retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 24071bb76ff1Sjsg 2408fb4d8502Sjsg if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { 2409fb4d8502Sjsg pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2410fb4d8502Sjsg dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2411fb4d8502Sjsg qpd->reset_wavefronts = false; 2412fb4d8502Sjsg } 2413fb4d8502Sjsg 2414682c5519Sjsg /* Lastly, free mqd resources. 2415682c5519Sjsg * Do free_mqd() after dqm_unlock to avoid circular locking. 2416682c5519Sjsg */ 2417682c5519Sjsg while (!list_empty(&qpd->queues_list)) { 2418682c5519Sjsg q = list_first_entry(&qpd->queues_list, struct queue, list); 2419682c5519Sjsg mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2420682c5519Sjsg q->properties.type)]; 2421682c5519Sjsg list_del(&q->list); 2422682c5519Sjsg qpd->queue_count--; 2423682c5519Sjsg dqm_unlock(dqm); 2424682c5519Sjsg mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2425682c5519Sjsg dqm_lock(dqm); 2426682c5519Sjsg } 2427c349dbc7Sjsg dqm_unlock(dqm); 2428c349dbc7Sjsg 2429c349dbc7Sjsg /* Outside the DQM lock because under the DQM lock we can't do 2430c349dbc7Sjsg * reclaim or take other locks that others hold while reclaiming. 
2431c349dbc7Sjsg */ 2432c349dbc7Sjsg if (found) 2433c349dbc7Sjsg kfd_dec_compute_active(dqm->dev); 2434c349dbc7Sjsg 2435c349dbc7Sjsg return retval; 2436c349dbc7Sjsg } 2437c349dbc7Sjsg 2438c349dbc7Sjsg static int init_mqd_managers(struct device_queue_manager *dqm) 2439c349dbc7Sjsg { 2440c349dbc7Sjsg int i, j; 2441c349dbc7Sjsg struct mqd_manager *mqd_mgr; 2442c349dbc7Sjsg 2443c349dbc7Sjsg for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2444c349dbc7Sjsg mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2445c349dbc7Sjsg if (!mqd_mgr) { 2446c349dbc7Sjsg pr_err("mqd manager [%d] initialization failed\n", i); 2447c349dbc7Sjsg goto out_free; 2448c349dbc7Sjsg } 2449c349dbc7Sjsg dqm->mqd_mgrs[i] = mqd_mgr; 2450c349dbc7Sjsg } 2451c349dbc7Sjsg 2452c349dbc7Sjsg return 0; 2453c349dbc7Sjsg 2454c349dbc7Sjsg out_free: 2455c349dbc7Sjsg for (j = 0; j < i; j++) { 2456c349dbc7Sjsg kfree(dqm->mqd_mgrs[j]); 2457c349dbc7Sjsg dqm->mqd_mgrs[j] = NULL; 2458c349dbc7Sjsg } 2459c349dbc7Sjsg 2460c349dbc7Sjsg return -ENOMEM; 2461c349dbc7Sjsg } 2462c349dbc7Sjsg 2463c349dbc7Sjsg /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2464c349dbc7Sjsg static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2465c349dbc7Sjsg { 2466c349dbc7Sjsg int retval; 2467f005ef32Sjsg struct kfd_node *dev = dqm->dev; 2468c349dbc7Sjsg struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2469c349dbc7Sjsg uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2470c349dbc7Sjsg get_num_all_sdma_engines(dqm) * 2471f005ef32Sjsg dev->kfd->device_info.num_sdma_queues_per_engine + 2472f005ef32Sjsg (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 2473f005ef32Sjsg NUM_XCC(dqm->dev->xcc_mask)); 2474c349dbc7Sjsg 24751bb76ff1Sjsg retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 2476c349dbc7Sjsg &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 2477c349dbc7Sjsg (void *)&(mem_obj->cpu_ptr), false); 2478c349dbc7Sjsg 2479fb4d8502Sjsg return retval; 2480fb4d8502Sjsg } 2481fb4d8502Sjsg 2482f005ef32Sjsg struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2483fb4d8502Sjsg { 2484fb4d8502Sjsg struct device_queue_manager *dqm; 2485fb4d8502Sjsg 2486fb4d8502Sjsg pr_debug("Loading device queue manager\n"); 2487fb4d8502Sjsg 2488fb4d8502Sjsg dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 2489fb4d8502Sjsg if (!dqm) 2490fb4d8502Sjsg return NULL; 2491fb4d8502Sjsg 24921bb76ff1Sjsg switch (dev->adev->asic_type) { 2493fb4d8502Sjsg /* HWS is not available on Hawaii. */ 2494fb4d8502Sjsg case CHIP_HAWAII: 2495fb4d8502Sjsg /* HWS depends on CWSR for timely dequeue. CWSR is not 2496fb4d8502Sjsg * available on Tonga. 2497fb4d8502Sjsg * 2498fb4d8502Sjsg * FIXME: This argument also applies to Kaveri. 
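 * Both Hawaii and Tonga therefore fall through to KFD_SCHED_POLICY_NO_HWS
 * below.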
2499fb4d8502Sjsg */ 2500fb4d8502Sjsg case CHIP_TONGA: 2501fb4d8502Sjsg dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2502fb4d8502Sjsg break; 2503fb4d8502Sjsg default: 2504fb4d8502Sjsg dqm->sched_policy = sched_policy; 2505fb4d8502Sjsg break; 2506fb4d8502Sjsg } 2507fb4d8502Sjsg 2508fb4d8502Sjsg dqm->dev = dev; 2509fb4d8502Sjsg switch (dqm->sched_policy) { 2510fb4d8502Sjsg case KFD_SCHED_POLICY_HWS: 2511fb4d8502Sjsg case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2512fb4d8502Sjsg /* initialize dqm for cp scheduling */ 2513fb4d8502Sjsg dqm->ops.create_queue = create_queue_cpsch; 2514fb4d8502Sjsg dqm->ops.initialize = initialize_cpsch; 2515fb4d8502Sjsg dqm->ops.start = start_cpsch; 2516fb4d8502Sjsg dqm->ops.stop = stop_cpsch; 2517c349dbc7Sjsg dqm->ops.pre_reset = pre_reset; 2518fb4d8502Sjsg dqm->ops.destroy_queue = destroy_queue_cpsch; 2519fb4d8502Sjsg dqm->ops.update_queue = update_queue; 2520fb4d8502Sjsg dqm->ops.register_process = register_process; 2521fb4d8502Sjsg dqm->ops.unregister_process = unregister_process; 2522fb4d8502Sjsg dqm->ops.uninitialize = uninitialize; 2523fb4d8502Sjsg dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2524fb4d8502Sjsg dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2525fb4d8502Sjsg dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2526fb4d8502Sjsg dqm->ops.process_termination = process_termination_cpsch; 2527fb4d8502Sjsg dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2528fb4d8502Sjsg dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2529c349dbc7Sjsg dqm->ops.get_wave_state = get_wave_state; 25301bb76ff1Sjsg dqm->ops.reset_queues = reset_queues_cpsch; 25311bb76ff1Sjsg dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 25321bb76ff1Sjsg dqm->ops.checkpoint_mqd = checkpoint_mqd; 2533fb4d8502Sjsg break; 2534fb4d8502Sjsg case KFD_SCHED_POLICY_NO_HWS: 2535fb4d8502Sjsg /* initialize dqm for no cp scheduling */ 2536fb4d8502Sjsg dqm->ops.start = start_nocpsch; 2537fb4d8502Sjsg dqm->ops.stop = stop_nocpsch; 2538c349dbc7Sjsg dqm->ops.pre_reset = pre_reset; 2539fb4d8502Sjsg dqm->ops.create_queue = create_queue_nocpsch; 2540fb4d8502Sjsg dqm->ops.destroy_queue = destroy_queue_nocpsch; 2541fb4d8502Sjsg dqm->ops.update_queue = update_queue; 2542fb4d8502Sjsg dqm->ops.register_process = register_process; 2543fb4d8502Sjsg dqm->ops.unregister_process = unregister_process; 2544fb4d8502Sjsg dqm->ops.initialize = initialize_nocpsch; 2545fb4d8502Sjsg dqm->ops.uninitialize = uninitialize; 2546fb4d8502Sjsg dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2547fb4d8502Sjsg dqm->ops.process_termination = process_termination_nocpsch; 2548fb4d8502Sjsg dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2549fb4d8502Sjsg dqm->ops.restore_process_queues = 2550fb4d8502Sjsg restore_process_queues_nocpsch; 2551c349dbc7Sjsg dqm->ops.get_wave_state = get_wave_state; 25521bb76ff1Sjsg dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 25531bb76ff1Sjsg dqm->ops.checkpoint_mqd = checkpoint_mqd; 2554fb4d8502Sjsg break; 2555fb4d8502Sjsg default: 2556fb4d8502Sjsg pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); 2557fb4d8502Sjsg goto out_free; 2558fb4d8502Sjsg } 2559fb4d8502Sjsg 25601bb76ff1Sjsg switch (dev->adev->asic_type) { 2561fb4d8502Sjsg case CHIP_KAVERI: 2562f005ef32Sjsg case CHIP_HAWAII: 2563fb4d8502Sjsg device_queue_manager_init_cik(&dqm->asic_ops); 2564fb4d8502Sjsg break; 2565fb4d8502Sjsg 2566f005ef32Sjsg case CHIP_CARRIZO: 2567fb4d8502Sjsg case CHIP_TONGA: 2568fb4d8502Sjsg case 
CHIP_FIJI: 2569fb4d8502Sjsg case CHIP_POLARIS10: 2570fb4d8502Sjsg case CHIP_POLARIS11: 2571c349dbc7Sjsg case CHIP_POLARIS12: 2572c349dbc7Sjsg case CHIP_VEGAM: 2573f005ef32Sjsg device_queue_manager_init_vi(&dqm->asic_ops); 2574fb4d8502Sjsg break; 2575fb4d8502Sjsg 2576fb4d8502Sjsg default: 25771bb76ff1Sjsg if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 25781bb76ff1Sjsg device_queue_manager_init_v11(&dqm->asic_ops); 25791bb76ff1Sjsg else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 2580f005ef32Sjsg device_queue_manager_init_v10(&dqm->asic_ops); 25811bb76ff1Sjsg else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 25821bb76ff1Sjsg device_queue_manager_init_v9(&dqm->asic_ops); 25831bb76ff1Sjsg else { 2584fb4d8502Sjsg WARN(1, "Unexpected ASIC family %u", 25851bb76ff1Sjsg dev->adev->asic_type); 2586fb4d8502Sjsg goto out_free; 2587fb4d8502Sjsg } 25881bb76ff1Sjsg } 2589fb4d8502Sjsg 2590c349dbc7Sjsg if (init_mqd_managers(dqm)) 2591c349dbc7Sjsg goto out_free; 2592c349dbc7Sjsg 2593f005ef32Sjsg if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 2594c349dbc7Sjsg pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 2595c349dbc7Sjsg goto out_free; 2596c349dbc7Sjsg } 2597c349dbc7Sjsg 2598f005ef32Sjsg if (!dqm->ops.initialize(dqm)) { 2599f005ef32Sjsg init_waitqueue_head(&dqm->destroy_wait); 2600fb4d8502Sjsg return dqm; 2601f005ef32Sjsg } 2602fb4d8502Sjsg 2603fb4d8502Sjsg out_free: 2604fb4d8502Sjsg kfree(dqm); 2605fb4d8502Sjsg return NULL; 2606fb4d8502Sjsg } 2607fb4d8502Sjsg 2608f005ef32Sjsg static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 2609c349dbc7Sjsg struct kfd_mem_obj *mqd) 2610c349dbc7Sjsg { 2611c349dbc7Sjsg WARN(!mqd, "No hiq sdma mqd trunk to free"); 2612c349dbc7Sjsg 2613*ff6d5195Sjsg amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); 2614c349dbc7Sjsg } 2615c349dbc7Sjsg 2616fb4d8502Sjsg void device_queue_manager_uninit(struct device_queue_manager *dqm) 2617fb4d8502Sjsg { 2618f005ef32Sjsg dqm->ops.stop(dqm); 2619fb4d8502Sjsg dqm->ops.uninitialize(dqm); 2620f005ef32Sjsg if (!dqm->dev->kfd->shared_resources.enable_mes) 2621c349dbc7Sjsg deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2622fb4d8502Sjsg kfree(dqm); 2623fb4d8502Sjsg } 2624fb4d8502Sjsg 26251bb76ff1Sjsg int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) 2626fb4d8502Sjsg { 2627fb4d8502Sjsg struct kfd_process_device *pdd; 2628fb4d8502Sjsg struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 2629fb4d8502Sjsg int ret = 0; 2630fb4d8502Sjsg 2631fb4d8502Sjsg if (!p) 2632fb4d8502Sjsg return -EINVAL; 2633ad8b1aafSjsg WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 2634fb4d8502Sjsg pdd = kfd_get_process_device_data(dqm->dev, p); 2635fb4d8502Sjsg if (pdd) 2636fb4d8502Sjsg ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2637fb4d8502Sjsg kfd_unref_process(p); 2638fb4d8502Sjsg 2639fb4d8502Sjsg return ret; 2640fb4d8502Sjsg } 2641fb4d8502Sjsg 2642fb4d8502Sjsg static void kfd_process_hw_exception(struct work_struct *work) 2643fb4d8502Sjsg { 2644fb4d8502Sjsg struct device_queue_manager *dqm = container_of(work, 2645fb4d8502Sjsg struct device_queue_manager, hw_exception_work); 26461bb76ff1Sjsg amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2647fb4d8502Sjsg } 2648fb4d8502Sjsg 2649f005ef32Sjsg int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 2650f005ef32Sjsg struct qcm_process_device *qpd) 2651f005ef32Sjsg { 2652f005ef32Sjsg int r; 2653f005ef32Sjsg int updated_vmid_mask; 2654f005ef32Sjsg 2655f005ef32Sjsg if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 
2656f005ef32Sjsg pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); 2657f005ef32Sjsg return -EINVAL; 2658f005ef32Sjsg } 2659f005ef32Sjsg 2660f005ef32Sjsg dqm_lock(dqm); 2661f005ef32Sjsg 2662f005ef32Sjsg if (dqm->trap_debug_vmid != 0) { 2663f005ef32Sjsg pr_err("Trap debug id already reserved\n"); 2664f005ef32Sjsg r = -EBUSY; 2665f005ef32Sjsg goto out_unlock; 2666f005ef32Sjsg } 2667f005ef32Sjsg 2668f005ef32Sjsg r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 2669f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD, false); 2670f005ef32Sjsg if (r) 2671f005ef32Sjsg goto out_unlock; 2672f005ef32Sjsg 2673f005ef32Sjsg updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 2674f005ef32Sjsg updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 2675f005ef32Sjsg 2676f005ef32Sjsg dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 2677f005ef32Sjsg dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 2678f005ef32Sjsg r = set_sched_resources(dqm); 2679f005ef32Sjsg if (r) 2680f005ef32Sjsg goto out_unlock; 2681f005ef32Sjsg 2682f005ef32Sjsg r = map_queues_cpsch(dqm); 2683f005ef32Sjsg if (r) 2684f005ef32Sjsg goto out_unlock; 2685f005ef32Sjsg 2686f005ef32Sjsg pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 2687f005ef32Sjsg 2688f005ef32Sjsg out_unlock: 2689f005ef32Sjsg dqm_unlock(dqm); 2690f005ef32Sjsg return r; 2691f005ef32Sjsg } 2692f005ef32Sjsg 2693f005ef32Sjsg /* 2694f005ef32Sjsg * Releases vmid for the trap debugger 2695f005ef32Sjsg */ 2696f005ef32Sjsg int release_debug_trap_vmid(struct device_queue_manager *dqm, 2697f005ef32Sjsg struct qcm_process_device *qpd) 2698f005ef32Sjsg { 2699f005ef32Sjsg int r; 2700f005ef32Sjsg int updated_vmid_mask; 2701f005ef32Sjsg uint32_t trap_debug_vmid; 2702f005ef32Sjsg 2703f005ef32Sjsg if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 2704f005ef32Sjsg pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); 2705f005ef32Sjsg return -EINVAL; 2706f005ef32Sjsg } 2707f005ef32Sjsg 2708f005ef32Sjsg dqm_lock(dqm); 2709f005ef32Sjsg trap_debug_vmid = dqm->trap_debug_vmid; 2710f005ef32Sjsg if (dqm->trap_debug_vmid == 0) { 2711f005ef32Sjsg pr_err("Trap debug id is not reserved\n"); 2712f005ef32Sjsg r = -EINVAL; 2713f005ef32Sjsg goto out_unlock; 2714f005ef32Sjsg } 2715f005ef32Sjsg 2716f005ef32Sjsg r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 2717f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD, false); 2718f005ef32Sjsg if (r) 2719f005ef32Sjsg goto out_unlock; 2720f005ef32Sjsg 2721f005ef32Sjsg updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 2722f005ef32Sjsg updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 2723f005ef32Sjsg 2724f005ef32Sjsg dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 2725f005ef32Sjsg dqm->trap_debug_vmid = 0; 2726f005ef32Sjsg r = set_sched_resources(dqm); 2727f005ef32Sjsg if (r) 2728f005ef32Sjsg goto out_unlock; 2729f005ef32Sjsg 2730f005ef32Sjsg r = map_queues_cpsch(dqm); 2731f005ef32Sjsg if (r) 2732f005ef32Sjsg goto out_unlock; 2733f005ef32Sjsg 2734f005ef32Sjsg pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 2735f005ef32Sjsg 2736f005ef32Sjsg out_unlock: 2737f005ef32Sjsg dqm_unlock(dqm); 2738f005ef32Sjsg return r; 2739f005ef32Sjsg } 2740f005ef32Sjsg 2741f005ef32Sjsg #define QUEUE_NOT_FOUND -1 2742f005ef32Sjsg /* invalidate queue operation in array */ 2743f005ef32Sjsg static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 2744f005ef32Sjsg { 2745f005ef32Sjsg int i; 
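/* Tag every entry up front; the callers (see resume_queues() and
 * suspend_queues() below) clear the tag again per queue on success.
 */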
2746f005ef32Sjsg 2747f005ef32Sjsg for (i = 0; i < num_queues; i++) 2748f005ef32Sjsg queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 2749f005ef32Sjsg } 2750f005ef32Sjsg 2751f005ef32Sjsg /* find queue index in array */ 2752f005ef32Sjsg static int q_array_get_index(unsigned int queue_id, 2753f005ef32Sjsg uint32_t num_queues, 2754f005ef32Sjsg uint32_t *queue_ids) 2755f005ef32Sjsg { 2756f005ef32Sjsg int i; 2757f005ef32Sjsg 2758f005ef32Sjsg for (i = 0; i < num_queues; i++) 2759f005ef32Sjsg if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 2760f005ef32Sjsg return i; 2761f005ef32Sjsg 2762f005ef32Sjsg return QUEUE_NOT_FOUND; 2763f005ef32Sjsg } 2764f005ef32Sjsg 2765f005ef32Sjsg struct copy_context_work_handler_workarea { 2766f005ef32Sjsg struct work_struct copy_context_work; 2767f005ef32Sjsg struct kfd_process *p; 2768f005ef32Sjsg }; 2769f005ef32Sjsg 2770f005ef32Sjsg static void copy_context_work_handler (struct work_struct *work) 2771f005ef32Sjsg { 2772f005ef32Sjsg struct copy_context_work_handler_workarea *workarea; 2773f005ef32Sjsg struct mqd_manager *mqd_mgr; 2774f005ef32Sjsg struct queue *q; 2775f005ef32Sjsg struct mm_struct *mm; 2776f005ef32Sjsg struct kfd_process *p; 2777f005ef32Sjsg uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 2778f005ef32Sjsg int i; 2779f005ef32Sjsg 2780f005ef32Sjsg workarea = container_of(work, 2781f005ef32Sjsg struct copy_context_work_handler_workarea, 2782f005ef32Sjsg copy_context_work); 2783f005ef32Sjsg 2784f005ef32Sjsg p = workarea->p; 2785f005ef32Sjsg mm = get_task_mm(p->lead_thread); 2786f005ef32Sjsg 2787f005ef32Sjsg if (!mm) 2788f005ef32Sjsg return; 2789f005ef32Sjsg 2790f005ef32Sjsg kthread_use_mm(mm); 2791f005ef32Sjsg for (i = 0; i < p->n_pdds; i++) { 2792f005ef32Sjsg struct kfd_process_device *pdd = p->pdds[i]; 2793f005ef32Sjsg struct device_queue_manager *dqm = pdd->dev->dqm; 2794f005ef32Sjsg struct qcm_process_device *qpd = &pdd->qpd; 2795f005ef32Sjsg 2796f005ef32Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 2797f005ef32Sjsg mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2798f005ef32Sjsg 2799f005ef32Sjsg /* We ignore the return value from get_wave_state 2800f005ef32Sjsg * because 2801f005ef32Sjsg * i) right now, it always returns 0, and 2802f005ef32Sjsg * ii) if we hit an error, we would continue to the 2803f005ef32Sjsg * next queue anyway. 
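 * The wave state is copied straight into each queue's user-mode
 * ctx_save_restore_area, which is why this handler runs under
 * kthread_use_mm().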
2804f005ef32Sjsg */ 2805f005ef32Sjsg mqd_mgr->get_wave_state(mqd_mgr, 2806f005ef32Sjsg q->mqd, 2807f005ef32Sjsg &q->properties, 2808f005ef32Sjsg (void __user *) q->properties.ctx_save_restore_area_address, 2809f005ef32Sjsg &tmp_ctl_stack_used_size, 2810f005ef32Sjsg &tmp_save_area_used_size); 2811f005ef32Sjsg } 2812f005ef32Sjsg } 2813f005ef32Sjsg kthread_unuse_mm(mm); 2814f005ef32Sjsg mmput(mm); 2815f005ef32Sjsg } 2816f005ef32Sjsg 2817f005ef32Sjsg static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 2818f005ef32Sjsg { 2819f005ef32Sjsg size_t array_size = num_queues * sizeof(uint32_t); 2820f005ef32Sjsg 2821f005ef32Sjsg if (!usr_queue_id_array) 2822f005ef32Sjsg return NULL; 2823f005ef32Sjsg 2824f005ef32Sjsg return memdup_user(usr_queue_id_array, array_size); 2825f005ef32Sjsg } 2826f005ef32Sjsg 2827f005ef32Sjsg int resume_queues(struct kfd_process *p, 2828f005ef32Sjsg uint32_t num_queues, 2829f005ef32Sjsg uint32_t *usr_queue_id_array) 2830f005ef32Sjsg { 2831f005ef32Sjsg uint32_t *queue_ids = NULL; 2832f005ef32Sjsg int total_resumed = 0; 2833f005ef32Sjsg int i; 2834f005ef32Sjsg 2835f005ef32Sjsg if (usr_queue_id_array) { 2836f005ef32Sjsg queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 2837f005ef32Sjsg 2838f005ef32Sjsg if (IS_ERR(queue_ids)) 2839f005ef32Sjsg return PTR_ERR(queue_ids); 2840f005ef32Sjsg 2841f005ef32Sjsg /* mask all queues as invalid. unmask per successful request */ 2842f005ef32Sjsg q_array_invalidate(num_queues, queue_ids); 2843f005ef32Sjsg } 2844f005ef32Sjsg 2845f005ef32Sjsg for (i = 0; i < p->n_pdds; i++) { 2846f005ef32Sjsg struct kfd_process_device *pdd = p->pdds[i]; 2847f005ef32Sjsg struct device_queue_manager *dqm = pdd->dev->dqm; 2848f005ef32Sjsg struct qcm_process_device *qpd = &pdd->qpd; 2849f005ef32Sjsg struct queue *q; 2850f005ef32Sjsg int r, per_device_resumed = 0; 2851f005ef32Sjsg 2852f005ef32Sjsg dqm_lock(dqm); 2853f005ef32Sjsg 2854f005ef32Sjsg /* unmask queues that resume or already resumed as valid */ 2855f005ef32Sjsg list_for_each_entry(q, &qpd->queues_list, list) { 2856f005ef32Sjsg int q_idx = QUEUE_NOT_FOUND; 2857f005ef32Sjsg 2858f005ef32Sjsg if (queue_ids) 2859f005ef32Sjsg q_idx = q_array_get_index( 2860f005ef32Sjsg q->properties.queue_id, 2861f005ef32Sjsg num_queues, 2862f005ef32Sjsg queue_ids); 2863f005ef32Sjsg 2864f005ef32Sjsg if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 2865f005ef32Sjsg int err = resume_single_queue(dqm, &pdd->qpd, q); 2866f005ef32Sjsg 2867f005ef32Sjsg if (queue_ids) { 2868f005ef32Sjsg if (!err) { 2869f005ef32Sjsg queue_ids[q_idx] &= 2870f005ef32Sjsg ~KFD_DBG_QUEUE_INVALID_MASK; 2871f005ef32Sjsg } else { 2872f005ef32Sjsg queue_ids[q_idx] |= 2873f005ef32Sjsg KFD_DBG_QUEUE_ERROR_MASK; 2874f005ef32Sjsg break; 2875f005ef32Sjsg } 2876f005ef32Sjsg } 2877f005ef32Sjsg 2878f005ef32Sjsg if (dqm->dev->kfd->shared_resources.enable_mes) { 2879f005ef32Sjsg wake_up_all(&dqm->destroy_wait); 2880f005ef32Sjsg if (!err) 2881f005ef32Sjsg total_resumed++; 2882f005ef32Sjsg } else { 2883f005ef32Sjsg per_device_resumed++; 2884f005ef32Sjsg } 2885f005ef32Sjsg } 2886f005ef32Sjsg } 2887f005ef32Sjsg 2888f005ef32Sjsg if (!per_device_resumed) { 2889f005ef32Sjsg dqm_unlock(dqm); 2890f005ef32Sjsg continue; 2891f005ef32Sjsg } 2892f005ef32Sjsg 2893f005ef32Sjsg r = execute_queues_cpsch(dqm, 2894f005ef32Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 2895f005ef32Sjsg 0, 2896f005ef32Sjsg USE_DEFAULT_GRACE_PERIOD); 2897f005ef32Sjsg if (r) { 2898f005ef32Sjsg pr_err("Failed to resume process queues\n"); 2899f005ef32Sjsg if (queue_ids) { 
2900f005ef32Sjsg list_for_each_entry(q, &qpd->queues_list, list) {
2901f005ef32Sjsg int q_idx = q_array_get_index(
2902f005ef32Sjsg q->properties.queue_id,
2903f005ef32Sjsg num_queues,
2904f005ef32Sjsg queue_ids);
2905f005ef32Sjsg
2906f005ef32Sjsg /* mask queue as error on resume fail */
2907f005ef32Sjsg if (q_idx != QUEUE_NOT_FOUND)
2908f005ef32Sjsg queue_ids[q_idx] |=
2909f005ef32Sjsg KFD_DBG_QUEUE_ERROR_MASK;
2910f005ef32Sjsg }
2911f005ef32Sjsg }
2912f005ef32Sjsg } else {
2913f005ef32Sjsg wake_up_all(&dqm->destroy_wait);
2914f005ef32Sjsg total_resumed += per_device_resumed;
2915f005ef32Sjsg }
2916f005ef32Sjsg
2917f005ef32Sjsg dqm_unlock(dqm);
2918f005ef32Sjsg }
2919f005ef32Sjsg
2920f005ef32Sjsg if (queue_ids) {
2921f005ef32Sjsg if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
2922f005ef32Sjsg num_queues * sizeof(uint32_t)))
2923f005ef32Sjsg pr_err("copy_to_user failed on queue resume\n");
2924f005ef32Sjsg
2925f005ef32Sjsg kfree(queue_ids);
2926f005ef32Sjsg }
2927f005ef32Sjsg
2928f005ef32Sjsg return total_resumed;
2929f005ef32Sjsg }
2930f005ef32Sjsg
2931f005ef32Sjsg int suspend_queues(struct kfd_process *p,
2932f005ef32Sjsg uint32_t num_queues,
2933f005ef32Sjsg uint32_t grace_period,
2934f005ef32Sjsg uint64_t exception_clear_mask,
2935f005ef32Sjsg uint32_t *usr_queue_id_array)
2936f005ef32Sjsg {
2937f005ef32Sjsg uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
2938f005ef32Sjsg int total_suspended = 0;
2939f005ef32Sjsg int i;
2940f005ef32Sjsg
2941f005ef32Sjsg if (IS_ERR(queue_ids))
2942f005ef32Sjsg return PTR_ERR(queue_ids);
2943f005ef32Sjsg
2944f005ef32Sjsg /* mask all queues as invalid. unmask on successful request */
2945f005ef32Sjsg q_array_invalidate(num_queues, queue_ids);
2946f005ef32Sjsg
2947f005ef32Sjsg for (i = 0; i < p->n_pdds; i++) {
2948f005ef32Sjsg struct kfd_process_device *pdd = p->pdds[i];
2949f005ef32Sjsg struct device_queue_manager *dqm = pdd->dev->dqm;
2950f005ef32Sjsg struct qcm_process_device *qpd = &pdd->qpd;
2951f005ef32Sjsg struct queue *q;
2952f005ef32Sjsg int r, per_device_suspended = 0;
2953f005ef32Sjsg
2954f005ef32Sjsg mutex_lock(&p->event_mutex);
2955f005ef32Sjsg dqm_lock(dqm);
2956f005ef32Sjsg
2957f005ef32Sjsg /* unmask queues that suspend or already suspended */
2958f005ef32Sjsg list_for_each_entry(q, &qpd->queues_list, list) {
2959f005ef32Sjsg int q_idx = q_array_get_index(q->properties.queue_id,
2960f005ef32Sjsg num_queues,
2961f005ef32Sjsg queue_ids);
2962f005ef32Sjsg
2963f005ef32Sjsg if (q_idx != QUEUE_NOT_FOUND) {
2964f005ef32Sjsg int err = suspend_single_queue(dqm, pdd, q);
2965f005ef32Sjsg bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
2966f005ef32Sjsg
2967f005ef32Sjsg if (!err) {
2968f005ef32Sjsg queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
2969f005ef32Sjsg if (exception_clear_mask && is_mes)
2970f005ef32Sjsg q->properties.exception_status &=
2971f005ef32Sjsg ~exception_clear_mask;
2972f005ef32Sjsg
2973f005ef32Sjsg if (is_mes)
2974f005ef32Sjsg total_suspended++;
2975f005ef32Sjsg else
2976f005ef32Sjsg per_device_suspended++;
2977f005ef32Sjsg } else if (err != -EBUSY) {
2978f005ef32Sjsg r = err;
2979f005ef32Sjsg queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
2980f005ef32Sjsg break;
2981f005ef32Sjsg }
2982f005ef32Sjsg }
2983f005ef32Sjsg }
2984f005ef32Sjsg
2985f005ef32Sjsg if (!per_device_suspended) {
2986f005ef32Sjsg dqm_unlock(dqm);
2987f005ef32Sjsg mutex_unlock(&p->event_mutex);
2988f005ef32Sjsg if (total_suspended)
2989f005ef32Sjsg amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
2990f005ef32Sjsg continue;
2991f005ef32Sjsg }
2992f005ef32Sjsg
2993f005ef32Sjsg r = execute_queues_cpsch(dqm,
2994f005ef32Sjsg KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
2995f005ef32Sjsg grace_period);
2996f005ef32Sjsg
2997f005ef32Sjsg if (r)
2998f005ef32Sjsg pr_err("Failed to suspend process queues.\n");
2999f005ef32Sjsg else
3000f005ef32Sjsg total_suspended += per_device_suspended;
3001f005ef32Sjsg
3002f005ef32Sjsg list_for_each_entry(q, &qpd->queues_list, list) {
3003f005ef32Sjsg int q_idx = q_array_get_index(q->properties.queue_id,
3004f005ef32Sjsg num_queues, queue_ids);
3005f005ef32Sjsg
3006f005ef32Sjsg if (q_idx == QUEUE_NOT_FOUND)
3007f005ef32Sjsg continue;
3008f005ef32Sjsg
3009f005ef32Sjsg /* mask queue as error on suspend fail */
3010f005ef32Sjsg if (r)
3011f005ef32Sjsg queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
3012f005ef32Sjsg else if (exception_clear_mask)
3013f005ef32Sjsg q->properties.exception_status &=
3014f005ef32Sjsg ~exception_clear_mask;
3015f005ef32Sjsg }
3016f005ef32Sjsg
3017f005ef32Sjsg dqm_unlock(dqm);
3018f005ef32Sjsg mutex_unlock(&p->event_mutex);
3019f005ef32Sjsg amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
3020f005ef32Sjsg }
3021f005ef32Sjsg
3022f005ef32Sjsg if (total_suspended) {
3023f005ef32Sjsg struct copy_context_work_handler_workarea copy_context_worker;
3024f005ef32Sjsg
3025f005ef32Sjsg INIT_WORK_ONSTACK(
3026f005ef32Sjsg &copy_context_worker.copy_context_work,
3027f005ef32Sjsg copy_context_work_handler);
3028f005ef32Sjsg
3029f005ef32Sjsg copy_context_worker.p = p;
3030f005ef32Sjsg
3031f005ef32Sjsg schedule_work(&copy_context_worker.copy_context_work);
3032f005ef32Sjsg
3033f005ef32Sjsg
3034f005ef32Sjsg flush_work(&copy_context_worker.copy_context_work);
3035f005ef32Sjsg destroy_work_on_stack(&copy_context_worker.copy_context_work);
3036f005ef32Sjsg }
3037f005ef32Sjsg
3038f005ef32Sjsg if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
3039f005ef32Sjsg num_queues * sizeof(uint32_t)))
3040f005ef32Sjsg pr_err("copy_to_user failed on queue suspend\n");
3041f005ef32Sjsg
3042f005ef32Sjsg kfree(queue_ids);
3043f005ef32Sjsg
3044f005ef32Sjsg return total_suspended;
3045f005ef32Sjsg }
3046f005ef32Sjsg
3047f005ef32Sjsg static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
3048f005ef32Sjsg {
3049f005ef32Sjsg switch (q_props->type) {
3050f005ef32Sjsg case KFD_QUEUE_TYPE_COMPUTE:
3051f005ef32Sjsg return q_props->format == KFD_QUEUE_FORMAT_PM4
3052f005ef32Sjsg ?
KFD_IOC_QUEUE_TYPE_COMPUTE 3053f005ef32Sjsg : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3054f005ef32Sjsg case KFD_QUEUE_TYPE_SDMA: 3055f005ef32Sjsg return KFD_IOC_QUEUE_TYPE_SDMA; 3056f005ef32Sjsg case KFD_QUEUE_TYPE_SDMA_XGMI: 3057f005ef32Sjsg return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3058f005ef32Sjsg default: 3059f005ef32Sjsg WARN_ONCE(true, "queue type not recognized!"); 3060f005ef32Sjsg return 0xffffffff; 3061f005ef32Sjsg }; 3062f005ef32Sjsg } 3063f005ef32Sjsg 3064f005ef32Sjsg void set_queue_snapshot_entry(struct queue *q, 3065f005ef32Sjsg uint64_t exception_clear_mask, 3066f005ef32Sjsg struct kfd_queue_snapshot_entry *qss_entry) 3067f005ef32Sjsg { 3068f005ef32Sjsg qss_entry->ring_base_address = q->properties.queue_address; 3069f005ef32Sjsg qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3070f005ef32Sjsg qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3071f005ef32Sjsg qss_entry->ctx_save_restore_address = 3072f005ef32Sjsg q->properties.ctx_save_restore_area_address; 3073f005ef32Sjsg qss_entry->ctx_save_restore_area_size = 3074f005ef32Sjsg q->properties.ctx_save_restore_area_size; 3075f005ef32Sjsg qss_entry->exception_status = q->properties.exception_status; 3076f005ef32Sjsg qss_entry->queue_id = q->properties.queue_id; 3077f005ef32Sjsg qss_entry->gpu_id = q->device->id; 3078f005ef32Sjsg qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3079f005ef32Sjsg qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3080f005ef32Sjsg q->properties.exception_status &= ~exception_clear_mask; 3081f005ef32Sjsg } 3082f005ef32Sjsg 3083f005ef32Sjsg int debug_lock_and_unmap(struct device_queue_manager *dqm) 3084f005ef32Sjsg { 3085f005ef32Sjsg int r; 3086f005ef32Sjsg 3087f005ef32Sjsg if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3088f005ef32Sjsg pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); 3089f005ef32Sjsg return -EINVAL; 3090f005ef32Sjsg } 3091f005ef32Sjsg 3092f005ef32Sjsg if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3093f005ef32Sjsg return 0; 3094f005ef32Sjsg 3095f005ef32Sjsg dqm_lock(dqm); 3096f005ef32Sjsg 3097f005ef32Sjsg r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3098f005ef32Sjsg if (r) 3099f005ef32Sjsg dqm_unlock(dqm); 3100f005ef32Sjsg 3101f005ef32Sjsg return r; 3102f005ef32Sjsg } 3103f005ef32Sjsg 3104f005ef32Sjsg int debug_map_and_unlock(struct device_queue_manager *dqm) 3105f005ef32Sjsg { 3106f005ef32Sjsg int r; 3107f005ef32Sjsg 3108f005ef32Sjsg if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3109f005ef32Sjsg pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy); 3110f005ef32Sjsg return -EINVAL; 3111f005ef32Sjsg } 3112f005ef32Sjsg 3113f005ef32Sjsg if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3114f005ef32Sjsg return 0; 3115f005ef32Sjsg 3116f005ef32Sjsg r = map_queues_cpsch(dqm); 3117f005ef32Sjsg 3118f005ef32Sjsg dqm_unlock(dqm); 3119f005ef32Sjsg 3120f005ef32Sjsg return r; 3121f005ef32Sjsg } 3122f005ef32Sjsg 3123f005ef32Sjsg int debug_refresh_runlist(struct device_queue_manager *dqm) 3124f005ef32Sjsg { 3125f005ef32Sjsg int r = debug_lock_and_unmap(dqm); 3126f005ef32Sjsg 3127f005ef32Sjsg if (r) 3128f005ef32Sjsg return r; 3129f005ef32Sjsg 3130f005ef32Sjsg return debug_map_and_unlock(dqm); 3131f005ef32Sjsg } 3132f005ef32Sjsg 3133fb4d8502Sjsg #if defined(CONFIG_DEBUG_FS) 3134fb4d8502Sjsg 3135fb4d8502Sjsg static void seq_reg_dump(struct seq_file *m, 3136fb4d8502Sjsg uint32_t (*dump)[2], uint32_t n_regs) 3137fb4d8502Sjsg { 3138fb4d8502Sjsg uint32_t i, count; 3139fb4d8502Sjsg 
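/*
 * Format note: each row starts with an "address: value" pair; values
 * from consecutive register addresses are appended to the same row,
 * up to eight entries, before a new address header is emitted.
 */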
3140fb4d8502Sjsg for (i = 0, count = 0; i < n_regs; i++) { 3141fb4d8502Sjsg if (count == 0 || 3142fb4d8502Sjsg dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3143fb4d8502Sjsg seq_printf(m, "%s %08x: %08x", 3144fb4d8502Sjsg i ? "\n" : "", 3145fb4d8502Sjsg dump[i][0], dump[i][1]); 3146fb4d8502Sjsg count = 7; 3147fb4d8502Sjsg } else { 3148fb4d8502Sjsg seq_printf(m, " %08x", dump[i][1]); 3149fb4d8502Sjsg count--; 3150fb4d8502Sjsg } 3151fb4d8502Sjsg } 3152fb4d8502Sjsg 3153fb4d8502Sjsg seq_puts(m, "\n"); 3154fb4d8502Sjsg } 3155fb4d8502Sjsg 3156fb4d8502Sjsg int dqm_debugfs_hqds(struct seq_file *m, void *data) 3157fb4d8502Sjsg { 3158fb4d8502Sjsg struct device_queue_manager *dqm = data; 3159f005ef32Sjsg uint32_t xcc_mask = dqm->dev->xcc_mask; 3160fb4d8502Sjsg uint32_t (*dump)[2], n_regs; 3161fb4d8502Sjsg int pipe, queue; 3162f005ef32Sjsg int r = 0, xcc_id; 3163f005ef32Sjsg uint32_t sdma_engine_start; 3164fb4d8502Sjsg 3165c349dbc7Sjsg if (!dqm->sched_running) { 31661bb76ff1Sjsg seq_puts(m, " Device is stopped\n"); 3167c349dbc7Sjsg return 0; 3168c349dbc7Sjsg } 3169c349dbc7Sjsg 3170f005ef32Sjsg for_each_inst(xcc_id, xcc_mask) { 31711bb76ff1Sjsg r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3172f005ef32Sjsg KFD_CIK_HIQ_PIPE, 3173f005ef32Sjsg KFD_CIK_HIQ_QUEUE, &dump, 3174f005ef32Sjsg &n_regs, xcc_id); 3175fb4d8502Sjsg if (!r) { 3176f005ef32Sjsg seq_printf( 3177f005ef32Sjsg m, 3178f005ef32Sjsg " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3179f005ef32Sjsg xcc_id, 3180fb4d8502Sjsg KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3181fb4d8502Sjsg KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3182fb4d8502Sjsg KFD_CIK_HIQ_QUEUE); 3183fb4d8502Sjsg seq_reg_dump(m, dump, n_regs); 3184fb4d8502Sjsg 3185fb4d8502Sjsg kfree(dump); 3186fb4d8502Sjsg } 3187fb4d8502Sjsg 3188fb4d8502Sjsg for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3189fb4d8502Sjsg int pipe_offset = pipe * get_queues_per_pipe(dqm); 3190fb4d8502Sjsg 3191fb4d8502Sjsg for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3192fb4d8502Sjsg if (!test_bit(pipe_offset + queue, 3193f005ef32Sjsg dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3194fb4d8502Sjsg continue; 3195fb4d8502Sjsg 3196f005ef32Sjsg r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3197f005ef32Sjsg pipe, queue, 3198f005ef32Sjsg &dump, &n_regs, 3199f005ef32Sjsg xcc_id); 3200fb4d8502Sjsg if (r) 3201fb4d8502Sjsg break; 3202fb4d8502Sjsg 3203f005ef32Sjsg seq_printf(m, 3204f005ef32Sjsg " Inst %d, CP Pipe %d, Queue %d\n", 3205f005ef32Sjsg xcc_id, pipe, queue); 3206fb4d8502Sjsg seq_reg_dump(m, dump, n_regs); 3207fb4d8502Sjsg 3208fb4d8502Sjsg kfree(dump); 3209fb4d8502Sjsg } 3210fb4d8502Sjsg } 3211f005ef32Sjsg } 3212fb4d8502Sjsg 3213f005ef32Sjsg sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3214f005ef32Sjsg for (pipe = sdma_engine_start; 3215f005ef32Sjsg pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3216f005ef32Sjsg pipe++) { 3217c349dbc7Sjsg for (queue = 0; 3218f005ef32Sjsg queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3219c349dbc7Sjsg queue++) { 3220fb4d8502Sjsg r = dqm->dev->kfd2kgd->hqd_sdma_dump( 32211bb76ff1Sjsg dqm->dev->adev, pipe, queue, &dump, &n_regs); 3222fb4d8502Sjsg if (r) 3223fb4d8502Sjsg break; 3224fb4d8502Sjsg 3225fb4d8502Sjsg seq_printf(m, " SDMA Engine %d, RLC %d\n", 3226fb4d8502Sjsg pipe, queue); 3227fb4d8502Sjsg seq_reg_dump(m, dump, n_regs); 3228fb4d8502Sjsg 3229fb4d8502Sjsg kfree(dump); 3230fb4d8502Sjsg } 3231fb4d8502Sjsg } 3232fb4d8502Sjsg 3233fb4d8502Sjsg return r; 3234fb4d8502Sjsg } 3235fb4d8502Sjsg 
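/*
 * Debugfs hook that deliberately hangs the hardware scheduler (HWS),
 * typically to exercise reset/recovery handling: pm_debugfs_hang_hws()
 * primes the packet manager with a hang-inducing submission, after
 * which the runlist is re-executed under the DQM lock.
 */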
32365ca02815Sjsg int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3237fb4d8502Sjsg { 3238fb4d8502Sjsg int r = 0; 3239fb4d8502Sjsg 3240fb4d8502Sjsg dqm_lock(dqm); 32415ca02815Sjsg r = pm_debugfs_hang_hws(&dqm->packet_mgr); 32425ca02815Sjsg if (r) { 32435ca02815Sjsg dqm_unlock(dqm); 32445ca02815Sjsg return r; 32455ca02815Sjsg } 3246fb4d8502Sjsg dqm->active_runlist = true; 3247f005ef32Sjsg r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3248f005ef32Sjsg 0, USE_DEFAULT_GRACE_PERIOD); 3249fb4d8502Sjsg dqm_unlock(dqm); 3250fb4d8502Sjsg 3251fb4d8502Sjsg return r; 3252fb4d8502Sjsg } 3253fb4d8502Sjsg 3254fb4d8502Sjsg #endif 3255