// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"

static inline struct process_queue_node *get_queue_by_qid(
	struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
			return pqn;
	}

	return NULL;
}

static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
				    unsigned int qid)
{
	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return -EINVAL;

	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
		return -ENOSPC;
	}

	return 0;
}

static int find_available_queue_slot(struct process_queue_manager *pqm,
				     unsigned int *qid)
{
	unsigned long found;

	found = find_first_zero_bit(pqm->queue_slot_bitmap,
				    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	pr_debug("The new slot id %lu\n", found);

	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
		pr_info("Cannot open more queues for process with pasid 0x%x\n",
			pqm->process->pasid);
		return -ENOMEM;
	}

	set_bit(found, pqm->queue_slot_bitmap);
	*qid = found;

	return 0;
}

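/*
 * Evict all of a process's queues from one device. On MES hardware this
 * also flushes the shader debugger state, but only if the GPU is not in
 * reset (guarded by the reset-domain read lock). The already_dequeued
 * flag makes repeated calls harmless.
 */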
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_node *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;

	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	if (dev->kfd->shared_resources.enable_mes &&
	    down_read_trylock(&dev->adev->reset_domain->sem)) {
		amdgpu_mes_flush_shader_debugger(dev->adev,
						 pdd->proc_ctx_gpu_addr);
		up_read(&dev->adev->reset_domain->sem);
	}
	pdd->already_dequeued = true;
}

int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
		void *gws)
{
	struct kfd_node *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only allow one queue per process to have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
		if (gws)
			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
							       gws, &mem);
		else
			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
								    pqn->q->gws);
		if (unlikely(ret))
			return ret;
		pqn->q->gws = mem;
	} else {
		/*
		 * Intentionally set GWS to a non-NULL value
		 * for devices that do not use GWS for global wave
		 * synchronization but require the formality
		 * of setting GWS for cooperative groups.
		 */
		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
	}

	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						     pqn->q, NULL);
}

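/* Evict this process's queues from every device it has a pdd on. */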
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
	int i;

	for (i = 0; i < p->n_pdds; i++)
		kfd_process_dequeue_from_device(p->pdds[i]);
}

int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
	INIT_LIST_HEAD(&pqm->queues);
	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					       GFP_KERNEL);
	if (!pqm->queue_slot_bitmap)
		return -ENOMEM;
	pqm->process = p;

	return 0;
}

static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
				     struct process_queue_node *pqn)
{
	struct kfd_node *dev;
	struct kfd_process_device *pdd;

	dev = pqn->q->device;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return;
	}

	if (pqn->q->gws) {
		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
		    !dev->kfd->shared_resources.enable_mes)
			amdgpu_amdkfd_remove_gws_from_process(
				pqm->process->kgd_process_info, pqn->q->gws);
		pdd->qpd.num_gws = 0;
	}

	if (dev->kfd->shared_resources.enable_mes) {
		amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
		if (pqn->q->wptr_bo)
			amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo);
	}
}

void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		if (pqn->q)
			pqm_clean_queue_resource(pqm, pqn);

		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}

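/*
 * Common initialization for user-mode queues: the doorbell is mapped and
 * written from user space, the VMID is left for the DQM to assign, and on
 * MES hardware a zeroed gang context BO is allocated up front.
 */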
static int init_user_queue(struct process_queue_manager *pqm,
			   struct kfd_node *dev, struct queue **q,
			   struct queue_properties *q_properties,
			   struct file *f, struct amdgpu_bo *wptr_bo,
			   unsigned int qid)
{
	int retval;

	/* Doorbell initialized in user space */
	q_properties->doorbell_ptr = NULL;
	q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);

	/* let DQM handle it */
	q_properties->vmid = 0;
	q_properties->queue_id = qid;

	retval = init_queue(q, q_properties);
	if (retval != 0)
		return retval;

	(*q)->device = dev;
	(*q)->process = pqm->process;

	if (dev->kfd->shared_resources.enable_mes) {
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						     AMDGPU_MES_GANG_CTX_SIZE,
						     &(*q)->gang_ctx_bo,
						     &(*q)->gang_ctx_gpu_addr,
						     &(*q)->gang_ctx_cpu_ptr,
						     false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
			goto cleanup;
		}
		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
		(*q)->wptr_bo = wptr_bo;
	}

	pr_debug("PQM After init queue");
	return 0;

cleanup:
	uninit_queue(*q);
	*q = NULL;
	return retval;
}

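/*
 * Create a compute, SDMA or DIQ queue for this process. A free qid is
 * picked from the slot bitmap (or, on CRIU restore, the saved qid is
 * reclaimed), the process is registered with the DQM on its first queue,
 * and on success the queue's doorbell offset within the doorbell page is
 * returned in bytes for user mode.
 */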
int pqm_create_queue(struct process_queue_manager *pqm,
		     struct kfd_node *dev,
		     struct file *f,
		     struct queue_properties *properties,
		     unsigned int *qid,
		     struct amdgpu_bo *wptr_bo,
		     const struct kfd_criu_queue_priv_data *q_data,
		     const void *restore_mqd,
		     const void *restore_ctl_stack,
		     uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	/*
	 * On GFX 9.4.3, increase the number of queues that
	 * can be created to 255. No HWS limit on GFX 9.4.3.
	 */
	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
		max_queues = 255;

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * For a debug process, verify that it is within the static queues
	 * limit; currently the limit is set to half of the total available
	 * HQD slots. If we are just about to create a DIQ, the is_debug
	 * flag is not set yet, hence we also check the type.
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->kfd->device_info.max_no_of_hqd/2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether a SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is over subscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		    ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		     (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = kfd_process_drain_interrupts(pdd);
		if (retval)
			break;

		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							   kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
		       pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process) {
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * relative doorbell index = Absolute doorbell index -
		 * absolute index of first doorbell in the page.
		 */
		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
								       pdd->qpd.proc_doorbells,
								       0,
								       pdd->dev->kfd->device_info.doorbell_size);

		*p_doorbell_offset_in_process = (q->properties.doorbell_off
						 - first_db_index) * sizeof(uint32_t);
	}

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(pqn);
err_allocate_pqn:
	/* If the queue list is empty, unregister the process from the device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}

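/*
 * Tear down the queue behind qid and everything attached to it: the
 * procfs entry, GWS and MES gang-context resources, the qid slot, and,
 * for the process's last queue on the device, the DQM registration.
 * -ETIME from the DQM is deliberately treated as non-fatal so that
 * cleanup still completes.
 */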
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_node *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq, false);
	}

	if (pqn->q) {
		kfd_procfs_del_queue(pqn->q);
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
			       pqm->process->pasid,
			       pqn->q->properties.queue_id, retval);
			if (retval != -ETIME)
				goto err_destroy_queue;
		}

		pqm_clean_queue_resource(pqm, pqn);
		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}

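/*
 * The two update paths below both end in dqm->ops.update_queue():
 * pqm_update_queue_properties() for ring address/size/percent/priority
 * changes, pqm_update_mqd() for MQD-level changes such as CU masks.
 */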
int pqm_update_queue_properties(struct process_queue_manager *pqm,
				unsigned int qid, struct queue_properties *p)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	pqn->q->properties.queue_address = p->queue_address;
	pqn->q->properties.queue_size = p->queue_size;
	pqn->q->properties.queue_percent = p->queue_percent;
	pqn->q->properties.priority = p->priority;
	pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						       pqn->q, NULL);
	if (retval != 0)
		return retval;

	return 0;
}

int pqm_update_mqd(struct process_queue_manager *pqm,
		   unsigned int qid, struct mqd_update_info *minfo)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	/* CUs are masked for debugger requirements so deny user mask */
	if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
		return -EBUSY;

	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
	if (minfo && minfo->cu_mask.ptr &&
	    KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
		int i;

		for (i = 0; i < minfo->cu_mask.count; i += 2) {
			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;

			if (cu_pair && cu_pair != 0x3) {
				pr_debug("CUs must be adjacent pairwise enabled.\n");
				return -EINVAL;
			}
		}
	}

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						       pqn->q, minfo);
	if (retval != 0)
		return retval;

	if (minfo && minfo->cu_mask.ptr)
		pqn->q->properties.is_user_cu_masked = true;

	return 0;
}

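/*
 * qid lookup helpers: one returns the kernel (DIQ) queue, the other the
 * user queue. pqm_get_wave_state() additionally copies the queue's
 * control stack out to a user buffer for the debugger.
 */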
struct kernel_queue *pqm_get_kernel_queue(
	struct process_queue_manager *pqm,
	unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (pqn && pqn->kq)
		return pqn->kq;

	return NULL;
}

struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
				 unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	return pqn ? pqn->q : NULL;
}

int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n",
			 qid);
		return -EFAULT;
	}

	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
						       pqn->q,
						       ctl_stack,
						       ctl_stack_used_size,
						       save_area_used_size);
}

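/*
 * Fill a debugger snapshot with one entry per user queue. The entry size
 * is negotiated: at most min(*entry_size, sizeof(entry)) bytes are copied
 * per queue, and the total queue count is reported back even when the
 * buffer is too small, so the caller can retry with more space.
 */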
int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
			   uint64_t exception_clear_mask,
			   void __user *buf,
			   int *num_qss_entries,
			   uint32_t *entry_size)
{
	struct process_queue_node *pqn;
	struct kfd_queue_snapshot_entry src;
	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
	int r = 0;

	*num_qss_entries = 0;
	if (!(*entry_size))
		return -EINVAL;

	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
	mutex_lock(&pqm->process->event_mutex);

	memset(&src, 0, sizeof(src));

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (!pqn->q)
			continue;

		if (*num_qss_entries < tmp_qss_entries) {
			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);

			if (copy_to_user(buf, &src, *entry_size)) {
				r = -EFAULT;
				break;
			}
			buf += tmp_entry_size;
		}
		*num_qss_entries += 1;
	}

	mutex_unlock(&pqm->process->event_mutex);
	return r;
}

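/*
 * CRIU checkpoint support. Each queue is serialized as a
 * kfd_criu_queue_priv_data header immediately followed by its MQD and
 * control stack; the size helpers below let user space allocate one
 * contiguous private-data blob up front.
 */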
static int get_queue_data_sizes(struct kfd_process_device *pdd,
				struct queue *q,
				uint32_t *mqd_size,
				uint32_t *ctl_stack_size)
{
	int ret;

	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
					    q->properties.queue_id,
					    mqd_size,
					    ctl_stack_size);
	if (ret)
		pr_err("Failed to get queue dump info (%d)\n", ret);

	return ret;
}

int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	uint32_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
			    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				extra_data_sizes += mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	*priv_data_sizes = extra_data_sizes +
		(*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}

static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
			      unsigned int qid,
			      void *mqd,
			      void *ctl_stack)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
						       pqn->q, mqd, ctl_stack);
}

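/*
 * Snapshot one queue into q_data, which the caller must have sized to
 * hold the header plus mqd_size plus ctl_stack_size bytes.
 */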
static int criu_checkpoint_queue(struct kfd_process_device *pdd,
				 struct queue *q,
				 struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->user_gpu_id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id = q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;

	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	q_data->gws = !!q->gws;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}

static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
					 uint8_t __user *user_priv,
					 unsigned int *q_index,
					 uint64_t *queues_priv_data_offset)
{
	unsigned int q_private_data_size = 0;
	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint64_t q_data_size;
		uint32_t mqd_size;
		uint32_t ctl_stack_size;

		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
		    q->properties.type != KFD_QUEUE_TYPE_SDMA &&
		    q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {

			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			break;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			break;

		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (q_private_data_size < q_data_size) {
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
			if (!q_private_data) {
				ret = -ENOMEM;
				break;
			}
			q_private_data_size = q_data_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;

		/* data stored in this order: priv_data, mqd, ctl_stack */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			break;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				   q_data, q_data_size);
		if (ret) {
			ret = -EFAULT;
			break;
		}
		*queues_priv_data_offset += q_data_size;
		*q_index = *q_index + 1;
	}

	kfree(q_private_data);

	return ret;
}

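/* Append the private data of every queue on every device to user_priv_data. */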
int kfd_criu_checkpoint_queues(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset)
{
	int ret = 0, pdd_index, q_index = 0;

	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
		struct kfd_process_device *pdd = p->pdds[pdd_index];

		/*
		 * criu_checkpoint_queues_device will copy data to user and update q_index and
		 * queues_priv_data_offset
		 */
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
						    priv_data_offset);

		if (ret)
			break;
	}

	return ret;
}

static void set_queue_properties_from_criu(struct queue_properties *qp,
					   struct kfd_criu_queue_priv_data *q_data)
{
	qp->is_interop = false;
	qp->queue_percent = q_data->q_percent;
	qp->priority = q_data->priority;
	qp->queue_address = q_data->q_address;
	qp->queue_size = q_data->q_size;
	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
	qp->ctl_stack_size = q_data->ctl_stack_size;
	qp->type = q_data->type;
	qp->format = q_data->format;
}

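/*
 * Restore one queue from a CRIU image: validate and copy in the header,
 * MQD and control stack, recreate the queue under its original qid, and
 * re-attach GWS if the checkpoint recorded it.
 */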
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/* data stored in this order: mqd, ctl_stack */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);

	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
			       NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	kfree(q_data);
	kfree(q_extra_data);

	return ret;
}

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  uint32_t *mqd_size,
				  uint32_t *ctl_stack_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
							   pqn->q, mqd_size,
							   ctl_stack_size);
	return 0;
}

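/*
 * debugfs: dump every MQD owned by the process. On multi-XCC parts a
 * compute queue keeps one MQD per XCC at mqd_stride() intervals, so each
 * instance is printed separately.
 */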
#if defined(CONFIG_DEBUG_FS)

int pqm_debugfs_mqds(struct seq_file *m, void *data)
{
	struct process_queue_manager *pqm = data;
	struct process_queue_node *pqn;
	struct queue *q;
	enum KFD_MQD_TYPE mqd_type;
	struct mqd_manager *mqd_mgr;
	int r = 0, xcc, num_xccs = 1;
	void *mqd;
	uint64_t size = 0;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (pqn->q) {
			q = pqn->q;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_SDMA:
			case KFD_QUEUE_TYPE_SDMA_XGMI:
				seq_printf(m, "  SDMA queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_SDMA;
				break;
			case KFD_QUEUE_TYPE_COMPUTE:
				seq_printf(m, "  Compute queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_CP;
				num_xccs = NUM_XCC(q->device->xcc_mask);
				break;
			default:
				seq_printf(m,
					   "  Bad user queue type %d on device %x\n",
					   q->properties.type, q->device->id);
				continue;
			}
			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
			size = mqd_mgr->mqd_stride(mqd_mgr,
						   &q->properties);
		} else if (pqn->kq) {
			q = pqn->kq->queue;
			mqd_mgr = pqn->kq->mqd_mgr;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_DIQ:
				seq_printf(m, "  DIQ on device %x\n",
					   pqn->kq->dev->id);
				break;
			default:
				seq_printf(m,
					   "  Bad kernel queue type %d on device %x\n",
					   q->properties.type,
					   pqn->kq->dev->id);
				continue;
			}
		} else {
			seq_printf(m,
				   "  Weird: Queue node with neither kernel nor user queue\n");
			continue;
		}

		for (xcc = 0; xcc < num_xccs; xcc++) {
			mqd = q->mqd + size * xcc;
			r = mqd_mgr->debugfs_show_mqd(m, mqd);
			if (r != 0)
				break;
		}
	}

	return r;
}

#endif