xref: /openbsd-src/sys/dev/pci/drm/amd/amdkfd/kfd_process_queue_manager.c (revision 6d61e7ca60321e270d7ef2be7c49cc945d222ef6)
11bb76ff1Sjsg // SPDX-License-Identifier: GPL-2.0 OR MIT
2fb4d8502Sjsg /*
31bb76ff1Sjsg  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4fb4d8502Sjsg  *
5fb4d8502Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
6fb4d8502Sjsg  * copy of this software and associated documentation files (the "Software"),
7fb4d8502Sjsg  * to deal in the Software without restriction, including without limitation
8fb4d8502Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9fb4d8502Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
10fb4d8502Sjsg  * Software is furnished to do so, subject to the following conditions:
11fb4d8502Sjsg  *
12fb4d8502Sjsg  * The above copyright notice and this permission notice shall be included in
13fb4d8502Sjsg  * all copies or substantial portions of the Software.
14fb4d8502Sjsg  *
15fb4d8502Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16fb4d8502Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17fb4d8502Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18fb4d8502Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19fb4d8502Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20fb4d8502Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21fb4d8502Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
22fb4d8502Sjsg  *
23fb4d8502Sjsg  */
24fb4d8502Sjsg 
25fb4d8502Sjsg #include <linux/slab.h>
26fb4d8502Sjsg #include <linux/list.h>
27fb4d8502Sjsg #include "kfd_device_queue_manager.h"
28fb4d8502Sjsg #include "kfd_priv.h"
29fb4d8502Sjsg #include "kfd_kernel_queue.h"
30c349dbc7Sjsg #include "amdgpu_amdkfd.h"
318c1206e6Sjsg #include "amdgpu_reset.h"
32fb4d8502Sjsg 
33fb4d8502Sjsg static inline struct process_queue_node *get_queue_by_qid(
34fb4d8502Sjsg 			struct process_queue_manager *pqm, unsigned int qid)
35fb4d8502Sjsg {
36fb4d8502Sjsg 	struct process_queue_node *pqn;
37fb4d8502Sjsg 
38fb4d8502Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
39fb4d8502Sjsg 		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
40fb4d8502Sjsg 		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
41fb4d8502Sjsg 			return pqn;
42fb4d8502Sjsg 	}
43fb4d8502Sjsg 
44fb4d8502Sjsg 	return NULL;
45fb4d8502Sjsg }
46fb4d8502Sjsg 
471bb76ff1Sjsg static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
481bb76ff1Sjsg 				    unsigned int qid)
491bb76ff1Sjsg {
501bb76ff1Sjsg 	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
511bb76ff1Sjsg 		return -EINVAL;
521bb76ff1Sjsg 
531bb76ff1Sjsg 	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
541bb76ff1Sjsg 		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
551bb76ff1Sjsg 		return -ENOSPC;
561bb76ff1Sjsg 	}
571bb76ff1Sjsg 
581bb76ff1Sjsg 	return 0;
591bb76ff1Sjsg }
601bb76ff1Sjsg 
61fb4d8502Sjsg static int find_available_queue_slot(struct process_queue_manager *pqm,
62fb4d8502Sjsg 					unsigned int *qid)
63fb4d8502Sjsg {
64fb4d8502Sjsg 	unsigned long found;
65fb4d8502Sjsg 
66fb4d8502Sjsg 	found = find_first_zero_bit(pqm->queue_slot_bitmap,
67fb4d8502Sjsg 			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
68fb4d8502Sjsg 
69fb4d8502Sjsg 	pr_debug("The new slot id %lu\n", found);
70fb4d8502Sjsg 
71fb4d8502Sjsg 	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
72c349dbc7Sjsg 		pr_info("Cannot open more queues for process with pasid 0x%x\n",
73fb4d8502Sjsg 				pqm->process->pasid);
74fb4d8502Sjsg 		return -ENOMEM;
75fb4d8502Sjsg 	}
76fb4d8502Sjsg 
77fb4d8502Sjsg 	set_bit(found, pqm->queue_slot_bitmap);
78fb4d8502Sjsg 	*qid = found;
79fb4d8502Sjsg 
80fb4d8502Sjsg 	return 0;
81fb4d8502Sjsg }
82fb4d8502Sjsg 
/*
 * Dequeue all of @pdd's queues from its device.
 *
 * Idempotent: pdd->already_dequeued guarantees the DQM
 * process_termination path runs at most once per device. On
 * MES-enabled devices, also flushes the shader debugger context for
 * this process, but only when the reset-domain semaphore can be taken
 * for reading (i.e. no GPU reset is in flight); otherwise the flush is
 * skipped on a best-effort basis.
 */
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_node *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;

	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	/* trylock: do not block behind (or race with) an active GPU reset */
	if (dev->kfd->shared_resources.enable_mes &&
	    down_read_trylock(&dev->adev->reset_domain->sem)) {
		amdgpu_mes_flush_shader_debugger(dev->adev,
						 pdd->proc_ctx_gpu_addr);
		up_read(&dev->adev->reset_domain->sem);
	}
	pdd->already_dequeued = true;
}
99fb4d8502Sjsg 
/*
 * pqm_set_gws - attach or detach the global wave sync (GWS) resource
 * for the user queue identified by @qid.
 *
 * @gws: kgd GWS object to attach, or NULL to detach.
 *
 * Only one queue per process may hold GWS at a time (enforced via
 * pdd->qpd.num_gws). On devices that do not allocate a real GWS BO
 * (GC 9.4.3 or MES mode) only a marker value is stored in q->gws.
 * The change is pushed to the hardware via the DQM update_queue op.
 * Returns 0 on success or a negative errno.
 */
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws)
{
	struct kfd_node *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	/* GWS only makes sense on a user queue; dev stays NULL for DIQ */
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only allow one queue per process can have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	/* Detaching when nothing is attached is a caller error */
	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
		if (gws)
			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
				gws, &mem);
		else
			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
				pqn->q->gws);
		if (unlikely(ret))
			return ret;
		/* mem is the new GWS kgd_mem on attach, NULL on detach */
		pqn->q->gws = mem;
	} else {
		/*
		 * Intentionally set GWS to a non-NULL value
		 * for devices that do not use GWS for global wave
		 * synchronization but require the formality
		 * of setting GWS for cooperative groups.
		 */
		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
	}

	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
}
158c349dbc7Sjsg 
159fb4d8502Sjsg void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
160fb4d8502Sjsg {
1615ca02815Sjsg 	int i;
162fb4d8502Sjsg 
1635ca02815Sjsg 	for (i = 0; i < p->n_pdds; i++)
1645ca02815Sjsg 		kfd_process_dequeue_from_device(p->pdds[i]);
165fb4d8502Sjsg }
166fb4d8502Sjsg 
167fb4d8502Sjsg int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
168fb4d8502Sjsg {
169fb4d8502Sjsg 	INIT_LIST_HEAD(&pqm->queues);
1701bb76ff1Sjsg 	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
1711bb76ff1Sjsg 					       GFP_KERNEL);
172fb4d8502Sjsg 	if (!pqm->queue_slot_bitmap)
173fb4d8502Sjsg 		return -ENOMEM;
174fb4d8502Sjsg 	pqm->process = p;
175fb4d8502Sjsg 
176fb4d8502Sjsg 	return 0;
177fb4d8502Sjsg }
178fb4d8502Sjsg 
/*
 * Release device-side resources owned by the user queue in @pqn:
 * the process GWS attachment (if any) and, in MES mode, the gang
 * context BO and the wptr BO. Shared helper for both the per-queue
 * destroy path and process teardown (pqm_uninit).
 *
 * Caller must ensure pqn->q is non-NULL.
 */
static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
				     struct process_queue_node *pqn)
{
	struct kfd_node *dev;
	struct kfd_process_device *pdd;

	dev = pqn->q->device;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return;
	}

	if (pqn->q->gws) {
		/* GC 9.4.3 / MES store only a marker in q->gws (see
		 * pqm_set_gws), so there is no real GWS BO to remove there.
		 */
		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
		    !dev->kfd->shared_resources.enable_mes)
			amdgpu_amdkfd_remove_gws_from_process(
				pqm->process->kgd_process_info, pqn->q->gws);
		pdd->qpd.num_gws = 0;
	}

	if (dev->kfd->shared_resources.enable_mes) {
		amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
		if (pqn->q->wptr_bo)
			amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo);
	}
}
2071aef8867Sjsg 
/*
 * Tear down the process queue manager: release device resources for
 * every remaining user queue, free each queue node, and free the qid
 * bitmap. Called during process teardown.
 */
void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		/* only user queues own GWS/MES resources; kernel queues
		 * (DIQ) have pqn->q == NULL
		 */
		if (pqn->q)
			pqm_clean_queue_resource(pqm, pqn);

		/* NOTE(review): pqn->q may be NULL here for kernel queues —
		 * presumably kfd_procfs_del_queue/uninit_queue tolerate NULL;
		 * verify in their definitions.
		 */
		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}
225fb4d8502Sjsg 
/*
 * Allocate and initialize a user-mode queue object for @dev.
 *
 * Fills in the process/device back-pointers and, in MES mode, also
 * allocates and zeroes the gang context BO and records the caller's
 * wptr BO. On MES allocation failure the queue object is freed and
 * *q is reset to NULL. Returns 0 on success or a negative errno.
 */
static int init_user_queue(struct process_queue_manager *pqm,
				struct kfd_node *dev, struct queue **q,
				struct queue_properties *q_properties,
				struct file *f, struct amdgpu_bo *wptr_bo,
				unsigned int qid)
{
	int retval;

	/* Doorbell initialized in user space*/
	q_properties->doorbell_ptr = NULL;
	q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);

	/* let DQM handle it*/
	q_properties->vmid = 0;
	q_properties->queue_id = qid;

	retval = init_queue(q, q_properties);
	if (retval != 0)
		return retval;

	(*q)->device = dev;
	(*q)->process = pqm->process;

	if (dev->kfd->shared_resources.enable_mes) {
		/* MES scheduling needs a per-gang context buffer in GTT */
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						AMDGPU_MES_GANG_CTX_SIZE,
						&(*q)->gang_ctx_bo,
						&(*q)->gang_ctx_gpu_addr,
						&(*q)->gang_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
			goto cleanup;
		}
		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
		(*q)->wptr_bo = wptr_bo;
	}

	pr_debug("PQM After init queue");
	return 0;

cleanup:
	uninit_queue(*q);
	*q = NULL;
	return retval;
}
272fb4d8502Sjsg 
/*
 * pqm_create_queue - create a queue (compute, SDMA, SDMA-XGMI or DIQ)
 * for this process on @dev and register it with the DQM.
 *
 * @qid: out, the allocated queue ID. When @q_data (CRIU restore data)
 *       is supplied the checkpointed qid is reused instead of a fresh
 *       one.
 * @restore_mqd/@restore_ctl_stack: optional CRIU restore payloads
 *       passed through to the DQM create_queue op.
 * @p_doorbell_offset_in_process: out (optional), byte offset of the
 *       queue doorbell within the process doorbell page, for user mode.
 *
 * On the first queue of a device the process is registered with the
 * DQM; on failure of the last the registration is rolled back.
 * Returns 0 on success or a negative errno (-1 for missing pdd,
 * matching historical behavior).
 */
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_node *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    struct amdgpu_bo *wptr_bo,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	/*
	 * On GFX 9.4.3, increase the number of queues that
	 * can be created to 255. No HWS limit on GFX 9.4.3.
	 */
	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
		max_queues = 255;

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * for debug process, verify that it is within the static queues limit
	 * currently limit is set to half of the total avail HQD slots
	 * If we are just about to create DIQ, the is_debug flag is not set yet
	 * Hence we also check the type as well
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->kfd->device_info.max_no_of_hqd/2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	/* CRIU restore must reuse the checkpointed queue ID */
	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	/* first queue on this device: register the process with the DQM */
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether a SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is over subscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		/* debug interface queue: a kernel queue, pqn->q stays NULL */
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = kfd_process_drain_interrupts(pdd);
		if (retval)
			break;

		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
			pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process) {
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * relative doorbell index = Absolute doorbell index -
		 * absolute index of first doorbell in the page.
		 */
		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
								       pdd->qpd.proc_doorbells,
								       0,
								       pdd->dev->kfd->device_info.doorbell_size);

		*p_doorbell_offset_in_process = (q->properties.doorbell_off
						- first_db_index) * sizeof(uint32_t);
	}

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(pqn);
err_allocate_pqn:
	/* check if queues list is empty unregister process from device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}
446fb4d8502Sjsg 
/*
 * pqm_destroy_queue - destroy the queue with ID @qid and free its
 * resources.
 *
 * Handles both kernel queues (DIQ) and user queues. A user queue is
 * removed from procfs, destroyed at the DQM, then its GWS/MES
 * resources are released. If the DQM destroy fails with -ETIME the
 * node is intentionally left in place so the queue can be reaped
 * later; any other error also returns early. When the last queue on a
 * device is gone, the process is unregistered from that device's DQM.
 * Returns 0 on success or a negative errno (-1 for missing pdd,
 * matching historical behavior).
 */
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_node *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	/* resolve the owning device from whichever queue kind is present */
	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq, false);
	}

	if (pqn->q) {
		kfd_procfs_del_queue(pqn->q);
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
				pqm->process->pasid,
				pqn->q->properties.queue_id, retval);
			/* -ETIME (HWS timeout) still proceeds with cleanup */
			if (retval != -ETIME)
				goto err_destroy_queue;
		}

		pqm_clean_queue_resource(pqm, pqn);
		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}
513fb4d8502Sjsg 
5141bb76ff1Sjsg int pqm_update_queue_properties(struct process_queue_manager *pqm,
5151bb76ff1Sjsg 				unsigned int qid, struct queue_properties *p)
516fb4d8502Sjsg {
517fb4d8502Sjsg 	int retval;
518fb4d8502Sjsg 	struct process_queue_node *pqn;
519fb4d8502Sjsg 
520fb4d8502Sjsg 	pqn = get_queue_by_qid(pqm, qid);
521fb4d8502Sjsg 	if (!pqn) {
522fb4d8502Sjsg 		pr_debug("No queue %d exists for update operation\n", qid);
523fb4d8502Sjsg 		return -EFAULT;
524fb4d8502Sjsg 	}
525fb4d8502Sjsg 
526fb4d8502Sjsg 	pqn->q->properties.queue_address = p->queue_address;
527fb4d8502Sjsg 	pqn->q->properties.queue_size = p->queue_size;
528fb4d8502Sjsg 	pqn->q->properties.queue_percent = p->queue_percent;
529fb4d8502Sjsg 	pqn->q->properties.priority = p->priority;
530f005ef32Sjsg 	pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
531fb4d8502Sjsg 
532fb4d8502Sjsg 	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
5331bb76ff1Sjsg 							pqn->q, NULL);
534fb4d8502Sjsg 	if (retval != 0)
535fb4d8502Sjsg 		return retval;
536fb4d8502Sjsg 
537fb4d8502Sjsg 	return 0;
538fb4d8502Sjsg }
539fb4d8502Sjsg 
5401bb76ff1Sjsg int pqm_update_mqd(struct process_queue_manager *pqm,
5411bb76ff1Sjsg 				unsigned int qid, struct mqd_update_info *minfo)
542fb4d8502Sjsg {
543fb4d8502Sjsg 	int retval;
544fb4d8502Sjsg 	struct process_queue_node *pqn;
545fb4d8502Sjsg 
546fb4d8502Sjsg 	pqn = get_queue_by_qid(pqm, qid);
547fb4d8502Sjsg 	if (!pqn) {
548fb4d8502Sjsg 		pr_debug("No queue %d exists for update operation\n", qid);
549fb4d8502Sjsg 		return -EFAULT;
550fb4d8502Sjsg 	}
551fb4d8502Sjsg 
552f005ef32Sjsg 	/* CUs are masked for debugger requirements so deny user mask  */
553f005ef32Sjsg 	if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
554f005ef32Sjsg 		return -EBUSY;
555f005ef32Sjsg 
5561bb76ff1Sjsg 	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
557f005ef32Sjsg 	if (minfo && minfo->cu_mask.ptr &&
5581bb76ff1Sjsg 			KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
5591bb76ff1Sjsg 		int i;
560fb4d8502Sjsg 
5611bb76ff1Sjsg 		for (i = 0; i < minfo->cu_mask.count; i += 2) {
5621bb76ff1Sjsg 			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
5631bb76ff1Sjsg 
5641bb76ff1Sjsg 			if (cu_pair && cu_pair != 0x3) {
5651bb76ff1Sjsg 				pr_debug("CUs must be adjacent pairwise enabled.\n");
5661bb76ff1Sjsg 				return -EINVAL;
5671bb76ff1Sjsg 			}
5681bb76ff1Sjsg 		}
5691bb76ff1Sjsg 	}
570fb4d8502Sjsg 
571fb4d8502Sjsg 	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
5721bb76ff1Sjsg 							pqn->q, minfo);
573fb4d8502Sjsg 	if (retval != 0)
574fb4d8502Sjsg 		return retval;
575fb4d8502Sjsg 
576f005ef32Sjsg 	if (minfo && minfo->cu_mask.ptr)
577f005ef32Sjsg 		pqn->q->properties.is_user_cu_masked = true;
578f005ef32Sjsg 
579fb4d8502Sjsg 	return 0;
580fb4d8502Sjsg }
581fb4d8502Sjsg 
582fb4d8502Sjsg struct kernel_queue *pqm_get_kernel_queue(
583fb4d8502Sjsg 					struct process_queue_manager *pqm,
584fb4d8502Sjsg 					unsigned int qid)
585fb4d8502Sjsg {
586fb4d8502Sjsg 	struct process_queue_node *pqn;
587fb4d8502Sjsg 
588fb4d8502Sjsg 	pqn = get_queue_by_qid(pqm, qid);
589fb4d8502Sjsg 	if (pqn && pqn->kq)
590fb4d8502Sjsg 		return pqn->kq;
591fb4d8502Sjsg 
592fb4d8502Sjsg 	return NULL;
593fb4d8502Sjsg }
594fb4d8502Sjsg 
595ad8b1aafSjsg struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
596ad8b1aafSjsg 					unsigned int qid)
597ad8b1aafSjsg {
598ad8b1aafSjsg 	struct process_queue_node *pqn;
599ad8b1aafSjsg 
600ad8b1aafSjsg 	pqn = get_queue_by_qid(pqm, qid);
601ad8b1aafSjsg 	return pqn ? pqn->q : NULL;
602ad8b1aafSjsg }
603ad8b1aafSjsg 
604c349dbc7Sjsg int pqm_get_wave_state(struct process_queue_manager *pqm,
605c349dbc7Sjsg 		       unsigned int qid,
606c349dbc7Sjsg 		       void __user *ctl_stack,
607c349dbc7Sjsg 		       u32 *ctl_stack_used_size,
608c349dbc7Sjsg 		       u32 *save_area_used_size)
609c349dbc7Sjsg {
610c349dbc7Sjsg 	struct process_queue_node *pqn;
611c349dbc7Sjsg 
612c349dbc7Sjsg 	pqn = get_queue_by_qid(pqm, qid);
613c349dbc7Sjsg 	if (!pqn) {
614c349dbc7Sjsg 		pr_debug("amdkfd: No queue %d exists for operation\n",
615c349dbc7Sjsg 			 qid);
616c349dbc7Sjsg 		return -EFAULT;
617c349dbc7Sjsg 	}
618c349dbc7Sjsg 
619c349dbc7Sjsg 	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
620c349dbc7Sjsg 						       pqn->q,
621c349dbc7Sjsg 						       ctl_stack,
622c349dbc7Sjsg 						       ctl_stack_used_size,
623c349dbc7Sjsg 						       save_area_used_size);
624c349dbc7Sjsg }
625c349dbc7Sjsg 
/*
 * pqm_get_queue_snapshot - copy per-queue debugger snapshot entries to
 * the user buffer @buf.
 *
 * @exception_clear_mask: exceptions to clear while snapshotting.
 * @num_qss_entries: in: capacity of @buf in entries; out: total number
 *       of user queues in the process (may exceed the capacity, in
 *       which case only the first *num_qss_entries(in) are copied).
 * @entry_size: in: caller's entry size; out: clamped to the kernel's
 *       struct kfd_queue_snapshot_entry size. Must be non-zero.
 *
 * Holds the process event_mutex across the walk. Returns 0, -EINVAL
 * for a zero entry size, or -EFAULT on copy failure.
 */
int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
			   uint64_t exception_clear_mask,
			   void __user *buf,
			   int *num_qss_entries,
			   uint32_t *entry_size)
{
	struct process_queue_node *pqn;
	struct kfd_queue_snapshot_entry src;
	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
	int r = 0;

	*num_qss_entries = 0;
	if (!(*entry_size))
		return -EINVAL;

	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
	mutex_lock(&pqm->process->event_mutex);

	memset(&src, 0, sizeof(src));

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		/* kernel queues (DIQ) are not part of the snapshot */
		if (!pqn->q)
			continue;

		if (*num_qss_entries < tmp_qss_entries) {
			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);

			if (copy_to_user(buf, &src, *entry_size)) {
				r = -EFAULT;
				break;
			}
			/* advance by the caller's stride, not the clamped size */
			buf += tmp_entry_size;
		}
		*num_qss_entries += 1;
	}

	mutex_unlock(&pqm->process->event_mutex);
	return r;
}
665f005ef32Sjsg 
6661bb76ff1Sjsg static int get_queue_data_sizes(struct kfd_process_device *pdd,
6671bb76ff1Sjsg 				struct queue *q,
6681bb76ff1Sjsg 				uint32_t *mqd_size,
6691bb76ff1Sjsg 				uint32_t *ctl_stack_size)
6701bb76ff1Sjsg {
6711bb76ff1Sjsg 	int ret;
6721bb76ff1Sjsg 
6731bb76ff1Sjsg 	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
6741bb76ff1Sjsg 					    q->properties.queue_id,
6751bb76ff1Sjsg 					    mqd_size,
6761bb76ff1Sjsg 					    ctl_stack_size);
6771bb76ff1Sjsg 	if (ret)
6781bb76ff1Sjsg 		pr_err("Failed to get queue dump info (%d)\n", ret);
6791bb76ff1Sjsg 
6801bb76ff1Sjsg 	return ret;
6811bb76ff1Sjsg }
6821bb76ff1Sjsg 
/*
 * kfd_process_get_queue_info - size the CRIU checkpoint buffers for
 * all checkpointable queues of process @p.
 *
 * Counts compute/SDMA/SDMA-XGMI queues across all devices into
 * *num_queues and computes *priv_data_sizes as the sum of each
 * queue's private-data struct plus its MQD and control-stack sizes.
 * Returns 0 on success, -EOPNOTSUPP if an unsupported queue type is
 * found, or the error from the per-queue size query.
 */
int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	uint32_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA ||
				q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				extra_data_sizes += mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	/* fixed header per queue + variable MQD/ctl-stack payloads */
	*priv_data_sizes = extra_data_sizes +
				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}
7221bb76ff1Sjsg 
7231bb76ff1Sjsg static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
7241bb76ff1Sjsg 			      unsigned int qid,
7251bb76ff1Sjsg 			      void *mqd,
7261bb76ff1Sjsg 			      void *ctl_stack)
7271bb76ff1Sjsg {
7281bb76ff1Sjsg 	struct process_queue_node *pqn;
7291bb76ff1Sjsg 
7301bb76ff1Sjsg 	pqn = get_queue_by_qid(pqm, qid);
7311bb76ff1Sjsg 	if (!pqn) {
7321bb76ff1Sjsg 		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
7331bb76ff1Sjsg 		return -EFAULT;
7341bb76ff1Sjsg 	}
7351bb76ff1Sjsg 
7361bb76ff1Sjsg 	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
7371bb76ff1Sjsg 		pr_err("amdkfd: queue dumping not supported on this device\n");
7381bb76ff1Sjsg 		return -EOPNOTSUPP;
7391bb76ff1Sjsg 	}
7401bb76ff1Sjsg 
7411bb76ff1Sjsg 	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
7421bb76ff1Sjsg 						       pqn->q, mqd, ctl_stack);
7431bb76ff1Sjsg }
7441bb76ff1Sjsg 
/*
 * criu_checkpoint_queue - serialize one user queue into the CRIU
 * private-data record @q_data.
 *
 * The record layout is: struct kfd_criu_queue_priv_data, immediately
 * followed by the MQD (q_data->mqd_size bytes), followed by the
 * control stack. Fills in the queue properties, then asks the DQM to
 * dump the MQD and control stack in place. Returns 0 on success or
 * the checkpoint error.
 */
static int criu_checkpoint_queue(struct kfd_process_device *pdd,
			   struct queue *q,
			   struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	/* MQD payload starts right after the fixed-size header */
	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->user_gpu_id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id =  q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;

	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	/* only record whether GWS was held; re-acquired on restore */
	q_data->gws = !!q->gws;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}
7911bb76ff1Sjsg 
7921bb76ff1Sjsg static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
7931bb76ff1Sjsg 				   uint8_t __user *user_priv,
7941bb76ff1Sjsg 				   unsigned int *q_index,
7951bb76ff1Sjsg 				   uint64_t *queues_priv_data_offset)
7961bb76ff1Sjsg {
7971bb76ff1Sjsg 	unsigned int q_private_data_size = 0;
7981bb76ff1Sjsg 	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
7991bb76ff1Sjsg 	struct queue *q;
8001bb76ff1Sjsg 	int ret = 0;
8011bb76ff1Sjsg 
8021bb76ff1Sjsg 	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
8031bb76ff1Sjsg 		struct kfd_criu_queue_priv_data *q_data;
8041bb76ff1Sjsg 		uint64_t q_data_size;
8051bb76ff1Sjsg 		uint32_t mqd_size;
8061bb76ff1Sjsg 		uint32_t ctl_stack_size;
8071bb76ff1Sjsg 
8081bb76ff1Sjsg 		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
8091bb76ff1Sjsg 			q->properties.type != KFD_QUEUE_TYPE_SDMA &&
8101bb76ff1Sjsg 			q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
8111bb76ff1Sjsg 
8121bb76ff1Sjsg 			pr_err("Unsupported queue type (%d)\n", q->properties.type);
8131bb76ff1Sjsg 			ret = -EOPNOTSUPP;
8141bb76ff1Sjsg 			break;
8151bb76ff1Sjsg 		}
8161bb76ff1Sjsg 
8171bb76ff1Sjsg 		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
8181bb76ff1Sjsg 		if (ret)
8191bb76ff1Sjsg 			break;
8201bb76ff1Sjsg 
8211bb76ff1Sjsg 		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
8221bb76ff1Sjsg 
8231bb76ff1Sjsg 		/* Increase local buffer space if needed */
8241bb76ff1Sjsg 		if (q_private_data_size < q_data_size) {
8251bb76ff1Sjsg 			kfree(q_private_data);
8261bb76ff1Sjsg 
8271bb76ff1Sjsg 			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
8281bb76ff1Sjsg 			if (!q_private_data) {
8291bb76ff1Sjsg 				ret = -ENOMEM;
8301bb76ff1Sjsg 				break;
8311bb76ff1Sjsg 			}
8321bb76ff1Sjsg 			q_private_data_size = q_data_size;
8331bb76ff1Sjsg 		}
8341bb76ff1Sjsg 
8351bb76ff1Sjsg 		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
8361bb76ff1Sjsg 
8371bb76ff1Sjsg 		/* data stored in this order: priv_data, mqd, ctl_stack */
8381bb76ff1Sjsg 		q_data->mqd_size = mqd_size;
8391bb76ff1Sjsg 		q_data->ctl_stack_size = ctl_stack_size;
8401bb76ff1Sjsg 
8411bb76ff1Sjsg 		ret = criu_checkpoint_queue(pdd, q, q_data);
8421bb76ff1Sjsg 		if (ret)
8431bb76ff1Sjsg 			break;
8441bb76ff1Sjsg 
8451bb76ff1Sjsg 		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
8461bb76ff1Sjsg 
8471bb76ff1Sjsg 		ret = copy_to_user(user_priv + *queues_priv_data_offset,
8481bb76ff1Sjsg 				q_data, q_data_size);
8491bb76ff1Sjsg 		if (ret) {
8501bb76ff1Sjsg 			ret = -EFAULT;
8511bb76ff1Sjsg 			break;
8521bb76ff1Sjsg 		}
8531bb76ff1Sjsg 		*queues_priv_data_offset += q_data_size;
8541bb76ff1Sjsg 		*q_index = *q_index + 1;
8551bb76ff1Sjsg 	}
8561bb76ff1Sjsg 
8571bb76ff1Sjsg 	kfree(q_private_data);
8581bb76ff1Sjsg 
8591bb76ff1Sjsg 	return ret;
8601bb76ff1Sjsg }
8611bb76ff1Sjsg 
8621bb76ff1Sjsg int kfd_criu_checkpoint_queues(struct kfd_process *p,
8631bb76ff1Sjsg 			 uint8_t __user *user_priv_data,
8641bb76ff1Sjsg 			 uint64_t *priv_data_offset)
8651bb76ff1Sjsg {
8661bb76ff1Sjsg 	int ret = 0, pdd_index, q_index = 0;
8671bb76ff1Sjsg 
8681bb76ff1Sjsg 	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
8691bb76ff1Sjsg 		struct kfd_process_device *pdd = p->pdds[pdd_index];
8701bb76ff1Sjsg 
8711bb76ff1Sjsg 		/*
8721bb76ff1Sjsg 		 * criu_checkpoint_queues_device will copy data to user and update q_index and
8731bb76ff1Sjsg 		 * queues_priv_data_offset
8741bb76ff1Sjsg 		 */
8751bb76ff1Sjsg 		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
8761bb76ff1Sjsg 					      priv_data_offset);
8771bb76ff1Sjsg 
8781bb76ff1Sjsg 		if (ret)
8791bb76ff1Sjsg 			break;
8801bb76ff1Sjsg 	}
8811bb76ff1Sjsg 
8821bb76ff1Sjsg 	return ret;
8831bb76ff1Sjsg }
8841bb76ff1Sjsg 
8851bb76ff1Sjsg static void set_queue_properties_from_criu(struct queue_properties *qp,
8861bb76ff1Sjsg 					  struct kfd_criu_queue_priv_data *q_data)
8871bb76ff1Sjsg {
8881bb76ff1Sjsg 	qp->is_interop = false;
8891bb76ff1Sjsg 	qp->queue_percent = q_data->q_percent;
8901bb76ff1Sjsg 	qp->priority = q_data->priority;
8911bb76ff1Sjsg 	qp->queue_address = q_data->q_address;
8921bb76ff1Sjsg 	qp->queue_size = q_data->q_size;
8931bb76ff1Sjsg 	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
8941bb76ff1Sjsg 	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
8951bb76ff1Sjsg 	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
8961bb76ff1Sjsg 	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
8971bb76ff1Sjsg 	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
8981bb76ff1Sjsg 	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
8991bb76ff1Sjsg 	qp->ctl_stack_size = q_data->ctl_stack_size;
9001bb76ff1Sjsg 	qp->type = q_data->type;
9011bb76ff1Sjsg 	qp->format = q_data->format;
9021bb76ff1Sjsg }
9031bb76ff1Sjsg 
/*
 * Restore a single queue from a CRIU checkpoint blob.
 *
 * Consumes one kfd_criu_queue_priv_data record plus its trailing MQD and
 * control stack from @user_priv_ptr at *@priv_data_offset, validating
 * each read against @max_priv_data_size before touching user memory.
 * On success the queue is recreated on the GPU identified by
 * q_data->gpu_id and *@priv_data_offset is advanced past the consumed
 * data.  Returns 0 on success or a negative errno.
 */
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	/* Bounds-check the fixed-size record before reading it */
	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	/* Widen to u64 so the u32 + u32 sum below cannot wrap */
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	/* Sizes came from userspace; re-check against the blob bound */
	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/* data stored in this order: mqd, ctl_stack */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data);

	print_queue_properties(&qp);

	/* Recreate the queue with its checkpointed MQD and control stack */
	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
				NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	/* Re-attach global wave sync if the queue had it at checkpoint */
	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	kfree(q_data);
	kfree(q_extra_data);

	return ret;
}
9891bb76ff1Sjsg 
9901bb76ff1Sjsg int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
9911bb76ff1Sjsg 				  unsigned int qid,
9921bb76ff1Sjsg 				  uint32_t *mqd_size,
9931bb76ff1Sjsg 				  uint32_t *ctl_stack_size)
9941bb76ff1Sjsg {
9951bb76ff1Sjsg 	struct process_queue_node *pqn;
9961bb76ff1Sjsg 
9971bb76ff1Sjsg 	pqn = get_queue_by_qid(pqm, qid);
9981bb76ff1Sjsg 	if (!pqn) {
9991bb76ff1Sjsg 		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
10001bb76ff1Sjsg 		return -EFAULT;
10011bb76ff1Sjsg 	}
10021bb76ff1Sjsg 
10031bb76ff1Sjsg 	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
10041bb76ff1Sjsg 		pr_err("amdkfd: queue dumping not supported on this device\n");
10051bb76ff1Sjsg 		return -EOPNOTSUPP;
10061bb76ff1Sjsg 	}
10071bb76ff1Sjsg 
10081bb76ff1Sjsg 	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
10091bb76ff1Sjsg 						       pqn->q, mqd_size,
10101bb76ff1Sjsg 						       ctl_stack_size);
10111bb76ff1Sjsg 	return 0;
10121bb76ff1Sjsg }
10131bb76ff1Sjsg 
1014fb4d8502Sjsg #if defined(CONFIG_DEBUG_FS)
1015fb4d8502Sjsg 
1016fb4d8502Sjsg int pqm_debugfs_mqds(struct seq_file *m, void *data)
1017fb4d8502Sjsg {
1018fb4d8502Sjsg 	struct process_queue_manager *pqm = data;
1019fb4d8502Sjsg 	struct process_queue_node *pqn;
1020fb4d8502Sjsg 	struct queue *q;
1021fb4d8502Sjsg 	enum KFD_MQD_TYPE mqd_type;
1022fb4d8502Sjsg 	struct mqd_manager *mqd_mgr;
1023f005ef32Sjsg 	int r = 0, xcc, num_xccs = 1;
1024f005ef32Sjsg 	void *mqd;
1025f005ef32Sjsg 	uint64_t size = 0;
1026fb4d8502Sjsg 
1027fb4d8502Sjsg 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
1028fb4d8502Sjsg 		if (pqn->q) {
1029fb4d8502Sjsg 			q = pqn->q;
1030fb4d8502Sjsg 			switch (q->properties.type) {
1031fb4d8502Sjsg 			case KFD_QUEUE_TYPE_SDMA:
1032c349dbc7Sjsg 			case KFD_QUEUE_TYPE_SDMA_XGMI:
1033fb4d8502Sjsg 				seq_printf(m, "  SDMA queue on device %x\n",
1034fb4d8502Sjsg 					   q->device->id);
1035fb4d8502Sjsg 				mqd_type = KFD_MQD_TYPE_SDMA;
1036fb4d8502Sjsg 				break;
1037fb4d8502Sjsg 			case KFD_QUEUE_TYPE_COMPUTE:
1038fb4d8502Sjsg 				seq_printf(m, "  Compute queue on device %x\n",
1039fb4d8502Sjsg 					   q->device->id);
1040fb4d8502Sjsg 				mqd_type = KFD_MQD_TYPE_CP;
1041f005ef32Sjsg 				num_xccs = NUM_XCC(q->device->xcc_mask);
1042fb4d8502Sjsg 				break;
1043fb4d8502Sjsg 			default:
1044fb4d8502Sjsg 				seq_printf(m,
1045fb4d8502Sjsg 				"  Bad user queue type %d on device %x\n",
1046fb4d8502Sjsg 					   q->properties.type, q->device->id);
1047fb4d8502Sjsg 				continue;
1048fb4d8502Sjsg 			}
1049c349dbc7Sjsg 			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
1050f005ef32Sjsg 			size = mqd_mgr->mqd_stride(mqd_mgr,
1051f005ef32Sjsg 							&q->properties);
1052fb4d8502Sjsg 		} else if (pqn->kq) {
1053fb4d8502Sjsg 			q = pqn->kq->queue;
1054fb4d8502Sjsg 			mqd_mgr = pqn->kq->mqd_mgr;
1055fb4d8502Sjsg 			switch (q->properties.type) {
1056fb4d8502Sjsg 			case KFD_QUEUE_TYPE_DIQ:
1057fb4d8502Sjsg 				seq_printf(m, "  DIQ on device %x\n",
1058fb4d8502Sjsg 					   pqn->kq->dev->id);
1059fb4d8502Sjsg 				break;
1060fb4d8502Sjsg 			default:
1061fb4d8502Sjsg 				seq_printf(m,
1062fb4d8502Sjsg 				"  Bad kernel queue type %d on device %x\n",
1063fb4d8502Sjsg 					   q->properties.type,
1064fb4d8502Sjsg 					   pqn->kq->dev->id);
1065fb4d8502Sjsg 				continue;
1066fb4d8502Sjsg 			}
1067fb4d8502Sjsg 		} else {
1068fb4d8502Sjsg 			seq_printf(m,
1069fb4d8502Sjsg 		"  Weird: Queue node with neither kernel nor user queue\n");
1070fb4d8502Sjsg 			continue;
1071fb4d8502Sjsg 		}
1072fb4d8502Sjsg 
1073f005ef32Sjsg 		for (xcc = 0; xcc < num_xccs; xcc++) {
1074f005ef32Sjsg 			mqd = q->mqd + size * xcc;
1075f005ef32Sjsg 			r = mqd_mgr->debugfs_show_mqd(m, mqd);
1076fb4d8502Sjsg 			if (r != 0)
1077fb4d8502Sjsg 				break;
1078fb4d8502Sjsg 		}
1079f005ef32Sjsg 	}
1080fb4d8502Sjsg 
1081fb4d8502Sjsg 	return r;
1082fb4d8502Sjsg }
1083fb4d8502Sjsg 
1084fb4d8502Sjsg #endif
1085