xref: /openbsd-src/sys/dev/pci/drm/amd/amdgpu/amdgpu_mes.c (revision 6d0c4409fad6e08e491c73fa7789cd502266526a)
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_exec.h>

#include "amdgpu_mes.h"
#include "amdgpu.h"
#include "soc15_common.h"
#include "amdgpu_mes_ctx.h"

#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
#define AMDGPU_ONE_DOORBELL_SIZE 8

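/*
 * Worked example of the slice computation below: AMDGPU_ONE_DOORBELL_SIZE *
 * AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS = 8 * 1024 = 8192 bytes, which
 * roundup() turns into two 4 KiB pages (or one page where PAGE_SIZE is
 * larger), so every process owns a whole number of doorbell pages.
 */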
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
	return roundup(AMDGPU_ONE_DOORBELL_SIZE *
		       AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS,
		       PAGE_SIZE);
}

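/*
 * Kernel doorbell allocation for MES-managed queues.  Doorbells are handed
 * out from a per-device bitmap; each doorbell is 64 bits (2 dwords) wide,
 * which is why a bitmap slot is converted to an absolute dword offset with
 * "found * 2" below.  SDMA queues start their search at the first SDMA
 * engine's doorbell index instead of slot 0.
 */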
static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
					 struct amdgpu_mes_process *process,
					 int ip_type, uint64_t *doorbell_index)
{
	unsigned int offset, found;
	struct amdgpu_mes *mes = &adev->mes;

	if (ip_type == AMDGPU_RING_TYPE_SDMA)
		offset = adev->doorbell_index.sdma_engine[0];
	else
		offset = 0;

	found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset);
	if (found >= mes->num_mes_dbs) {
		DRM_WARN("No doorbell available\n");
		return -ENOSPC;
	}

	set_bit(found, mes->doorbell_bitmap);

	/* Get the absolute doorbell index on BAR */
	*doorbell_index = mes->db_start_dw_offset + found * 2;
	return 0;
}

static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
					   struct amdgpu_mes_process *process,
					   uint32_t doorbell_index)
{
	unsigned int old, rel_index;
	struct amdgpu_mes *mes = &adev->mes;

	/* Find the relative index of the doorbell in this object */
	rel_index = (doorbell_index - mes->db_start_dw_offset) / 2;
	old = test_and_clear_bit(rel_index, mes->doorbell_bitmap);
	WARN_ON(!old);
}

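/*
 * Set up the kernel doorbell bitmap.  Note the asymmetry: the bitmap is
 * allocated with PAGE_SIZE / sizeof(u32) bits while only PAGE_SIZE /
 * AMDGPU_ONE_DOORBELL_SIZE (half as many) 64-bit doorbells are usable, so
 * the bitmap has headroom.  The first AMDGPU_MES_PRIORITY_NUM_LEVELS slots
 * are reserved up front as aggregated doorbells, one per priority level.
 */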
static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
{
	int i;
	struct amdgpu_mes *mes = &adev->mes;

	/* Bitmap for dynamic allocation of kernel doorbells */
	mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL);
	if (!mes->doorbell_bitmap) {
		DRM_ERROR("Failed to allocate MES doorbell bitmap\n");
		return -ENOMEM;
	}

	mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE;
	for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) {
		adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2;
		set_bit(i, mes->doorbell_bitmap);
	}

	return 0;
}

static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
{
	bitmap_free(adev->mes.doorbell_bitmap);
}

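/*
 * Software-state setup for the MES block: ID allocators and locks, the
 * scheduler/VMID/HQD masks, three writeback slots (scheduler context,
 * query-status fence and register read-back) and the doorbell bitmap.
 * amdgpu_mes_fini() below releases the same resources; the pair is
 * expected to run once per device from the MES IP init/teardown paths.
 */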
int amdgpu_mes_init(struct amdgpu_device *adev)
{
	int i, r;

	adev->mes.adev = adev;

	idr_init(&adev->mes.pasid_idr);
	idr_init(&adev->mes.gang_id_idr);
	idr_init(&adev->mes.queue_id_idr);
	ida_init(&adev->mes.doorbell_ida);
	mtx_init(&adev->mes.queue_id_lock, IPL_TTY);
	mtx_init(&adev->mes.ring_lock, IPL_TTY);
	rw_init(&adev->mes.mutex_hidden, "agmes");

	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
	adev->mes.vmid_mask_mmhub = 0xffffff00;
	adev->mes.vmid_mask_gfxhub = 0xffffff00;

	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
		/* use only 1st MEC pipes */
		if (i >= 4)
			continue;
		adev->mes.compute_hqd_mask[i] = 0xc;
	}

	for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++)
		adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;

	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
		/* zero sdma_hqd_mask for non-existent engine */
		if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0))
			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
		else if (adev->sdma.num_instances == 1)
			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
		else
			adev->mes.sdma_hqd_mask[i] = 0xfc;
	}

	r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
	if (r) {
		dev_err(adev->dev,
			"(%d) sch_ctx_offs wb alloc failed\n", r);
		goto error_ids;
	}
	adev->mes.sch_ctx_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4);
	adev->mes.sch_ctx_ptr =
		(uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs];

	r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
	if (r) {
		amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
		dev_err(adev->dev,
			"(%d) query_status_fence_offs wb alloc failed\n", r);
		goto error_ids;
	}
	adev->mes.query_status_fence_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
	adev->mes.query_status_fence_ptr =
		(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];

	r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
	if (r) {
		amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
		amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
		dev_err(adev->dev,
			"(%d) read_val_offs alloc failed\n", r);
		goto error_ids;
	}
	adev->mes.read_val_gpu_addr =
		adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
	adev->mes.read_val_ptr =
		(uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];

	r = amdgpu_mes_doorbell_init(adev);
	if (r)
		goto error;

	return 0;

error:
	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
error_ids:
	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
	return r;
}

void amdgpu_mes_fini(struct amdgpu_device *adev)
{
	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
	amdgpu_mes_doorbell_free(adev);

	idr_destroy(&adev->mes.pasid_idr);
	idr_destroy(&adev->mes.gang_id_idr);
	idr_destroy(&adev->mes.queue_id_idr);
	ida_destroy(&adev->mes.doorbell_ida);
	mutex_destroy(&adev->mes.mutex_hidden);
}

static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
{
	amdgpu_bo_free_kernel(&q->mqd_obj,
			      &q->mqd_gpu_addr,
			      &q->mqd_cpu_ptr);
}

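/*
 * Create the per-PASID MES bookkeeping.  A minimal usage sketch (error
 * handling elided; "gprops" stands for a caller-provided
 * struct amdgpu_mes_gang_properties):
 *
 *	r = amdgpu_mes_create_process(adev, pasid, vm);
 *	r = amdgpu_mes_add_gang(adev, pasid, &gprops, &gang_id);
 *	...
 *	amdgpu_mes_destroy_process(adev, pasid);
 */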
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
			      struct amdgpu_vm *vm)
{
	struct amdgpu_mes_process *process;
	int r;

	/* allocate the mes process buffer */
	process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL);
	if (!process) {
		DRM_ERROR("no more memory to create mes process\n");
		return -ENOMEM;
	}

	/* allocate the process context bo and map it */
	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &process->proc_ctx_bo,
				    &process->proc_ctx_gpu_addr,
				    &process->proc_ctx_cpu_ptr);
	if (r) {
		DRM_ERROR("failed to allocate process context bo\n");
		goto clean_up_memory;
	}
	memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	/* add the mes process to idr list */
	r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1,
		      GFP_KERNEL);
	if (r < 0) {
		DRM_ERROR("failed to lock pasid=%d\n", pasid);
		goto clean_up_ctx;
	}

	INIT_LIST_HEAD(&process->gang_list);
	process->vm = vm;
	process->pasid = pasid;
	process->process_quantum = adev->mes.default_process_quantum;
	process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_ctx:
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
			      &process->proc_ctx_gpu_addr,
			      &process->proc_ctx_cpu_ptr);
clean_up_memory:
	kfree(process);
	return r;
}

void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid)
{
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang, *tmp1;
	struct amdgpu_mes_queue *queue, *tmp2;
	struct mes_remove_queue_input queue_input;
	unsigned long flags;
	int r;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	process = idr_find(&adev->mes.pasid_idr, pasid);
	if (!process) {
		DRM_WARN("pasid %d doesn't exist\n", pasid);
		amdgpu_mes_unlock(&adev->mes);
		return;
	}

	/* Remove all queues from hardware */
	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
			spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
			idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
			spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);

			queue_input.doorbell_offset = queue->doorbell_off;
			queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;

			r = adev->mes.funcs->remove_hw_queue(&adev->mes,
							     &queue_input);
			if (r)
				DRM_WARN("failed to remove hardware queue\n");
		}

		idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
	}

	idr_remove(&adev->mes.pasid_idr, pasid);
	amdgpu_mes_unlock(&adev->mes);

	/* free all memory allocated by the process */
	list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) {
		/* free all queues in the gang */
		list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) {
			amdgpu_mes_queue_free_mqd(queue);
			list_del(&queue->list);
			kfree(queue);
		}
		amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
				      &gang->gang_ctx_gpu_addr,
				      &gang->gang_ctx_cpu_ptr);
		list_del(&gang->list);
		kfree(gang);
	}
	amdgpu_bo_free_kernel(&process->proc_ctx_bo,
			      &process->proc_ctx_gpu_addr,
			      &process->proc_ctx_cpu_ptr);
	kfree(process);
}

int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
			struct amdgpu_mes_gang_properties *gprops,
			int *gang_id)
{
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	int r;

	/* allocate the mes gang buffer */
	gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL);
	if (!gang)
		return -ENOMEM;

	/* allocate the gang context bo and map it to cpu space */
	r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &gang->gang_ctx_bo,
				    &gang->gang_ctx_gpu_addr,
				    &gang->gang_ctx_cpu_ptr);
	if (r) {
		DRM_ERROR("failed to allocate gang context bo\n");
		goto clean_up_mem;
	}
	memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	process = idr_find(&adev->mes.pasid_idr, pasid);
	if (!process) {
		DRM_ERROR("pasid %d doesn't exist\n", pasid);
		r = -EINVAL;
		goto clean_up_ctx;
	}

	/* add the mes gang to idr list */
	r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0,
		      GFP_KERNEL);
	if (r < 0) {
		DRM_ERROR("failed to allocate idr for gang\n");
		goto clean_up_ctx;
	}

	gang->gang_id = r;
	*gang_id = r;

	INIT_LIST_HEAD(&gang->queue_list);
	gang->process = process;
	gang->priority = gprops->priority;
	gang->gang_quantum = gprops->gang_quantum ?
		gprops->gang_quantum : adev->mes.default_gang_quantum;
	gang->global_priority_level = gprops->global_priority_level;
	gang->inprocess_gang_priority = gprops->inprocess_gang_priority;
	list_add_tail(&gang->list, &process->gang_list);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_ctx:
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
			      &gang->gang_ctx_gpu_addr,
			      &gang->gang_ctx_cpu_ptr);
clean_up_mem:
	kfree(gang);
	return r;
}

int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id)
{
	struct amdgpu_mes_gang *gang;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
	if (!gang) {
		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
		amdgpu_mes_unlock(&adev->mes);
		return -EINVAL;
	}

	if (!list_empty(&gang->queue_list)) {
		DRM_ERROR("queue list is not empty\n");
		amdgpu_mes_unlock(&adev->mes);
		return -EBUSY;
	}

	idr_remove(&adev->mes.gang_id_idr, gang->gang_id);
	list_del(&gang->list);
	amdgpu_mes_unlock(&adev->mes);

	amdgpu_bo_free_kernel(&gang->gang_ctx_bo,
			      &gang->gang_ctx_gpu_addr,
			      &gang->gang_ctx_cpu_ptr);

	kfree(gang);

	return 0;
}

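/*
 * Suspend/resume walk every gang of every process tracked in the pasid
 * IDR and hand each one to the firmware.  Note that the suspend/resume
 * input structures are currently passed to the backend without being
 * filled in here; identifying the individual gang is left to the backend
 * implementation.
 */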
int amdgpu_mes_suspend(struct amdgpu_device *adev)
{
	struct idr *idp;
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	struct mes_suspend_gang_input input;
	int r, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	idp = &adev->mes.pasid_idr;

	idr_for_each_entry(idp, process, pasid) {
		list_for_each_entry(gang, &process->gang_list, list) {
			r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
			if (r)
				DRM_ERROR("failed to suspend pasid %d gangid %d",
					 pasid, gang->gang_id);
		}
	}

	amdgpu_mes_unlock(&adev->mes);
	return 0;
}

int amdgpu_mes_resume(struct amdgpu_device *adev)
{
	struct idr *idp;
	struct amdgpu_mes_process *process;
	struct amdgpu_mes_gang *gang;
	struct mes_resume_gang_input input;
	int r, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	idp = &adev->mes.pasid_idr;

	idr_for_each_entry(idp, process, pasid) {
		list_for_each_entry(gang, &process->gang_list, list) {
			r = adev->mes.funcs->resume_gang(&adev->mes, &input);
			if (r)
				DRM_ERROR("failed to resume pasid %d gangid %d",
					 pasid, gang->gang_id);
		}
	}

	amdgpu_mes_unlock(&adev->mes);
	return 0;
}

static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev,
				     struct amdgpu_mes_queue *q,
				     struct amdgpu_mes_queue_properties *p)
{
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
	u32 mqd_size = mqd_mgr->mqd_size;
	int r;

	r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &q->mqd_obj,
				    &q->mqd_gpu_addr, &q->mqd_cpu_ptr);
	if (r) {
		dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r);
		return r;
	}
	memset(q->mqd_cpu_ptr, 0, mqd_size);

	r = amdgpu_bo_reserve(q->mqd_obj, false);
	if (unlikely(r != 0))
		goto clean_up;

	return 0;

clean_up:
	amdgpu_bo_free_kernel(&q->mqd_obj,
			      &q->mqd_gpu_addr,
			      &q->mqd_cpu_ptr);
	return r;
}

static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev,
				     struct amdgpu_mes_queue *q,
				     struct amdgpu_mes_queue_properties *p)
{
	struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type];
	struct amdgpu_mqd_prop mqd_prop = {0};

	mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr;
	mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr;
	mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr;
	mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr;
	mqd_prop.queue_size = p->queue_size;
	mqd_prop.use_doorbell = true;
	mqd_prop.doorbell_index = p->doorbell_off;
	mqd_prop.eop_gpu_addr = p->eop_gpu_addr;
	mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority;
	mqd_prop.hqd_queue_priority = p->hqd_queue_priority;
	mqd_prop.hqd_active = false;

	if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
	    p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		mutex_lock(&adev->srbm_mutex);
		amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0);
	}

	mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop);

	if (p->queue_type == AMDGPU_RING_TYPE_GFX ||
	    p->queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	amdgpu_bo_unreserve(q->mqd_obj);
}

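/*
 * Add a hardware queue to a gang.  The expected flow, as used by
 * amdgpu_mes_add_ring() below, is roughly:
 *
 *	struct amdgpu_mes_queue_properties qprops = {0};
 *
 *	amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
 *	r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
 *
 * On success the queue owns an MQD buffer, a queue id and a kernel
 * doorbell, all of which amdgpu_mes_remove_hw_queue() releases again.
 */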
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
			    struct amdgpu_mes_queue_properties *qprops,
			    int *queue_id)
{
	struct amdgpu_mes_queue *queue;
	struct amdgpu_mes_gang *gang;
	struct mes_add_queue_input queue_input;
	unsigned long flags;
	int r;

	memset(&queue_input, 0, sizeof(struct mes_add_queue_input));

	/* allocate the mes queue buffer */
	queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL);
	if (!queue) {
		DRM_ERROR("Failed to allocate memory for queue\n");
		return -ENOMEM;
	}

	/* Allocate the queue mqd */
	r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops);
	if (r)
		goto clean_up_memory;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
	if (!gang) {
		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
		r = -EINVAL;
		goto clean_up_mqd;
	}

	/* add the mes queue to idr list */
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
	r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0,
		      GFP_ATOMIC);
	if (r < 0) {
		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
		goto clean_up_mqd;
	}
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
	*queue_id = queue->queue_id = r;

	/* allocate a doorbell index for the queue */
	r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
					  qprops->queue_type,
					  &qprops->doorbell_off);
	if (r)
		goto clean_up_queue_id;

	/* initialize the queue mqd */
	amdgpu_mes_queue_init_mqd(adev, queue, qprops);

	/* add hw queue to mes */
	queue_input.process_id = gang->process->pasid;

	queue_input.page_table_base_addr =
		adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr -
		adev->gmc.vram_start;

	queue_input.process_va_start = 0;
	queue_input.process_va_end =
		(adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT;
	queue_input.process_quantum = gang->process->process_quantum;
	queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr;
	queue_input.gang_quantum = gang->gang_quantum;
	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = gang->inprocess_gang_priority;
	queue_input.gang_global_priority_level = gang->global_priority_level;
	queue_input.doorbell_offset = qprops->doorbell_off;
	queue_input.mqd_addr = queue->mqd_gpu_addr;
	queue_input.wptr_addr = qprops->wptr_gpu_addr;
	queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
	queue_input.queue_type = qprops->queue_type;
	queue_input.paging = qprops->paging;
	queue_input.is_kfd_process = 0;

	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	if (r) {
		DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",
			  qprops->doorbell_off);
		goto clean_up_doorbell;
	}

	DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "
		  "queue type=%d, doorbell=0x%llx\n",
		  gang->process->pasid, gang_id, qprops->queue_type,
		  qprops->doorbell_off);

	queue->ring = qprops->ring;
	queue->doorbell_off = qprops->doorbell_off;
	queue->wptr_gpu_addr = qprops->wptr_gpu_addr;
	queue->queue_type = qprops->queue_type;
	queue->paging = qprops->paging;
	queue->gang = gang;
	queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
	list_add_tail(&queue->list, &gang->queue_list);

	amdgpu_mes_unlock(&adev->mes);
	return 0;

clean_up_doorbell:
	amdgpu_mes_kernel_doorbell_free(adev, gang->process,
				       qprops->doorbell_off);
clean_up_queue_id:
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
	idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
clean_up_mqd:
	amdgpu_mes_unlock(&adev->mes);
	amdgpu_mes_queue_free_mqd(queue);
clean_up_memory:
	kfree(queue);
	return r;
}

int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
{
	unsigned long flags;
	struct amdgpu_mes_queue *queue;
	struct amdgpu_mes_gang *gang;
	struct mes_remove_queue_input queue_input;
	int r;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);

	/* remove the mes queue from idr list */
	spin_lock_irqsave(&adev->mes.queue_id_lock, flags);

	queue = idr_find(&adev->mes.queue_id_idr, queue_id);
	if (!queue) {
		spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);
		amdgpu_mes_unlock(&adev->mes);
		DRM_ERROR("queue id %d doesn't exist\n", queue_id);
		return -EINVAL;
	}

	idr_remove(&adev->mes.queue_id_idr, queue_id);
	spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags);

	DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",
		  queue->doorbell_off);

	gang = queue->gang;
	queue_input.doorbell_offset = queue->doorbell_off;
	queue_input.gang_context_addr = gang->gang_ctx_gpu_addr;

	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	if (r)
		DRM_ERROR("failed to remove hardware queue, queue id = %d\n",
			  queue_id);

	list_del(&queue->list);
	amdgpu_mes_kernel_doorbell_free(adev, gang->process,
				       queue->doorbell_off);
	amdgpu_mes_unlock(&adev->mes);

	amdgpu_mes_queue_free_mqd(queue);
	kfree(queue);
	return 0;
}

int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr, u64 seq)
{
	struct mes_unmap_legacy_queue_input queue_input;
	int r;

	queue_input.action = action;
	queue_input.queue_type = ring->funcs->type;
	queue_input.doorbell_offset = ring->doorbell_index;
	queue_input.pipe_id = ring->pipe;
	queue_input.queue_id = ring->queue;
	queue_input.trail_fence_addr = gpu_addr;
	queue_input.trail_fence_data = seq;

	r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input);
	if (r)
		DRM_ERROR("failed to unmap legacy queue\n");

	return r;
}

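/*
 * Indirect register access routed through the MES firmware, for cases
 * where the CPU cannot or should not touch the register directly (e.g.
 * under SR-IOV).  A sketch of the read-modify-write pattern these
 * helpers enable ("some_mask" is illustrative only):
 *
 *	val = amdgpu_mes_rreg(adev, reg);
 *	r = amdgpu_mes_wreg(adev, reg, val | some_mask);
 */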
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	struct mes_misc_op_input op_input;
	int r, val = 0;

	op_input.op = MES_MISC_OP_READ_REG;
	op_input.read_reg.reg_offset = reg;
	op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes rreg is not supported!\n");
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to read reg (0x%x)\n", reg);
	else
		val = *(adev->mes.read_val_ptr);

error:
	return val;
}

int amdgpu_mes_wreg(struct amdgpu_device *adev,
		    uint32_t reg, uint32_t val)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRITE_REG;
	op_input.write_reg.reg_offset = reg;
	op_input.write_reg.reg_value = val;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes wreg is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to write reg (0x%x)\n", reg);

error:
	return r;
}

int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
	op_input.wrm_reg.reg0 = reg0;
	op_input.wrm_reg.reg1 = reg1;
	op_input.wrm_reg.ref = ref;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to reg_write_reg_wait\n");

error:
	return r;
}

int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
			uint32_t val, uint32_t mask)
{
	struct mes_misc_op_input op_input;
	int r;

	op_input.op = MES_MISC_OP_WRM_REG_WAIT;
	op_input.wrm_reg.reg0 = reg;
	op_input.wrm_reg.ref = val;
	op_input.wrm_reg.mask = mask;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes reg wait is not supported!\n");
		r = -EINVAL;
		goto error;
	}

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to reg_wait\n");

error:
	return r;
}

int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				uint64_t process_context_addr,
				uint32_t spi_gdbg_per_vmid_cntl,
				const uint32_t *tcp_watch_cntl,
				uint32_t flags,
				bool trap_en)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes set shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.u32all = flags;

	/* use amdgpu_mes_flush_shader_debugger() instead */
	if (op_input.set_shader_debugger.flags.process_ctx_flush)
		return -EINVAL;

	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));

	if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
			AMDGPU_MES_API_VERSION_SHIFT) >= 14)
		op_input.set_shader_debugger.trap_en = trap_en;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to set_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr)
{
	struct mes_misc_op_input op_input = {0};
	int r;

	if (!adev->mes.funcs->misc_op) {
		DRM_ERROR("mes flush shader debugger is not supported!\n");
		return -EINVAL;
	}

	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
	op_input.set_shader_debugger.process_context_addr = process_context_addr;
	op_input.set_shader_debugger.flags.process_ctx_flush = true;

	amdgpu_mes_lock(&adev->mes);

	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
	if (r)
		DRM_ERROR("failed to flush_shader_debugger\n");

	amdgpu_mes_unlock(&adev->mes);

	return r;
}

static void
amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_mes_queue_properties *props)
{
	props->queue_type = ring->funcs->type;
	props->hqd_base_gpu_addr = ring->gpu_addr;
	props->rptr_gpu_addr = ring->rptr_gpu_addr;
	props->wptr_gpu_addr = ring->wptr_gpu_addr;
	props->wptr_mc_addr =
		ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
	props->queue_size = ring->ring_size;
	props->eop_gpu_addr = ring->eop_gpu_addr;
	props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
	props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM;
	props->paging = false;
	props->ring = ring;
}

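/*
 * Helper for amdgpu_mes_ctx_get_offs(): translates a slot id for the
 * given engine array into a byte offset inside struct
 * amdgpu_mes_ctx_meta_data.  Regular ids index the slots array; the
 * special RING/IB/PADDING ids select the corresponding fixed members.
 */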
#define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng)			\
do {									\
	if (id_offs < AMDGPU_MES_CTX_MAX_OFFS)				\
		return offsetof(struct amdgpu_mes_ctx_meta_data,	\
				_eng[ring->idx].slots[id_offs]);	\
	else if (id_offs == AMDGPU_MES_CTX_RING_OFFS)			\
		return offsetof(struct amdgpu_mes_ctx_meta_data,	\
				_eng[ring->idx].ring);			\
	else if (id_offs == AMDGPU_MES_CTX_IB_OFFS)			\
		return offsetof(struct amdgpu_mes_ctx_meta_data,	\
				_eng[ring->idx].ib);			\
	else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS)		\
		return offsetof(struct amdgpu_mes_ctx_meta_data,	\
				_eng[ring->idx].padding);		\
} while (0)

int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs)
{
	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx);
		break;
	case AMDGPU_RING_TYPE_COMPUTE:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute);
		break;
	case AMDGPU_RING_TYPE_SDMA:
		DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma);
		break;
	default:
		break;
	}

	WARN_ON(1);
	return -EINVAL;
}

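/*
 * Convenience wrapper that allocates an amdgpu_ring, binds it to a new
 * MES hardware queue and names it after pasid/gang/queue.  A minimal
 * sketch of the self-test usage (see
 * amdgpu_mes_test_create_gang_and_queues() below):
 *
 *	struct amdgpu_ring *ring;
 *
 *	r = amdgpu_mes_add_ring(adev, gang_id, AMDGPU_RING_TYPE_COMPUTE,
 *				0, &ctx_data, &ring);
 *	if (!r)
 *		... use the ring, then amdgpu_mes_remove_ring(adev, ring);
 */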
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
			int queue_type, int idx,
			struct amdgpu_mes_ctx_data *ctx_data,
			struct amdgpu_ring **out)
{
	struct amdgpu_ring *ring;
	struct amdgpu_mes_gang *gang;
	struct amdgpu_mes_queue_properties qprops = {0};
	int r, queue_id, pasid;

	/*
	 * Avoid taking any other locks under MES lock to avoid circular
	 * lock dependencies.
	 */
	amdgpu_mes_lock(&adev->mes);
	gang = idr_find(&adev->mes.gang_id_idr, gang_id);
	if (!gang) {
		DRM_ERROR("gang id %d doesn't exist\n", gang_id);
		amdgpu_mes_unlock(&adev->mes);
		return -EINVAL;
	}
	pasid = gang->process->pasid;

	ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
	if (!ring) {
		amdgpu_mes_unlock(&adev->mes);
		return -ENOMEM;
	}

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->is_mes_queue = true;
	ring->mes_ctx = ctx_data;
	ring->idx = idx;
	ring->no_scheduler = true;

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
				      compute[ring->idx].mec_hpd);
		ring->eop_gpu_addr =
			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
	}

	switch (queue_type) {
	case AMDGPU_RING_TYPE_GFX:
		ring->funcs = adev->gfx.gfx_ring[0].funcs;
		ring->me = adev->gfx.gfx_ring[0].me;
		ring->pipe = adev->gfx.gfx_ring[0].pipe;
		break;
	case AMDGPU_RING_TYPE_COMPUTE:
		ring->funcs = adev->gfx.compute_ring[0].funcs;
		ring->me = adev->gfx.compute_ring[0].me;
		ring->pipe = adev->gfx.compute_ring[0].pipe;
		break;
	case AMDGPU_RING_TYPE_SDMA:
		ring->funcs = adev->sdma.instance[0].ring.funcs;
		break;
	default:
		BUG();
	}

	r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r) {
		amdgpu_mes_unlock(&adev->mes);
		goto clean_up_memory;
	}

	amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);

	dma_fence_wait(gang->process->vm->last_update, false);
	dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
	amdgpu_mes_unlock(&adev->mes);

	r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
	if (r)
		goto clean_up_ring;

	ring->hw_queue_id = queue_id;
	ring->doorbell_index = qprops.doorbell_off;

	if (queue_type == AMDGPU_RING_TYPE_GFX)
		snprintf(ring->name, sizeof(ring->name), "gfx_%d.%d.%d", pasid, gang_id, queue_id);
	else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
		snprintf(ring->name, sizeof(ring->name), "compute_%d.%d.%d", pasid, gang_id,
			queue_id);
	else if (queue_type == AMDGPU_RING_TYPE_SDMA)
		snprintf(ring->name, sizeof(ring->name), "sdma_%d.%d.%d", pasid, gang_id,
			queue_id);
	else
		BUG();

	*out = ring;
	return 0;

clean_up_ring:
	amdgpu_ring_fini(ring);
clean_up_memory:
	kfree(ring);
	return r;
}

void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
			    struct amdgpu_ring *ring)
{
	if (!ring)
		return;

	amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id);
	del_timer_sync(&ring->fence_drv.fallback_timer);
	amdgpu_ring_fini(ring);
	kfree(ring);
}

uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
						   enum amdgpu_mes_priority_level prio)
{
	return adev->mes.aggregated_doorbells[prio];
}

int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
				   struct amdgpu_mes_ctx_data *ctx_data)
{
	int r;

	r = amdgpu_bo_create_kernel(adev,
			    sizeof(struct amdgpu_mes_ctx_meta_data),
			    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
			    &ctx_data->meta_data_obj,
			    &ctx_data->meta_data_mc_addr,
			    &ctx_data->meta_data_ptr);
	if (r) {
		dev_warn(adev->dev, "(%d) create CTX bo failed\n", r);
		return r;
	}

	if (!ctx_data->meta_data_obj)
		return -ENOMEM;

	memset(ctx_data->meta_data_ptr, 0,
	       sizeof(struct amdgpu_mes_ctx_meta_data));

	return 0;
}

void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
{
	if (ctx_data->meta_data_obj)
		amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
				      &ctx_data->meta_data_mc_addr,
				      &ctx_data->meta_data_ptr);
}

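/*
 * Map the context meta data BO into a process VM.  The drm_exec loop
 * below is the usual contention-safe locking pattern: every object is
 * (re)locked inside drm_exec_until_all_locked() and
 * drm_exec_retry_on_contention() restarts the block whenever a lock had
 * to be dropped, which is why every error path must still reach
 * drm_exec_fini().
 */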
11461bb76ff1Sjsg int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
11471bb76ff1Sjsg 				 struct amdgpu_vm *vm,
11481bb76ff1Sjsg 				 struct amdgpu_mes_ctx_data *ctx_data)
11491bb76ff1Sjsg {
11501bb76ff1Sjsg 	struct amdgpu_bo_va *bo_va;
11511bb76ff1Sjsg 	struct amdgpu_sync sync;
1152f005ef32Sjsg 	struct drm_exec exec;
11531bb76ff1Sjsg 	int r;
11541bb76ff1Sjsg 
11551bb76ff1Sjsg 	amdgpu_sync_create(&sync);
11561bb76ff1Sjsg 
1157f005ef32Sjsg 	drm_exec_init(&exec, 0);
1158f005ef32Sjsg 	drm_exec_until_all_locked(&exec) {
1159f005ef32Sjsg 		r = drm_exec_lock_obj(&exec,
1160f005ef32Sjsg 				      &ctx_data->meta_data_obj->tbo.base);
1161f005ef32Sjsg 		drm_exec_retry_on_contention(&exec);
1162f005ef32Sjsg 		if (unlikely(r))
1163f005ef32Sjsg 			goto error_fini_exec;
11641bb76ff1Sjsg 
1165f005ef32Sjsg 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
1166f005ef32Sjsg 		drm_exec_retry_on_contention(&exec);
1167f005ef32Sjsg 		if (unlikely(r))
1168f005ef32Sjsg 			goto error_fini_exec;
11691bb76ff1Sjsg 	}
11701bb76ff1Sjsg 
11711bb76ff1Sjsg 	bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
11721bb76ff1Sjsg 	if (!bo_va) {
11731bb76ff1Sjsg 		DRM_ERROR("failed to create bo_va for meta data BO\n");
1174f005ef32Sjsg 		r = -ENOMEM;
1175f005ef32Sjsg 		goto error_fini_exec;
11761bb76ff1Sjsg 	}
11771bb76ff1Sjsg 
11781bb76ff1Sjsg 	r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
11791bb76ff1Sjsg 			     sizeof(struct amdgpu_mes_ctx_meta_data),
11801bb76ff1Sjsg 			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
11811bb76ff1Sjsg 			     AMDGPU_PTE_EXECUTABLE);
11821bb76ff1Sjsg 
11831bb76ff1Sjsg 	if (r) {
11841bb76ff1Sjsg 		DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
1185f005ef32Sjsg 		goto error_del_bo_va;
11861bb76ff1Sjsg 	}
11871bb76ff1Sjsg 
11881bb76ff1Sjsg 	r = amdgpu_vm_bo_update(adev, bo_va, false);
11891bb76ff1Sjsg 	if (r) {
11901bb76ff1Sjsg 		DRM_ERROR("failed to do vm_bo_update on meta data\n");
1191f005ef32Sjsg 		goto error_del_bo_va;
11921bb76ff1Sjsg 	}
11931bb76ff1Sjsg 	amdgpu_sync_fence(&sync, bo_va->last_pt_update);
11941bb76ff1Sjsg 
11951bb76ff1Sjsg 	r = amdgpu_vm_update_pdes(adev, vm, false);
11961bb76ff1Sjsg 	if (r) {
11971bb76ff1Sjsg 		DRM_ERROR("failed to update pdes on meta data\n");
1198f005ef32Sjsg 		goto error_del_bo_va;
11991bb76ff1Sjsg 	}
12001bb76ff1Sjsg 	amdgpu_sync_fence(&sync, vm->last_update);
12011bb76ff1Sjsg 
12021bb76ff1Sjsg 	amdgpu_sync_wait(&sync, false);
1203f005ef32Sjsg 	drm_exec_fini(&exec);
12041bb76ff1Sjsg 
12051bb76ff1Sjsg 	amdgpu_sync_free(&sync);
12061bb76ff1Sjsg 	ctx_data->meta_data_va = bo_va;
12071bb76ff1Sjsg 	return 0;
12081bb76ff1Sjsg 
1209f005ef32Sjsg error_del_bo_va:
12101bb76ff1Sjsg 	amdgpu_vm_bo_del(adev, bo_va);
1211f005ef32Sjsg 
1212f005ef32Sjsg error_fini_exec:
1213f005ef32Sjsg 	drm_exec_fini(&exec);
12141bb76ff1Sjsg 	amdgpu_sync_free(&sync);
12151bb76ff1Sjsg 	return r;
12161bb76ff1Sjsg }
12171bb76ff1Sjsg 
12181bb76ff1Sjsg int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
12191bb76ff1Sjsg 				   struct amdgpu_mes_ctx_data *ctx_data)
12201bb76ff1Sjsg {
12211bb76ff1Sjsg 	struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
12221bb76ff1Sjsg 	struct amdgpu_bo *bo = ctx_data->meta_data_obj;
12231bb76ff1Sjsg 	struct amdgpu_vm *vm = bo_va->base.vm;
1224f005ef32Sjsg 	struct dma_fence *fence;
1225f005ef32Sjsg 	struct drm_exec exec;
1226f005ef32Sjsg 	long r;
12271bb76ff1Sjsg 
1228f005ef32Sjsg 	drm_exec_init(&exec, 0);
1229f005ef32Sjsg 	drm_exec_until_all_locked(&exec) {
1230f005ef32Sjsg 		r = drm_exec_lock_obj(&exec,
1231f005ef32Sjsg 				      &ctx_data->meta_data_obj->tbo.base);
1232f005ef32Sjsg 		drm_exec_retry_on_contention(&exec);
1233f005ef32Sjsg 		if (unlikely(r))
1234f005ef32Sjsg 			goto out_unlock;
12351bb76ff1Sjsg 
1236f005ef32Sjsg 		r = amdgpu_vm_lock_pd(vm, &exec, 0);
1237f005ef32Sjsg 		drm_exec_retry_on_contention(&exec);
1238f005ef32Sjsg 		if (unlikely(r))
1239f005ef32Sjsg 			goto out_unlock;
12401bb76ff1Sjsg 	}
12411bb76ff1Sjsg 
12421bb76ff1Sjsg 	amdgpu_vm_bo_del(adev, bo_va);
12431bb76ff1Sjsg 	if (!amdgpu_vm_ready(vm))
12441bb76ff1Sjsg 		goto out_unlock;
12451bb76ff1Sjsg 
1246f005ef32Sjsg 	r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
1247f005ef32Sjsg 				   &fence);
12481bb76ff1Sjsg 	if (r)
12491bb76ff1Sjsg 		goto out_unlock;
12501bb76ff1Sjsg 	if (fence) {
12511bb76ff1Sjsg 		amdgpu_bo_fence(bo, fence, true);
12521bb76ff1Sjsg 		fence = NULL;
12531bb76ff1Sjsg 	}
12541bb76ff1Sjsg 
12551bb76ff1Sjsg 	r = amdgpu_vm_clear_freed(adev, vm, &fence);
12561bb76ff1Sjsg 	if (r || !fence)
12571bb76ff1Sjsg 		goto out_unlock;
12581bb76ff1Sjsg 
12591bb76ff1Sjsg 	dma_fence_wait(fence, false);
12601bb76ff1Sjsg 	amdgpu_bo_fence(bo, fence, true);
12611bb76ff1Sjsg 	dma_fence_put(fence);
12621bb76ff1Sjsg 
12631bb76ff1Sjsg out_unlock:
12641bb76ff1Sjsg 	if (unlikely(r < 0))
12651bb76ff1Sjsg 		dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
1266f005ef32Sjsg 	drm_exec_fini(&exec);
12671bb76ff1Sjsg 
12681bb76ff1Sjsg 	return r;
12691bb76ff1Sjsg }
12701bb76ff1Sjsg 
12711bb76ff1Sjsg static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
12721bb76ff1Sjsg 					  int pasid, int *gang_id,
12731bb76ff1Sjsg 					  int queue_type, int num_queue,
12741bb76ff1Sjsg 					  struct amdgpu_ring **added_rings,
12751bb76ff1Sjsg 					  struct amdgpu_mes_ctx_data *ctx_data)
12761bb76ff1Sjsg {
12771bb76ff1Sjsg 	struct amdgpu_ring *ring;
12781bb76ff1Sjsg 	struct amdgpu_mes_gang_properties gprops = {0};
12791bb76ff1Sjsg 	int r, j;
12801bb76ff1Sjsg 
12811bb76ff1Sjsg 	/* create a gang for the process */
12821bb76ff1Sjsg 	gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
12831bb76ff1Sjsg 	gprops.gang_quantum = adev->mes.default_gang_quantum;
12841bb76ff1Sjsg 	gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
12851bb76ff1Sjsg 	gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
12861bb76ff1Sjsg 	gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
12871bb76ff1Sjsg 
12881bb76ff1Sjsg 	r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
12891bb76ff1Sjsg 	if (r) {
12901bb76ff1Sjsg 		DRM_ERROR("failed to add gang\n");
12911bb76ff1Sjsg 		return r;
12921bb76ff1Sjsg 	}
12931bb76ff1Sjsg 
12941bb76ff1Sjsg 	/* create queues for the gang */
12951bb76ff1Sjsg 	for (j = 0; j < num_queue; j++) {
12961bb76ff1Sjsg 		r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
12971bb76ff1Sjsg 					ctx_data, &ring);
12981bb76ff1Sjsg 		if (r) {
12991bb76ff1Sjsg 			DRM_ERROR("failed to add ring\n");
13001bb76ff1Sjsg 			break;
13011bb76ff1Sjsg 		}
13021bb76ff1Sjsg 
13031bb76ff1Sjsg 		DRM_INFO("ring %s was added\n", ring->name);
13041bb76ff1Sjsg 		added_rings[j] = ring;
13051bb76ff1Sjsg 	}
13061bb76ff1Sjsg 
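	/*
	 * A ring that could not be added is not fatal here; the self
	 * test simply runs with the rings that were created.
	 */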
13071bb76ff1Sjsg 	return 0;
13081bb76ff1Sjsg }
13091bb76ff1Sjsg 
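/*
 * Run the HW ring test and an IB test on every ring gathered by the
 * helper above; unused slots in @added_rings are NULL and skipped.
 */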
13101bb76ff1Sjsg static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
13111bb76ff1Sjsg {
13121bb76ff1Sjsg 	struct amdgpu_ring *ring;
13131bb76ff1Sjsg 	int i, r;
13141bb76ff1Sjsg 
13151bb76ff1Sjsg 	for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
13161bb76ff1Sjsg 		ring = added_rings[i];
13171bb76ff1Sjsg 		if (!ring)
13181bb76ff1Sjsg 			continue;
13191bb76ff1Sjsg 
1320f005ef32Sjsg 		r = amdgpu_ring_test_helper(ring);
1321f005ef32Sjsg 		if (r)
13221bb76ff1Sjsg 			return r;
13231bb76ff1Sjsg 
13241bb76ff1Sjsg 		r = amdgpu_ring_test_ib(ring, 1000 * 10);
13251bb76ff1Sjsg 		if (r) {
13261bb76ff1Sjsg 			DRM_DEV_ERROR(ring->adev->dev,
13271bb76ff1Sjsg 				      "ring %s ib test failed (%d)\n",
13281bb76ff1Sjsg 				      ring->name, r);
13291bb76ff1Sjsg 			return r;
13301bb76ff1Sjsg 		} else
13311bb76ff1Sjsg 			DRM_INFO("ring %s ib test passed\n", ring->name);
13321bb76ff1Sjsg 	}
13331bb76ff1Sjsg 
13341bb76ff1Sjsg 	return 0;
13351bb76ff1Sjsg }
13361bb76ff1Sjsg 
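/*
 * End-to-end MES self test: allocate a PASID and a temporary VM, map
 * the context metadata, create a process with one gang per queue type,
 * run ring and IB tests on all queues, then tear everything down
 * again.  Failures are logged but never propagated; the function
 * always returns 0.  A caller would typically invoke it once after
 * MES hw init, roughly like this (illustrative sketch only):
 *
 *	if (adev->enable_mes)
 *		amdgpu_mes_self_test(adev);
 */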
13371bb76ff1Sjsg int amdgpu_mes_self_test(struct amdgpu_device *adev)
13381bb76ff1Sjsg {
13391bb76ff1Sjsg 	struct amdgpu_vm *vm = NULL;
13401bb76ff1Sjsg 	struct amdgpu_mes_ctx_data ctx_data = {0};
13411bb76ff1Sjsg 	struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
13421bb76ff1Sjsg 	int gang_ids[3] = {0};
1343281cd0f4Sjsg 	int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
1344281cd0f4Sjsg 				 { AMDGPU_RING_TYPE_COMPUTE, 1 },
1345281cd0f4Sjsg 				 { AMDGPU_RING_TYPE_SDMA, 1 } };
13461bb76ff1Sjsg 	int i, r, pasid, k = 0;
13471bb76ff1Sjsg 
13481bb76ff1Sjsg 	pasid = amdgpu_pasid_alloc(16);
13491bb76ff1Sjsg 	if (pasid < 0) {
13501bb76ff1Sjsg 		dev_warn(adev->dev, "No more PASIDs available!\n");
13511bb76ff1Sjsg 		pasid = 0;
13521bb76ff1Sjsg 	}
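	/* PASID 0 doubles as "none": the error path only frees a non-zero one. */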
13531bb76ff1Sjsg 
13541bb76ff1Sjsg 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
13551bb76ff1Sjsg 	if (!vm) {
13561bb76ff1Sjsg 		r = -ENOMEM;
13571bb76ff1Sjsg 		goto error_pasid;
13581bb76ff1Sjsg 	}
13591bb76ff1Sjsg 
1360f005ef32Sjsg 	r = amdgpu_vm_init(adev, vm, -1);
13611bb76ff1Sjsg 	if (r) {
13621bb76ff1Sjsg 		DRM_ERROR("failed to initialize vm\n");
13631bb76ff1Sjsg 		goto error_pasid;
13641bb76ff1Sjsg 	}
13651bb76ff1Sjsg 
13661bb76ff1Sjsg 	r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
13671bb76ff1Sjsg 	if (r) {
13681bb76ff1Sjsg 		DRM_ERROR("failed to alloc ctx meta data\n");
13691bb76ff1Sjsg 		goto error_fini;
13701bb76ff1Sjsg 	}
13711bb76ff1Sjsg 
13721bb76ff1Sjsg 	ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
13731bb76ff1Sjsg 	r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
13741bb76ff1Sjsg 	if (r) {
13751bb76ff1Sjsg 		DRM_ERROR("failed to map ctx meta data\n");
13761bb76ff1Sjsg 		goto error_vm;
13771bb76ff1Sjsg 	}
13781bb76ff1Sjsg 
13791bb76ff1Sjsg 	r = amdgpu_mes_create_process(adev, pasid, vm);
13801bb76ff1Sjsg 	if (r) {
13811bb76ff1Sjsg 		DRM_ERROR("failed to create MES process\n");
13821bb76ff1Sjsg 		goto error_vm;
13831bb76ff1Sjsg 	}
13841bb76ff1Sjsg 
13851bb76ff1Sjsg 	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
13861bb76ff1Sjsg 		/* On GFX v10.3, the firmware does not yet support mapping SDMA queues. */
13871bb76ff1Sjsg 		if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
13881bb76ff1Sjsg 		    adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
13891bb76ff1Sjsg 		    queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
13901bb76ff1Sjsg 			continue;
13911bb76ff1Sjsg 
13921bb76ff1Sjsg 		r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
13931bb76ff1Sjsg 							   &gang_ids[i],
13941bb76ff1Sjsg 							   queue_types[i][0],
13951bb76ff1Sjsg 							   queue_types[i][1],
13961bb76ff1Sjsg 							   &added_rings[k],
13971bb76ff1Sjsg 							   &ctx_data);
13981bb76ff1Sjsg 		if (r)
13991bb76ff1Sjsg 			goto error_queues;
14001bb76ff1Sjsg 
14011bb76ff1Sjsg 		k += queue_types[i][1];
14021bb76ff1Sjsg 	}
14031bb76ff1Sjsg 
14041bb76ff1Sjsg 	/* start ring test and ib test for MES queues */
14051bb76ff1Sjsg 	amdgpu_mes_test_queues(added_rings);
14061bb76ff1Sjsg 
14071bb76ff1Sjsg error_queues:
14081bb76ff1Sjsg 	/* remove all queues */
14091bb76ff1Sjsg 	for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
14101bb76ff1Sjsg 		if (!added_rings[i])
14111bb76ff1Sjsg 			continue;
14121bb76ff1Sjsg 		amdgpu_mes_remove_ring(adev, added_rings[i]);
14131bb76ff1Sjsg 	}
14141bb76ff1Sjsg 
14151bb76ff1Sjsg 	for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
14161bb76ff1Sjsg 		if (!gang_ids[i])
14171bb76ff1Sjsg 			continue;
14181bb76ff1Sjsg 		amdgpu_mes_remove_gang(adev, gang_ids[i]);
14191bb76ff1Sjsg 	}
14201bb76ff1Sjsg 
14211bb76ff1Sjsg 	amdgpu_mes_destroy_process(adev, pasid);
14221bb76ff1Sjsg 
14231bb76ff1Sjsg error_vm:
14241bb76ff1Sjsg 	amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
14251bb76ff1Sjsg 
14261bb76ff1Sjsg error_fini:
14271bb76ff1Sjsg 	amdgpu_vm_fini(adev, vm);
14281bb76ff1Sjsg 
14291bb76ff1Sjsg error_pasid:
14301bb76ff1Sjsg 	if (pasid)
14311bb76ff1Sjsg 		amdgpu_pasid_free(pasid);
14321bb76ff1Sjsg 
14331bb76ff1Sjsg 	amdgpu_mes_ctx_free_meta_data(&ctx_data);
14341bb76ff1Sjsg 	kfree(vm);
14351bb76ff1Sjsg 	return 0;
14361bb76ff1Sjsg }
14373c4b8cfaSjsg 
14383c4b8cfaSjsg int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
14393c4b8cfaSjsg {
14403c4b8cfaSjsg 	const struct mes_firmware_header_v1_0 *mes_hdr;
14413c4b8cfaSjsg 	struct amdgpu_firmware_info *info;
14423c4b8cfaSjsg 	char ucode_prefix[30];
14433c4b8cfaSjsg 	char fw_name[40];
1444428c00d9Sjsg 	bool need_retry = false;
14453c4b8cfaSjsg 	int r;
14463c4b8cfaSjsg 
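	/*
	 * MES firmware naming differs by generation: GC 11+ uses
	 * <prefix>_mes_2.bin for the scheduler pipe and <prefix>_mes1.bin
	 * for the KIQ pipe, while older parts use <prefix>_mes.bin and
	 * <prefix>_mes1.bin.  A GC 11.0.0 part, for example, requests
	 * amdgpu/gc_11_0_0_mes_2.bin for the scheduler pipe.
	 */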
1447428c00d9Sjsg 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
1448428c00d9Sjsg 				       sizeof(ucode_prefix));
1449428c00d9Sjsg 	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
1450428c00d9Sjsg 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
1451428c00d9Sjsg 			 ucode_prefix,
1452428c00d9Sjsg 			 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
1453428c00d9Sjsg 		need_retry = true;
1454428c00d9Sjsg 	} else {
14553c4b8cfaSjsg 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
14563c4b8cfaSjsg 			 ucode_prefix,
14573c4b8cfaSjsg 			 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
1458428c00d9Sjsg 	}
1459428c00d9Sjsg 
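	/*
	 * Request the pipe-specific image first; on GC 11+ parts that
	 * only ship the older layout, the scheduler pipe falls back to
	 * the legacy <prefix>_mes.bin name below.
	 */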
1460981d833cSjsg 	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
1461428c00d9Sjsg 	if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
1462428c00d9Sjsg 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
1463428c00d9Sjsg 			 ucode_prefix);
1464428c00d9Sjsg 		DRM_INFO("falling back to %s\n", fw_name);
1465428c00d9Sjsg 		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
1466428c00d9Sjsg 					 fw_name);
1467428c00d9Sjsg 	}
1468428c00d9Sjsg 
14693c4b8cfaSjsg 	if (r)
14703c4b8cfaSjsg 		goto out;
14713c4b8cfaSjsg 
14723c4b8cfaSjsg 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
14733c4b8cfaSjsg 		adev->mes.fw[pipe]->data;
14743c4b8cfaSjsg 	adev->mes.uc_start_addr[pipe] =
14753c4b8cfaSjsg 		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
14763c4b8cfaSjsg 		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
14773c4b8cfaSjsg 	adev->mes.data_start_addr[pipe] =
14783c4b8cfaSjsg 		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
14793c4b8cfaSjsg 		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);
14803c4b8cfaSjsg 
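	/*
	 * For PSP front-door loading, register the ucode and data
	 * segments with the firmware framework so the PSP can load
	 * them; each pipe has its own AMDGPU_UCODE_ID_CP_MES* pair
	 * backed by that pipe's firmware blob.
	 */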
14813c4b8cfaSjsg 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
14823c4b8cfaSjsg 		int ucode, ucode_data;
14833c4b8cfaSjsg 
14843c4b8cfaSjsg 		if (pipe == AMDGPU_MES_SCHED_PIPE) {
14853c4b8cfaSjsg 			ucode = AMDGPU_UCODE_ID_CP_MES;
14863c4b8cfaSjsg 			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
14873c4b8cfaSjsg 		} else {
14883c4b8cfaSjsg 			ucode = AMDGPU_UCODE_ID_CP_MES1;
14893c4b8cfaSjsg 			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
14903c4b8cfaSjsg 		}
14913c4b8cfaSjsg 
14923c4b8cfaSjsg 		info = &adev->firmware.ucode[ucode];
14933c4b8cfaSjsg 		info->ucode_id = ucode;
14943c4b8cfaSjsg 		info->fw = adev->mes.fw[pipe];
14953c4b8cfaSjsg 		adev->firmware.fw_size +=
1496f005ef32Sjsg 			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
14973c4b8cfaSjsg 			      PAGE_SIZE);
14983c4b8cfaSjsg 
14993c4b8cfaSjsg 		info = &adev->firmware.ucode[ucode_data];
15003c4b8cfaSjsg 		info->ucode_id = ucode_data;
15013c4b8cfaSjsg 		info->fw = adev->mes.fw[pipe];
15023c4b8cfaSjsg 		adev->firmware.fw_size +=
1503f005ef32Sjsg 			ALIGN(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
15043c4b8cfaSjsg 			      PAGE_SIZE);
15053c4b8cfaSjsg 	}
15063c4b8cfaSjsg 
15073c4b8cfaSjsg 	return 0;
15083c4b8cfaSjsg out:
1509981d833cSjsg 	amdgpu_ucode_release(&adev->mes.fw[pipe]);
15103c4b8cfaSjsg 	return r;
15113c4b8cfaSjsg }
1512