/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__

#include "amdgpu_irq.h"
#include "kgd_kfd_interface.h"
#include "amdgpu_gfx.h"
#include "amdgpu_doorbell.h"
#include <linux/sched/mm.h>

#define AMDGPU_MES_MAX_COMPUTE_PIPES        8
#define AMDGPU_MES_MAX_GFX_PIPES            2
#define AMDGPU_MES_MAX_SDMA_PIPES           2

#define AMDGPU_MES_API_VERSION_SHIFT	12
#define AMDGPU_MES_FEAT_VERSION_SHIFT	24

#define AMDGPU_MES_VERSION_MASK		0x00000fff
#define AMDGPU_MES_API_VERSION_MASK	0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK	0xff000000

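/*
 * Illustrative sketch (not part of the upstream header): the scheduler
 * version reported by the MES firmware packs three fields that the masks
 * and shifts above pull apart.  Assuming a populated adev->mes.sched_version,
 * the decode looks roughly like this:
 *
 *	uint32_t ver  = adev->mes.sched_version;
 *	uint32_t mes  = ver & AMDGPU_MES_VERSION_MASK;
 *	uint32_t api  = (ver & AMDGPU_MES_API_VERSION_MASK) >>
 *			AMDGPU_MES_API_VERSION_SHIFT;
 *	uint32_t feat = (ver & AMDGPU_MES_FEAT_VERSION_MASK) >>
 *			AMDGPU_MES_FEAT_VERSION_SHIFT;
 */
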
enum amdgpu_mes_priority_level {
	AMDGPU_MES_PRIORITY_LEVEL_LOW       = 0,
	AMDGPU_MES_PRIORITY_LEVEL_NORMAL    = 1,
	AMDGPU_MES_PRIORITY_LEVEL_MEDIUM    = 2,
	AMDGPU_MES_PRIORITY_LEVEL_HIGH      = 3,
	AMDGPU_MES_PRIORITY_LEVEL_REALTIME  = 4,
	AMDGPU_MES_PRIORITY_NUM_LEVELS
};

#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */

struct amdgpu_mes_funcs;

enum admgpu_mes_pipe {
	AMDGPU_MES_SCHED_PIPE = 0,
	AMDGPU_MES_KIQ_PIPE,
	AMDGPU_MAX_MES_PIPES = 2,
};

struct amdgpu_mes {
	struct amdgpu_device            *adev;

	struct rwlock			mutex_hidden;

	struct idr                      pasid_idr;
	struct idr                      gang_id_idr;
	struct idr                      queue_id_idr;
	struct ida                      doorbell_ida;

	spinlock_t                      queue_id_lock;

	uint32_t			sched_version;
	uint32_t			kiq_version;

	uint32_t                        total_max_queue;
	uint32_t                        max_doorbell_slices;

	uint64_t                        default_process_quantum;
	uint64_t                        default_gang_quantum;

	struct amdgpu_ring              ring;
	spinlock_t                      ring_lock;

	const struct firmware           *fw[AMDGPU_MAX_MES_PIPES];

	/* mes ucode */
	struct amdgpu_bo		*ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t			ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
	uint32_t			*ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
	uint64_t                        uc_start_addr[AMDGPU_MAX_MES_PIPES];

	/* mes ucode data */
	struct amdgpu_bo		*data_fw_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t			data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
	uint32_t			*data_fw_ptr[AMDGPU_MAX_MES_PIPES];
	uint64_t                        data_start_addr[AMDGPU_MAX_MES_PIPES];

	/* eop gpu obj */
	struct amdgpu_bo		*eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t                        eop_gpu_addr[AMDGPU_MAX_MES_PIPES];

	void                            *mqd_backup[AMDGPU_MAX_MES_PIPES];
	struct amdgpu_irq_src	        irq[AMDGPU_MAX_MES_PIPES];

	uint32_t                        vmid_mask_gfxhub;
	uint32_t                        vmid_mask_mmhub;
	uint32_t                        compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
	uint32_t                        gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
	uint32_t                        sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
	uint32_t                        aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
	uint32_t                        sch_ctx_offs;
	uint64_t			sch_ctx_gpu_addr;
	uint64_t			*sch_ctx_ptr;
	uint32_t			query_status_fence_offs;
	uint64_t			query_status_fence_gpu_addr;
	uint64_t			*query_status_fence_ptr;
	uint32_t                        read_val_offs;
	uint64_t			read_val_gpu_addr;
	uint32_t			*read_val_ptr;

#ifdef notyet
	uint32_t			saved_flags;
#endif

	/* initialize kiq pipe */
	int                             (*kiq_hw_init)(struct amdgpu_device *adev);
	int                             (*kiq_hw_fini)(struct amdgpu_device *adev);

	/* MES doorbells */
	uint32_t			db_start_dw_offset;
	uint32_t			num_mes_dbs;
	unsigned long			*doorbell_bitmap;

	/* ip specific functions */
	const struct amdgpu_mes_funcs   *funcs;
};

struct amdgpu_mes_process {
	int			pasid;
	struct			amdgpu_vm *vm;
	uint64_t		pd_gpu_addr;
	struct amdgpu_bo	*proc_ctx_bo;
	uint64_t		proc_ctx_gpu_addr;
	void			*proc_ctx_cpu_ptr;
	uint64_t		process_quantum;
	struct			list_head gang_list;
	uint32_t		doorbell_index;
	struct mutex		doorbell_lock;
};

struct amdgpu_mes_gang {
	int				gang_id;
	int				priority;
	int				inprocess_gang_priority;
	int				global_priority_level;
	struct list_head		list;
	struct amdgpu_mes_process	*process;
	struct amdgpu_bo		*gang_ctx_bo;
	uint64_t			gang_ctx_gpu_addr;
	void				*gang_ctx_cpu_ptr;
	uint64_t			gang_quantum;
	struct list_head		queue_list;
};

struct amdgpu_mes_queue {
	struct list_head		list;
	struct amdgpu_mes_gang		*gang;
	int				queue_id;
	uint64_t			doorbell_off;
	struct amdgpu_bo		*mqd_obj;
	void				*mqd_cpu_ptr;
	uint64_t			mqd_gpu_addr;
	uint64_t			wptr_gpu_addr;
	int				queue_type;
	int				paging;
	struct amdgpu_ring		*ring;
};

struct amdgpu_mes_queue_properties {
	int			queue_type;
	uint64_t                hqd_base_gpu_addr;
	uint64_t                rptr_gpu_addr;
	uint64_t                wptr_gpu_addr;
	uint64_t                wptr_mc_addr;
	uint32_t                queue_size;
	uint64_t                eop_gpu_addr;
	uint32_t                hqd_pipe_priority;
	uint32_t                hqd_queue_priority;
	bool			paging;
	struct amdgpu_ring	*ring;
	/* out */
	uint64_t		doorbell_off;
};

struct amdgpu_mes_gang_properties {
	uint32_t	priority;
	uint32_t	gang_quantum;
	uint32_t	inprocess_gang_priority;
	uint32_t	priority_level;
	int		global_priority_level;
};

struct mes_add_queue_input {
	uint32_t	process_id;
	uint64_t	page_table_base_addr;
	uint64_t	process_va_start;
	uint64_t	process_va_end;
	uint64_t	process_quantum;
	uint64_t	process_context_addr;
	uint64_t	gang_quantum;
	uint64_t	gang_context_addr;
	uint32_t	inprocess_gang_priority;
	uint32_t	gang_global_priority_level;
	uint32_t	doorbell_offset;
	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
	uint64_t	wptr_mc_addr;
	uint32_t	queue_type;
	uint32_t	paging;
	uint32_t        gws_base;
	uint32_t        gws_size;
	uint64_t	tba_addr;
	uint64_t	tma_addr;
	uint32_t	trap_en;
	uint32_t	skip_process_ctx_clear;
	uint32_t	is_kfd_process;
	uint32_t	is_aql_queue;
	uint32_t	queue_size;
	uint32_t	exclusively_scheduled;
};

struct mes_remove_queue_input {
	uint32_t	doorbell_offset;
	uint64_t	gang_context_addr;
};

struct mes_unmap_legacy_queue_input {
	enum amdgpu_unmap_queues_action    action;
	uint32_t                           queue_type;
	uint32_t                           doorbell_offset;
	uint32_t                           pipe_id;
	uint32_t                           queue_id;
	uint64_t                           trail_fence_addr;
	uint64_t                           trail_fence_data;
};

struct mes_suspend_gang_input {
	bool		suspend_all_gangs;
	uint64_t	gang_context_addr;
	uint64_t	suspend_fence_addr;
	uint32_t	suspend_fence_value;
};

struct mes_resume_gang_input {
	bool		resume_all_gangs;
	uint64_t	gang_context_addr;
};

enum mes_misc_opcode {
	MES_MISC_OP_WRITE_REG,
	MES_MISC_OP_READ_REG,
	MES_MISC_OP_WRM_REG_WAIT,
	MES_MISC_OP_WRM_REG_WR_WAIT,
	MES_MISC_OP_SET_SHADER_DEBUGGER,
};

struct mes_misc_op_input {
	enum mes_misc_opcode op;

	union {
		struct {
			uint32_t                  reg_offset;
			uint64_t                  buffer_addr;
		} read_reg;

		struct {
			uint32_t                  reg_offset;
			uint32_t                  reg_value;
		} write_reg;

		struct {
			uint32_t                   ref;
			uint32_t                   mask;
			uint32_t                   reg0;
			uint32_t                   reg1;
		} wrm_reg;

		struct {
			uint64_t process_context_addr;
			union {
				struct {
					uint32_t single_memop : 1;
					uint32_t single_alu_op : 1;
					uint32_t reserved: 29;
					uint32_t process_ctx_flush: 1;
				};
				uint32_t u32all;
			} flags;
			uint32_t spi_gdbg_per_vmid_cntl;
			uint32_t tcp_watch_cntl[4];
			uint32_t trap_en;
		} set_shader_debugger;
	};
};

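/*
 * Illustrative sketch (an assumed caller shape, not a verbatim copy of any
 * call site): the "op" member selects which union arm the MES firmware
 * interprets.  A register read, for example, asks MES to write the register
 * value into a GPU buffer that the driver then reads back:
 *
 *	struct mes_misc_op_input op_input = {0};
 *	uint32_t val;
 *
 *	op_input.op = MES_MISC_OP_READ_REG;
 *	op_input.read_reg.reg_offset = reg;
 *	op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
 *	if (!adev->mes.funcs->misc_op(&adev->mes, &op_input))
 *		val = *(adev->mes.read_val_ptr);
 */
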
struct amdgpu_mes_funcs {
	int (*add_hw_queue)(struct amdgpu_mes *mes,
			    struct mes_add_queue_input *input);

	int (*remove_hw_queue)(struct amdgpu_mes *mes,
			       struct mes_remove_queue_input *input);

	int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
				  struct mes_unmap_legacy_queue_input *input);

	int (*suspend_gang)(struct amdgpu_mes *mes,
			    struct mes_suspend_gang_input *input);

	int (*resume_gang)(struct amdgpu_mes *mes,
			   struct mes_resume_gang_input *input);

	int (*misc_op)(struct amdgpu_mes *mes,
		       struct mes_misc_op_input *input);
};

#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))

int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);

int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);

int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
			      struct amdgpu_vm *vm);
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);

int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
			struct amdgpu_mes_gang_properties *gprops,
			int *gang_id);
int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);

int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);

int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
			    struct amdgpu_mes_queue_properties *qprops,
			    int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);

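/*
 * Illustrative sketch of the expected setup order (assumed from the
 * declarations above, not copied from a specific caller): a process is
 * registered for a PASID/VM first, a gang is added to that process, and
 * hardware queues are then attached to the gang.  Teardown runs in the
 * reverse order.
 *
 *	struct amdgpu_mes_gang_properties gprops = {0};
 *	struct amdgpu_mes_queue_properties qprops = {0};
 *	int gang_id, queue_id, r;
 *
 *	r = amdgpu_mes_create_process(adev, pasid, vm);
 *	if (!r)
 *		r = amdgpu_mes_add_gang(adev, pasid, &gprops, &gang_id);
 *	if (!r)
 *		r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
 *	...
 *	amdgpu_mes_remove_hw_queue(adev, queue_id);
 *	amdgpu_mes_remove_gang(adev, gang_id);
 *	amdgpu_mes_destroy_process(adev, pasid);
 */
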
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr, u64 seq);

uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
		    uint32_t reg, uint32_t val);
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
			uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask);
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				uint64_t process_context_addr,
				uint32_t spi_gdbg_per_vmid_cntl,
				const uint32_t *tcp_watch_cntl,
				uint32_t flags,
				bool trap_en);
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				uint64_t process_context_addr);
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
			int queue_type, int idx,
			struct amdgpu_mes_ctx_data *ctx_data,
			struct amdgpu_ring **out);
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
			    struct amdgpu_ring *ring);

uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
						   enum amdgpu_mes_priority_level prio);

int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
				   struct amdgpu_mes_ctx_data *ctx_data);
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm,
				 struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
				   struct amdgpu_mes_ctx_data *ctx_data);

int amdgpu_mes_self_test(struct amdgpu_device *adev);

int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);

/*
 * MES lock can be taken in MMU notifiers.
 *
 * A bit more detail about why to set no-FS reclaim with MES lock:
 *
 * The purpose of the MMU notifier is to stop GPU access to memory so
 * that the Linux VM subsystem can move pages around safely. This is
 * done by preempting user mode queues for the affected process. When
 * MES is used, MES lock needs to be taken to preempt the queues.
 *
 * The MMU notifier callback entry point in the driver is
 * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
 * there is:
 * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
 * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
 *
 * The last part of the chain is a function pointer where we take the
 * MES lock.
 *
 * The problem with taking locks in the MMU notifier is that MMU
 * notifiers can be called in reclaim-FS context. That's where the
 * kernel frees up pages to make room for new page allocations under
 * memory pressure. While we are running in reclaim-FS context, we must
 * not trigger another memory reclaim operation because that would
 * recursively reenter the reclaim code and cause a deadlock. The
 * memalloc_nofs_save/restore calls guarantee that.
 *
 * In addition we also need to avoid lock dependencies on other locks taken
 * under the MES lock, for example reservation locks. Here is a possible
 * scenario of a deadlock:
 * Thread A: takes and holds reservation lock | triggers reclaim-FS |
 * MMU notifier | blocks trying to take MES lock
 * Thread B: takes and holds MES lock | blocks trying to take reservation lock
 *
 * In this scenario Thread B gets involved in a deadlock even without
 * triggering a reclaim-FS operation itself.
 * To fix this and break the lock dependency chain you'd need to either:
 * 1. protect reservation locks with memalloc_nofs_save/restore, or
 * 2. avoid taking reservation locks under the MES lock.
 *
 * Reservation locks are taken all over the kernel in different subsystems;
 * we have no control over them and their lock dependencies. So the only
 * workable solution is to avoid taking other locks under the MES lock.
 * As a result, make sure no reclaim-FS happens while holding this lock
 * anywhere to prevent deadlocks when an MMU notifier runs in reclaim-FS
 * context.
 */
static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
{
	mutex_lock(&mes->mutex_hidden);
#ifdef notyet
	mes->saved_flags = memalloc_noreclaim_save();
#endif
}

static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
{
#ifdef notyet
	memalloc_noreclaim_restore(mes->saved_flags);
#endif
	mutex_unlock(&mes->mutex_hidden);
}
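
/*
 * Illustrative usage sketch (an assumed caller shape, not copied from a
 * specific call site): any queue preemption or scheduler update routed
 * through MES is expected to be bracketed by these helpers so that the
 * no-reclaim guarantee described above holds for the whole critical
 * section.
 *
 *	amdgpu_mes_lock(&adev->mes);
 *	r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
 *	amdgpu_mes_unlock(&adev->mes);
 */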
#endif /* __AMDGPU_MES_H__ */