1c349dbc7Sjsg /*
2c349dbc7Sjsg * Copyright 2019 Advanced Micro Devices, Inc.
3c349dbc7Sjsg *
4c349dbc7Sjsg * Permission is hereby granted, free of charge, to any person obtaining a
5c349dbc7Sjsg * copy of this software and associated documentation files (the "Software"),
6c349dbc7Sjsg * to deal in the Software without restriction, including without limitation
7c349dbc7Sjsg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8c349dbc7Sjsg * and/or sell copies of the Software, and to permit persons to whom the
9c349dbc7Sjsg * Software is furnished to do so, subject to the following conditions:
10c349dbc7Sjsg *
11c349dbc7Sjsg * The above copyright notice and this permission notice shall be included in
12c349dbc7Sjsg * all copies or substantial portions of the Software.
13c349dbc7Sjsg *
14c349dbc7Sjsg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15c349dbc7Sjsg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16c349dbc7Sjsg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17c349dbc7Sjsg * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18c349dbc7Sjsg * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19c349dbc7Sjsg * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20c349dbc7Sjsg * OTHER DEALINGS IN THE SOFTWARE.
21c349dbc7Sjsg *
22c349dbc7Sjsg */
23c349dbc7Sjsg
24c349dbc7Sjsg #ifndef __AMDGPU_MES_H__
25c349dbc7Sjsg #define __AMDGPU_MES_H__
26c349dbc7Sjsg
271bb76ff1Sjsg #include "amdgpu_irq.h"
281bb76ff1Sjsg #include "kgd_kfd_interface.h"
291bb76ff1Sjsg #include "amdgpu_gfx.h"
30f005ef32Sjsg #include "amdgpu_doorbell.h"
311bb76ff1Sjsg #include <linux/sched/mm.h>
321bb76ff1Sjsg
/*
 * Upper bounds on the number of pipes per engine type.  They dimension the
 * compute/gfx/sdma HQD mask arrays in struct amdgpu_mes.
 */
#define AMDGPU_MES_MAX_COMPUTE_PIPES	8
#define AMDGPU_MES_MAX_GFX_PIPES	2
#define AMDGPU_MES_MAX_SDMA_PIPES	2

/*
 * Packed version word layout, as implied by the shift/mask pairs below:
 *   bits 11:0   version
 *   bits 23:12  API version
 *   bits 31:24  feature version
 * NOTE(review): presumably applied to sched_version / kiq_version in
 * struct amdgpu_mes -- confirm against the users of these macros.
 */
#define AMDGPU_MES_API_VERSION_SHIFT	12
#define AMDGPU_MES_FEAT_VERSION_SHIFT	24

#define AMDGPU_MES_VERSION_MASK		0x00000fff
#define AMDGPU_MES_API_VERSION_MASK	0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK	0xff000000
431bb76ff1Sjsg
/*
 * Priority levels, lowest to highest.  The numeric values are spelled out
 * explicitly; AMDGPU_MES_PRIORITY_NUM_LEVELS is the count of real levels and
 * dimensions the aggregated_doorbells array in struct amdgpu_mes.
 */
enum amdgpu_mes_priority_level {
	AMDGPU_MES_PRIORITY_LEVEL_LOW		= 0,
	AMDGPU_MES_PRIORITY_LEVEL_NORMAL	= 1,
	AMDGPU_MES_PRIORITY_LEVEL_MEDIUM	= 2,
	AMDGPU_MES_PRIORITY_LEVEL_HIGH		= 3,
	AMDGPU_MES_PRIORITY_LEVEL_REALTIME	= 4,
	AMDGPU_MES_PRIORITY_NUM_LEVELS		/* not a level: number of levels */
};
52ad8b1aafSjsg
/* Size in bytes of the per-process and per-gang context areas (one page each). */
#define AMDGPU_MES_PROC_CTX_SIZE	0x1000
#define AMDGPU_MES_GANG_CTX_SIZE	0x1000

/* Defined further down; struct amdgpu_mes only stores a pointer to it. */
struct amdgpu_mes_funcs;
57c349dbc7Sjsg
/*
 * MES pipe indices.  AMDGPU_MAX_MES_PIPES dimensions the per-pipe arrays in
 * struct amdgpu_mes.
 *
 * The tag is spelled "admgpu" (sic); it is part of the existing interface and
 * is kept as-is.
 */
enum admgpu_mes_pipe {
	AMDGPU_MES_SCHED_PIPE	= 0,
	AMDGPU_MES_KIQ_PIPE,		/* == 1 */
	AMDGPU_MAX_MES_PIPES	= 2,
};
631bb76ff1Sjsg
64c349dbc7Sjsg struct amdgpu_mes {
65ad8b1aafSjsg struct amdgpu_device *adev;
66ad8b1aafSjsg
671bb76ff1Sjsg struct rwlock mutex_hidden;
681bb76ff1Sjsg
691bb76ff1Sjsg struct idr pasid_idr;
701bb76ff1Sjsg struct idr gang_id_idr;
711bb76ff1Sjsg struct idr queue_id_idr;
721bb76ff1Sjsg struct ida doorbell_ida;
731bb76ff1Sjsg
741bb76ff1Sjsg spinlock_t queue_id_lock;
751bb76ff1Sjsg
761bb76ff1Sjsg uint32_t sched_version;
771bb76ff1Sjsg uint32_t kiq_version;
781bb76ff1Sjsg
79ad8b1aafSjsg uint32_t total_max_queue;
80ad8b1aafSjsg uint32_t max_doorbell_slices;
81ad8b1aafSjsg
82ad8b1aafSjsg uint64_t default_process_quantum;
83ad8b1aafSjsg uint64_t default_gang_quantum;
84ad8b1aafSjsg
85ad8b1aafSjsg struct amdgpu_ring ring;
861bb76ff1Sjsg spinlock_t ring_lock;
87c349dbc7Sjsg
881bb76ff1Sjsg const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
89c349dbc7Sjsg
90c349dbc7Sjsg /* mes ucode */
911bb76ff1Sjsg struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
921bb76ff1Sjsg uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
931bb76ff1Sjsg uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
941bb76ff1Sjsg uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];
95c349dbc7Sjsg
96c349dbc7Sjsg /* mes ucode data */
971bb76ff1Sjsg struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
981bb76ff1Sjsg uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
991bb76ff1Sjsg uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
1001bb76ff1Sjsg uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];
101c349dbc7Sjsg
102ad8b1aafSjsg /* eop gpu obj */
1031bb76ff1Sjsg struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
1041bb76ff1Sjsg uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
105ad8b1aafSjsg
1061bb76ff1Sjsg void *mqd_backup[AMDGPU_MAX_MES_PIPES];
1071bb76ff1Sjsg struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];
108ad8b1aafSjsg
109ad8b1aafSjsg uint32_t vmid_mask_gfxhub;
110ad8b1aafSjsg uint32_t vmid_mask_mmhub;
111ad8b1aafSjsg uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
112ad8b1aafSjsg uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
113ad8b1aafSjsg uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
1141bb76ff1Sjsg uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
115ad8b1aafSjsg uint32_t sch_ctx_offs;
116ad8b1aafSjsg uint64_t sch_ctx_gpu_addr;
117ad8b1aafSjsg uint64_t *sch_ctx_ptr;
118ad8b1aafSjsg uint32_t query_status_fence_offs;
119ad8b1aafSjsg uint64_t query_status_fence_gpu_addr;
120ad8b1aafSjsg uint64_t *query_status_fence_ptr;
1211bb76ff1Sjsg uint32_t read_val_offs;
1221bb76ff1Sjsg uint64_t read_val_gpu_addr;
1231bb76ff1Sjsg uint32_t *read_val_ptr;
1241bb76ff1Sjsg
1251bb76ff1Sjsg #ifdef notyet
1261bb76ff1Sjsg uint32_t saved_flags;
1271bb76ff1Sjsg #endif
1281bb76ff1Sjsg
1291bb76ff1Sjsg /* initialize kiq pipe */
1301bb76ff1Sjsg int (*kiq_hw_init)(struct amdgpu_device *adev);
1311bb76ff1Sjsg int (*kiq_hw_fini)(struct amdgpu_device *adev);
132ad8b1aafSjsg
133f005ef32Sjsg /* MES doorbells */
134f005ef32Sjsg uint32_t db_start_dw_offset;
135f005ef32Sjsg uint32_t num_mes_dbs;
136f005ef32Sjsg unsigned long *doorbell_bitmap;
137f005ef32Sjsg
138c349dbc7Sjsg /* ip specific functions */
139ad8b1aafSjsg const struct amdgpu_mes_funcs *funcs;
140c349dbc7Sjsg };
141c349dbc7Sjsg
1421bb76ff1Sjsg struct amdgpu_mes_process {
1431bb76ff1Sjsg int pasid;
1441bb76ff1Sjsg struct amdgpu_vm *vm;
1451bb76ff1Sjsg uint64_t pd_gpu_addr;
1461bb76ff1Sjsg struct amdgpu_bo *proc_ctx_bo;
1471bb76ff1Sjsg uint64_t proc_ctx_gpu_addr;
1481bb76ff1Sjsg void *proc_ctx_cpu_ptr;
1491bb76ff1Sjsg uint64_t process_quantum;
1501bb76ff1Sjsg struct list_head gang_list;
1511bb76ff1Sjsg uint32_t doorbell_index;
1521bb76ff1Sjsg struct mutex doorbell_lock;
1531bb76ff1Sjsg };
1541bb76ff1Sjsg
1551bb76ff1Sjsg struct amdgpu_mes_gang {
1561bb76ff1Sjsg int gang_id;
1571bb76ff1Sjsg int priority;
1581bb76ff1Sjsg int inprocess_gang_priority;
1591bb76ff1Sjsg int global_priority_level;
1601bb76ff1Sjsg struct list_head list;
1611bb76ff1Sjsg struct amdgpu_mes_process *process;
1621bb76ff1Sjsg struct amdgpu_bo *gang_ctx_bo;
1631bb76ff1Sjsg uint64_t gang_ctx_gpu_addr;
1641bb76ff1Sjsg void *gang_ctx_cpu_ptr;
1651bb76ff1Sjsg uint64_t gang_quantum;
1661bb76ff1Sjsg struct list_head queue_list;
1671bb76ff1Sjsg };
1681bb76ff1Sjsg
1691bb76ff1Sjsg struct amdgpu_mes_queue {
1701bb76ff1Sjsg struct list_head list;
1711bb76ff1Sjsg struct amdgpu_mes_gang *gang;
1721bb76ff1Sjsg int queue_id;
1731bb76ff1Sjsg uint64_t doorbell_off;
1741bb76ff1Sjsg struct amdgpu_bo *mqd_obj;
1751bb76ff1Sjsg void *mqd_cpu_ptr;
1761bb76ff1Sjsg uint64_t mqd_gpu_addr;
1771bb76ff1Sjsg uint64_t wptr_gpu_addr;
1781bb76ff1Sjsg int queue_type;
1791bb76ff1Sjsg int paging;
1801bb76ff1Sjsg struct amdgpu_ring *ring;
1811bb76ff1Sjsg };
1821bb76ff1Sjsg
/*
 * Queue description taken by amdgpu_mes_add_hw_queue().  Everything is input
 * except doorbell_off, which the original marks as an output field.
 */
struct amdgpu_mes_queue_properties {
	int queue_type;

	/* GPU addresses describing the queue */
	uint64_t hqd_base_gpu_addr;
	uint64_t rptr_gpu_addr;
	uint64_t wptr_gpu_addr;
	uint64_t wptr_mc_addr;
	uint32_t queue_size;
	uint64_t eop_gpu_addr;

	/* HQD priorities */
	uint32_t hqd_pipe_priority;
	uint32_t hqd_queue_priority;

	bool paging;
	struct amdgpu_ring *ring;

	/* out */
	uint64_t doorbell_off;
};
1981bb76ff1Sjsg
/* Gang description taken by amdgpu_mes_add_gang(). */
struct amdgpu_mes_gang_properties {
	uint32_t priority;
	uint32_t gang_quantum;
	uint32_t inprocess_gang_priority;
	uint32_t priority_level;
	int global_priority_level;	/* signed, unlike the fields above */
};
2061bb76ff1Sjsg
/* Argument block for amdgpu_mes_funcs::add_hw_queue(). */
struct mes_add_queue_input {
	/* Process */
	uint32_t process_id;
	uint64_t page_table_base_addr;
	uint64_t process_va_start;
	uint64_t process_va_end;
	uint64_t process_quantum;
	uint64_t process_context_addr;

	/* Gang */
	uint64_t gang_quantum;
	uint64_t gang_context_addr;
	uint32_t inprocess_gang_priority;
	uint32_t gang_global_priority_level;

	/* Queue */
	uint32_t doorbell_offset;
	uint64_t mqd_addr;
	uint64_t wptr_addr;
	uint64_t wptr_mc_addr;
	uint32_t queue_type;
	uint32_t paging;

	uint32_t gws_base;
	uint32_t gws_size;

	uint64_t tba_addr;
	uint64_t tma_addr;
	uint32_t trap_en;

	uint32_t skip_process_ctx_clear;
	uint32_t is_kfd_process;
	uint32_t is_aql_queue;
	uint32_t queue_size;
	uint32_t exclusively_scheduled;
};
235c349dbc7Sjsg
/* Argument block for amdgpu_mes_funcs::remove_hw_queue(). */
struct mes_remove_queue_input {
	uint32_t doorbell_offset;
	uint64_t gang_context_addr;
};
240c349dbc7Sjsg
2411bb76ff1Sjsg struct mes_unmap_legacy_queue_input {
2421bb76ff1Sjsg enum amdgpu_unmap_queues_action action;
2431bb76ff1Sjsg uint32_t queue_type;
2441bb76ff1Sjsg uint32_t doorbell_offset;
2451bb76ff1Sjsg uint32_t pipe_id;
2461bb76ff1Sjsg uint32_t queue_id;
2471bb76ff1Sjsg uint64_t trail_fence_addr;
2481bb76ff1Sjsg uint64_t trail_fence_data;
2491bb76ff1Sjsg };
2501bb76ff1Sjsg
/* Argument block for amdgpu_mes_funcs::suspend_gang(). */
struct mes_suspend_gang_input {
	bool suspend_all_gangs;
	uint64_t gang_context_addr;

	/* Suspend fence: address and 32-bit value */
	uint64_t suspend_fence_addr;
	uint32_t suspend_fence_value;
};
257c349dbc7Sjsg
/* Argument block for amdgpu_mes_funcs::resume_gang(). */
struct mes_resume_gang_input {
	bool resume_all_gangs;
	uint64_t gang_context_addr;
};
262c349dbc7Sjsg
/*
 * Operation selector stored in mes_misc_op_input::op.  Values are assigned
 * implicitly starting at 0, so the order of the enumerators matters.
 */
enum mes_misc_opcode {
	MES_MISC_OP_WRITE_REG,			/* 0 */
	MES_MISC_OP_READ_REG,			/* 1 */
	MES_MISC_OP_WRM_REG_WAIT,		/* 2 */
	MES_MISC_OP_WRM_REG_WR_WAIT,		/* 3 */
	MES_MISC_OP_SET_SHADER_DEBUGGER,	/* 4 */
};
2701bb76ff1Sjsg
2711bb76ff1Sjsg struct mes_misc_op_input {
2721bb76ff1Sjsg enum mes_misc_opcode op;
2731bb76ff1Sjsg
2741bb76ff1Sjsg union {
2751bb76ff1Sjsg struct {
2761bb76ff1Sjsg uint32_t reg_offset;
2771bb76ff1Sjsg uint64_t buffer_addr;
2781bb76ff1Sjsg } read_reg;
2791bb76ff1Sjsg
2801bb76ff1Sjsg struct {
2811bb76ff1Sjsg uint32_t reg_offset;
2821bb76ff1Sjsg uint32_t reg_value;
2831bb76ff1Sjsg } write_reg;
2841bb76ff1Sjsg
2851bb76ff1Sjsg struct {
2861bb76ff1Sjsg uint32_t ref;
2871bb76ff1Sjsg uint32_t mask;
2881bb76ff1Sjsg uint32_t reg0;
2891bb76ff1Sjsg uint32_t reg1;
2901bb76ff1Sjsg } wrm_reg;
291f005ef32Sjsg
292f005ef32Sjsg struct {
293f005ef32Sjsg uint64_t process_context_addr;
294f005ef32Sjsg union {
295f005ef32Sjsg struct {
296*f074e99dSjsg uint32_t single_memop : 1;
297*f074e99dSjsg uint32_t single_alu_op : 1;
298*f074e99dSjsg uint32_t reserved: 29;
299*f074e99dSjsg uint32_t process_ctx_flush: 1;
300f005ef32Sjsg };
301f005ef32Sjsg uint32_t u32all;
302f005ef32Sjsg } flags;
303f005ef32Sjsg uint32_t spi_gdbg_per_vmid_cntl;
304f005ef32Sjsg uint32_t tcp_watch_cntl[4];
305f005ef32Sjsg uint32_t trap_en;
306f005ef32Sjsg } set_shader_debugger;
3071bb76ff1Sjsg };
3081bb76ff1Sjsg };
3091bb76ff1Sjsg
/*
 * IP specific MES operations, reached through amdgpu_mes::funcs.
 * Every hook takes the MES instance plus an operation-specific input block
 * and returns an int.
 */
struct amdgpu_mes_funcs {
	/* Hardware queue add/remove */
	int (*add_hw_queue)(struct amdgpu_mes *mes,
	    struct mes_add_queue_input *input);
	int (*remove_hw_queue)(struct amdgpu_mes *mes,
	    struct mes_remove_queue_input *input);

	/* Legacy queue unmap */
	int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
	    struct mes_unmap_legacy_queue_input *input);

	/* Gang suspend/resume */
	int (*suspend_gang)(struct amdgpu_mes *mes,
	    struct mes_suspend_gang_input *input);
	int (*resume_gang)(struct amdgpu_mes *mes,
	    struct mes_resume_gang_input *input);

	/* Miscellaneous operations, selected by input->op */
	int (*misc_op)(struct amdgpu_mes *mes,
	    struct mes_misc_op_input *input);
};
329c349dbc7Sjsg
/*
 * Call the KIQ pipe init/fini hook stored in adev->mes.  Each macro evaluates
 * to the hook's int return value.
 *
 * The whole replacement list is parenthesized so the expansion stays a single
 * primary expression wherever it is used.  Note that "adev" is evaluated
 * twice, so the argument must be free of side effects.
 */
#define amdgpu_mes_kiq_hw_init(adev) ((adev)->mes.kiq_hw_init((adev)))
#define amdgpu_mes_kiq_hw_fini(adev) ((adev)->mes.kiq_hw_fini((adev)))
3321bb76ff1Sjsg
3331bb76ff1Sjsg int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
3341bb76ff1Sjsg
3353c4b8cfaSjsg int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
3361bb76ff1Sjsg int amdgpu_mes_init(struct amdgpu_device *adev);
3371bb76ff1Sjsg void amdgpu_mes_fini(struct amdgpu_device *adev);
3381bb76ff1Sjsg
3391bb76ff1Sjsg int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
3401bb76ff1Sjsg struct amdgpu_vm *vm);
3411bb76ff1Sjsg void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
3421bb76ff1Sjsg
3431bb76ff1Sjsg int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
3441bb76ff1Sjsg struct amdgpu_mes_gang_properties *gprops,
3451bb76ff1Sjsg int *gang_id);
3461bb76ff1Sjsg int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
3471bb76ff1Sjsg
3481bb76ff1Sjsg int amdgpu_mes_suspend(struct amdgpu_device *adev);
3491bb76ff1Sjsg int amdgpu_mes_resume(struct amdgpu_device *adev);
3501bb76ff1Sjsg
3511bb76ff1Sjsg int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
3521bb76ff1Sjsg struct amdgpu_mes_queue_properties *qprops,
3531bb76ff1Sjsg int *queue_id);
3541bb76ff1Sjsg int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
3551bb76ff1Sjsg
3561bb76ff1Sjsg int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
3571bb76ff1Sjsg struct amdgpu_ring *ring,
3581bb76ff1Sjsg enum amdgpu_unmap_queues_action action,
3591bb76ff1Sjsg u64 gpu_addr, u64 seq);
3601bb76ff1Sjsg
3611bb76ff1Sjsg uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
3621bb76ff1Sjsg int amdgpu_mes_wreg(struct amdgpu_device *adev,
3631bb76ff1Sjsg uint32_t reg, uint32_t val);
3641bb76ff1Sjsg int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
3651bb76ff1Sjsg uint32_t val, uint32_t mask);
3661bb76ff1Sjsg int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
3671bb76ff1Sjsg uint32_t reg0, uint32_t reg1,
3681bb76ff1Sjsg uint32_t ref, uint32_t mask);
369f005ef32Sjsg int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
370f005ef32Sjsg uint64_t process_context_addr,
371f005ef32Sjsg uint32_t spi_gdbg_per_vmid_cntl,
372f005ef32Sjsg const uint32_t *tcp_watch_cntl,
373f005ef32Sjsg uint32_t flags,
374f005ef32Sjsg bool trap_en);
375*f074e99dSjsg int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
376*f074e99dSjsg uint64_t process_context_addr);
3771bb76ff1Sjsg int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
3781bb76ff1Sjsg int queue_type, int idx,
3791bb76ff1Sjsg struct amdgpu_mes_ctx_data *ctx_data,
3801bb76ff1Sjsg struct amdgpu_ring **out);
3811bb76ff1Sjsg void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
3821bb76ff1Sjsg struct amdgpu_ring *ring);
3831bb76ff1Sjsg
3841bb76ff1Sjsg uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
3851bb76ff1Sjsg enum amdgpu_mes_priority_level prio);
3861bb76ff1Sjsg
3871bb76ff1Sjsg int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
3881bb76ff1Sjsg struct amdgpu_mes_ctx_data *ctx_data);
3891bb76ff1Sjsg void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
3901bb76ff1Sjsg int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
3911bb76ff1Sjsg struct amdgpu_vm *vm,
3921bb76ff1Sjsg struct amdgpu_mes_ctx_data *ctx_data);
3931bb76ff1Sjsg int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
3941bb76ff1Sjsg struct amdgpu_mes_ctx_data *ctx_data);
3951bb76ff1Sjsg
3961bb76ff1Sjsg int amdgpu_mes_self_test(struct amdgpu_device *adev);
3971bb76ff1Sjsg
3981bb76ff1Sjsg int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
3991bb76ff1Sjsg
4001bb76ff1Sjsg /*
4011bb76ff1Sjsg * MES lock can be taken in MMU notifiers.
4021bb76ff1Sjsg *
4031bb76ff1Sjsg * A bit more detail about why to set no-FS reclaim with MES lock:
4041bb76ff1Sjsg *
4051bb76ff1Sjsg * The purpose of the MMU notifier is to stop GPU access to memory so
4061bb76ff1Sjsg * that the Linux VM subsystem can move pages around safely. This is
4071bb76ff1Sjsg * done by preempting user mode queues for the affected process. When
4081bb76ff1Sjsg * MES is used, MES lock needs to be taken to preempt the queues.
4091bb76ff1Sjsg *
4101bb76ff1Sjsg * The MMU notifier callback entry point in the driver is
4111bb76ff1Sjsg * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
4121bb76ff1Sjsg * there is:
4131bb76ff1Sjsg * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
4141bb76ff1Sjsg * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
4151bb76ff1Sjsg *
4161bb76ff1Sjsg * The last part of the chain is a function pointer where we take the
4171bb76ff1Sjsg * MES lock.
4181bb76ff1Sjsg *
4191bb76ff1Sjsg * The problem with taking locks in the MMU notifier is, that MMU
4201bb76ff1Sjsg * notifiers can be called in reclaim-FS context. That's where the
4211bb76ff1Sjsg * kernel frees up pages to make room for new page allocations under
4221bb76ff1Sjsg * memory pressure. While we are running in reclaim-FS context, we must
4231bb76ff1Sjsg * not trigger another memory reclaim operation because that would
4241bb76ff1Sjsg * recursively reenter the reclaim code and cause a deadlock. The
4251bb76ff1Sjsg * memalloc_nofs_save/restore calls guarantee that.
4261bb76ff1Sjsg *
4271bb76ff1Sjsg * In addition we also need to avoid lock dependencies on other locks taken
4281bb76ff1Sjsg * under the MES lock, for example reservation locks. Here is a possible
4291bb76ff1Sjsg * scenario of a deadlock:
4301bb76ff1Sjsg * Thread A: takes and holds reservation lock | triggers reclaim-FS |
4311bb76ff1Sjsg * MMU notifier | blocks trying to take MES lock
4321bb76ff1Sjsg * Thread B: takes and holds MES lock | blocks trying to take reservation lock
4331bb76ff1Sjsg *
4341bb76ff1Sjsg * In this scenario Thread B gets involved in a deadlock even without
4351bb76ff1Sjsg * triggering a reclaim-FS operation itself.
4361bb76ff1Sjsg * To fix this and break the lock dependency chain you'd need to either:
4371bb76ff1Sjsg * 1. protect reservation locks with memalloc_nofs_save/restore, or
4381bb76ff1Sjsg * 2. avoid taking reservation locks under the MES lock.
4391bb76ff1Sjsg *
 * Reservation locks are taken all over the kernel in different subsystems; we
 * have no control over them and their lock dependencies. So the only workable
 * solution is to avoid taking other locks under the MES lock.
4431bb76ff1Sjsg * As a result, make sure no reclaim-FS happens while holding this lock anywhere
4441bb76ff1Sjsg * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
4451bb76ff1Sjsg */
amdgpu_mes_lock(struct amdgpu_mes * mes)4461bb76ff1Sjsg static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
4471bb76ff1Sjsg {
4481bb76ff1Sjsg mutex_lock(&mes->mutex_hidden);
4491bb76ff1Sjsg #ifdef notyet
4501bb76ff1Sjsg mes->saved_flags = memalloc_noreclaim_save();
4511bb76ff1Sjsg #endif
4521bb76ff1Sjsg }
4531bb76ff1Sjsg
amdgpu_mes_unlock(struct amdgpu_mes * mes)4541bb76ff1Sjsg static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
4551bb76ff1Sjsg {
4561bb76ff1Sjsg #ifdef notyet
4571bb76ff1Sjsg memalloc_noreclaim_restore(mes->saved_flags);
4581bb76ff1Sjsg #endif
4591bb76ff1Sjsg mutex_unlock(&mes->mutex_hidden);
4601bb76ff1Sjsg }
461c349dbc7Sjsg #endif /* __AMDGPU_MES_H__ */
462