xref: /openbsd-src/sys/dev/pci/drm/amd/include/kgd_kfd_interface.h (revision f005ef32267c16bdb134f0e9fa4477dbe07c263a)
1fb4d8502Sjsg /*
2fb4d8502Sjsg  * Copyright 2014 Advanced Micro Devices, Inc.
3fb4d8502Sjsg  *
4fb4d8502Sjsg  * Permission is hereby granted, free of charge, to any person obtaining a
5fb4d8502Sjsg  * copy of this software and associated documentation files (the "Software"),
6fb4d8502Sjsg  * to deal in the Software without restriction, including without limitation
7fb4d8502Sjsg  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8fb4d8502Sjsg  * and/or sell copies of the Software, and to permit persons to whom the
9fb4d8502Sjsg  * Software is furnished to do so, subject to the following conditions:
10fb4d8502Sjsg  *
11fb4d8502Sjsg  * The above copyright notice and this permission notice shall be included in
12fb4d8502Sjsg  * all copies or substantial portions of the Software.
13fb4d8502Sjsg  *
14fb4d8502Sjsg  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15fb4d8502Sjsg  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16fb4d8502Sjsg  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17fb4d8502Sjsg  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18fb4d8502Sjsg  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19fb4d8502Sjsg  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20fb4d8502Sjsg  * OTHER DEALINGS IN THE SOFTWARE.
21fb4d8502Sjsg  */
22fb4d8502Sjsg 
23fb4d8502Sjsg /*
24fb4d8502Sjsg  * This file defines the private interface between the
25fb4d8502Sjsg  * AMD kernel graphics drivers and the AMD KFD.
26fb4d8502Sjsg  */
27fb4d8502Sjsg 
28fb4d8502Sjsg #ifndef KGD_KFD_INTERFACE_H_INCLUDED
29fb4d8502Sjsg #define KGD_KFD_INTERFACE_H_INCLUDED
30fb4d8502Sjsg 
31fb4d8502Sjsg #include <linux/types.h>
32fb4d8502Sjsg #include <linux/bitmap.h>
33fb4d8502Sjsg #include <linux/dma-fence.h>
34*f005ef32Sjsg #include "amdgpu_irq.h"
35*f005ef32Sjsg #include "amdgpu_gfx.h"
36fb4d8502Sjsg 
37fb4d8502Sjsg struct pci_dev;
381bb76ff1Sjsg struct amdgpu_device;
39fb4d8502Sjsg 
40fb4d8502Sjsg struct kfd_dev;
41fb4d8502Sjsg struct kgd_mem;
42fb4d8502Sjsg 
/*
 * How in-flight wavefronts are handled when a compute queue is
 * preempted; passed as the reset_type argument to the hqd_destroy
 * hook in struct kfd2kgd_calls below.
 */
43fb4d8502Sjsg enum kfd_preempt_type {
44fb4d8502Sjsg 	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,	/* presumably: let running waves finish — confirm */
45fb4d8502Sjsg 	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,	/* presumably: forcibly reset running waves — confirm */
465ca02815Sjsg 	KFD_PREEMPT_TYPE_WAVEFRONT_SAVE	/* presumably: save wave state for later restore (CWSR) — confirm */
47fb4d8502Sjsg };
48fb4d8502Sjsg 
/*
 * Information about a GPU VM fault, reported by KGD (amdgpu) to KFD.
 * NOTE(review): field semantics below are inferred from names; the
 * filling code is not visible in this header — confirm against the
 * ASIC-specific fault handlers.
 */
49fb4d8502Sjsg struct kfd_vm_fault_info {
50fb4d8502Sjsg 	uint64_t	page_addr;	/* address of the faulting page */
51fb4d8502Sjsg 	uint32_t	vmid;		/* VMID that took the fault */
52fb4d8502Sjsg 	uint32_t	mc_id;		/* memory-controller client id — presumably */
53fb4d8502Sjsg 	uint32_t	status;		/* raw fault status register value — presumably */
	/* Protection flags. NOTE(review): whether these describe the
	 * faulting access or the page's permissions is not visible here. */
54fb4d8502Sjsg 	bool		prot_valid;
55fb4d8502Sjsg 	bool		prot_read;
56fb4d8502Sjsg 	bool		prot_write;
57fb4d8502Sjsg 	bool		prot_exec;
58fb4d8502Sjsg };
59fb4d8502Sjsg 
/* Compute-unit (CU) topology information that KGD reports to KFD. */
60fb4d8502Sjsg struct kfd_cu_info {
61fb4d8502Sjsg 	uint32_t num_shader_engines;
62fb4d8502Sjsg 	uint32_t num_shader_arrays_per_engine;
63fb4d8502Sjsg 	uint32_t num_cu_per_sh;
64fb4d8502Sjsg 	uint32_t cu_active_number;	/* total count of active CUs */
65fb4d8502Sjsg 	uint32_t cu_ao_mask;	/* "always on" CU mask — presumably; confirm */
66fb4d8502Sjsg 	uint32_t simd_per_cu;
67fb4d8502Sjsg 	uint32_t max_waves_per_simd;
68fb4d8502Sjsg 	uint32_t wave_front_size;	/* threads per wavefront */
69fb4d8502Sjsg 	uint32_t max_scratch_slots_per_cu;
70fb4d8502Sjsg 	uint32_t lds_size;	/* local data share size (units not shown here) */
	/* Active-CU bitmaps, one set per graphics-core (GC) instance.
	 * NOTE(review): the [4][4] dimensions look like per-shader-engine /
	 * per-shader-array — TODO confirm against the code that fills this. */
71*f005ef32Sjsg 	uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
72fb4d8502Sjsg };
73fb4d8502Sjsg 
74fb4d8502Sjsg /* For getting GPU local memory information from KGD */
75fb4d8502Sjsg struct kfd_local_mem_info {
76fb4d8502Sjsg 	uint64_t local_mem_size_private;	/* presumably VRAM not visible to the CPU — confirm */
77fb4d8502Sjsg 	uint64_t local_mem_size_public;	/* presumably CPU-visible VRAM — confirm */
78fb4d8502Sjsg 	uint32_t vram_width;	/* memory bus width (units not shown here; likely bits) */
79fb4d8502Sjsg 	uint32_t mem_clk_max;	/* maximum memory clock (units not shown here) */
80fb4d8502Sjsg };
81fb4d8502Sjsg 
/* Memory pool a KGD allocation can be placed in. */
82fb4d8502Sjsg enum kgd_memory_pool {
83fb4d8502Sjsg 	KGD_POOL_SYSTEM_CACHEABLE = 1,	/* cacheable system memory */
84fb4d8502Sjsg 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,	/* write-combined system memory */
85fb4d8502Sjsg 	KGD_POOL_FRAMEBUFFER = 3,	/* GPU local memory (VRAM) — presumably */
86fb4d8502Sjsg };
87fb4d8502Sjsg 
88c349dbc7Sjsg /**
89c349dbc7Sjsg  * enum kfd_sched_policy
90c349dbc7Sjsg  *
91c349dbc7Sjsg  * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp)
92c349dbc7Sjsg  * scheduling. In this scheduling mode we're using the firmware code to
93c349dbc7Sjsg  * schedule the user mode queues and kernel queues such as HIQ and DIQ.
94c349dbc7Sjsg  * the HIQ queue is used as a special queue that dispatches the configuration
95c349dbc7Sjsg  * to the cp and the user mode queues list that are currently running.
96c349dbc7Sjsg  * the DIQ queue is a debugging queue that dispatches debugging commands to the
97c349dbc7Sjsg  * firmware.
98c349dbc7Sjsg  * in this scheduling mode user mode queues over subscription feature is
99c349dbc7Sjsg  * enabled.
100c349dbc7Sjsg  *
101c349dbc7Sjsg  * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over
102c349dbc7Sjsg  * subscription feature disabled.
103c349dbc7Sjsg  *
104c349dbc7Sjsg  * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly
105c349dbc7Sjsg  * set the command processor registers and sets the queues "manually". This
106c349dbc7Sjsg  * mode is used *ONLY* for debugging purposes.
107c349dbc7Sjsg  *
108c349dbc7Sjsg  */
/* Queue scheduling policies; see the kernel-doc comment directly above. */
109c349dbc7Sjsg enum kfd_sched_policy {
110c349dbc7Sjsg 	KFD_SCHED_POLICY_HWS = 0,	/* firmware (CP) scheduling, oversubscription enabled */
111c349dbc7Sjsg 	KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,	/* firmware scheduling, oversubscription disabled */
112c349dbc7Sjsg 	KFD_SCHED_POLICY_NO_HWS	/* driver programs CP registers directly; debugging only */
113fb4d8502Sjsg };
114fb4d8502Sjsg 
/*
 * Hardware and driver resources that the kgd (graphics) driver shares
 * with the kfd (compute) driver.
 */
115fb4d8502Sjsg struct kgd2kfd_shared_resources {
116fb4d8502Sjsg 	/* Bit n == 1 means VMID n is available for KFD. */
117fb4d8502Sjsg 	unsigned int compute_vmid_bitmap;
118fb4d8502Sjsg 
119fb4d8502Sjsg 	/* number of pipes per mec */
120fb4d8502Sjsg 	uint32_t num_pipe_per_mec;
121fb4d8502Sjsg 
122fb4d8502Sjsg 	/* number of queues per pipe */
123fb4d8502Sjsg 	uint32_t num_queue_per_pipe;
124fb4d8502Sjsg 
125fb4d8502Sjsg 	/* Bit n == 1 means Queue n is available for KFD */
126c349dbc7Sjsg 	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);
127fb4d8502Sjsg 
128c349dbc7Sjsg 	/* SDMA doorbell assignments (SOC15 and later chips only). Only
129fb4d8502Sjsg 	 * specific doorbells are routed to each SDMA engine. Others
130fb4d8502Sjsg 	 * are routed to IH and VCN. They are not usable by the CP.
131fb4d8502Sjsg 	 */
132c349dbc7Sjsg 	uint32_t *sdma_doorbell_idx;
133c349dbc7Sjsg 
134c349dbc7Sjsg 	/* From SOC15 onward, the doorbell index range not usable for CP
135c349dbc7Sjsg 	 * queues.
136c349dbc7Sjsg 	 */
137c349dbc7Sjsg 	uint32_t non_cp_doorbells_start;
138c349dbc7Sjsg 	uint32_t non_cp_doorbells_end;
139fb4d8502Sjsg 
140fb4d8502Sjsg 	/* Base address of doorbell aperture. */
141fb4d8502Sjsg 	phys_addr_t doorbell_physical_address;
142fb4d8502Sjsg 
143fb4d8502Sjsg 	/* Size in bytes of doorbell aperture. */
144fb4d8502Sjsg 	size_t doorbell_aperture_size;
145fb4d8502Sjsg 
146fb4d8502Sjsg 	/* Number of bytes at start of aperture reserved for KGD. */
147fb4d8502Sjsg 	size_t doorbell_start_offset;
148fb4d8502Sjsg 
149fb4d8502Sjsg 	/* GPUVM address space size in bytes */
150fb4d8502Sjsg 	uint64_t gpuvm_size;
151fb4d8502Sjsg 
152fb4d8502Sjsg 	/* Minor device number of the render node */
153fb4d8502Sjsg 	int drm_render_minor;
154c349dbc7Sjsg 
	/* NOTE(review): presumably true when the MES (MicroEngine
	 * Scheduler) firmware manages queues instead of the legacy HWS
	 * path — confirm against amdgpu's MES init code. */
1551bb76ff1Sjsg 	bool enable_mes;
156fb4d8502Sjsg };
157fb4d8502Sjsg 
/* GPU tiling configuration, read from hardware by KGD on behalf of KFD. */
158fb4d8502Sjsg struct tile_config {
159fb4d8502Sjsg 	uint32_t *tile_config_ptr;	/* array of tile-mode config values */
160fb4d8502Sjsg 	uint32_t *macro_tile_config_ptr;	/* array of macro-tile-mode config values */
161fb4d8502Sjsg 	uint32_t num_tile_configs;	/* number of entries in tile_config_ptr */
162fb4d8502Sjsg 	uint32_t num_macro_tile_configs;	/* number of entries in macro_tile_config_ptr */
163fb4d8502Sjsg 
164fb4d8502Sjsg 	uint32_t gb_addr_config;	/* presumably the raw GB_ADDR_CONFIG register value — confirm */
165fb4d8502Sjsg 	uint32_t num_banks;
166fb4d8502Sjsg 	uint32_t num_ranks;
167fb4d8502Sjsg };
168fb4d8502Sjsg 
169c349dbc7Sjsg #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
170fb4d8502Sjsg 
171fb4d8502Sjsg /**
172fb4d8502Sjsg  * struct kfd2kgd_calls
173fb4d8502Sjsg  *
174fb4d8502Sjsg  * @program_sh_mem_settings: A function that should initiate the memory
175fb4d8502Sjsg  * properties such as main aperture memory type (cache / non cached) and
176fb4d8502Sjsg  * secondary aperture base address, size and memory type.
177fb4d8502Sjsg  * This function is used only for no cp scheduling mode.
178fb4d8502Sjsg  *
179fb4d8502Sjsg  * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
180fb4d8502Sjsg  * scheduling mode. Only used for no cp scheduling mode.
181fb4d8502Sjsg  *
182fb4d8502Sjsg  * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp
183fb4d8502Sjsg  * scheduling mode.
184fb4d8502Sjsg  *
185fb4d8502Sjsg  * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
186fb4d8502Sjsg  * used only for no HWS mode.
187fb4d8502Sjsg  *
188fb4d8502Sjsg  * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
189fb4d8502Sjsg  * Array is allocated with kmalloc, needs to be freed with kfree by caller.
190fb4d8502Sjsg  *
191fb4d8502Sjsg  * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs.
192fb4d8502Sjsg  * Array is allocated with kmalloc, needs to be freed with kfree by caller.
193fb4d8502Sjsg  *
194fb4d8502Sjsg  * @hqd_is_occupied: Checks if a hqd slot is occupied.
195fb4d8502Sjsg  *
196fb4d8502Sjsg  * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
197fb4d8502Sjsg  *
198fb4d8502Sjsg  * @hqd_sdma_is_occupied: Checks if an SDMA hqd slot is occupied.
199fb4d8502Sjsg  *
200fb4d8502Sjsg  * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that
201fb4d8502Sjsg  * SDMA hqd slot.
202fb4d8502Sjsg  *
203fb4d8502Sjsg  * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID.
204fb4d8502Sjsg  * Only used for no cp scheduling mode
205fb4d8502Sjsg  *
206fb4d8502Sjsg  * @set_vm_context_page_table_base: Program page table base for a VMID
207fb4d8502Sjsg  *
208fb4d8502Sjsg  * @invalidate_tlbs: Invalidate TLBs for a specific PASID
209fb4d8502Sjsg  *
210fb4d8502Sjsg  * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID
211fb4d8502Sjsg  *
212fb4d8502Sjsg  * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
213fb4d8502Sjsg  * IH ring entry. This function allows the KFD ISR to get the VMID
214fb4d8502Sjsg  * from the fault status register as early as possible.
215fb4d8502Sjsg  *
216ad8b1aafSjsg  * @get_cu_occupancy: Function pointer that returns to caller the number
217ad8b1aafSjsg  * of wave fronts that are in flight for all of the queues of a process
218ad8b1aafSjsg  * as identified by its pasid. It is important to note that the value
219ad8b1aafSjsg  * returned by this function is a snapshot of current moment and cannot
220ad8b1aafSjsg  * guarantee any minimum for the number of waves in-flight. This function
221ad8b1aafSjsg  * is defined for devices that belong to GFX9 and later GFX families. Care
222ad8b1aafSjsg  * must be taken in calling this function as it is not defined for devices
223ad8b1aafSjsg  * that belong to GFX8 and below GFX families.
224fb4d8502Sjsg  *
225fb4d8502Sjsg  * This structure contains function pointers to services that the kgd driver
226fb4d8502Sjsg  * provides to amdkfd driver.
227fb4d8502Sjsg  *
228fb4d8502Sjsg  */
229fb4d8502Sjsg struct kfd2kgd_calls {
230fb4d8502Sjsg 	/* Register access functions */
	/* NOTE(review): the trailing "inst" argument on many hooks appears
	 * to select a GFX/XCC instance on multi-instance GPUs — confirm
	 * against the ASIC-specific implementations, which are not visible
	 * in this header. */
2311bb76ff1Sjsg 	void (*program_sh_mem_settings)(struct amdgpu_device *adev, uint32_t vmid,
232fb4d8502Sjsg 			uint32_t sh_mem_config,	uint32_t sh_mem_ape1_base,
233*f005ef32Sjsg 			uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases,
234*f005ef32Sjsg 			uint32_t inst);
235fb4d8502Sjsg 
2361bb76ff1Sjsg 	int (*set_pasid_vmid_mapping)(struct amdgpu_device *adev, u32 pasid,
237*f005ef32Sjsg 					unsigned int vmid, uint32_t inst);
238fb4d8502Sjsg 
239*f005ef32Sjsg 	int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id,
240*f005ef32Sjsg 			uint32_t inst);
241fb4d8502Sjsg 
	/* Presumably hooks not yet wired up in this (OpenBSD) port — they
	 * depend on Linux-only types such as mm_struct. */
242fb4d8502Sjsg #ifdef notyet
2431bb76ff1Sjsg 	int (*hqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
244fb4d8502Sjsg 			uint32_t queue_id, uint32_t __user *wptr,
245fb4d8502Sjsg 			uint32_t wptr_shift, uint32_t wptr_mask,
246*f005ef32Sjsg 			struct mm_struct *mm, uint32_t inst);
247c349dbc7Sjsg #endif
248fb4d8502Sjsg 
2491bb76ff1Sjsg 	int (*hiq_mqd_load)(struct amdgpu_device *adev, void *mqd,
250c349dbc7Sjsg 			    uint32_t pipe_id, uint32_t queue_id,
251*f005ef32Sjsg 			    uint32_t doorbell_off, uint32_t inst);
252c349dbc7Sjsg 
253c349dbc7Sjsg #ifdef notyet
2541bb76ff1Sjsg 	int (*hqd_sdma_load)(struct amdgpu_device *adev, void *mqd,
255fb4d8502Sjsg 			     uint32_t __user *wptr, struct mm_struct *mm);
256fb4d8502Sjsg #endif
257fb4d8502Sjsg 
	/* Caller must kfree(*dump) — see the kernel-doc above the struct. */
2581bb76ff1Sjsg 	int (*hqd_dump)(struct amdgpu_device *adev,
259fb4d8502Sjsg 			uint32_t pipe_id, uint32_t queue_id,
260*f005ef32Sjsg 			uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
261fb4d8502Sjsg 
2621bb76ff1Sjsg 	int (*hqd_sdma_dump)(struct amdgpu_device *adev,
263fb4d8502Sjsg 			     uint32_t engine_id, uint32_t queue_id,
264fb4d8502Sjsg 			     uint32_t (**dump)[2], uint32_t *n_regs);
265fb4d8502Sjsg 
2661bb76ff1Sjsg 	bool (*hqd_is_occupied)(struct amdgpu_device *adev,
2671bb76ff1Sjsg 				uint64_t queue_address, uint32_t pipe_id,
268*f005ef32Sjsg 				uint32_t queue_id, uint32_t inst);
269fb4d8502Sjsg 
	/* reset_type selects the wavefront preemption behavior (see enum
	 * kfd_preempt_type above). */
2701bb76ff1Sjsg 	int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd,
2711bb76ff1Sjsg 				enum kfd_preempt_type reset_type,
272fb4d8502Sjsg 				unsigned int timeout, uint32_t pipe_id,
273*f005ef32Sjsg 				uint32_t queue_id, uint32_t inst);
274fb4d8502Sjsg 
2751bb76ff1Sjsg 	bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd);
276fb4d8502Sjsg 
2771bb76ff1Sjsg 	int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd,
278fb4d8502Sjsg 				unsigned int timeout);
279fb4d8502Sjsg 
2801bb76ff1Sjsg 	int (*wave_control_execute)(struct amdgpu_device *adev,
281fb4d8502Sjsg 					uint32_t gfx_index_val,
282*f005ef32Sjsg 					uint32_t sq_cmd, uint32_t inst);
	/* Looks up the PASID mapped to the given VMID; returns whether a
	 * valid mapping exists — presumably, confirm. */
2831bb76ff1Sjsg 	bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev,
284c349dbc7Sjsg 					uint8_t vmid,
285c349dbc7Sjsg 					uint16_t *p_pasid);
286fb4d8502Sjsg 
287c349dbc7Sjsg 	/* No longer needed from GFXv9 onward. The scratch base address is
288c349dbc7Sjsg 	 * passed to the shader by the CP. It's the user mode driver's
289c349dbc7Sjsg 	 * responsibility.
290c349dbc7Sjsg 	 */
2911bb76ff1Sjsg 	void (*set_scratch_backing_va)(struct amdgpu_device *adev,
292fb4d8502Sjsg 				uint64_t va, uint32_t vmid);
293fb4d8502Sjsg 
2941bb76ff1Sjsg 	void (*set_vm_context_page_table_base)(struct amdgpu_device *adev,
295c349dbc7Sjsg 			uint32_t vmid, uint64_t page_table_base);
2961bb76ff1Sjsg 	uint32_t (*read_vmid_from_vmfault_reg)(struct amdgpu_device *adev);
297fb4d8502Sjsg 
	/* Debugger support: per-VMID trap enable/disable, wave-launch
	 * control, and hardware address-watch programming. NOTE(review):
	 * these hooks are not covered by the kernel-doc above; per-hook
	 * semantics are defined by the ASIC-specific implementations and
	 * are inferred here from names only — confirm before relying on
	 * this documentation. */
298*f005ef32Sjsg 	uint32_t (*enable_debug_trap)(struct amdgpu_device *adev,
299*f005ef32Sjsg 					bool restore_dbg_registers,
300*f005ef32Sjsg 					uint32_t vmid);
301*f005ef32Sjsg 	uint32_t (*disable_debug_trap)(struct amdgpu_device *adev,
302*f005ef32Sjsg 					bool keep_trap_enabled,
303*f005ef32Sjsg 					uint32_t vmid);
304*f005ef32Sjsg 	int (*validate_trap_override_request)(struct amdgpu_device *adev,
305*f005ef32Sjsg 					uint32_t trap_override,
306*f005ef32Sjsg 					uint32_t *trap_mask_supported);
307*f005ef32Sjsg 	uint32_t (*set_wave_launch_trap_override)(struct amdgpu_device *adev,
308*f005ef32Sjsg 					     uint32_t vmid,
309*f005ef32Sjsg 					     uint32_t trap_override,
310*f005ef32Sjsg 					     uint32_t trap_mask_bits,
311*f005ef32Sjsg 					     uint32_t trap_mask_request,
312*f005ef32Sjsg 					     uint32_t *trap_mask_prev,
313*f005ef32Sjsg 					     uint32_t kfd_dbg_trap_cntl_prev);
314*f005ef32Sjsg 	uint32_t (*set_wave_launch_mode)(struct amdgpu_device *adev,
315*f005ef32Sjsg 					uint8_t wave_launch_mode,
316*f005ef32Sjsg 					uint32_t vmid);
317*f005ef32Sjsg 	uint32_t (*set_address_watch)(struct amdgpu_device *adev,
318*f005ef32Sjsg 					uint64_t watch_address,
319*f005ef32Sjsg 					uint32_t watch_address_mask,
320*f005ef32Sjsg 					uint32_t watch_id,
321*f005ef32Sjsg 					uint32_t watch_mode,
322*f005ef32Sjsg 					uint32_t debug_vmid,
323*f005ef32Sjsg 					uint32_t inst);
324*f005ef32Sjsg 	uint32_t (*clear_address_watch)(struct amdgpu_device *adev,
325*f005ef32Sjsg 			uint32_t watch_id);
326*f005ef32Sjsg 	void (*get_iq_wait_times)(struct amdgpu_device *adev,
327*f005ef32Sjsg 			uint32_t *wait_times,
328*f005ef32Sjsg 			uint32_t inst);
329*f005ef32Sjsg 	void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
330*f005ef32Sjsg 			uint32_t wait_times,
331*f005ef32Sjsg 			uint32_t grace_period,
332*f005ef32Sjsg 			uint32_t *reg_offset,
333*f005ef32Sjsg 			uint32_t *reg_data);
	/* Snapshot of in-flight waves for a process; see the kernel-doc
	 * for @get_cu_occupancy above (GFX9+ only). */
3341bb76ff1Sjsg 	void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
335*f005ef32Sjsg 			int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
	/* Programs trap-handler base/TMA addresses for a VMID — presumably;
	 * confirm against the ASIC-specific implementations. */
3361bb76ff1Sjsg 	void (*program_trap_handler_settings)(struct amdgpu_device *adev,
337*f005ef32Sjsg 			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
338*f005ef32Sjsg 			uint32_t inst);
339fb4d8502Sjsg };
340fb4d8502Sjsg 
341fb4d8502Sjsg #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
342