/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * This file defines the private interface between the
 * AMD kernel graphics drivers and the AMD KFD.
 */

#ifndef KGD_KFD_INTERFACE_H_INCLUDED
#define KGD_KFD_INTERFACE_H_INCLUDED

#include <linux/types.h>
#include <linux/bitmap.h>
#include <linux/dma-fence.h>
#include "amdgpu_irq.h"
#include "amdgpu_gfx.h"

struct pci_dev;
struct amdgpu_device;

struct kfd_dev;
struct kgd_mem;

enum kfd_preempt_type {
	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
	KFD_PREEMPT_TYPE_WAVEFRONT_SAVE
};

struct kfd_vm_fault_info {
	uint64_t	page_addr;
	uint32_t	vmid;
	uint32_t	mc_id;
	uint32_t	status;
	bool		prot_valid;
	bool		prot_read;
	bool		prot_write;
	bool		prot_exec;
};

struct kfd_cu_info {
	uint32_t num_shader_engines;
	uint32_t num_shader_arrays_per_engine;
	uint32_t num_cu_per_sh;
	uint32_t cu_active_number;
	uint32_t cu_ao_mask;
	uint32_t simd_per_cu;
	uint32_t max_waves_per_simd;
	uint32_t wave_front_size;
	uint32_t max_scratch_slots_per_cu;
	uint32_t lds_size;
	uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
};

/* For getting GPU local memory information from KGD */
struct kfd_local_mem_info {
	uint64_t local_mem_size_private;
	uint64_t local_mem_size_public;
	uint32_t vram_width;
	uint32_t mem_clk_max;
};
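
/*
 * Illustrative sketch (not part of the interface): the KGD side is expected
 * to fill a struct kfd_local_mem_info when KFD queries local memory
 * properties. The helper name and parameters below are hypothetical and only
 * show how the "public" (CPU-visible) vs. "private" VRAM split could be
 * derived, assuming the caller already knows the total and CPU-visible VRAM
 * sizes:
 *
 *	void example_fill_local_mem_info(struct kfd_local_mem_info *mem_info,
 *					 uint64_t vram_size,
 *					 uint64_t visible_vram_size,
 *					 uint32_t vram_width,
 *					 uint32_t mem_clk_max)
 *	{
 *		memset(mem_info, 0, sizeof(*mem_info));
 *		// CPU-visible VRAM is reported as "public", the rest as "private".
 *		mem_info->local_mem_size_public = visible_vram_size;
 *		mem_info->local_mem_size_private = vram_size - visible_vram_size;
 *		// Bus width and maximum memory clock as reported by the ASIC.
 *		mem_info->vram_width = vram_width;
 *		mem_info->mem_clk_max = mem_clk_max;
 *	}
 */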

enum kgd_memory_pool {
	KGD_POOL_SYSTEM_CACHEABLE = 1,
	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
	KGD_POOL_FRAMEBUFFER = 3,
};

/**
 * enum kfd_sched_policy
 *
 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp)
 * scheduling. In this scheduling mode we're using the firmware code to
 * schedule the user mode queues and kernel queues such as HIQ and DIQ.
 * The HIQ queue is used as a special queue that dispatches the configuration
 * to the cp and the list of user mode queues that are currently running.
 * The DIQ queue is a debugging queue that dispatches debugging commands to
 * the firmware.
 * In this scheduling mode the user mode queue oversubscription feature is
 * enabled.
 *
 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but with the
 * oversubscription feature disabled.
 *
 * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly
 * sets the command processor registers and sets the queues "manually". This
 * mode is used *ONLY* for debugging purposes.
 *
 */
enum kfd_sched_policy {
	KFD_SCHED_POLICY_HWS = 0,
	KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
	KFD_SCHED_POLICY_NO_HWS
};

struct kgd2kfd_shared_resources {
	/* Bit n == 1 means VMID n is available for KFD. */
	unsigned int compute_vmid_bitmap;

	/* number of pipes per mec */
	uint32_t num_pipe_per_mec;

	/* number of queues per pipe */
	uint32_t num_queue_per_pipe;

	/* Bit n == 1 means Queue n is available for KFD */
	DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES);

	/* SDMA doorbell assignments (SOC15 and later chips only). Only
	 * specific doorbells are routed to each SDMA engine. Others
	 * are routed to IH and VCN. They are not usable by the CP.
	 */
	uint32_t *sdma_doorbell_idx;

	/* From SOC15 onward, the doorbell index range that is not usable
	 * for CP queues.
	 */
	uint32_t non_cp_doorbells_start;
	uint32_t non_cp_doorbells_end;

	/* Base address of doorbell aperture. */
	phys_addr_t doorbell_physical_address;

	/* Size in bytes of doorbell aperture. */
	size_t doorbell_aperture_size;

	/* Number of bytes at start of aperture reserved for KGD. */
	size_t doorbell_start_offset;

	/* GPUVM address space size in bytes */
	uint64_t gpuvm_size;

	/* Minor device number of the render node */
	int drm_render_minor;

	bool enable_mes;
};
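
/*
 * Illustrative sketch (not part of the interface): the graphics driver fills
 * a struct kgd2kfd_shared_resources before handing it to KFD. The helper
 * name and the concrete numbers below are hypothetical; they assume a device
 * with 16 VMIDs whose upper half is reserved for compute, 4 pipes per MEC,
 * 8 queues per pipe, and queue 0 of each pipe kept by KGD:
 *
 *	void example_fill_shared_resources(struct kgd2kfd_shared_resources *res)
 *	{
 *		unsigned int i;
 *
 *		// VMIDs 8..15 are available for KFD (bits 8..15 set).
 *		res->compute_vmid_bitmap = 0xFF00;
 *
 *		res->num_pipe_per_mec = 4;
 *		res->num_queue_per_pipe = 8;
 *
 *		// Queue 0 of each pipe stays with KGD; the rest go to KFD.
 *		bitmap_zero(res->cp_queue_bitmap, KGD_MAX_QUEUES);
 *		for (i = 0; i < res->num_pipe_per_mec * res->num_queue_per_pipe; i++)
 *			if (i % res->num_queue_per_pipe != 0)
 *				set_bit(i, res->cp_queue_bitmap);
 *	}
 */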

struct tile_config {
	uint32_t *tile_config_ptr;
	uint32_t *macro_tile_config_ptr;
	uint32_t num_tile_configs;
	uint32_t num_macro_tile_configs;

	uint32_t gb_addr_config;
	uint32_t num_banks;
	uint32_t num_ranks;
};
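
/*
 * Illustrative sketch (not part of the interface): struct tile_config does
 * not own any storage; its pointers are expected to reference tiling-mode
 * register arrays that the graphics driver already keeps around. The static
 * arrays and helper name below are hypothetical:
 *
 *	static uint32_t example_tile_modes[32];
 *	static uint32_t example_macro_tile_modes[16];
 *
 *	void example_get_tile_config(struct tile_config *config,
 *				     uint32_t gb_addr_config)
 *	{
 *		config->tile_config_ptr = example_tile_modes;
 *		config->num_tile_configs = ARRAY_SIZE(example_tile_modes);
 *		config->macro_tile_config_ptr = example_macro_tile_modes;
 *		config->num_macro_tile_configs = ARRAY_SIZE(example_macro_tile_modes);
 *		// Value of the GB_ADDR_CONFIG register for this ASIC.
 *		config->gb_addr_config = gb_addr_config;
 *	}
 */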

#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096

/**
 * struct kfd2kgd_calls
 *
 * @program_sh_mem_settings: A function that should initiate the memory
 * properties such as main aperture memory type (cached / non-cached) and
 * secondary aperture base address, size and memory type.
 * This function is used only for no cp scheduling mode.
 *
 * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp
 * scheduling mode. Only used for no cp scheduling mode.
 *
 * @hqd_load: Loads the mqd structure to a H/W hqd slot. Used only for no cp
 * scheduling mode.
 *
 * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
 * Used only for no HWS mode.
 *
 * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
 * The array is allocated with kmalloc and must be freed with kfree by the
 * caller (see the example sketch after this structure).
 *
 * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value
 * pairs. The array is allocated with kmalloc and must be freed with kfree
 * by the caller.
 *
 * @hqd_is_occupied: Checks if an hqd slot is occupied.
 *
 * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
 *
 * @hqd_sdma_is_occupied: Checks if an SDMA hqd slot is occupied.
 *
 * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that
 * SDMA hqd slot.
 *
 * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID.
 * Only used for no cp scheduling mode.
 *
 * @set_vm_context_page_table_base: Program page table base for a VMID.
 *
 * @invalidate_tlbs: Invalidate TLBs for a specific PASID.
 *
 * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID.
 *
 * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
 * IH ring entry. This function allows the KFD ISR to get the VMID
 * from the fault status register as early as possible.
 *
 * @get_cu_occupancy: Function pointer that returns to the caller the number
 * of wave fronts that are in flight for all of the queues of a process
 * as identified by its pasid. It is important to note that the value
 * returned by this function is a snapshot of the current moment and cannot
 * guarantee any minimum for the number of waves in flight. This function
 * is defined for devices that belong to GFX9 and later GFX families. Care
 * must be taken in calling this function as it is not defined for devices
 * that belong to GFX8 and below GFX families.
 *
 * This structure contains function pointers to services that the kgd driver
 * provides to the amdkfd driver.
 *
 */
struct kfd2kgd_calls {
	/* Register access functions */
	void (*program_sh_mem_settings)(struct amdgpu_device *adev, uint32_t vmid,
			uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
			uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases,
			uint32_t inst);

	int (*set_pasid_vmid_mapping)(struct amdgpu_device *adev, u32 pasid,
			unsigned int vmid, uint32_t inst);

	int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id,
			uint32_t inst);

#ifdef notyet
	int (*hqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm, uint32_t inst);
#endif

	int (*hiq_mqd_load)(struct amdgpu_device *adev, void *mqd,
			    uint32_t pipe_id, uint32_t queue_id,
			    uint32_t doorbell_off, uint32_t inst);

#ifdef notyet
	int (*hqd_sdma_load)(struct amdgpu_device *adev, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
#endif

	int (*hqd_dump)(struct amdgpu_device *adev,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);

	int (*hqd_sdma_dump)(struct amdgpu_device *adev,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);

	bool (*hqd_is_occupied)(struct amdgpu_device *adev,
				uint64_t queue_address, uint32_t pipe_id,
				uint32_t queue_id, uint32_t inst);

	int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd,
			   enum kfd_preempt_type reset_type,
			   unsigned int timeout, uint32_t pipe_id,
			   uint32_t queue_id, uint32_t inst);

	bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd);

	int (*hqd_sdma_destroy)(struct amdgpu_device *adev, void *mqd,
				unsigned int timeout);

	int (*wave_control_execute)(struct amdgpu_device *adev,
				    uint32_t gfx_index_val,
				    uint32_t sq_cmd, uint32_t inst);
	bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev,
						uint8_t vmid,
						uint16_t *p_pasid);

	/* No longer needed from GFXv9 onward. The scratch base address is
	 * passed to the shader by the CP. It's the user mode driver's
	 * responsibility.
	 */
	void (*set_scratch_backing_va)(struct amdgpu_device *adev,
				       uint64_t va, uint32_t vmid);

	void (*set_vm_context_page_table_base)(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t page_table_base);
	uint32_t (*read_vmid_from_vmfault_reg)(struct amdgpu_device *adev);

	uint32_t (*enable_debug_trap)(struct amdgpu_device *adev,
					bool restore_dbg_registers,
					uint32_t vmid);
	uint32_t (*disable_debug_trap)(struct amdgpu_device *adev,
					bool keep_trap_enabled,
					uint32_t vmid);
	int (*validate_trap_override_request)(struct amdgpu_device *adev,
					uint32_t trap_override,
					uint32_t *trap_mask_supported);
	uint32_t (*set_wave_launch_trap_override)(struct amdgpu_device *adev,
					uint32_t vmid,
					uint32_t trap_override,
					uint32_t trap_mask_bits,
					uint32_t trap_mask_request,
					uint32_t *trap_mask_prev,
					uint32_t kfd_dbg_trap_cntl_prev);
	uint32_t (*set_wave_launch_mode)(struct amdgpu_device *adev,
					uint8_t wave_launch_mode,
					uint32_t vmid);
	uint32_t (*set_address_watch)(struct amdgpu_device *adev,
					uint64_t watch_address,
					uint32_t watch_address_mask,
					uint32_t watch_id,
					uint32_t watch_mode,
					uint32_t debug_vmid,
					uint32_t inst);
	uint32_t (*clear_address_watch)(struct amdgpu_device *adev,
					uint32_t watch_id);
	void (*get_iq_wait_times)(struct amdgpu_device *adev,
					uint32_t *wait_times,
					uint32_t inst);
	void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
					uint32_t wait_times,
					uint32_t grace_period,
					uint32_t *reg_offset,
					uint32_t *reg_data);
	void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
			int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
	void (*program_trap_handler_settings)(struct amdgpu_device *adev,
			uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
			uint32_t inst);
};
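
/*
 * Illustrative sketch (not actual driver code): the hqd_dump and
 * hqd_sdma_dump hooks allocate the register dump with kmalloc and hand
 * ownership to the caller, which must kfree it. Assuming a populated
 * struct kfd2kgd_calls (named kfd2kgd here for illustration), a caller
 * could look roughly like this:
 *
 *	int example_dump_hqd(const struct kfd2kgd_calls *kfd2kgd,
 *			     struct amdgpu_device *adev,
 *			     uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
 *	{
 *		uint32_t (*dump)[2];	// array of {register offset, value} pairs
 *		uint32_t n_regs, i;
 *		int r;
 *
 *		r = kfd2kgd->hqd_dump(adev, pipe_id, queue_id, &dump, &n_regs, inst);
 *		if (r)
 *			return r;
 *
 *		for (i = 0; i < n_regs; i++)
 *			pr_debug("reg 0x%x = 0x%x\n", dump[i][0], dump[i][1]);
 *
 *		kfree(dump);	// caller owns the kmalloc'd array
 *		return 0;
 *	}
 */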

#endif	/* KGD_KFD_INTERFACE_H_INCLUDED */