; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s

; Does not apply to wave64
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s

; Does not apply to gfx1101
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s

; Applies to gfx1102 (default wave size), same as gfx1100
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,WORKAROUND %s

; Does not apply to gfx1103
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,NOWORKAROUND %s

; Prefix summary:
;   GCN          - checked for every RUN line.
;   WORKAROUND   - targets where the user SGPR count is padded so that user
;                  SGPRs plus enabled workgroup ID SGPRs total 16.
;   NOWORKAROUND - targets where no padding is added.

; There aren't any stack objects and no user SGPR inputs are requested, but
; on WORKAROUND targets the user SGPR count is still padded so that 16 SGPRs
; are initialized, which puts the workgroup ID in s15.

; 0 user SGPRs + workgroup_id_x = 1, + 15 padding

; GCN-LABEL: {{^}}minimal_kernel_inputs:
; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s0
; GCN-NEXT: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V]], off

; GCN: .amdhsa_kernel minimal_kernel_inputs
; WORKAROUND: .amdhsa_user_sgpr_count 15
; NOWORKAROUND: .amdhsa_user_sgpr_count 0
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @minimal_kernel_inputs() #0 {
  %id = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %id, ptr addrspace(1) undef
  ret void
}

; Same as above but with a stack object: the private segment is enabled,
; while the SGPR layout checked here is unchanged.

; GCN-LABEL: {{^}}minimal_kernel_inputs_with_stack:
; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s0
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V]], off

; GCN: .amdhsa_kernel minimal_kernel_inputs_with_stack
; WORKAROUND: .amdhsa_user_sgpr_count 15
; NOWORKAROUND: .amdhsa_user_sgpr_count 0
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @minimal_kernel_inputs_with_stack() #0 {
  %alloca = alloca i32, addrspace(5)
  %id = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %id, ptr addrspace(1) undef
  store volatile i32 0, ptr addrspace(5) %alloca
  ret void
}

; queue_ptr + kernarg_segment_ptr = 4 user SGPRs, + workgroup_id_x = 5,
; + 11 padding

; GCN-LABEL: {{^}}queue_ptr:
; GCN: global_load_u8 v{{[0-9]+}},

; WORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s15
; NOWORKAROUND: v_mov_b32_e32 [[V:v[0-9]+]], s4
; GCN-NEXT: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V]], off

; GCN: .amdhsa_kernel queue_ptr
; WORKAROUND: .amdhsa_user_sgpr_count 15
; NOWORKAROUND: .amdhsa_user_sgpr_count 4
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 15
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 4
define amdgpu_kernel void @queue_ptr() #1 {
  %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %load = load volatile i8, ptr addrspace(4) %queue.ptr
  %id = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %id, ptr addrspace(1) undef
  ret void
}

; dispatch_ptr + queue_ptr + kernarg_segment_ptr + dispatch_id = 8 user SGPRs,
; + workgroup_id_x/y/z = 11, + 5 padding. The workgroup IDs therefore land in
; s13..s15 with the workaround and s8..s10 without it.

; GCN-LABEL: {{^}}all_inputs:
; WORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s13
; WORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s14
; WORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s15

; NOWORKAROUND: v_mov_b32_e32 [[V_X:v[0-9]+]], s8
; NOWORKAROUND: v_mov_b32_e32 [[V_Y:v[0-9]+]], s9
; NOWORKAROUND: v_mov_b32_e32 [[V_Z:v[0-9]+]], s10

; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[0:1]
; GCN: global_load_u8 v{{[0-9]+}},
; GCN: global_load_u8 v{{[0-9]+}}, v{{[0-9]+}}, s[4:5]

; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_LO:[0-9]+]], s6
; GCN-DAG: v_mov_b32_e32 v[[DISPATCH_HI:[0-9]+]], s7

; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_X]], off
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Y]], off
; GCN: global_store_b32 v{{\[[0-9]+:[0-9]+\]}}, [[V_Z]], off
; GCN: global_store_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[DISPATCH_LO]]:[[DISPATCH_HI]]{{\]}}, off

; GCN: .amdhsa_kernel all_inputs
; WORKAROUND: .amdhsa_user_sgpr_count 13
; NOWORKAROUND: .amdhsa_user_sgpr_count 8
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_queue_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN-NEXT: .amdhsa_user_sgpr_dispatch_id 1
; GCN-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GCN-NEXT: .amdhsa_wavefront_size32
; GCN-NEXT: .amdhsa_uses_dynamic_stack 0
; GCN-NEXT: .amdhsa_enable_private_segment 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GCN-NEXT: .amdhsa_system_vgpr_workitem_id 0
; WORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 13
; NOWORKAROUND: ; COMPUTE_PGM_RSRC2:USER_SGPR: 8
define amdgpu_kernel void @all_inputs() #2 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  %dispatch.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
  %load.dispatch = load volatile i8, ptr addrspace(4) %dispatch.ptr

  %queue.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
  %load.queue = load volatile i8, ptr addrspace(4) %queue.ptr

  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load.implicitarg = load volatile i8, ptr addrspace(4) %implicitarg.ptr

  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %id.x, ptr addrspace(1) undef

  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %id.y, ptr addrspace(1) undef

  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %id.z, ptr addrspace(1) undef

  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  store volatile i64 %dispatch.id, ptr addrspace(1) undef

  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #3
declare i32 @llvm.amdgcn.workgroup.id.y() #3
declare i32 @llvm.amdgcn.workgroup.id.z() #3
declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #3
declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr() #3
declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #3
declare i64 @llvm.amdgcn.dispatch.id() #3

; #0: every optional implicit input is marked unused.
; #1: like #0, but without "amdgpu-no-queue-ptr" and "amdgpu-no-implicitarg-ptr".
; #2: additionally leaves the dispatch ptr/id and workgroup ID y/z available.
attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #1 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}