; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s

; Checks the lowering of the llvm.amdgcn.implicitarg.ptr intrinsic in kernels
; and in callable functions, together with the kernarg segment sizes that are
; expected in the assembly output.
;
; The module flag at the end of the file carries a placeholder token in place
; of the code object version; every run line above substitutes a concrete
; value with sed before piping the IR into llc. Values 500 and 600 share the
; COV5 expectations, value 400 on amdhsa uses the COV4 expectations, and the
; mesa3d run uses the MESA expectations.
;
; Attribute sets used below (see the bottom of the file):
;   #0 - no "amdgpu-implicitarg-num-bytes" override
;   #1 - "amdgpu-implicitarg-num-bytes"="48"
;   #3 - "amdgpu-implicitarg-num-bytes"="0"

; Kernel without explicit arguments, attribute set #0.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x0

; COV4: .amdhsa_kernarg_size 56
; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Kernel without explicit arguments and with zero implicit argument bytes
; (attribute set #3). The amdhsa runs expect the load to go through a null
; 64-bit constant, while the mesa3d run still loads through s[4:5].
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty_0implicit:
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; HSA: s_load_dword s0, [[NULL]], 0x0

; MESA: s_load_dword s0, s[4:5], 0x0

; COV4: .amdhsa_kernarg_size 0
; COV5: .amdhsa_kernarg_size 0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Kernel without explicit arguments and with 48 implicit argument bytes
; (attribute set #1).
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x0

; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Kernel with a single 112-byte explicit argument, attribute set #0.
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x1c

; COV4: .amdhsa_kernarg_size 168
; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Kernel with a single 112-byte explicit argument and 48 implicit argument
; bytes (attribute set #1).
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_load_dword s0, s[4:5], 0x1c

; HSA: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Callable (non-kernel) function: the load is expected to use s[8:9] as its
; base address.
; GCN-LABEL: {{^}}func_implicitarg_ptr:
; GCN: s_waitcnt
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Same expectations as func_implicitarg_ptr under a different function name.
; GCN-LABEL: {{^}}opencl_func_implicitarg_ptr:
; GCN: s_waitcnt
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @opencl_func_implicitarg_ptr() #0 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Kernel without explicit arguments calling func_implicitarg_ptr: s[4:5] is
; expected to be copied into s[8:9] before the call.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty:

; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN: s_swappc_b64

; COV4: .amdhsa_kernarg_size 56
; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; As above but with zero implicit argument bytes (attribute set #3): the
; amdhsa runs expect s[8:9] to be set to 0, the mesa3d run expects the copy
; from s[4:5].
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func_empty_implicit0:
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4

; HSA: s_mov_b64 s[8:9], 0{{$}}
; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
; GCN: s_swappc_b64

; HSA: .amdhsa_kernarg_size 0
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
  call void @func_implicitarg_ptr()
  ret void
}

; As kernel_call_implicitarg_ptr_func_empty but with attribute set #1; s4 and
; s5 must not appear again between the copy and the call.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: s_swappc_b64

; HSA: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Kernel with a 112-byte explicit argument calling func_implicitarg_ptr: the
; expected s[8:9] value is s[4:5] plus 0x70, computed with an add/add-carry
; pair.
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; HSA: s_add_u32 s8, s4, 0x70
; MESA: s_add_u32 s8, s4, 0x70

; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64

; COV4: .amdhsa_kernarg_size 168
; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; As kernel_call_implicitarg_ptr_func but with attribute set #1.
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4

; GCN: s_add_u32 s8, s4, 0x70
; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64

; HSA: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
  call void @func_implicitarg_ptr()
  ret void
}

; Function-to-function call: s8, s9 and s[8:9] must not be mentioned before
; the call instruction.
; GCN-LABEL: {{^}}func_call_implicitarg_ptr_func:
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s[8:9]
; GCN: s_swappc_b64
; GCN: s_setpc_b64 s[30:31]
define void @func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Same expectations as func_call_implicitarg_ptr_func under a different
; function name.
; GCN-LABEL: {{^}}opencl_func_call_implicitarg_ptr_func:
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s[8:9]
; GCN: s_swappc_b64
; GCN: s_setpc_b64 s[30:31]
define void @opencl_func_call_implicitarg_ptr_func() #0 {
  call void @func_implicitarg_ptr()
  ret void
}

; Callable function reading through both intrinsics: the kernarg segment
; pointer load is expected to use a null 64-bit constant as its base, while
; the implicit argument pointer load is expected to use s[8:9].
; GCN-LABEL: {{^}}func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
  %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Same expectations as func_kernarg_implicitarg_ptr under a different
; function name.
; GCN-LABEL: {{^}}opencl_func_kernarg_implicitarg_ptr:
; GCN: s_waitcnt
; GCN-DAG: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0
; GCN-DAG: s_load_dword s{{[0-9]+}}, [[NULL]], 0x0
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @opencl_func_kernarg_implicitarg_ptr() #0 {
  %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
  %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Kernel with a 112-byte explicit argument calling the function that uses
; both intrinsics.
; GCN-LABEL: {{^}}kernel_call_kernarg_implicitarg_ptr_func:
; GCN: s_add_u32 s8, s4, 0x70
; GCN: s_addc_u32 s9, s5, 0
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8]) #0 {
  call void @func_kernarg_implicitarg_ptr()
  ret void
}

; Kernel taking a <16 x i32> followed by an i32, attribute set #1. Only the
; reported segment sizes and alignment are checked here.
; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
; MESA: kernarg_segment_byte_size = 84
; MESA: kernarg_segment_alignment = 6

; HSA: .amdhsa_kernarg_size 120
define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

; Per-kernel metadata expected from the amdhsa runs. Each group checks the
; kernarg segment alignment and size that appear ahead of the kernel's name
; entry; the groups are listed in the order the kernels are defined above.

; HSA-LABEL: amdhsa.kernels:
; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 256
; COV4-NEXT: .kernarg_segment_size: 56
; HSA-LABEL: .name: kernel_implicitarg_ptr_empty

; HSA: .kernarg_segment_align: 4
; HSA-NEXT: .kernarg_segment_size: 0
; HSA-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit

; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 48
; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr_empty

; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 368
; COV4-NEXT: .kernarg_segment_size: 168
; HSA-LABEL: .name: kernel_implicitarg_ptr

; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 160
; HSA-LABEL: .name: opencl_kernel_implicitarg_ptr

; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 256
; COV4-NEXT: .kernarg_segment_size: 56
; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty

; HSA: .kernarg_segment_align: 4
; HSA-NEXT: .kernarg_segment_size: 0
; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0

; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 48
; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty

; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 368
; COV4-NEXT: .kernarg_segment_size: 168
; HSA-LABEL: .name: kernel_call_implicitarg_ptr_func

; HSA: .kernarg_segment_align: 8
; HSA-NEXT: .kernarg_segment_size: 160
; HSA-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func

; HSA: .kernarg_segment_align: 8
; COV5-NEXT: .kernarg_segment_size: 368
; COV4-NEXT: .kernarg_segment_size: 168
; HSA-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func

; HSA: .kernarg_segment_align: 64
; HSA-NEXT: .kernarg_segment_size: 120
; HSA-LABEL: .name: kernel_implicitarg_no_struct_align_padding

declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2

attributes #0 = { nounwind noinline "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" }

; The version value below is a placeholder that the sed invocations at the top
; of the file replace with 400, 500 or 600.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}