; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s

; Codegen checks for the llvm.amdgcn.kernarg.segment.ptr and
; llvm.amdgcn.implicitarg.ptr intrinsics. The three llc invocations above
; compile the same kernels for an amdhsa, a mesa3d and an unknown-OS triple;
; each kernel below lists the scalar load it expects for every configuration.

; Reads the dword at index 10 behind the kernarg segment pointer and stores it
; to %out. The expected load uses dword offset 0xa from the SGPR pair that
; holds the kernarg pointer for the given configuration.
; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa

; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
  %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
  %value = load i32, i32 addrspace(4)* %gep
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Same access pattern as @test, but the base pointer comes from
; llvm.amdgcn.implicitarg.ptr, so the expected offset also covers everything
; laid out before the implicit arguments (breakdown in dwords below).
; ALL-LABEL: {{^}}test_implicit:
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
  %value = load i32, i32 addrspace(4)* %gep
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; The explicit arguments are a pointer followed by a <2 x i8>. The kernel loads
; the first dword at the implicit argument pointer and stores it to %out; the
; checks pin the reported kernarg segment size and the offset of that load.
; ALL-LABEL: {{^}}test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 10
; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 {
  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %arg.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Identical body to @test_implicit_alignment, but compiled with attribute
; group #2, which sets "amdgpu-implicitarg-num-bytes"="48". Only the amdhsa
; segment size expectation differs from the kernel above.
; ALL-LABEL: {{^}}opencl_test_implicit_alignment:
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #2 {
  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %arg.ptr
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Kernel without explicit arguments that still reads through the kernarg
; segment pointer. The store is volatile and targets undef, so the loaded value
; has a use without needing an output argument.
; ALL-LABEL: {{^}}test_no_kernargs:
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
define amdgpu_kernel void @test_no_kernargs() #1 {
  %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
  %value = load i32, i32 addrspace(4)* %gep
  store volatile i32 %value, i32 addrspace(1)* undef
  ret void
}

declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }