// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s

// Checks how struct arguments and return values are lowered for the amdgcn
// triple at -O0. The expected IR below shows some structs travelling as
// coerced first-class values and others through sret/byref pointers with
// explicit memcpy calls.

typedef int int2 __attribute__((ext_vector_type(2)));

// 9 ints; the expected IR passes it to foo as a coerced [9 x i32].
typedef struct {
  int cells[9];
} Mat3X3;

// 16 ints; the expected IR returns it from foo as a struct value.
typedef struct {
  int cells[16];
} Mat4X4;

// 1024 ints; the expected IR passes it to foo_large byref (4096-byte memcpy).
typedef struct {
  int cells[1024];
} Mat32X32;

// 4096 ints; the expected IR returns it from foo_large through sret.
typedef struct {
  int cells[4096];
} Mat64X64;

struct StructOneMember {
  int2 x;
};

struct StructTwoMember {
  int2 x;
  int2 y;
};

struct LargeStructOneMember {
  int2 x[100];
};

struct LargeStructTwoMember {
  int2 x[40];
  int2 y[20];
};

// Program-scope variable used by test_indirect_arg_globl; only declared when
// the language version provides program-scope global variables.
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
struct LargeStructOneMember g_s;
#endif


// foo takes a Mat3X3 by value and returns an uninitialised Mat4X4 by value.
// The expected IR receives the argument as [9 x i32], stores it into a local
// alloca, and returns the Mat4X4 loaded from the return-value alloca.
// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT:    [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
// AMDGCN-NEXT:    store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
// AMDGCN-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
// AMDGCN-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
//
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
  Mat4X4 out;
  return out;
}

// In ker, in[1] is loaded as a [9 x i32] value and passed to foo directly, so
// the only memcpy expected there is the one that copies the returned Mat4X4
// into out[0].

// Kernel wrapper around foo: reads in[1], calls foo, and stores the 64-byte
// result to out[0].
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// AMDGCN-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// AMDGCN-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// AMDGCN-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
// AMDGCN-NEXT:    store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
// AMDGCN-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
// AMDGCN-NEXT:    ret void
//
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
  out[0] = foo(in[1]);
}

// foo_large takes a Mat32X32 by value and returns an uninitialised Mat64X64.
// The expected IR returns through an sret pointer and receives the argument
// byref in addrspace(5), copying its 4096 bytes into a local alloca.
// AMDGCN-LABEL: define dso_local void @foo_large(
// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
// AMDGCN-NEXT:    [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// AMDGCN-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN-NEXT:    ret void
//
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
  Mat64X64 out;
  return out;
}

// Kernel wrapper around foo_large. Two memcpy calls are expected: in[1] is
// copied (4096 bytes) into an addrspace(5) temporary that is passed byref, and
// the sret result is copied (16384 bytes) into out[0].
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
// AMDGCN-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// AMDGCN-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// AMDGCN-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
// AMDGCN-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
// AMDGCN-NEXT:    call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
// AMDGCN-NEXT:    ret void
//
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
  out[0] = foo_large(in[1]);
}

// A struct with a single int2 member is expected to arrive as a coerced
// <2 x i32>; the body zeroes u.x through a compound-literal temporary.
// AMDGCN-LABEL: define dso_local void @FuncOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    store <2 x i32> [[TMP0]], ptr [[X]], align 8
// AMDGCN-NEXT:    ret void
//
void FuncOneMember(struct StructOneMember u) {
  u.x = (int2)(0, 0);
}

// The 800-byte single-member struct is expected to arrive byref in
// addrspace(5) and be copied into a local alloca before u.x[0] is zeroed.
// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
// AMDGCN-NEXT:    ret void
//
void FuncOneLargeMember(struct LargeStructOneMember u) {
  u.x[0] = (int2)(0, 0);
}

// Passing the program-scope variable g_s: it is expected to be copied from
// addrspace(1) into an addrspace(5) temporary that is then passed byref.
#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
void test_indirect_arg_globl(void) {
  FuncOneLargeMember(g_s);
}
#endif

// Passing a `local` variable: it is expected to be copied from addrspace(3)
// into an addrspace(5) temporary that is then passed byref.
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
kernel void test_indirect_arg_local(void) {
  local struct LargeStructOneMember l_s;
  FuncOneLargeMember(l_s);
}

// Passing a function-local variable: it is expected to be copied from its own
// alloca into a second addrspace(5) temporary that is then passed byref.
// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
void test_indirect_arg_private(void) {
  struct LargeStructOneMember p_s;
  FuncOneLargeMember(p_s);
}

// Kernel taking the single-member struct by value: the expected IR receives a
// <2 x i32> and forwards the reloaded member to FuncOneMember.
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
// AMDGCN-NEXT:    [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
// AMDGCN-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
kernel void KernelOneMember(struct StructOneMember u) {
  FuncOneMember(u);
}

// Kernel taking a global pointer to the struct: the member is expected to be
// loaded from addrspace(1) and passed to FuncOneMember as a <2 x i32>.
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN-NEXT:    [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
// AMDGCN-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
// AMDGCN-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
  FuncOneMember(*u);
}

// Kernel taking the 800-byte struct by value: the expected IR receives it as a
// struct value, spills it to an alloca, and copies it into a temporary that is
// passed byref to FuncOneLargeMember.
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
  FuncOneLargeMember(u);
}

// A struct with two int2 members is expected to arrive as two separate
// <2 x i32> arguments; the body zeroes u.y.
// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
// AMDGCN-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN-NEXT:    store <2 x i32> [[TMP2]], ptr [[Y]], align 8
// AMDGCN-NEXT:    ret void
//
void FuncTwoMember(struct StructTwoMember u) {
  u.y = (int2)(0, 0);
}

// The 480-byte two-member struct is expected to arrive byref in addrspace(5)
// and be copied into a local alloca before u.y[0] is zeroed.
// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
// AMDGCN-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
// AMDGCN-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
// AMDGCN-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
// AMDGCN-NEXT:    ret void
//
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
  u.y[0] = (int2)(0, 0);
}

// Kernel taking the two-member struct by value: the expected IR receives it as
// a struct value, spills both members, then reloads them and passes two
// <2 x i32> arguments to FuncTwoMember.
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN-NEXT:    store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
// AMDGCN-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
// AMDGCN-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
kernel void KernelTwoMember(struct StructTwoMember u) {
  FuncTwoMember(u);
}

// Kernel taking the 480-byte two-member struct by value: the expected IR
// receives it as a struct value, spills both arrays, and copies the struct
// into a temporary that is passed byref to FuncLargeTwoMember.
// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
// AMDGCN-NEXT:  [[ENTRY:.*:]]
// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
// AMDGCN-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
// AMDGCN-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
// AMDGCN-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
// AMDGCN-NEXT:    ret void
//
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
  FuncLargeTwoMember(u);
}
//.
// AMDGCN: [[META4]] = !{i32 1, i32 1}
// AMDGCN: [[META5]] = !{!"none", !"none"}
// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
// AMDGCN: [[META7]] = !{!"", !""}
// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
// AMDGCN: [[META9]] = !{}
// AMDGCN: [[META10]] = !{i32 0}
// AMDGCN: [[META11]] = !{!"none"}
// AMDGCN: [[META12]] = !{!"struct StructOneMember"}
// AMDGCN: [[META13]] = !{!""}
// AMDGCN: [[META14]] = !{i32 1}
// AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
// AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}
//.