xref: /llvm-project/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (revision 6e0b0038cd65ce726ce404305a06e1cf33e36cca)
1// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix=AMDGCN %s
3
4typedef int int2 __attribute__((ext_vector_type(2)));
5
6typedef struct {
7  int cells[9];
8} Mat3X3;
9
10typedef struct {
11  int cells[16];
12} Mat4X4;
13
14typedef struct {
15  int cells[1024];
16} Mat32X32;
17
18typedef struct {
19  int cells[4096];
20} Mat64X64;
21
22struct StructOneMember {
23  int2 x;
24};
25
26struct StructTwoMember {
27  int2 x;
28  int2 y;
29};
30
31struct LargeStructOneMember {
32  int2 x[100];
33};
34
35struct LargeStructTwoMember {
36  int2 x[40];
37  int2 y[20];
38};
39
40#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
41struct LargeStructOneMember g_s;
42#endif
43
44
45// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
46// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
47// AMDGCN-NEXT:  [[ENTRY:.*:]]
48// AMDGCN-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
49// AMDGCN-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
50// AMDGCN-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
51// AMDGCN-NEXT:    [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
52// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
53// AMDGCN-NEXT:    store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
54// AMDGCN-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
55// AMDGCN-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
56//
57Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
58  Mat4X4 out;
59  return out;
60}
61
62// Expect two mem copies: one for the argument "in", and one for
63// the return value.
64
65// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
66// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
67// AMDGCN-NEXT:  [[ENTRY:.*:]]
68// AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
69// AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
70// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
71// AMDGCN-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
72// AMDGCN-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
73// AMDGCN-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
74// AMDGCN-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
75// AMDGCN-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
76// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
77// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
78// AMDGCN-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
79// AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
80// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
81// AMDGCN-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
82// AMDGCN-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
83// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
84// AMDGCN-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
85// AMDGCN-NEXT:    store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
86// AMDGCN-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
87// AMDGCN-NEXT:    ret void
88//
89kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
90  out[0] = foo(in[1]);
91}
92
93// AMDGCN-LABEL: define dso_local void @foo_large(
94// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
95// AMDGCN-NEXT:  [[ENTRY:.*:]]
96// AMDGCN-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
97// AMDGCN-NEXT:    [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
98// AMDGCN-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
99// AMDGCN-NEXT:    ret void
100//
101Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
102  Mat64X64 out;
103  return out;
104}
105
106// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
107// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
108// AMDGCN-NEXT:  [[ENTRY:.*:]]
109// AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
110// AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
111// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
112// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
113// AMDGCN-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
114// AMDGCN-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
115// AMDGCN-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
116// AMDGCN-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
117// AMDGCN-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
118// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
119// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
120// AMDGCN-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
121// AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
122// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
123// AMDGCN-NEXT:    call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
124// AMDGCN-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
125// AMDGCN-NEXT:    ret void
126//
127kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
128  out[0] = foo_large(in[1]);
129}
130
131// AMDGCN-LABEL: define dso_local void @FuncOneMember(
132// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
133// AMDGCN-NEXT:  [[ENTRY:.*:]]
134// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
135// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
136// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
137// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
138// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
139// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
140// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
141// AMDGCN-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
142// AMDGCN-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
143// AMDGCN-NEXT:    store <2 x i32> [[TMP0]], ptr [[X]], align 8
144// AMDGCN-NEXT:    ret void
145//
146void FuncOneMember(struct StructOneMember u) {
147  u.x = (int2)(0, 0);
148}
149
150// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
151// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
152// AMDGCN-NEXT:  [[ENTRY:.*:]]
153// AMDGCN-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
154// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
155// AMDGCN-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
156// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
157// AMDGCN-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
158// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
159// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
160// AMDGCN-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
161// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
162// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
163// AMDGCN-NEXT:    ret void
164//
165void FuncOneLargeMember(struct LargeStructOneMember u) {
166  u.x[0] = (int2)(0, 0);
167}
168
169#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
170// AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl(
171// AMDGCN-SAME: ) #[[ATTR0]] {
172// AMDGCN-NEXT:  [[ENTRY:.*:]]
173// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
174// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
175// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
176// AMDGCN-NEXT:    ret void
177//
178void test_indirect_arg_globl(void) {
179  FuncOneLargeMember(g_s);
180}
181#endif
182
183// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
184// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
185// AMDGCN-NEXT:  [[ENTRY:.*:]]
186// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
187// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
188// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
189// AMDGCN-NEXT:    ret void
190//
191kernel void test_indirect_arg_local(void) {
192  local struct LargeStructOneMember l_s;
193  FuncOneLargeMember(l_s);
194}
195
196// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
197// AMDGCN-SAME: ) #[[ATTR0]] {
198// AMDGCN-NEXT:  [[ENTRY:.*:]]
199// AMDGCN-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
200// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
201// AMDGCN-NEXT:    [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
202// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
203// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
204// AMDGCN-NEXT:    ret void
205//
206void test_indirect_arg_private(void) {
207  struct LargeStructOneMember p_s;
208  FuncOneLargeMember(p_s);
209}
210
211// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
212// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
213// AMDGCN-NEXT:  [[ENTRY:.*:]]
214// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
215// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
216// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
217// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
218// AMDGCN-NEXT:    [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
219// AMDGCN-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
220// AMDGCN-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
221// AMDGCN-NEXT:    ret void
222//
223kernel void KernelOneMember(struct StructOneMember u) {
224  FuncOneMember(u);
225}
226
227// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
228// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
229// AMDGCN-NEXT:  [[ENTRY:.*:]]
230// AMDGCN-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
231// AMDGCN-NEXT:    [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
232// AMDGCN-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
233// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
234// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
235// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
236// AMDGCN-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
237// AMDGCN-NEXT:    ret void
238//
239kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
240  FuncOneMember(*u);
241}
242
243// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
244// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
245// AMDGCN-NEXT:  [[ENTRY:.*:]]
246// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
247// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
248// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
249// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
250// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
251// AMDGCN-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
252// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
253// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
254// AMDGCN-NEXT:    ret void
255//
256kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
257  FuncOneLargeMember(u);
258}
259
260// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
261// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
262// AMDGCN-NEXT:  [[ENTRY:.*:]]
263// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
264// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
265// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
266// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
267// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
268// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
269// AMDGCN-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
270// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
271// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
272// AMDGCN-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
273// AMDGCN-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
274// AMDGCN-NEXT:    store <2 x i32> [[TMP2]], ptr [[Y]], align 8
275// AMDGCN-NEXT:    ret void
276//
277void FuncTwoMember(struct StructTwoMember u) {
278  u.y = (int2)(0, 0);
279}
280
281// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
282// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
283// AMDGCN-NEXT:  [[ENTRY:.*:]]
284// AMDGCN-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
285// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
286// AMDGCN-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
287// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
288// AMDGCN-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
289// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
290// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
291// AMDGCN-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
292// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
293// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
294// AMDGCN-NEXT:    ret void
295//
296void FuncLargeTwoMember(struct LargeStructTwoMember u) {
297  u.y[0] = (int2)(0, 0);
298}
299
300// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
301// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
302// AMDGCN-NEXT:  [[ENTRY:.*:]]
303// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
304// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
305// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
306// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
307// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
308// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
309// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
310// AMDGCN-NEXT:    store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
311// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
312// AMDGCN-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
313// AMDGCN-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
314// AMDGCN-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
315// AMDGCN-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
316// AMDGCN-NEXT:    ret void
317//
318kernel void KernelTwoMember(struct StructTwoMember u) {
319  FuncTwoMember(u);
320}
321
322// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
323// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
324// AMDGCN-NEXT:  [[ENTRY:.*:]]
325// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
326// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
327// AMDGCN-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
328// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
329// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
330// AMDGCN-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
331// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
332// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
333// AMDGCN-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
334// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
335// AMDGCN-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
336// AMDGCN-NEXT:    ret void
337//
338kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
339  FuncLargeTwoMember(u);
340}
341//.
342// AMDGCN: [[META4]] = !{i32 1, i32 1}
343// AMDGCN: [[META5]] = !{!"none", !"none"}
344// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
345// AMDGCN: [[META7]] = !{!"", !""}
346// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
347// AMDGCN: [[META9]] = !{}
348// AMDGCN: [[META10]] = !{i32 0}
349// AMDGCN: [[META11]] = !{!"none"}
350// AMDGCN: [[META12]] = !{!"struct StructOneMember"}
351// AMDGCN: [[META13]] = !{!""}
352// AMDGCN: [[META14]] = !{i32 1}
353// AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
354// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
355// AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
356// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}
357//.
358