xref: /llvm-project/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (revision 6e0b0038cd65ce726ce404305a06e1cf33e36cca)
1// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=X86 %s
3// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN %s
4// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN20 %s
5// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -check-prefixes=SPIR %s
6// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -check-prefixes=AMDGCN30-GVAR %s
7// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -check-prefixes=AMDGCN30 %s
8
9typedef int int2 __attribute__((ext_vector_type(2)));
10
11typedef struct {
12  int cells[9];
13} Mat3X3;
14
15typedef struct {
16  int cells[16];
17} Mat4X4;
18
19typedef struct {
20  int cells[1024];
21} Mat32X32;
22
23typedef struct {
24  int cells[4096];
25} Mat64X64;
26
27struct StructOneMember {
28  int2 x;
29};
30
31struct StructTwoMember {
32  int2 x;
33  int2 y;
34};
35
36struct LargeStructOneMember {
37  int2 x[100];
38};
39
40struct LargeStructTwoMember {
41  int2 x[40];
42  int2 y[20];
43};
44
45#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
46struct LargeStructOneMember g_s;
47#endif
48
49//
50// X86-LABEL: define void @foo(
51// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
52// X86-NEXT:  [[ENTRY:.*:]]
53// X86-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
54// X86-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
55// X86-NEXT:    ret void
56//
57// AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
58// AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
59// AMDGCN-NEXT:  [[ENTRY:.*:]]
60// AMDGCN-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
61// AMDGCN-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
62// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
63// AMDGCN-NEXT:    store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
64// AMDGCN-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
65// AMDGCN-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
66//
67// AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo(
68// AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
69// AMDGCN20-NEXT:  [[ENTRY:.*:]]
70// AMDGCN20-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
71// AMDGCN20-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
72// AMDGCN20-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
73// AMDGCN20-NEXT:    [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
74// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
75// AMDGCN20-NEXT:    store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
76// AMDGCN20-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
77// AMDGCN20-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
78//
79// SPIR-LABEL: define dso_local spir_func void @foo(
80// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
81// SPIR-NEXT:  [[ENTRY:.*:]]
82// SPIR-NEXT:    ret void
83//
84// AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo(
85// AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
86// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
87// AMDGCN30-GVAR-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
88// AMDGCN30-GVAR-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
89// AMDGCN30-GVAR-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
90// AMDGCN30-GVAR-NEXT:    store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
91// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
92// AMDGCN30-GVAR-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
93//
94// AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo(
95// AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
96// AMDGCN30-NEXT:  [[ENTRY:.*:]]
97// AMDGCN30-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
98// AMDGCN30-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
99// AMDGCN30-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
100// AMDGCN30-NEXT:    store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
101// AMDGCN30-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
102// AMDGCN30-NEXT:    ret [[STRUCT_MAT4X4]] [[TMP0]]
103//
104Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
105  Mat4X4 out;
106  return out;
107}
108
109//
110// X86-LABEL: define spir_kernel void @ker(
111// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
112// X86-NEXT:  [[ENTRY:.*:]]
113// X86-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
114// X86-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
115// X86-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
116// X86-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
117// X86-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
118// X86-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
119// X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
120// X86-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0
121// X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
122// X86-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1
123// X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
124// X86-NEXT:    call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
125// X86-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
126// X86-NEXT:    ret void
127//
128// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
129// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
130// AMDGCN-NEXT:  [[ENTRY:.*:]]
131// AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
132// AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
133// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
134// AMDGCN-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
135// AMDGCN-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
136// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
137// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
138// AMDGCN-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
139// AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
140// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
141// AMDGCN-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
142// AMDGCN-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
143// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
144// AMDGCN-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
145// AMDGCN-NEXT:    store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
146// AMDGCN-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
147// AMDGCN-NEXT:    ret void
148//
149// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker(
150// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
151// AMDGCN20-NEXT:  [[ENTRY:.*:]]
152// AMDGCN20-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
153// AMDGCN20-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
154// AMDGCN20-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
155// AMDGCN20-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
156// AMDGCN20-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
157// AMDGCN20-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
158// AMDGCN20-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
159// AMDGCN20-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
160// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
161// AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
162// AMDGCN20-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
163// AMDGCN20-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
164// AMDGCN20-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
165// AMDGCN20-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
166// AMDGCN20-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
167// AMDGCN20-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
168// AMDGCN20-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
169// AMDGCN20-NEXT:    store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
170// AMDGCN20-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
171// AMDGCN20-NEXT:    ret void
172//
173// SPIR-LABEL: define dso_local spir_kernel void @ker(
174// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
175// SPIR-NEXT:  [[ENTRY:.*:]]
176// SPIR-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
177// SPIR-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
178// SPIR-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
179// SPIR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
180// SPIR-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
181// SPIR-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
182// SPIR-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
183// SPIR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0
184// SPIR-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
185// SPIR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1
186// SPIR-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
187// SPIR-NEXT:    call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
188// SPIR-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
189// SPIR-NEXT:    ret void
190//
191// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker(
192// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
193// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
194// AMDGCN30-GVAR-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
195// AMDGCN30-GVAR-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
196// AMDGCN30-GVAR-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
197// AMDGCN30-GVAR-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
198// AMDGCN30-GVAR-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
199// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
200// AMDGCN30-GVAR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
201// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
202// AMDGCN30-GVAR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
203// AMDGCN30-GVAR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
204// AMDGCN30-GVAR-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
205// AMDGCN30-GVAR-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
206// AMDGCN30-GVAR-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
207// AMDGCN30-GVAR-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
208// AMDGCN30-GVAR-NEXT:    store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
209// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
210// AMDGCN30-GVAR-NEXT:    ret void
211//
212// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker(
213// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
214// AMDGCN30-NEXT:  [[ENTRY:.*:]]
215// AMDGCN30-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
216// AMDGCN30-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
217// AMDGCN30-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
218// AMDGCN30-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
219// AMDGCN30-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
220// AMDGCN30-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
221// AMDGCN30-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
222// AMDGCN30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
223// AMDGCN30-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
224// AMDGCN30-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
225// AMDGCN30-NEXT:    [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
226// AMDGCN30-NEXT:    [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
227// AMDGCN30-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
228// AMDGCN30-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
229// AMDGCN30-NEXT:    store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
230// AMDGCN30-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
231// AMDGCN30-NEXT:    ret void
232//
233kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
234  out[0] = foo(in[1]);
235}
236
237//
238// X86-LABEL: define void @foo_large(
239// X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
240// X86-NEXT:  [[ENTRY:.*:]]
241// X86-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr, align 4
242// X86-NEXT:    store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
243// X86-NEXT:    ret void
244//
245// AMDGCN-LABEL: define dso_local void @foo_large(
246// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
247// AMDGCN-NEXT:  [[ENTRY:.*:]]
248// AMDGCN-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
249// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
250// AMDGCN-NEXT:    ret void
251//
252// AMDGCN20-LABEL: define dso_local void @foo_large(
253// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
254// AMDGCN20-NEXT:  [[ENTRY:.*:]]
255// AMDGCN20-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
256// AMDGCN20-NEXT:    [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
257// AMDGCN20-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
258// AMDGCN20-NEXT:    ret void
259//
260// SPIR-LABEL: define dso_local spir_func void @foo_large(
261// SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
262// SPIR-NEXT:  [[ENTRY:.*:]]
263// SPIR-NEXT:    ret void
264//
265// AMDGCN30-GVAR-LABEL: define dso_local void @foo_large(
266// AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
267// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
268// AMDGCN30-GVAR-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
269// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
270// AMDGCN30-GVAR-NEXT:    ret void
271//
272// AMDGCN30-LABEL: define dso_local void @foo_large(
273// AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
274// AMDGCN30-NEXT:  [[ENTRY:.*:]]
275// AMDGCN30-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
276// AMDGCN30-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
277// AMDGCN30-NEXT:    ret void
278//
279Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
280  Mat64X64 out;
281  return out;
282}
283
284//
285// X86-LABEL: define spir_kernel void @ker_large(
286// X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
287// X86-NEXT:  [[ENTRY:.*:]]
288// X86-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
289// X86-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
290// X86-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
291// X86-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
292// X86-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
293// X86-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
294// X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
295// X86-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0
296// X86-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
297// X86-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
298// X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
299// X86-NEXT:    call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
300// X86-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
301// X86-NEXT:    ret void
302//
303// AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
304// AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
305// AMDGCN-NEXT:  [[ENTRY:.*:]]
306// AMDGCN-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
307// AMDGCN-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
308// AMDGCN-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
309// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
310// AMDGCN-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
311// AMDGCN-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
312// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
313// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
314// AMDGCN-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
315// AMDGCN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
316// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
317// AMDGCN-NEXT:    call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
318// AMDGCN-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
319// AMDGCN-NEXT:    ret void
320//
321// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large(
322// AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
323// AMDGCN20-NEXT:  [[ENTRY:.*:]]
324// AMDGCN20-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
325// AMDGCN20-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
326// AMDGCN20-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
327// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
328// AMDGCN20-NEXT:    [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
329// AMDGCN20-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
330// AMDGCN20-NEXT:    [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
331// AMDGCN20-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
332// AMDGCN20-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
333// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
334// AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
335// AMDGCN20-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
336// AMDGCN20-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
337// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
338// AMDGCN20-NEXT:    call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
339// AMDGCN20-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
340// AMDGCN20-NEXT:    ret void
341//
342// SPIR-LABEL: define dso_local spir_kernel void @ker_large(
343// SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] {
344// SPIR-NEXT:  [[ENTRY:.*:]]
345// SPIR-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
346// SPIR-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
347// SPIR-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
348// SPIR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
349// SPIR-NEXT:    store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
350// SPIR-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
351// SPIR-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
352// SPIR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0
353// SPIR-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
354// SPIR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
355// SPIR-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
356// SPIR-NEXT:    call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
357// SPIR-NEXT:    call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
358// SPIR-NEXT:    ret void
359//
360// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large(
361// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
362// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
363// AMDGCN30-GVAR-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
364// AMDGCN30-GVAR-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
365// AMDGCN30-GVAR-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
366// AMDGCN30-GVAR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
367// AMDGCN30-GVAR-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
368// AMDGCN30-GVAR-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
369// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
370// AMDGCN30-GVAR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
371// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
372// AMDGCN30-GVAR-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
373// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
374// AMDGCN30-GVAR-NEXT:    call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
375// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
376// AMDGCN30-GVAR-NEXT:    ret void
377//
378// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large(
379// AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
380// AMDGCN30-NEXT:  [[ENTRY:.*:]]
381// AMDGCN30-NEXT:    [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
382// AMDGCN30-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
383// AMDGCN30-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
384// AMDGCN30-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
385// AMDGCN30-NEXT:    store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
386// AMDGCN30-NEXT:    store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
387// AMDGCN30-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
388// AMDGCN30-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
389// AMDGCN30-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
390// AMDGCN30-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
391// AMDGCN30-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
392// AMDGCN30-NEXT:    call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
393// AMDGCN30-NEXT:    call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
394// AMDGCN30-NEXT:    ret void
395//
396kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
397  out[0] = foo_large(in[1]);
398}
399
400//
401// X86-LABEL: define void @FuncOneMember(
402// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
403// X86-NEXT:  [[ENTRY:.*:]]
404// X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
405// X86-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
406// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
407// X86-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
408// X86-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
409// X86-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
410// X86-NEXT:    store <2 x i32> [[TMP1]], ptr [[X]], align 8
411// X86-NEXT:    ret void
412//
413// AMDGCN-LABEL: define dso_local void @FuncOneMember(
414// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
415// AMDGCN-NEXT:  [[ENTRY:.*:]]
416// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
417// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
418// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
419// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
420// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
421// AMDGCN-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
422// AMDGCN-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
423// AMDGCN-NEXT:    store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
424// AMDGCN-NEXT:    ret void
425//
426// AMDGCN20-LABEL: define dso_local void @FuncOneMember(
427// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
428// AMDGCN20-NEXT:  [[ENTRY:.*:]]
429// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
430// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
431// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
432// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
433// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
434// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
435// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
436// AMDGCN20-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
437// AMDGCN20-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
438// AMDGCN20-NEXT:    store <2 x i32> [[TMP0]], ptr [[X]], align 8
439// AMDGCN20-NEXT:    ret void
440//
441// SPIR-LABEL: define dso_local spir_func void @FuncOneMember(
442// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
443// SPIR-NEXT:  [[ENTRY:.*:]]
444// SPIR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
445// SPIR-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
446// SPIR-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
447// SPIR-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
448// SPIR-NEXT:    store <2 x i32> [[TMP0]], ptr [[X]], align 8
449// SPIR-NEXT:    ret void
450//
451// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember(
452// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
453// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
454// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
455// AMDGCN30-GVAR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
456// AMDGCN30-GVAR-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
457// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
458// AMDGCN30-GVAR-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
459// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
460// AMDGCN30-GVAR-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
461// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
462// AMDGCN30-GVAR-NEXT:    ret void
463//
464// AMDGCN30-LABEL: define dso_local void @FuncOneMember(
465// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
466// AMDGCN30-NEXT:  [[ENTRY:.*:]]
467// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
468// AMDGCN30-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
469// AMDGCN30-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
470// AMDGCN30-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
471// AMDGCN30-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
472// AMDGCN30-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
473// AMDGCN30-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
474// AMDGCN30-NEXT:    store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
475// AMDGCN30-NEXT:    ret void
476//
477void FuncOneMember(struct StructOneMember u) {
478  u.x = (int2)(0, 0);
479}
480
481//
482// X86-LABEL: define void @FuncOneLargeMember(
483// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
484// X86-NEXT:  [[ENTRY:.*:]]
485// X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
486// X86-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
487// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
488// X86-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
489// X86-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
490// X86-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
491// X86-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
492// X86-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
493// X86-NEXT:    ret void
494//
495// AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
496// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
497// AMDGCN-NEXT:  [[ENTRY:.*:]]
498// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
499// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
500// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
501// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
502// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
503// AMDGCN-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
504// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
505// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
506// AMDGCN-NEXT:    ret void
507//
508// AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember(
509// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
510// AMDGCN20-NEXT:  [[ENTRY:.*:]]
511// AMDGCN20-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
512// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
513// AMDGCN20-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
514// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
515// AMDGCN20-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
516// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
517// AMDGCN20-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
518// AMDGCN20-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
519// AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
520// AMDGCN20-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
521// AMDGCN20-NEXT:    ret void
522//
523// SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember(
524// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
525// SPIR-NEXT:  [[ENTRY:.*:]]
526// SPIR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
527// SPIR-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
528// SPIR-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
529// SPIR-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
530// SPIR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
531// SPIR-NEXT:    store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8
532// SPIR-NEXT:    ret void
533//
534// AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember(
535// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
536// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
537// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
538// AMDGCN30-GVAR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
539// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
540// AMDGCN30-GVAR-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
541// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
542// AMDGCN30-GVAR-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
543// AMDGCN30-GVAR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
544// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
545// AMDGCN30-GVAR-NEXT:    ret void
546//
547// AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember(
548// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
549// AMDGCN30-NEXT:  [[ENTRY:.*:]]
550// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
551// AMDGCN30-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
552// AMDGCN30-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
553// AMDGCN30-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
554// AMDGCN30-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
555// AMDGCN30-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
556// AMDGCN30-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
557// AMDGCN30-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
558// AMDGCN30-NEXT:    ret void
559//
560void FuncOneLargeMember(struct LargeStructOneMember u) {
561  u.x[0] = (int2)(0, 0);
562}
563
564#if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
565// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl(
566// AMDGCN20-SAME: ) #[[ATTR0]] {
567// AMDGCN20-NEXT:  [[ENTRY:.*:]]
568// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
569// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
570// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
571// AMDGCN20-NEXT:    ret void
572//
573// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_globl(
574// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] {
575// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
576// AMDGCN30-GVAR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
577// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
578// AMDGCN30-GVAR-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
579// AMDGCN30-GVAR-NEXT:    ret void
580//
581void test_indirect_arg_globl(void) {
582  FuncOneLargeMember(g_s);
583}
584#endif
585
586//
587// X86-LABEL: define spir_kernel void @test_indirect_arg_local(
588// X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
589// X86-NEXT:  [[ENTRY:.*:]]
590// X86-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4
591// X86-NEXT:    call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
592// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
593// X86-NEXT:    ret void
594//
595// AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
596// AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
597// AMDGCN-NEXT:  [[ENTRY:.*:]]
598// AMDGCN-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
599// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
600// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
601// AMDGCN-NEXT:    ret void
602//
603// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
604// AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
605// AMDGCN20-NEXT:  [[ENTRY:.*:]]
606// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
607// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
608// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
609// AMDGCN20-NEXT:    ret void
610//
611// SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local(
612// SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
613// SPIR-NEXT:  [[ENTRY:.*:]]
614// SPIR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
615// SPIR-NEXT:    call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
616// SPIR-NEXT:    call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
617// SPIR-NEXT:    ret void
618//
619// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
620// AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
621// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
622// AMDGCN30-GVAR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
623// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
624// AMDGCN30-GVAR-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
625// AMDGCN30-GVAR-NEXT:    ret void
626//
627// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
628// AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
629// AMDGCN30-NEXT:  [[ENTRY:.*:]]
630// AMDGCN30-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
631// AMDGCN30-NEXT:    call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
632// AMDGCN30-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
633// AMDGCN30-NEXT:    ret void
634//
635kernel void test_indirect_arg_local(void) {
636  local struct LargeStructOneMember l_s;
637  FuncOneLargeMember(l_s);
638}
639
640//
641// X86-LABEL: define void @test_indirect_arg_private(
642// X86-SAME: ) #[[ATTR0]] {
643// X86-NEXT:  [[ENTRY:.*:]]
644// X86-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
645// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]]
646// X86-NEXT:    ret void
647//
648// AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
649// AMDGCN-SAME: ) #[[ATTR0]] {
650// AMDGCN-NEXT:  [[ENTRY:.*:]]
651// AMDGCN-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
652// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
653// AMDGCN-NEXT:    ret void
654//
655// AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private(
656// AMDGCN20-SAME: ) #[[ATTR0]] {
657// AMDGCN20-NEXT:  [[ENTRY:.*:]]
658// AMDGCN20-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
659// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
660// AMDGCN20-NEXT:    [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
661// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
662// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
663// AMDGCN20-NEXT:    ret void
664//
665// SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private(
666// SPIR-SAME: ) #[[ATTR0]] {
667// SPIR-NEXT:  [[ENTRY:.*:]]
668// SPIR-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
669// SPIR-NEXT:    call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
670// SPIR-NEXT:    ret void
671//
672// AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private(
673// AMDGCN30-GVAR-SAME: ) #[[ATTR0]] {
674// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
675// AMDGCN30-GVAR-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
676// AMDGCN30-GVAR-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
677// AMDGCN30-GVAR-NEXT:    ret void
678//
679// AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private(
680// AMDGCN30-SAME: ) #[[ATTR0]] {
681// AMDGCN30-NEXT:  [[ENTRY:.*:]]
682// AMDGCN30-NEXT:    [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
683// AMDGCN30-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
684// AMDGCN30-NEXT:    ret void
685//
686void test_indirect_arg_private(void) {
687  struct LargeStructOneMember p_s;
688  FuncOneLargeMember(p_s);
689}
690
691//
692// X86-LABEL: define spir_kernel void @KernelOneMember(
693// X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
694// X86-NEXT:  [[ENTRY:.*:]]
695// X86-NEXT:    call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
696// X86-NEXT:    ret void
697//
698// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
699// AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
700// AMDGCN-NEXT:  [[ENTRY:.*:]]
701// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
702// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
703// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
704// AMDGCN-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
705// AMDGCN-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
706// AMDGCN-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
707// AMDGCN-NEXT:    ret void
708//
709// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
710// AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
711// AMDGCN20-NEXT:  [[ENTRY:.*:]]
712// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
713// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
714// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
715// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
716// AMDGCN20-NEXT:    [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
717// AMDGCN20-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
718// AMDGCN20-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
719// AMDGCN20-NEXT:    ret void
720//
721// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember(
722// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] {
723// SPIR-NEXT:  [[ENTRY:.*:]]
724// SPIR-NEXT:    call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
725// SPIR-NEXT:    ret void
726//
727// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
728// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
729// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
730// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
731// AMDGCN30-GVAR-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
732// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
733// AMDGCN30-GVAR-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
734// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
735// AMDGCN30-GVAR-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
736// AMDGCN30-GVAR-NEXT:    ret void
737//
738// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
739// AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
740// AMDGCN30-NEXT:  [[ENTRY:.*:]]
741// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
742// AMDGCN30-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
743// AMDGCN30-NEXT:    store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
744// AMDGCN30-NEXT:    [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
745// AMDGCN30-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
746// AMDGCN30-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
747// AMDGCN30-NEXT:    ret void
748//
749kernel void KernelOneMember(struct StructOneMember u) {
750  FuncOneMember(u);
751}
752
753//
754// X86-LABEL: define spir_kernel void @KernelOneMemberSpir(
755// X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
756// X86-NEXT:  [[ENTRY:.*:]]
757// X86-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
758// X86-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4
759// X86-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
760// X86-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
761// X86-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
762// X86-NEXT:    call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
763// X86-NEXT:    ret void
764//
765// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
766// AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
767// AMDGCN-NEXT:  [[ENTRY:.*:]]
768// AMDGCN-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
769// AMDGCN-NEXT:    store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
770// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
771// AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
772// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
773// AMDGCN-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
774// AMDGCN-NEXT:    ret void
775//
776// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
777// AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
778// AMDGCN20-NEXT:  [[ENTRY:.*:]]
779// AMDGCN20-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
780// AMDGCN20-NEXT:    [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
781// AMDGCN20-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
782// AMDGCN20-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
783// AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
784// AMDGCN20-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
785// AMDGCN20-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
786// AMDGCN20-NEXT:    ret void
787//
788// SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir(
789// SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] {
790// SPIR-NEXT:  [[ENTRY:.*:]]
791// SPIR-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
792// SPIR-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8
793// SPIR-NEXT:    store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
794// SPIR-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
795// SPIR-NEXT:    call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
796// SPIR-NEXT:    call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
797// SPIR-NEXT:    ret void
798//
799// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
800// AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
801// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
802// AMDGCN30-GVAR-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
803// AMDGCN30-GVAR-NEXT:    store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
804// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
805// AMDGCN30-GVAR-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
806// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
807// AMDGCN30-GVAR-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
808// AMDGCN30-GVAR-NEXT:    ret void
809//
810// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
811// AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
812// AMDGCN30-NEXT:  [[ENTRY:.*:]]
813// AMDGCN30-NEXT:    [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
814// AMDGCN30-NEXT:    store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
815// AMDGCN30-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
816// AMDGCN30-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
817// AMDGCN30-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
818// AMDGCN30-NEXT:    call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
819// AMDGCN30-NEXT:    ret void
820//
821kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
822  FuncOneMember(*u);
823}
824
825//
826// X86-LABEL: define spir_kernel void @KernelLargeOneMember(
827// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
828// X86-NEXT:  [[ENTRY:.*:]]
829// X86-NEXT:    call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
830// X86-NEXT:    ret void
831//
832// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
833// AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
834// AMDGCN-NEXT:  [[ENTRY:.*:]]
835// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
836// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
837// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
838// AMDGCN-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
839// AMDGCN-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
840// AMDGCN-NEXT:    ret void
841//
842// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
843// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
844// AMDGCN20-NEXT:  [[ENTRY:.*:]]
845// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
846// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
847// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
848// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
849// AMDGCN20-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
850// AMDGCN20-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
851// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
852// AMDGCN20-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
853// AMDGCN20-NEXT:    ret void
854//
855// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember(
856// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] {
857// SPIR-NEXT:  [[ENTRY:.*:]]
858// SPIR-NEXT:    call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
859// SPIR-NEXT:    ret void
860//
861// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
862// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
863// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
864// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
865// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
866// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
867// AMDGCN30-GVAR-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
868// AMDGCN30-GVAR-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
869// AMDGCN30-GVAR-NEXT:    ret void
870//
871// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
872// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
873// AMDGCN30-NEXT:  [[ENTRY:.*:]]
874// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
875// AMDGCN30-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
876// AMDGCN30-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
877// AMDGCN30-NEXT:    store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
878// AMDGCN30-NEXT:    call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
879// AMDGCN30-NEXT:    ret void
880//
881kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
882  FuncOneLargeMember(u);
883}
884
885//
886// X86-LABEL: define void @FuncTwoMember(
887// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
888// X86-NEXT:  [[ENTRY:.*:]]
889// X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
890// X86-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
891// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
892// X86-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
893// X86-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
894// X86-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
895// X86-NEXT:    store <2 x i32> [[TMP1]], ptr [[Y]], align 8
896// X86-NEXT:    ret void
897//
898// AMDGCN-LABEL: define dso_local void @FuncTwoMember(
899// AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
900// AMDGCN-NEXT:  [[ENTRY:.*:]]
901// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
902// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
903// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
904// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
905// AMDGCN-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
906// AMDGCN-NEXT:    store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
907// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
908// AMDGCN-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
909// AMDGCN-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
910// AMDGCN-NEXT:    store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
911// AMDGCN-NEXT:    ret void
912//
913// AMDGCN20-LABEL: define dso_local void @FuncTwoMember(
914// AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
915// AMDGCN20-NEXT:  [[ENTRY:.*:]]
916// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
917// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
918// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
919// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
920// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
921// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
922// AMDGCN20-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
923// AMDGCN20-NEXT:    store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
924// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
925// AMDGCN20-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
926// AMDGCN20-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
927// AMDGCN20-NEXT:    store <2 x i32> [[TMP2]], ptr [[Y]], align 8
928// AMDGCN20-NEXT:    ret void
929//
930// SPIR-LABEL: define dso_local spir_func void @FuncTwoMember(
931// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
932// SPIR-NEXT:  [[ENTRY:.*:]]
933// SPIR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
934// SPIR-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
935// SPIR-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
936// SPIR-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
937// SPIR-NEXT:    store <2 x i32> [[TMP0]], ptr [[Y]], align 8
938// SPIR-NEXT:    ret void
939//
940// AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember(
941// AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
942// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
943// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
944// AMDGCN30-GVAR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
945// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
946// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
947// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
948// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
949// AMDGCN30-GVAR-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
950// AMDGCN30-GVAR-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
951// AMDGCN30-GVAR-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
952// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
953// AMDGCN30-GVAR-NEXT:    ret void
954//
955// AMDGCN30-LABEL: define dso_local void @FuncTwoMember(
956// AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
957// AMDGCN30-NEXT:  [[ENTRY:.*:]]
958// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
959// AMDGCN30-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
960// AMDGCN30-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
961// AMDGCN30-NEXT:    store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
962// AMDGCN30-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
963// AMDGCN30-NEXT:    store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
964// AMDGCN30-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
965// AMDGCN30-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
966// AMDGCN30-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
967// AMDGCN30-NEXT:    store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
968// AMDGCN30-NEXT:    ret void
969//
970void FuncTwoMember(struct StructTwoMember u) {
971  u.y = (int2)(0, 0);
972}
973
974//
975// X86-LABEL: define void @FuncLargeTwoMember(
976// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
977// X86-NEXT:  [[ENTRY:.*:]]
978// X86-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
979// X86-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
980// X86-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
981// X86-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
982// X86-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
983// X86-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
984// X86-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
985// X86-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
986// X86-NEXT:    ret void
987//
988// AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
989// AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
990// AMDGCN-NEXT:  [[ENTRY:.*:]]
991// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
992// AMDGCN-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
993// AMDGCN-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
994// AMDGCN-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
995// AMDGCN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
996// AMDGCN-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
997// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
998// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
999// AMDGCN-NEXT:    ret void
1000//
1001// AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember(
1002// AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
1003// AMDGCN20-NEXT:  [[ENTRY:.*:]]
1004// AMDGCN20-NEXT:    [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1005// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
1006// AMDGCN20-NEXT:    [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
1007// AMDGCN20-NEXT:    [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
1008// AMDGCN20-NEXT:    call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
1009// AMDGCN20-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
1010// AMDGCN20-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
1011// AMDGCN20-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
1012// AMDGCN20-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
1013// AMDGCN20-NEXT:    store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
1014// AMDGCN20-NEXT:    ret void
1015//
1016// SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember(
1017// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
1018// SPIR-NEXT:  [[ENTRY:.*:]]
1019// SPIR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
1020// SPIR-NEXT:    store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
1021// SPIR-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
1022// SPIR-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
1023// SPIR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
1024// SPIR-NEXT:    store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8
1025// SPIR-NEXT:    ret void
1026//
1027// AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember(
1028// AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
1029// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
1030// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1031// AMDGCN30-GVAR-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
1032// AMDGCN30-GVAR-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
1033// AMDGCN30-GVAR-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1034// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1035// AMDGCN30-GVAR-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1036// AMDGCN30-GVAR-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
1037// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
1038// AMDGCN30-GVAR-NEXT:    ret void
1039//
1040// AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember(
1041// AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
1042// AMDGCN30-NEXT:  [[ENTRY:.*:]]
1043// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1044// AMDGCN30-NEXT:    [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
1045// AMDGCN30-NEXT:    call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
1046// AMDGCN30-NEXT:    store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1047// AMDGCN30-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1048// AMDGCN30-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1049// AMDGCN30-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
1050// AMDGCN30-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
1051// AMDGCN30-NEXT:    ret void
1052//
1053void FuncLargeTwoMember(struct LargeStructTwoMember u) {
1054  u.y[0] = (int2)(0, 0);
1055}
1056
1057//
1058// X86-LABEL: define spir_kernel void @KernelTwoMember(
1059// X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1060// X86-NEXT:  [[ENTRY:.*:]]
1061// X86-NEXT:    call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
1062// X86-NEXT:    ret void
1063//
1064// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1065// AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1066// AMDGCN-NEXT:  [[ENTRY:.*:]]
1067// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1068// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1069// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1070// AMDGCN-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1071// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1072// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1073// AMDGCN-NEXT:    store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1074// AMDGCN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1075// AMDGCN-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
1076// AMDGCN-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1077// AMDGCN-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
1078// AMDGCN-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1079// AMDGCN-NEXT:    ret void
1080//
1081// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1082// AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1083// AMDGCN20-NEXT:  [[ENTRY:.*:]]
1084// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1085// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
1086// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
1087// AMDGCN20-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1088// AMDGCN20-NEXT:    store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
1089// AMDGCN20-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
1090// AMDGCN20-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1091// AMDGCN20-NEXT:    store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
1092// AMDGCN20-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
1093// AMDGCN20-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
1094// AMDGCN20-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
1095// AMDGCN20-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
1096// AMDGCN20-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1097// AMDGCN20-NEXT:    ret void
1098//
1099// SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember(
1100// SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] {
1101// SPIR-NEXT:  [[ENTRY:.*:]]
1102// SPIR-NEXT:    call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1103// SPIR-NEXT:    ret void
1104//
1105// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1106// AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1107// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
1108// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1109// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1110// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1111// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1112// AMDGCN30-GVAR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1113// AMDGCN30-GVAR-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1114// AMDGCN30-GVAR-NEXT:    store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1115// AMDGCN30-GVAR-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1116// AMDGCN30-GVAR-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
1117// AMDGCN30-GVAR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1118// AMDGCN30-GVAR-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
1119// AMDGCN30-GVAR-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1120// AMDGCN30-GVAR-NEXT:    ret void
1121//
1122// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1123// AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1124// AMDGCN30-NEXT:  [[ENTRY:.*:]]
1125// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1126// AMDGCN30-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1127// AMDGCN30-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1128// AMDGCN30-NEXT:    store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1129// AMDGCN30-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1130// AMDGCN30-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1131// AMDGCN30-NEXT:    store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1132// AMDGCN30-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1133// AMDGCN30-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
1134// AMDGCN30-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1135// AMDGCN30-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
1136// AMDGCN30-NEXT:    call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1137// AMDGCN30-NEXT:    ret void
1138//
1139kernel void KernelTwoMember(struct StructTwoMember u) {
1140  FuncTwoMember(u);
1141}
1142
1143//
1144// X86-LABEL: define spir_kernel void @KernelLargeTwoMember(
1145// X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1146// X86-NEXT:  [[ENTRY:.*:]]
1147// X86-NEXT:    call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
1148// X86-NEXT:    ret void
1149//
1150// AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1151// AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1152// AMDGCN-NEXT:  [[ENTRY:.*:]]
1153// AMDGCN-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1154// AMDGCN-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1155// AMDGCN-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1156// AMDGCN-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1157// AMDGCN-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1158// AMDGCN-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1159// AMDGCN-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1160// AMDGCN-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1161// AMDGCN-NEXT:    ret void
1162//
1163// AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1164// AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1165// AMDGCN20-NEXT:  [[ENTRY:.*:]]
1166// AMDGCN20-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1167// AMDGCN20-NEXT:    [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1168// AMDGCN20-NEXT:    [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
1169// AMDGCN20-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
1170// AMDGCN20-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1171// AMDGCN20-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
1172// AMDGCN20-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
1173// AMDGCN20-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1174// AMDGCN20-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
1175// AMDGCN20-NEXT:    call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
1176// AMDGCN20-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
1177// AMDGCN20-NEXT:    ret void
1178//
1179// SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember(
1180// SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] {
1181// SPIR-NEXT:  [[ENTRY:.*:]]
1182// SPIR-NEXT:    call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1183// SPIR-NEXT:    ret void
1184//
1185// AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1186// AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1187// AMDGCN30-GVAR-NEXT:  [[ENTRY:.*:]]
1188// AMDGCN30-GVAR-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1189// AMDGCN30-GVAR-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1190// AMDGCN30-GVAR-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1191// AMDGCN30-GVAR-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1192// AMDGCN30-GVAR-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1193// AMDGCN30-GVAR-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1194// AMDGCN30-GVAR-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1195// AMDGCN30-GVAR-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1196// AMDGCN30-GVAR-NEXT:    ret void
1197//
1198// AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1199// AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1200// AMDGCN30-NEXT:  [[ENTRY:.*:]]
1201// AMDGCN30-NEXT:    [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1202// AMDGCN30-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1203// AMDGCN30-NEXT:    [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1204// AMDGCN30-NEXT:    store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1205// AMDGCN30-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1206// AMDGCN30-NEXT:    [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1207// AMDGCN30-NEXT:    store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1208// AMDGCN30-NEXT:    call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1209// AMDGCN30-NEXT:    ret void
1210//
1211kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
1212  FuncLargeTwoMember(u);
1213}
1214//.
1215// X86: [[META4]] = !{i32 1, i32 1}
1216// X86: [[META5]] = !{!"none", !"none"}
1217// X86: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1218// X86: [[META7]] = !{!"", !""}
1219// X86: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1220// X86: [[META9]] = !{}
1221// X86: [[META10]] = !{i32 0}
1222// X86: [[META11]] = !{!"none"}
1223// X86: [[META12]] = !{!"struct StructOneMember"}
1224// X86: [[META13]] = !{!""}
1225// X86: [[META14]] = !{i32 1}
1226// X86: [[META15]] = !{!"struct StructOneMember*"}
1227// X86: [[META16]] = !{!"struct LargeStructOneMember"}
1228// X86: [[META17]] = !{!"struct StructTwoMember"}
1229// X86: [[META18]] = !{!"struct LargeStructTwoMember"}
1230//.
1231// AMDGCN: [[META4]] = !{i32 1, i32 1}
1232// AMDGCN: [[META5]] = !{!"none", !"none"}
1233// AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1234// AMDGCN: [[META7]] = !{!"", !""}
1235// AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1236// AMDGCN: [[META9]] = !{}
1237// AMDGCN: [[META10]] = !{i32 0}
1238// AMDGCN: [[META11]] = !{!"none"}
1239// AMDGCN: [[META12]] = !{!"struct StructOneMember"}
1240// AMDGCN: [[META13]] = !{!""}
1241// AMDGCN: [[META14]] = !{i32 1}
1242// AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
1243// AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
1244// AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
1245// AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}
1246//.
1247// AMDGCN20: [[META4]] = !{i32 1, i32 1}
1248// AMDGCN20: [[META5]] = !{!"none", !"none"}
1249// AMDGCN20: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1250// AMDGCN20: [[META7]] = !{!"", !""}
1251// AMDGCN20: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1252// AMDGCN20: [[META9]] = !{}
1253// AMDGCN20: [[META10]] = !{i32 0}
1254// AMDGCN20: [[META11]] = !{!"none"}
1255// AMDGCN20: [[META12]] = !{!"struct StructOneMember"}
1256// AMDGCN20: [[META13]] = !{!""}
1257// AMDGCN20: [[META14]] = !{i32 1}
1258// AMDGCN20: [[META15]] = !{!"struct StructOneMember*"}
1259// AMDGCN20: [[META16]] = !{!"struct LargeStructOneMember"}
1260// AMDGCN20: [[META17]] = !{!"struct StructTwoMember"}
1261// AMDGCN20: [[META18]] = !{!"struct LargeStructTwoMember"}
1262//.
1263// SPIR: [[META3]] = !{i32 1, i32 1}
1264// SPIR: [[META4]] = !{!"none", !"none"}
1265// SPIR: [[META5]] = !{!"Mat3X3*", !"Mat4X4*"}
1266// SPIR: [[META6]] = !{!"", !""}
1267// SPIR: [[META7]] = !{!"Mat32X32*", !"Mat64X64*"}
1268// SPIR: [[META8]] = !{}
1269// SPIR: [[META9]] = !{i32 0}
1270// SPIR: [[META10]] = !{!"none"}
1271// SPIR: [[META11]] = !{!"struct StructOneMember"}
1272// SPIR: [[META12]] = !{!""}
1273// SPIR: [[META13]] = !{i32 1}
1274// SPIR: [[META14]] = !{!"struct StructOneMember*"}
1275// SPIR: [[META15]] = !{!"struct LargeStructOneMember"}
1276// SPIR: [[META16]] = !{!"struct StructTwoMember"}
1277// SPIR: [[META17]] = !{!"struct LargeStructTwoMember"}
1278//.
1279// AMDGCN30-GVAR: [[META4]] = !{i32 1, i32 1}
1280// AMDGCN30-GVAR: [[META5]] = !{!"none", !"none"}
1281// AMDGCN30-GVAR: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1282// AMDGCN30-GVAR: [[META7]] = !{!"", !""}
1283// AMDGCN30-GVAR: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1284// AMDGCN30-GVAR: [[META9]] = !{}
1285// AMDGCN30-GVAR: [[META10]] = !{i32 0}
1286// AMDGCN30-GVAR: [[META11]] = !{!"none"}
1287// AMDGCN30-GVAR: [[META12]] = !{!"struct StructOneMember"}
1288// AMDGCN30-GVAR: [[META13]] = !{!""}
1289// AMDGCN30-GVAR: [[META14]] = !{i32 1}
1290// AMDGCN30-GVAR: [[META15]] = !{!"struct StructOneMember*"}
1291// AMDGCN30-GVAR: [[META16]] = !{!"struct LargeStructOneMember"}
1292// AMDGCN30-GVAR: [[META17]] = !{!"struct StructTwoMember"}
1293// AMDGCN30-GVAR: [[META18]] = !{!"struct LargeStructTwoMember"}
1294//.
1295// AMDGCN30: [[META4]] = !{i32 1, i32 1}
1296// AMDGCN30: [[META5]] = !{!"none", !"none"}
1297// AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1298// AMDGCN30: [[META7]] = !{!"", !""}
1299// AMDGCN30: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1300// AMDGCN30: [[META9]] = !{}
1301// AMDGCN30: [[META10]] = !{i32 0}
1302// AMDGCN30: [[META11]] = !{!"none"}
1303// AMDGCN30: [[META12]] = !{!"struct StructOneMember"}
1304// AMDGCN30: [[META13]] = !{!""}
1305// AMDGCN30: [[META14]] = !{i32 1}
1306// AMDGCN30: [[META15]] = !{!"struct StructOneMember*"}
1307// AMDGCN30: [[META16]] = !{!"struct LargeStructOneMember"}
1308// AMDGCN30: [[META17]] = !{!"struct StructTwoMember"}
1309// AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"}
1310//.
1311