xref: /llvm-project/llvm/test/CodeGen/NVPTX/param-vectorize-kernel.ll (revision ef8655adc8e025f1614c8540a791560f1a2a6bbc)
1; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s
2; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %}
3;
4; Check that parameters of a __global__ (kernel) function do not get increased
5; alignment, and no additional vectorization is performed on loads/stores with
6; that parameters.
7;
8; Test IR is a minimized version of IR generated with the following command
9; from the source code below:
10; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu
11;
12; ----------------------------------------------------------------------------
13; #include <stdint.h>
14;
15; struct St4x1 { uint32_t field[1]; };
16; struct St4x2 { uint32_t field[2]; };
17; struct St4x3 { uint32_t field[3]; };
18; struct St4x4 { uint32_t field[4]; };
19; struct St4x5 { uint32_t field[5]; };
20; struct St4x6 { uint32_t field[6]; };
21; struct St4x7 { uint32_t field[7]; };
22; struct St4x8 { uint32_t field[8]; };
23; struct St8x1 { uint64_t field[1]; };
24; struct St8x2 { uint64_t field[2]; };
25; struct St8x3 { uint64_t field[3]; };
26; struct St8x4 { uint64_t field[4]; };
27;
28; #define DECLARE_FUNCTION(StName)                                    \
29; static __global__  __attribute__((noinline))                        \
30; void foo_##StName(struct StName in, struct StName* ret) {           \
31;   const unsigned size = sizeof(ret->field) / sizeof(*ret->field);   \
32;   for (unsigned i = 0; i != size; ++i)                              \
33;     ret->field[i] = in.field[i];                                    \
34; }                                                                   \
35;
36; DECLARE_FUNCTION(St4x1)
37; DECLARE_FUNCTION(St4x2)
38; DECLARE_FUNCTION(St4x3)
39; DECLARE_FUNCTION(St4x4)
40; DECLARE_FUNCTION(St4x5)
41; DECLARE_FUNCTION(St4x6)
42; DECLARE_FUNCTION(St4x7)
43; DECLARE_FUNCTION(St4x8)
44; DECLARE_FUNCTION(St8x1)
45; DECLARE_FUNCTION(St8x2)
46; DECLARE_FUNCTION(St8x3)
47; DECLARE_FUNCTION(St8x4)
48; ----------------------------------------------------------------------------
49
50%struct.St4x1 = type { [1 x i32] }
51%struct.St4x2 = type { [2 x i32] }
52%struct.St4x3 = type { [3 x i32] }
53%struct.St4x4 = type { [4 x i32] }
54%struct.St4x5 = type { [5 x i32] }
55%struct.St4x6 = type { [6 x i32] }
56%struct.St4x7 = type { [7 x i32] }
57%struct.St4x8 = type { [8 x i32] }
58%struct.St8x1 = type { [1 x i64] }
59%struct.St8x2 = type { [2 x i64] }
60%struct.St8x3 = type { [3 x i64] }
61%struct.St8x4 = type { [4 x i64] }
62
63define dso_local void @foo_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) {
64  ; CHECK-LABEL: .visible .func foo_St4x1(
65  ; CHECK:               .param .align 4 .b8 foo_St4x1_param_0[4],
66  ; CHECK:               .param .b64 foo_St4x1_param_1
67  ; CHECK:       )
68  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x1_param_1];
69  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0];
70  ; CHECK:       st.u32  [[[R1]]], [[R2]];
71  ; CHECK:       ret;
72  %1 = load i32, ptr %in, align 4
73  store i32 %1, ptr %ret, align 4
74  ret void
75}
76
77define dso_local void @foo_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) {
78  ; CHECK-LABEL: .visible .func foo_St4x2(
79  ; CHECK:               .param .align 4 .b8 foo_St4x2_param_0[8],
80  ; CHECK:               .param .b64 foo_St4x2_param_1
81  ; CHECK:       )
82  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x2_param_1];
83  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0];
84  ; CHECK:       st.u32  [[[R1]]], [[R2]];
85  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4];
86  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
87  ; CHECK:       ret;
88  %1 = load i32, ptr %in, align 4
89  store i32 %1, ptr %ret, align 4
90  %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1
91  %2 = load i32, ptr %arrayidx.1, align 4
92  %arrayidx3.1 = getelementptr inbounds [2 x i32], ptr %ret, i64 0, i64 1
93  store i32 %2, ptr %arrayidx3.1, align 4
94  ret void
95}
96
97define dso_local void @foo_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) {
98  ; CHECK-LABEL: .visible .func foo_St4x3(
99  ; CHECK:               .param .align 4 .b8 foo_St4x3_param_0[12],
100  ; CHECK:               .param .b64 foo_St4x3_param_1
101  ; CHECK:       )
102  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x3_param_1];
103  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0];
104  ; CHECK:       st.u32  [[[R1]]], [[R2]];
105  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4];
106  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
107  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8];
108  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
109  ; CHECK:       ret;
110  %1 = load i32, ptr %in, align 4
111  store i32 %1, ptr %ret, align 4
112  %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1
113  %2 = load i32, ptr %arrayidx.1, align 4
114  %arrayidx3.1 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 1
115  store i32 %2, ptr %arrayidx3.1, align 4
116  %arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2
117  %3 = load i32, ptr %arrayidx.2, align 4
118  %arrayidx3.2 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 2
119  store i32 %3, ptr %arrayidx3.2, align 4
120  ret void
121}
122
123define dso_local void @foo_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) {
124  ; CHECK-LABEL: .visible .func foo_St4x4(
125  ; CHECK:               .param .align 4 .b8 foo_St4x4_param_0[16],
126  ; CHECK:               .param .b64 foo_St4x4_param_1
127  ; CHECK:       )
128  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x4_param_1];
129  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0];
130  ; CHECK:       st.u32  [[[R1]]], [[R2]];
131  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4];
132  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
133  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8];
134  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
135  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12];
136  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
137  ; CHECK:       ret;
138  %1 = load i32, ptr %in, align 4
139  store i32 %1, ptr %ret, align 4
140  %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
141  %2 = load i32, ptr %arrayidx.1, align 4
142  %arrayidx3.1 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 1
143  store i32 %2, ptr %arrayidx3.1, align 4
144  %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
145  %3 = load i32, ptr %arrayidx.2, align 4
146  %arrayidx3.2 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 2
147  store i32 %3, ptr %arrayidx3.2, align 4
148  %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
149  %4 = load i32, ptr %arrayidx.3, align 4
150  %arrayidx3.3 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 3
151  store i32 %4, ptr %arrayidx3.3, align 4
152  ret void
153}
154
155define dso_local void @foo_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) {
156  ; CHECK-LABEL: .visible .func foo_St4x5(
157  ; CHECK:               .param .align 4 .b8 foo_St4x5_param_0[20],
158  ; CHECK:               .param .b64 foo_St4x5_param_1
159  ; CHECK:       )
160  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x5_param_1];
161  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0];
162  ; CHECK:       st.u32  [[[R1]]], [[R2]];
163  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4];
164  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
165  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8];
166  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
167  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12];
168  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
169  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16];
170  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
171  ; CHECK:       ret;
172  %1 = load i32, ptr %in, align 4
173  store i32 %1, ptr %ret, align 4
174  %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1
175  %2 = load i32, ptr %arrayidx.1, align 4
176  %arrayidx3.1 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 1
177  store i32 %2, ptr %arrayidx3.1, align 4
178  %arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2
179  %3 = load i32, ptr %arrayidx.2, align 4
180  %arrayidx3.2 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 2
181  store i32 %3, ptr %arrayidx3.2, align 4
182  %arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3
183  %4 = load i32, ptr %arrayidx.3, align 4
184  %arrayidx3.3 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 3
185  store i32 %4, ptr %arrayidx3.3, align 4
186  %arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4
187  %5 = load i32, ptr %arrayidx.4, align 4
188  %arrayidx3.4 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 4
189  store i32 %5, ptr %arrayidx3.4, align 4
190  ret void
191}
192
193define dso_local void @foo_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) {
194  ; CHECK-LABEL: .visible .func foo_St4x6(
195  ; CHECK:               .param .align 4 .b8 foo_St4x6_param_0[24],
196  ; CHECK:               .param .b64 foo_St4x6_param_1
197  ; CHECK:       )
198  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x6_param_1];
199  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0];
200  ; CHECK:       st.u32  [[[R1]]], [[R2]];
201  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4];
202  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
203  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8];
204  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
205  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12];
206  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
207  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16];
208  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
209  ; CHECK:       ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20];
210  ; CHECK:       st.u32  [[[R1]]+20], [[R7]];
211  ; CHECK:       ret;
212  %1 = load i32, ptr %in, align 4
213  store i32 %1, ptr %ret, align 4
214  %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1
215  %2 = load i32, ptr %arrayidx.1, align 4
216  %arrayidx3.1 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 1
217  store i32 %2, ptr %arrayidx3.1, align 4
218  %arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2
219  %3 = load i32, ptr %arrayidx.2, align 4
220  %arrayidx3.2 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 2
221  store i32 %3, ptr %arrayidx3.2, align 4
222  %arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3
223  %4 = load i32, ptr %arrayidx.3, align 4
224  %arrayidx3.3 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 3
225  store i32 %4, ptr %arrayidx3.3, align 4
226  %arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4
227  %5 = load i32, ptr %arrayidx.4, align 4
228  %arrayidx3.4 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 4
229  store i32 %5, ptr %arrayidx3.4, align 4
230  %arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5
231  %6 = load i32, ptr %arrayidx.5, align 4
232  %arrayidx3.5 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 5
233  store i32 %6, ptr %arrayidx3.5, align 4
234  ret void
235}
236
237define dso_local void @foo_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) {
238  ; CHECK-LABEL: .visible .func foo_St4x7(
239  ; CHECK:               .param .align 4 .b8 foo_St4x7_param_0[28],
240  ; CHECK:               .param .b64 foo_St4x7_param_1
241  ; CHECK:       )
242  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x7_param_1];
243  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0];
244  ; CHECK:       st.u32  [[[R1]]], [[R2]];
245  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4];
246  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
247  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8];
248  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
249  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12];
250  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
251  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16];
252  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
253  ; CHECK:       ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20];
254  ; CHECK:       st.u32  [[[R1]]+20], [[R7]];
255  ; CHECK:       ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24];
256  ; CHECK:       st.u32  [[[R1]]+24], [[R8]];
257  ; CHECK:       ret;
258  %1 = load i32, ptr %in, align 4
259  store i32 %1, ptr %ret, align 4
260  %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1
261  %2 = load i32, ptr %arrayidx.1, align 4
262  %arrayidx3.1 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 1
263  store i32 %2, ptr %arrayidx3.1, align 4
264  %arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2
265  %3 = load i32, ptr %arrayidx.2, align 4
266  %arrayidx3.2 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 2
267  store i32 %3, ptr %arrayidx3.2, align 4
268  %arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3
269  %4 = load i32, ptr %arrayidx.3, align 4
270  %arrayidx3.3 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 3
271  store i32 %4, ptr %arrayidx3.3, align 4
272  %arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4
273  %5 = load i32, ptr %arrayidx.4, align 4
274  %arrayidx3.4 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 4
275  store i32 %5, ptr %arrayidx3.4, align 4
276  %arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5
277  %6 = load i32, ptr %arrayidx.5, align 4
278  %arrayidx3.5 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 5
279  store i32 %6, ptr %arrayidx3.5, align 4
280  %arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6
281  %7 = load i32, ptr %arrayidx.6, align 4
282  %arrayidx3.6 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 6
283  store i32 %7, ptr %arrayidx3.6, align 4
284  ret void
285}
286
287define dso_local void @foo_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) {
288  ; CHECK-LABEL: .visible .func foo_St4x8(
289  ; CHECK:               .param .align 4 .b8 foo_St4x8_param_0[32],
290  ; CHECK:               .param .b64 foo_St4x8_param_1
291  ; CHECK:       )
292  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x8_param_1];
293  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0];
294  ; CHECK:       st.u32  [[[R1]]], [[R2]];
295  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4];
296  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
297  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8];
298  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
299  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12];
300  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
301  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16];
302  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
303  ; CHECK:       ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20];
304  ; CHECK:       st.u32  [[[R1]]+20], [[R7]];
305  ; CHECK:       ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24];
306  ; CHECK:       st.u32  [[[R1]]+24], [[R8]];
307  ; CHECK:       ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28];
308  ; CHECK:       st.u32  [[[R1]]+28], [[R9]];
309  ; CHECK:       ret;
310  %1 = load i32, ptr %in, align 4
311  store i32 %1, ptr %ret, align 4
312  %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1
313  %2 = load i32, ptr %arrayidx.1, align 4
314  %arrayidx3.1 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 1
315  store i32 %2, ptr %arrayidx3.1, align 4
316  %arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2
317  %3 = load i32, ptr %arrayidx.2, align 4
318  %arrayidx3.2 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 2
319  store i32 %3, ptr %arrayidx3.2, align 4
320  %arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3
321  %4 = load i32, ptr %arrayidx.3, align 4
322  %arrayidx3.3 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 3
323  store i32 %4, ptr %arrayidx3.3, align 4
324  %arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4
325  %5 = load i32, ptr %arrayidx.4, align 4
326  %arrayidx3.4 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 4
327  store i32 %5, ptr %arrayidx3.4, align 4
328  %arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5
329  %6 = load i32, ptr %arrayidx.5, align 4
330  %arrayidx3.5 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 5
331  store i32 %6, ptr %arrayidx3.5, align 4
332  %arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6
333  %7 = load i32, ptr %arrayidx.6, align 4
334  %arrayidx3.6 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 6
335  store i32 %7, ptr %arrayidx3.6, align 4
336  %arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7
337  %8 = load i32, ptr %arrayidx.7, align 4
338  %arrayidx3.7 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 7
339  store i32 %8, ptr %arrayidx3.7, align 4
340  ret void
341}
342
343define dso_local void @foo_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) {
344  ; CHECK-LABEL: .visible .func foo_St8x1(
345  ; CHECK:               .param .align 8 .b8 foo_St8x1_param_0[8],
346  ; CHECK:               .param .b64 foo_St8x1_param_1
347  ; CHECK:       )
348  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x1_param_1];
349  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0];
350  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
351  ; CHECK:       ret;
352  %1 = load i64, ptr %in, align 8
353  store i64 %1, ptr %ret, align 8
354  ret void
355}
356
357define dso_local void @foo_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) {
358  ; CHECK-LABEL: .visible .func foo_St8x2(
359  ; CHECK:               .param .align 8 .b8 foo_St8x2_param_0[16],
360  ; CHECK:               .param .b64 foo_St8x2_param_1
361  ; CHECK:       )
362  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x2_param_1];
363  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0];
364  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
365  ; CHECK:       ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8];
366  ; CHECK:       st.u64 [[[R1]]+8], [[RD2]];
367  ; CHECK:       ret;
368  %1 = load i64, ptr %in, align 8
369  store i64 %1, ptr %ret, align 8
370  %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
371  %2 = load i64, ptr %arrayidx.1, align 8
372  %arrayidx3.1 = getelementptr inbounds [2 x i64], ptr %ret, i64 0, i64 1
373  store i64 %2, ptr %arrayidx3.1, align 8
374  ret void
375}
376
377define dso_local void @foo_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) {
378  ; CHECK-LABEL: .visible .func foo_St8x3(
379  ; CHECK:               .param .align 8 .b8 foo_St8x3_param_0[24],
380  ; CHECK:               .param .b64 foo_St8x3_param_1
381  ; CHECK:       )
382  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x3_param_1];
383  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0];
384  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
385  ; CHECK:       ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8];
386  ; CHECK:       st.u64 [[[R1]]+8], [[RD2]];
387  ; CHECK:       ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16];
388  ; CHECK:       st.u64 [[[R1]]+16], [[RD3]];
389  ; CHECK:       ret;
390  %1 = load i64, ptr %in, align 8
391  store i64 %1, ptr %ret, align 8
392  %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1
393  %2 = load i64, ptr %arrayidx.1, align 8
394  %arrayidx3.1 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 1
395  store i64 %2, ptr %arrayidx3.1, align 8
396  %arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2
397  %3 = load i64, ptr %arrayidx.2, align 8
398  %arrayidx3.2 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 2
399  store i64 %3, ptr %arrayidx3.2, align 8
400  ret void
401}
402
403define dso_local void @foo_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) {
404  ; CHECK-LABEL: .visible .func foo_St8x4(
405  ; CHECK:               .param .align 8 .b8 foo_St8x4_param_0[32],
406  ; CHECK:               .param .b64 foo_St8x4_param_1
407  ; CHECK:       )
408  ; CHECK:       ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x4_param_1];
409  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0];
410  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
411  ; CHECK:       ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8];
412  ; CHECK:       st.u64 [[[R1]]+8], [[RD2]];
413  ; CHECK:       ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16];
414  ; CHECK:       st.u64 [[[R1]]+16], [[RD3]];
415  ; CHECK:       ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24];
416  ; CHECK:       st.u64 [[[R1]]+24], [[RD4]];
417  ; CHECK:       ret;
418  %1 = load i64, ptr %in, align 8
419  store i64 %1, ptr %ret, align 8
420  %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
421  %2 = load i64, ptr %arrayidx.1, align 8
422  %arrayidx3.1 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 1
423  store i64 %2, ptr %arrayidx3.1, align 8
424  %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2
425  %3 = load i64, ptr %arrayidx.2, align 8
426  %arrayidx3.2 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 2
427  store i64 %3, ptr %arrayidx3.2, align 8
428  %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3
429  %4 = load i64, ptr %arrayidx.3, align 8
430  %arrayidx3.3 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 3
431  store i64 %4, ptr %arrayidx3.3, align 8
432  ret void
433}
434