xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-st1.ll (revision abd0d5d2626022d835c784b1fed557caf90e793f)
1; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
3; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check.
4; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s
5
; Store lane 1 of a <16 x i8> to D+1: expects a single-lane st1.b through a
; general-purpose base register.
define void @st1lane_16b(<16 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane_16b:
; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 1
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}
14
; Store lane 0 of a <16 x i8> to D+1: byte elements still expect an st1.b
; lane store.
define void @st1lane0_16b(<16 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane0_16b:
; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 1
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}
23
; Store lane 0 of a <16 x i8> to D-1 (negative offset): expects an st1.b lane
; store.
define void @st1lane0u_16b(<16 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_16b:
; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 -1
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}
32
; Store lane 1 of a <16 x i8> at a register offset: expects an explicit add to
; form the address, then st1.b on that register.
define void @st1lane_ro_16b(<16 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}
42
; Store lane 0 of a <16 x i8> at a register offset: expects add followed by an
; st1.b lane store on the computed address.
define void @st1lane0_ro_16b(<16 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}
52
; Store lane 1 of an <8 x i16> to element offset 1: expects an st1.h lane store.
define void @st1lane_8h(<8 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane_8h:
; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}
61
; Store lane 0 of an <8 x i16> to element offset 1: expects a scalar str of h0
; with the immediate offset #2 instead of a lane store.
define void @st1lane0_8h(<8 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0_8h:
; CHECK: str h0, [x0, #2]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}
70
; Store lane 0 of an <8 x i16> to element offset -1: expects the unscaled form
; stur h0 with #-2.
define void @st1lane0u_8h(<8 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_8h:
; CHECK: stur h0, [x0, #-2]
  %ptr = getelementptr i16, ptr %D, i64 -1
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}
79
; Store lane 1 of an <8 x i16> at a register offset: expects an add to form the
; address, then st1.h on that register.
define void @st1lane_ro_8h(<8 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}
89
; Store lane 0 of an <8 x i16> at a register offset: expects a scalar str of h0
; using the scaled register-offset addressing mode (lsl #1).
define void @st1lane0_ro_8h(<8 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}
98
; Store lane 1 of a <4 x i32> to element offset 1: expects an st1.s lane store.
define void @st1lane_4s(<4 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane_4s:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}
107
; Store lane 0 of a <4 x i32> to element offset 1: expects a scalar str of s0
; with the immediate offset #4.
define void @st1lane0_4s(<4 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0_4s:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}
116
; Store lane 0 of a <4 x i32> to element offset -1: expects stur s0 with #-4.
define void @st1lane0u_4s(<4 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_4s:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr i32, ptr %D, i64 -1
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}
125
; Store lane 1 of a <4 x i32> at a register offset: expects an add to form the
; address, then st1.s on that register.
define void @st1lane_ro_4s(<4 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}
135
; Store lane 0 of a <4 x i32> at a register offset: expects a scalar str of s0
; using the scaled register-offset addressing mode (lsl #2).
define void @st1lane0_ro_4s(<4 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}
144
; Store lane 1 of a <4 x float> to element offset 1: expects an st1.s lane
; store.
define void @st1lane_4s_float(<4 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane_4s_float:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}
153
; Store lane 0 of a <4 x float> to element offset 1: expects a scalar str of s0
; with the immediate offset #4.
define void @st1lane0_4s_float(<4 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0_4s_float:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}
162
; Store lane 0 of a <4 x float> to element offset -1: expects stur s0 with #-4.
define void @st1lane0u_4s_float(<4 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_4s_float:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr float, ptr %D, i64 -1
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}
171
; Store lane 1 of a <4 x float> at a register offset: expects an add to form
; the address, then st1.s on that register.
define void @st1lane_ro_4s_float(<4 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}
181
; Store lane 0 of a <4 x float> at a register offset: expects a scalar str of
; s0 using the scaled register-offset addressing mode (lsl #2).
define void @st1lane0_ro_4s_float(<4 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}
190
; Store lane 1 of a <2 x i64> to element offset 1: expects an st1.d lane store.
define void @st1lane_2d(<2 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane_2d:
; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i64, ptr %D, i64 1
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, ptr %ptr
  ret void
}
199
; Store lane 0 of a <2 x i64> to element offset 1: expects a scalar str of d0
; with the immediate offset #8.
define void @st1lane0_2d(<2 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2d:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr i64, ptr %D, i64 1
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}
208
; Store lane 0 of a <2 x i64> to element offset -1: expects stur d0 with #-8.
define void @st1lane0u_2d(<2 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2d:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr i64, ptr %D, i64 -1
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}
217
; Store lane 1 of a <2 x i64> at a register offset: expects an add to form the
; address, then st1.d on that register.
define void @st1lane_ro_2d(<2 x i64> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i64, ptr %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, ptr %ptr
  ret void
}
227
; Store lane 0 of a <2 x i64> at a register offset: expects a scalar str of d0
; using the scaled register-offset addressing mode (lsl #3).
define void @st1lane0_ro_2d(<2 x i64> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, ptr %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}
236
; Store lane 1 of a <2 x double> to element offset 1: expects an st1.d lane
; store.
define void @st1lane_2d_double(<2 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane_2d_double:
; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr double, ptr %D, i64 1
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, ptr %ptr
  ret void
}
245
; Store lane 0 of a <2 x double> to element offset 1: expects a scalar str of
; d0 with the immediate offset #8.
define void @st1lane0_2d_double(<2 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2d_double:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr double, ptr %D, i64 1
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}
254
; Store lane 0 of a <2 x double> to element offset -1: expects stur d0 with
; #-8.
define void @st1lane0u_2d_double(<2 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2d_double:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr double, ptr %D, i64 -1
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}
263
; Store lane 1 of a <2 x double> at a register offset: expects an add to form
; the address, then st1.d on that register.
define void @st1lane_ro_2d_double(<2 x double> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr double, ptr %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, ptr %ptr
  ret void
}
273
; Store lane 0 of a <2 x double> at a register offset: expects a scalar str of
; d0 using the scaled register-offset addressing mode (lsl #3).
define void @st1lane0_ro_2d_double(<2 x double> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, ptr %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}
282
; Store lane 1 of a 64-bit <8 x i8> vector to D+1: expects an st1.b lane store.
define void @st1lane_8b(<8 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane_8b:
; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 1
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}
291
; Store lane 1 of an <8 x i8> at a register offset: expects an add to form the
; address, then st1.b on that register.
define void @st1lane_ro_8b(<8 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}
301
; Store lane 0 of an <8 x i8> at a register offset: expects add followed by an
; st1.b lane store on the computed address.
define void @st1lane0_ro_8b(<8 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}
311
; Store lane 1 of a 64-bit <4 x i16> vector to element offset 1: expects an
; st1.h lane store.
define void @st1lane_4h(<4 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane_4h:
; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}
320
; Store lane 0 of a <4 x i16> to element offset 1: expects a scalar str of h0
; with the immediate offset #2.
define void @st1lane0_4h(<4 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0_4h:
; CHECK: str h0, [x0, #2]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}
329
; Store lane 0 of a <4 x i16> to element offset -1: expects stur h0 with #-2.
define void @st1lane0u_4h(<4 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_4h:
; CHECK: stur h0, [x0, #-2]
  %ptr = getelementptr i16, ptr %D, i64 -1
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}
338
; Store lane 1 of a <4 x i16> at a register offset: expects an add to form the
; address, then st1.h on that register.
define void @st1lane_ro_4h(<4 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}
348
; Store lane 0 of a <4 x i16> at a register offset: expects a scalar str of h0
; using the scaled register-offset addressing mode (lsl #1).
define void @st1lane0_ro_4h(<4 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}
357
; Store lane 1 of a 64-bit <2 x i32> vector to element offset 1: expects an
; st1.s lane store.
define void @st1lane_2s(<2 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane_2s:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}
366
; Store lane 0 of a <2 x i32> to element offset 1: expects a scalar str of s0
; with the immediate offset #4.
define void @st1lane0_2s(<2 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2s:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}
375
; Store lane 0 of a <2 x i32> to element offset -1: expects stur s0 with #-4.
define void @st1lane0u_2s(<2 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2s:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr i32, ptr %D, i64 -1
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}
384
; Store lane 1 of a <2 x i32> at a register offset: expects an add to form the
; address, then st1.s on that register.
define void @st1lane_ro_2s(<2 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}
394
; Store lane 0 of a <2 x i32> at a register offset: expects a scalar str of s0
; using the scaled register-offset addressing mode (lsl #2).
define void @st1lane0_ro_2s(<2 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}
403
; Store lane 1 of a <2 x float> to element offset 1: expects an st1.s lane
; store.
define void @st1lane_2s_float(<2 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane_2s_float:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}
412
; Store lane 0 of a <2 x float> to element offset 1: expects a scalar str of s0
; with the immediate offset #4.
define void @st1lane0_2s_float(<2 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2s_float:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}
421
; Store lane 0 of a <2 x float> to element offset -1: expects stur s0 with #-4.
define void @st1lane0u_2s_float(<2 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2s_float:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr float, ptr %D, i64 -1
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}
430
; Store lane 1 of a <2 x float> at a register offset: expects an add to form
; the address, then st1.s on that register.
define void @st1lane_ro_2s_float(<2 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}
440
; Store lane 0 of a <2 x float> at a register offset: expects a scalar str of
; s0 using the scaled register-offset addressing mode (lsl #2).
define void @st1lane0_ro_2s_float(<2 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}
449
; Store the only lane of a <1 x i64> to element offset 1: expects a scalar str
; of d0 with the immediate offset #8.
define void @st1lane0_1d(<1 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0_1d:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr i64, ptr %D, i64 1
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}
458
; Store the only lane of a <1 x i64> to element offset -1: expects stur d0
; with #-8.
define void @st1lane0u_1d(<1 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_1d:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr i64, ptr %D, i64 -1
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}
467
; Store the only lane of a <1 x i64> at a register offset: expects a scalar str
; of d0 using the scaled register-offset addressing mode (lsl #3).
define void @st1lane0_ro_1d(<1 x i64> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_1d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, ptr %D, i64 %offset
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}
476
; Store the only lane of a <1 x double> to element offset 1: expects a scalar
; str of d0 with the immediate offset #8.
define void @st1lane0_1d_double(<1 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0_1d_double:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr double, ptr %D, i64 1
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}
485
; Store the only lane of a <1 x double> to element offset -1: expects stur d0
; with #-8.
define void @st1lane0u_1d_double(<1 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_1d_double:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr double, ptr %D, i64 -1
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}
494
; Store the only lane of a <1 x double> at a register offset: expects a scalar
; str of d0 using the scaled register-offset addressing mode (lsl #3).
define void @st1lane0_ro_1d_double(<1 x double> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_1d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, ptr %D, i64 %offset
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}
503
; st2lane intrinsic on two <16 x i8> vectors, lane index 1: expects an st2.b.
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) {
; CHECK-LABEL: st2lane_16b:
; CHECK: st2.b
  call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, i64 1, ptr %D)
  ret void
}
510
; st2lane intrinsic on two <8 x i16> vectors, lane index 1: expects an st2.h.
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) {
; CHECK-LABEL: st2lane_8h:
; CHECK: st2.h
  call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, i64 1, ptr %D)
  ret void
}
517
; st2lane intrinsic on two <4 x i32> vectors, lane index 1: expects an st2.s.
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) {
; CHECK-LABEL: st2lane_4s:
; CHECK: st2.s
  call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, i64 1, ptr %D)
  ret void
}
524
; st2lane intrinsic on two <2 x i64> vectors, lane index 1: expects an st2.d.
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, ptr %D) {
; CHECK-LABEL: st2lane_2d:
; CHECK: st2.d
  call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, i64 1, ptr %D)
  ret void
}
531
; The st2lane intrinsics store through their pointer operand, so they are not
; declared readnone; only nounwind is kept.
declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind
536
; st3lane intrinsic on three <16 x i8> vectors, lane index 1: expects an st3.b.
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) {
; CHECK-LABEL: st3lane_16b:
; CHECK: st3.b
  call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, ptr %D)
  ret void
}
543
; st3lane intrinsic on three <8 x i16> vectors, lane index 1: expects an st3.h.
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) {
; CHECK-LABEL: st3lane_8h:
; CHECK: st3.h
  call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, ptr %D)
  ret void
}
550
; st3lane intrinsic on three <4 x i32> vectors, lane index 1: expects an st3.s.
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) {
; CHECK-LABEL: st3lane_4s:
; CHECK: st3.s
  call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, ptr %D)
  ret void
}
557
; st3lane intrinsic on three <2 x i64> vectors, lane index 1: expects an st3.d.
define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %D) {
; CHECK-LABEL: st3lane_2d:
; CHECK: st3.d
  call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, ptr %D)
  ret void
}
564
; The st3lane intrinsics store through their pointer operand, so they are not
; declared readnone; only nounwind is kept.
declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind
569
; st4lane intrinsic on four <16 x i8> vectors, lane index 1: expects an st4.b.
define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %E) {
; CHECK-LABEL: st4lane_16b:
; CHECK: st4.b
  call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, ptr %E)
  ret void
}
576
; st4lane intrinsic on four <8 x i16> vectors, lane index 1: expects an st4.h.
define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %E) {
; CHECK-LABEL: st4lane_8h:
; CHECK: st4.h
  call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, ptr %E)
  ret void
}
583
; st4lane intrinsic on four <4 x i32> vectors, lane index 1: expects an st4.s.
define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %E) {
; CHECK-LABEL: st4lane_4s:
; CHECK: st4.s
  call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, ptr %E)
  ret void
}
590
; st4lane intrinsic on four <2 x i64> vectors, lane index 1: expects an st4.d.
define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %E) {
; CHECK-LABEL: st4lane_2d:
; CHECK: st4.d
  call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, ptr %E)
  ret void
}
597
; The st4lane intrinsics store through their pointer operand, so they are not
; declared readnone; only nounwind is kept.
declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind
602
603
; st2 of two <8 x i8> vectors. The default run expects st2.8b; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_8b(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_8b:
; CHECK: st2.8b
; EXYNOS-LABEL: st2_8b:
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %A, <8 x i8> %B, ptr %P)
  ret void
}
614
; st3 of three <8 x i8> vectors: expects st3.8b (only the default prefix has
; expectations for this function).
define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_8b:
; CHECK: st3.8b
  call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P)
  ret void
}
621
; st4 of four <8 x i8> vectors. The default run expects st4.8b; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_8b:
; CHECK: st4.8b
; EXYNOS-LABEL: st4_8b:
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P)
  ret void
}
639
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
643
; st2 of two <16 x i8> vectors. The default run expects st2.16b; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_16b:
; CHECK: st2.16b
; EXYNOS-LABEL: st2_16b:
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %P)
  ret void
}
654
; st3 of three <16 x i8> vectors: expects st3.16b (only the default prefix has
; expectations for this function).
define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_16b:
; CHECK: st3.16b
  call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P)
  ret void
}
661
; st4 of four <16 x i8> vectors. The default run expects st4.16b; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_16b:
; CHECK: st4.16b
; EXYNOS-LABEL: st4_16b:
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %P)
  ret void
}
679
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
683
; st2 of two <4 x i16> vectors. The default run expects st2.4h; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_4h(<4 x i16> %A, <4 x i16> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_4h:
; CHECK: st2.4h
; EXYNOS-LABEL: st2_4h:
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %P)
  ret void
}
694
; st3 of three <4 x i16> vectors: expects st3.4h (only the default prefix has
; expectations for this function).
define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_4h:
; CHECK: st3.4h
  call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P)
  ret void
}
701
; st4 of four <4 x i16> vectors. The default run expects st4.4h; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_4h:
; CHECK: st4.4h
; EXYNOS-LABEL: st4_4h:
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %P)
  ret void
}
719
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
723
; st2 of two <8 x i16> vectors. The default run expects st2.8h; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_8h(<8 x i16> %A, <8 x i16> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_8h:
; CHECK: st2.8h
; EXYNOS-LABEL: st2_8h:
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %P)
  ret void
}
734
; st3 of three <8 x i16> vectors: expects st3.8h (only the default prefix has
; expectations for this function).
define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_8h:
; CHECK: st3.8h
  call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P)
  ret void
}
741
; st4 of four <8 x i16> vectors. The default run expects st4.8h; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_8h:
; CHECK: st4.8h
; EXYNOS-LABEL: st4_8h:
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %P)
  ret void
}
759
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
763
; st2 of two <2 x i32> vectors. The default run expects st2.2s; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_2s(<2 x i32> %A, <2 x i32> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_2s:
; CHECK: st2.2s
; EXYNOS-LABEL: st2_2s:
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %P)
  ret void
}
774
; st3 of three <2 x i32> vectors: expects st3.2s (only the default prefix has
; expectations for this function).
define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_2s:
; CHECK: st3.2s
  call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P)
  ret void
}
781
; st4 of four <2 x i32> vectors. The default run expects st4.2s; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_2s:
; CHECK: st4.2s
; EXYNOS-LABEL: st4_2s:
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %P)
  ret void
}
799
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
803
; st2 of two <4 x i32> vectors. The default run expects st2.4s; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_4s(<4 x i32> %A, <4 x i32> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_4s:
; CHECK: st2.4s
; EXYNOS-LABEL: st2_4s:
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %P)
  ret void
}
814
; st3 of three <4 x i32> vectors: expects st3.4s (only the default prefix has
; expectations for this function).
define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_4s:
; CHECK: st3.4s
  call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P)
  ret void
}
821
; st4 of four <4 x i32> vectors. The default run expects st4.4s; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_4s:
; CHECK: st4.4s
; EXYNOS-LABEL: st4_4s:
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %P)
  ret void
}
839
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
843
; If there's only one element per vector, st2/3/4 don't make much sense; stick to st1.
; st2 intrinsic on two single-element <1 x i64> vectors: expected to be emitted
; as an st1.1d rather than an st2.
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %P)
  ret void
}
851
; st3 intrinsic on three single-element <1 x i64> vectors: expected to be
; emitted as an st1.1d rather than an st3.
define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P)
  ret void
}
858
; st4 intrinsic on four single-element <1 x i64> vectors: expected to be
; emitted as an st1.1d rather than an st4.
define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %P)
  ret void
}
865
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
869
; st2 of two <2 x i64> vectors. The default run expects st2.2d; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects zip1/zip2 followed by stp.
define void @st2_2d(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_2d:
; CHECK: st2.2d
; EXYNOS-LABEL: st2_2d:
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %P)
  ret void
}
880
; st3 of three <2 x i64> vectors: expects st3.2d (only the default prefix has
; expectations for this function).
define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_2d:
; CHECK: st3.2d
  call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P)
  ret void
}
887
; st4 of four <2 x i64> vectors. The default run expects st4.2d; the
; -mcpu=exynos-m3 run (EXYNOS prefix) expects a zip1/zip2 sequence with two
; stp stores.
define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_2d:
; CHECK: st4.2d
; EXYNOS-LABEL: st4_2d:
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %P)
  ret void
}
905
; These intrinsics store through their pointer operand, so they are not
; declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
909
; st1x2 on 64-bit vector types. The intrinsics store through their pointer
; operand, so they are not declared readonly; only nounwind is kept.
declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) nounwind
916
; st1x2 of two <8 x i8> vectors: expects one two-register st1.8b addressed
; through x0.
define void @st1_x2_v8i8(<8 x i8> %first, <8 x i8> %second, ptr %dst) {
; CHECK-LABEL: st1_x2_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %first, <8 x i8> %second, ptr %dst)
  ret void
}
923
; st1x2 store of two <4 x i16> vectors: the expected output is an st1.4h with a
; two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
924define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, ptr %addr) {
925; CHECK-LABEL: st1_x2_v4i16:
926; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
927  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %addr)
928  ret void
929}
930
; st1x2 store of two <2 x i32> vectors: the expected output is an st1.2s with a
; two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
931define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, ptr %addr) {
932; CHECK-LABEL: st1_x2_v2i32:
933; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
934  call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %addr)
935  ret void
936}
937
; st1x2 store of two <2 x float> vectors: the expected output is an st1.2s (the
; float elements use the .2s arrangement) with a two-register list and x0 as
; the base register. %addr is the function's only pointer argument.
938define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, ptr %addr) {
939; CHECK-LABEL: st1_x2_v2f32:
940; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
941  call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %A, <2 x float> %B, ptr %addr)
942  ret void
943}
944
; st1x2 store of two <1 x i64> vectors: the expected output is an st1.1d with a
; two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
945define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, ptr %addr) {
946; CHECK-LABEL: st1_x2_v1i64:
947; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
948  call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %addr)
949  ret void
950}
951
; st1x2 store of two <1 x double> vectors: the expected output is an st1.1d
; (the double element uses the .1d arrangement) with a two-register list and
; x0 as the base register. %addr is the function's only pointer argument.
952define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, ptr %addr) {
953; CHECK-LABEL: st1_x2_v1f64:
954; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
955  call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %A, <1 x double> %B, ptr %addr)
956  ret void
957}
958
; st1x2 intrinsics for the 128-bit vector types. They store through their
; pointer operand, so the declarations must not claim readonly; only nounwind
; is kept.
959declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) nounwind
960declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) nounwind
961declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) nounwind
962declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) nounwind
963declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) nounwind
964declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr) nounwind
965
; st1x2 store of two <16 x i8> vectors: the expected output is an st1.16b with
; a two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
966define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, ptr %addr) {
967; CHECK-LABEL: st1_x2_v16i8:
968; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
969  call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %addr)
970  ret void
971}
972
; st1x2 store of two <8 x i16> vectors: the expected output is an st1.8h with a
; two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
973define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, ptr %addr) {
974; CHECK-LABEL: st1_x2_v8i16:
975; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
976  call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %addr)
977  ret void
978}
979
; st1x2 store of two <4 x i32> vectors: the expected output is an st1.4s with a
; two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
980define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, ptr %addr) {
981; CHECK-LABEL: st1_x2_v4i32:
982; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
983  call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %addr)
984  ret void
985}
986
; st1x2 store of two <4 x float> vectors: the expected output is an st1.4s (the
; float elements use the .4s arrangement) with a two-register list and x0 as
; the base register. %addr is the function's only pointer argument.
987define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, ptr %addr) {
988; CHECK-LABEL: st1_x2_v4f32:
989; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
990  call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %A, <4 x float> %B, ptr %addr)
991  ret void
992}
993
; st1x2 store of two <2 x i64> vectors: the expected output is an st1.2d with a
; two-register list and x0 as the base register. %addr is the function's only
; pointer argument.
994define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, ptr %addr) {
995; CHECK-LABEL: st1_x2_v2i64:
996; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
997  call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %addr)
998  ret void
999}
1000
; st1x2 store of two <2 x double> vectors: the expected output is an st1.2d
; (the double elements use the .2d arrangement) with a two-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1001define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, ptr %addr) {
1002; CHECK-LABEL: st1_x2_v2f64:
1003; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1004  call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %A, <2 x double> %B, ptr %addr)
1005  ret void
1006}
1007
; st1x3 intrinsics for the 64-bit vector types. They store through their
; pointer operand, so the declarations must not claim readonly; only nounwind
; is kept.
1008declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
1009declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
1010declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
1011declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr) nounwind
1012declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
1013declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr) nounwind
1014
; st1x3 store of three <8 x i8> vectors: the expected output is an st1.8b with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1015define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %addr) {
1016; CHECK-LABEL: st1_x3_v8i8:
1017; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1018  call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %addr)
1019  ret void
1020}
1021
; st1x3 store of three <4 x i16> vectors: the expected output is an st1.4h with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1022define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %addr) {
1023; CHECK-LABEL: st1_x3_v4i16:
1024; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1025  call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %addr)
1026  ret void
1027}
1028
; st1x3 store of three <2 x i32> vectors: the expected output is an st1.2s with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1029define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %addr) {
1030; CHECK-LABEL: st1_x3_v2i32:
1031; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1032  call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %addr)
1033  ret void
1034}
1035
; st1x3 store of three <2 x float> vectors: the expected output is an st1.2s
; (the float elements use the .2s arrangement) with a three-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1036define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %addr) {
1037; CHECK-LABEL: st1_x3_v2f32:
1038; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1039  call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %addr)
1040  ret void
1041}
1042
; st1x3 store of three <1 x i64> vectors: the expected output is an st1.1d with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1043define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %addr) {
1044; CHECK-LABEL: st1_x3_v1i64:
1045; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1046  call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %addr)
1047  ret void
1048}
1049
; st1x3 store of three <1 x double> vectors: the expected output is an st1.1d
; (the double element uses the .1d arrangement) with a three-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1050define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %addr) {
1051; CHECK-LABEL: st1_x3_v1f64:
1052; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1053  call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %addr)
1054  ret void
1055}
1056
; st1x3 intrinsics for the 128-bit vector types. They store through their
; pointer operand, so the declarations must not claim readonly; only nounwind
; is kept.
1057declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
1058declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
1059declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
1060declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr) nounwind
1061declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
1062declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr) nounwind
1063
; st1x3 store of three <16 x i8> vectors: the expected output is an st1.16b
; with a three-register list and x0 as the base register. %addr is the
; function's only pointer argument.
1064define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %addr) {
1065; CHECK-LABEL: st1_x3_v16i8:
1066; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1067  call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %addr)
1068  ret void
1069}
1070
; st1x3 store of three <8 x i16> vectors: the expected output is an st1.8h with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1071define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %addr) {
1072; CHECK-LABEL: st1_x3_v8i16:
1073; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1074  call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %addr)
1075  ret void
1076}
1077
; st1x3 store of three <4 x i32> vectors: the expected output is an st1.4s with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1078define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %addr) {
1079; CHECK-LABEL: st1_x3_v4i32:
1080; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1081  call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %addr)
1082  ret void
1083}
1084
; st1x3 store of three <4 x float> vectors: the expected output is an st1.4s
; (the float elements use the .4s arrangement) with a three-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1085define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %addr) {
1086; CHECK-LABEL: st1_x3_v4f32:
1087; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1088  call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %addr)
1089  ret void
1090}
1091
; st1x3 store of three <2 x i64> vectors: the expected output is an st1.2d with
; a three-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1092define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %addr) {
1093; CHECK-LABEL: st1_x3_v2i64:
1094; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1095  call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %addr)
1096  ret void
1097}
1098
; st1x3 store of three <2 x double> vectors: the expected output is an st1.2d
; (the double elements use the .2d arrangement) with a three-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1099define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %addr) {
1100; CHECK-LABEL: st1_x3_v2f64:
1101; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1102  call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %addr)
1103  ret void
1104}
1105
1106
; st1x4 intrinsics for the 64-bit vector types. They store through their
; pointer operand, so the declarations must not claim readonly; only nounwind
; is kept.
1107declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
1108declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
1109declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
1110declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr) nounwind
1111declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
1112declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr) nounwind
1113
; st1x4 store of four <8 x i8> vectors: the expected output is an st1.8b with a
; four-register list and x0 as the base register. %addr is the function's only
; pointer argument.
1114define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %addr) {
1115; CHECK-LABEL: st1_x4_v8i8:
1116; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1117  call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %addr)
1118  ret void
1119}
1120
; st1x4 store of four <4 x i16> vectors: the expected output is an st1.4h with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1121define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %addr) {
1122; CHECK-LABEL: st1_x4_v4i16:
1123; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1124  call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %addr)
1125  ret void
1126}
1127
; st1x4 store of four <2 x i32> vectors: the expected output is an st1.2s with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1128define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %addr) {
1129; CHECK-LABEL: st1_x4_v2i32:
1130; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1131  call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %addr)
1132  ret void
1133}
1134
; st1x4 store of four <2 x float> vectors: the expected output is an st1.2s
; (the float elements use the .2s arrangement) with a four-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1135define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %addr) {
1136; CHECK-LABEL: st1_x4_v2f32:
1137; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1138  call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %addr)
1139  ret void
1140}
1141
; st1x4 store of four <1 x i64> vectors: the expected output is an st1.1d with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1142define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %addr) {
1143; CHECK-LABEL: st1_x4_v1i64:
1144; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1145  call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %addr)
1146  ret void
1147}
1148
; st1x4 store of four <1 x double> vectors: the expected output is an st1.1d
; (the double element uses the .1d arrangement) with a four-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1149define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %addr) {
1150; CHECK-LABEL: st1_x4_v1f64:
1151; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1152  call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %addr)
1153  ret void
1154}
1155
; st1x4 intrinsics for the 128-bit vector types. They store through their
; pointer operand, so the declarations must not claim readonly; only nounwind
; is kept.
1156declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
1157declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
1158declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
1159declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr) nounwind
1160declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
1161declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, ptr) nounwind
1162
; st1x4 store of four <16 x i8> vectors: the expected output is an st1.16b with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1163define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %addr) {
1164; CHECK-LABEL: st1_x4_v16i8:
1165; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1166  call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %addr)
1167  ret void
1168}
1169
; st1x4 store of four <8 x i16> vectors: the expected output is an st1.8h with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1170define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %addr) {
1171; CHECK-LABEL: st1_x4_v8i16:
1172; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1173  call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %addr)
1174  ret void
1175}
1176
; st1x4 store of four <4 x i32> vectors: the expected output is an st1.4s with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1177define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %addr) {
1178; CHECK-LABEL: st1_x4_v4i32:
1179; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1180  call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %addr)
1181  ret void
1182}
1183
; st1x4 store of four <4 x float> vectors: the expected output is an st1.4s
; (the float elements use the .4s arrangement) with a four-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1184define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %addr) {
1185; CHECK-LABEL: st1_x4_v4f32:
1186; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1187  call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %addr)
1188  ret void
1189}
1190
; st1x4 store of four <2 x i64> vectors: the expected output is an st1.2d with
; a four-register list and x0 as the base register. %addr is the function's
; only pointer argument.
1191define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %addr) {
1192; CHECK-LABEL: st1_x4_v2i64:
1193; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1194  call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %addr)
1195  ret void
1196}
1197
; st1x4 store of four <2 x double> vectors: the expected output is an st1.2d
; (the double elements use the .2d arrangement) with a four-register list and
; x0 as the base register. %addr is the function's only pointer argument.
1198define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %addr) {
1199; CHECK-LABEL: st1_x4_v2f64:
1200; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1201  call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %addr)
1202  ret void
1203}
1204