xref: /llvm-project/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll (revision cd6e462d012f289cc4ec12927ca8198f9ed1469e)
1; RUN: opt < %s -interleaved-access -S | FileCheck %s -check-prefix=NEON
2; RUN: opt < %s -mattr=-neon -interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
3; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s -check-prefix=NEON
4; RUN: opt < %s -mattr=-neon -passes=interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
5
6target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
7target triple = "aarch64--linux-gnu"
8
9define void @load_factor2(ptr %ptr) {
10; NEON-LABEL:    @load_factor2(
11; NEON-NEXT:       [[LDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %ptr)
12; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 1
13; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 0
14; NEON-NEXT:       ret void
15; NO_NEON-LABEL: @load_factor2(
16; NO_NEON-NOT:     @llvm.aarch64.neon
17; NO_NEON:         ret void
18;
19  %interleaved.vec = load <16 x i8>, ptr %ptr, align 4
20  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
21  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
22  ret void
23}
24
25define void @load_factor3(ptr %ptr) {
26; NEON-LABEL:    @load_factor3(
27; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr)
28; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
29; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
30; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
31; NEON-NEXT:       ret void
32; NO_NEON-LABEL: @load_factor3(
33; NO_NEON-NOT:     @llvm.aarch64.neon
34; NO_NEON:         ret void
35;
36  %interleaved.vec = load <12 x i32>, ptr %ptr, align 4
37  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
38  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
39  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
40  ret void
41}
42
43define void @load_factor4(ptr %ptr) {
44; NEON-LABEL:    @load_factor4(
45; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %ptr)
46; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
47; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
48; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
49; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
50; NEON-NEXT:       ret void
51; NO_NEON-LABEL: @load_factor4(
52; NO_NEON-NOT:     @llvm.aarch64.neon
53; NO_NEON:         ret void
54;
55  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
56  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
57  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
58  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
59  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
60  ret void
61}
62
63define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) {
64; NEON-LABEL:    @store_factor2(
65; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
66; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
67; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr %ptr)
68; NEON-NEXT:       ret void
69; NO_NEON-LABEL: @store_factor2(
70; NO_NEON-NOT:     @llvm.aarch64.neon
71; NO_NEON:         ret void
72;
73  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
74  store <16 x i8> %interleaved.vec, ptr %ptr, align 4
75  ret void
76}
77
78define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
79; NEON-LABEL:    @store_factor3(
80; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
81; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
82; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
83; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
84; NEON-NEXT:       ret void
85; NO_NEON-LABEL: @store_factor3(
86; NO_NEON-NOT:     @llvm.aarch64.neon
87; NO_NEON:         ret void
88;
89  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
90  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
91  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
92  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
93  ret void
94}
95
96define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
97; NEON-LABEL:    @store_factor4(
98; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
99; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
100; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
101; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
102; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr %ptr)
103; NEON-NEXT:       ret void
104; NO_NEON-LABEL: @store_factor4(
105; NO_NEON-NOT:     @llvm.aarch64.neon
106; NO_NEON:         ret void
107;
108  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
109  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
110  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
111  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
112  ret void
113}
114
115define void @load_ptrvec_factor2(ptr %ptr) {
116; NEON-LABEL:    @load_ptrvec_factor2(
117; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %ptr)
118; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1
119; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
120; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0
121; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x ptr>
122; NEON-NEXT:       ret void
123; NO_NEON-LABEL: @load_ptrvec_factor2(
124; NO_NEON-NOT:     @llvm.aarch64.neon
125; NO_NEON:         ret void
126;
127  %interleaved.vec = load <4 x ptr>, ptr %ptr, align 4
128  %v0 = shufflevector <4 x ptr> %interleaved.vec, <4 x ptr> poison, <2 x i32> <i32 0, i32 2>
129  %v1 = shufflevector <4 x ptr> %interleaved.vec, <4 x ptr> poison, <2 x i32> <i32 1, i32 3>
130  ret void
131}
132
133define void @load_ptrvec_factor3(ptr %ptr) {
134; NEON-LABEL:    @load_ptrvec_factor3(
135; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %ptr)
136; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
137; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
138; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
139; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x ptr>
140; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
141; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
142; NEON-NEXT:       ret void
143; NO_NEON-LABEL: @load_ptrvec_factor3(
144; NO_NEON-NOT:     @llvm.aarch64.neon
145; NO_NEON:         ret void
146;
147  %interleaved.vec = load <6 x ptr>, ptr %ptr, align 4
148  %v0 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
149  %v1 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
150  %v2 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> poison, <2 x i32> <i32 2, i32 5>
151  ret void
152}
153
154define void @load_ptrvec_factor4(ptr %ptr) {
155; NEON-LABEL:    @load_ptrvec_factor4(
156; NEON-NEXT:       [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %ptr)
157; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 3
158; NEON-NEXT:       [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
159; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
160; NEON-NEXT:       [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x ptr>
161; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
162; NEON-NEXT:       [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
163; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
164; NEON-NEXT:       [[TMP9:%.*]] = inttoptr <2 x i64> [[TMP8]] to <2 x ptr>
165; NEON-NEXT:       ret void
166; NO_NEON-LABEL: @load_ptrvec_factor4(
167; NO_NEON-NOT:     @llvm.aarch64.neon
168; NO_NEON:         ret void
169;
170  %interleaved.vec = load <8 x ptr>, ptr %ptr, align 4
171  %v0 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 0, i32 4>
172  %v1 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 1, i32 5>
173  %v2 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 2, i32 6>
174  %v3 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 3, i32 7>
175  ret void
176}
177
178define void @store_ptrvec_factor2(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1) {
179; NEON-LABEL:    @store_ptrvec_factor2(
180; NEON-NEXT:       [[TMP1:%.*]] = ptrtoint <2 x ptr> %v0 to <2 x i64>
181; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <2 x ptr> %v1 to <2 x i64>
182; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
183; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
184; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr %ptr)
185; NEON-NEXT:       ret void
186; NO_NEON-LABEL: @store_ptrvec_factor2(
187; NO_NEON-NOT:     @llvm.aarch64.neon
188; NO_NEON:         ret void
189;
190  %interleaved.vec = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
191  store <4 x ptr> %interleaved.vec, ptr %ptr, align 4
192  ret void
193}
194
195define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2) {
196; NEON-LABEL:    @store_ptrvec_factor3(
197; NEON:            [[TMP1:%.*]] = ptrtoint <4 x ptr> %s0 to <4 x i64>
198; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x ptr> %s1 to <4 x i64>
199; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
200; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
201; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
202; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr %ptr)
203; NEON-NEXT:       ret void
204; NO_NEON-LABEL: @store_ptrvec_factor3(
205; NO_NEON-NOT:     @llvm.aarch64.neon
206; NO_NEON:         ret void
207;
208  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
209  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
210  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
211  store <6 x ptr> %interleaved.vec, ptr %ptr, align 4
212  ret void
213}
214
215define void @store_ptrvec_factor4(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2, <2 x ptr> %v3) {
216; NEON-LABEL:    @store_ptrvec_factor4(
217; NEON:            [[TMP1:%.*]] = ptrtoint <4 x ptr> %s0 to <4 x i64>
218; NEON-NEXT:       [[TMP2:%.*]] = ptrtoint <4 x ptr> %s1 to <4 x i64>
219; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
220; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
221; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
222; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 6, i32 7>
223; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], ptr %ptr)
224; NEON-NEXT:       ret void
225; NO_NEON-LABEL: @store_ptrvec_factor4(
226; NO_NEON-NOT:     @llvm.aarch64.neon
227; NO_NEON:         ret void
228;
229  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
230  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
231  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
232  store <8 x ptr> %interleaved.vec, ptr %ptr, align 4
233  ret void
234}
235
236define void @load_undef_mask_factor2(ptr %ptr) {
237; NEON-LABEL:    @load_undef_mask_factor2(
238; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %ptr)
239; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
240; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
241; NEON-NEXT:       ret void
242; NO_NEON-LABEL: @load_undef_mask_factor2(
243; NO_NEON-NOT:     @llvm.aarch64.neon
244; NO_NEON:         ret void
245;
246  %interleaved.vec = load <8 x i32>, ptr %ptr, align 4
247  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
248  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
249  ret void
250}
251
252define void @load_undef_mask_factor3(ptr %ptr) {
253; NEON-LABEL:    @load_undef_mask_factor3(
254; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr)
255; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
256; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
257; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
258; NEON-NEXT:       ret void
259; NO_NEON-LABEL: @load_undef_mask_factor3(
260; NO_NEON-NOT:     @llvm.aarch64.neon
261; NO_NEON:         ret void
262;
263  %interleaved.vec = load <12 x i32>, ptr %ptr, align 4
264  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
265  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
266  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
267  ret void
268}
269
270define void @load_undef_mask_factor4(ptr %ptr) {
271; NEON-LABEL:    @load_undef_mask_factor4(
272; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %ptr)
273; NEON-NEXT:       [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
274; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
275; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
276; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
277; NEON-NEXT:       ret void
278; NO_NEON-LABEL: @load_undef_mask_factor4(
279; NO_NEON-NOT:     @llvm.aarch64.neon
280; NO_NEON:         ret void
281;
282  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
283  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
284  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
285  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
286  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
287  ret void
288}
289
290define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
291; NEON-LABEL:    @store_undef_mask_factor2(
292; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
293; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
294; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], ptr %ptr)
295; NEON-NEXT:       ret void
296; NO_NEON-LABEL: @store_undef_mask_factor2(
297; NO_NEON-NOT:     @llvm.aarch64.neon
298; NO_NEON:         ret void
299;
300  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
301  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
302  ret void
303}
304
305define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
306; NEON-LABEL:    @store_undef_mask_factor3(
307; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
308; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
309; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
310; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
311; NEON-NEXT:       ret void
312; NO_NEON-LABEL: @store_undef_mask_factor3(
313; NO_NEON-NOT:     @llvm.aarch64.neon
314; NO_NEON:         ret void
315;
316  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
317  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
318  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
319  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
320  ret void
321}
322
323define void @store_undef_mask_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
324; NEON-LABEL:    @store_undef_mask_factor4(
325; NEON:            [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
326; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
327; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
328; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
329; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr %ptr)
330; NEON-NEXT:       ret void
331; NO_NEON-LABEL: @store_undef_mask_factor4(
332; NO_NEON-NOT:     @llvm.aarch64.neon
333; NO_NEON:         ret void
334;
335  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
336  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
337  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
338  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
339  ret void
340}
341
342define void @load_illegal_factor2(ptr %ptr) nounwind {
343; NEON-LABEL:    @load_illegal_factor2(
344; NEON-NOT:        @llvm.aarch64.neon
345; NEON:            ret void
346; NO_NEON-LABEL: @load_illegal_factor2(
347; NO_NEON-NOT:     @llvm.aarch64.neon
348; NO_NEON:         ret void
349;
350  %interleaved.vec = load <3 x float>, ptr %ptr, align 16
351  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
352  ret void
353}
354
355define void @store_illegal_factor2(ptr %ptr, <3 x float> %v0) nounwind {
356; NEON-LABEL:    @store_illegal_factor2(
357; NEON-NOT:        @llvm.aarch64.neon
358; NEON:            ret void
359; NO_NEON-LABEL: @store_illegal_factor2(
360; NO_NEON-NOT:     @llvm.aarch64.neon
361; NO_NEON:         ret void
362;
363  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
364  store <3 x float> %interleaved.vec, ptr %ptr, align 16
365  ret void
366}
367
368define void @store_general_mask_factor4(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
369; NEON-LABEL:    @store_general_mask_factor4(
370; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
371; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
372; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
373; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
374; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
375; NEON-NEXT:       ret void
376; NO_NEON-LABEL: @store_general_mask_factor4(
377; NO_NEON-NOT:     @llvm.aarch64.neon
378; NO_NEON:         ret void
379;
380  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
381  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
382  ret void
383}
384
385define void @store_general_mask_factor4_undefbeg(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
386; NEON-LABEL:    @store_general_mask_factor4_undefbeg(
387; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
388; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
389; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
390; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
391; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
392; NEON-NEXT:       ret void
393; NO_NEON-LABEL: @store_general_mask_factor4_undefbeg(
394; NO_NEON-NOT:     @llvm.aarch64.neon
395; NO_NEON:         ret void
396;
397  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
398  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
399  ret void
400}
401
402define void @store_general_mask_factor4_undefend(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
403; NEON-LABEL:    @store_general_mask_factor4_undefend(
404; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
405; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
406; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
407; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
408; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
409; NEON-NEXT:       ret void
410; NO_NEON-LABEL: @store_general_mask_factor4_undefend(
411; NO_NEON-NOT:     @llvm.aarch64.neon
412; NO_NEON:         ret void
413;
414  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
415  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
416  ret void
417}
418
419define void @store_general_mask_factor4_undefmid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
420; NEON-LABEL:    @store_general_mask_factor4_undefmid(
421; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
422; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
423; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
424; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
425; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
426; NEON-NEXT:       ret void
427; NO_NEON-LABEL: @store_general_mask_factor4_undefmid(
428; NO_NEON-NOT:     @llvm.aarch64.neon
429; NO_NEON:         ret void
430;
431  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
432  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
433  ret void
434}
435
436define void @store_general_mask_factor4_undefmulti(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
437; NEON-LABEL:    @store_general_mask_factor4_undefmulti(
438; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
439; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
440; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
441; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
442; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
443; NEON-NEXT:       ret void
444; NO_NEON-LABEL: @store_general_mask_factor4_undefmulti(
445; NO_NEON-NOT:     @llvm.aarch64.neon
446; NO_NEON:         ret void
447;
448  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
449  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
450  ret void
451}
452
453define void @store_general_mask_factor3(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
454; NEON-LABEL:    @store_general_mask_factor3(
455; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
456; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
457; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
458; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
459; NEON-NEXT:       ret void
460; NO_NEON-LABEL: @store_general_mask_factor3(
461; NO_NEON-NOT:     @llvm.aarch64.neon
462; NO_NEON:         ret void
463;
464  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
465  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
466  ret void
467}
468
469define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
470; NEON-LABEL:    @store_general_mask_factor3_undefmultimid(
471; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
472; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
473; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
474; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
475; NEON-NEXT:       ret void
476; NO_NEON-LABEL: @store_general_mask_factor3_undefmultimid(
477; NO_NEON-NOT:     @llvm.aarch64.neon
478; NO_NEON:         ret void
479;
480  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
481  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
482  ret void
483}
484
485define void @store_general_mask_factor3_undef_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
486; NEON-LABEL:    @store_general_mask_factor3_undef_fail(
487; NEON-NOT:        @llvm.aarch64.neon
488; NEON:            ret void
489; NO_NEON-LABEL: @store_general_mask_factor3_undef_fail(
490; NO_NEON-NOT:     @llvm.aarch64.neon
491; NO_NEON:         ret void
492;
493  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
494  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
495  ret void
496}
497
498define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
499; NEON-LABEL:    @store_general_mask_factor3_undeflane(
500; NEON-NEXT:       [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
501; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
502; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
503; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
504; NEON-NEXT:       ret void
505; NO_NEON-LABEL: @store_general_mask_factor3_undeflane(
506; NO_NEON-NOT:     @llvm.aarch64.neon
507; NO_NEON:         ret void
508;
509  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
510  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
511  ret void
512}
513
514define void @store_general_mask_factor3_negativestart(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
515; NEON-LABEL:    @store_general_mask_factor3_negativestart(
516; NEON-NOT:        @llvm.aarch64.neon
517; NEON:            ret void
518; NO_NEON-LABEL: @store_general_mask_factor3_negativestart(
519; NO_NEON-NOT:     @llvm.aarch64.neon
520; NO_NEON:         ret void
521;
522  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
523  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
524  ret void
525}
526
527@g = external global <4 x float>
528
529; The following does not give a valid interleaved store
530; NEON-LABEL: define void @no_interleave
531; NEON-NOT: call void @llvm.aarch64.neon.st2
532; NEON: shufflevector
533; NEON: store
534; NEON: ret void
535; NO_NEON-LABEL: define void @no_interleave
536; NO_NEON: shufflevector
537; NO_NEON: store
538; NO_NEON: ret void
539define void @no_interleave(<4 x float> %a0) {
540  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 3, i32 7, i32 undef>
541  store <4 x float> %v0, ptr @g, align 16
542  ret void
543}
544
545define void @load_factor2_wide2(ptr %ptr) {
546; NEON-LABEL:    @load_factor2_wide2(
547; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %ptr)
548; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
549; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
550; NEON-NEXT:       [[TMP5:%.*]] = getelementptr i32, ptr %ptr, i32 8
551; NEON-NEXT:       [[LDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP5]])
552; NEON-NEXT:       [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 1
553; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 0
554; NEON-NEXT:       [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
555; NEON-NEXT:       [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
556; NEON-NEXT:       ret void
557; NO_NEON-LABEL: @load_factor2_wide2(
558; NO_NEON-NOT:     @llvm.aarch64.neon
559; NO_NEON:         ret void
560;
561  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
562  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
563  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
564  ret void
565}
566
567define void @load_factor2_wide3(ptr %ptr) {
568; NEON-LABEL:    @load_factor2_wide3(
569; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[PTR:%.*]])
570; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
571; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
572; NEON-NEXT:       [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
573; NEON-NEXT:       [[LDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP5]])
574; NEON-NEXT:       [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 1
575; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 0
576; NEON-NEXT:       [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 8
577; NEON-NEXT:       [[LDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP9]])
578; NEON-NEXT:       [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 1
579; NEON-NEXT:       [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 0
580; NEON-NEXT:       [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
581; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
582; NEON-NEXT:       [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
583; NEON-NEXT:       [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
584; NEON-NEXT:       [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
585; NEON-NEXT:       [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
586; NEON-NEXT:       ret void
587; NO_NEON-LABEL: @load_factor2_wide3(
588; NO_NEON-NOT:     @llvm.aarch64.neon
589; NO_NEON:         ret void
590;
591  %interleaved.vec = load <24 x i32>, ptr %ptr, align 4
592  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
593  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
594  ret void
595}
596
597define void @load_factor3_wide(ptr %ptr) {
598; NEON-LABEL: @load_factor3_wide(
599; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr)
600; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
601; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
602; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
603; NEON-NEXT:       [[TMP6:%.*]] = getelementptr i32, ptr %ptr, i32 12
604; NEON-NEXT:       [[LDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[TMP6]])
605; NEON-NEXT:       [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 2
606; NEON-NEXT:       [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 1
607; NEON-NEXT:       [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 0
608; NEON-NEXT:       [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
609; NEON-NEXT:       [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
610; NEON-NEXT:       [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
611; NEON-NEXT:       ret void
612; NO_NEON-LABEL: @load_factor3_wide(
613; NO_NEON-NOT:     @llvm.aarch64.neon
614; NO_NEON:         ret void
615;
616  %interleaved.vec = load <24 x i32>, ptr %ptr, align 4
617  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
618  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
619  %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
620  ret void
621}
622
623define void @load_factor4_wide(ptr %ptr) {
624; NEON-LABEL: @load_factor4_wide(
625; NEON-NEXT:       [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %ptr)
626; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
627; NEON-NEXT:       [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
628; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
629; NEON-NEXT:       [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
630; NEON-NEXT:       [[TMP7:%.*]] = getelementptr i32, ptr %ptr, i32 16
631; NEON-NEXT:       [[LDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr [[TMP7]])
632; NEON-NEXT:       [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 3
633; NEON-NEXT:       [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 2
634; NEON-NEXT:       [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 1
635; NEON-NEXT:       [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 0
636; NEON-NEXT:       [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
637; NEON-NEXT:       [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
638; NEON-NEXT:       [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
639; NEON-NEXT:       [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
640; NEON-NEXT:       ret void
641; NO_NEON-LABEL: @load_factor4_wide(
642; NO_NEON-NOT:     @llvm.aarch64.neon
643; NO_NEON:         ret void
644;
645  %interleaved.vec = load <32 x i32>, ptr %ptr, align 4
646  %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
647  %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
648  %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
649  %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
650  ret void
651}
652
653define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) {
654; NEON-LABEL:    @store_factor2_wide(
655; NEON-NEXT:       [[TMP2:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
656; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
657; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
658; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
659; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
660; NEON-NEXT:       [[TMP7:%.*]] = getelementptr i32, ptr %ptr, i32 8
661; NEON-NEXT:       call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP5]], <4 x i32> [[TMP6]], ptr [[TMP7]])
662; NEON-NEXT:       ret void
663; NO_NEON-LABEL: @store_factor2_wide(
664; NO_NEON:         ret void
665;
666  %interleaved.vec = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
667  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
668  ret void
669}
670
671define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) {
672; NEON-LABEL:    @store_factor3_wide(
673; NEON:       [[TMP2:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
674; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
675; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
676; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr %ptr)
677; NEON-NEXT:       [[TMP6:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
678; NEON-NEXT:       [[TMP7:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
679; NEON-NEXT:       [[TMP8:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
680; NEON-NEXT:       [[TMP9:%.*]] = getelementptr i32, ptr %ptr, i32 12
681; NEON-NEXT:       call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr [[TMP9]])
682; NEON-NEXT:       ret void
683; NO_NEON-LABEL: @store_factor3_wide(
684; NO_NEON:         ret void
685;
686  %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
687  %s1 = shufflevector <8 x i32> %v2, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
688  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
689  store <24 x i32> %interleaved.vec, ptr %ptr, align 4
690  ret void
691}
692
693define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2, <8 x i32> %v3) {
694; NEON-LABEL:    @store_factor4_wide(
695; NEON:       [[TMP2:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
696; NEON-NEXT:       [[TMP3:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
697; NEON-NEXT:       [[TMP4:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
698; NEON-NEXT:       [[TMP5:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 24, i32 25, i32 26, i32 27>
699; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr %ptr)
700; NEON-NEXT:       [[TMP7:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
701; NEON-NEXT:       [[TMP8:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
702; NEON-NEXT:       [[TMP9:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
703; NEON-NEXT:       [[TMP10:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 28, i32 29, i32 30, i32 31>
704; NEON-NEXT:       [[TMP11:%.*]] = getelementptr i32, ptr %ptr, i32 16
705; NEON-NEXT:       call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], ptr [[TMP11]])
706; NEON-NEXT:       ret void
707; NO_NEON-LABEL: @store_factor4_wide(
708; NO_NEON-NOT:     @llvm.aarch64.neon
709; NO_NEON:         ret void
710;
711  %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
712  %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
713  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
714  store <32 x i32> %interleaved.vec, ptr %ptr, align 4
715  ret void
716}
717
718define void @load_factor2_fp128(ptr %ptr) {
719; NEON-LABEL:    @load_factor2_fp128(
720; NEON-NOT:        @llvm.aarch64.neon
721; NEON:            ret void
722; NO_NEON-LABEL: @load_factor2_fp128(
723; NO_NEON-NOT:     @llvm.aarch64.neon
724; NO_NEON:         ret void
725;
726  %interleaved.vec = load <4 x fp128>, ptr %ptr, align 16
727  %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 0, i32 2>
728  %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 1, i32 3>
729  ret void
730}
731
732define <4 x i1> @load_large_vector(ptr %p) {
733; NEON-LABEL:    @load_large_vector(
734; NEON:            [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr
735; NEON-NEXT:       [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
736; NEON-NEXT:       [[TMP2:%.*]] = inttoptr <2 x i64> [[TMP1]] to <2 x ptr>
737; NEON-NEXT:       [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
738; NEON-NEXT:       [[TMP4:%.*]] = inttoptr <2 x i64> [[TMP3]] to <2 x ptr>
739; NEON:            [[LDN1:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr
740; NEON-NEXT:       [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN1]], 1
741; NEON-NEXT:       [[TMP6:%.*]] = inttoptr <2 x i64> [[TMP5]] to <2 x ptr>
742; NEON-NEXT:       [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN1]], 0
743; NEON-NEXT:       [[TMP8:%.*]] = inttoptr <2 x i64> [[TMP7]] to <2 x ptr>
744; NEON-NEXT:       shufflevector <2 x ptr> [[TMP2]], <2 x ptr> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
745; NEON-NEXT:       shufflevector <2 x ptr> [[TMP4]], <2 x ptr> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
746; NO_NEON-LABEL: @load_large_vector(
747; NO_NEON-NOT:     @llvm.aarch64.neon
748; NO_NEON:         ret
749;
750  %l = load <12 x ptr>, ptr %p
751  %s1 = shufflevector <12 x ptr> %l, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
752  %s2 = shufflevector <12 x ptr> %l, <12 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
753  %ret = icmp ne <4 x ptr> %s1, %s2
754  ret <4 x i1> %ret
755}
756