xref: /llvm-project/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll (revision cd6e462d012f289cc4ec12927ca8198f9ed1469e)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s --check-prefix=CHECK-NEON
3; RUN: opt < %s -mattr=+mve.fp -interleaved-access -S | FileCheck %s --check-prefix=CHECK-MVE
4; RUN: opt < %s -interleaved-access -S | FileCheck %s --check-prefix=CHECK-NONE
5; RUN: opt < %s -mattr=+neon -passes=interleaved-access -S | FileCheck %s --check-prefix=CHECK-NEON
6; RUN: opt < %s -mattr=+mve.fp -passes=interleaved-access -S | FileCheck %s --check-prefix=CHECK-MVE
7; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s --check-prefix=CHECK-NONE
8
9target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
10target triple = "arm---eabi"
11
; Factor-2 interleaved load: one <16 x i8> load whose even and odd lanes are
; split into two <8 x i8> vectors by strided shuffles.
; Expected (see the assertions below): with +neon the load and shuffles are
; replaced by a call to @llvm.arm.neon.vld2.v8i8 plus extractvalues; with
; +mve.fp or with no -mattr the IR is left unchanged.
12define void @load_factor2(ptr %ptr) {
13; CHECK-NEON-LABEL: @load_factor2(
14; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0(ptr [[PTR:%.*]], i32 4)
15; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 1
16; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 0
17; CHECK-NEON-NEXT:    ret void
18;
19; CHECK-MVE-LABEL: @load_factor2(
20; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 4
21; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
22; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
23; CHECK-MVE-NEXT:    ret void
24;
25; CHECK-NONE-LABEL: @load_factor2(
26; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 4
27; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
28; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
29; CHECK-NONE-NEXT:    ret void
30;
31  %interleaved.vec = load <16 x i8>, ptr %ptr, align 4
32  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
33  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
34  ret void
35}
36
; Factor-3 interleaved load: one <6 x i32> load de-interleaved into three
; <2 x i32> vectors (lane strides starting at 0, 1 and 2).
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld3.v2i32 plus extractvalues; with +mve.fp or with no -mattr
; the IR is left unchanged.
37define void @load_factor3(ptr %ptr) {
38; CHECK-NEON-LABEL: @load_factor3(
39; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0(ptr [[PTR:%.*]], i32 4)
40; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
41; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
42; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
43; CHECK-NEON-NEXT:    ret void
44;
45; CHECK-MVE-LABEL: @load_factor3(
46; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <6 x i32>, ptr [[PTR:%.*]], align 4
47; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 0, i32 3>
48; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 1, i32 4>
49; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 2, i32 5>
50; CHECK-MVE-NEXT:    ret void
51;
52; CHECK-NONE-LABEL: @load_factor3(
53; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <6 x i32>, ptr [[PTR:%.*]], align 4
54; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 0, i32 3>
55; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 1, i32 4>
56; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 2, i32 5>
57; CHECK-NONE-NEXT:    ret void
58;
59  %interleaved.vec = load <6 x i32>, ptr %ptr, align 4
60  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
61  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
62  %v2 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
63  ret void
64}
65
; Factor-4 interleaved load: one <16 x i32> load de-interleaved into four
; <4 x i32> vectors (lane strides starting at 0, 1, 2 and 3).
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld4.v4i32 plus extractvalues; with +mve.fp or with no -mattr
; the IR is left unchanged.
66define void @load_factor4(ptr %ptr) {
67; CHECK-NEON-LABEL: @load_factor4(
68; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[PTR:%.*]], i32 4)
69; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
70; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
71; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
72; CHECK-NEON-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
73; CHECK-NEON-NEXT:    ret void
74;
75; CHECK-MVE-LABEL: @load_factor4(
76; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
77; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
78; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
79; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
80; CHECK-MVE-NEXT:    [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
81; CHECK-MVE-NEXT:    ret void
82;
83; CHECK-NONE-LABEL: @load_factor4(
84; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
85; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
86; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
87; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
88; CHECK-NONE-NEXT:    [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
89; CHECK-NONE-NEXT:    ret void
90;
91  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
92  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
93  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
94  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
95  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
96  ret void
97}
98
; Factor-2 interleaved store: two <8 x i8> arguments are interleaved lane by
; lane into a <16 x i8> vector, which is then stored.
; Expected (see the assertions below): with +neon the shuffle and store become
; two sub-vector shuffles feeding a call to @llvm.arm.neon.vst2; with +mve.fp
; or with no -mattr the IR is left unchanged.
99define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) {
100; CHECK-NEON-LABEL: @store_factor2(
101; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
102; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
103; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst2.p0.v8i8(ptr [[PTR:%.*]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 4)
104; CHECK-NEON-NEXT:    ret void
105;
106; CHECK-MVE-LABEL: @store_factor2(
107; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
108; CHECK-MVE-NEXT:    store <16 x i8> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
109; CHECK-MVE-NEXT:    ret void
110;
111; CHECK-NONE-LABEL: @store_factor2(
112; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
113; CHECK-NONE-NEXT:    store <16 x i8> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
114; CHECK-NONE-NEXT:    ret void
115;
116  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
117  store <16 x i8> %interleaved.vec, ptr %ptr, align 4
118  ret void
119}
120
; Factor-3 interleaved store: three <4 x i32> arguments are first concatenated
; into two <8 x i32> vectors (%s1 pads %v2 with undef lanes), then interleaved
; into a <12 x i32> vector, which is stored.
; Expected (see the assertions below): with +neon the final shuffle and store
; become three sub-vector shuffles feeding @llvm.arm.neon.vst3; with +mve.fp
; or with no -mattr the interleaving shuffle and store are kept.
; The undef mask elements written in %s1 appear as poison in the expected
; output.
121define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
122; CHECK-NEON-LABEL: @store_factor3(
123; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
124; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
125; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
126; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
127; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
128; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
129; CHECK-NEON-NEXT:    ret void
130;
131; CHECK-MVE-LABEL: @store_factor3(
132; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
133; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
134; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
135; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
136; CHECK-MVE-NEXT:    ret void
137;
138; CHECK-NONE-LABEL: @store_factor3(
139; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
140; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
141; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
142; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
143; CHECK-NONE-NEXT:    ret void
144;
145  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
146  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
147  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
148  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
149  ret void
150}
151
; Factor-4 interleaved store: four <4 x i32> arguments are concatenated
; pairwise into two <8 x i32> vectors, interleaved into a <16 x i32> vector,
; and stored.
; Expected (see the assertions below): with +neon the final shuffle and store
; become four sub-vector shuffles feeding @llvm.arm.neon.vst4; with +mve.fp
; or with no -mattr the interleaving shuffle and store are kept.
152define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
153; CHECK-NEON-LABEL: @store_factor4(
154; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
155; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
156; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
157; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
158; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
159; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
160; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
161; CHECK-NEON-NEXT:    ret void
162;
163; CHECK-MVE-LABEL: @store_factor4(
164; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
165; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
166; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
167; CHECK-MVE-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
168; CHECK-MVE-NEXT:    ret void
169;
170; CHECK-NONE-LABEL: @store_factor4(
171; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
172; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
173; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
174; CHECK-NONE-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
175; CHECK-NONE-NEXT:    ret void
176;
177  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
178  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
179  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
180  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
181  ret void
182}
183
; Factor-2 interleaved load of pointer elements: a <4 x ptr> load from which
; only the even lanes (indices 0 and 2) are extracted; no odd-lane shuffle is
; present.
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld2.v2i32, with only field 0 extracted and converted back
; via inttoptr; with +mve.fp or with no -mattr the IR is left unchanged.
184define void @load_ptrvec_factor2(ptr %ptr) {
185; CHECK-NEON-LABEL: @load_ptrvec_factor2(
186; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0(ptr [[PTR:%.*]], i32 4)
187; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLDN]], 0
188; CHECK-NEON-NEXT:    [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x ptr>
189; CHECK-NEON-NEXT:    ret void
190;
191; CHECK-MVE-LABEL: @load_ptrvec_factor2(
192; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x ptr>, ptr [[PTR:%.*]], align 4
193; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <4 x ptr> [[INTERLEAVED_VEC]], <4 x ptr> undef, <2 x i32> <i32 0, i32 2>
194; CHECK-MVE-NEXT:    ret void
195;
196; CHECK-NONE-LABEL: @load_ptrvec_factor2(
197; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x ptr>, ptr [[PTR:%.*]], align 4
198; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <4 x ptr> [[INTERLEAVED_VEC]], <4 x ptr> undef, <2 x i32> <i32 0, i32 2>
199; CHECK-NONE-NEXT:    ret void
200;
201  %interleaved.vec = load <4 x ptr>, ptr %ptr, align 4
202  %v0 = shufflevector <4 x ptr> %interleaved.vec, <4 x ptr> undef, <2 x i32> <i32 0, i32 2>
203  ret void
204}
205
; Factor-3 interleaved load of pointer elements: a <6 x ptr> load
; de-interleaved into three <2 x ptr> vectors.
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld3.v2i32, with each extracted <2 x i32> field converted
; back via inttoptr; with +mve.fp or with no -mattr the IR is left unchanged.
206define void @load_ptrvec_factor3(ptr %ptr) {
207; CHECK-NEON-LABEL: @load_ptrvec_factor3(
208; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0(ptr [[PTR:%.*]], i32 4)
209; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
210; CHECK-NEON-NEXT:    [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x ptr>
211; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
212; CHECK-NEON-NEXT:    [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x ptr>
213; CHECK-NEON-NEXT:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
214; CHECK-NEON-NEXT:    [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr>
215; CHECK-NEON-NEXT:    ret void
216;
217; CHECK-MVE-LABEL: @load_ptrvec_factor3(
218; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <6 x ptr>, ptr [[PTR:%.*]], align 4
219; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 0, i32 3>
220; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 1, i32 4>
221; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 2, i32 5>
222; CHECK-MVE-NEXT:    ret void
223;
224; CHECK-NONE-LABEL: @load_ptrvec_factor3(
225; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <6 x ptr>, ptr [[PTR:%.*]], align 4
226; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 0, i32 3>
227; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 1, i32 4>
228; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 2, i32 5>
229; CHECK-NONE-NEXT:    ret void
230;
231  %interleaved.vec = load <6 x ptr>, ptr %ptr, align 4
232  %v0 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> undef, <2 x i32> <i32 0, i32 3>
233  %v1 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> undef, <2 x i32> <i32 1, i32 4>
234  %v2 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> undef, <2 x i32> <i32 2, i32 5>
235  ret void
236}
237
; Factor-4 interleaved load of pointer elements: an <8 x ptr> load
; de-interleaved into four <2 x ptr> vectors.
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld4.v2i32, with each extracted <2 x i32> field converted
; back via inttoptr; with +mve.fp or with no -mattr the IR is left unchanged.
238define void @load_ptrvec_factor4(ptr %ptr) {
239; CHECK-NEON-LABEL: @load_ptrvec_factor4(
240; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr [[PTR:%.*]], i32 4)
241; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 3
242; CHECK-NEON-NEXT:    [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x ptr>
243; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
244; CHECK-NEON-NEXT:    [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x ptr>
245; CHECK-NEON-NEXT:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
246; CHECK-NEON-NEXT:    [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr>
247; CHECK-NEON-NEXT:    [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
248; CHECK-NEON-NEXT:    [[TMP9:%.*]] = inttoptr <2 x i32> [[TMP8]] to <2 x ptr>
249; CHECK-NEON-NEXT:    ret void
250;
251; CHECK-MVE-LABEL: @load_ptrvec_factor4(
252; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x ptr>, ptr [[PTR:%.*]], align 4
253; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 0, i32 4>
254; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 1, i32 5>
255; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 2, i32 6>
256; CHECK-MVE-NEXT:    [[V3:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 3, i32 7>
257; CHECK-MVE-NEXT:    ret void
258;
259; CHECK-NONE-LABEL: @load_ptrvec_factor4(
260; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x ptr>, ptr [[PTR:%.*]], align 4
261; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 0, i32 4>
262; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 1, i32 5>
263; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 2, i32 6>
264; CHECK-NONE-NEXT:    [[V3:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 3, i32 7>
265; CHECK-NONE-NEXT:    ret void
266;
267  %interleaved.vec = load <8 x ptr>, ptr %ptr, align 4
268  %v0 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 0, i32 4>
269  %v1 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 1, i32 5>
270  %v2 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 2, i32 6>
271  %v3 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 3, i32 7>
272  ret void
273}
274
; Factor-2 interleaved store of pointer elements: two <2 x ptr> arguments are
; interleaved into a <4 x ptr> vector, which is stored.
; Expected (see the assertions below): with +neon both operands are converted
; with ptrtoint to <2 x i32>, split by sub-vector shuffles and passed to
; @llvm.arm.neon.vst2; with +mve.fp or with no -mattr the IR is left
; unchanged.
275define void @store_ptrvec_factor2(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1) {
276; CHECK-NEON-LABEL: @store_ptrvec_factor2(
277; CHECK-NEON-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x ptr> [[V0:%.*]] to <2 x i32>
278; CHECK-NEON-NEXT:    [[TMP2:%.*]] = ptrtoint <2 x ptr> [[V1:%.*]] to <2 x i32>
279; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
280; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
281; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst2.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
282; CHECK-NEON-NEXT:    ret void
283;
284; CHECK-MVE-LABEL: @store_ptrvec_factor2(
285; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
286; CHECK-MVE-NEXT:    store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
287; CHECK-MVE-NEXT:    ret void
288;
289; CHECK-NONE-LABEL: @store_ptrvec_factor2(
290; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
291; CHECK-NONE-NEXT:    store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
292; CHECK-NONE-NEXT:    ret void
293;
294  %interleaved.vec = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
295  store <4 x ptr> %interleaved.vec, ptr %ptr, align 4
296  ret void
297}
298
; Factor-3 interleaved store of pointer elements: three <2 x ptr> arguments
; are concatenated into two <4 x ptr> vectors (%s1 pads %v2 with undef lanes),
; interleaved into a <6 x ptr> vector, and stored.
; Expected (see the assertions below): with +neon the concatenated vectors are
; converted with ptrtoint to <4 x i32>, split into three <2 x i32> pieces and
; passed to @llvm.arm.neon.vst3; with +mve.fp or with no -mattr the
; interleaving shuffle and store are kept.
; The undef mask elements written in %s1 appear as poison in the expected
; output.
299define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2) {
300; CHECK-NEON-LABEL: @store_ptrvec_factor3(
301; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
302; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
303; CHECK-NEON-NEXT:    [[TMP1:%.*]] = ptrtoint <4 x ptr> [[S0]] to <4 x i32>
304; CHECK-NEON-NEXT:    [[TMP2:%.*]] = ptrtoint <4 x ptr> [[S1]] to <4 x i32>
305; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
306; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
307; CHECK-NEON-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
308; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
309; CHECK-NEON-NEXT:    ret void
310;
311; CHECK-MVE-LABEL: @store_ptrvec_factor3(
312; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
313; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
314; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
315; CHECK-MVE-NEXT:    store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
316; CHECK-MVE-NEXT:    ret void
317;
318; CHECK-NONE-LABEL: @store_ptrvec_factor3(
319; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
320; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
321; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
322; CHECK-NONE-NEXT:    store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
323; CHECK-NONE-NEXT:    ret void
324;
325  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
326  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
327  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
328  store <6 x ptr> %interleaved.vec, ptr %ptr, align 4
329  ret void
330}
331
; Factor-4 interleaved store of pointer elements: four <2 x ptr> arguments are
; concatenated pairwise into two <4 x ptr> vectors, interleaved into an
; <8 x ptr> vector, and stored.
; Expected (see the assertions below): with +neon the concatenated vectors are
; converted with ptrtoint to <4 x i32>, split into four <2 x i32> pieces and
; passed to @llvm.arm.neon.vst4; with +mve.fp or with no -mattr the
; interleaving shuffle and store are kept.
332define void @store_ptrvec_factor4(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2, <2 x ptr> %v3) {
333; CHECK-NEON-LABEL: @store_ptrvec_factor4(
334; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
335; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
336; CHECK-NEON-NEXT:    [[TMP1:%.*]] = ptrtoint <4 x ptr> [[S0]] to <4 x i32>
337; CHECK-NEON-NEXT:    [[TMP2:%.*]] = ptrtoint <4 x ptr> [[S1]] to <4 x i32>
338; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
339; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
340; CHECK-NEON-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
341; CHECK-NEON-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 6, i32 7>
342; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], i32 4)
343; CHECK-NEON-NEXT:    ret void
344;
345; CHECK-MVE-LABEL: @store_ptrvec_factor4(
346; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
347; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
348; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
349; CHECK-MVE-NEXT:    store <8 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
350; CHECK-MVE-NEXT:    ret void
351;
352; CHECK-NONE-LABEL: @store_ptrvec_factor4(
353; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
354; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
355; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
356; CHECK-NONE-NEXT:    store <8 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
357; CHECK-NONE-NEXT:    ret void
358;
359  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
360  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
361  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
362  store <8 x ptr> %interleaved.vec, ptr %ptr, align 4
363  ret void
364}
365
; Factor-2 interleaved load whose shuffle masks contain undef elements: an
; <8 x i32> load split into two <4 x i32> vectors where lanes 0 and 2 of each
; mask are undef.
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld2.v4i32; with +mve.fp it becomes a call to
; @llvm.arm.mve.vld2q.v4i32; with no -mattr the IR is kept, with the undef
; mask elements printed as poison.
366define void @load_undef_mask_factor2(ptr %ptr) {
367; CHECK-NEON-LABEL: @load_undef_mask_factor2(
368; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4)
369; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
370; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
371; CHECK-NEON-NEXT:    ret void
372;
373; CHECK-MVE-LABEL: @load_undef_mask_factor2(
374; CHECK-MVE-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]])
375; CHECK-MVE-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
376; CHECK-MVE-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
377; CHECK-MVE-NEXT:    ret void
378;
379; CHECK-NONE-LABEL: @load_undef_mask_factor2(
380; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr [[PTR:%.*]], align 4
381; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> <i32 poison, i32 2, i32 poison, i32 6>
382; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> <i32 poison, i32 3, i32 poison, i32 7>
383; CHECK-NONE-NEXT:    ret void
384;
385  %interleaved.vec = load <8 x i32>, ptr %ptr, align 4
386  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
387  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
388  ret void
389}
390
; Factor-3 interleaved load whose third shuffle mask is mostly undef: a
; <12 x i32> load split into three <4 x i32> vectors, where %v2 only defines
; lane 0 (index 2) and leaves the remaining lanes undef.
; Expected (see the assertions below): with +neon this becomes a call to
; @llvm.arm.neon.vld3.v4i32; with +mve.fp or with no -mattr the IR is kept,
; with the undef mask elements printed as poison.
391define void @load_undef_mask_factor3(ptr %ptr) {
392; CHECK-NEON-LABEL: @load_undef_mask_factor3(
393; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[PTR:%.*]], i32 4)
394; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
395; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
396; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
397; CHECK-NEON-NEXT:    ret void
398;
399; CHECK-MVE-LABEL: @load_undef_mask_factor3(
400; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4
401; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
402; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
403; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
404; CHECK-MVE-NEXT:    ret void
405;
406; CHECK-NONE-LABEL: @load_undef_mask_factor3(
407; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4
408; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
409; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
410; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
411; CHECK-NONE-NEXT:    ret void
412;
413  %interleaved.vec = load <12 x i32>, ptr %ptr, align 4
414  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
415  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
416  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
417  ret void
418}
419
; Factor-4 de-interleaving load of <16 x i32> where every shuffle defines only
; its first two mask lanes (indices i and i+4) and leaves the last two undef.
; Expected: with +neon the sequence becomes one llvm.arm.neon.vld4.v4i32 call
; plus four extractvalues; the +mve.fp and no-feature runs expect the load and
; shuffles to be left unchanged (undef mask lanes print as poison).
420define void @load_undef_mask_factor4(ptr %ptr) {
421; CHECK-NEON-LABEL: @load_undef_mask_factor4(
422; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[PTR:%.*]], i32 4)
423; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
424; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
425; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
426; CHECK-NEON-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
427; CHECK-NEON-NEXT:    ret void
428;
429; CHECK-MVE-LABEL: @load_undef_mask_factor4(
430; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
431; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
432; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
433; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 poison, i32 poison>
434; CHECK-MVE-NEXT:    [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 poison, i32 poison>
435; CHECK-MVE-NEXT:    ret void
436;
437; CHECK-NONE-LABEL: @load_undef_mask_factor4(
438; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
439; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
440; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
441; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 poison, i32 poison>
442; CHECK-NONE-NEXT:    [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 poison, i32 poison>
443; CHECK-NONE-NEXT:    ret void
444;
445  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
446  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
447  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
448  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
449  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
450  ret void
451}
452
; Factor-2 interleaving store of <8 x i32> whose shuffle mask has its first
; four lanes undef; only the trailing <2, 6, 3, 7> lanes are defined.
; Expected: with +neon the shuffle/store pair becomes two sub-vector shuffles
; (<0..3> and <4..7>) feeding llvm.arm.neon.vst2; with +mve.fp the same two
; sub-vectors feed two llvm.arm.mve.vst2q calls whose last operand is 0 and 1
; respectively; the no-feature run expects the IR to be left unchanged.
453define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
454; CHECK-NEON-LABEL: @store_undef_mask_factor2(
455; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
456; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
457; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 4)
458; CHECK-NEON-NEXT:    ret void
459;
460; CHECK-MVE-LABEL: @store_undef_mask_factor2(
461; CHECK-MVE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
462; CHECK-MVE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
463; CHECK-MVE-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 0)
464; CHECK-MVE-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1)
465; CHECK-MVE-NEXT:    ret void
466;
467; CHECK-NONE-LABEL: @store_undef_mask_factor2(
468; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 6, i32 3, i32 7>
469; CHECK-NONE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
470; CHECK-NONE-NEXT:    ret void
471;
472  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
473  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
474  ret void
475}
476
; Factor-3 interleaving store of <12 x i32>. %s0 concatenates %v0 and %v1, %s1
; widens %v2 to eight lanes, and the interleave mask has undef at positions 2
; and 4.
; Expected: with +neon the interleave shuffle and store become three <4 x i32>
; sub-vector shuffles (<0..3>, <4..7>, <8..11>) feeding llvm.arm.neon.vst3;
; the +mve.fp and no-feature runs expect the IR to be left unchanged.
477define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
478; CHECK-NEON-LABEL: @store_undef_mask_factor3(
479; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
480; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
481; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
482; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
483; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
484; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
485; CHECK-NEON-NEXT:    ret void
486;
487; CHECK-MVE-LABEL: @store_undef_mask_factor3(
488; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
489; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
490; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 poison, i32 1, i32 poison, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
491; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
492; CHECK-MVE-NEXT:    ret void
493;
494; CHECK-NONE-LABEL: @store_undef_mask_factor3(
495; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
496; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
497; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 poison, i32 1, i32 poison, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
498; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
499; CHECK-NONE-NEXT:    ret void
500;
501  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
502  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
503  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
504  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
505  ret void
506}
507
; Factor-4 interleaving store of <16 x i32>. %s0 concatenates %v0/%v1, %s1
; concatenates %v2/%v3, and the interleave mask has undef at positions 3 and 4.
; Expected: with +neon the interleave shuffle and store become four <4 x i32>
; sub-vector shuffles (<0..3>, <4..7>, <8..11>, <12..15>) feeding
; llvm.arm.neon.vst4; the +mve.fp and no-feature runs expect the IR to be left
; unchanged.
508define void @store_undef_mask_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
509; CHECK-NEON-LABEL: @store_undef_mask_factor4(
510; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
511; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
512; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
513; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
514; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
515; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
516; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
517; CHECK-NEON-NEXT:    ret void
518;
519; CHECK-MVE-LABEL: @store_undef_mask_factor4(
520; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
521; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
522; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 poison, i32 poison, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
523; CHECK-MVE-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
524; CHECK-MVE-NEXT:    ret void
525;
526; CHECK-NONE-LABEL: @store_undef_mask_factor4(
527; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
528; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
529; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 poison, i32 poison, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
530; CHECK-NONE-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
531; CHECK-NONE-NEXT:    ret void
532;
533  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
534  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
535  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
536  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
537  ret void
538}
539
; Factor-3 de-interleaving load through a pointer in address space 1. The
; load is written without an explicit alignment; the expected output prints it
; as align 32.
; Expected: with +neon the sequence becomes llvm.arm.neon.vld3.v2i32.p1 (note
; the .p1 pointer suffix) with a trailing i32 32 operand plus three
; extractvalues; the +mve.fp and no-feature runs expect the load and shuffles
; to be left unchanged.
540define void @load_address_space(ptr addrspace(1) %ptr) {
541; CHECK-NEON-LABEL: @load_address_space(
542; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1(ptr addrspace(1) [[PTR:%.*]], i32 32)
543; CHECK-NEON-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
544; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
545; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
546; CHECK-NEON-NEXT:    ret void
547;
548; CHECK-MVE-LABEL: @load_address_space(
549; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr addrspace(1) [[PTR:%.*]], align 32
550; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 0, i32 3>
551; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 1, i32 4>
552; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 2, i32 5>
553; CHECK-MVE-NEXT:    ret void
554;
555; CHECK-NONE-LABEL: @load_address_space(
556; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr addrspace(1) [[PTR:%.*]], align 32
557; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 0, i32 3>
558; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 1, i32 4>
559; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 2, i32 5>
560; CHECK-NONE-NEXT:    ret void
561;
562  %interleaved.vec = load <8 x i32>, ptr addrspace(1) %ptr
563  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 0, i32 3>
564  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 1, i32 4>
565  %v2 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 2, i32 5>
566  ret void
567}
568
; Factor-2 interleaving store of <4 x i32> through a pointer in address space
; 1. The store is written without an explicit alignment; the expected output
; prints it as align 8.
; Expected: with +neon the shuffle/store pair becomes two <2 x i32> sub-vector
; shuffles feeding llvm.arm.neon.vst2.p1.v2i32 with a trailing i32 8 operand;
; the +mve.fp and no-feature runs expect the IR to be left unchanged.
569define void @store_address_space(ptr addrspace(1) %ptr, <2 x i32> %v0, <2 x i32> %v1) {
570; CHECK-NEON-LABEL: @store_address_space(
571; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> <i32 0, i32 1>
572; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[V0]], <2 x i32> [[V1]], <2 x i32> <i32 2, i32 3>
573; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst2.p1.v2i32(ptr addrspace(1) [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 8)
574; CHECK-NEON-NEXT:    ret void
575;
576; CHECK-MVE-LABEL: @store_address_space(
577; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
578; CHECK-MVE-NEXT:    store <4 x i32> [[INTERLEAVED_VEC]], ptr addrspace(1) [[PTR:%.*]], align 8
579; CHECK-MVE-NEXT:    ret void
580;
581; CHECK-NONE-LABEL: @store_address_space(
582; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
583; CHECK-NONE-NEXT:    store <4 x i32> [[INTERLEAVED_VEC]], ptr addrspace(1) [[PTR:%.*]], align 8
584; CHECK-NONE-NEXT:    ret void
585;
586  %interleaved.vec = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
587  store <4 x i32> %interleaved.vec, ptr addrspace(1) %ptr
588  ret void
589}
590
; Factor-2 de-interleaving load of <8 x half> (even lanes into %v0, odd lanes
; into %v1).
; Expected: all three configurations (+neon, +mve.fp, no feature) leave the
; load and both shuffles unchanged; no vldN intrinsic is expected for this
; half-precision case.
591define void @load_f16_factor2(ptr %ptr) {
592; CHECK-NEON-LABEL: @load_f16_factor2(
593; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x half>, ptr [[PTR:%.*]], align 4
594; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
595; CHECK-NEON-NEXT:    [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
596; CHECK-NEON-NEXT:    ret void
597;
598; CHECK-MVE-LABEL: @load_f16_factor2(
599; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x half>, ptr [[PTR:%.*]], align 4
600; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
601; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
602; CHECK-MVE-NEXT:    ret void
603;
604; CHECK-NONE-LABEL: @load_f16_factor2(
605; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <8 x half>, ptr [[PTR:%.*]], align 4
606; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
607; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
608; CHECK-NONE-NEXT:    ret void
609;
610  %interleaved.vec = load <8 x half>, ptr %ptr, align 4
611  %v0 = shufflevector <8 x half> %interleaved.vec, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
612  %v1 = shufflevector <8 x half> %interleaved.vec, <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
613  ret void
614}
615
; Factor-2 interleaving store of <8 x half> built from two <4 x half> inputs.
; Expected: all three configurations (+neon, +mve.fp, no feature) leave the
; shuffle and store unchanged; no vstN intrinsic is expected for this
; half-precision case.
616define void @store_f16_factor2(ptr %ptr, <4 x half> %v0, <4 x half> %v1) {
617; CHECK-NEON-LABEL: @store_f16_factor2(
618; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
619; CHECK-NEON-NEXT:    store <8 x half> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
620; CHECK-NEON-NEXT:    ret void
621;
622; CHECK-MVE-LABEL: @store_f16_factor2(
623; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
624; CHECK-MVE-NEXT:    store <8 x half> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
625; CHECK-MVE-NEXT:    ret void
626;
627; CHECK-NONE-LABEL: @store_f16_factor2(
628; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
629; CHECK-NONE-NEXT:    store <8 x half> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
630; CHECK-NONE-NEXT:    ret void
631;
632  %interleaved.vec = shufflevector <4 x half> %v0, <4 x half> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
633  store <8 x half> %interleaved.vec, ptr %ptr, align 4
634  ret void
635}
636
; Negative test: a <3 x float> load followed by a single shuffle with mask
; <0, 2, undef>. The three-element vector cannot be split evenly by a factor
; of two.
; Expected: all three configurations (+neon, +mve.fp, no feature) leave the
; load and shuffle unchanged.
637define void @load_illegal_factor2(ptr %ptr) nounwind {
638; CHECK-NEON-LABEL: @load_illegal_factor2(
639; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16
640; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison>
641; CHECK-NEON-NEXT:    ret void
642;
643; CHECK-MVE-LABEL: @load_illegal_factor2(
644; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16
645; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison>
646; CHECK-MVE-NEXT:    ret void
647;
648; CHECK-NONE-LABEL: @load_illegal_factor2(
649; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16
650; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison>
651; CHECK-NONE-NEXT:    ret void
652;
653  %interleaved.vec = load <3 x float>, ptr %ptr, align 16
654  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
655  ret void
656}
657
; Negative test: a <3 x float> shuffle with mask <0, 2, undef> followed by a
; store of the three-element result.
; Expected: all three configurations (+neon, +mve.fp, no feature) leave the
; shuffle and store unchanged.
658define void @store_illegal_factor2(ptr %ptr, <3 x float> %v0) nounwind {
659; CHECK-NEON-LABEL: @store_illegal_factor2(
660; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison>
661; CHECK-NEON-NEXT:    store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16
662; CHECK-NEON-NEXT:    ret void
663;
664; CHECK-MVE-LABEL: @store_illegal_factor2(
665; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison>
666; CHECK-MVE-NEXT:    store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16
667; CHECK-MVE-NEXT:    ret void
668;
669; CHECK-NONE-LABEL: @store_illegal_factor2(
670; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison>
671; CHECK-NONE-NEXT:    store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16
672; CHECK-NONE-NEXT:    ret void
673;
674  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
675  store <3 x float> %interleaved.vec, ptr %ptr, align 16
676  ret void
677}
678
; Factor-4 interleaving store of <8 x i32> taken from two <32 x i32> inputs
; with a general mask: the four interleaved sub-vectors start at element
; indices 4, 16, 32 and 8 of the concatenated inputs, each two lanes long.
; Expected: with +neon the shuffle/store pair becomes four <2 x i32>
; sub-vector shuffles (<4,5>, <16,17>, <32,33>, <8,9>) feeding
; llvm.arm.neon.vst4.p0.v2i32; the +mve.fp and no-feature runs expect the IR
; to be left unchanged.
679define void @store_general_mask_factor4(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
680; CHECK-NEON-LABEL: @store_general_mask_factor4(
681; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
682; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
683; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
684; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
685; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
686; CHECK-NEON-NEXT:    ret void
687;
688; CHECK-MVE-LABEL: @store_general_mask_factor4(
689; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
690; CHECK-MVE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
691; CHECK-MVE-NEXT:    ret void
692;
693; CHECK-NONE-LABEL: @store_general_mask_factor4(
694; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
695; CHECK-NONE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
696; CHECK-NONE-NEXT:    ret void
697;
698  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
699  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
700  ret void
701}
702
; General-mask factor-4 store where the first mask lane is undef; the rest of
; the mask is <16, 32, 8, 5, 17, 33, 9>.
; Expected: with +neon the output still uses sub-vector shuffles <4,5>,
; <16,17>, <32,33>, <8,9> feeding llvm.arm.neon.vst4, i.e. the first
; sub-vector's start index 4 is expected even though its first lane was undef;
; the +mve.fp and no-feature runs expect the IR to be left unchanged.
703define void @store_general_mask_factor4_undefbeg(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
704; CHECK-NEON-LABEL: @store_general_mask_factor4_undefbeg(
705; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
706; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
707; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
708; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
709; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
710; CHECK-NEON-NEXT:    ret void
711;
712; CHECK-MVE-LABEL: @store_general_mask_factor4_undefbeg(
713; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 poison, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
714; CHECK-MVE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
715; CHECK-MVE-NEXT:    ret void
716;
717; CHECK-NONE-LABEL: @store_general_mask_factor4_undefbeg(
718; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 poison, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
719; CHECK-NONE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
720; CHECK-NONE-NEXT:    ret void
721;
722  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
723  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
724  ret void
725}
726
; General-mask factor-4 store where the last mask lane is undef; the rest of
; the mask is <4, 16, 32, 8, 5, 17, 33>.
; Expected: with +neon the output still uses sub-vector shuffles <4,5>,
; <16,17>, <32,33>, <8,9> feeding llvm.arm.neon.vst4, i.e. the fourth
; sub-vector's second lane 9 is expected even though it was undef in the mask;
; the +mve.fp and no-feature runs expect the IR to be left unchanged.
727define void @store_general_mask_factor4_undefend(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
728; CHECK-NEON-LABEL: @store_general_mask_factor4_undefend(
729; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
730; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
731; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
732; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
733; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
734; CHECK-NEON-NEXT:    ret void
735;
736; CHECK-MVE-LABEL: @store_general_mask_factor4_undefend(
737; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 poison>
738; CHECK-MVE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
739; CHECK-MVE-NEXT:    ret void
740;
741; CHECK-NONE-LABEL: @store_general_mask_factor4_undefend(
742; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 poison>
743; CHECK-NONE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
744; CHECK-NONE-NEXT:    ret void
745;
746  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
747  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
748  ret void
749}
750
; General-mask factor-4 store where mask lanes 1 and 6 are undef, so the
; second and third sub-vectors each have exactly one defined lane (17 and 32
; respectively).
; Expected: with +neon the output still uses sub-vector shuffles <4,5>,
; <16,17>, <32,33>, <8,9> feeding llvm.arm.neon.vst4; the +mve.fp and
; no-feature runs expect the IR to be left unchanged.
751define void @store_general_mask_factor4_undefmid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
752; CHECK-NEON-LABEL: @store_general_mask_factor4_undefmid(
753; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
754; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
755; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
756; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
757; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
758; CHECK-NEON-NEXT:    ret void
759;
760; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmid(
761; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 32, i32 8, i32 5, i32 17, i32 poison, i32 9>
762; CHECK-MVE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
763; CHECK-MVE-NEXT:    ret void
764;
765; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmid(
766; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 32, i32 8, i32 5, i32 17, i32 poison, i32 9>
767; CHECK-NONE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
768; CHECK-NONE-NEXT:    ret void
769;
770  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
771  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
772  ret void
773}
774
; General-mask factor-4 store where mask lanes 1, 2, 4, 5 and 6 are undef, so
; the second and third sub-vectors have no defined lane at all.
; Expected: with +neon the first and fourth sub-vectors are <4,5> and <8,9>,
; while the two fully-undef sub-vectors are expected as <0,1>, all feeding
; llvm.arm.neon.vst4; the +mve.fp and no-feature runs expect the IR to be left
; unchanged.
775define void @store_general_mask_factor4_undefmulti(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
776; CHECK-NEON-LABEL: @store_general_mask_factor4_undefmulti(
777; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
778; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 0, i32 1>
779; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 0, i32 1>
780; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
781; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
782; CHECK-NEON-NEXT:    ret void
783;
784; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmulti(
785; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 poison, i32 9>
786; CHECK-MVE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
787; CHECK-MVE-NEXT:    ret void
788;
789; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmulti(
790; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 poison, i32 9>
791; CHECK-NONE-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
792; CHECK-NONE-NEXT:    ret void
793;
794  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
795  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
796  ret void
797}
798
; Factor-3 interleaving store of <12 x i32> taken from two <32 x i32> inputs
; with a general mask: the three interleaved sub-vectors start at element
; indices 4, 32 and 16 of the concatenated inputs, each four lanes long.
; Expected: with +neon the shuffle/store pair becomes three <4 x i32>
; sub-vector shuffles (<4..7>, <32..35>, <16..19>) feeding
; llvm.arm.neon.vst3.p0.v4i32; the +mve.fp and no-feature runs expect the IR
; to be left unchanged.
799define void @store_general_mask_factor3(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
800; CHECK-NEON-LABEL: @store_general_mask_factor3(
801; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
802; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
803; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
804; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
805; CHECK-NEON-NEXT:    ret void
806;
807; CHECK-MVE-LABEL: @store_general_mask_factor3(
808; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
809; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
810; CHECK-MVE-NEXT:    ret void
811;
812; CHECK-NONE-LABEL: @store_general_mask_factor3(
813; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
814; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
815; CHECK-NONE-NEXT:    ret void
816;
817  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
818  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
819  ret void
820}
821
; General-mask factor-3 store where mask lanes 3 and 6 are undef; both belong
; to the first sub-vector, whose remaining defined lanes are 4 (first) and 7
; (last).
; Expected: with +neon the output still uses sub-vector shuffles <4..7>,
; <32..35>, <16..19> feeding llvm.arm.neon.vst3; the +mve.fp and no-feature
; runs expect the IR to be left unchanged.
822define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
823; CHECK-NEON-LABEL: @store_general_mask_factor3_undefmultimid(
824; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
825; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
826; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
827; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
828; CHECK-NEON-NEXT:    ret void
829;
830; CHECK-MVE-LABEL: @store_general_mask_factor3_undefmultimid(
831; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
832; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
833; CHECK-MVE-NEXT:    ret void
834;
835; CHECK-NONE-LABEL: @store_general_mask_factor3_undefmultimid(
836; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
837; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
838; CHECK-NONE-NEXT:    ret void
839;
840  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
841  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
842  ret void
843}
844
; Negative variant of the undef-in-the-middle case: the first lane of the
; mask reads (4, undef, undef, 8). The defined ends are three positions apart
; but differ by 4, so they cannot belong to one consecutive run.
; Expected: all three check prefixes (NEON, MVE, NONE) leave the shuffle and
; store unchanged.
845define void @store_general_mask_factor3_undef_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
846; CHECK-NEON-LABEL: @store_general_mask_factor3_undef_fail(
847; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 8, i32 35, i32 19>
848; CHECK-NEON-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
849; CHECK-NEON-NEXT:    ret void
850;
851; CHECK-MVE-LABEL: @store_general_mask_factor3_undef_fail(
852; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 8, i32 35, i32 19>
853; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
854; CHECK-MVE-NEXT:    ret void
855;
856; CHECK-NONE-LABEL: @store_general_mask_factor3_undef_fail(
857; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 8, i32 35, i32 19>
858; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
859; CHECK-NONE-NEXT:    ret void
860;
861  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
862  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
863  ret void
864}
865
; Factor-3 interleaved store in which every element of the first lane of the
; mask is undef; the other two lanes start at indices 32 and 16.
; Expected: with +neon the store still becomes a vst3, and the sub-vector
; standing in for the undef lane is <0, 1, 2, 3>; with +mve.fp or no vector
; extension the shuffle and store are left unchanged.
866define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
867; CHECK-NEON-LABEL: @store_general_mask_factor3_undeflane(
868; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
869; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
870; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
871; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
872; CHECK-NEON-NEXT:    ret void
873;
874; CHECK-MVE-LABEL: @store_general_mask_factor3_undeflane(
875; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
876; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
877; CHECK-MVE-NEXT:    ret void
878;
879; CHECK-NONE-LABEL: @store_general_mask_factor3_undeflane(
880; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
881; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
882; CHECK-NONE-NEXT:    ret void
883;
884  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
885  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
886  ret void
887}
888
; Factor-3 interleaved store where only the last element of the first lane
; is defined: (undef, undef, undef, 2).
; Expected: all three check prefixes (NEON, MVE, NONE) leave the shuffle and
; store unchanged.
; NOTE(review): presumably rejected because index 2 in the fourth position
; would put the start of the lane at -1 -- confirm against the pass's mask
; check.
889define void @store_general_mask_factor3_endstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
890; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_fail(
891; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 2, i32 35, i32 19>
892; CHECK-NEON-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
893; CHECK-NEON-NEXT:    ret void
894;
895; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_fail(
896; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 2, i32 35, i32 19>
897; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
898; CHECK-MVE-NEXT:    ret void
899;
900; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_fail(
901; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 2, i32 35, i32 19>
902; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
903; CHECK-NONE-NEXT:    ret void
904;
905  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
906  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
907  ret void
908}
909
; Factor-3 interleaved store where only the last element of the first lane
; is defined: (undef, undef, undef, 7).
; Expected: with +neon the store becomes a vst3 and the first sub-vector is
; <4, 5, 6, 7>, i.e. the run that ends at the defined index 7; with +mve.fp
; or no vector extension the shuffle and store are left unchanged.
910define void @store_general_mask_factor3_endstart_pass(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
911; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_pass(
912; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
913; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
914; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
915; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
916; CHECK-NEON-NEXT:    ret void
917;
918; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_pass(
919; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
920; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
921; CHECK-MVE-NEXT:    ret void
922;
923; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_pass(
924; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
925; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
926; CHECK-NONE-NEXT:    ret void
927;
928  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
929  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
930  ret void
931}
932
; Factor-3 interleaved store where only the second element of the first lane
; is defined: (undef, 0, undef, undef).
; Expected: all three check prefixes (NEON, MVE, NONE) leave the shuffle and
; store unchanged.
; NOTE(review): presumably rejected because index 0 in the second position
; would put the start of the lane at -1 -- confirm against the pass's mask
; check.
933define void @store_general_mask_factor3_midstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
934; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_fail(
935; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 0, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
936; CHECK-NEON-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
937; CHECK-NEON-NEXT:    ret void
938;
939; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_fail(
940; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 0, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
941; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
942; CHECK-MVE-NEXT:    ret void
943;
944; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_fail(
945; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 0, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
946; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
947; CHECK-NONE-NEXT:    ret void
948;
949  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
950  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
951  ret void
952}
953
; Factor-3 interleaved store where only the second element of the first lane
; is defined: (undef, 1, undef, undef).
; Expected: with +neon the store becomes a vst3 and the first sub-vector is
; <0, 1, 2, 3>, i.e. the run that has index 1 in its second position; with
; +mve.fp or no vector extension the shuffle and store are left unchanged.
954define void @store_general_mask_factor3_midstart_pass(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
955; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_pass(
956; CHECK-NEON-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
957; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
958; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
959; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
960; CHECK-NEON-NEXT:    ret void
961;
962; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_pass(
963; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 1, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
964; CHECK-MVE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
965; CHECK-MVE-NEXT:    ret void
966;
967; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_pass(
968; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 1, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
969; CHECK-NONE-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
970; CHECK-NONE-NEXT:    ret void
971;
972  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
973  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
974  ret void
975}
976
; External <4 x float> global; @no_interleave stores its shuffle result here.
977@g = external global <4 x float>
978
979; The following does not give a valid interleaved store
; The shuffle mask is <0, 7, 1, undef> over two copies of %a0, and the result
; is stored to @g with align 16.
; Expected: all three check prefixes (NEON, MVE, NONE) leave the shufflevector
; and the store unchanged; no interleaved-store intrinsic is emitted.
980define void @no_interleave(<4 x float> %a0) {
981; CHECK-NEON-LABEL: @no_interleave(
982; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 poison>
983; CHECK-NEON-NEXT:    store <4 x float> [[V0]], ptr @g, align 16
984; CHECK-NEON-NEXT:    ret void
985;
986; CHECK-MVE-LABEL: @no_interleave(
987; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 poison>
988; CHECK-MVE-NEXT:    store <4 x float> [[V0]], ptr @g, align 16
989; CHECK-MVE-NEXT:    ret void
990;
991; CHECK-NONE-LABEL: @no_interleave(
992; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 poison>
993; CHECK-NONE-NEXT:    store <4 x float> [[V0]], ptr @g, align 16
994; CHECK-NONE-NEXT:    ret void
995;
996  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
997  store <4 x float> %v0, ptr @g, align 16
998  ret void
999}
1000
; Factor-2 de-interleaving load of a <16 x i32> vector, producing two
; <8 x i32> results (even and odd elements).
; Expected: with +neon the load is split into two vld2.v4i32 calls, the second
; at %ptr + 8 i32 elements, and the extracted halves are concatenated back
; into <8 x i32> vectors; with +mve.fp the same shape is expected using
; vld2q.v4i32; with no vector extension the load and shuffles are unchanged.
1001define void @load_factor2_wide2(ptr %ptr) {
1002; CHECK-NEON-LABEL: @load_factor2_wide2(
1003; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4)
1004; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1005; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1006; CHECK-NEON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1007; CHECK-NEON-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP5]], i32 4)
1008; CHECK-NEON-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1009; CHECK-NEON-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1010; CHECK-NEON-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1011; CHECK-NEON-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1012; CHECK-NEON-NEXT:    ret void
1013;
1014; CHECK-MVE-LABEL: @load_factor2_wide2(
1015; CHECK-MVE-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]])
1016; CHECK-MVE-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1017; CHECK-MVE-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1018; CHECK-MVE-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1019; CHECK-MVE-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP4]])
1020; CHECK-MVE-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1021; CHECK-MVE-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1022; CHECK-MVE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1023; CHECK-MVE-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1024; CHECK-MVE-NEXT:    ret void
1025;
1026; CHECK-NONE-LABEL: @load_factor2_wide2(
1027; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
1028; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1029; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1030; CHECK-NONE-NEXT:    ret void
1031;
1032  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
1033  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1034  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1035  ret void
1036}
1037
; Factor-2 de-interleaving load of a <24 x i32> vector, producing two
; <12 x i32> results (even and odd elements).
; Expected: with +neon the load is split into three vld2.v4i32 calls, each
; pointer advanced by 8 i32 elements from the previous one. The three
; <4 x i32> pieces per result are rebuilt into <12 x i32> in two steps: the
; first two are concatenated to <8 x i32>, the third is widened to <8 x i32>
; with poison padding, then both are combined. With +mve.fp the same shape is
; expected using vld2q.v4i32; with no vector extension the load and shuffles
; are unchanged.
1038define void @load_factor2_wide3(ptr %ptr) {
1039; CHECK-NEON-LABEL: @load_factor2_wide3(
1040; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4)
1041; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1042; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1043; CHECK-NEON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1044; CHECK-NEON-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP5]], i32 4)
1045; CHECK-NEON-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1046; CHECK-NEON-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1047; CHECK-NEON-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 8
1048; CHECK-NEON-NEXT:    [[VLDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP9]], i32 4)
1049; CHECK-NEON-NEXT:    [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
1050; CHECK-NEON-NEXT:    [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
1051; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1052; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
1053; CHECK-NEON-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1054; CHECK-NEON-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1055; CHECK-NEON-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
1056; CHECK-NEON-NEXT:    [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1057; CHECK-NEON-NEXT:    ret void
1058;
1059; CHECK-MVE-LABEL: @load_factor2_wide3(
1060; CHECK-MVE-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]])
1061; CHECK-MVE-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1062; CHECK-MVE-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1063; CHECK-MVE-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1064; CHECK-MVE-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP4]])
1065; CHECK-MVE-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1066; CHECK-MVE-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1067; CHECK-MVE-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i32 8
1068; CHECK-MVE-NEXT:    [[VLDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP7]])
1069; CHECK-MVE-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
1070; CHECK-MVE-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
1071; CHECK-MVE-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1072; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
1073; CHECK-MVE-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1074; CHECK-MVE-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1075; CHECK-MVE-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
1076; CHECK-MVE-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1077; CHECK-MVE-NEXT:    ret void
1078;
1079; CHECK-NONE-LABEL: @load_factor2_wide3(
1080; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, ptr [[PTR:%.*]], align 4
1081; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
1082; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
1083; CHECK-NONE-NEXT:    ret void
1084;
1085  %interleaved.vec = load <24 x i32>, ptr %ptr, align 4
1086  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
1087  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
1088  ret void
1089}
1090
; Factor-3 de-interleaving load of a <24 x i32> vector, producing three
; <8 x i32> results.
; Expected: with +neon the load is split into two vld3.v4i32 calls, the second
; at %ptr + 12 i32 elements, and matching <4 x i32> pieces are concatenated
; into <8 x i32> vectors; with +mve.fp or no vector extension the load and
; shuffles are left unchanged.
1091define void @load_factor3_wide(ptr %ptr) {
1092; CHECK-NEON-LABEL: @load_factor3_wide(
1093; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[PTR:%.*]], i32 4)
1094; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
1095; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
1096; CHECK-NEON-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
1097; CHECK-NEON-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[PTR]], i32 12
1098; CHECK-NEON-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[TMP6]], i32 4)
1099; CHECK-NEON-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 2
1100; CHECK-NEON-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 1
1101; CHECK-NEON-NEXT:    [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 0
1102; CHECK-NEON-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1103; CHECK-NEON-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1104; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1105; CHECK-NEON-NEXT:    ret void
1106;
1107; CHECK-MVE-LABEL: @load_factor3_wide(
1108; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, ptr [[PTR:%.*]], align 4
1109; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
1110; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
1111; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
1112; CHECK-MVE-NEXT:    ret void
1113;
1114; CHECK-NONE-LABEL: @load_factor3_wide(
1115; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, ptr [[PTR:%.*]], align 4
1116; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
1117; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
1118; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
1119; CHECK-NONE-NEXT:    ret void
1120;
1121  %interleaved.vec = load <24 x i32>, ptr %ptr, align 4
1122  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
1123  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
1124  %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
1125  ret void
1126}
1127
; Factor-4 de-interleaving load of a <32 x i32> vector, producing four
; <8 x i32> results.
; Expected: with +neon the load is split into two vld4.v4i32 calls, the second
; at %ptr + 16 i32 elements, and matching <4 x i32> pieces are concatenated
; into <8 x i32> vectors; with +mve.fp or no vector extension the load and
; shuffles are left unchanged.
1128define void @load_factor4_wide(ptr %ptr) {
1129; CHECK-NEON-LABEL: @load_factor4_wide(
1130; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[PTR:%.*]], i32 4)
1131; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
1132; CHECK-NEON-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
1133; CHECK-NEON-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
1134; CHECK-NEON-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
1135; CHECK-NEON-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR]], i32 16
1136; CHECK-NEON-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[TMP7]], i32 4)
1137; CHECK-NEON-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 3
1138; CHECK-NEON-NEXT:    [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 2
1139; CHECK-NEON-NEXT:    [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 1
1140; CHECK-NEON-NEXT:    [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 0
1141; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1142; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1143; CHECK-NEON-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1144; CHECK-NEON-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1145; CHECK-NEON-NEXT:    ret void
1146;
1147; CHECK-MVE-LABEL: @load_factor4_wide(
1148; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <32 x i32>, ptr [[PTR:%.*]], align 4
1149; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
1150; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1151; CHECK-MVE-NEXT:    [[V2:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
1152; CHECK-MVE-NEXT:    [[V3:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1153; CHECK-MVE-NEXT:    ret void
1154;
1155; CHECK-NONE-LABEL: @load_factor4_wide(
1156; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <32 x i32>, ptr [[PTR:%.*]], align 4
1157; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
1158; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1159; CHECK-NONE-NEXT:    [[V2:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
1160; CHECK-NONE-NEXT:    [[V3:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1161; CHECK-NONE-NEXT:    ret void
1162;
1163  %interleaved.vec = load <32 x i32>, ptr %ptr, align 4
1164  %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
1165  %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1166  %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
1167  %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1168  ret void
1169}
1170
; Factor-2 interleaved store of two <8 x i32> vectors as one <16 x i32>.
; Expected: with +neon the store is split into two vst2.v4i32 calls, the
; second at %ptr + 8 i32 elements, each fed by <4 x i32> sub-vectors of %v0
; and %v1. With +mve.fp each half is expected as a pair of vst2q.v4i32 calls
; on the same pointer and operands, differing only in the trailing i32
; operand (0, then 1). With no vector extension the shuffle and store are
; left unchanged.
1171define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) {
1172; CHECK-NEON-LABEL: @store_factor2_wide(
1173; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1174; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1175; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
1176; CHECK-NEON-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1177; CHECK-NEON-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1178; CHECK-NEON-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1179; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 4)
1180; CHECK-NEON-NEXT:    ret void
1181;
1182; CHECK-MVE-LABEL: @store_factor2_wide(
1183; CHECK-MVE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1184; CHECK-MVE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1185; CHECK-MVE-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 0)
1186; CHECK-MVE-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 1)
1187; CHECK-MVE-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1188; CHECK-MVE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1189; CHECK-MVE-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1190; CHECK-MVE-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], i32 0)
1191; CHECK-MVE-NEXT:    call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], i32 1)
1192; CHECK-MVE-NEXT:    ret void
1193;
1194; CHECK-NONE-LABEL: @store_factor2_wide(
1195; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1196; CHECK-NONE-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
1197; CHECK-NONE-NEXT:    ret void
1198;
1199  %interleaved.vec = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1200  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
1201  ret void
1202}
1203
; Factor-3 interleaved store of three <8 x i32> vectors (a <24 x i32> store).
; The NEON checks expect the store to be split into two vst3.v4i32 calls, the
; second one addressed 12 i32 elements past %ptr. The MVE and no-feature checks
; expect the shuffles and the wide store to be left unchanged.
1204define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) {
1205; CHECK-NEON-LABEL: @store_factor3_wide(
1206; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1207; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1208; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1209; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1210; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
1211; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
1212; CHECK-NEON-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[PTR]], i32 12
1213; CHECK-NEON-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1214; CHECK-NEON-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1215; CHECK-NEON-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 20, i32 21, i32 22, i32 23>
1216; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
1217; CHECK-NEON-NEXT:    ret void
1218;
1219; CHECK-MVE-LABEL: @store_factor3_wide(
1220; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1221; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1222; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
1223; CHECK-MVE-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
1224; CHECK-MVE-NEXT:    ret void
1225;
1226; CHECK-NONE-LABEL: @store_factor3_wide(
1227; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1228; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
1229; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
1230; CHECK-NONE-NEXT:    store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
1231; CHECK-NONE-NEXT:    ret void
1232;
1233  %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1234  %s1 = shufflevector <8 x i32> %v2, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1235  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
1236  store <24 x i32> %interleaved.vec, ptr %ptr, align 4
1237  ret void
1238}
1239
; Factor-4 interleaved store of four <8 x i32> vectors (a <32 x i32> store).
; The NEON checks expect the store to be split into two vst4.v4i32 calls, the
; second one addressed 16 i32 elements past %ptr. The MVE and no-feature checks
; expect the shuffles and the wide store to be left unchanged.
1240define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2, <8 x i32> %v3) {
1241; CHECK-NEON-LABEL: @store_factor4_wide(
1242; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1243; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1244; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1245; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1246; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
1247; CHECK-NEON-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 24, i32 25, i32 26, i32 27>
1248; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
1249; CHECK-NEON-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR]], i32 16
1250; CHECK-NEON-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1251; CHECK-NEON-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1252; CHECK-NEON-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 20, i32 21, i32 22, i32 23>
1253; CHECK-NEON-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 28, i32 29, i32 30, i32 31>
1254; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 4)
1255; CHECK-NEON-NEXT:    ret void
1256;
1257; CHECK-MVE-LABEL: @store_factor4_wide(
1258; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1259; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1260; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1261; CHECK-MVE-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
1262; CHECK-MVE-NEXT:    ret void
1263;
1264; CHECK-NONE-LABEL: @store_factor4_wide(
1265; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1266; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1267; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1268; CHECK-NONE-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
1269; CHECK-NONE-NEXT:    ret void
1270;
1271  %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1272  %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1273  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1274  store <32 x i32> %interleaved.vec, ptr %ptr, align 4
1275  ret void
1276}
1277
; Factor-2 de-interleaving shuffles over a <4 x fp128> load. All three check
; prefixes (NEON, MVE, no features) expect the load and both shuffles to be
; left exactly as written, i.e. no interleaved-load intrinsic is emitted.
1278define void @load_factor2_fp128(ptr %ptr) {
1279; CHECK-NEON-LABEL: @load_factor2_fp128(
1280; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16
1281; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
1282; CHECK-NEON-NEXT:    [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
1283; CHECK-NEON-NEXT:    ret void
1284;
1285; CHECK-MVE-LABEL: @load_factor2_fp128(
1286; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16
1287; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
1288; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
1289; CHECK-MVE-NEXT:    ret void
1290;
1291; CHECK-NONE-LABEL: @load_factor2_fp128(
1292; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16
1293; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
1294; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
1295; CHECK-NONE-NEXT:    ret void
1296;
1297  %interleaved.vec = load <4 x fp128>, ptr %ptr, align 16
1298  %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
1299  %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
1300  ret void
1301}
1302
; Factor-2 de-interleaving of a <16 x ptr> load into two <8 x ptr> vectors.
; The NEON (vld2) and MVE (vld2q) checks expect two interleaved loads of
; <4 x i32>, the second addressed 8 i32 elements past %ptr, with each extracted
; half converted via inttoptr to <4 x ptr> and the halves concatenated into
; <8 x ptr>. The no-feature checks expect the load and shuffles unchanged.
1303define void @load_factor2_wide_pointer(ptr %ptr) {
1304; CHECK-NEON-LABEL: @load_factor2_wide_pointer(
1305; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4)
1306; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1307; CHECK-NEON-NEXT:    [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x ptr>
1308; CHECK-NEON-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1309; CHECK-NEON-NEXT:    [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x ptr>
1310; CHECK-NEON-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1311; CHECK-NEON-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP7]], i32 4)
1312; CHECK-NEON-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1313; CHECK-NEON-NEXT:    [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x ptr>
1314; CHECK-NEON-NEXT:    [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1315; CHECK-NEON-NEXT:    [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x ptr>
1316; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1317; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1318; CHECK-NEON-NEXT:    ret void
1319;
1320; CHECK-MVE-LABEL: @load_factor2_wide_pointer(
1321; CHECK-MVE-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]])
1322; CHECK-MVE-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1323; CHECK-MVE-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr>
1324; CHECK-MVE-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1325; CHECK-MVE-NEXT:    [[TMP5:%.*]] = inttoptr <4 x i32> [[TMP4]] to <4 x ptr>
1326; CHECK-MVE-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
1327; CHECK-MVE-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP6]])
1328; CHECK-MVE-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1329; CHECK-MVE-NEXT:    [[TMP8:%.*]] = inttoptr <4 x i32> [[TMP7]] to <4 x ptr>
1330; CHECK-MVE-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1331; CHECK-MVE-NEXT:    [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x ptr>
1332; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1333; CHECK-MVE-NEXT:    [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1334; CHECK-MVE-NEXT:    ret void
1335;
1336; CHECK-NONE-LABEL: @load_factor2_wide_pointer(
1337; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x ptr>, ptr [[PTR:%.*]], align 4
1338; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <16 x ptr> [[INTERLEAVED_VEC]], <16 x ptr> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1339; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <16 x ptr> [[INTERLEAVED_VEC]], <16 x ptr> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1340; CHECK-NONE-NEXT:    ret void
1341;
1342  %interleaved.vec = load <16 x ptr>, ptr %ptr, align 4
1343  %v0 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1344  %v1 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1345  ret void
1346}
1347
1348; This would be a candidate for interleaving, except that the load doesn't
1349; actually load enough elements to satisfy the shuffle masks. (It would be
1350; possible to produce a vld2.v2i32 instead, but that isn't currently implemented.)
; The load is only <4 x i32>, while both factor-2 shuffles produce <4 x i32>
; results whose last two mask elements are undef. All three check prefixes
; expect the IR to be left unchanged; the CHECK lines spell the undef mask
; elements as poison.
1351define void @load_out_of_range(ptr %ptr) {
1352; CHECK-NEON-LABEL: @load_out_of_range(
1353; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
1354; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
1355; CHECK-NEON-NEXT:    [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
1356; CHECK-NEON-NEXT:    ret void
1357;
1358; CHECK-MVE-LABEL: @load_out_of_range(
1359; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
1360; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
1361; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
1362; CHECK-MVE-NEXT:    ret void
1363;
1364; CHECK-NONE-LABEL: @load_out_of_range(
1365; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
1366; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
1367; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
1368; CHECK-NONE-NEXT:    ret void
1369;
1370  %interleaved.vec = load <4 x i32>, ptr %ptr, align 4
1371  %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
1372  %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1373  ret void
1374}
1375