xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll (revision 2c7786e94a1058bd4f96794a1d4f70dcb86e5cc5)
; Exercises how -sve-tail-folding (and the -mcpu=neoverse-v1 default) changes
; what the loop vectorizer emits for the loops below on an AArch64 target with
; +sve. Every invocation also passes -sve-tail-folding-insn-threshold=0.
;
; FileCheck prefix (shown without its leading "CHECK-") and the options it is
; used with:
;   NOTF         -sve-tail-folding=disabled, =default, or unset (no -mcpu)
;   TF           =all, =simple+reductions+recurrences+reverse, or
;                -mcpu=neoverse-v1 with =default+reductions+recurrences+reverse
;   TF-NORED     =all+noreductions
;   TF-NOREC     =all+norecurrences
;   TF-NOREV     =all+noreverse
;   TF-ONLYRED   =reductions
;   NEOVERSE-V1  -mcpu=neoverse-v1 with =default (both flag orders) or unset
1; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF
2; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF
3; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S | FileCheck %s -check-prefix=CHECK-NOTF
4; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF
5; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF
6; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 -sve-tail-folding=default+reductions+recurrences+reverse | FileCheck %s -check-prefix=CHECK-TF
7; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED
8; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC
9; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV
10; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED
11; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -sve-tail-folding=default -mcpu=neoverse-v1 | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1
12; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 -sve-tail-folding=default | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1
13; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1
14
15target triple = "aarch64-unknown-linux-gnu"
16
; Plain store loop: writes %val to %ptr[index], starting at index 0, and keeps
; looping while index + 1 is unsigned-less-than %n. The back-edge carries the
; !llvm.loop !0 hints.
;
; Expectations by prefix (named without the leading "CHECK-"):
;   * NOTF, TF-ONLYRED: no <vscale x 4 x i1> phi between vector.body and the
;     store, and the splat of %val is written with an ordinary vector store.
;   * TF, TF-NORED, TF-NOREC, TF-NOREV, NEOVERSE-V1: vector.body has a
;     <vscale x 4 x i1> phi and that value is the mask operand of an
;     llvm.masked.store of the splat.
17define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
18; CHECK-NOTF-LABEL: @simple_memset(
19; CHECK-NOTF:       vector.ph:
20; CHECK-NOTF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
21; CHECK-NOTF:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
22; CHECK-NOTF:       vector.body:
23; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
24; CHECK-NOTF:         store <vscale x 4 x i32> %[[SPLAT]], ptr
25
26; CHECK-TF-NORED-LABEL: @simple_memset(
27; CHECK-TF-NORED:       vector.ph:
28; CHECK-TF-NORED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
29; CHECK-TF-NORED:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
30; CHECK-TF-NORED:       vector.body:
31; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
32; CHECK-TF-NORED:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]
33
34; CHECK-TF-NOREC-LABEL: @simple_memset(
35; CHECK-TF-NOREC:       vector.ph:
36; CHECK-TF-NOREC:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
37; CHECK-TF-NOREC:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
38; CHECK-TF-NOREC:       vector.body:
39; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
40; CHECK-TF-NOREC:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]
41
42; CHECK-TF-NOREV-LABEL: @simple_memset(
43; CHECK-TF-NOREV:       vector.ph:
44; CHECK-TF-NOREV:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
45; CHECK-TF-NOREV:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
46; CHECK-TF-NOREV:       vector.body:
47; CHECK-TF-NOREV:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
48; CHECK-TF-NOREV:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]
49
50; CHECK-TF-LABEL: @simple_memset(
51; CHECK-TF:       vector.ph:
52; CHECK-TF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
53; CHECK-TF:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
54; CHECK-TF:       vector.body:
55; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
56; CHECK-TF:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]
57
58; CHECK-TF-ONLYRED-LABEL: @simple_memset(
59; CHECK-TF-ONLYRED:       vector.ph:
60; CHECK-TF-ONLYRED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
61; CHECK-TF-ONLYRED:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
62; CHECK-TF-ONLYRED:       vector.body:
63; CHECK-TF-ONLYRED-NOT:     %{{.*}} = phi <vscale x 4 x i1>
64; CHECK-TF-ONLYRED:         store <vscale x 4 x i32> %[[SPLAT]], ptr
65
66; CHECK-NEOVERSE-V1-LABEL: @simple_memset(
67; CHECK-NEOVERSE-V1:       vector.ph:
68; CHECK-NEOVERSE-V1:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
69; CHECK-NEOVERSE-V1:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
70; CHECK-NEOVERSE-V1:       vector.body:
71; CHECK-NEOVERSE-V1:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
72; CHECK-NEOVERSE-V1:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]
73
74entry:
75  br label %while.body
76
77while.body:                                       ; preds = %while.body, %entry
78  %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ]
79  %gep = getelementptr i32, ptr %ptr, i64 %index
80  store i32 %val, ptr %gep
81  %index.next = add nsw i64 %index, 1
82  %cmp10 = icmp ult i64 %index.next, %n
83  br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0
84
85while.end.loopexit:                               ; preds = %while.body
86  ret void
87}
88
; Fast-math float sum reduction: each iteration loads %a[iv] and adds it to a
; running sum that starts at 0.0; the loop exits once iv + 1 equals %n and the
; final sum is returned. The back-edge carries the !llvm.loop !0 hints.
;
; Expectations by prefix (named without the leading "CHECK-"):
;   * NOTF, TF-NORED, NEOVERSE-V1: no <vscale x 4 x i1> phi before the plain
;     vector load, and middle.block reduces the fadd result directly.
;   * TF, TF-NOREC, TF-NOREV, TF-ONLYRED: a <vscale x 4 x i1> phi masks the
;     load, and a select on that mask chooses between the new sum and the
;     previous vector phi; middle.block reduces the select result.
89define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
90; CHECK-NOTF-LABEL: @fadd_red_fast
91; CHECK-NOTF:       vector.body:
92; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
93; CHECK-NOTF:         %[[LOAD:.*]] = load <vscale x 4 x float>
94; CHECK-NOTF:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
95; CHECK-NOTF:       middle.block:
96; CHECK-NOTF-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])
97
98; CHECK-TF-NORED-LABEL: @fadd_red_fast
99; CHECK-TF-NORED:       vector.body:
100; CHECK-TF-NORED-NOT:     %{{.*}} = phi <vscale x 4 x i1>
101; CHECK-TF-NORED:         %[[LOAD:.*]] = load <vscale x 4 x float>
102; CHECK-TF-NORED:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
103; CHECK-TF-NORED:       middle.block:
104; CHECK-TF-NORED-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])
105
106; CHECK-TF-NOREC-LABEL: @fadd_red_fast
107; CHECK-TF-NOREC:       vector.body:
108; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
109; CHECK-TF-NOREC:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
110; CHECK-TF-NOREC:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
111; CHECK-TF-NOREC:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
112; CHECK-TF-NOREC:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
113; CHECK-TF-NOREC:       middle.block:
114; CHECK-TF-NOREC-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
115
116; CHECK-TF-NOREV-LABEL: @fadd_red_fast
117; CHECK-TF-NOREV:       vector.body:
118; CHECK-TF-NOREV:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
119; CHECK-TF-NOREV:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
120; CHECK-TF-NOREV:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
121; CHECK-TF-NOREV:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
122; CHECK-TF-NOREV:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
123; CHECK-TF-NOREV:       middle.block:
124; CHECK-TF-NOREV-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
125
126; CHECK-TF-LABEL: @fadd_red_fast
127; CHECK-TF:       vector.body:
128; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
129; CHECK-TF:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
130; CHECK-TF:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
131; CHECK-TF:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
132; CHECK-TF:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
133; CHECK-TF:       middle.block:
134; CHECK-TF-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
135
136; CHECK-TF-ONLYRED-LABEL: @fadd_red_fast
137; CHECK-TF-ONLYRED:       vector.body:
138; CHECK-TF-ONLYRED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
139; CHECK-TF-ONLYRED:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
140; CHECK-TF-ONLYRED:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
141; CHECK-TF-ONLYRED:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
142; CHECK-TF-ONLYRED:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
143; CHECK-TF-ONLYRED:       middle.block:
144; CHECK-TF-ONLYRED-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])
145
146; CHECK-NEOVERSE-V1-LABEL: @fadd_red_fast
147; CHECK-NEOVERSE-V1:       vector.body:
148; CHECK-NEOVERSE-V1-NOT:     %{{.*}} = phi <vscale x 4 x i1>
149; CHECK-NEOVERSE-V1:         %[[LOAD:.*]] = load <vscale x 4 x float>
150; CHECK-NEOVERSE-V1:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
151; CHECK-NEOVERSE-V1:       middle.block:
152; CHECK-NEOVERSE-V1-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])
153
154entry:
155  br label %for.body
156
157for.body:
158  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
159  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
160  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
161  %0 = load float, ptr %arrayidx, align 4
162  %add = fadd fast float %0, %sum.07
163  %iv.next = add nuw nsw i64 %iv, 1
164  %exitcond.not = icmp eq i64 %iv.next, %n
165  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
166
167for.end:
168  ret float %add
169}
170
; Loop with a value carried over from the previous iteration: each iteration
; loads %src[i + 1], adds the element loaded on the previous iteration (seeded
; with %src[0], loaded in the entry block) and stores the sum to %dst[i]. It
; exits once i + 1 equals %n. The back-edge carries the !llvm.loop !0 hints.
;
; Every prefix expects a vector recurrence phi seeded from the entry-block load
; and an llvm.vector.splice(..., i32 -1) combining it with the current load.
; They differ only in masking (prefixes named without the leading "CHECK-"):
;   * NOTF, TF-NOREC, TF-ONLYRED, NEOVERSE-V1: no <vscale x 4 x i1> phi before
;     the recurrence phi; ordinary vector load and store.
;   * TF, TF-NORED, TF-NOREV: a <vscale x 4 x i1> phi is the mask of both the
;     llvm.masked.load and the llvm.masked.store.
171define void @add_recur(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
172; CHECK-NOTF-LABEL: @add_recur
173; CHECK-NOTF:       entry:
174; CHECK-NOTF:         %[[PRE:.*]] = load i32, ptr %src, align 4
175; CHECK-NOTF:       vector.ph:
176; CHECK-NOTF:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
177; CHECK-NOTF:       vector.body:
178; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
179; CHECK-NOTF:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
180; CHECK-NOTF:         %[[LOAD]] = load <vscale x 4 x i32>
181; CHECK-NOTF:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
182; CHECK-NOTF:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
183; CHECK-NOTF:         store <vscale x 4 x i32> %[[ADD]]
184
185; CHECK-TF-NORED-LABEL: @add_recur
186; CHECK-TF-NORED:       entry:
187; CHECK-TF-NORED:         %[[PRE:.*]] = load i32, ptr %src, align 4
188; CHECK-TF-NORED:       vector.ph:
189; CHECK-TF-NORED:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
190; CHECK-TF-NORED:       vector.body:
191; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
192; CHECK-TF-NORED:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
193; CHECK-TF-NORED:         %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
194; CHECK-TF-NORED:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
195; CHECK-TF-NORED:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
196; CHECK-TF-NORED:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])
197
198; CHECK-TF-NOREC-LABEL: @add_recur
199; CHECK-TF-NOREC:       entry:
200; CHECK-TF-NOREC:         %[[PRE:.*]] = load i32, ptr %src, align 4
201; CHECK-TF-NOREC:       vector.ph:
202; CHECK-TF-NOREC:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
203; CHECK-TF-NOREC:       vector.body:
204; CHECK-TF-NOREC-NOT:     %{{.*}} = phi <vscale x 4 x i1>
205; CHECK-TF-NOREC:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
206; CHECK-TF-NOREC:         %[[LOAD]] = load <vscale x 4 x i32>
207; CHECK-TF-NOREC:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
208; CHECK-TF-NOREC:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
209; CHECK-TF-NOREC:         store <vscale x 4 x i32> %[[ADD]]
210
211; CHECK-TF-NOREV-LABEL: @add_recur
212; CHECK-TF-NOREV:       entry:
213; CHECK-TF-NOREV:         %[[PRE:.*]] = load i32, ptr %src, align 4
214; CHECK-TF-NOREV:       vector.ph:
215; CHECK-TF-NOREV:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
216; CHECK-TF-NOREV:       vector.body:
217; CHECK-TF-NOREV:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
218; CHECK-TF-NOREV:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
219; CHECK-TF-NOREV:         %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
220; CHECK-TF-NOREV:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
221; CHECK-TF-NOREV:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
222; CHECK-TF-NOREV:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])
223
224; CHECK-TF-LABEL: @add_recur
225; CHECK-TF:       entry:
226; CHECK-TF:         %[[PRE:.*]] = load i32, ptr %src, align 4
227; CHECK-TF:       vector.ph:
228; CHECK-TF:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
229; CHECK-TF:       vector.body:
230; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
231; CHECK-TF:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
232; CHECK-TF:         %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
233; CHECK-TF:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
234; CHECK-TF:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
235; CHECK-TF:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])
236
237; CHECK-TF-ONLYRED-LABEL: @add_recur
238; CHECK-TF-ONLYRED:       entry:
239; CHECK-TF-ONLYRED:         %[[PRE:.*]] = load i32, ptr %src, align 4
240; CHECK-TF-ONLYRED:       vector.ph:
241; CHECK-TF-ONLYRED:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
242; CHECK-TF-ONLYRED:       vector.body:
243; CHECK-TF-ONLYRED-NOT:     %{{.*}} = phi <vscale x 4 x i1>
244; CHECK-TF-ONLYRED:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
245; CHECK-TF-ONLYRED:         %[[LOAD]] = load <vscale x 4 x i32>
246; CHECK-TF-ONLYRED:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
247; CHECK-TF-ONLYRED:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
248; CHECK-TF-ONLYRED:         store <vscale x 4 x i32> %[[ADD]]
249
250; CHECK-NEOVERSE-V1-LABEL: @add_recur
251; CHECK-NEOVERSE-V1:       entry:
252; CHECK-NEOVERSE-V1:         %[[PRE:.*]] = load i32, ptr %src, align 4
253; CHECK-NEOVERSE-V1:       vector.ph:
254; CHECK-NEOVERSE-V1:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
255; CHECK-NEOVERSE-V1:       vector.body:
256; CHECK-NEOVERSE-V1-NOT:     %{{.*}} = phi <vscale x 4 x i1>
257; CHECK-NEOVERSE-V1:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
258; CHECK-NEOVERSE-V1:         %[[LOAD]] = load <vscale x 4 x i32>
259; CHECK-NEOVERSE-V1:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
260; CHECK-NEOVERSE-V1:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
261; CHECK-NEOVERSE-V1:         store <vscale x 4 x i32> %[[ADD]]
262
263entry:
264  %.pre = load i32, ptr %src, align 4
265  br label %for.body
266
267for.body:                                         ; preds = %entry, %for.body
268  %0 = phi i32 [ %1, %for.body ], [ %.pre, %entry ]
269  %i.010 = phi i64 [ %add, %for.body ], [ 0, %entry ]
270  %add = add nuw nsw i64 %i.010, 1
271  %arrayidx1 = getelementptr inbounds i32, ptr %src, i64 %add
272  %1 = load i32, ptr %arrayidx1, align 4
273  %add2 = add nsw i32 %1, %0
274  %arrayidx3 = getelementptr inbounds i32, ptr %dst, i64 %i.010
275  store i32 %add2, ptr %arrayidx3, align 4
276  %exitcond.not = icmp eq i64 %add, %n
277  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
278
279for.end:                                          ; preds = %for.body
280  ret void
281}
282
; Strided access loop: each iteration loads %src[2*i] and %src[2*i + 1], stores
; them to %dst[3*i] and %dst[3*i + 1], and stores the constant 3.0 to
; %dst[3*i + 2]. It exits once i + 1 equals %n. Unlike the loops above, the
; back-edge has no !llvm.loop metadata attached.
;
; All prefixes checked here (NOTF, TF, TF-NORED, TF-NOREC, TF-NOREV and
; NEOVERSE-V1, named without the leading "CHECK-") expect the same thing: a
; fixed-width <8 x float> load followed by two shufflevectors that pick the
; even and the odd lanes. There are no TF-ONLYRED expectations for this
; function.
283define void @interleave(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
284; CHECK-NOTF-LABEL: @interleave(
285; CHECK-NOTF:       vector.body:
286; CHECK-NOTF:         %[[LOAD:.*]] = load <8 x float>, ptr
287; CHECK-NOTF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
288; CHECK-NOTF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
289
290; CHECK-TF-LABEL: @interleave(
291; CHECK-TF:       vector.body:
292; CHECK-TF:         %[[LOAD:.*]] = load <8 x float>, ptr
293; CHECK-TF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
294; CHECK-TF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
295
296; CHECK-TF-NORED-LABEL: @interleave(
297; CHECK-TF-NORED:       vector.body:
298; CHECK-TF-NORED:         %[[LOAD:.*]] = load <8 x float>, ptr
299; CHECK-TF-NORED:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
300; CHECK-TF-NORED:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
301
302; CHECK-TF-NOREC-LABEL: @interleave(
303; CHECK-TF-NOREC:       vector.body:
304; CHECK-TF-NOREC:         %[[LOAD:.*]] = load <8 x float>, ptr
305; CHECK-TF-NOREC:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
306; CHECK-TF-NOREC:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
307
308; CHECK-TF-NOREV-LABEL: @interleave(
309; CHECK-TF-NOREV:       vector.body:
310; CHECK-TF-NOREV:         %[[LOAD:.*]] = load <8 x float>, ptr
311; CHECK-TF-NOREV:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
312; CHECK-TF-NOREV:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
313
314; CHECK-NEOVERSE-V1-LABEL: @interleave(
315; CHECK-NEOVERSE-V1:       vector.body:
316; CHECK-NEOVERSE-V1:         %[[LOAD:.*]] = load <8 x float>, ptr
317; CHECK-NEOVERSE-V1:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
318; CHECK-NEOVERSE-V1:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
319
320entry:
321  br label %for.body
322
323for.body:                                         ; preds = %entry, %for.body
324  %i.021 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
325  %mul = shl nuw nsw i64 %i.021, 1
326  %arrayidx = getelementptr inbounds float, ptr %src, i64 %mul
327  %0 = load float, ptr %arrayidx, align 4
328  %mul1 = mul nuw nsw i64 %i.021, 3
329  %arrayidx2 = getelementptr inbounds float, ptr %dst, i64 %mul1
330  store float %0, ptr %arrayidx2, align 4
331  %add = or disjoint i64 %mul, 1
332  %arrayidx4 = getelementptr inbounds float, ptr %src, i64 %add
333  %1 = load float, ptr %arrayidx4, align 4
334  %add6 = add nuw nsw i64 %mul1, 1
335  %arrayidx7 = getelementptr inbounds float, ptr %dst, i64 %add6
336  store float %1, ptr %arrayidx7, align 4
337  %add9 = add nuw nsw i64 %mul1, 2
338  %arrayidx10 = getelementptr inbounds float, ptr %dst, i64 %add9
339  store float 3.000000e+00, ptr %arrayidx10, align 4
340  %inc = add nuw nsw i64 %i.021, 1
341  %exitcond.not = icmp eq i64 %inc, %n
342  br i1 %exitcond.not, label %for.end, label %for.body
343
344for.end:                                          ; preds = %for.body
345  ret void
346}
347
; Reverse-iterating loop: the induction variable starts at 1023 and counts down
; to 0; each iteration loads the double at %src[iv], adds 1.0 and stores the
; result to %dst[iv]. The back-edge has no !llvm.loop metadata attached.
;
; The loop works on doubles, so its vectors are <vscale x 2 x double> and any
; lane mask is <vscale x 2 x i1>; the negative checks below therefore look for
; a <vscale x 2 x i1> phi.
;
; Expectations by prefix (named without the leading "CHECK-"):
;   * NOTF, TF-NOREV: no <vscale x 2 x i1> phi before the ordinary vector load,
;     whose result is passed to llvm.vector.reverse.
;   * TF, TF-NORED, TF-NOREC: a <vscale x 2 x i1> phi is reversed with
;     llvm.vector.reverse.nxv2i1 and that reversed mask is the mask operand of
;     the llvm.masked.load.
348define void @reverse(ptr noalias %dst, ptr noalias %src) #0 {
349; CHECK-NOTF-LABEL: @reverse(
350; CHECK-NOTF:       vector.body:
351; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 2 x i1>
352; CHECK-NOTF:         %[[LOAD:.*]] = load <vscale x 2 x double>, ptr
353; CHECK-NOTF:         %{{.*}} = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]])
354
355; CHECK-TF-NOREV-LABEL: @reverse(
356; CHECK-TF-NOREV:       vector.body:
357; CHECK-TF-NOREV-NOT:     %{{.*}} = phi <vscale x 2 x i1>
358; CHECK-TF-NOREV:         %[[LOAD:.*]] = load <vscale x 2 x double>, ptr
359; CHECK-TF-NOREV:         %{{.*}} = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]])
360
361; CHECK-TF-LABEL: @reverse(
362; CHECK-TF:       vector.body:
363; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
364; CHECK-TF:         %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
365; CHECK-TF:         %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %[[REVERSE_MASK]]
366
367; CHECK-TF-NORED-LABEL: @reverse(
368; CHECK-TF-NORED:       vector.body:
369; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
370; CHECK-TF-NORED:         %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
371; CHECK-TF-NORED:         %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %[[REVERSE_MASK]]
372
373; CHECK-TF-NOREC-LABEL: @reverse(
374; CHECK-TF-NOREC:       vector.body:
375; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
376; CHECK-TF-NOREC:         %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
377; CHECK-TF-NOREC:         %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %[[REVERSE_MASK]]
378
379entry:
380  br label %for.body
381
382for.body:                                         ; preds = %entry, %for.body
383  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
384  %arrayidx = getelementptr inbounds double, ptr %src, i64 %indvars.iv
385  %0 = load double, ptr %arrayidx, align 8
386  %add = fadd double %0, 1.000000e+00
387  %arrayidx2 = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
388  store double %add, ptr %arrayidx2, align 8
389  %indvars.iv.next = add nsw i64 %indvars.iv, -1
390  %cmp.not = icmp eq i64 %indvars.iv, 0
391  br i1 %cmp.not, label %for.end, label %for.body
392
393for.end:                                          ; preds = %for.body
394  ret void
395}
396
; Attribute group referenced as #0 by the function definitions above; it turns
; on the SVE target feature for them.
397attributes #0 = { "target-features"="+sve" }
398
; Loop hints referenced as !llvm.loop !0 by the loops in @simple_memset,
; @fadd_red_fast and @add_recur (the @interleave and @reverse loops carry no
; loop metadata):
;   !1 - vectorize width hint of 4
;   !2 - scalable vectorization enabled
;   !3 - interleave count of 1
;   !4 - vectorization enabled
399!0 = distinct !{!0, !1, !2, !3, !4}
400!1 = !{!"llvm.loop.vectorize.width", i32 4}
401!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
402!3 = !{!"llvm.loop.interleave.count", i32 1}
403!4 = !{!"llvm.loop.vectorize.enable", i1 true}
404