; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 -sve-tail-folding=default+reductions+recurrences+reverse | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -sve-tail-folding=default -mcpu=neoverse-v1 | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 -sve-tail-folding=default | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding-insn-threshold=0 -S -mcpu=neoverse-v1 | FileCheck %s -check-prefix=CHECK-NEOVERSE-V1

; Exercises the -sve-tail-folding option of the loop vectorizer for SVE.
; Each invocation above selects one check prefix:
;   * NOTF        - option disabled, default, or absent (no -mcpu).
;   * TF          - every category enabled (all, or the explicit list of
;                   simple+reductions+recurrences+reverse).
;   * TF-NORED    - all categories except reductions.
;   * TF-NOREC    - all categories except recurrences.
;   * TF-NOREV    - all categories except reverse loops.
;   * TF-ONLYRED  - only the reductions category.
;   * NEOVERSE-V1 - default settings with -mcpu=neoverse-v1; the two
;                   invocations that differ only in flag order check that the
;                   order of -mcpu and -sve-tail-folding does not matter.
; A tail-folded loop is recognised by a predicate phi in vector.body that
; feeds masked loads/stores; a non-tail-folded loop must not have that phi.

target triple = "aarch64-unknown-linux-gnu"

; Simple store loop: writes %val to consecutive i32 elements of %ptr.
; Expected to be tail-folded for every prefix except NOTF and TF-ONLYRED.
define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NOTF-LABEL: @simple_memset(
; CHECK-NOTF:       vector.ph:
; CHECK-NOTF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-NOTF:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NOTF:       vector.body:
; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-NOTF:         store <vscale x 4 x i32> %[[SPLAT]], ptr

; CHECK-TF-NORED-LABEL: @simple_memset(
; CHECK-TF-NORED:       vector.ph:
; CHECK-TF-NORED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF-NORED:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF-NORED:       vector.body:
; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NORED:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

; CHECK-TF-NOREC-LABEL: @simple_memset(
; CHECK-TF-NOREC:       vector.ph:
; CHECK-TF-NOREC:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF-NOREC:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF-NOREC:       vector.body:
; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREC:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

; CHECK-TF-NOREV-LABEL: @simple_memset(
; CHECK-TF-NOREV:       vector.ph:
; CHECK-TF-NOREV:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF-NOREV:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF-NOREV:       vector.body:
; CHECK-TF-NOREV:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREV:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

; CHECK-TF-LABEL: @simple_memset(
; CHECK-TF:       vector.ph:
; CHECK-TF:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF:       vector.body:
; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

; CHECK-TF-ONLYRED-LABEL: @simple_memset(
; CHECK-TF-ONLYRED:       vector.ph:
; CHECK-TF-ONLYRED:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-TF-ONLYRED:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-TF-ONLYRED:       vector.body:
; CHECK-TF-ONLYRED-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-TF-ONLYRED:         store <vscale x 4 x i32> %[[SPLAT]], ptr

; CHECK-NEOVERSE-V1-LABEL: @simple_memset(
; CHECK-NEOVERSE-V1:       vector.ph:
; CHECK-NEOVERSE-V1:         %[[INSERT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %val, i64 0
; CHECK-NEOVERSE-V1:         %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEOVERSE-V1:       vector.body:
; CHECK-NEOVERSE-V1:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-NEOVERSE-V1:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[SPLAT]], {{.*}} %[[ACTIVE_LANE_MASK]]

entry:
  br label %while.body

while.body:                                       ; preds = %while.body, %entry
  %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ]
  %gep = getelementptr i32, ptr %ptr, i64 %index
  store i32 %val, ptr %gep
  %index.next = add nsw i64 %index, 1
  %cmp10 = icmp ult i64 %index.next, %n
  br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0

while.end.loopexit:                               ; preds = %while.body
  ret void
}

; Fast-math floating-point add reduction over %a.
; Expected to be tail-folded (masked load plus a select that keeps the previous
; accumulator in inactive lanes) only when the reductions category is enabled:
; prefixes TF, TF-NOREC, TF-NOREV and TF-ONLYRED. The NOTF, TF-NORED and
; NEOVERSE-V1 prefixes expect an unpredicated loop.
define float @fadd_red_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-NOTF-LABEL: @fadd_red_fast(
; CHECK-NOTF:       vector.body:
; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-NOTF:         %[[LOAD:.*]] = load <vscale x 4 x float>
; CHECK-NOTF:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-NOTF:       middle.block:
; CHECK-NOTF-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])

; CHECK-TF-NORED-LABEL: @fadd_red_fast(
; CHECK-TF-NORED:       vector.body:
; CHECK-TF-NORED-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-TF-NORED:         %[[LOAD:.*]] = load <vscale x 4 x float>
; CHECK-TF-NORED:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF-NORED:       middle.block:
; CHECK-TF-NORED-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])

; CHECK-TF-NOREC-LABEL: @fadd_red_fast(
; CHECK-TF-NOREC:       vector.body:
; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREC:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
; CHECK-TF-NOREC:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF-NOREC:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF-NOREC:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
; CHECK-TF-NOREC:       middle.block:
; CHECK-TF-NOREC-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])

; CHECK-TF-NOREV-LABEL: @fadd_red_fast(
; CHECK-TF-NOREV:       vector.body:
; CHECK-TF-NOREV:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREV:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
; CHECK-TF-NOREV:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF-NOREV:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF-NOREV:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
; CHECK-TF-NOREV:       middle.block:
; CHECK-TF-NOREV-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])

; CHECK-TF-LABEL: @fadd_red_fast(
; CHECK-TF:       vector.body:
; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
; CHECK-TF:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
; CHECK-TF:       middle.block:
; CHECK-TF-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])

; CHECK-TF-ONLYRED-LABEL: @fadd_red_fast(
; CHECK-TF-ONLYRED:       vector.body:
; CHECK-TF-ONLYRED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-ONLYRED:         %[[VEC_PHI:.*]] = phi <vscale x 4 x float>
; CHECK-TF-ONLYRED:         %[[LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF-ONLYRED:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-TF-ONLYRED:         %[[SEL:.*]] = select fast <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]], <vscale x 4 x float> %[[ADD]], <vscale x 4 x float> %[[VEC_PHI]]
; CHECK-TF-ONLYRED:       middle.block:
; CHECK-TF-ONLYRED-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[SEL]])

; CHECK-NEOVERSE-V1-LABEL: @fadd_red_fast(
; CHECK-NEOVERSE-V1:       vector.body:
; CHECK-NEOVERSE-V1-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-NEOVERSE-V1:         %[[LOAD:.*]] = load <vscale x 4 x float>
; CHECK-NEOVERSE-V1:         %[[ADD:.*]] = fadd fast <vscale x 4 x float> %[[LOAD]]
; CHECK-NEOVERSE-V1:       middle.block:
; CHECK-NEOVERSE-V1-NEXT:    call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %[[ADD]])

entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd fast float %0, %sum.07
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %add
}

; Recurrence loop: each iteration adds the value loaded by the previous
; iteration (%0, seeded from %.pre) to the newly loaded value (%1).
; Expected to be tail-folded only when the recurrences category is enabled
; together with simple loops: prefixes TF, TF-NORED and TF-NOREV. The NOTF,
; TF-NOREC, TF-ONLYRED and NEOVERSE-V1 prefixes expect an unpredicated loop.
define void @add_recur(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NOTF-LABEL: @add_recur(
; CHECK-NOTF:       entry:
; CHECK-NOTF:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-NOTF:       vector.ph:
; CHECK-NOTF:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-NOTF:       vector.body:
; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-NOTF:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-NOTF:         %[[LOAD]] = load <vscale x 4 x i32>
; CHECK-NOTF:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-NOTF:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-NOTF:         store <vscale x 4 x i32> %[[ADD]]

; CHECK-TF-NORED-LABEL: @add_recur(
; CHECK-TF-NORED:       entry:
; CHECK-TF-NORED:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-TF-NORED:       vector.ph:
; CHECK-TF-NORED:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-TF-NORED:       vector.body:
; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NORED:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-TF-NORED:         %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF-NORED:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-TF-NORED:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-TF-NORED:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])

; CHECK-TF-NOREC-LABEL: @add_recur(
; CHECK-TF-NOREC:       entry:
; CHECK-TF-NOREC:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-TF-NOREC:       vector.ph:
; CHECK-TF-NOREC:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-TF-NOREC:       vector.body:
; CHECK-TF-NOREC-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-TF-NOREC:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-TF-NOREC:         %[[LOAD]] = load <vscale x 4 x i32>
; CHECK-TF-NOREC:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-TF-NOREC:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-TF-NOREC:         store <vscale x 4 x i32> %[[ADD]]

; CHECK-TF-NOREV-LABEL: @add_recur(
; CHECK-TF-NOREV:       entry:
; CHECK-TF-NOREV:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-TF-NOREV:       vector.ph:
; CHECK-TF-NOREV:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-TF-NOREV:       vector.body:
; CHECK-TF-NOREV:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF-NOREV:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-TF-NOREV:         %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF-NOREV:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-TF-NOREV:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-TF-NOREV:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])

; CHECK-TF-LABEL: @add_recur(
; CHECK-TF:       entry:
; CHECK-TF:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-TF:       vector.ph:
; CHECK-TF:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-TF:       vector.body:
; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 4 x i1>
; CHECK-TF:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-TF:         %[[LOAD]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0({{.*}} %[[ACTIVE_LANE_MASK]]
; CHECK-TF:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-TF:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-TF:         call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %[[ADD]], {{.*}} <vscale x 4 x i1> %[[ACTIVE_LANE_MASK]])

; CHECK-TF-ONLYRED-LABEL: @add_recur(
; CHECK-TF-ONLYRED:       entry:
; CHECK-TF-ONLYRED:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-TF-ONLYRED:       vector.ph:
; CHECK-TF-ONLYRED:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-TF-ONLYRED:       vector.body:
; CHECK-TF-ONLYRED-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-TF-ONLYRED:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-TF-ONLYRED:         %[[LOAD]] = load <vscale x 4 x i32>
; CHECK-TF-ONLYRED:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-TF-ONLYRED:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-TF-ONLYRED:         store <vscale x 4 x i32> %[[ADD]]

; CHECK-NEOVERSE-V1-LABEL: @add_recur(
; CHECK-NEOVERSE-V1:       entry:
; CHECK-NEOVERSE-V1:         %[[PRE:.*]] = load i32, ptr %src, align 4
; CHECK-NEOVERSE-V1:       vector.ph:
; CHECK-NEOVERSE-V1:         %[[RECUR_INIT:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[PRE]]
; CHECK-NEOVERSE-V1:       vector.body:
; CHECK-NEOVERSE-V1-NOT:     %{{.*}} = phi <vscale x 4 x i1>
; CHECK-NEOVERSE-V1:         %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i32> [ %[[RECUR_INIT]], %vector.ph ], [ %[[LOAD:.*]], %vector.body ]
; CHECK-NEOVERSE-V1:         %[[LOAD]] = load <vscale x 4 x i32>
; CHECK-NEOVERSE-V1:         %[[SPLICE:.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> %[[VECTOR_RECUR]], <vscale x 4 x i32> %[[LOAD]], i32 -1)
; CHECK-NEOVERSE-V1:         %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[LOAD]], %[[SPLICE]]
; CHECK-NEOVERSE-V1:         store <vscale x 4 x i32> %[[ADD]]

entry:
  %.pre = load i32, ptr %src, align 4
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %0 = phi i32 [ %1, %for.body ], [ %.pre, %entry ]
  %i.010 = phi i64 [ %add, %for.body ], [ 0, %entry ]
  %add = add nuw nsw i64 %i.010, 1
  %arrayidx1 = getelementptr inbounds i32, ptr %src, i64 %add
  %1 = load i32, ptr %arrayidx1, align 4
  %add2 = add nsw i32 %1, %0
  %arrayidx3 = getelementptr inbounds i32, ptr %dst, i64 %i.010
  store i32 %add2, ptr %arrayidx3, align 4
  %exitcond.not = icmp eq i64 %add, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                          ; preds = %for.body
  ret void
}

; Interleaved accesses: reads src[2*i] and src[2*i+1], writes dst[3*i],
; dst[3*i+1] and the constant 3.0 to dst[3*i+2]. This loop carries no
; !llvm.loop metadata. Every checked prefix expects a fixed-width <8 x float>
; load followed by de-interleaving shuffles. There are no checks for the
; TF-ONLYRED prefix in this function.
define void @interleave(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
; CHECK-NOTF-LABEL: @interleave(
; CHECK-NOTF:       vector.body:
; CHECK-NOTF:         %[[LOAD:.*]] = load <8 x float>, ptr
; CHECK-NOTF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NOTF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; CHECK-TF-LABEL: @interleave(
; CHECK-TF:       vector.body:
; CHECK-TF:         %[[LOAD:.*]] = load <8 x float>, ptr
; CHECK-TF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-TF:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; CHECK-TF-NORED-LABEL: @interleave(
; CHECK-TF-NORED:       vector.body:
; CHECK-TF-NORED:         %[[LOAD:.*]] = load <8 x float>, ptr
; CHECK-TF-NORED:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-TF-NORED:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; CHECK-TF-NOREC-LABEL: @interleave(
; CHECK-TF-NOREC:       vector.body:
; CHECK-TF-NOREC:         %[[LOAD:.*]] = load <8 x float>, ptr
; CHECK-TF-NOREC:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-TF-NOREC:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; CHECK-TF-NOREV-LABEL: @interleave(
; CHECK-TF-NOREV:       vector.body:
; CHECK-TF-NOREV:         %[[LOAD:.*]] = load <8 x float>, ptr
; CHECK-TF-NOREV:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-TF-NOREV:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

; CHECK-NEOVERSE-V1-LABEL: @interleave(
; CHECK-NEOVERSE-V1:       vector.body:
; CHECK-NEOVERSE-V1:         %[[LOAD:.*]] = load <8 x float>, ptr
; CHECK-NEOVERSE-V1:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEOVERSE-V1:         %{{.*}} = shufflevector <8 x float> %[[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.021 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %mul = shl nuw nsw i64 %i.021, 1
  %arrayidx = getelementptr inbounds float, ptr %src, i64 %mul
  %0 = load float, ptr %arrayidx, align 4
  %mul1 = mul nuw nsw i64 %i.021, 3
  %arrayidx2 = getelementptr inbounds float, ptr %dst, i64 %mul1
  store float %0, ptr %arrayidx2, align 4
  %add = or disjoint i64 %mul, 1
  %arrayidx4 = getelementptr inbounds float, ptr %src, i64 %add
  %1 = load float, ptr %arrayidx4, align 4
  %add6 = add nuw nsw i64 %mul1, 1
  %arrayidx7 = getelementptr inbounds float, ptr %dst, i64 %add6
  store float %1, ptr %arrayidx7, align 4
  %add9 = add nuw nsw i64 %mul1, 2
  %arrayidx10 = getelementptr inbounds float, ptr %dst, i64 %add9
  store float 3.000000e+00, ptr %arrayidx10, align 4
  %inc = add nuw nsw i64 %i.021, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Reverse loop: the index counts down from 1023 to 0 and each iteration stores
; src[i] + 1.0 to dst[i]. This loop carries no !llvm.loop metadata.
; Expected to be tail-folded (reversed predicate feeding a masked load) for the
; TF, TF-NORED and TF-NOREC prefixes, and left unpredicated for NOTF and
; TF-NOREV. The elements are doubles, so the predicate type that must be
; absent in the unpredicated cases is <vscale x 2 x i1>.
define void @reverse(ptr noalias %dst, ptr noalias %src) #0 {
; CHECK-NOTF-LABEL: @reverse(
; CHECK-NOTF:       vector.body:
; CHECK-NOTF-NOT:     %{{.*}} = phi <vscale x 2 x i1>
; CHECK-NOTF:         %[[LOAD:.*]] = load <vscale x 2 x double>, ptr
; CHECK-NOTF:         %{{.*}} = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]])

; CHECK-TF-NOREV-LABEL: @reverse(
; CHECK-TF-NOREV:       vector.body:
; CHECK-TF-NOREV-NOT:     %{{.*}} = phi <vscale x 2 x i1>
; CHECK-TF-NOREV:         %[[LOAD:.*]] = load <vscale x 2 x double>, ptr
; CHECK-TF-NOREV:         %{{.*}} = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> %[[LOAD]])

; CHECK-TF-LABEL: @reverse(
; CHECK-TF:       vector.body:
; CHECK-TF:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
; CHECK-TF:         %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
; CHECK-TF:         %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %[[REVERSE_MASK]]

; CHECK-TF-NORED-LABEL: @reverse(
; CHECK-TF-NORED:       vector.body:
; CHECK-TF-NORED:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
; CHECK-TF-NORED:         %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
; CHECK-TF-NORED:         %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %[[REVERSE_MASK]]

; CHECK-TF-NOREC-LABEL: @reverse(
; CHECK-TF-NOREC:       vector.body:
; CHECK-TF-NOREC:         %[[ACTIVE_LANE_MASK:.*]] = phi <vscale x 2 x i1>
; CHECK-TF-NOREC:         %[[REVERSE_MASK:.*]] = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %[[ACTIVE_LANE_MASK]])
; CHECK-TF-NOREC:         %[[MASKED_LOAD:.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0({{.*}} <vscale x 2 x i1> %[[REVERSE_MASK]]

entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %src, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 8
  %add = fadd double %0, 1.000000e+00
  %arrayidx2 = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %add, ptr %arrayidx2, align 8
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp.not = icmp eq i64 %indvars.iv, 0
  br i1 %cmp.not, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

attributes #0 = { "target-features"="+sve" }

; Loop metadata attached to the first three loops: enable vectorization with
; scalable vectors, a vectorize width of 4 and an interleave count of 1.
!0 = distinct !{!0, !1, !2, !3, !4}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!3 = !{!"llvm.loop.interleave.count", i32 1}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}