; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s
; REQUIRES: asserts

;; This test inspects the VPlan debug dump (printed via -debug-only) rather
;; than the transformed IR; IR output is suppressed with --disable-output and
;; stderr is redirected into FileCheck.

target triple = "aarch64-unknown-linux-gnu"

;; Based on the following C code:
;;
;; void simple_histogram(int *buckets, unsigned *indices, int N) {
;;   for (int i = 0; i < N; ++i)
;;     buckets[indices[i]]++;
;; }

;; Check that the scalar plan contains the original instructions.
;; The load / add / store sequence on the bucket must still appear as plain
;; CLONE recipes when VF=1.
; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' {
; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF
; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count
; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]]
; CHECK-NEXT:     [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>
; CHECK-NEXT:     CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
; CHECK-NEXT:     CLONE [[IDX:.*]] = load [[GEP_IDX]]
; CHECK-NEXT:     CLONE [[EXT_IDX:.*]] = zext [[IDX]]
; CHECK-NEXT:     CLONE [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]]
; CHECK-NEXT:     CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]]
; CHECK-NEXT:     CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1>
; CHECK-NEXT:     CLONE store [[UPDATE]], [[GEP_BUCKET]]
; CHECK-NEXT:     EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]]
; CHECK-NEXT:     EMIT branch-on-count [[IV_NEXT]], [[VTC]]
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
;; The compare reuses the OTC value captured from the live-in list above, so
;; the operand is actually verified to be the original trip count (this
;; mirrors the vector-plan expectations further down).
; CHECK-NEXT:   EMIT [[TC_CHECK:.*]] = icmp eq [[OTC]], [[VTC]]
; CHECK-NEXT:   EMIT branch-on-cond [[TC_CHECK]]
; CHECK-NEXT: Successor(s): ir-bb<for.exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi [[VTC]], ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %iv.next, %N
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.exit>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

;; Check that the vectorized plan contains a histogram recipe instead.
;; The bucket load / add / store triple is expected to be replaced by a single
;; WIDEN-HISTOGRAM recipe fed by the widened bucket address computation.
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2,vscale x 4},UF>=1' {
; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF
; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count
; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<entry>:
; CHECK-NEXT: Successor(s): vector.ph
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT:   vector.body:
; CHECK-NEXT:     EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]]
; CHECK-NEXT:     [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>
; CHECK-NEXT:     CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
; CHECK-NEXT:     [[VECP_IDX:vp.*]] = vector-pointer [[GEP_IDX]]
; CHECK-NEXT:     WIDEN [[IDX:.*]] = load [[VECP_IDX]]
; CHECK-NEXT:     WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64
; CHECK-NEXT:     WIDEN-GEP Inv[Var] [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]]
; CHECK-NEXT:     WIDEN-HISTOGRAM buckets: [[GEP_BUCKET]], inc: ir<1>
; CHECK-NEXT:     EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]]
; CHECK-NEXT:     EMIT branch-on-count [[IV_NEXT]], [[VTC]]
; CHECK-NEXT:   No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT:   EMIT [[TC_CHECK:.*]] = icmp eq [[OTC]], [[VTC]]
; CHECK-NEXT:   EMIT branch-on-cond [[TC_CHECK]]
; CHECK-NEXT: Successor(s): ir-bb<for.exit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi [[VTC]], ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
; CHECK-NEXT:   IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
; CHECK:        IR %exitcond = icmp eq i64 %iv.next, %N
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.exit>:
; CHECK-NEXT: No successors
; CHECK-NEXT: }

;; Single-block loop that increments buckets[indices[iv]] for iv in [0, N).
;; %buckets is marked noalias and %indices is readonly; the loop exits when
;; %iv.next equals %N.
define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) {
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; Load the 32-bit index for this iteration and zero-extend it to i64.
  %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
  %l.idx = load i32, ptr %gep.indices, align 4
  %idxprom1 = zext i32 %l.idx to i64
  ; Read-modify-write of the selected bucket: the histogram update.
  %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
  %l.bucket = load i32, ptr %gep.bucket, align 4
  %inc = add nsw i32 %l.bucket, 1
  store i32 %inc, ptr %gep.bucket, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %N
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:
  ret void
}