xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll (revision c836b8956d393f98e0d4e136799a33f1bd06e5f5)
1; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s
2; REQUIRES: asserts
3
;; The RUN line enables +sve2, so the module is pinned to an AArch64 triple.
4target triple = "aarch64-unknown-linux-gnu"
5
6;; Based on the following C code:
7;;
8;; void simple_histogram(int *buckets, unsigned *indices, int N) {
9;;   for (int i = 0; i < N; ++i)
10;;     buckets[indices[i]]++;
11;; }
12
13;; Check that the scalar plan contains the original instructions.
14; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' {
15; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF
16; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count
17; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count
18; CHECK-EMPTY:
19; CHECK-NEXT: ir-bb<entry>:
20; CHECK-NEXT: Successor(s): vector.ph
21; CHECK-EMPTY:
22; CHECK-NEXT: vector.ph:
23; CHECK-NEXT: Successor(s): vector loop
24; CHECK-EMPTY:
25; CHECK-NEXT: <x1> vector loop: {
26; CHECK-NEXT:   vector.body:
27; CHECK-NEXT:     EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]]
28; CHECK-NEXT:     [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>
29; CHECK-NEXT:     CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
30; CHECK-NEXT:     CLONE [[IDX:.*]] = load [[GEP_IDX]]
31; CHECK-NEXT:     CLONE [[EXT_IDX:.*]] = zext [[IDX]]
32; CHECK-NEXT:     CLONE [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]]
33; CHECK-NEXT:     CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]]
34; CHECK-NEXT:     CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1>
35; CHECK-NEXT:     CLONE store [[UPDATE]], [[GEP_BUCKET]]
36; CHECK-NEXT:     EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]]
37; CHECK-NEXT:     EMIT branch-on-count [[IV_NEXT]], [[VTC]]
38; CHECK-NEXT:   No successors
39; CHECK-NEXT: }
40; CHECK-NEXT: Successor(s): middle.block
41; CHECK-EMPTY:
42; CHECK-NEXT: middle.block:
43; CHECK-NEXT:   EMIT [[TC_CHECK:.*]] = icmp eq [[OTC]], [[VTC]]
44; CHECK-NEXT:   EMIT branch-on-cond [[TC_CHECK]]
45; CHECK-NEXT: Successor(s): ir-bb<for.exit>, scalar.ph
46; CHECK-EMPTY:
47; CHECK-NEXT: scalar.ph:
48; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi [[VTC]], ir<0>
49; CHECK-NEXT: Successor(s): ir-bb<for.body>
50; CHECK-EMPTY:
51; CHECK-NEXT: ir-bb<for.body>:
52; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
53; CHECK:        IR   %exitcond = icmp eq i64 %iv.next, %N
54; CHECK-NEXT: No successors
55; CHECK-EMPTY:
56; CHECK-NEXT: ir-bb<for.exit>:
57; CHECK-NEXT: No successors
58; CHECK-NEXT: }
59
60;; Check that the vectorized plan contains a histogram recipe instead.
61; CHECK: VPlan 'Initial VPlan for VF={vscale x 2,vscale x 4},UF>=1' {
62; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF
63; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count
64; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count
65; CHECK-EMPTY:
66; CHECK-NEXT: ir-bb<entry>:
67; CHECK-NEXT: Successor(s): vector.ph
68; CHECK-EMPTY:
69; CHECK-NEXT: vector.ph:
70; CHECK-NEXT: Successor(s): vector loop
71; CHECK-EMPTY:
72; CHECK-NEXT: <x1> vector loop: {
73; CHECK-NEXT:   vector.body:
74; CHECK-NEXT:     EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]]
75; CHECK-NEXT:     [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>
76; CHECK-NEXT:     CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
77; CHECK-NEXT:     [[VECP_IDX:vp.*]] = vector-pointer [[GEP_IDX]]
78; CHECK-NEXT:     WIDEN [[IDX:.*]] = load [[VECP_IDX]]
79; CHECK-NEXT:     WIDEN-CAST [[EXT_IDX:.*]] = zext [[IDX]] to i64
80; CHECK-NEXT:     WIDEN-GEP Inv[Var] [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]]
81; CHECK-NEXT:     WIDEN-HISTOGRAM buckets: [[GEP_BUCKET]], inc: ir<1>
82; CHECK-NEXT:     EMIT [[IV_NEXT]] = add nuw [[IV]], [[VFxUF]]
83; CHECK-NEXT:     EMIT branch-on-count [[IV_NEXT]], [[VTC]]
84; CHECK-NEXT:   No successors
85; CHECK-NEXT: }
86; CHECK-NEXT: Successor(s): middle.block
87; CHECK-EMPTY:
88; CHECK-NEXT: middle.block:
89; CHECK-NEXT:   EMIT [[TC_CHECK:.*]] = icmp eq [[OTC]], [[VTC]]
90; CHECK-NEXT:   EMIT branch-on-cond [[TC_CHECK]]
91; CHECK-NEXT: Successor(s): ir-bb<for.exit>, scalar.ph
92; CHECK-EMPTY:
93; CHECK-NEXT: scalar.ph:
94; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi [[VTC]], ir<0>
95; CHECK-NEXT: Successor(s): ir-bb<for.body>
96; CHECK-EMPTY:
97; CHECK-NEXT: ir-bb<for.body>:
98; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<[[RESUME]]> from scalar.ph)
99; CHECK:        IR   %exitcond = icmp eq i64 %iv.next, %N
100; CHECK-NEXT: No successors
101; CHECK-EMPTY:
102; CHECK-NEXT: ir-bb<for.exit>:
103; CHECK-NEXT: No successors
104; CHECK-NEXT: }
105
;; Test input: the histogram update loop `buckets[indices[i]]++` described by
;; the C snippet in this file's header comment, written with an i64 induction
;; variable. %buckets is marked noalias and %indices is marked readonly.
;; The loop is bottom-tested: entry branches straight into for.body, so the
;; body executes once before %iv.next is first compared against %N.
106define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) {
107entry:
108  br label %for.body
109
110for.body:
111  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
112  %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv ; &indices[iv]
113  %l.idx = load i32, ptr %gep.indices, align 4 ; 32-bit bucket index
114  %idxprom1 = zext i32 %l.idx to i64 ; zero-extended: the C source declares the indices as unsigned
115  %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1 ; &buckets[index]
;; Load / add 1 / store through the same %gep.bucket address. The checks for
;; the scalable-VF plan expect this triple to be replaced by one
;; WIDEN-HISTOGRAM recipe with an increment of 1.
116  %l.bucket = load i32, ptr %gep.bucket, align 4
117  %inc = add nsw i32 %l.bucket, 1
118  store i32 %inc, ptr %gep.bucket, align 4
119  %iv.next = add nuw nsw i64 %iv, 1
120  %exitcond = icmp eq i64 %iv.next, %N ; leave the loop once %iv.next reaches %N
121  br i1 %exitcond, label %for.exit, label %for.body
122
123for.exit:
124  ret void
125}
126