; xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll (revision c836b8956d393f98e0d4e136799a33f1bd06e5f5)
; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW

; Test that the MaxVF for the following loop, which has no dependence
; distances, is calculated as vscale x 4 (max legal SVE vector size) or
; vscale x 16 (maximized bandwidth for i8 in the loop).
;
; Loop shape: a[i] = zext(b[i]) + c[i] for i in [0, 1024), where a and c are
; accessed as i32 and b is accessed as i8. The three pointers are distinct
; arguments and no access is offset from the induction variable.
define void @test0(ptr %a, ptr %b, ptr %c) #0 {
; CHECK: LV: Checking a loop in 'test0'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b is indexed one byte at a time, so only byte alignment can hold on
  ; every iteration; claiming more would be undefined behaviour.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4, i.e. vscale x 4.
;
; Loop shape: a[i + 64] = zext(b[i]) + a[i] for i in [0, 1024). The store to
; %a lands 64 i32 elements ahead of the load from %a, which is the dependence
; distance referred to above.
define void @test1(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test1'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b is indexed one byte at a time, so only byte alignment can hold on
  ; every iteration; claiming more would be undefined behaviour.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index runs 64 elements ahead of the load index into %a.
  %2 = add nuw nsw i64 %iv, 64
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2, i.e. vscale x 2.
;
; Loop shape: a[i + 32] = zext(b[i]) + a[i] for i in [0, 1024). The store to
; %a lands 32 i32 elements ahead of the load from %a.
define void @test2(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test2'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b is indexed one byte at a time, so only byte alignment can hold on
  ; every iteration; claiming more would be undefined behaviour.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index runs 32 elements ahead of the load index into %a.
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1, i.e. vscale x 1.
;
; Loop shape: a[i + 16] = zext(b[i]) + a[i] for i in [0, 1024). The store to
; %a lands 16 i32 elements ahead of the load from %a.
define void @test3(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test3'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b is indexed one byte at a time, so only byte alignment can hold on
  ; every iteration; claiming more would be undefined behaviour.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index runs 16 elements ahead of the load index into %a.
  %2 = add nuw nsw i64 %iv, 16
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
;
; Loop shape: a[i + 8] = b[i] + a[i] for i in [0, 1024), all accesses i32.
; The store to %a lands only 8 elements ahead of the load from %a. Every
; configuration is expected to report no feasible scalable VF and to select
; the fixed-width VF of 4.
; NOTE(review): presumably no scalable VF fits because 8 / maxvscale (16)
; rounds down to zero - confirm against the vectorizer's MaxVF computation.
define void @test4(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test4'
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  ; Store index runs 8 elements ahead of the load index into %a.
  %2 = add nuw nsw i64 %iv, 8
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Attribute group attached to every test function in this file: vscale is
; bounded to the range [1, 16], which supplies the "maxvscale = 16" used in
; the per-test MaxVF comments.
attributes #0 = { vscale_range(1, 16) }
