xref: /llvm-project/llvm/test/Transforms/LoopVectorize/PowerPC/reg-usage.ll (revision cd28da390f8b8dedd00f9a2a383ec81e90436841)
1; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
2; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
3; REQUIRES: asserts
4
; Zero-initialized 1024-element i8 arrays read by the loops in @foo and @goo.
5@a = global [1024 x i8] zeroinitializer, align 16
6@b = global [1024 x i8] zeroinitializer, align 16
7
; @foo: sums |a[i] - b[i]| over the 1024 i8 elements of @a and @b into an i32
; accumulator and returns the total. The expected vectorization plan differs
; between the two invocations at the top of the file: the -mcpu=pwr8 one uses
; the PWR8 prefix and the -mcpu=pwr9 one uses the PWR9 prefix.
8define i32 @foo() {
9; CHECK-LABEL: foo
10
11; CHECK-PWR8: Executing best plan with VF=16, UF=4
12
13; CHECK-PWR9: Executing best plan with VF=8, UF=8
14
15
16entry:
17  br label %for.body
18
19for.cond.cleanup:
20  %add.lcssa = phi i32 [ %add, %for.body ]
21  ret i32 %add.lcssa
22
23for.body:
; %indvars.iv is the element index; %s.015 is the running i32 sum.
24  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
25  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
26  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %indvars.iv
27  %0 = load i8, ptr %arrayidx, align 1
28  %conv = zext i8 %0 to i32
29  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %indvars.iv
30  %1 = load i8, ptr %arrayidx2, align 1
31  %conv3 = zext i8 %1 to i32
32  %sub = sub nsw i32 %conv, %conv3
; Absolute value of %sub: keep it when it is > -1, otherwise use 0 - %sub.
33  %ispos = icmp sgt i32 %sub, -1
34  %neg = sub nsw i32 0, %sub
35  %2 = select i1 %ispos, i32 %sub, i32 %neg
36  %add = add nsw i32 %2, %s.015
37  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
38  %exitcond = icmp eq i64 %indvars.iv.next, 1024
39  br i1 %exitcond, label %for.cond.cleanup, label %for.body
40}
41
; @goo: same absolute-difference reduction as @foo, except that the element
; indices are derived from the induction variable (%indvars.iv + 3 for @a and
; %indvars.iv + 2 for @b) instead of using it directly.
42define i32 @goo() {
43; For indvars.iv used in a computation chain only feeding into getelementptr or cmp,
44; it will not have vector version and the vector register usage will not exceed the
45; available vector register number.
46
47; CHECK-LABEL: goo
48
49; CHECK: Executing best plan with VF=16, UF=4
50
51entry:
52  br label %for.body
53
54for.cond.cleanup:                                 ; preds = %for.body
55  %add.lcssa = phi i32 [ %add, %for.body ]
56  ret i32 %add.lcssa
57
58for.body:                                         ; preds = %for.body, %entry
59  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
60  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
; %tmp1 and %tmp2 are only used as getelementptr indices below.
; NOTE(review): %indvars.iv runs 0..1023, so these offsets index past the end
; of the 1024-element arrays on the last iterations; presumably irrelevant to
; the register-usage decision under test - confirm before reusing this IR.
61  %tmp1 = add nsw i64 %indvars.iv, 3
62  %arrayidx = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %tmp1
63  %tmp = load i8, ptr %arrayidx, align 1
64  %conv = zext i8 %tmp to i32
65  %tmp2 = add nsw i64 %indvars.iv, 2
66  %arrayidx2 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %tmp2
67  %tmp3 = load i8, ptr %arrayidx2, align 1
68  %conv3 = zext i8 %tmp3 to i32
69  %sub = sub nsw i32 %conv, %conv3
; Absolute value of %sub: keep it when it is > -1, otherwise use 0 - %sub.
70  %ispos = icmp sgt i32 %sub, -1
71  %neg = sub nsw i32 0, %sub
72  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
73  %add = add nsw i32 %tmp4, %s.015
74  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
75  %exitcond = icmp eq i64 %indvars.iv.next, 1024
76  br i1 %exitcond, label %for.cond.cleanup, label %for.body
77}
78
; @bar: for i in [0, 1024), replaces a[i] with a[i] + i (i64 elements) and
; returns the sum of the updated values. Both invocations of the test expect
; the same plan here (unsuffixed prefix).
79define i64 @bar(ptr nocapture %a) {
80; CHECK-LABEL: bar
81
82; CHECK: Executing best plan with VF=2, UF=8
83
84entry:
85  br label %for.body
86
87for.cond.cleanup:
88  %add2.lcssa = phi i64 [ %add2, %for.body ]
89  ret i64 %add2.lcssa
90
91for.body:
; %i.012 is the induction variable and is also used as a data operand of %add;
; %s.011 is the running i64 sum.
92  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
93  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
94  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %i.012
95  %0 = load i64, ptr %arrayidx, align 8
96  %add = add nsw i64 %0, %i.012
97  store i64 %add, ptr %arrayidx, align 8
98  %add2 = add nsw i64 %add, %s.011
99  %inc = add nuw nsw i64 %i.012, 1
100  %exitcond = icmp eq i64 %inc, 1024
101  br i1 %exitcond, label %for.cond.cleanup, label %for.body
102}
103
; External arrays of unspecified length used by @hoo: @d supplies i64 indices,
; @e is read through those indices, and @c receives the loaded values.
104@d = external global [0 x i64], align 8
105@e = external global [0 x i32], align 4
106@c = external global [0 x i32], align 4
107
; @hoo: for i in [0, 10000), stores e[d[i]] into c[i]. The load from @e is
; indexed by a value loaded from @d rather than by the induction variable.
; The %n argument is not used in the body. The expected plan keeps VF=1 and
; only interleaves.
108define void @hoo(i32 %n) {
109; CHECK-LABEL: hoo
110; CHECK: Executing best plan with VF=1, UF=8
111
112entry:
113  br label %for.body
114
115for.body:                                         ; preds = %for.body, %entry
116  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
117  %arrayidx = getelementptr inbounds [0 x i64], ptr @d, i64 0, i64 %indvars.iv
118  %tmp = load i64, ptr %arrayidx, align 8
; %tmp (loaded from @d) is the index into @e.
119  %arrayidx1 = getelementptr inbounds [0 x i32], ptr @e, i64 0, i64 %tmp
120  %tmp1 = load i32, ptr %arrayidx1, align 4
121  %arrayidx3 = getelementptr inbounds [0 x i32], ptr @c, i64 0, i64 %indvars.iv
122  store i32 %tmp1, ptr %arrayidx3, align 4
123  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
124  %exitcond = icmp eq i64 %indvars.iv.next, 10000
125  br i1 %exitcond, label %for.end, label %for.body
126
127for.end:                                          ; preds = %for.body
128  ret void
129}
130
; @float_: fast-math float reduction that adds a[i] and b[i] into a running sum
; for i = 0, 32, 64, ... while i + 32 < n (signed compare against sext(%n)).
; Returns 0.0 when %n <= 0. The expectations below pin the register-usage
; report at VF = 1: the per-class maximum usage and the invariant usage.
131define float @float_(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
132;CHECK-LABEL: float_
133;CHECK: LV(REG): VF = 1
134;CHECK: LV(REG): Found max usage: 2 item
135;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
136;CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
137;CHECK: LV(REG): Found invariant usage: 1 item
138;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
139
140entry:
; Skip the loop entirely when %n is not positive.
141  %cmp = icmp sgt i32 %n, 0
142  br i1 %cmp, label %preheader, label %for.end
143
144preheader:
; Loop bound, computed once outside the loop.
145  %t0 = sext i32 %n to i64
146  br label %for
147
148for:
149  %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
150  %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
151  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
152  %t1 = load float, ptr %arrayidx, align 4
153  %arrayidx3 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
154  %t2 = load float, ptr %arrayidx3, align 4
155  %add = fadd fast float %t1, %s.02
156  %add4 = fadd fast float %add, %t2
; The induction variable advances by 32 elements per iteration.
157  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
158  %cmp1 = icmp slt i64 %indvars.iv.next, %t0
159  br i1 %cmp1, label %for, label %loopexit
160
161loopexit:
162  %add4.lcssa = phi float [ %add4, %for ]
163  br label %for.end
164
165for.end:
166  %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
167  ret float %s.0.lcssa
168}
169
170
; @double_: walks %A downwards starting at index sext(%n), rewriting each
; double in place with the result of a long chain of fadd/fmul/fsub/fdiv
; operations on the loaded value. The loop exits after processing the element
; whose index truncates to 0 as an i32. The register-usage report is checked
; at VF = 2 for the pwr8 invocation and at VF = 1 for the pwr9 invocation.
; Basic blocks in this function are unnamed (numbered).
171define void @double_(ptr nocapture %A, i32 %n) nounwind uwtable ssp {
172;CHECK-LABEL: double_
173;CHECK-PWR8: LV(REG): VF = 2
174;CHECK-PWR8: LV(REG): Found max usage: 2 item
175;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
176;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
177;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
178;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
179
180;CHECK-PWR9: LV(REG): VF = 1
181;CHECK-PWR9: LV(REG): Found max usage: 2 item
182;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
183;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
184;CHECK-PWR9: LV(REG): Found invariant usage: 1 item
185;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
186
; Starting index for the downward walk.
187  %1 = sext i32 %n to i64
188  br label %2
189
190; <label>:2                                       ; preds = %2, %0
191  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
192  %3 = getelementptr inbounds double, ptr %A, i64 %indvars.iv
193  %4 = load double, ptr %3, align 8
; Values %5 through %23 all derive from the single loaded value %4; several
; intermediates (%5, %6, %8, %13) are reused by later instructions in the chain.
194  %5 = fadd double %4, 3.000000e+00
195  %6 = fmul double %4, 2.000000e+00
196  %7 = fadd double %5, %6
197  %8 = fadd double %7, 2.000000e+00
198  %9 = fmul double %8, 5.000000e-01
199  %10 = fadd double %6, %9
200  %11 = fsub double %10, %5
201  %12 = fadd double %4, %11
202  %13 = fdiv double %8, %12
203  %14 = fmul double %13, %8
204  %15 = fmul double %6, %14
205  %16 = fmul double %5, %15
206  %17 = fadd double %16, -3.000000e+00
207  %18 = fsub double %4, %5
208  %19 = fadd double %6, %18
209  %20 = fadd double %13, %19
210  %21 = fadd double %20, %17
211  %22 = fadd double %21, 3.000000e+00
212  %23 = fmul double %4, %22
; The result is written back to the address it was loaded from.
213  store double %23, ptr %3, align 8
214  %indvars.iv.next = add i64 %indvars.iv, -1
; The exit test uses the current (pre-decrement) index truncated to i32.
215  %24 = trunc i64 %indvars.iv to i32
216  %25 = icmp eq i32 %24, 0
217  br i1 %25, label %26, label %2
218
219; <label>:26                                      ; preds = %2
220  ret void
221}
222
; @fp128_: starting from %d, subtracts each of the 2048 ppc_fp128 elements of
; %n (fast-math fsub) and returns the final value. The induction variable is
; an i32. The expectations pin the VF = 1 register-usage report: two register
; classes, PPC::GPRRC and PPC::VRRC.
223define ppc_fp128 @fp128_(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
224;CHECK-LABEL: fp128_
225;CHECK: LV(REG): VF = 1
226;CHECK: LV(REG): Found max usage: 2 item
227;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
228;CHECK: LV(REG): RegisterClass: PPC::VRRC, 2 registers
229entry:
230  br label %for.body
231
232for.body:                                         ; preds = %for.body, %entry
233  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; %x.05 is the running value, seeded with the %d argument.
234  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ]
235  %arrayidx = getelementptr inbounds ppc_fp128, ptr %n, i32 %i.06
236  %0 = load ppc_fp128, ptr %arrayidx, align 8
237  %sub = fsub fast ppc_fp128 %x.05, %0
238  %inc = add nsw i32 %i.06, 1
239  %exitcond = icmp eq i32 %inc, 2048
240  br i1 %exitcond, label %for.end, label %for.body
241
242for.end:                                          ; preds = %for.body
243  ret ppc_fp128 %sub
244}
245
246
; @fp16_: scales half-precision data from %pIn into %pOut. The scale factor is
; the low 16 bits of %scale.coerce reinterpreted as a half. The loop runs
; (%numCols * %numRows) >> 2 times (logical shift) and is skipped when that
; count is zero; each iteration multiplies two consecutive halfs by the scale,
; stores them, and advances both pointers by two elements. The expectations
; pin the VF = 1 register-usage report (PPC::GPRRC and PPC::VSXRC).
247define void @fp16_(ptr nocapture readonly %pIn, ptr nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
248;CHECK-LABEL: fp16_
249;CHECK: LV(REG): VF = 1
250;CHECK: LV(REG): Found max usage: 2 item
251;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 4 registers
252;CHECK: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
253entry:
; Recover the half-precision scale from the i32 argument.
254  %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
255  %0 = bitcast i16 %tmp.0.extract.trunc to half
; Iteration count; a zero count bypasses the loop.
256  %mul = mul i32 %numCols, %numRows
257  %shr = lshr i32 %mul, 2
258  %cmp26 = icmp eq i32 %shr, 0
259  br i1 %cmp26, label %while.end, label %while.body
260
261while.body:                                       ; preds = %entry, %while.body
; Two pointer inductions (input, output) plus a down-counting block counter.
262  %pIn.addr.029 = phi ptr [ %add.ptr, %while.body ], [ %pIn, %entry ]
263  %pOut.addr.028 = phi ptr [ %add.ptr7, %while.body ], [ %pOut, %entry ]
264  %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
265  %1 = load half, ptr %pIn.addr.029, align 2
266  %arrayidx2 = getelementptr inbounds half, ptr %pIn.addr.029, i32 1
267  %2 = load half, ptr %arrayidx2, align 2
268  %mul3 = fmul half %1, %0
269  %mul4 = fmul half %2, %0
270  store half %mul3, ptr %pOut.addr.028, align 2
271  %arrayidx6 = getelementptr inbounds half, ptr %pOut.addr.028, i32 1
272  store half %mul4, ptr %arrayidx6, align 2
273  %add.ptr = getelementptr inbounds half, ptr %pIn.addr.029, i32 2
274  %add.ptr7 = getelementptr inbounds half, ptr %pOut.addr.028, i32 2
275  %dec = add nsw i32 %blkCnt.027, -1
276  %cmp = icmp eq i32 %dec, 0
277  br i1 %cmp, label %while.end, label %while.body
278
279while.end:                                        ; preds = %while.body, %entry
280  ret void
281}
282