; RUN: opt -passes="loop-vectorize" -mtriple=x86_64-unknown-linux -S -debug %s 2>&1 | FileCheck %s
; REQUIRES: asserts

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux"

declare double @llvm.pow.f64(double, double)

; Test case where the memory runtime checks and the vector body are more
; expensive than running the scalar loop.
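;
; With -debug, the vectorizer dumps the cost it assigns to each instruction
; of the runtime pointer checks it would have to emit. The loop writes %A,
; %B and %E and reads %A-%D, so every pointer pair except the read-only
; %C/%D pair has to be checked for overlap; the CHECK lines below pin down
; that dump and the resulting cost decision.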
define void @test(ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E) {

; CHECK: Calculating cost of runtime checks:
; CHECK-NEXT:  0  for   {{.+}} = getelementptr i8, ptr %A, i64 128
; CHECK-NEXT:  0  for   {{.+}} = getelementptr i8, ptr %B, i64 128
; CHECK-NEXT:  0  for   {{.+}} = getelementptr i8, ptr %E, i64 128
; CHECK-NEXT:  0  for   {{.+}} = getelementptr i8, ptr %C, i64 128
; CHECK-NEXT:  0  for   {{.+}} = getelementptr i8, ptr %D, i64 128
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = icmp ult ptr
; CHECK-NEXT:  1  for   {{.+}} = and i1
; CHECK-NEXT:  1  for   {{.+}} = or i1
; CHECK-NEXT: Total cost of runtime checks: 35
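;
; The total follows from the per-instruction costs above: the five bound
; GEPs are free, the first pair check needs two compares plus an 'and'
; (cost 3), and each of the remaining eight pair checks adds two compares,
; an 'and' and an 'or' (cost 4): 3 + 8 * 4 = 35.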

; CHECK: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (16 < 24)
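;
; Given the 35-unit check cost, the loop would need to run at least 24
; iterations before vectorizing pays off, but the trip count is only 16,
; so the vectorizer keeps the scalar loop and emits neither a
; vector.memcheck nor a vector.body block.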
;
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT:  br label %for.body
; CHECK-NOT: vector.memcheck
; CHECK-NOT: vector.body
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %gep.A = getelementptr inbounds double, ptr %A, i64 %iv
  %l.A = load double, ptr %gep.A, align 4
  store double 0.0, ptr %gep.A, align 4
  %p.1 = call double @llvm.pow.f64(double %l.A, double 2.0)

  %gep.B = getelementptr inbounds double, ptr %B, i64 %iv
  %l.B = load double, ptr %gep.B, align 4
  %p.2 = call double @llvm.pow.f64(double %l.B, double %p.1)
  store double 0.0, ptr %gep.B, align 4

  %gep.C = getelementptr inbounds double, ptr %C, i64 %iv
  %l.C = load double, ptr %gep.C, align 4
  %p.3 = call double @llvm.pow.f64(double %p.1, double %l.C)

  %gep.D = getelementptr inbounds double, ptr %D, i64 %iv
  %l.D = load double, ptr %gep.D
  %p.4 = call double @llvm.pow.f64(double %p.2, double %l.D)
  %p.5 = call double @llvm.pow.f64(double %p.4, double %p.3)
  %mul = fmul double 2.0, %p.5
  %mul.2 = fmul double %mul, 2.0
  %mul.3 = fmul double %mul, %mul.2
  %gep.E = getelementptr inbounds double, ptr %E, i64 %iv
  store double %mul.3, ptr %gep.E, align 4
  %iv.next = add i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 16
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}