xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll (revision d1e5e6735a845f1281f11389da1e5a55a0d2e87a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes="default<O3>" -rotation-max-header-size=0 -S < %s  | FileCheck %s --check-prefix=HOIST
3; RUN: opt -passes="default<O3>" -rotation-max-header-size=1 -S < %s  | FileCheck %s --check-prefix=HOIST
4; RUN: opt -passes="default<O3>" -rotation-max-header-size=2 -S < %s  | FileCheck %s --check-prefix=ROTATE
5; RUN: opt -passes="default<O3>" -rotation-max-header-size=3 -S < %s  | FileCheck %s --check-prefix=ROTATE
6
7; This example is produced from very basic C code:
8;
9;   void f0();
10;   void f1();
11;   void f2();
12;
13;   void loop(int width) {
14;       if(width < 1)
15;           return;
16;       for(int i = 0; i < width - 1; ++i) {
17;           f0();
18;           f1();
19;       }
20;       f0();
21;       f2();
22;   }
23
24; We have a choice here. We can either
25; * hoist the f0() call into the loop header,
26;   * which potentially makes loop rotation unprofitable since the loop header
27;     might have grown above a certain threshold, and such unrotated loops will
28;     be ignored by LoopVectorizer, preventing vectorization
29;   * or loop rotation will succeed, resulting in some weird PHIs that will also
30;     harm vectorization
31; * or not hoist f0() call before performing loop rotation,
32;   at the cost of potential code bloat and/or potentially successfully rotating
33;   the loops, vectorizing them at the cost of compile time.
34
; Data layout string for this module; see the "Data Layout" section of the
; LLVM Language Reference for the meaning of the individual fields.
35target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
36
; External functions called from @_Z4loopi. Only declarations are provided in
; this file, so their bodies are not visible here.
37declare void @f0()
38declare void @f1()
39declare void @f2()
40
; Lifetime marker intrinsics; @_Z4loopi uses them to bracket its %i stack slot
; (4 bytes).
41declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
42declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
43
; Test input: @_Z4loopi(i32 %width), the IR form of the loop() routine from the
; C snippet at the top of this file. The body keeps %width and the induction
; variable i in stack slots (allocas) and accesses them through loads and
; stores. f0 is called both inside the loop body and once more after the loop.
;
; Two autogenerated groups of check lines follow the signature:
;  * the first group (used with -rotation-max-header-size=0 and =1) expects a
;    single f0 call placed in the for.cond block;
;  * the second group (used with -rotation-max-header-size=2 and =3) expects
;    f0 calls in both for.body and for.cond.cleanup.
44define void @_Z4loopi(i32 %width) {
45; HOIST-LABEL: @_Z4loopi(
46; HOIST-NEXT:  entry:
47; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
48; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
49; HOIST:       for.cond.preheader:
50; HOIST-NEXT:    [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1
51; HOIST-NEXT:    br label [[FOR_COND:%.*]]
52; HOIST:       for.cond:
53; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
54; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]]
55; HOIST-NEXT:    tail call void @f0()
56; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
57; HOIST:       for.cond.cleanup:
58; HOIST-NEXT:    tail call void @f2()
59; HOIST-NEXT:    br label [[RETURN]]
60; HOIST:       for.body:
61; HOIST-NEXT:    tail call void @f1()
62; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
63; HOIST-NEXT:    br label [[FOR_COND]]
64; HOIST:       return:
65; HOIST-NEXT:    ret void
66;
67; ROTATE-LABEL: @_Z4loopi(
68; ROTATE-NEXT:  entry:
69; ROTATE-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
70; ROTATE-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
71; ROTATE:       for.cond.preheader:
72; ROTATE-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
73; ROTATE-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
74; ROTATE:       for.body.preheader:
75; ROTATE-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -2
76; ROTATE-NEXT:    br label [[FOR_BODY:%.*]]
77; ROTATE:       for.cond.cleanup:
78; ROTATE-NEXT:    tail call void @f0()
79; ROTATE-NEXT:    tail call void @f2()
80; ROTATE-NEXT:    br label [[RETURN]]
81; ROTATE:       for.body:
82; ROTATE-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
83; ROTATE-NEXT:    tail call void @f0()
84; ROTATE-NEXT:    tail call void @f1()
85; ROTATE-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
86; ROTATE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_04]], [[TMP0]]
87; ROTATE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
88; ROTATE:       return:
89; ROTATE-NEXT:    ret void
90;
; Store the argument to its stack slot, reload it, and take the early-return
; path (if.then) when width < 1 (signed compare).
91entry:
92  %width.addr = alloca i32, align 4
93  %i = alloca i32, align 4
94  store i32 %width, ptr %width.addr, align 4
95  %i1 = load i32, ptr %width.addr, align 4
96  %cmp = icmp slt i32 %i1, 1
97  br i1 %cmp, label %if.then, label %if.end
98
; Early return: corresponds to `if(width < 1) return;` in the C snippet.
99if.then:
100  br label %return
101
; Start the lifetime of the 4-byte %i slot and initialise i to 0 before
; entering the loop.
102if.end:
103  call void @llvm.lifetime.start.p0(i64 4, ptr %i)
104  store i32 0, ptr %i, align 4
105  br label %for.cond
106
; Loop condition: reload i and width, compute width - 1, and enter for.body
; while i < width - 1 (signed); otherwise leave through for.cond.cleanup.
107for.cond:
108  %i3 = load i32, ptr %i, align 4
109  %i4 = load i32, ptr %width.addr, align 4
110  %sub = sub nsw i32 %i4, 1
111  %cmp1 = icmp slt i32 %i3, %sub
112  br i1 %cmp1, label %for.body, label %for.cond.cleanup
113
; Loop exit: end the lifetime of the %i slot, then continue to for.end.
114for.cond.cleanup:
115  call void @llvm.lifetime.end.p0(i64 4, ptr %i)
116  br label %for.end
117
; Loop body: call f0, then f1.
118for.body:
119  call void @f0()
120  call void @f1()
121  br label %for.inc
122
; Increment step: i = i + 1, written back to the stack slot, then branch back
; to the loop condition.
123for.inc:
124  %i6 = load i32, ptr %i, align 4
125  %inc = add nsw i32 %i6, 1
126  store i32 %inc, ptr %i, align 4
127  br label %for.cond
128
; After the loop: one more f0 call followed by f2, matching the two calls that
; follow the for loop in the C snippet.
129for.end:
130  call void @f0()
131  call void @f2()
132  br label %return
133
; Common exit reached from both the early-return path and the post-loop path.
134return:
135  ret void
136}
137