; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-count=2 | FileCheck %s

; Make sure the loop is unrolled without a remainder loop based on an assumption
; that the least significant bit is known to be zero.

define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %p, i32 %q) local_unnamed_addr {
; CHECK-LABEL: @assumeDivisibleTC(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P:%.*]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
; CHECK:       guarded:
; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[Q:%.*]], 2
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP2]])
; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[P]], [[Q]]
; CHECK-NEXT:    [[N:%.*]] = select i1 [[GT]], i32 [[P]], i32 [[Q]]
; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP0]], 3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_011]]
; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX4]], align 1
; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INC]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT:    [[ADD_1:%.*]] = add i8 [[TMP1]], 3
; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
; CHECK-NEXT:    store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
; CHECK-NEXT:    br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %and = and i32 %p, 1
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %guarded, label %exit

guarded:
  %rem = urem i32 %q, 2
  %cmp2 = icmp eq i32 %rem, 0
  tail call void @llvm.assume(i1 %cmp2)
  %gt = icmp sgt i32 %p, %q
  %n = select i1 %gt, i32 %p, i32 %q
  %cmp110 = icmp sgt i32 %n, 0
  br i1 %cmp110, label %for.body, label %exit

for.body:
  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %guarded ]
  %arrayidx = getelementptr inbounds i8, ptr %b, i32 %i.011
  %0 = load i8, ptr %arrayidx, align 1
  %add = add i8 %0, 3
  %arrayidx4 = getelementptr inbounds i8, ptr %a, i32 %i.011
  store i8 %add, ptr %arrayidx4, align 1
  %inc = add nuw nsw i32 %i.011, 1
  %cmp1 = icmp slt i32 %inc, %n
  br i1 %cmp1, label %for.body, label %exit

exit:
  ret void
}

; Make sure the loop is unrolled with a remainder loop when the trip-count
; is not provably divisible by the unroll factor.

define dso_local void @cannotProveDivisibleTC(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %p, i32 %q) local_unnamed_addr {
; CHECK-LABEL: @cannotProveDivisibleTC(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P:%.*]], 6
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
; CHECK:       guarded:
; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[Q:%.*]], 2
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP2]])
; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[P]], [[Q]]
; CHECK-NEXT:    [[N:%.*]] = select i1 [[GT]], i32 [[P]], i32 [[Q]]
; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; CHECK:       for.body.preheader.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP2]], 3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_011]]
; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX4]], align 1
; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INC]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT:    [[ADD_1:%.*]] = add i8 [[TMP3]], 3
; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
; CHECK-NEXT:    store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       exit.loopexit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
; CHECK-NEXT:    br label [[EXIT_LOOPEXIT_UNR_LCSSA]]
; CHECK:       exit.loopexit.unr-lcssa:
; CHECK-NEXT:    [[I_011_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR_PH]], [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK:       for.body.epil.preheader:
; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
; CHECK:       for.body.epil:
; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_UNR]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_EPIL]], align 1
; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011_UNR]]
; CHECK-NEXT:    store i8 [[ADD_EPIL]], ptr [[ARRAYIDX4_EPIL]], align 1
; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %and = and i32 %p, 6
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %guarded, label %exit

guarded:
  %rem = urem i32 %q, 2
  %cmp2 = icmp eq i32 %rem, 0
  tail call void @llvm.assume(i1 %cmp2)
  %gt = icmp sgt i32 %p, %q
  %n = select i1 %gt, i32 %p, i32 %q
  %cmp110 = icmp sgt i32 %n, 0
  br i1 %cmp110, label %for.body, label %exit

for.body:
  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %guarded ]
  %arrayidx = getelementptr inbounds i8, ptr %b, i32 %i.011
  %0 = load i8, ptr %arrayidx, align 1
  %add = add i8 %0, 3
  %arrayidx4 = getelementptr inbounds i8, ptr %a, i32 %i.011
  store i8 %add, ptr %arrayidx4, align 1
  %inc = add nuw nsw i32 %i.011, 1
  %cmp1 = icmp slt i32 %inc, %n
  br i1 %cmp1, label %for.body, label %exit

exit:
  ret void
}

; Declaration of the llvm.assume intrinsic used by both tests above.
declare void @llvm.assume(i1 noundef) nofree nosync nounwind willreturn