xref: /llvm-project/llvm/test/Transforms/LICM/lnicm.ll (revision b941857b40edd7f3f3a9ec2ec85a26db24739774)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -aa-pipeline=basic-aa -passes='loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes INTC
3; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM
4; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM
5
6; This test represents the following function:
7; void test(int n, int m, int x[m][n], int y[n], int *z) {
8;   for (int k = 0; k < n; k++) {
9;     int tmp = *z;
10;     for (int i = 0; i < m; i++)
11;       x[i][k] += y[k] + tmp;
12;   }
13; }
14; Only the load of z should be hoisted out of the whole loop nest.
15; LICM also hoists the load of y[k] out of the i-loop, but LNICM does not,
16; in order to keep the loop nest perfect. This allows optimizations that
17; require a perfect loop nest (e.g. loop-interchange) to be applied.
18
19
20define dso_local void @test(i64 %n, i64 %m, ptr noalias %x, ptr noalias readonly %y, ptr readonly %z) {
21; The loopnest is not interchanged when we only run loop interchange.
22; INTC-LABEL: @test(
23; INTC-NEXT:  gurad:
24; INTC-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
25; INTC-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
26; INTC-NEXT:    br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
27; INTC:       for.cond1.preheader.lr.ph:
28; INTC-NEXT:    br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
29; INTC:       for.i.preheader:
30; INTC-NEXT:    br label [[ENTRY:%.*]]
31; INTC:       entry:
32; INTC-NEXT:    br label [[FOR_BODY:%.*]]
33; INTC:       for.body:
34; INTC-NEXT:    [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
35; INTC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
36; INTC-NEXT:    br label [[FOR_BODY3:%.*]]
37; INTC:       for.body3:
38; INTC-NEXT:    [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
39; INTC-NEXT:    [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
40; INTC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
41; INTC-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
42; INTC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
43; INTC-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
44; INTC-NEXT:    [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
45; INTC-NEXT:    [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
46; INTC-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
47; INTC-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
48; INTC-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
49; INTC-NEXT:    store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
50; INTC-NEXT:    [[INC]] = add nsw i32 [[I_01]], 1
51; INTC-NEXT:    [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
52; INTC-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
53; INTC-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
54; INTC:       for.end:
55; INTC-NEXT:    [[INC10]] = add nsw i32 [[K_02]], 1
56; INTC-NEXT:    [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
57; INTC-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
58; INTC-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
59; INTC:       for.end11.loopexit:
60; INTC-NEXT:    br label [[FOR_END11]]
61; INTC:       for.end11:
62; INTC-NEXT:    ret void
63;
64; The loopnest is interchanged when we run lnicm and loop interchange.
65; LNICM-LABEL: @test(
66; LNICM-NEXT:  gurad:
67; LNICM-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
68; LNICM-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
69; LNICM-NEXT:    br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
70; LNICM:       for.cond1.preheader.lr.ph:
71; LNICM-NEXT:    br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
72; LNICM:       for.i.preheader:
73; LNICM-NEXT:    br label [[FOR_BODY3_PREHEADER:%.*]]
74; LNICM:       entry:
75; LNICM-NEXT:    br label [[FOR_BODY:%.*]]
76; LNICM:       for.body:
77; LNICM-NEXT:    [[K_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
78; LNICM-NEXT:    br label [[FOR_BODY3_SPLIT1:%.*]]
79; LNICM:       for.body3.preheader:
80; LNICM-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
81; LNICM-NEXT:    br label [[FOR_BODY3:%.*]]
82; LNICM:       for.body3:
83; LNICM-NEXT:    [[I_01:%.*]] = phi i32 [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 0, [[FOR_BODY3_PREHEADER]] ]
84; LNICM-NEXT:    br label [[ENTRY]]
85; LNICM:       for.body3.split1:
86; LNICM-NEXT:    [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
87; LNICM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
88; LNICM-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
89; LNICM-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
90; LNICM-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
91; LNICM-NEXT:    [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
92; LNICM-NEXT:    [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
93; LNICM-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
94; LNICM-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
95; LNICM-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
96; LNICM-NEXT:    store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
97; LNICM-NEXT:    [[INC:%.*]] = add nsw i32 [[I_01]], 1
98; LNICM-NEXT:    [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
99; LNICM-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
100; LNICM-NEXT:    br label [[FOR_END]]
101; LNICM:       for.body3.split:
102; LNICM-NEXT:    [[TMP3]] = add nsw i32 [[I_01]], 1
103; LNICM-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
104; LNICM-NEXT:    [[TMP5:%.*]] = icmp slt i64 [[TMP4]], [[M]]
105; LNICM-NEXT:    br i1 [[TMP5]], label [[FOR_BODY3]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
106; LNICM:       for.end:
107; LNICM-NEXT:    [[INC10]] = add nsw i32 [[K_02]], 1
108; LNICM-NEXT:    [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
109; LNICM-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
110; LNICM-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]], !llvm.loop [[LOOP2:![0-9]+]]
111; LNICM:       for.end11.loopexit:
112; LNICM-NEXT:    br label [[FOR_END11]]
113; LNICM:       for.end11:
114; LNICM-NEXT:    ret void
115;
116; The loopnest is not interchanged when we run licm and loop interchange.
117; LICM-LABEL: @test(
118; LICM-NEXT:  gurad:
119; LICM-NEXT:    [[CMP23:%.*]] = icmp sgt i64 [[M:%.*]], 0
120; LICM-NEXT:    [[CMP32:%.*]] = icmp sgt i64 [[N:%.*]], 0
121; LICM-NEXT:    br i1 [[CMP23]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_END11:%.*]]
122; LICM:       for.cond1.preheader.lr.ph:
123; LICM-NEXT:    br i1 [[CMP32]], label [[FOR_I_PREHEADER:%.*]], label [[FOR_END11]]
124; LICM:       for.i.preheader:
125; LICM-NEXT:    br label [[ENTRY:%.*]]
126; LICM:       entry:
127; LICM-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Z:%.*]], align 4
128; LICM-NEXT:    br label [[FOR_BODY:%.*]]
129; LICM:       for.body:
130; LICM-NEXT:    [[K_02:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC10:%.*]], [[FOR_END:%.*]] ]
131; LICM-NEXT:    [[IDXPROM:%.*]] = sext i32 [[K_02]] to i64
132; LICM-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i64 [[IDXPROM]]
133; LICM-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
134; LICM-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
135; LICM-NEXT:    br label [[FOR_BODY3:%.*]]
136; LICM:       for.body3:
137; LICM-NEXT:    [[I_01:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[INC:%.*]], [[FOR_BODY3]] ]
138; LICM-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[I_01]] to i64
139; LICM-NEXT:    [[INDEX0:%.*]] = mul i64 [[IDXPROM4]], [[N]]
140; LICM-NEXT:    [[INDEX1:%.*]] = add i64 [[INDEX0]], [[IDXPROM]]
141; LICM-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i64 [[INDEX1]]
142; LICM-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4
143; LICM-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
144; LICM-NEXT:    store i32 [[ADD8]], ptr [[ARRAYIDX7]], align 4
145; LICM-NEXT:    [[INC]] = add nsw i32 [[I_01]], 1
146; LICM-NEXT:    [[INC_EXT:%.*]] = sext i32 [[INC]] to i64
147; LICM-NEXT:    [[CMP2:%.*]] = icmp slt i64 [[INC_EXT]], [[M]]
148; LICM-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]]
149; LICM:       for.end:
150; LICM-NEXT:    [[INC10]] = add nsw i32 [[K_02]], 1
151; LICM-NEXT:    [[INC10_EXT:%.*]] = sext i32 [[INC10]] to i64
152; LICM-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC10_EXT]], [[N]]
153; LICM-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END11_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
154; LICM:       for.end11.loopexit:
155; LICM-NEXT:    br label [[FOR_END11]]
156; LICM:       for.end11:
157; LICM-NEXT:    ret void
158;
159
160gurad:                                            ; first block of the function: guards the loop nest
161  %cmp23 = icmp sgt i64 %m, 0                     ; m > 0
162  %cmp32 = icmp sgt i64 %n, 0                     ; n > 0 (tested in the next block)
163  br i1 %cmp23, label %for.cond1.preheader.lr.ph, label %for.end11
164
165for.cond1.preheader.lr.ph:                        ; preds = %gurad
166  br i1 %cmp32, label %for.i.preheader, label %for.end11
167
168for.i.preheader:                                  ; preds = %for.cond1.preheader.lr.ph
169  br label %entry
170
171entry:                                  ; preds = %for.i.preheader
172  br label %for.body
173
174for.body:                                         ; outer (k) loop header
175  %k.02 = phi i32 [ 0, %entry ], [ %inc10, %for.end ]
176  %0 = load i32, ptr %z, align 4                  ; tmp = *z; the address depends on neither k nor i
177  br label %for.body3
178
179for.body3:                                        ; inner (i) loop: header, body and latch in one block
180  %i.01 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
181  %idxprom = sext i32 %k.02 to i64
182  %arrayidx = getelementptr inbounds i32, ptr %y, i64 %idxprom    ; &y[k]
183  %1 = load i32, ptr %arrayidx, align 4           ; y[k]; the address depends only on k, not on i
184  %add = add nsw i32 %1, %0                       ; y[k] + tmp
185  %idxprom4 = sext i32 %i.01 to i64
186  %index0 = mul i64 %idxprom4, %n                 ; i * n
187  %index1 = add i64 %index0, %idxprom             ; i * n + k
188  %arrayidx7 = getelementptr inbounds i32, ptr %x, i64 %index1    ; &x[i][k], with x viewed as a flat i32 array
189  %2 = load i32, ptr %arrayidx7, align 4
190  %add8 = add nsw i32 %2, %add
191  store i32 %add8, ptr %arrayidx7, align 4        ; x[i][k] += y[k] + tmp
192  %inc = add nsw i32 %i.01, 1                     ; i + 1
193  %inc.ext = sext i32 %inc to i64
194  %cmp2 = icmp slt i64 %inc.ext, %m               ; continue the i-loop while i + 1 < m
195  br i1 %cmp2, label %for.body3, label %for.end, !llvm.loop !0
196
197for.end:                                          ; outer (k) loop latch
198  %inc10 = add nsw i32 %k.02, 1                   ; k + 1
199  %inc10.ext = sext i32 %inc10 to i64
200  %cmp = icmp slt i64 %inc10.ext, %n              ; continue the k-loop while k + 1 < n
201  br i1 %cmp, label %for.body, label %for.end11, !llvm.loop !2
202
203for.end11:                                        ; common exit, reached from both guards and from the outer latch
204  ret void
205}
206
207!0 = distinct !{!0, !1}              ; loop ID attached to the for.body3 back edge (inner i-loop)
208!1 = !{!"llvm.loop.mustprogress"}    ; property node referenced by both loop IDs
209!2 = distinct !{!2, !1}              ; loop ID attached to the for.end back edge (outer k-loop)
210