xref: /llvm-project/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll (revision 456ec1c2f4e487de235c953e8f2832b97372e7b0)
1; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
2; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
3; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
4
5
6target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
7
8; REMARKS: --- !Passed
9; REMARKS-NEXT: Pass:            loop-interchange
10; REMARKS-NEXT: Name:            Interchanged
11; REMARKS-NEXT: Function:        test1
12
; test1: integer sum reduction carried through both loops of a two-deep nest.
; The outer loop (for1.header) steps %indvars.iv23 and the inner loop (for2)
; steps %indvars.iv; each starts at 0 and exits once its incremented value
; equals 100. The FileCheck directives inside the function describe the
; expected output, in which for2 encloses for1.header.
13define i64 @test1(ptr %Arr) {
14; CHECK-LABEL: @test1(
15; CHECK-NEXT:  entry:
16; CHECK-NEXT:    br label [[FOR2_PREHEADER:%.*]]
17; CHECK:       for1.header.preheader:
18; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
19; CHECK:       for1.header:
20; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
21; CHECK-NEXT:    [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
22; CHECK-NEXT:    br label [[FOR2_SPLIT1:%.*]]
23; CHECK:       for2.preheader:
24; CHECK-NEXT:    br label [[FOR2:%.*]]
25; CHECK:       for2:
26; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
27; CHECK-NEXT:    [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
28; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
29; CHECK:       for2.split1:
30; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], ptr [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
31; CHECK-NEXT:    [[LV:%.*]] = load i64, ptr [[ARRAYIDX]], align 4
32; CHECK-NEXT:    [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
33; CHECK-NEXT:    [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
34; CHECK-NEXT:    [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
35; CHECK-NEXT:    br label [[FOR1_INC]]
36; CHECK:       for2.split:
37; CHECK-NEXT:    [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
38; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
39; CHECK-NEXT:    [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
40; CHECK-NEXT:    br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
41; CHECK:       for1.inc:
42; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
43; CHECK-NEXT:    [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
44; CHECK-NEXT:    br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
45; CHECK:       for1.loopexit:
46; CHECK-NEXT:    [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
47; CHECK-NEXT:    ret i64 [[SUM_INC_LCSSA2]]
48;
49entry:
50  br label %for1.header
51
52for1.header:                                         ; preds = %for1.inc, %entry
  ; Outer-loop header: %indvars.iv23 is the outer induction variable and
  ; %sum.outer carries the running sum from one outer iteration to the next.
53  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
54  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
55  br label %for2
56
57for2:                                        ; preds = %for2, %for1.header
  ; Inner loop: %sum.inner is seeded directly from %sum.outer on entry and
  ; from %sum.inc on the back edge.
58  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
59  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
  ; The inner induction variable indexes the first array dimension and the
  ; outer induction variable indexes the second.
60  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
61  %lv = load i64, ptr %arrayidx, align 4
62  %sum.inc = add i64 %sum.inner, %lv
63  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
64  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
65  br i1 %exit1, label %for1.inc, label %for2
66
67for1.inc:                                ; preds = %for2
  ; Single-entry PHI that carries the inner loop's final sum out of for2.
68  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
69  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
70  %exit2 = icmp eq i64 %indvars.iv.next24, 100
71  br i1 %exit2, label %for1.loopexit, label %for1.header
72
73for1.loopexit:                                 ; preds = %for1.inc
  ; Single-entry PHI that carries the final sum out of the outer loop.
74  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
75  ret i64 %sum.inc.lcssa2
76}
77
78; In this test case, the inner reduction PHI %inner does not involve the outer
79; reduction PHI %sum.outer, so the loops must not be interchanged.
80; REMARKS: --- !Missed
81; REMARKS-NEXT: Pass:            loop-interchange
82; REMARKS-NEXT: Name:            UnsupportedPHIOuter
83; REMARKS-NEXT: Function:        test2
84
; test2: two-deep loop nest in which the inner PHI %inner is seeded from the
; outer induction variable %indvars.iv23 instead of from %sum.outer.
; %sum.outer is defined in the outer header but never read.
85define i64 @test2(ptr %Arr) {
86entry:
87  br label %for1.header
88
89for1.header:                                         ; preds = %for1.inc, %entry
  ; %sum.outer receives the inner loop's result on the back edge, but no
  ; instruction in this function uses it.
90  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
91  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
92  br label %for2
93
94for2:                                        ; preds = %for2, %for1.header
  ; The accumulator starts from %indvars.iv23 on every entry to the inner
  ; loop, so it does not chain through %sum.outer.
95  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
96  %inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
97  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
98  %lv = load i64, ptr %arrayidx, align 4
99  %sum.inc = add i64 %inner, %lv
100  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
101  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
102  br i1 %exit1, label %for1.inc, label %for2
103
104for1.inc:                                ; preds = %for2
  ; Single-entry PHI that carries the inner loop's final value out of for2.
105  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
106  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
107  %exit2 = icmp eq i64 %indvars.iv.next24, 100
108  br i1 %exit2, label %for1.loopexit, label %for1.header
109
110for1.loopexit:                                 ; preds = %for1.inc
  ; Returns the value produced by the last execution of the inner loop.
111  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
112  ret i64 %sum.inc.lcssa2
113}
114
115; Check that we do not interchange if there is an additional instruction
116; between the outer and inner reduction PHIs.
117; REMARKS: --- !Missed
118; REMARKS-NEXT: Pass:            loop-interchange
119; REMARKS-NEXT: Name:            UnsupportedPHIOuter
120; REMARKS-NEXT: Function:        test3
121
; test3: sum reduction across a two-deep loop nest where the inner PHI is not
; seeded by %sum.outer itself but by %so, which is %sum.outer plus 10.
122define i64 @test3(ptr %Arr) {
123entry:
124  br label %for1.header
125
126for1.header:                                         ; preds = %for1.inc, %entry
127  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
128  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  ; Extra instruction sitting between the outer PHI and the inner PHI.
129  %so = add i64 %sum.outer, 10
130  br label %for2
131
132for2:                                        ; preds = %for2, %for1.header
  ; %sum.inner takes %so, not %sum.outer, as its value on loop entry.
133  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
134  %sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
135  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
136  %lv = load i64, ptr %arrayidx, align 4
137  %sum.inc = add i64 %sum.inner, %lv
138  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
139  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
140  br i1 %exit1, label %for1.inc, label %for2
141
142for1.inc:                                ; preds = %for2
  ; Single-entry PHI that carries the inner loop's final sum out of for2.
143  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
144  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
145  %exit2 = icmp eq i64 %indvars.iv.next24, 100
146  br i1 %exit2, label %for1.loopexit, label %for1.header
147
148for1.loopexit:                                 ; preds = %for1.inc
  ; Single-entry PHI that carries the final sum out of the outer loop.
149  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
150  ret i64 %sum.inc.lcssa2
151}
152
153; Check that we do not interchange if the reduction is stored to a loop-invariant address inside the inner loop.
154; REMARKS: --- !Missed
155; REMARKS-NEXT: Pass:            loop-interchange
156; REMARKS-NEXT: Name:            Dependence
157; REMARKS-NEXT: Function:        test4
158
; test4: sum reduction across a two-deep loop nest that additionally stores
; the partial sum to one fixed address (%gep.dst) on every inner iteration.
159define i64 @test4(ptr %Arr, ptr %dst) {
160entry:
  ; Address of i64 element 42 of %dst, computed once before either loop.
161  %gep.dst = getelementptr inbounds i64, ptr %dst, i64 42
162  br label %for1.header
163
164for1.header:                                         ; preds = %for1.inc, %entry
165  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
166  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
167  br label %for2
168
169for2:                                        ; preds = %for2, %for1.header
170  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
171  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
172  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
173  %lv = load i64, ptr %arrayidx, align 4
174  %sum.inc = add i64 %sum.inner, %lv
  ; The store address does not depend on either induction variable, so every
  ; inner iteration writes the running sum to the same location.
175  store i64 %sum.inc, ptr %gep.dst, align 4
176  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
177  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
178  br i1 %exit1, label %for1.inc, label %for2
179
180for1.inc:                                ; preds = %for2
  ; Single-entry PHI that carries the inner loop's final sum out of for2.
181  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
182  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
183  %exit2 = icmp eq i64 %indvars.iv.next24, 100
184  br i1 %exit2, label %for1.loopexit, label %for1.header
185
186for1.loopexit:                                 ; preds = %for1.inc
  ; Single-entry PHI that carries the final sum out of the outer loop.
187  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
188  ret i64 %sum.inc.lcssa2
189}
190
191; Check that we do not interchange or crash if the PHI in the outer loop gets a
192; constant from the inner loop.
193; REMARKS: --- !Missed
194; REMARKS-NEXT: Pass:            loop-interchange
195; REMARKS-NEXT: Name:            UnsupportedPHIOuter
196; REMARKS-NEXT: Function:        test_constant_inner_loop_res
197
; test_constant_inner_loop_res: sum reduction across a two-deep loop nest in
; which the value fed back to the outer PHI is %sum.inc.amend, the sum of the
; inner loop's result and a single-entry PHI whose incoming value is the
; constant 0.
198define i64 @test_constant_inner_loop_res(ptr %Arr) {
199entry:
200  br label %for1.header
201
202for1.header:                                         ; preds = %for1.inc, %entry
  ; The back-edge value of %sum.outer is %sum.inc.amend rather than a PHI of
  ; the inner loop's result.
203  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
204  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.amend, %for1.inc ]
205  br label %for2
206
207for2:                                        ; preds = %for2, %for1.header
208  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
209  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
210  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
211  %lv = load i64, ptr %arrayidx, align 4
212  %sum.inc = add i64 %sum.inner, %lv
213  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
214  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
215  br i1 %exit1, label %for1.inc, label %for2
216
217for1.inc:                                ; preds = %for2
  ; %const.lcssa is a PHI over the inner-loop exit edge whose only incoming
  ; value is the constant 0; it is added to the inner loop's final sum.
218  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
219  %const.lcssa = phi i64 [ 0, %for2 ]
220  %sum.inc.amend = add i64 %const.lcssa, %sum.inc.lcssa
221  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
222  %exit2 = icmp eq i64 %indvars.iv.next24, 100
223  br i1 %exit2, label %for1.loopexit, label %for1.header
224
225for1.loopexit:                                 ; preds = %for1.inc
  ; Returns the amended sum from the last outer iteration.
226  %il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
227  ret i64 %il.res.lcssa2
228}
229
230; Floating point reductions are interchanged if all the fp instructions
231; involved allow reassociation.
232; REMARKS: --- !Passed
233; REMARKS-NEXT: Pass:            loop-interchange
234; REMARKS-NEXT: Name:            Interchanged
235; REMARKS-NEXT: Function:        test5
236
; test5: float reduction across a two-deep loop nest. Each inner iteration
; adds one element loaded from %Arr and one from %Arr2 to the accumulator,
; and both additions are 'fadd fast'. Both induction variables start at 1.
237define float @test5(ptr %Arr, ptr %Arr2) {
238entry:
239  br label %outer.header
240
241outer.header:                                     ; preds = %outer.inc, %entry
  ; %float.outer starts at 1.0 and receives the inner loop's result on the
  ; back edge.
242  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
243  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
244  br label %for.body3
245
246for.body3:                                        ; preds = %for.body3, %outer.header
  ; %float.inner is seeded directly from %float.outer on loop entry.
247  %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
248  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
  ; Both loads index the first array dimension with the inner induction
  ; variable and the second with the outer one.
249  %arrayidx5 = getelementptr inbounds [100 x [100 x float]], ptr %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
250  %vA = load float, ptr %arrayidx5
251  %float.inner.inc = fadd fast float %float.inner, %vA
252  %arrayidx6 = getelementptr inbounds [100 x [100 x float]], ptr %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
253  %vB = load float, ptr %arrayidx6
254  %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
255  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
256  %exitcond = icmp eq i64 %iv.inner.next, 100
257  br i1 %exitcond, label %outer.inc, label %for.body3
258
259outer.inc:                                        ; preds = %for.body3
  ; Single-entry PHI that carries the inner loop's final value out of
  ; for.body3.
260  %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
261  %iv.outer.next = add nsw i64 %iv.outer, 1
262  %cmp = icmp eq i64 %iv.outer.next, 100
  ; NOTE(review): the true edge of this branch is the back edge, so the outer
  ; loop repeats only when %iv.outer.next equals 100 - the reverse of the
  ; inner loop's exit test. Confirm this is intended before changing it.
263  br i1 %cmp, label %outer.header, label %for.exit
264
265for.exit:                                         ; preds = %outer.inc
  ; Single-entry PHI that carries the result out of the outer loop.
266  %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
267  ret float %float.outer.lcssa
268}
269
270; Floating point reductions are not interchanged if not all the fp instructions
271; involved allow reassociation.
272; REMARKS: --- !Missed
273; REMARKS-NEXT: Pass:            loop-interchange
274; REMARKS-NEXT: Name:            UnsupportedPHIOuter
275; REMARKS-NEXT: Function:        test6
276
; test6: float reduction across a two-deep loop nest with two additions per
; inner iteration. The first addition is a plain 'fadd' without fast-math
; flags; only the second one is 'fadd fast'. Both induction variables start
; at 1.
277define float @test6(ptr %Arr, ptr %Arr2) {
278entry:
279  br label %outer.header
280
281outer.header:                                     ; preds = %outer.inc, %entry
  ; %float.outer starts at 1.0 and receives the inner loop's result on the
  ; back edge.
282  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
283  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
284  br label %for.body3
285
286for.body3:                                        ; preds = %for.body3, %outer.header
  ; %float.inner is seeded directly from %float.outer on loop entry.
287  %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
288  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
289  %arrayidx5 = getelementptr inbounds [100 x [100 x float]], ptr %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
290  %vA = load float, ptr %arrayidx5
291  %float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
292  %arrayidx6 = getelementptr inbounds [100 x [100 x float]], ptr %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
293  %vB = load float, ptr %arrayidx6
294  %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
295  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
296  %exitcond = icmp eq i64 %iv.inner.next, 100
297  br i1 %exitcond, label %outer.inc, label %for.body3
298
299outer.inc:                                        ; preds = %for.body3
  ; Single-entry PHI that carries the inner loop's final value out of
  ; for.body3.
300  %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
301  %iv.outer.next = add nsw i64 %iv.outer, 1
302  %cmp = icmp eq i64 %iv.outer.next, 100
  ; NOTE(review): the true edge of this branch is the back edge, so the outer
  ; loop repeats only when %iv.outer.next equals 100 - the reverse of the
  ; inner loop's exit test. Confirm this is intended before changing it.
303  br i1 %cmp, label %outer.header, label %for.exit
304
305for.exit:                                         ; preds = %outer.inc
  ; Single-entry PHI that carries the result out of the outer loop.
306  %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
307  ret float %float.outer.lcssa
308}
309