// xref: /llvm-project/mlir/test/Dialect/SCF/for-loop-peeling.mlir (revision c1730f42219365f5105148870422592c25083104)
// RUN: mlir-opt %s -scf-for-loop-peeling -canonicalize -split-input-file -verify-diagnostics | FileCheck %s
// RUN: mlir-opt %s -scf-for-loop-peeling=skip-partial=false -canonicalize -split-input-file -verify-diagnostics | FileCheck %s -check-prefix=CHECK-NO-SKIP

//  CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (-s0 + s1) mod s2)>
//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
//      CHECK: func @fully_dynamic_bounds(
// CHECK-SAME:     %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
//      CHECK:   %[[C0_I32:.*]] = arith.constant 0 : i32
//      CHECK:   %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]]
//      CHECK:   %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]]
// CHECK-SAME:       step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
//      CHECK:     %[[CAST:.*]] = arith.index_cast %[[STEP]] : index to i32
//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32
//      CHECK:     scf.yield %[[ADD]]
//      CHECK:   }
//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]]
// CHECK-SAME:       step %[[STEP]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) {
//      CHECK:     %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
//      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[REM]]
//      CHECK:     %[[ADD2:.*]] = arith.addi %[[ACC2]], %[[CAST2]]
//      CHECK:     scf.yield %[[ADD2]]
//      CHECK:   }
//      CHECK:   return %[[RESULT]]
// Peeling with fully dynamic lb/ub/step: the main loop runs up to the new
// upper bound ub - ((ub - lb) mod step); a second loop handles the remainder,
// where the affine.min inside the body simplifies on both sides.
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
  %c0 = arith.constant 0 : i32
  %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0) -> i32 {
    // Accumulates min(%ub - %iv, %step) on every iteration.
    %s = affine.min #map(%ub, %iv)[%step]
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %arg, %casted : i32
    scf.yield %0 : i32
  }
  return %r : i32
}

// -----

//      CHECK: func @fully_static_bounds(
//  CHECK-DAG:   %[[C0_I32:.*]] = arith.constant 0 : i32
//  CHECK-DAG:   %[[C1_I32:.*]] = arith.constant 1 : i32
//  CHECK-DAG:   %[[C4_I32:.*]] = arith.constant 4 : i32
//  CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C4:.*]] = arith.constant 4 : index
//  CHECK-DAG:   %[[C16:.*]] = arith.constant 16 : index
//      CHECK:   %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]]
// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC]], %[[C4_I32]] : i32
//      CHECK:     scf.yield %[[ADD]]
//      CHECK:   }
//      CHECK:   %[[RESULT:.*]] = arith.addi %[[LOOP]], %[[C1_I32]] : i32
//      CHECK:   return %[[RESULT]]
// With fully static bounds (0 to 17 step 4), the peeled remainder iteration
// canonicalizes away entirely: the main loop covers [0, 16) and the final
// partial iteration folds to adding the constant 1.
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @fully_static_bounds() -> i32 {
  %c0_i32 = arith.constant 0 : i32
  %lb = arith.constant 0 : index
  %step = arith.constant 4 : index
  %ub = arith.constant 17 : index
  %r = scf.for %iv = %lb to %ub step %step
               iter_args(%arg = %c0_i32) -> i32 {
    %s = affine.min #map(%ub, %iv)[%step]
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %arg, %casted : i32
    scf.yield %0 : i32
  }
  return %r : i32
}

// -----

//  CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
//      CHECK: func @dynamic_upper_bound(
// CHECK-SAME:     %[[UB:.*]]: index
//  CHECK-DAG:   %[[C0_I32:.*]] = arith.constant 0 : i32
//  CHECK-DAG:   %[[C4_I32:.*]] = arith.constant 4 : i32
//  CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C4:.*]] = arith.constant 4 : index
//      CHECK:   %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
//      CHECK:   %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]]
// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC]], %[[C4_I32]] : i32
//      CHECK:     scf.yield %[[ADD]]
//      CHECK:   }
//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]]
// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) {
//      CHECK:     %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
//      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[REM]]
//      CHECK:     %[[ADD2:.*]] = arith.addi %[[ACC2]], %[[CAST2]]
//      CHECK:     scf.yield %[[ADD2]]
//      CHECK:   }
//      CHECK:   return %[[RESULT]]
// Only the upper bound is dynamic; with lb = 0 and step = 4 constant, the new
// upper bound of the main loop simplifies to (ub floordiv 4) * 4.
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @dynamic_upper_bound(%ub : index) -> i32 {
  %c0_i32 = arith.constant 0 : i32
  %lb = arith.constant 0 : index
  %step = arith.constant 4 : index
  %r = scf.for %iv = %lb to %ub step %step
               iter_args(%arg = %c0_i32) -> i32 {
    %s = affine.min #map(%ub, %iv)[%step]
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %arg, %casted : i32
    scf.yield %0 : i32
  }
  return %r : i32
}

// -----

//  CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
//  CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
//      CHECK: func @no_loop_results(
// CHECK-SAME:     %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
//  CHECK-DAG:   %[[C4_I32:.*]] = arith.constant 4 : i32
//  CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C4:.*]] = arith.constant 4 : index
//      CHECK:   %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
//      CHECK:   scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] {
//      CHECK:     %[[LOAD:.*]] = memref.load %[[MEMREF]][]
//      CHECK:     %[[ADD:.*]] = arith.addi %[[LOAD]], %[[C4_I32]] : i32
//      CHECK:     memref.store %[[ADD]], %[[MEMREF]]
//      CHECK:   }
//      CHECK:   scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]] step %[[C4]] {
//      CHECK:     %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
//      CHECK:     %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
//      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[REM]]
//      CHECK:     %[[ADD2:.*]] = arith.addi %[[LOAD2]], %[[CAST2]]
//      CHECK:     memref.store %[[ADD2]], %[[MEMREF]]
//      CHECK:   }
//      CHECK:   return
// A loop without results: peeling still splits it into a main loop and a
// remainder loop; the accumulation happens through memory instead of
// iter_args.
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @no_loop_results(%ub : index, %d : memref<i32>) {
  %c0_i32 = arith.constant 0 : i32
  %lb = arith.constant 0 : index
  %step = arith.constant 4 : index
  scf.for %iv = %lb to %ub step %step {
    %s = affine.min #map(%ub, %iv)[%step]
    %r = memref.load %d[] : memref<i32>
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %r, %casted : i32
    memref.store %0, %d[] : memref<i32>
  }
  return
}

// -----

// Test rewriting of affine.min/max ops. Make sure that more general cases than
// the ones above are successfully rewritten. Also make sure that the pattern
// does not rewrite ops that should not be rewritten.

//  CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
//  CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s1 - 1, s0)>
//  CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (-d0 + s1, s2, s0)>
//  CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0] -> (-s0)>
//  CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
//  CHECK-DAG: #[[MAP6:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 + 1)>
//  CHECK-DAG: #[[MAP7:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
//  CHECK-DAG: #[[MAP8:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
//      CHECK: func @test_affine_op_rewrite(
// CHECK-SAME:     %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
// CHECK-SAME:     %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
//      CHECK:   scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
//                 (affine.min folded away)
//      CHECK:     memref.store %[[STEP]]
//                 (affine.min folded away)
//      CHECK:     memref.store %[[STEP]]
//      CHECK:     %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]]
//      CHECK:     memref.store %[[RES2]]
//      CHECK:     %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]]
//      CHECK:     memref.store %[[RES3]]
//      CHECK:     %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
//      CHECK:     memref.store %[[RES4]]
//      CHECK:     %[[RES5:.*]] = affine.apply #[[MAP4]]()[%[[STEP]]]
//      CHECK:     memref.store %[[RES5]]
//      CHECK:   }
//      CHECK:   scf.for %[[IV2:.*]] = {{.*}} to %[[UB]] step %[[STEP]] {
//      CHECK:     %[[RES_IF_0:.*]] = affine.apply #[[MAP5]](%[[IV2]])[%[[UB]]]
//      CHECK:     memref.store %[[RES_IF_0]]
//      CHECK:     %[[RES_IF_1:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]]
//      CHECK:     memref.store %[[RES_IF_1]]
//      CHECK:     %[[RES_IF_2:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]]
//      CHECK:     memref.store %[[RES_IF_2]]
//      CHECK:     %[[RES_IF_3:.*]] = affine.apply #[[MAP7]](%[[IV2]])[%[[UB]]]
//      CHECK:     memref.store %[[RES_IF_3]]
//      CHECK:     %[[RES_IF_4:.*]] = affine.min #[[MAP3]](%[[IV2]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
//      CHECK:     memref.store %[[RES_IF_4]]
//      CHECK:     %[[RES_IF_5:.*]] = affine.apply #[[MAP8]](%[[IV2]])[%[[UB]]]
//      CHECK:     memref.store %[[RES_IF_5]]
#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
#map5 = affine_map<(d0, d1)[s0] -> (-s0, -d0 + d1)>
func.func @test_affine_op_rewrite(%lb : index, %ub: index,
                             %step: index, %d : memref<?xindex>,
                             %some_val: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %c5 = arith.constant 5 : index
  scf.for %iv = %lb to %ub step %step {
    // Most common case: Rewrite min(%ub - %iv, %step) to %step.
    %m0 = affine.min #map0(%ub, %iv)[%step]
    memref.store %m0, %d[%c0] : memref<?xindex>

    // Increase %ub - %iv a little bit, pattern should still apply.
    %m1 = affine.min #map1(%ub, %iv)[%step]
    memref.store %m1, %d[%c1] : memref<?xindex>

    // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1.
    %m2 = affine.min #map2(%ub, %iv)[%step]
    memref.store %m2, %d[%c2] : memref<?xindex>

    // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1
    // can be smaller than %step. (Can be simplified in if-statement.)
    %m3 = affine.min #map3(%ub, %iv)[%step]
    memref.store %m3, %d[%c3] : memref<?xindex>

    // min(%ub - %iv, %step, %some_val) cannot be simplified because the range
    // of %some_val is unknown.
    %m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
    memref.store %m4, %d[%c4] : memref<?xindex>

    // Rewrite max(-%ub + %iv, -%step) to -%ub + %iv (and -%step in the scf.if).
    %m5 = affine.max #map5(%ub, %iv)[%step]
    memref.store %m5, %d[%c5] : memref<?xindex>
  }
  return
}

// -----

//     CHECK: func @nested_loops
//     CHECK:   scf.for {{.*}} {
//     CHECK:     scf.for {{.*}} {
//     CHECK:     }
//     CHECK:     scf.for {{.*}} {
//     CHECK:     }
//     CHECK:   }
//     CHECK:   scf.for {{.*}} {
//     CHECK:     scf.for {{.*}} {
//     CHECK:     }
// CHECK-NOT:     scf.for
//     CHECK:   }

//     CHECK-NO-SKIP: func @nested_loops
//     CHECK-NO-SKIP:   scf.for {{.*}} {
//     CHECK-NO-SKIP:     scf.for {{.*}} {
//     CHECK-NO-SKIP:     }
//     CHECK-NO-SKIP:     scf.for {{.*}} {
//     CHECK-NO-SKIP:     }
//     CHECK-NO-SKIP:   }
//     CHECK-NO-SKIP:   scf.for {{.*}} {
//     CHECK-NO-SKIP:     scf.for {{.*}} {
//     CHECK-NO-SKIP:     }
//     CHECK-NO-SKIP:     scf.for {{.*}} {
//     CHECK-NO-SKIP:     }
//     CHECK-NO-SKIP:   }
// With the default skip-partial=true, the inner loop inside the outer loop's
// peeled partial iteration is not peeled again (CHECK-NOT above); with
// skip-partial=false it is peeled as well (CHECK-NO-SKIP prefix).
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @nested_loops(%lb0: index, %lb1 : index, %ub0: index, %ub1: index,
                   %step: index) -> i32 {
  %c0 = arith.constant 0 : i32
  %r0 = scf.for %iv0 = %lb0 to %ub0 step %step iter_args(%arg0 = %c0) -> i32 {
    %r1 = scf.for %iv1 = %lb1 to %ub1 step %step iter_args(%arg1 = %arg0) -> i32 {
      %s = affine.min #map(%ub1, %iv1)[%step]
      %casted = arith.index_cast %s : index to i32
      %0 = arith.addi %arg1, %casted : i32
      scf.yield %0 : i32
    }
    %1 = arith.addi %arg0, %r1 : i32
    scf.yield %1 : i32
  }
  return %r0 : i32
}

// -----

// CHECK-LABEL: func @regression
func.func @regression(%arg0: memref<i64>, %arg1: index) {
  %c0 = arith.constant 0 : index
  %0 = affine.apply affine_map<()[s0] -> (s0 * s0)>()[%arg1]
  scf.for %arg2 = %c0 to %0 step %arg1 {
    // Note: %1 is intentionally unused; the store reads %0. The test only
    // checks that the pass handles this IR (label match, no crash).
    %1 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + s0 * s0)>(%arg2)[%arg1]
    %2 = arith.index_cast %0 : index to i64
    memref.store %2, %arg0[] : memref<i64>
  }
  return
}

// -----

// Regression test: Make sure that we do not crash.

// CHECK-LABEL: func @zero_step(
//       CHECK:   scf.for
//       CHECK:   scf.for
func.func @zero_step(%arg0: memref<i64>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // The step folds to 0 during canonicalization; peeling must not crash on it.
  %foldto0 = arith.subi %c1, %c1 : index
  scf.for %arg2 = %c0 to %c1 step %foldto0 {
    %2 = arith.index_cast %arg2 : index to i64
    memref.store %2, %arg0[] : memref<i64>
  }
  return
}