// RUN: mlir-opt %s -scf-for-loop-peeling -canonicalize -split-input-file -verify-diagnostics | FileCheck %s
// RUN: mlir-opt %s -scf-for-loop-peeling=skip-partial=false -canonicalize -split-input-file -verify-diagnostics | FileCheck %s -check-prefix=CHECK-NO-SKIP

// Peeling with fully dynamic lb/ub/step: the main loop runs up to the largest
// multiple of %step (counted from %lb) that is <= %ub; a second loop handles
// the remaining partial iteration. Inside the main loop the affine.min folds
// to %step; inside the partial loop it folds to %ub - %iv.

// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> (s1 - (-s0 + s1) mod s2)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: func @fully_dynamic_bounds(
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
// CHECK: %[[C0_I32:.*]] = arith.constant 0 : i32
// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[LB]], %[[UB]], %[[STEP]]]
// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[LB]] to %[[NEW_UB]]
// CHECK-SAME: step %[[STEP]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
// CHECK: %[[CAST:.*]] = arith.index_cast %[[STEP]] : index to i32
// CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
// CHECK: %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]]
// CHECK-SAME: step %[[STEP]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) {
// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
// CHECK: %[[CAST2:.*]] = arith.index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = arith.addi %[[ACC2]], %[[CAST2]]
// CHECK: scf.yield %[[ADD2]]
// CHECK: }
// CHECK: return %[[RESULT]]
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
  %c0 = arith.constant 0 : i32
  %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0) -> i32 {
    %s = affine.min #map(%ub, %iv)[%step]
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %arg, %casted : i32
    scf.yield %0 : i32
  }
  return %r : i32
}

// -----

// Peeling with fully static bounds (lb = 0, ub = 17, step = 4): the main loop
// iterates to 16; the canonicalizer folds the single remaining iteration
// (trip count 1, partial size 1) into a plain addi after the loop.

// CHECK: func @fully_static_bounds(
// CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[C1_I32:.*]] = arith.constant 1 : i32
// CHECK-DAG: %[[C4_I32:.*]] = arith.constant 4 : i32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C16]]
// CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
// CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[C4_I32]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
// CHECK: %[[RESULT:.*]] = arith.addi %[[LOOP]], %[[C1_I32]] : i32
// CHECK: return %[[RESULT]]
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @fully_static_bounds() -> i32 {
  %c0_i32 = arith.constant 0 : i32
  %lb = arith.constant 0 : index
  %step = arith.constant 4 : index
  %ub = arith.constant 17 : index
  %r = scf.for %iv = %lb to %ub step %step
       iter_args(%arg = %c0_i32) -> i32 {
    %s = affine.min #map(%ub, %iv)[%step]
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %arg, %casted : i32
    scf.yield %0 : i32
  }
  return %r : i32
}

// -----

// Peeling with a dynamic upper bound and static lb = 0, step = 4: the new
// upper bound of the main loop is (ub floordiv 4) * 4.

// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: func @dynamic_upper_bound(
// CHECK-SAME: %[[UB:.*]]: index
// CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32
// CHECK-DAG: %[[C4_I32:.*]] = arith.constant 4 : i32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
// CHECK: %[[LOOP:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]]
// CHECK-SAME: step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
// CHECK: %[[ADD:.*]] = arith.addi %[[ACC]], %[[C4_I32]] : i32
// CHECK: scf.yield %[[ADD]]
// CHECK: }
// CHECK: %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]]
// CHECK-SAME: step %[[C4]] iter_args(%[[ACC2:.*]] = %[[LOOP]]) -> (i32) {
// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
// CHECK: %[[CAST2:.*]] = arith.index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = arith.addi %[[ACC2]], %[[CAST2]]
// CHECK: scf.yield %[[ADD2]]
// CHECK: }
// CHECK: return %[[RESULT]]
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @dynamic_upper_bound(%ub : index) -> i32 {
  %c0_i32 = arith.constant 0 : i32
  %lb = arith.constant 0 : index
  %step = arith.constant 4 : index
  %r = scf.for %iv = %lb to %ub step %step
       iter_args(%arg = %c0_i32) -> i32 {
    %s = affine.min #map(%ub, %iv)[%step]
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %arg, %casted : i32
    scf.yield %0 : i32
  }
  return %r : i32
}

// -----

// Same as above, but the loop has no results; the accumulation goes through
// a memref instead of iter_args.

// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> ((s0 floordiv 4) * 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: func @no_loop_results(
// CHECK-SAME: %[[UB:.*]]: index, %[[MEMREF:.*]]: memref<i32>
// CHECK-DAG: %[[C4_I32:.*]] = arith.constant 4 : i32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK: %[[NEW_UB:.*]] = affine.apply #[[MAP0]]()[%[[UB]]]
// CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[NEW_UB]] step %[[C4]] {
// CHECK: %[[LOAD:.*]] = memref.load %[[MEMREF]][]
// CHECK: %[[ADD:.*]] = arith.addi %[[LOAD]], %[[C4_I32]] : i32
// CHECK: memref.store %[[ADD]], %[[MEMREF]]
// CHECK: }
// CHECK: scf.for %[[IV2:.*]] = %[[NEW_UB]] to %[[UB]] step %[[C4]] {
// CHECK: %[[REM:.*]] = affine.apply #[[MAP1]](%[[IV2]])[%[[UB]]]
// CHECK: %[[LOAD2:.*]] = memref.load %[[MEMREF]][]
// CHECK: %[[CAST2:.*]] = arith.index_cast %[[REM]]
// CHECK: %[[ADD2:.*]] = arith.addi %[[LOAD2]], %[[CAST2]]
// CHECK: memref.store %[[ADD2]], %[[MEMREF]]
// CHECK: }
// CHECK: return
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @no_loop_results(%ub : index, %d : memref<i32>) {
  %c0_i32 = arith.constant 0 : i32
  %lb = arith.constant 0 : index
  %step = arith.constant 4 : index
  scf.for %iv = %lb to %ub step %step {
    %s = affine.min #map(%ub, %iv)[%step]
    %r = memref.load %d[] : memref<i32>
    %casted = arith.index_cast %s : index to i32
    %0 = arith.addi %r, %casted : i32
    memref.store %0, %d[] : memref<i32>
  }
  return
}

// -----

// Test rewriting of affine.min/max ops. Make sure that more general cases than
// the ones above are successfully rewritten. Also make sure that the pattern
// does not rewrite ops that should not be rewritten.

// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 1)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s1 - 1, s0)>
// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0)[s0, s1, s2] -> (-d0 + s1, s2, s0)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<()[s0] -> (-s0)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK-DAG: #[[MAP6:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 + 1)>
// CHECK-DAG: #[[MAP7:.*]] = affine_map<(d0)[s0] -> (-d0 + s0 - 1)>
// CHECK-DAG: #[[MAP8:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
// CHECK: func @test_affine_op_rewrite(
// CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index,
// CHECK-SAME: %[[MEMREF:.*]]: memref<?xindex>, %[[SOME_VAL:.*]]: index
// CHECK: scf.for %[[IV:.*]] = %[[LB]] to %{{.*}} step %[[STEP]] {
// (affine.min folded away)
// CHECK: memref.store %[[STEP]]
// (affine.min folded away)
// CHECK: memref.store %[[STEP]]
// CHECK: %[[RES2:.*]] = affine.apply #[[MAP1]]()[%[[STEP]]]
// CHECK: memref.store %[[RES2]]
// CHECK: %[[RES3:.*]] = affine.min #[[MAP2]](%[[IV]])[%[[STEP]], %[[UB]]]
// CHECK: memref.store %[[RES3]]
// CHECK: %[[RES4:.*]] = affine.min #[[MAP3]](%[[IV]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
// CHECK: memref.store %[[RES4]]
// CHECK: %[[RES5:.*]] = affine.apply #[[MAP4]]()[%[[STEP]]]
// CHECK: memref.store %[[RES5]]
// CHECK: }
// CHECK: scf.for %[[IV2:.*]] = {{.*}} to %[[UB]] step %[[STEP]] {
// CHECK: %[[RES_IF_0:.*]] = affine.apply #[[MAP5]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_0]]
// CHECK: %[[RES_IF_1:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_1]]
// CHECK: %[[RES_IF_2:.*]] = affine.apply #[[MAP6]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_2]]
// CHECK: %[[RES_IF_3:.*]] = affine.apply #[[MAP7]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_3]]
// CHECK: %[[RES_IF_4:.*]] = affine.min #[[MAP3]](%[[IV2]])[%[[STEP]], %[[UB]], %[[SOME_VAL]]]
// CHECK: memref.store %[[RES_IF_4]]
// CHECK: %[[RES_IF_5:.*]] = affine.apply #[[MAP8]](%[[IV2]])[%[[UB]]]
// CHECK: memref.store %[[RES_IF_5]]
#map0 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
#map1 = affine_map<(d0, d1)[s0] -> (d0 - d1 + 1, s0)>
#map2 = affine_map<(d0, d1)[s0] -> (s0 + 1, d0 - d1 + 1)>
#map3 = affine_map<(d0, d1)[s0] -> (s0, d0 - d1 - 1)>
#map4 = affine_map<(d0, d1, d2)[s0] -> (s0, d0 - d1, d2)>
#map5 = affine_map<(d0, d1)[s0] -> (-s0, -d0 + d1)>
func.func @test_affine_op_rewrite(%lb : index, %ub: index,
                                  %step: index, %d : memref<?xindex>,
                                  %some_val: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %c5 = arith.constant 5 : index
  scf.for %iv = %lb to %ub step %step {
    // Most common case: Rewrite min(%ub - %iv, %step) to %step.
    %m0 = affine.min #map0(%ub, %iv)[%step]
    memref.store %m0, %d[%c0] : memref<?xindex>

    // Increase %ub - %iv a little bit, pattern should still apply.
    %m1 = affine.min #map1(%ub, %iv)[%step]
    memref.store %m1, %d[%c1] : memref<?xindex>

    // Rewrite min(%ub - %iv + 1, %step + 1) to %step + 1.
    %m2 = affine.min #map2(%ub, %iv)[%step]
    memref.store %m2, %d[%c2] : memref<?xindex>

    // min(%ub - %iv - 1, %step) cannot be simplified because %ub - %iv - 1
    // can be smaller than %step. (Can be simplified in if-statement.)
    %m3 = affine.min #map3(%ub, %iv)[%step]
    memref.store %m3, %d[%c3] : memref<?xindex>

    // min(%ub - %iv, %step, %some_val) cannot be simplified because the range
    // of %some_val is unknown.
    %m4 = affine.min #map4(%ub, %iv, %some_val)[%step]
    memref.store %m4, %d[%c4] : memref<?xindex>

    // Rewrite max(-%ub + %iv, -%step) to -%ub + %iv (and -%step in the scf.if).
    %m5 = affine.max #map5(%ub, %iv)[%step]
    memref.store %m5, %d[%c5] : memref<?xindex>
  }
  return
}

// -----

// Nested loops: by default, only the inner loop of the first (full) copy of
// the outer loop is peeled; with skip-partial=false, the inner loop inside
// the outer loop's partial copy is peeled as well.

// CHECK: func @nested_loops
// CHECK: scf.for {{.*}} {
// CHECK: scf.for {{.*}} {
// CHECK: }
// CHECK: scf.for {{.*}} {
// CHECK: }
// CHECK: }
// CHECK: scf.for {{.*}} {
// CHECK: scf.for {{.*}} {
// CHECK: }
// CHECK-NOT: scf.for
// CHECK: }

// CHECK-NO-SKIP: func @nested_loops
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: scf.for {{.*}} {
// CHECK-NO-SKIP: }
// CHECK-NO-SKIP: }
#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
func.func @nested_loops(%lb0: index, %lb1 : index, %ub0: index, %ub1: index,
                        %step: index) -> i32 {
  %c0 = arith.constant 0 : i32
  %r0 = scf.for %iv0 = %lb0 to %ub0 step %step iter_args(%arg0 = %c0) -> i32 {
    %r1 = scf.for %iv1 = %lb1 to %ub1 step %step iter_args(%arg1 = %arg0) -> i32 {
      %s = affine.min #map(%ub1, %iv1)[%step]
      %casted = arith.index_cast %s : index to i32
      %0 = arith.addi %arg1, %casted : i32
      scf.yield %0 : i32
    }
    %1 = arith.addi %arg0, %r1 : i32
    scf.yield %1 : i32
  }
  return %r0 : i32
}

// -----

// CHECK-LABEL: func @regression
func.func @regression(%arg0: memref<i64>, %arg1: index) {
  %c0 = arith.constant 0 : index
  %0 = affine.apply affine_map<()[s0] -> (s0 * s0)>()[%arg1]
  scf.for %arg2 = %c0 to %0 step %arg1 {
    %1 = affine.min affine_map<(d0)[s0] -> (s0, -d0 + s0 * s0)>(%arg2)[%arg1]
    %2 = arith.index_cast %0 : index to i64
    memref.store %2, %arg0[] : memref<i64>
  }
  return
}

// -----

// Regression test: Make sure that we do not crash.

// CHECK-LABEL: func @zero_step(
// CHECK: scf.for
// CHECK: scf.for
func.func @zero_step(%arg0: memref<i64>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %foldto0 = arith.subi %c1, %c1 : index
  scf.for %arg2 = %c0 to %c1 step %foldto0 {
    %2 = arith.index_cast %arg2 : index to i64
    memref.store %2, %arg0[] : memref<i64>
  }
  return
}