// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=5 cleanup-unroll=true" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP

// Each invocation above runs the affine loop unrolling pass with a different
// option set and is verified with its own FileCheck prefix. The map captures
// below are shared by the functions checked under the same prefix.

// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
// UNROLL-FULL-DAG: [[$MAP2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 3)>
// UNROLL-FULL-DAG: [[$MAP3:#map[0-9]*]] = affine_map<(d0) -> (d0 + 4)>
// UNROLL-FULL-DAG: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP5:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 3)>
// UNROLL-FULL-DAG: [[$MAP6:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>

// SHORT-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>

// UNROLL-BY-4-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-BY-4-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
// UNROLL-BY-4-DAG: [[$MAP2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 3)>
// UNROLL-BY-4-DAG: [[$MAP3:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 1)>
// UNROLL-BY-4-DAG: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 3)>
// UNROLL-BY-4-DAG: [[$MAP5:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
// UNROLL-BY-4-DAG: [[$MAP6:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 * 16 + d1)>
// UNROLL-BY-4-DAG: [[$MAP11:#map[0-9]*]] = affine_map<(d0) -> (d0)>

// The innermost loop has a constant trip count of four and no IV use, so the
// expected output is four copies of its body inside the outer loop.
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
func.func @loop_nest_simplest() {
  // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
  affine.for %i = 0 to 100 step 2 {
    // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
    // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
    // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
    // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
    affine.for %j = 0 to 4 {
      %x = arith.constant 1 : i32
    }
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
} // UNROLL-FULL: }

// Same nest, but the body uses the inner IV: the expected copies read the IV
// through affine.apply ops on the constant lower bound.
// UNROLL-FULL-LABEL: func @loop_nest_simple_iv_use() {
func.func @loop_nest_simple_iv_use() {
  // UNROLL-FULL: %c0 = arith.constant 0 : index
  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 step 2 {
  affine.for %i = 0 to 100 step 2 {
    // UNROLL-FULL: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
    // UNROLL-FULL: %1 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
    // UNROLL-FULL: %3 = affine.apply [[$MAP1]](%c0)
    // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
    // UNROLL-FULL: %5 = affine.apply [[$MAP2]](%c0)
    // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
    affine.for %j = 0 to 4 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
    }
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
} // UNROLL-FULL: }

// Operations in the loop body have results that are used therein.
// UNROLL-FULL-LABEL: func @loop_nest_body_def_use() {
func.func @loop_nest_body_def_use() {
  // UNROLL-FULL: %c0 = arith.constant 0 : index
  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 step 2 {
  affine.for %i = 0 to 100 step 2 {
    // UNROLL-FULL: %c0_0 = arith.constant 0 : index
    %c0 = arith.constant 0 : index
    // UNROLL-FULL: %0 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %c0_0) : (index, index) -> index
    // UNROLL-FULL-NEXT: %2 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %3 = affine.apply [[$MAP0]](%2)
    // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %c0_0) : (index, index) -> index
    // UNROLL-FULL-NEXT: %5 = affine.apply [[$MAP1]](%c0)
    // UNROLL-FULL-NEXT: %6 = affine.apply [[$MAP0]](%5)
    // UNROLL-FULL-NEXT: %7 = "addi32"(%6, %c0_0) : (index, index) -> index
    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP2]](%c0)
    // UNROLL-FULL-NEXT: %9 = affine.apply [[$MAP0]](%8)
    // UNROLL-FULL-NEXT: %10 = "addi32"(%9, %c0_0) : (index, index) -> index
    affine.for %j = 0 to 4 {
      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
        (index) -> (index)
      %y = "addi32"(%x, %c0) : (index, index) -> index
    }
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
} // UNROLL-FULL: }

// Two sibling inner loops with a non-unit step; both have constant bounds and
// are expected to be fully unrolled inside the outer loop.
// UNROLL-FULL-LABEL: func @loop_nest_strided() {
func.func @loop_nest_strided() {
  // UNROLL-FULL: %c2 = arith.constant 2 : index
  // UNROLL-FULL-NEXT: %c2_0 = arith.constant 2 : index
  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-FULL: %0 = affine.apply [[$MAP0]](%c2_0)
    // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
    // UNROLL-FULL-NEXT: %2 = affine.apply [[$MAP1]](%c2_0)
    // UNROLL-FULL-NEXT: %3 = affine.apply [[$MAP0]](%2)
    // UNROLL-FULL-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> index
    affine.for %j = 2 to 6 step 2 {
      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
        (index) -> (index)
      %y = "addi32"(%x, %x) : (index, index) -> index
    }
    // UNROLL-FULL: %5 = affine.apply [[$MAP0]](%c2)
    // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
    // UNROLL-FULL-NEXT: %7 = affine.apply [[$MAP1]](%c2)
    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP0]](%7)
    // UNROLL-FULL-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> index
    // UNROLL-FULL-NEXT: %10 = affine.apply [[$MAP3]](%c2)
    // UNROLL-FULL-NEXT: %11 = affine.apply [[$MAP0]](%10)
    // UNROLL-FULL-NEXT: %12 = "addi32"(%11, %11) : (index, index) -> index
    affine.for %k = 2 to 7 step 2 {
      %z = "affine.apply" (%k) { map = affine_map<(d0) -> (d0 + 1)> } :
        (index) -> (index)
      %w = "addi32"(%z, %z) : (index, index) -> index
    }
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
} // UNROLL-FULL: }

// The loop body contains an op with two results.
// UNROLL-FULL-LABEL: func @loop_nest_multiple_results() {
func.func @loop_nest_multiple_results() {
  // UNROLL-FULL: %c0 = arith.constant 0 : index
  // UNROLL-FULL-NEXT: affine.for %arg0 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-FULL: %0 = affine.apply [[$MAP4]](%arg0, %c0)
    // UNROLL-FULL-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
    // UNROLL-FULL-NEXT: %2 = affine.apply #map{{.*}}(%arg0, %c0)
    // UNROLL-FULL-NEXT: %3:2 = "fma"(%2, %0, %0) : (index, index, index) -> (index, index)
    // UNROLL-FULL-NEXT: %4 = affine.apply #map{{.*}}(%c0)
    // UNROLL-FULL-NEXT: %5 = affine.apply #map{{.*}}(%arg0, %4)
    // UNROLL-FULL-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> index
    // UNROLL-FULL-NEXT: %7 = affine.apply #map{{.*}}(%arg0, %4)
    // UNROLL-FULL-NEXT: %8:2 = "fma"(%7, %5, %5) : (index, index, index) -> (index, index)
    affine.for %j = 0 to 2 step 1 {
      %x = affine.apply affine_map<(d0, d1) -> (d0 + 1)> (%i, %j)
      %y = "addi32"(%x, %x) : (index, index) -> index
      %z = affine.apply affine_map<(d0, d1) -> (d0 + 3)> (%i, %j)
      %w:2 = "fma"(%z, %x, %x) : (index, index, index) -> (index, index)
    }
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
} // UNROLL-FULL: }


// Imperfect loop nest. Unrolling innermost here yields a perfect nest.
// UNROLL-FULL-LABEL: func @loop_nest_seq_imperfect(%arg0: memref<128x128xf32>) {
func.func @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
  // UNROLL-FULL: %c0 = arith.constant 0 : index
  // UNROLL-FULL-NEXT: %c128 = arith.constant 128 : index
  %c128 = arith.constant 128 : index
  // UNROLL-FULL: affine.for %arg1 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-FULL: %0 = "vld"(%arg1) : (index) -> i32
    %ld = "vld"(%i) : (index) -> i32
    // UNROLL-FULL: %1 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %2 = "vmulf"(%c0, %1) : (index, index) -> index
    // UNROLL-FULL-NEXT: %3 = "vaddf"(%2, %2) : (index, index) -> index
    // UNROLL-FULL-NEXT: %4 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %5 = affine.apply [[$MAP0]](%4)
    // UNROLL-FULL-NEXT: %6 = "vmulf"(%4, %5) : (index, index) -> index
    // UNROLL-FULL-NEXT: %7 = "vaddf"(%6, %6) : (index, index) -> index
    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP1]](%c0)
    // UNROLL-FULL-NEXT: %9 = affine.apply [[$MAP0]](%8)
    // UNROLL-FULL-NEXT: %10 = "vmulf"(%8, %9) : (index, index) -> index
    // UNROLL-FULL-NEXT: %11 = "vaddf"(%10, %10) : (index, index) -> index
    // UNROLL-FULL-NEXT: %12 = affine.apply [[$MAP2]](%c0)
    // UNROLL-FULL-NEXT: %13 = affine.apply [[$MAP0]](%12)
    // UNROLL-FULL-NEXT: %14 = "vmulf"(%12, %13) : (index, index) -> index
    // UNROLL-FULL-NEXT: %15 = "vaddf"(%14, %14) : (index, index) -> index
    affine.for %j = 0 to 4 {
      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
        (index) -> (index)
      %y = "vmulf"(%j, %x) : (index, index) -> index
      %z = "vaddf"(%y, %y) : (index, index) -> index
    }
    // UNROLL-FULL: %16 = "scale"(%c128, %arg1) : (index, index) -> index
    %addr = "scale"(%c128, %i) : (index, index) -> index
    // UNROLL-FULL: "vst"(%16, %arg1) : (index, index) -> ()
    "vst"(%addr, %i) : (index, index) -> ()
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
}

// A top-level loop followed by a two-deep nest; both innermost loops have a
// constant trip count of four.
// UNROLL-FULL-LABEL: func @loop_nest_seq_multiple() {
func.func @loop_nest_seq_multiple() {
  // UNROLL-FULL: c0 = arith.constant 0 : index
  // UNROLL-FULL-NEXT: %c0_0 = arith.constant 0 : index
  // UNROLL-FULL-NEXT: %0 = affine.apply [[$MAP0]](%c0_0)
  // UNROLL-FULL-NEXT: "mul"(%0, %0) : (index, index) -> ()
  // UNROLL-FULL-NEXT: %1 = affine.apply [[$MAP0]](%c0_0)
  // UNROLL-FULL-NEXT: %2 = affine.apply [[$MAP0]](%1)
  // UNROLL-FULL-NEXT: "mul"(%2, %2) : (index, index) -> ()
  // UNROLL-FULL-NEXT: %3 = affine.apply [[$MAP1]](%c0_0)
  // UNROLL-FULL-NEXT: %4 = affine.apply [[$MAP0]](%3)
  // UNROLL-FULL-NEXT: "mul"(%4, %4) : (index, index) -> ()
  // UNROLL-FULL-NEXT: %5 = affine.apply [[$MAP2]](%c0_0)
  // UNROLL-FULL-NEXT: %6 = affine.apply [[$MAP0]](%5)
  // UNROLL-FULL-NEXT: "mul"(%6, %6) : (index, index) -> ()
  affine.for %j = 0 to 4 {
    %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
      (index) -> (index)
    "mul"(%x, %x) : (index, index) -> ()
  }

  // UNROLL-FULL: %c99 = arith.constant 99 : index
  %k = arith.constant 99 : index
  // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
  affine.for %m = 0 to 100 step 2 {
    // UNROLL-FULL: %7 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %8 = affine.apply [[$MAP6]](%c0)[%c99]
    // UNROLL-FULL-NEXT: %9 = affine.apply [[$MAP0]](%c0)
    // UNROLL-FULL-NEXT: %10 = affine.apply [[$MAP0]](%9)
    // UNROLL-FULL-NEXT: %11 = affine.apply [[$MAP6]](%9)[%c99]
    // UNROLL-FULL-NEXT: %12 = affine.apply [[$MAP1]](%c0)
    // UNROLL-FULL-NEXT: %13 = affine.apply [[$MAP0]](%12)
    // UNROLL-FULL-NEXT: %14 = affine.apply [[$MAP6]](%12)[%c99]
    // UNROLL-FULL-NEXT: %15 = affine.apply [[$MAP2]](%c0)
    // UNROLL-FULL-NEXT: %16 = affine.apply [[$MAP0]](%15)
    // UNROLL-FULL-NEXT: %17 = affine.apply [[$MAP6]](%15)[%c99]
    affine.for %n = 0 to 4 {
      %y = "affine.apply" (%n) { map = affine_map<(d0) -> (d0 + 1)> } :
        (index) -> (index)
      %z = "affine.apply" (%n, %k) { map = affine_map<(d0) [s0] -> (d0 + s0 + 1)> } :
        (index, index) -> (index)
    } // This loop is fully unrolled, so the output has no closing brace for it.
  } // UNROLL-FULL: }
  return // UNROLL-FULL: return
} // UNROLL-FULL: }

// A single-iteration loop: the checks expect its body directly in the
// function with no loop left.
// UNROLL-FULL-LABEL: func @loop_nest_unroll_full() {
func.func @loop_nest_unroll_full() {
  // UNROLL-FULL-NEXT: %0 = "foo"() : () -> i32
  // UNROLL-FULL-NEXT: %1 = "bar"() : () -> i32
  // UNROLL-FULL-NEXT: return
  affine.for %i = 0 to 1 {
    %x = "foo"() : () -> i32
    %y = "bar"() : () -> i32
  }
  return
} // UNROLL-FULL: }

// With the full-unroll threshold set to 2, only the outer loop (trip count 2)
// is unrolled; the checks expect two copies of the inner loop.
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
  // SHORT: affine.for %arg0 = 0 to 4 {
  // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
  // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
  // SHORT-NEXT: }
  // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
  // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
  // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
  // SHORT-NEXT: }
  affine.for %i = 0 to 2 {
    affine.for %j = 0 to 4 {
      %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
        (index) -> (index)
      %y = "addi32"(%x, %x) : (index, index) -> index
    }
  }
  return // SHORT: return
} // SHORT: }

// We are doing a minimal FileCheck here. We just need this test case to
// successfully run. Both %x and %y will get unrolled here as the min trip
// count threshold is set to 2.
// SHORT-LABEL: func @loop_nest_seq_long() -> i32 {
func.func @loop_nest_seq_long() -> i32 {
  %A = memref.alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
  %B = memref.alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
  %C = memref.alloc() : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>

  %zero = arith.constant 0 : i32
  %one = arith.constant 1 : i32
  %two = arith.constant 2 : i32

  %zero_idx = arith.constant 0 : index

  // The directives in this function use the prefix of the threshold=2
  // invocation, the same prefix as the function label above, so that they are
  // actually evaluated.
  // SHORT: affine.for %arg0 = 0 to 512
  affine.for %n0 = 0 to 512 {
    // SHORT: affine.for %arg1 = 0 to 8
    affine.for %n1 = 0 to 8 {
      memref.store %one, %A[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
      memref.store %two, %B[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
      memref.store %zero, %C[%n0, %n1] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
    }
  }

  affine.for %x = 0 to 2 {
    affine.for %y = 0 to 2 {
      // SHORT: affine.for
      affine.for %arg2 = 0 to 8 {
        // SHORT-NOT: affine.for
        // SHORT: %{{[0-9]+}} = affine.apply
        %b2 = "affine.apply" (%y, %arg2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
        %z = memref.load %B[%x, %b2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
        "op1"(%z) : (i32) -> ()
      }
      affine.for %j1 = 0 to 8 {
        affine.for %j2 = 0 to 8 {
          %a2 = "affine.apply" (%y, %j2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
          %v203 = memref.load %A[%j1, %a2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
          "op2"(%v203) : (i32) -> ()
        }
        affine.for %k2 = 0 to 8 {
          %s0 = "op3"() : () -> i32
          %c2 = "affine.apply" (%x, %k2) {map = affine_map<(d0, d1) -> (16*d0 + d1)>} : (index, index) -> index
          %s1 = memref.load %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
          %s2 = "addi32"(%s0, %s1) : (i32, i32) -> i32
          memref.store %s2, %C[%j1, %c2] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
        }
      }
      "op4"() : () -> ()
    }
  }
  %ret = memref.load %C[%zero_idx, %zero_idx] : memref<512 x 512 x i32, affine_map<(d0, d1) -> (d0, d1)>, 2>
  return %ret : i32
}

// Trip count 8 is a multiple of the unroll factor 4, so the checks expect an
// unrolled loop with step 4 and no cleanup loop.
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_no_cleanup() {
func.func @unroll_unit_stride_no_cleanup() {
  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: }
    affine.for %j = 0 to 8 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
      %y = "addi32"(%x, %x) : (i32, i32) -> i32
    }
    // empty loop
    // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
    affine.for %k = 0 to 8 {
    }
  }
  return
}

// Trip count 10 is not a multiple of 4: the checks expect a main loop over
// [0, 8) with step 4 followed by a cleanup loop over [8, 10).
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func.func @unroll_unit_stride_cleanup() {
  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: }
    // UNROLL-BY-4-NEXT: for [[L2:%arg[0-9]+]] = 8 to 10 {
    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: }
    affine.for %j = 0 to 10 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
      %y = "addi32"(%x, %x) : (i32, i32) -> i32
    }
  }
  return
}

// Same as above with a step of 5: the checks expect a main loop with step 20
// and a cleanup loop that keeps the original step.
// UNROLL-BY-4-LABEL: func @unroll_non_unit_stride_cleanup() {
func.func @unroll_non_unit_stride_cleanup() {
  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 2 to 42 step 20 {
    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
    // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: }
    // UNROLL-BY-4-NEXT: for [[L2:%arg[0-9]+]] = 42 to 48 step 5 {
    // UNROLL-BY-4-NEXT: %0 = "addi32"([[L2]], [[L2]]) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
    // UNROLL-BY-4-NEXT: }
    affine.for %j = 2 to 48 step 5 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
      %y = "addi32"(%x, %x) : (i32, i32) -> i32
    }
  }
  return
}

// Both the unrolled loop and the cleanup loop are single iteration loops.
// UNROLL-BY-4-LABEL: func @loop_nest_single_iteration_after_unroll
func.func @loop_nest_single_iteration_after_unroll(%N: index) {
  // UNROLL-BY-4: %c0 = arith.constant 0 : index
  // UNROLL-BY-4: %c4 = arith.constant 4 : index
  // UNROLL-BY-4: affine.for %arg1 = 0 to %arg0 {
  affine.for %i = 0 to %N {
    // UNROLL-BY-4: %0 = "addi32"(%c0, %c0) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %1 = affine.apply [[$MAP0]](%c0)
    // UNROLL-BY-4-NEXT: %2 = "addi32"(%1, %1) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %3 = affine.apply [[$MAP1]](%c0)
    // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %5 = affine.apply [[$MAP2]](%c0)
    // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
    // UNROLL-BY-4-NEXT: %7 = "addi32"(%c4, %c4) : (index, index) -> i32
    // UNROLL-BY-4-NOT: for
    affine.for %j = 0 to 5 {
      %x = "addi32"(%j, %j) : (index, index) -> i32
    } // UNROLL-BY-4-NOT: }
  } // UNROLL-BY-4: }
  return
}

// Test cases with loop bound operands.

// No cleanup will be generated here.
// UNROLL-BY-4-LABEL: func @loop_nest_operand1() {
func.func @loop_nest_operand1() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %arg1 = 0 to #map{{[0-9]*}}(%arg0) step 4
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: return
  affine.for %i = 0 to 100 step 2 {
    // The upper bound d0 - d0 mod 4 is always a multiple of four.
    affine.for %j = 0 to affine_map<(d0) -> (d0 - d0 mod 4)> (%i) {
      %x = "foo"() : () -> i32
    }
  }
  return
}

// No cleanup will be generated here.
// UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
func.func @loop_nest_operand2() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
// UNROLL-BY-4-NEXT: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: return
  affine.for %i = 0 to 100 step 2 {
    // Upper bound minus lower bound is 4 * d0 + 4, a multiple of four.
    affine.for %j = affine_map<(d0) -> (d0)> (%i) to affine_map<(d0) -> (5*d0 + 4)> (%i) {
      %x = "foo"() : () -> i32
    }
  }
  return
}

// Upper bounds built from floordiv and mod expressions; the checks for each
// nest follow the nest.
// UNROLL-BY-4-LABEL: func @floordiv_mod_ub
func.func @floordiv_mod_ub(%M : index, %N : index) {
  affine.for %i = 0 to %N step 4 {
    // A cleanup should be generated here.
    affine.for %j = 0 to min affine_map<(d0)[s0] -> ((16 * d0) floordiv (4 * s0))>(%i)[%N] {
      "test.foo"() : () -> ()
    }
  }
  // UNROLL-BY-4-NEXT: affine.for
  // UNROLL-BY-4-NEXT: affine.for %{{.*}} = 0 to {{.*}} step 4
  // UNROLL-BY-4: affine.for
  affine.for %i = 0 to %N step 4 {
    // No cleanup needed here.
    affine.for %j = 0 to min affine_map<(d0)[s0] -> ((16 * d0) mod (4 * s0))>(%i)[%N] {
      "test.foo"() : () -> ()
    }
  }
  // UNROLL-BY-4: affine.for
  // UNROLL-BY-4-NEXT: affine.for %{{.*}} = 0 to {{.*}} step 4
  // UNROLL-BY-4-NOT: affine.for
  // UNROLL-BY-4: return
  return
}

// Difference between loop bounds is constant, but not a multiple of unroll
// factor. The cleanup loop happens to be a single iteration one and is promoted.
// UNROLL-BY-4-LABEL: func @loop_nest_operand3() {
func.func @loop_nest_operand3() {
  // UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
  affine.for %i = 0 to 100 step 2 {
    // UNROLL-BY-4: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
    // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: %4 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: }
    // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
    affine.for %j = affine_map<(d0) -> (d0)> (%i) to affine_map<(d0) -> (d0 + 9)> (%i) {
      %x = "foo"() : () -> i32
    }
  } // UNROLL-BY-4: }
  return
}

// The inner loop's upper bound is the function argument, so its trip count is
// not a compile-time constant.
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound(%arg0: index) {
func.func @loop_nest_symbolic_bound(%N : index) {
  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
  affine.for %i = 0 to 100 {
    // UNROLL-BY-4: affine.for %arg2 = 0 to #map{{[0-9]*}}()[%arg0] step 4 {
    // UNROLL-BY-4: %0 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: }
    // A cleanup loop will be generated here.
    // UNROLL-BY-4-NEXT: affine.for %arg2 = #map{{[0-9]*}}()[%arg0] to %arg0 {
    // UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
    // UNROLL-BY-4-NEXT: }
    affine.for %j = 0 to %N {
      %x = "foo"() : () -> i32
    }
  }
  return
}

// Symbolic upper bound combined with a step of 3: the checks expect a main
// loop with step 12 and a cleanup loop that keeps step 3.
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound_with_step
// UNROLL-BY-4-SAME: %[[N:.*]]: index
func.func @loop_nest_symbolic_bound_with_step(%N : index) {
  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
  affine.for %i = 0 to 100 {
    affine.for %j = 0 to %N step 3 {
      %x = "foo"() : () -> i32
    }
// UNROLL-BY-4: affine.for %{{.*}} = 0 to #map{{[0-9]*}}()[%[[N]]] step 12 {
// UNROLL-BY-4: "foo"()
// UNROLL-BY-4-NEXT: "foo"()
// UNROLL-BY-4-NEXT: "foo"()
// UNROLL-BY-4-NEXT: "foo"()
// UNROLL-BY-4-NEXT: }
// A cleanup loop will be generated here.
// UNROLL-BY-4-NEXT: affine.for %{{.*}} = #map{{[0-9]*}}()[%[[N]]] to %[[N]] step 3 {
// UNROLL-BY-4-NEXT: "foo"()
// UNROLL-BY-4-NEXT: }
  }
  return
}

// Symbolic lower bound together with a multi-result min upper bound.
// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func.func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
  affine.for %i = %M to min affine_map<()[s0, s1] -> (s0, s1, 1024)>()[%N, %K] {
    "test.foo"() : () -> ()
  }
  return
}
// No unrolling here.
// UNROLL-BY-4: affine.for %{{.*}} = %{{.*}} to min #map{{.*}}()[%{{.*}}, %{{.*}}] {
// UNROLL-BY-4-NEXT: "test.foo"() : () -> ()
// UNROLL-BY-4-NEXT: }
// UNROLL-BY-4-NEXT: return

// The trip count here is a multiple of four, but this can be inferred only
// through composition. Check for no cleanup loop.
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound
func.func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
  // %K composes to (4 * %M + 1) - 1 = 4 * %M.
  %T = affine.apply affine_map<(d0) -> (4*d0 + 1)>(%M)
  %K = affine.apply affine_map<(d0) -> (d0 - 1)> (%T)
  affine.for %i = 0 to min affine_map<(d0, d1) -> (4 * d0, d1, 1024)>(%N, %K) {
    "foo"() : () -> ()
  }
  return
}
// UNROLL-BY-4: affine.for %arg2 = 0 to min
// UNROLL-BY-4-NOT: for
// UNROLL-BY-4: return

// Multi-result min upper bound whose results are 8 * s0 and 12 * s0.
// UNROLL-BY-4-LABEL: func @multi_upper_bound
func.func @multi_upper_bound(%arg0: index) {
  affine.for %i = 0 to min affine_map<()[s0] -> (8 * s0, 12 * s0)>()[%arg0] {
    "test.foo"() : () -> ()
  }
  // No unrolling possible here.
  // UNROLL-BY-4: affine.for %{{.*}} = 0 to min #map{{.*}}()[%{{.*}}]
  return
}

// Multi-result max lower bound; the checks expect the loop to be left as is.
// UNROLL-BY-4-LABEL: func @multi_lower_bound
func.func @multi_lower_bound(%arg0: index) {
  affine.for %i = max affine_map<()[s0] -> (8 * s0, 12 * s0)>()[%arg0] to 100 {
    "test.foo"() : () -> ()
  }
  // TODO: Extend getTripCountMapAndOperands to handle multi-result lower bound
  // maps.
  // UNROLL-BY-4: affine.for %{{.*}} = max #map{{.*}}()[%{{.*}}] to 100
  // UNROLL-BY-4-NOT: affine.for
  return
}

// Variant where the min upper bound takes symbol operands and one of them is
// 4 * %M; the checks expect four copies of the body and no further loop.
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound_alt
func.func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index) {
  %K = affine.apply affine_map<(d0) -> (4*d0)> (%M)
  affine.for %i = 0 to min affine_map<()[s0, s1] -> (4 * s0, s1, 1024)>()[%N, %K] {
    "foo"() : () -> ()
  }
  // UNROLL-BY-4: affine.for %arg2 = 0 to min
  // UNROLL-BY-4-NEXT: "foo"
  // UNROLL-BY-4-NEXT: "foo"
  // UNROLL-BY-4-NEXT: "foo"
  // UNROLL-BY-4-NEXT: "foo"
  // UNROLL-BY-4-NOT: for
  // UNROLL-BY-4: return
  return
}

// With an unroll factor of one, the checks expect the single-iteration loop
// to be replaced by its body, with the IV replaced by the constant 0.
// UNROLL-BY-1-LABEL: func @unroll_by_one_should_promote_single_iteration_loop()
func.func @unroll_by_one_should_promote_single_iteration_loop() {
  affine.for %i = 0 to 1 {
    %x = "foo"(%i) : (index) -> i32
  }
  return
// UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
// UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
// UNROLL-BY-1-NEXT: return
}

// Test unrolling with affine.for iter_args.

// Trip count 10 with unroll factor 4: the checks expect a main loop over
// [0, 8) with step 4 carrying both iter_args, followed by a cleanup loop over
// [8, 10) that is seeded with the main loop's results and feeds the return.
// NOTE(review): %n is not referenced in the function body; confirm whether it
// is intentionally unused.
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
func.func @loop_unroll_with_iter_args_and_cleanup(%arg0 : f32, %arg1 : f32, %n : index) -> (f32,f32) {
  %cf1 = arith.constant 1.0 : f32
  %cf2 = arith.constant 2.0 : f32
  %sum:2 = affine.for %iv = 0 to 10 iter_args(%i0 = %arg0, %i1 = %arg1) -> (f32, f32) {
    %sum0 = arith.addf %i0, %cf1 : f32
    %sum1 = arith.addf %i1, %cf2 : f32
    affine.yield %sum0, %sum1 : f32, f32
  }
  return %sum#0, %sum#1 : f32, f32
  // UNROLL-BY-4: %[[SUM:.*]]:2 = affine.for {{.*}} = 0 to 8 step 4 iter_args
  // UNROLL-BY-4-NEXT: arith.addf
  // UNROLL-BY-4-NEXT: arith.addf
  // UNROLL-BY-4-NEXT: arith.addf
  // UNROLL-BY-4-NEXT: arith.addf
  // UNROLL-BY-4-NEXT: arith.addf
  // UNROLL-BY-4-NEXT: arith.addf
  // UNROLL-BY-4-NEXT: %[[Y1:.*]] = arith.addf
  // UNROLL-BY-4-NEXT: %[[Y2:.*]] = arith.addf
  // UNROLL-BY-4-NEXT: affine.yield %[[Y1]], %[[Y2]]
  // UNROLL-BY-4-NEXT: }
  // UNROLL-BY-4-NEXT: %[[SUM1:.*]]:2 = affine.for {{.*}} = 8 to 10 iter_args(%[[V1:.*]] = %[[SUM]]#0, %[[V2:.*]] = %[[SUM]]#1)
  // UNROLL-BY-4: }
  // UNROLL-BY-4-NEXT: return %[[SUM1]]#0, %[[SUM1]]#1
}

// The epilogue being a single iteration loop gets promoted here.

// UNROLL-BY-4-LABEL: unroll_with_iter_args_and_promotion
func.func @unroll_with_iter_args_and_promotion(%arg0 : f32, %arg1 : f32) -> f32 {
  %from = arith.constant 0 : index
  %to = arith.constant 10 : index
  %step = arith.constant 1 : index
  %sum = affine.for %iv = 0 to 9 iter_args(%sum_iter = %arg0) -> (f32) {
    %next = arith.addf %sum_iter, %arg1 : f32
    affine.yield %next : f32
  }
  // UNROLL-BY-4: %[[SUM:.*]] = affine.for %{{.*}} = 0 to 8 step 4 iter_args(%[[V0:.*]] =
  // UNROLL-BY-4-NEXT: %[[V1:.*]] = arith.addf %[[V0]]
  // UNROLL-BY-4-NEXT: %[[V2:.*]] = arith.addf %[[V1]]
  // UNROLL-BY-4-NEXT: %[[V3:.*]] = arith.addf %[[V2]]
  // UNROLL-BY-4-NEXT: %[[V4:.*]] = arith.addf %[[V3]]
  // UNROLL-BY-4-NEXT: affine.yield %[[V4]]
  // UNROLL-BY-4-NEXT: }
  // UNROLL-BY-4-NEXT: %[[RES:.*]] = arith.addf %[[SUM]],
  // UNROLL-BY-4-NEXT: return %[[RES]]
  return %sum : f32
}

// A loop with a zero trip count must be left untouched by full unrolling.
// Both directives use the prefix of the unroll-full invocation so that the
// loop line is actually verified.
// UNROLL-FULL-LABEL: func @unroll_zero_trip_count_case
func.func @unroll_zero_trip_count_case() {
  // UNROLL-FULL-NEXT: affine.for %{{.*}} = 0 to 0
  affine.for %i = 0 to 0 {
  }
  return
}

// Trip count 3 is smaller than the unroll factor 5; with cleanup unrolling
// enabled the checks expect three straight-line copies of the body.
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_larger_unroll_factor()
func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
  affine.for %i = 0 to 3 {
    %x = "foo"(%i) : (index) -> i32
  }
  return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: return
}

// Trip count 7 is larger than the unroll factor 5; the checks expect seven
// straight-line copies of the body and no remaining loop.
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
  affine.for %i = 0 to 7 {
    %x = "foo"(%i) : (index) -> i32
  }
  return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V5:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V5]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V6:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V6]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: return
}

// Trip count 5 equals the unroll factor 5; the checks expect five
// straight-line copies of the body.
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_identical_unroll_factor()
func.func @unroll_cleanup_loop_with_identical_unroll_factor() {
  affine.for %i = 0 to 5 {
    %x = "foo"(%i) : (index) -> i32
  }
  return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: return
}

// UNROLL-BY-4-LABEL: func @known_multiple_ceildiv
func.func @known_multiple_ceildiv(%N: index, %S: index) {
  %cst = arith.constant 0.0 : f32
  %m = memref.alloc(%S) : memref<?xf32>
  // This exercises affine expr getLargestKnownDivisor for the ceildiv case.
  affine.for %i = 0 to affine_map<(d0) -> (32 * d0 + 64)>(%N) step 8 {
    affine.store %cst, %m[%i] : memref<?xf32>
  }
  // UNROLL-BY-4: affine.for %{{.*}} = 0 to {{.*}} step 32
  // UNROLL-BY-4-NOT: affine.for

  // This exercises affine expr getLargestKnownDivisor for floordiv.
  affine.for %i = 0 to affine_map<(d0) -> ((32 * d0 + 64) floordiv 8)>(%N) {
    affine.store %cst, %m[%i] : memref<?xf32>
  }
  // UNROLL-BY-4: affine.for %{{.*}} = 0 to {{.*}} step 4
  // UNROLL-BY-4-NOT: affine.for
  // UNROLL-BY-4: return
  return
}