// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s

// Affine map aliases captured for the factor-2 run.
// CHECK-DAG: [[$MAP_PLUS_1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
// CHECK-DAG: [[$MAP_DIV_OFFSET:#map[0-9]*]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)>
// CHECK-DAG: [[$MAP_SYM_UB:#map[0-9]*]] = affine_map<()[s0, s1] -> (s0, s1, 1024)>

// Affine map aliases captured for the factor-4 run.
// UJAM-FOUR-DAG: [[$UBMAP:#map[0-9]*]] = affine_map<()[s0] -> (s0 + 8)>
// UJAM-FOUR-DAG: [[$MAP_PLUS_1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
// UJAM-FOUR-DAG: [[$MAP_PLUS_2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
// UJAM-FOUR-DAG: [[$MAP_PLUS_3:#map[0-9]*]] = affine_map<(d0) -> (d0 + 3)>

// Imperfectly nested loops: the ops before and after the inner loop are
// unrolled, while the inner loop body is jammed. The outer trip count (101)
// is not a multiple of 2, so a single-iteration cleanup at %c100 is expected.
// CHECK-LABEL: func @unroll_jam_imperfect_nest() {
func.func @unroll_jam_imperfect_nest() {
  affine.for %i = 0 to 101 {
    %x = "addi32"(%i, %i) : (index, index) -> i32
    affine.for %j = 0 to 17 {
      %y = "addi32"(%i, %i) : (index, index) -> i32
      %z = "addi32"(%y, %y) : (i32, i32) -> i32
    }
    %w = "foo"(%i, %x) : (index, i32) -> i32
  }
  return
}
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   affine.for %{{.*}} = 0 to 17 {
// CHECK-NEXT:     [[RES3:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:     "addi32"([[RES3]], [[RES3]]) : (i32, i32) -> i32
// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:     [[RES4:%[0-9]+]] = "addi32"([[INC1]], [[INC1]])
// CHECK-NEXT:     "addi32"([[RES4]], [[RES4]]) : (i32, i32) -> i32
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]])
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]])
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      "addi32"(%c100, %c100)
// CHECK-NEXT: affine.for [[IV0]] = 0 to 17 {
// CHECK-NEXT:   [[RESC:%[0-9]+]] = "addi32"(%c100, %c100)
// CHECK-NEXT:   "addi32"([[RESC]], [[RESC]]) : (i32, i32) -> i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}})
// CHECK-NEXT: return

// Symbolic upper bound %N with factor 2: the main loop's upper bound and the
// cleanup loop's lower bound both come from the MAP_DIV_OFFSET map.
// CHECK-LABEL: func @loop_nest_unknown_count_1
// CHECK-SAME: [[N:arg[0-9]+]]: index
func.func @loop_nest_unknown_count_1(%N : index) {
  // CHECK-NEXT: affine.for %{{.*}} = 1 to [[$MAP_DIV_OFFSET]]()[%[[N]]] step 2 {
  // CHECK-NEXT:   affine.for %{{.*}} = 1 to 100 {
  // CHECK-NEXT:     "foo"() : () -> i32
  // CHECK-NEXT:     "foo"() : () -> i32
  // CHECK-NEXT:   }
  // CHECK-NEXT: }
  // A cleanup loop should be generated here.
  // CHECK-NEXT: affine.for %{{.*}} = [[$MAP_DIV_OFFSET]]()[%[[N]]] to %[[N]] {
  // CHECK-NEXT:   affine.for %{{.*}} = 1 to 100 {
  // CHECK-NEXT:     "foo"() : () -> i32
  // CHECK-NEXT:   }
  // CHECK-NEXT: }
  affine.for %i = 1 to %N {
    affine.for %j = 1 to 100 {
      %x = "foo"() : () -> i32
    }
  }
  return
}

// Only checked for unroll-jam-factor=4. The outer loop runs from %N to %N + 9,
// so the main loop is expected to stop at %N + 8 and the one remaining
// iteration is promoted into the enclosing block.
// UJAM-FOUR-LABEL: func @loop_nest_unknown_count_2
// UJAM-FOUR-SAME: %[[N:arg[0-9]+]]: index
func.func @loop_nest_unknown_count_2(%N : index) {
  // UJAM-FOUR-NEXT: affine.for [[IV0:%arg[0-9]+]] = %[[N]] to [[$UBMAP]]()[%[[N]]] step 4 {
  // UJAM-FOUR-NEXT:   affine.for [[IV1:%arg[0-9]+]] = 1 to 100 {
  // UJAM-FOUR-NEXT:     "foo"([[IV0]])
  // UJAM-FOUR-NEXT:     [[IV_PLUS_1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
  // UJAM-FOUR-NEXT:     "foo"([[IV_PLUS_1]])
  // UJAM-FOUR-NEXT:     [[IV_PLUS_2:%[0-9]+]] = affine.apply [[$MAP_PLUS_2]]([[IV0]])
  // UJAM-FOUR-NEXT:     "foo"([[IV_PLUS_2]])
  // UJAM-FOUR-NEXT:     [[IV_PLUS_3:%[0-9]+]] = affine.apply [[$MAP_PLUS_3]]([[IV0]])
  // UJAM-FOUR-NEXT:     "foo"([[IV_PLUS_3]])
  // UJAM-FOUR-NEXT:   }
  // UJAM-FOUR-NEXT: }
  // The cleanup loop is a single iteration one and is promoted.
  // UJAM-FOUR-NEXT: [[RES:%[0-9]+]] = affine.apply [[$UBMAP]]()[%[[N]]]
  // UJAM-FOUR-NEXT: affine.for [[IV0]] = 1 to 100 {
  // UJAM-FOUR-NEXT:   "foo"([[RES]])
  // UJAM-FOUR-NEXT: }
  affine.for %i = %N to affine_map<()[s0] -> (s0+9)> ()[%N] {
    affine.for %j = 1 to 100 {
      "foo"(%i) : (index) -> ()
    }
  }
  return
}

// Outer loop whose upper bound is a min over symbols and a constant; the loop
// nest is expected to be left unchanged.
// CHECK-LABEL: func @loop_nest_symbolic_and_min_upper_bound
// CHECK-SAME: [[M:arg[0-9]+]]: index
// CHECK-SAME: [[N:arg[0-9]+]]: index
// CHECK-SAME: [[K:arg[0-9]+]]: index
func.func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
  affine.for %i = 0 to min affine_map<()[s0, s1] -> (s0, s1, 1024)>()[%M, %N] {
    affine.for %j = 0 to %K {
      "test.foo"(%i, %j) : (index, index) -> ()
    }
  }
  return
}
// No unroll-and-jam possible here as the lower bound for the cleanup loop won't
// be representable.
// CHECK-NEXT: affine.for [[IV0:%arg[0-9]+]] = 0 to min #map{{.*}}()[%[[M]], %[[N]]] {
// CHECK-NEXT:   affine.for [[IV1:%arg[0-9]+]] = 0 to %[[K]] {
// CHECK-NEXT:     "test.foo"([[IV0]], [[IV1]])
// CHECK-NEXT:   }
// CHECK-NEXT: }
// CHECK-NEXT: return

// The inner loop trip count changes each iteration of outer loop.
// Do not unroll-and-jam.
// CHECK-LABEL: func @no_unroll_jam_dependent_ubound
func.func @no_unroll_jam_dependent_ubound(%in0: memref<?xf32, 1>) {
  affine.for %i = 0 to 100 {
    affine.for %k = 0 to affine_map<(d0) -> (d0 + 1)>(%i) {
      %y = "addi32"(%k, %k) : (index, index) -> i32
    }
  }
  return
}
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 {
// CHECK-NEXT:   affine.for [[IV1:%arg[0-9]+]] = 0 to [[$MAP_PLUS_1]]([[IV0]]) {
// CHECK-NEXT:     "addi32"([[IV1]], [[IV1]])
// CHECK-NEXT:   }
// CHECK-NEXT: }
// CHECK-NEXT: return

// Inner loop with one iter_arg.
// CHECK-LABEL: func @unroll_jam_one_iter_arg
func.func @unroll_jam_one_iter_arg() {
  affine.for %i = 0 to 101 {
    %cst = arith.constant 1 : i32
    %x = "addi32"(%i, %i) : (index, index) -> i32
    %red = affine.for %j = 0 to 17 iter_args(%acc = %cst) -> (i32) {
      %y = "bar"(%i, %j, %acc) : (index, index, i32) -> i32
      affine.yield %y : i32
    }
    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
  }
  return
}
// The jammed inner loop is expected to carry one iter_arg per unrolled copy of
// the outer body, and each copy of "foo" consumes its own loop result.
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32) {
// CHECK-NEXT:     [[RES4:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[ACC1]])
// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:     [[RES5:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[ACC2]])
// CHECK-NEXT:     affine.yield [[RES4]], [[RES5]]
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      constant 1 : i32
// CHECK-NEXT: "addi32"(%c100, %c100)
// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
// CHECK-NEXT:   [[RES7:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}})
// CHECK-NEXT:   affine.yield [[RES7]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
// CHECK-NEXT: return

// Inner loop with multiple iter_args.
// CHECK-LABEL: func @unroll_jam_iter_args
func.func @unroll_jam_iter_args() {
  affine.for %i = 0 to 101 {
    %cst = arith.constant 0 : i32
    %cst1 = arith.constant 1 : i32
    %x = "addi32"(%i, %i) : (index, index) -> i32
    %red:2 = affine.for %j = 0 to 17 iter_args(%acc = %cst, %acc1 = %cst1) -> (i32, i32) {
      %y = "bar"(%i, %j, %acc) : (index, index, i32) -> i32
      %z = "bar1"(%i, %j, %acc1) : (index, index, i32) -> i32
      affine.yield %y, %z : i32, i32
    }
    %w = "foo"(%i, %x, %red#0, %red#1) : (index, i32, i32, i32) -> i32
  }
  return
}
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[CONST0:%[a-zA-Z0-9_]*]] = arith.constant 0 : i32
// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 0 : i32
// CHECK-NEXT:   [[CONST3:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   [[RES3:%[0-9]+]]:4 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC0:%arg[0-9]+]] = [[CONST0]], [[ACC1:%arg[0-9]+]] = [[CONST1]],
// CHECK-SAME:   [[ACC2:%arg[0-9]+]] = [[CONST2]], [[ACC3:%arg[0-9]+]] = [[CONST3]]) -> (i32, i32, i32, i32) {
// CHECK-NEXT:     [[RES4:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[ACC0]])
// CHECK-NEXT:     [[RES5:%[0-9]+]] = "bar1"([[IV0]], [[IV1]], [[ACC1]])
// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:     [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[ACC2]])
// CHECK-NEXT:     [[RES7:%[0-9]+]] = "bar1"([[INC1]], [[IV1]], [[ACC3]])
// CHECK-NEXT:     affine.yield [[RES4]], [[RES5]], [[RES6]], [[RES7]]
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0, [[RES3]]#1)
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#2, [[RES3]]#3)
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      constant 0 : i32
// CHECK-NEXT: constant 1 : i32
// CHECK-NEXT: "addi32"(%c100, %c100)
// CHECK-NEXT: [[RES8:%[0-9]+]]:2 = affine.for
// CHECK-NEXT:   [[RES9:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}})
// CHECK-NEXT:   [[RES10:%[0-9]+]] = "bar1"(%c100, {{.*}}, {{.*}})
// CHECK-NEXT:   affine.yield [[RES9]], [[RES10]] : i32, i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES8]]#0, [[RES8]]#1)
// CHECK-NEXT: return

// When an iter operand is a function argument, do not replace any use of the
// operand.
// CHECK-LABEL: func @unroll_jam_iter_args_func_arg
// CHECK-SAME: [[INIT:%arg[0-9]+]]: i32
func.func @unroll_jam_iter_args_func_arg(%in: i32) {
  affine.for %i = 0 to 101 {
    %x = "addi32"(%i, %i) : (index, index) -> i32
    %red = affine.for %j = 0 to 17 iter_args(%acc = %in) -> (i32) {
      %y = "bar"(%i, %j, %acc) : (index, index, i32) -> i32
      affine.yield %y : i32
    }
    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
  }
  return
}
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[INIT]], [[ACC2:%arg[0-9]+]] = [[INIT]]) -> (i32, i32) {
// CHECK-NEXT:     [[RES4:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[ACC1]])
// CHECK-NEXT:     [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:     [[RES5:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[ACC2]])
// CHECK-NEXT:     affine.yield [[RES4]], [[RES5]]
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      "addi32"(%c100, %c100)
// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
// CHECK-NEXT:   [[RES7:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}})
// CHECK-NEXT:   affine.yield [[RES7]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
// CHECK-NEXT: return

// Nested inner loops, each with one iter_arg. The innermost loop uses its
// outer loop's iter_arg as its iter operand.
// CHECK-LABEL: func @unroll_jam_iter_args_nested
func.func @unroll_jam_iter_args_nested() {
  affine.for %i = 0 to 101 {
    %cst = arith.constant 1 : i32
    %x = "addi32"(%i, %i) : (index, index) -> i32
    %red = affine.for %j = 0 to 17 iter_args(%acc = %cst) -> (i32) {
      %red1 = affine.for %k = 0 to 35 iter_args(%acc1 = %acc) -> (i32) {
        %y = "bar"(%i, %j, %k, %acc1) : (index, index, index, i32) -> i32
        affine.yield %y : i32
      }
      affine.yield %red1 : i32
    }
    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
  }
  return
}
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32) {
// CHECK-NEXT:     [[RES4:%[0-9]+]]:2 = affine.for [[IV2:%arg[0-9]+]] = 0 to 35 iter_args([[ACC3:%arg[0-9]+]] = [[ACC1]], [[ACC4:%arg[0-9]+]] = [[ACC2]]) -> (i32, i32) {
// CHECK-NEXT:       [[RES5:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV2]], [[ACC3]])
// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:       [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV2]], [[ACC4]])
// CHECK-NEXT:       affine.yield [[RES5]], [[RES6]]
// CHECK-NEXT:     }
// CHECK-NEXT:     affine.yield [[RES4]]#0, [[RES4]]#1
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      constant 1 : i32
// CHECK-NEXT: "addi32"(%c100, %c100)
// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
// CHECK-NEXT:   [[RES7:%[0-9]+]] = affine.for
// CHECK-NEXT:     [[RES8:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
// CHECK-NEXT:     affine.yield [[RES8]] : i32
// CHECK-NEXT:   }
// CHECK-NEXT:   affine.yield [[RES7]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
// CHECK-NEXT: return

// Nested inner loops, each with one iter_arg. One loop uses its sibling loop's
// result as its iter operand.
// CHECK-LABEL: func @unroll_jam_iter_args_nested_affine_for_result
func.func @unroll_jam_iter_args_nested_affine_for_result() {
  affine.for %i = 0 to 101 {
    %cst = arith.constant 1 : i32
    %x = "addi32"(%i, %i) : (index, index) -> i32
    %red = affine.for %j = 0 to 17 iter_args(%acc = %cst) -> (i32) {
      %red1 = affine.for %k = 0 to 35 iter_args(%acc1 = %acc) -> (i32) {
        %y = "bar"(%i, %j, %k, %acc1) : (index, index, index, i32) -> i32
        affine.yield %acc : i32
      }
      %red2 = affine.for %l = 0 to 36 iter_args(%acc2 = %red1) -> (i32) {
        %y = "bar"(%i, %j, %l, %acc2) : (index, index, index, i32) -> i32
        affine.yield %y : i32
      }
      affine.yield %red2 : i32
    }
    %w = "foo"(%i, %x, %red) : (index, i32, i32) -> i32
  }
  return
}
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   [[RES3:%[0-9]+]]:2 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32) {
// CHECK-NEXT:     [[RES4:%[0-9]+]]:2 = affine.for [[IV2:%arg[0-9]+]] = 0 to 35 iter_args([[ACC3:%arg[0-9]+]] = [[ACC1]], [[ACC4:%arg[0-9]+]] = [[ACC2]]) -> (i32, i32) {
// CHECK-NEXT:       [[RES5:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV2]], [[ACC3]])
// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:       [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV2]], [[ACC4]])
// CHECK-NEXT:       affine.yield [[ACC1]], [[ACC2]]
// CHECK-NEXT:     }
// CHECK-NEXT:     [[RES14:%[0-9]+]]:2 = affine.for [[IV3:%arg[0-9]+]] = 0 to 36 iter_args([[ACC13:%arg[0-9]+]] = [[RES4]]#0, [[ACC14:%arg[0-9]+]] = [[RES4]]#1) -> (i32, i32) {
// CHECK-NEXT:       [[RES15:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV3]], [[ACC13]])
// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:       [[RES16:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV3]], [[ACC14]])
// CHECK-NEXT:       affine.yield [[RES15]], [[RES16]]
// CHECK-NEXT:     }
// CHECK-NEXT:     affine.yield [[RES14]]#0, [[RES14]]#1
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0)
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#1)
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      constant 1 : i32
// CHECK-NEXT: "addi32"(%c100, %c100)
// CHECK-NEXT: [[RES6:%[0-9]+]] = affine.for
// CHECK-NEXT:   [[RES7:%[0-9]+]] = affine.for
// CHECK-NEXT:     [[RES8:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
// CHECK-NEXT:     affine.yield
// CHECK-NEXT:   }
// CHECK-NEXT:   [[RES17:%[0-9]+]] = affine.for
// CHECK-NEXT:     [[RES18:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
// CHECK-NEXT:     affine.yield [[RES18]] : i32
// CHECK-NEXT:   }
// CHECK-NEXT:   affine.yield [[RES17]] : i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]])
// CHECK-NEXT: return

// Nested inner loops, each with one or more iter_args. Yield the same value
// multiple times.
// CHECK-LABEL: func @unroll_jam_iter_args_nested_yield
func.func @unroll_jam_iter_args_nested_yield() {
  affine.for %i = 0 to 101 {
    %cst = arith.constant 1 : i32
    %x = "addi32"(%i, %i) : (index, index) -> i32
    %red:3 = affine.for %j = 0 to 17 iter_args(%acc = %cst, %acc1 = %cst, %acc2 = %cst) -> (i32, i32, i32) {
      %red1 = affine.for %k = 0 to 35 iter_args(%acc3 = %acc) -> (i32) {
        %y = "bar"(%i, %j, %k, %acc3) : (index, index, index, i32) -> i32
        affine.yield %y : i32
      }
      %red2:2 = affine.for %l = 0 to 36 iter_args(%acc4 = %acc1, %acc5 = %acc2) -> (i32, i32) {
        %y = "bar1"(%i, %j, %l, %acc4, %acc5) : (index, index, index, i32, i32) -> i32
        affine.yield %y, %y : i32, i32
      }
      affine.yield %red1, %red1, %red2#1 : i32, i32, i32
    }
    %w = "foo"(%i, %x, %red#0, %red#2) : (index, i32, i32, i32) -> i32
  }
  return
}
// In the jammed loops the iter_args of the first unrolled copy come first,
// followed by those of the second copy; results are indexed the same way.
// CHECK:      affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 {
// CHECK-NEXT:   [[CONST1:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]])
// CHECK-NEXT:   [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   [[CONST2:%[a-zA-Z0-9_]*]] = arith.constant 1 : i32
// CHECK-NEXT:   [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]])
// CHECK-NEXT:   [[RES3:%[0-9]+]]:6 = affine.for [[IV1:%arg[0-9]+]] = 0 to 17 iter_args([[ACC1:%arg[0-9]+]] = [[CONST1]], [[ACC2:%arg[0-9]+]] = [[CONST1]],
// CHECK-SAME:   [[ACC3:%arg[0-9]+]] = [[CONST1]], [[ACC4:%arg[0-9]+]] = [[CONST2]], [[ACC5:%arg[0-9]+]] = [[CONST2]], [[ACC6:%arg[0-9]+]] = [[CONST2]]) -> (i32, i32, i32, i32, i32, i32) {
// CHECK-NEXT:     [[RES4:%[0-9]+]]:2 = affine.for [[IV2:%arg[0-9]+]] = 0 to 35 iter_args([[ACC7:%arg[0-9]+]] = [[ACC1]], [[ACC8:%arg[0-9]+]] = [[ACC4]]) -> (i32, i32) {
// CHECK-NEXT:       [[RES5:%[0-9]+]] = "bar"([[IV0]], [[IV1]], [[IV2]], [[ACC7]])
// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:       [[RES6:%[0-9]+]] = "bar"([[INC1]], [[IV1]], [[IV2]], [[ACC8]])
// CHECK-NEXT:       affine.yield [[RES5]], [[RES6]]
// CHECK-NEXT:     }
// CHECK-NEXT:     [[RES14:%[0-9]+]]:4 = affine.for [[IV3:%arg[0-9]+]] = 0 to 36 iter_args([[ACC13:%arg[0-9]+]] = [[ACC2]], [[ACC14:%arg[0-9]+]] = [[ACC3]],
// CHECK-SAME:     [[ACC15:%arg[0-9]+]] = [[ACC5]], [[ACC16:%arg[0-9]+]] = [[ACC6]]) -> (i32, i32, i32, i32) {
// CHECK-NEXT:       [[RES15:%[0-9]+]] = "bar1"([[IV0]], [[IV1]], [[IV3]], [[ACC13]], [[ACC14]])
// CHECK-NEXT:       [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:       [[RES16:%[0-9]+]] = "bar1"([[INC1]], [[IV1]], [[IV3]], [[ACC15]], [[ACC16]])
// CHECK-NEXT:       affine.yield [[RES15]], [[RES15]], [[RES16]], [[RES16]]
// CHECK-NEXT:     }
// CHECK-NEXT:     affine.yield [[RES4]]#0, [[RES4]]#0, [[RES14]]#1, [[RES4]]#1, [[RES4]]#1, [[RES14]]#3
// CHECK-NEXT:   }
// CHECK:        "foo"([[IV0]], [[RES1]], [[RES3]]#0, [[RES3]]#2)
// CHECK-NEXT:   affine.apply [[$MAP_PLUS_1]]([[IV0]])
// CHECK-NEXT:   "foo"({{.*}}, [[RES2]], [[RES3]]#3, [[RES3]]#5)
// CHECK:      }
// Cleanup loop (single iteration).
// CHECK:      constant 1 : i32
// CHECK-NEXT: "addi32"(%c100, %c100)
// CHECK-NEXT: [[RES6:%[0-9]+]]:3 = affine.for
// CHECK-NEXT:   [[RES7:%[0-9]+]] = affine.for
// CHECK-NEXT:     [[RES8:%[0-9]+]] = "bar"(%c100, {{.*}}, {{.*}}, {{.*}})
// CHECK-NEXT:     affine.yield [[RES8]] : i32
// CHECK-NEXT:   }
// CHECK-NEXT:   [[RES17:%[0-9]+]]:2 = affine.for
// CHECK-NEXT:     [[RES18:%[0-9]+]] = "bar1"(%c100, {{.*}}, {{.*}}, {{.*}}, {{.*}})
// CHECK-NEXT:     affine.yield [[RES18]], [[RES18]] : i32, i32
// CHECK-NEXT:   }
// CHECK-NEXT:   affine.yield [[RES7]], [[RES7]], [[RES17]]#1 : i32, i32, i32
// CHECK-NEXT: }
// CHECK-NEXT: "foo"(%c100, %{{.*}}, [[RES6]]#0, [[RES6]]#2)
// CHECK-NEXT: return

// The unroll-jammed loop itself carries an iter_arg (a mulf reduction) and
// contains an inner loop with its own iter_arg (an addf reduction).
// NOTE(review): the expected output seeds both partial accumulators of the
// outer loop with the same init value and then combines them with arith.mulf,
// so the init value appears to be applied twice - confirm this is the intended
// semantics of the pass.
// CHECK-LABEL: func @unroll_jam_nested_iter_args_mulf
// CHECK-SAME: [[INIT0:%arg[0-9]+]]: f32, [[INIT1:%arg[0-9]+]]: f32
func.func @unroll_jam_nested_iter_args_mulf(%arg0: memref<21x30xf32, 1>, %init : f32, %init1 : f32) {
  %0 = affine.for %arg3 = 0 to 21 iter_args(%arg4 = %init) -> (f32) {
    %1 = affine.for %arg5 = 0 to 30 iter_args(%arg6 = %init1) -> (f32) {
      %3 = affine.load %arg0[%arg3, %arg5] : memref<21x30xf32, 1>
      %4 = arith.addf %arg6, %3 : f32
      affine.yield %4 : f32
    }
    %2 = arith.mulf %arg4, %1 : f32
    affine.yield %2 : f32
  }
  return
}

// CHECK:      %[[CONST0:[a-zA-Z0-9_]*]] = arith.constant 20 : index
// CHECK-NEXT: [[RES:%[0-9]+]]:2 = affine.for %[[IV0:arg[0-9]+]] = 0 to 20 step 2 iter_args([[ACC0:%arg[0-9]+]] = [[INIT0]], [[ACC1:%arg[0-9]+]] = [[INIT0]]) -> (f32, f32) {
// CHECK-NEXT:   [[RES1:%[0-9]+]]:2 = affine.for %[[IV1:arg[0-9]+]] = 0 to 30 iter_args([[ACC2:%arg[0-9]+]] = [[INIT1]], [[ACC3:%arg[0-9]+]] = [[INIT1]]) -> (f32, f32) {
// CHECK-NEXT:     [[LOAD1:%[0-9]+]] = affine.load {{.*}}[%[[IV0]], %[[IV1]]]
// CHECK-NEXT:     [[ADD1:%[0-9]+]] = arith.addf [[ACC2]], [[LOAD1]] : f32
// CHECK-NEXT:     %[[INC1:[0-9]+]] = affine.apply [[$MAP_PLUS_1]](%[[IV0]])
// CHECK-NEXT:     [[LOAD2:%[0-9]+]] = affine.load {{.*}}[%[[INC1]], %[[IV1]]]
// CHECK-NEXT:     [[ADD2:%[0-9]+]] = arith.addf [[ACC3]], [[LOAD2]] : f32
// CHECK-NEXT:     affine.yield [[ADD1]], [[ADD2]]
// CHECK-NEXT:   }
// CHECK-NEXT:   [[MUL1:%[0-9]+]] = arith.mulf [[ACC0]], [[RES1]]#0 : f32
// CHECK-NEXT:   affine.apply
// CHECK-NEXT:   [[MUL2:%[0-9]+]] = arith.mulf [[ACC1]], [[RES1]]#1 : f32
// CHECK-NEXT:   affine.yield [[MUL1]], [[MUL2]]
// CHECK-NEXT: }
// Reduction op.
// CHECK-NEXT: [[MUL3:%[0-9]+]] = arith.mulf [[RES]]#0, [[RES]]#1 : f32
// Cleanup loop (single iteration).
// CHECK-NEXT: [[RES2:%[0-9]+]] = affine.for %[[IV2:arg[0-9]+]] = 0 to 30 iter_args([[ACC4:%arg[0-9]+]] = [[INIT1]]) -> (f32) {
// CHECK-NEXT:   [[LOAD3:%[0-9]+]] = affine.load {{.*}}[%[[CONST0]], %[[IV2]]]
// CHECK-NEXT:   [[ADD3:%[0-9]+]] = arith.addf [[ACC4]], [[LOAD3]] : f32
// CHECK-NEXT:   affine.yield [[ADD3]] : f32
// CHECK-NEXT: }
// CHECK-NEXT: [[MUL4:%[0-9]+]] = arith.mulf [[MUL3]], [[RES2]] : f32
// CHECK-NEXT: return

// A single loop carrying an addi reduction: the two partial sums of the
// unrolled loop are combined with arith.addi, and the promoted cleanup
// iteration (index 20) is added on top.
// NOTE(review): both partial accumulators start from the same init value, so
// the combined sum appears to include it twice - confirm against the pass.
// CHECK-LABEL: func @unroll_jam_iter_args_addi
// CHECK-SAME: [[INIT0:%arg[0-9]+]]: i32
func.func @unroll_jam_iter_args_addi(%arg0: memref<21xi32, 1>, %init : i32) {
  %0 = affine.for %arg3 = 0 to 21 iter_args(%arg4 = %init) -> (i32) {
    %1 = affine.load %arg0[%arg3] : memref<21xi32, 1>
    %2 = arith.addi %arg4, %1 : i32
    affine.yield %2 : i32
  }
  return
}

// CHECK:      %[[CONST0:[a-zA-Z0-9_]*]] = arith.constant 20 : index
// CHECK-NEXT: [[RES:%[0-9]+]]:2 = affine.for %[[IV0:arg[0-9]+]] = 0 to 20 step 2 iter_args([[ACC0:%arg[0-9]+]] = [[INIT0]], [[ACC1:%arg[0-9]+]] = [[INIT0]]) -> (i32, i32) {
// CHECK-NEXT:   [[LOAD1:%[0-9]+]] = affine.load {{.*}}[%[[IV0]]]
// CHECK-NEXT:   [[ADD1:%[0-9]+]] = arith.addi [[ACC0]], [[LOAD1]] : i32
// CHECK-NEXT:   %[[INC1:[0-9]+]] = affine.apply [[$MAP_PLUS_1]](%[[IV0]])
// CHECK-NEXT:   [[LOAD2:%[0-9]+]] = affine.load {{.*}}[%[[INC1]]]
// CHECK-NEXT:   [[ADD2:%[0-9]+]] = arith.addi [[ACC1]], [[LOAD2]] : i32
// CHECK-NEXT:   affine.yield [[ADD1]], [[ADD2]]
// CHECK-NEXT: }
// Reduction op.
// CHECK-NEXT: [[ADD3:%[0-9]+]] = arith.addi [[RES]]#0, [[RES]]#1 : i32
// Cleanup loop (single iteration).
// CHECK-NEXT: [[LOAD3:%[0-9]+]] = affine.load {{.*}}[%[[CONST0]]]
// CHECK-NEXT: [[ADD4:%[0-9]+]] = arith.addi [[ADD3]], [[LOAD3]] : i32
// CHECK-NEXT: return