// RUN: mlir-opt -lower-affine %s | FileCheck %s

// A function containing only `return`: the expectations below just look for
// the function header, the return and the closing brace.
// CHECK-LABEL: func @empty() {
func.func @empty() {
  return     // CHECK:  return
}            // CHECK: }

func.func private @body(index) -> ()

// Simple loops are properly converted.
// The constant bounds 1 and 42 and the implicit step 1 are each expected as
// an arith.constant feeding the resulting `for`.
// CHECK-LABEL: func @simple_loop
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c1]] to %[[c42]] step %[[c1_0]] {
// CHECK-NEXT:     call @body(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @simple_loop() {
  affine.for %i = 1 to 42 {
    func.call @body(%i) : (index) -> ()
  }
  return
}

/////////////////////////////////////////////////////////////////////

// A loop that carries a value through iter_args: expected to become an
// scf.for with iter_args whose body ends in scf.yield, and whose result is
// returned.
func.func @for_with_yield(%buffer: memref<1024xf32>) -> (f32) {
  %sum_0 = arith.constant 0.0 : f32
  %sum = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_0) -> (f32) {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = arith.addf %sum_iter, %t : f32
    affine.yield %sum_next : f32
  }
  return %sum : f32
}

// CHECK-LABEL: func @for_with_yield
// CHECK:        %[[INIT_SUM:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT:   %[[LOWER:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[UPPER:.*]] = arith.constant 10 : index
// CHECK-NEXT:   %[[STEP:.*]] = arith.constant 2 : index
// CHECK-NEXT:   %[[SUM:.*]] = scf.for %[[IV:.*]] = %[[LOWER]] to %[[UPPER]] step %[[STEP]] iter_args(%[[SUM_ITER:.*]] = %[[INIT_SUM]]) -> (f32) {
// CHECK-NEXT:     memref.load
// CHECK-NEXT:     %[[SUM_NEXT:.*]] = arith.addf
// CHECK-NEXT:     scf.yield %[[SUM_NEXT]] : f32
// CHECK-NEXT:   }
// CHECK-NEXT:   return %[[SUM]] : f32

/////////////////////////////////////////////////////////////////////

func.func private @pre(index) -> ()
func.func private @body2(index, index) -> ()
func.func private @post(index) -> ()

// An outer loop with calls before and after a single inner loop.
// CHECK-LABEL: func @imperfectly_nested_loops
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:     call @pre(%{{.*}}) : (index) -> ()
// CHECK-NEXT:     %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:     %[[c56:.*]] = arith.constant 56 : index
// CHECK-NEXT:     %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT:     for %{{.*}} = %[[c7]] to %[[c56]] step %[[c2]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:     call @post(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @imperfectly_nested_loops() {
  affine.for %i = 0 to 42 {
    func.call @pre(%i) : (index) -> ()
    affine.for %j = 7 to 56 step 2 {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
    func.call @post(%i) : (index) -> ()
  }
  return
}

/////////////////////////////////////////////////////////////////////

func.func private @mid(index) -> ()
func.func private @body3(index, index) -> ()

// Same shape as above, but with two sibling inner loops separated by a call.
// CHECK-LABEL: func @more_imperfectly_nested_loops
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:     call @pre(%{{.*}}) : (index) -> ()
// CHECK-NEXT:     %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:     %[[c56:.*]] = arith.constant 56 : index
// CHECK-NEXT:     %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT:     for %{{.*}} = %[[c7]] to %[[c56]] step %[[c2]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:     call @mid(%{{.*}}) : (index) -> ()
// CHECK-NEXT:     %[[c18:.*]] = arith.constant 18 : index
// CHECK-NEXT:     %[[c37:.*]] = arith.constant 37 : index
// CHECK-NEXT:     %[[c3:.*]] = arith.constant 3 : index
// CHECK-NEXT:     for %{{.*}} = %[[c18]] to %[[c37]] step %[[c3]] {
// CHECK-NEXT:       call @body3(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:     call @post(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @more_imperfectly_nested_loops() {
  affine.for %i = 0 to 42 {
    func.call @pre(%i) : (index) -> ()
    affine.for %j = 7 to 56 step 2 {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
    func.call @mid(%i) : (index) -> ()
    affine.for %k = 18 to 37 step 3 {
      func.call @body3(%i, %k) : (index, index) -> ()
    }
    func.call @post(%i) : (index) -> ()
  }
  return
}

// The outer upper bound is the SSA value %N and the inner lower bound is a
// single-result identity map applied to the outer induction variable; no
// extra arithmetic is expected for either bound.
// CHECK-LABEL: func @affine_apply_loops_shorthand
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %{{.*}} step %[[c1]] {
// CHECK-NEXT:     %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:     %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:     for %{{.*}} = %{{.*}} to %[[c42]] step %[[c1_0]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @affine_apply_loops_shorthand(%N : index) {
  affine.for %i = 0 to %N {
    affine.for %j = affine_map<(d0)[]->(d0)>(%i)[] to 42 {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
  }
  return
}

/////////////////////////////////////////////////////////////////////

func.func private @get_idx() -> (index)

#set1 = affine_set<(d0) : (20 - d0 >= 0)>
#set2 = affine_set<(d0) : (d0 - 10 >= 0)>

// affine.if on #set1: the condition is expected as (d0 * -1 + 20) sge 0.
// The call inside the branch must receive the value produced by @get_idx, so
// the operand refers to the captured v0 instead of capturing it again.
// CHECK-LABEL: func @if_only
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @if_only() {
  %i = call @get_idx() : () -> (index)
  affine.if #set1(%i) {
    func.call @body(%i) : (index) -> ()
  }
  return
}

// Same condition as @if_only, with an else branch; both calls must take the
// @get_idx result.
// CHECK-LABEL: func @if_else
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   } else {
// CHECK-NEXT:     call @mid(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @if_else() {
  %i = call @get_idx() : () -> (index)
  affine.if #set1(%i) {
    func.call @body(%i) : (index) -> ()
  } else {
    func.call @mid(%i) : (index) -> ()
  }
  return
}

// An affine.if on #set1 whose two branches each contain an affine.if on #set2.
// CHECK-LABEL: func @nested_ifs
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     %[[c0_0:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:     %[[v4:.*]] = arith.addi %[[v0]], %[[cm10]] : index
// CHECK-NEXT:     %[[v5:.*]] = arith.cmpi sge, %[[v4]], %[[c0_0]] : index
// CHECK-NEXT:     if %[[v5]] {
// CHECK-NEXT:       call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   } else {
// CHECK-NEXT:     %[[c0_0:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:     %{{.*}} = arith.addi %[[v0]], %[[cm10]] : index
// CHECK-NEXT:     %{{.*}} = arith.cmpi sge, %{{.*}}, %[[c0_0]] : index
// CHECK-NEXT:     if %{{.*}} {
// CHECK-NEXT:       call @mid(%[[v0]]) : (index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @nested_ifs() {
  %i = call @get_idx() : () -> (index)
  affine.if #set1(%i) {
    affine.if #set2(%i) {
      func.call @body(%i) : (index) -> ()
    }
  } else {
    affine.if #set2(%i) {
      func.call @mid(%i) : (index) -> ()
    }
  }
  return
}

// An affine.if that yields a value: expected to become an scf.if with an i64
// result and an scf.yield in each branch.
// CHECK-LABEL: func @if_with_yield
// CHECK-NEXT:   %[[c0_i64:.*]] = arith.constant 0 : i64
// CHECK-NEXT:   %[[c1_i64:.*]] = arith.constant 1 : i64
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.addi %[[v0]], %[[cm10]] : index
// CHECK-NEXT:   %[[v2:.*]] = arith.cmpi sge, %[[v1]], %[[c0]] : index
// CHECK-NEXT:   %[[v3:.*]] = scf.if %[[v2]] -> (i64) {
// CHECK-NEXT:     scf.yield %[[c0_i64]] : i64
// CHECK-NEXT:   } else {
// CHECK-NEXT:     scf.yield %[[c1_i64]] : i64
// CHECK-NEXT:   }
// CHECK-NEXT:   return %[[v3]] : i64
// CHECK-NEXT: }
func.func @if_with_yield() -> (i64) {
  %cst0 = arith.constant 0 : i64
  %cst1 = arith.constant 1 : i64
  %i = call @get_idx() : () -> (index)
  %1 = affine.if #set2(%i) -> (i64) {
    affine.yield %cst0 : i64
  } else {
    affine.yield %cst1 : i64
  }
  return %1 : i64
}

#setN = affine_set<(d0)[N,M,K,L] : (N - d0 + 1 >= 0, N - 1 >= 0, M - 1 >= 0, K - 1 >= 0, L - 42 == 0)>

// A set with five constraints: each one is expected as its own cmpi (sge for
// the inequalities, eq for the equality), combined pairwise with arith.andi.
// Both calls must take the @get_idx result, so they refer to the captured v0.
// CHECK-LABEL: func @multi_cond
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %{{.*}} : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   %[[v3:.*]] = arith.addi %[[v2]], %[[c1]] : index
// CHECK-NEXT:   %[[v4:.*]] = arith.cmpi sge, %[[v3]], %[[c0]] : index
// CHECK-NEXT:   %[[cm1_0:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v5:.*]] = arith.addi %{{.*}}, %[[cm1_0]] : index
// CHECK-NEXT:   %[[v6:.*]] = arith.cmpi sge, %[[v5]], %[[c0]] : index
// CHECK-NEXT:   %[[v7:.*]] = arith.andi %[[v4]], %[[v6]] : i1
// CHECK-NEXT:   %[[cm1_1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v8:.*]] = arith.addi %{{.*}}, %[[cm1_1]] : index
// CHECK-NEXT:   %[[v9:.*]] = arith.cmpi sge, %[[v8]], %[[c0]] : index
// CHECK-NEXT:   %[[v10:.*]] = arith.andi %[[v7]], %[[v9]] : i1
// CHECK-NEXT:   %[[cm1_2:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v11:.*]] = arith.addi %{{.*}}, %[[cm1_2]] : index
// CHECK-NEXT:   %[[v12:.*]] = arith.cmpi sge, %[[v11]], %[[c0]] : index
// CHECK-NEXT:   %[[v13:.*]] = arith.andi %[[v10]], %[[v12]] : i1
// CHECK-NEXT:   %[[cm42:.*]] = arith.constant -42 : index
// CHECK-NEXT:   %[[v14:.*]] = arith.addi %{{.*}}, %[[cm42]] : index
// CHECK-NEXT:   %[[v15:.*]] = arith.cmpi eq, %[[v14]], %[[c0]] : index
// CHECK-NEXT:   %[[v16:.*]] = arith.andi %[[v13]], %[[v15]] : i1
// CHECK-NEXT:   if %[[v16]] {
// CHECK-NEXT:     call @body(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   } else {
// CHECK-NEXT:     call @mid(%[[v0]]) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @multi_cond(%N : index, %M : index, %K : index, %L : index) {
  %i = call @get_idx() : () -> (index)
  affine.if #setN(%i)[%N,%M,%K,%L] {
    func.call @body(%i) : (index) -> ()
  } else {
    func.call @mid(%i) : (index) -> ()
  }
  return
}

// Mixed nesting: if { for { if } } followed by for { if { for } }.
// Each constant is captured once where it is defined and then referenced at
// its use, so the loop bounds, steps and compare operands are actually tied to
// the constants emitted just before them.
// CHECK-LABEL: func @if_for
func.func @if_for() {
// CHECK-NEXT:   %[[v0:.*]] = call @get_idx() : () -> index
  %i = call @get_idx() : () -> (index)
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:   %[[v1:.*]] = arith.muli %[[v0]], %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:   %[[c20:.*]] = arith.constant 20 : index
// CHECK-NEXT:   %[[v2:.*]] = arith.addi %[[v1]], %[[c20]] : index
// CHECK-NEXT:   %[[v3:.*]] = arith.cmpi sge, %[[v2]], %[[c0]] : index
// CHECK-NEXT:   if %[[v3]] {
// CHECK-NEXT:     %[[c0_0:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:     %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:     for %{{.*}} = %[[c0_0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:       %[[c0_1:.*]] = arith.constant 0 : index
// CHECK-NEXT:       %[[cm10:.*]] = arith.constant -10 : index
// CHECK-NEXT:       %[[v4:.*]] = arith.addi %{{.*}}, %[[cm10]] : index
// CHECK-NEXT:       %[[v5:.*]] = arith.cmpi sge, %[[v4]], %[[c0_1]] : index
// CHECK-NEXT:       if %[[v5]] {
// CHECK-NEXT:         call @body2(%[[v0]], %{{.*}}) : (index, index) -> ()
  affine.if #set1(%i) {
    affine.for %j = 0 to 42 {
      affine.if #set2(%j) {
        func.call @body2(%i, %j) : (index, index) -> ()
      }
    }
  }
// CHECK:        %[[c0_2:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42_0:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0_2]] to %[[c42_0]] step %[[c1_0]] {
// CHECK-NEXT:     %[[c0_3:.*]] = arith.constant 0 : index
// CHECK-NEXT:     %[[cm10_0:.*]] = arith.constant -10 : index
// CHECK-NEXT:     %{{.*}} = arith.addi %{{.*}}, %[[cm10_0]] : index
// CHECK-NEXT:     %{{.*}} = arith.cmpi sge, %{{.*}}, %[[c0_3]] : index
// CHECK-NEXT:     if %{{.*}} {
// CHECK-NEXT:       %[[c0_4:.*]] = arith.constant 0 : index
// CHECK-NEXT:       %[[c42_1:.*]] = arith.constant 42 : index
// CHECK-NEXT:       %[[c1_1:.*]] = arith.constant 1 : index
// CHECK-NEXT:       for %{{.*}} = %[[c0_4]] to %[[c42_1]] step %[[c1_1]] {
  affine.for %k = 0 to 42 {
    affine.if #set2(%k) {
      affine.for %l = 0 to 42 {
        func.call @body3(%k, %l) : (index, index) -> ()
      }
    }
  }
// CHECK:        return
  return
}

#lbMultiMap = affine_map<(d0)[s0] -> (d0, s0 - d0)>
#ubMultiMap = affine_map<(d0)[s0] -> (s0, d0 + 10)>

// Multi-result bound maps: the lower bound is expected as arith.maxsi over
// the map results and the upper bound as arith.minsi.
// CHECK-LABEL: func @loop_min_max
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[c42]] step %[[c1]] {
// CHECK-NEXT:     %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:     %[[mul0:.*]] = arith.muli %{{.*}}, %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:     %[[add0:.*]] = arith.addi %[[mul0]], %{{.*}} : index
// CHECK-NEXT:     %[[max:.*]] = arith.maxsi %{{.*}}, %[[add0]] : index
// CHECK-NEXT:     %[[c10:.*]] = arith.constant 10 : index
// CHECK-NEXT:     %[[add1:.*]] = arith.addi %{{.*}}, %[[c10]] : index
// CHECK-NEXT:     %[[min:.*]] = arith.minsi %{{.*}}, %[[add1]] : index
// CHECK-NEXT:     %[[c1_0:.*]] = arith.constant 1 : index
// CHECK-NEXT:     for %{{.*}} = %[[max]] to %[[min]] step %[[c1_0]] {
// CHECK-NEXT:       call @body2(%{{.*}}, %{{.*}}) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @loop_min_max(%N : index) {
  affine.for %i = 0 to 42 {
    affine.for %j = max #lbMultiMap(%i)[%N] to min #ubMultiMap(%i)[%N] {
      func.call @body2(%i, %j) : (index, index) -> ()
    }
  }
  return
}

#map_7_values = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>

// Check that the "min" reduction sequence is emitted
// correctly for an affine map with 7 results.
// Seven results are expected to give six arith.minsi ops, each taking the
// previous minimum as its first operand.

// CHECK-LABEL: func @min_reduction_tree
// CHECK-NEXT:   %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT:   %[[min:.+]] = arith.minsi %{{.*}}, %{{.*}} : index
// CHECK-NEXT:   %[[min_0:.+]] = arith.minsi %[[min]], %{{.*}} : index
// CHECK-NEXT:   %[[min_1:.+]] = arith.minsi %[[min_0]], %{{.*}} : index
// CHECK-NEXT:   %[[min_2:.+]] = arith.minsi %[[min_1]], %{{.*}} : index
// CHECK-NEXT:   %[[min_3:.+]] = arith.minsi %[[min_2]], %{{.*}} : index
// CHECK-NEXT:   %[[min_4:.+]] = arith.minsi %[[min_3]], %{{.*}} : index
// CHECK-NEXT:   %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT:   for %{{.*}} = %[[c0]] to %[[min_4]] step %[[c1]] {
// CHECK-NEXT:     call @body(%{{.*}}) : (index) -> ()
// CHECK-NEXT:   }
// CHECK-NEXT:   return
// CHECK-NEXT: }
func.func @min_reduction_tree(%v1 : index, %v2 : index, %v3 : index, %v4 : index, %v5 : index, %v6 : index, %v7 : index) {
  affine.for %i = 0 to min #map_7_values(%v1, %v2, %v3, %v4, %v5, %v6, %v7)[] {
    func.call @body(%i) : (index) -> ()
  }
  return
}

/////////////////////////////////////////////////////////////////////

#map0 = affine_map<() -> (0)>
#map1 = affine_map<()[s0] -> (s0)>
#map2 = affine_map<(d0) -> (d0)>
#map3 = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
#map4 = affine_map<(d0,d1,d2,d3)[s0,s1,s2] -> (d0 + 2*d1 + 3*d2 + 4*d3 + 5*s0 + 6*s1 + 7*s2)>
#map5 = affine_map<(d0,d1,d2) -> (d0,d1,d2)>
#map6 = affine_map<(d0,d1,d2) -> (d0 + d1 + d2)>

// affine.apply with a constant map, identity maps, and two linear maps. The
// expectations are interleaved with the ops that produce them.
// CHECK-LABEL: func @affine_applies(
func.func @affine_applies(%arg0 : index) {
// CHECK: %[[c0:.*]] = arith.constant 0 : index
  %zero = affine.apply #map0()

// Identity maps are just discarded.
// CHECK-NEXT: %[[c101:.*]] = arith.constant 101 : index
  %101 = arith.constant 101 : index
  %symbZero = affine.apply #map1()[%zero]
// CHECK-NEXT: %[[c102:.*]] = arith.constant 102 : index
  %102 = arith.constant 102 : index
  %copy = affine.apply #map2(%zero)

// #map3 is d0 + s0 + 1 with both operands equal to the zero constant.
// CHECK-NEXT: %[[v0:.*]] = arith.addi %[[c0]], %[[c0]] : index
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[v1:.*]] = arith.addi %[[v0]], %[[c1]] : index
  %one = affine.apply #map3(%symbZero)[%zero]

// #map4: every term after the first is a muli by its coefficient followed by
// an addi into the running sum.
// CHECK-NEXT: %[[c2:.*]] = arith.constant 2 : index
// CHECK-NEXT: %[[v2:.*]] = arith.muli %arg0, %[[c2]] overflow<nsw> : index
// CHECK-NEXT: %[[v3:.*]] = arith.addi %arg0, %[[v2]] : index
// CHECK-NEXT: %[[c3:.*]] = arith.constant 3 : index
// CHECK-NEXT: %[[v4:.*]] = arith.muli %arg0, %[[c3]] overflow<nsw> : index
// CHECK-NEXT: %[[v5:.*]] = arith.addi %[[v3]], %[[v4]] : index
// CHECK-NEXT: %[[c4:.*]] = arith.constant 4 : index
// CHECK-NEXT: %[[v6:.*]] = arith.muli %arg0, %[[c4]] overflow<nsw> : index
// CHECK-NEXT: %[[v7:.*]] = arith.addi %[[v5]], %[[v6]] : index
// CHECK-NEXT: %[[c5:.*]] = arith.constant 5 : index
// CHECK-NEXT: %[[v8:.*]] = arith.muli %arg0, %[[c5]] overflow<nsw> : index
// CHECK-NEXT: %[[v9:.*]] = arith.addi %[[v7]], %[[v8]] : index
// CHECK-NEXT: %[[c6:.*]] = arith.constant 6 : index
// CHECK-NEXT: %[[v10:.*]] = arith.muli %arg0, %[[c6]] overflow<nsw> : index
// CHECK-NEXT: %[[v11:.*]] = arith.addi %[[v9]], %[[v10]] : index
// CHECK-NEXT: %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT: %[[v12:.*]] = arith.muli %arg0, %[[c7]] overflow<nsw> : index
// CHECK-NEXT: %[[v13:.*]] = arith.addi %[[v11]], %[[v12]] : index
  %four = affine.apply #map4(%arg0, %arg0, %arg0, %arg0)[%arg0, %arg0, %arg0]
  return
}

// Identity maps applied to block arguments: the return is expected directly
// after the function header, with no ops in between.
// CHECK-LABEL: func @args_ret_affine_apply(
func.func @args_ret_affine_apply(index, index) -> (index, index) {
^bb0(%0 : index, %1 : index):
// CHECK-NEXT: return %{{.*}}, %{{.*}} : index, index
  %00 = affine.apply #map2 (%0)
  %11 = affine.apply #map1 ()[%1]
  return %00, %11 : index, index
}

//===---------------------------------------------------------------------===//
// Test lowering of Euclidean (floor) division, ceil division and modulo
// operation used in affine expressions. In addition to testing the
// operation-level output, check that the obtained results are correct by
// applying constant folding transformation after affine lowering.
//
// NOTE(review): the only invocation visible at the top of this file runs
// -lower-affine alone; the constant-folding half of this statement presumably
// refers to the companion tests in "canonicalize.mlir" named in the notes
// below -- confirm.
//===---------------------------------------------------------------------===//

// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_mod
// test in the "canonicalize.mlir" test to reflect the expected output of
// affine.apply lowering.
// --------------------------------------------------------------------------//

// `mod` by the constant 42: a signed remainder, then the divisor is added
// back when the remainder is negative (select on `slt 0`).
#map_mod = affine_map<(i) -> (i mod 42)>
// CHECK-LABEL: func @affine_apply_mod
func.func @affine_apply_mod(%arg0 : index) -> (index) {
// CHECK-NEXT: %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[v0:.*]] = arith.remsi %{{.*}}, %[[c42]] : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[v1:.*]] = arith.cmpi slt, %[[v0]], %[[c0]] : index
// CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v0]], %[[c42]] : index
// CHECK-NEXT: %[[v3:.*]] = arith.select %[[v1]], %[[v2]], %[[v0]] : index
  %0 = affine.apply #map_mod (%arg0)
  return %0 : index
}
// Same sequence with the divisor supplied as a symbol operand (%arg1), so no
// constant is expected for it.
#map_mod_dynamic_divisor = affine_map<(i)[s] -> (i mod s)>
// CHECK-LABEL: func @affine_apply_mod_dynamic_divisor
func.func @affine_apply_mod_dynamic_divisor(%arg0 : index, %arg1 : index) -> (index) {
// CHECK-NEXT: %[[v0:.*]] = arith.remsi %{{.*}}, %arg1 : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[v1:.*]] = arith.cmpi slt, %[[v0]], %[[c0]] : index
// CHECK-NEXT: %[[v2:.*]] = arith.addi %[[v0]], %arg1 : index
// CHECK-NEXT: %[[v3:.*]] = arith.select %[[v1]], %[[v2]], %[[v0]] : index
  %0 = affine.apply #map_mod_dynamic_divisor (%arg0)[%arg1]
  return %0 : index
}

// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_floordiv
// test in the "canonicalize.mlir" test to reflect the expected output of
// affine.apply lowering.
// --------------------------------------------------------------------------//
// `floordiv` by the constant 42: the dividend is replaced by (-1 - x) when
// negative, divided with arith.divsi, and the quotient is mapped back with
// (-1 - q) under the same condition.
#map_floordiv = affine_map<(i) -> (i floordiv 42)>
// CHECK-LABEL: func @affine_apply_floordiv
func.func @affine_apply_floordiv(%arg0 : index) -> (index) {
// CHECK-NEXT: %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT: %[[v0:.*]] = arith.cmpi slt, %{{.*}}, %[[c0]] : index
// CHECK-NEXT: %[[v1:.*]] = arith.subi %[[cm1]], %{{.*}} : index
// CHECK-NEXT: %[[v2:.*]] = arith.select %[[v0]], %[[v1]], %{{.*}} : index
// CHECK-NEXT: %[[v3:.*]] = arith.divsi %[[v2]], %[[c42]] : index
// CHECK-NEXT: %[[v4:.*]] = arith.subi %[[cm1]], %[[v3]] : index
// CHECK-NEXT: %[[v5:.*]] = arith.select %[[v0]], %[[v4]], %[[v3]] : index
  %0 = affine.apply #map_floordiv (%arg0)
  return %0 : index
}
// Same sequence with the divisor supplied as a symbol operand (%arg1).
#map_floordiv_dynamic_divisor = affine_map<(i)[s] -> (i floordiv s)>
// CHECK-LABEL: func @affine_apply_floordiv_dynamic_divisor
func.func @affine_apply_floordiv_dynamic_divisor(%arg0 : index, %arg1 : index) -> (index) {
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT: %[[v0:.*]] = arith.cmpi slt, %{{.*}}, %[[c0]] : index
// CHECK-NEXT: %[[v1:.*]] = arith.subi %[[cm1]], %{{.*}} : index
// CHECK-NEXT: %[[v2:.*]] = arith.select %[[v0]], %[[v1]], %{{.*}} : index
// CHECK-NEXT: %[[v3:.*]] = arith.divsi %[[v2]], %arg1 : index
// CHECK-NEXT: %[[v4:.*]] = arith.subi %[[cm1]], %[[v3]] : index
// CHECK-NEXT: %[[v5:.*]] = arith.select %[[v0]], %[[v4]], %[[v3]] : index
  %0 = affine.apply #map_floordiv_dynamic_divisor (%arg0)[%arg1]
  return %0 : index
}

// --------------------------------------------------------------------------//
// IMPORTANT NOTE: if you change this test, also change the @lowered_affine_ceildiv
// test in the "canonicalize.mlir" test to reflect the expected output of
// affine.apply lowering.
// --------------------------------------------------------------------------//
// `ceildiv` by the constant 42: selects between (0 - x) and (x - 1) on
// `x sle 0`, divides with arith.divsi, then selects between (0 - q) and
// (q + 1) under the same condition.
#map_ceildiv = affine_map<(i) -> (i ceildiv 42)>
// CHECK-LABEL: func @affine_apply_ceildiv
func.func @affine_apply_ceildiv(%arg0 : index) -> (index) {
// CHECK-NEXT: %[[c42:.*]] = arith.constant 42 : index
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[v0:.*]] = arith.cmpi sle, %{{.*}}, %[[c0]] : index
// CHECK-NEXT: %[[v1:.*]] = arith.subi %[[c0]], %{{.*}} : index
// CHECK-NEXT: %[[v2:.*]] = arith.subi %{{.*}}, %[[c1]] : index
// CHECK-NEXT: %[[v3:.*]] = arith.select %[[v0]], %[[v1]], %[[v2]] : index
// CHECK-NEXT: %[[v4:.*]] = arith.divsi %[[v3]], %[[c42]] : index
// CHECK-NEXT: %[[v5:.*]] = arith.subi %[[c0]], %[[v4]] : index
// CHECK-NEXT: %[[v6:.*]] = arith.addi %[[v4]], %[[c1]] : index
// CHECK-NEXT: %[[v7:.*]] = arith.select %[[v0]], %[[v5]], %[[v6]] : index
  %0 = affine.apply #map_ceildiv (%arg0)
  return %0 : index
}
// Same sequence with the divisor supplied as a symbol operand (%arg1).
#map_ceildiv_dynamic_divisor = affine_map<(i)[s] -> (i ceildiv s)>
// CHECK-LABEL: func @affine_apply_ceildiv_dynamic_divisor
func.func @affine_apply_ceildiv_dynamic_divisor(%arg0 : index, %arg1 : index) -> (index) {
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : index
// CHECK-NEXT: %[[c1:.*]] = arith.constant 1 : index
// CHECK-NEXT: %[[v0:.*]] = arith.cmpi sle, %{{.*}}, %[[c0]] : index
// CHECK-NEXT: %[[v1:.*]] = arith.subi %[[c0]], %{{.*}} : index
// CHECK-NEXT: %[[v2:.*]] = arith.subi %{{.*}}, %[[c1]] : index
// CHECK-NEXT: %[[v3:.*]] = arith.select %[[v0]], %[[v1]], %[[v2]] : index
// CHECK-NEXT: %[[v4:.*]] = arith.divsi %[[v3]], %arg1 : index
// CHECK-NEXT: %[[v5:.*]] = arith.subi %[[c0]], %[[v4]] : index
// CHECK-NEXT: %[[v6:.*]] = arith.addi %[[v4]], %[[c1]] : index
// CHECK-NEXT: %[[v7:.*]] = arith.select %[[v0]], %[[v5]], %[[v6]] : index
  %0 = affine.apply #map_ceildiv_dynamic_divisor (%arg0)[%arg1]
  return %0 : index
}

// affine.load with an affine subscript: the index arithmetic is expected as
// explicit addi ops feeding a memref.load.
// CHECK-LABEL: func @affine_load
func.func @affine_load(%arg0 : index) {
  %0 = memref.alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    %1 = affine.load %0[%i0 + symbol(%arg0) + 7] : memref<10xf32>
  }
// CHECK:       %[[a:.*]] = arith.addi %{{.*}}, %{{.*}} : index
// CHECK-NEXT:  %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %[[a]], %[[c7]] : index
// CHECK-NEXT:  %{{.*}} = memref.load %[[v0:.*]][%[[b]]] : memref<10xf32>
  return
}

// affine.store with a subtraction in the subscript: the subtracted symbol is
// expected as a multiplication by -1 followed by an addi.
// CHECK-LABEL: func @affine_store
func.func @affine_store(%arg0 : index) {
  %0 = memref.alloc() : memref<10xf32>
  %1 = arith.constant 11.0 : f32
  affine.for %i0 = 0 to 10 {
    affine.store %1, %0[%i0 - symbol(%arg0) + 7] : memref<10xf32>
  }
// CHECK:       %[[cm1:.*]] = arith.constant -1 : index
// CHECK-NEXT:  %[[a:.*]] = arith.muli %{{.*}}, %[[cm1]] overflow<nsw> : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %{{.*}}, %[[a]] : index
// CHECK-NEXT:  %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[c:.*]] = arith.addi %[[b]], %[[c7]] : index
// CHECK-NEXT:  store %{{.*}}, %{{.*}}[%[[c]]] : memref<10xf32>
  return
}

// Zero-dimensional memrefs: load and store with empty subscript lists.
// CHECK-LABEL: func @affine_load_store_zero_dim
func.func @affine_load_store_zero_dim(%arg0 : memref<i32>, %arg1 : memref<i32>) {
  %0 = affine.load %arg0[] : memref<i32>
  affine.store %0, %arg1[] : memref<i32>
// CHECK: %[[x:.*]] = memref.load %arg0[] : memref<i32>
// CHECK: store %[[x]], %arg1[] : memref<i32>
  return
}

// affine.prefetch: same index arithmetic as @affine_load, feeding a
// memref.prefetch that keeps the read/locality/data attributes.
// CHECK-LABEL: func @affine_prefetch
func.func @affine_prefetch(%arg0 : index) {
  %0 = memref.alloc() : memref<10xf32>
  affine.for %i0 = 0 to 10 {
    affine.prefetch %0[%i0 + symbol(%arg0) + 7], read, locality<3>, data : memref<10xf32>
  }
// CHECK:       %[[a:.*]] = arith.addi %{{.*}}, %{{.*}} : index
// CHECK-NEXT:  %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %[[a]], %[[c7]] : index
// CHECK-NEXT:  memref.prefetch %[[v0:.*]][%[[b]]], read, locality<3>, data : memref<10xf32>
  return
}

// affine.dma_start: the source and destination subscripts are each expected
// as an addi with a constant before the resulting dma_start.
// CHECK-LABEL: func @affine_dma_start
func.func @affine_dma_start(%arg0 : index) {
  %0 = memref.alloc() : memref<100xf32>
  %1 = memref.alloc() : memref<100xf32, 2>
  %2 = memref.alloc() : memref<1xi32>
  %c0 = arith.constant 0 : index
  %c64 = arith.constant 64 : index
  affine.for %i0 = 0 to 10 {
    affine.dma_start %0[%i0 + 7], %1[%arg0 + 11], %2[%c0], %c64
        : memref<100xf32>, memref<100xf32, 2>, memref<1xi32>
  }
// CHECK:       %[[c7:.*]] = arith.constant 7 : index
// CHECK-NEXT:  %[[a:.*]] = arith.addi %{{.*}}, %[[c7]] : index
// CHECK-NEXT:  %[[c11:.*]] = arith.constant 11 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %{{.*}}, %[[c11]] : index
// CHECK-NEXT:  dma_start %{{.*}}[%[[a]]], %{{.*}}[%[[b]]], %{{.*}}, %{{.*}}[%{{.*}}] : memref<100xf32>, memref<100xf32, 2>, memref<1xi32>
  return
}

// affine.dma_wait: the tag subscript is expected as two addi ops before the
// resulting dma_wait.
// CHECK-LABEL: func @affine_dma_wait
func.func @affine_dma_wait(%arg0 : index) {
  %2 = memref.alloc() : memref<1xi32>
  %c64 = arith.constant 64 : index
  affine.for %i0 = 0 to 10 {
    affine.dma_wait %2[%i0 + %arg0 + 17], %c64 : memref<1xi32>
  }
// CHECK:       %[[a:.*]] = arith.addi %{{.*}}, %arg0 : index
// CHECK-NEXT:  %[[c17:.*]] = arith.constant 17 : index
// CHECK-NEXT:  %[[b:.*]] = arith.addi %[[a]], %[[c17]] : index
// CHECK-NEXT:  dma_wait %{{.*}}[%[[b]]], %{{.*}} : memref<1xi32>
  return
}

// affine.min over (d0 - d1, d1 - d0): each difference is expected as a
// multiplication by -1 plus an addi, combined with arith.minsi. The muli
// operands refer to the captured -1 constants instead of capturing them again,
// so the multiplier is actually verified.
// CHECK-LABEL: func @affine_min
// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
func.func @affine_min(%arg0: index, %arg1: index) -> index{
  // CHECK: %[[Cm1:.*]] = arith.constant -1
  // CHECK: %[[neg1:.*]] = arith.muli %[[ARG1]], %[[Cm1]]
  // CHECK: %[[first:.*]] = arith.addi %[[ARG0]], %[[neg1]]
  // CHECK: %[[Cm2:.*]] = arith.constant -1
  // CHECK: %[[neg2:.*]] = arith.muli %[[ARG0]], %[[Cm2]]
  // CHECK: %[[second:.*]] = arith.addi %[[ARG1]], %[[neg2]]
  // CHECK: arith.minsi %[[first]], %[[second]]
  %0 = affine.min affine_map<(d0,d1) -> (d0 - d1, d1 - d0)>(%arg0, %arg1)
  return %0 : index
}

// affine.max over the same map: identical arithmetic, combined with
// arith.maxsi.
// CHECK-LABEL: func @affine_max
// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
func.func @affine_max(%arg0: index, %arg1: index) -> index{
  // CHECK: %[[Cm1:.*]] = arith.constant -1
  // CHECK: %[[neg1:.*]] = arith.muli %[[ARG1]], %[[Cm1]]
  // CHECK: %[[first:.*]] = arith.addi %[[ARG0]], %[[neg1]]
  // CHECK: %[[Cm2:.*]] = arith.constant -1
  // CHECK: %[[neg2:.*]] = arith.muli %[[ARG0]], %[[Cm2]]
  // CHECK: %[[second:.*]] = arith.addi %[[ARG1]], %[[neg2]]
  // CHECK: arith.maxsi %[[first]], %[[second]]
  %0 = affine.max affine_map<(d0,d1) -> (d0 - d1, d1 - d0)>(%arg0, %arg1)
  return %0 : index
}

// An empty two-dimensional affine.parallel with constant bounds; its
// expectations follow the function.
// CHECK-LABEL: func @affine_parallel(
// CHECK-SAME: %[[ARG0:.*]]: memref<100x100xf32>, %[[ARG1:.*]]: memref<100x100xf32>) {
func.func @affine_parallel(%o: memref<100x100xf32>, %a: memref<100x100xf32>) {
  affine.parallel (%i, %j) = (0, 0) to (100, 100) {
  }
  return
}

// CHECK-DAG:    %[[C100:.*]] = arith.constant 100
// CHECK-DAG:    %[[C100_1:.*]] = arith.constant 100
// CHECK-DAG:    %[[C0:.*]] = arith.constant 0
// CHECK-DAG:    %[[C0_1:.*]] = arith.constant 0
// CHECK-DAG:    %[[C1:.*]] = arith.constant 1
// CHECK-DAG:    %[[C1_1:.*]] = arith.constant 1
// CHECK-DAG:    scf.parallel (%arg2, %arg3) = (%[[C0]], %[[C0_1]]) to (%[[C100]], %[[C100_1]]) step (%[[C1]], %[[C1_1]]) {

// Two nested three-dimensional affine.parallel ops: the outer one steps by
// 10, the inner one runs from the outer induction variables to those values
// plus 10. Expectations follow the function.
// CHECK-LABEL: func @affine_parallel_tiled(
// CHECK-SAME: %[[ARG0:.*]]: memref<100x100xf32>, %[[ARG1:.*]]: memref<100x100xf32>, %[[ARG2:.*]]: memref<100x100xf32>) {
func.func @affine_parallel_tiled(%o: memref<100x100xf32>, %a: memref<100x100xf32>, %b: memref<100x100xf32>) {
  affine.parallel (%i0, %j0, %k0) = (0, 0, 0) to (100, 100, 100) step (10, 10, 10) {
    affine.parallel (%i1, %j1, %k1) = (%i0, %j0, %k0) to (%i0 + 10, %j0 + 10, %k0 + 10) {
      %0 = affine.load %a[%i1, %k1] : memref<100x100xf32>
      %1 = affine.load %b[%k1, %j1] : memref<100x100xf32>
      %2 = arith.mulf %0, %1 : f32
    }
  }
  return
}

// CHECK-DAG:     %[[C100:.*]] = arith.constant 100
// CHECK-DAG:     %[[C100_0:.*]] = arith.constant 100
// CHECK-DAG:     %[[C100_1:.*]] = arith.constant 100
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0
// CHECK-DAG:     %[[C0_2:.*]] = arith.constant 0
// CHECK-DAG:     %[[C0_3:.*]] = arith.constant 0
// CHECK-DAG:     %[[C10:.*]] = arith.constant 10
// CHECK-DAG:     %[[C10_4:.*]] = arith.constant 10
// CHECK-DAG:     %[[C10_5:.*]] = arith.constant 10
// CHECK:         scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[C0]], %[[C0_2]], %[[C0_3]]) to (%[[C100]], %[[C100_0]], %[[C100_1]]) step (%[[C10]], %[[C10_4]], %[[C10_5]]) {
// CHECK-DAG:       %[[C10_6:.*]] = arith.constant 10
// CHECK-DAG:       %[[A0:.*]] = arith.addi %[[arg3]], %[[C10_6]]
// CHECK-DAG:       %[[C10_7:.*]] = arith.constant 10
// CHECK-DAG:       %[[A1:.*]] = arith.addi %[[arg4]], %[[C10_7]]
// CHECK-DAG:       %[[C10_8:.*]] = arith.constant 10
// CHECK-DAG:       %[[A2:.*]] = arith.addi %[[arg5]], %[[C10_8]]
// CHECK-DAG:       %[[C1:.*]] = arith.constant 1
// CHECK-DAG:       %[[C1_9:.*]] = arith.constant 1
// CHECK-DAG:       %[[C1_10:.*]] = arith.constant 1
// CHECK:           scf.parallel (%[[arg6:.*]], %[[arg7:.*]], %[[arg8:.*]]) = (%[[arg3]], %[[arg4]], %[[arg5]]) to (%[[A0]], %[[A1]], %[[A2]]) step (%[[C1]], %[[C1_9]], %[[C1_10]]) {
// CHECK:             %[[A3:.*]] = memref.load %[[ARG1]][%[[arg6]], %[[arg8]]] : memref<100x100xf32>
// CHECK:             %[[A4:.*]] = memref.load %[[ARG2]][%[[arg8]], %[[arg7]]] : memref<100x100xf32>
// CHECK:             arith.mulf %[[A3]], %[[A4]] : f32
// CHECK:             scf.reduce

/////////////////////////////////////////////////////////////////////

// A two-dimensional affine.parallel with constant bounds and a load/mul/store
// body; the body is expected to end with scf.reduce.
func.func @affine_parallel_simple(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (memref<3x3xf32>) {
  %O = memref.alloc() : memref<3x3xf32>
  affine.parallel (%kx, %ky) = (0, 0) to (2, 2) {
    %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32>
    %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32>
    %3 = arith.mulf %1, %2 : f32
    affine.store %3, %O[%kx, %ky] : memref<3x3xf32>
  }
  return %O : memref<3x3xf32>
}
// CHECK-LABEL: func @affine_parallel_simple
// CHECK:         %[[LOWER_1:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER_1:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[LOWER_2:.*]] = arith.constant 0 : index
// CHECK-NEXT:    %[[UPPER_2:.*]] = arith.constant 2 : index
// CHECK-NEXT:    %[[STEP_1:.*]] = arith.constant 1 : index
// CHECK-NEXT:    %[[STEP_2:.*]] = arith.constant 1 : index
// CHECK-NEXT:    scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) {
// CHECK-NEXT:      %[[VAL_1:.*]] = memref.load
// CHECK-NEXT:      %[[VAL_2:.*]] = memref.load
// CHECK-NEXT:      %[[PRODUCT:.*]] = arith.mulf
// CHECK-NEXT:      store
// CHECK-NEXT:      scf.reduce
// CHECK-NEXT:    }
// CHECK-NEXT:    return
// CHECK-NEXT:  }

/////////////////////////////////////////////////////////////////////

// Same body as above, with both upper bounds taken from a memref.dim result
// and both lower bounds from an SSA constant.
func.func @affine_parallel_simple_dynamic_bounds(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
  %c_0 = arith.constant 0 : index
  %output_dim = memref.dim %arg0, %c_0 : memref<?x?xf32>
  affine.parallel (%kx, %ky) = (%c_0, %c_0) to (%output_dim, %output_dim) {
    %1 = affine.load %arg0[%kx, %ky] : memref<?x?xf32>
    %2 = affine.load %arg1[%kx, %ky] : memref<?x?xf32>
    %3 = arith.mulf %1, %2 : f32
    affine.store %3, %arg2[%kx, %ky] : memref<?x?xf32>
  }
  return
}
// CHECK-LABEL: func @affine_parallel_simple_dynamic_bounds
// CHECK-SAME:  %[[ARG_0:.*]]: memref<?x?xf32>, %[[ARG_1:.*]]: memref<?x?xf32>, %[[ARG_2:.*]]: memref<?x?xf32>
// CHECK: %[[DIM_INDEX:.*]] = 
arith.constant 0 : index 803// CHECK-NEXT: %[[UPPER:.*]] = memref.dim %[[ARG_0]], %[[DIM_INDEX]] : memref<?x?xf32> 804// CHECK-NEXT: %[[LOWER_1:.*]] = arith.constant 0 : index 805// CHECK-NEXT: %[[LOWER_2:.*]] = arith.constant 0 : index 806// CHECK-NEXT: %[[STEP_1:.*]] = arith.constant 1 : index 807// CHECK-NEXT: %[[STEP_2:.*]] = arith.constant 1 : index 808// CHECK-NEXT: scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER]], %[[UPPER]]) step (%[[STEP_1]], %[[STEP_2]]) { 809// CHECK-NEXT: %[[VAL_1:.*]] = memref.load 810// CHECK-NEXT: %[[VAL_2:.*]] = memref.load 811// CHECK-NEXT: %[[PRODUCT:.*]] = arith.mulf 812// CHECK-NEXT: store 813// CHECK-NEXT: scf.reduce 814// CHECK-NEXT: } 815// CHECK-NEXT: return 816// CHECK-NEXT: } 817 818///////////////////////////////////////////////////////////////////// 819 820func.func @affine_parallel_with_reductions(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>) -> (f32, f32) { 821 %0:2 = affine.parallel (%kx, %ky) = (0, 0) to (2, 2) reduce ("addf", "mulf") -> (f32, f32) { 822 %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf32> 823 %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf32> 824 %3 = arith.mulf %1, %2 : f32 825 %4 = arith.addf %1, %2 : f32 826 affine.yield %3, %4 : f32, f32 827 } 828 return %0#0, %0#1 : f32, f32 829} 830// CHECK-LABEL: func @affine_parallel_with_reductions 831// CHECK: %[[LOWER_1:.*]] = arith.constant 0 : index 832// CHECK-NEXT: %[[UPPER_1:.*]] = arith.constant 2 : index 833// CHECK-NEXT: %[[LOWER_2:.*]] = arith.constant 0 : index 834// CHECK-NEXT: %[[UPPER_2:.*]] = arith.constant 2 : index 835// CHECK-NEXT: %[[STEP_1:.*]] = arith.constant 1 : index 836// CHECK-NEXT: %[[STEP_2:.*]] = arith.constant 1 : index 837// CHECK-NEXT: %[[INIT_1:.*]] = arith.constant 0.000000e+00 : f32 838// CHECK-NEXT: %[[INIT_2:.*]] = arith.constant 1.000000e+00 : f32 839// CHECK-NEXT: %[[RES:.*]] = scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step 
(%[[STEP_1]], %[[STEP_2]]) init (%[[INIT_1]], %[[INIT_2]]) -> (f32, f32) { 840// CHECK-NEXT: %[[VAL_1:.*]] = memref.load 841// CHECK-NEXT: %[[VAL_2:.*]] = memref.load 842// CHECK-NEXT: %[[PRODUCT:.*]] = arith.mulf 843// CHECK-NEXT: %[[SUM:.*]] = arith.addf 844// CHECK-NEXT: scf.reduce(%[[PRODUCT]], %[[SUM]] : f32, f32) { 845// CHECK-NEXT: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32): 846// CHECK-NEXT: %[[RES:.*]] = arith.addf 847// CHECK-NEXT: scf.reduce.return %[[RES]] : f32 848// CHECK-NEXT: }, { 849// CHECK-NEXT: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32): 850// CHECK-NEXT: %[[RES:.*]] = arith.mulf 851// CHECK-NEXT: scf.reduce.return %[[RES]] : f32 852// CHECK-NEXT: } 853// CHECK-NEXT: } 854// CHECK-NEXT: return 855// CHECK-NEXT: } 856 857///////////////////////////////////////////////////////////////////// 858 859func.func @affine_parallel_with_reductions_f64(%arg0: memref<3x3xf64>, %arg1: memref<3x3xf64>) -> (f64, f64) { 860 %0:2 = affine.parallel (%kx, %ky) = (0, 0) to (2, 2) reduce ("addf", "mulf") -> (f64, f64) { 861 %1 = affine.load %arg0[%kx, %ky] : memref<3x3xf64> 862 %2 = affine.load %arg1[%kx, %ky] : memref<3x3xf64> 863 %3 = arith.mulf %1, %2 : f64 864 %4 = arith.addf %1, %2 : f64 865 affine.yield %3, %4 : f64, f64 866 } 867 return %0#0, %0#1 : f64, f64 868} 869// CHECK-LABEL: @affine_parallel_with_reductions_f64 870// CHECK: %[[LOWER_1:.*]] = arith.constant 0 : index 871// CHECK: %[[UPPER_1:.*]] = arith.constant 2 : index 872// CHECK: %[[LOWER_2:.*]] = arith.constant 0 : index 873// CHECK: %[[UPPER_2:.*]] = arith.constant 2 : index 874// CHECK: %[[STEP_1:.*]] = arith.constant 1 : index 875// CHECK: %[[STEP_2:.*]] = arith.constant 1 : index 876// CHECK: %[[INIT_1:.*]] = arith.constant 0.000000e+00 : f64 877// CHECK: %[[INIT_2:.*]] = arith.constant 1.000000e+00 : f64 878// CHECK: %[[RES:.*]] = scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) init (%[[INIT_1]], %[[INIT_2]]) -> 
(f64, f64) { 879// CHECK: %[[VAL_1:.*]] = memref.load 880// CHECK: %[[VAL_2:.*]] = memref.load 881// CHECK: %[[PRODUCT:.*]] = arith.mulf 882// CHECK: %[[SUM:.*]] = arith.addf 883// CHECK: scf.reduce(%[[PRODUCT]], %[[SUM]] : f64, f64) { 884// CHECK: ^bb0(%[[LHS:.*]]: f64, %[[RHS:.*]]: f64): 885// CHECK: %[[RES:.*]] = arith.addf 886// CHECK: scf.reduce.return %[[RES]] : f64 887// CHECK: }, { 888// CHECK: ^bb0(%[[LHS:.*]]: f64, %[[RHS:.*]]: f64): 889// CHECK: %[[RES:.*]] = arith.mulf 890// CHECK: scf.reduce.return %[[RES]] : f64 891// CHECK: } 892// CHECK: } 893 894///////////////////////////////////////////////////////////////////// 895 896func.func @affine_parallel_with_reductions_i64(%arg0: memref<3x3xi64>, %arg1: memref<3x3xi64>) -> (i64, i64) { 897 %0:2 = affine.parallel (%kx, %ky) = (0, 0) to (2, 2) reduce ("addi", "muli") -> (i64, i64) { 898 %1 = affine.load %arg0[%kx, %ky] : memref<3x3xi64> 899 %2 = affine.load %arg1[%kx, %ky] : memref<3x3xi64> 900 %3 = arith.muli %1, %2 : i64 901 %4 = arith.addi %1, %2 : i64 902 affine.yield %3, %4 : i64, i64 903 } 904 return %0#0, %0#1 : i64, i64 905} 906// CHECK-LABEL: @affine_parallel_with_reductions_i64 907// CHECK: %[[LOWER_1:.*]] = arith.constant 0 : index 908// CHECK: %[[UPPER_1:.*]] = arith.constant 2 : index 909// CHECK: %[[LOWER_2:.*]] = arith.constant 0 : index 910// CHECK: %[[UPPER_2:.*]] = arith.constant 2 : index 911// CHECK: %[[STEP_1:.*]] = arith.constant 1 : index 912// CHECK: %[[STEP_2:.*]] = arith.constant 1 : index 913// CHECK: %[[INIT_1:.*]] = arith.constant 0 : i64 914// CHECK: %[[INIT_2:.*]] = arith.constant 1 : i64 915// CHECK: %[[RES:.*]] = scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[LOWER_1]], %[[LOWER_2]]) to (%[[UPPER_1]], %[[UPPER_2]]) step (%[[STEP_1]], %[[STEP_2]]) init (%[[INIT_1]], %[[INIT_2]]) -> (i64, i64) { 916// CHECK: %[[VAL_1:.*]] = memref.load 917// CHECK: %[[VAL_2:.*]] = memref.load 918// CHECK: %[[PRODUCT:.*]] = arith.muli 919// CHECK: %[[SUM:.*]] = arith.addi 920// CHECK: 
scf.reduce(%[[PRODUCT]], %[[SUM]] : i64, i64) { 921// CHECK: ^bb0(%[[LHS:.*]]: i64, %[[RHS:.*]]: i64): 922// CHECK: %[[RES:.*]] = arith.addi 923// CHECK: scf.reduce.return %[[RES]] : i64 924// CHECK: }, { 925// CHECK: ^bb0(%[[LHS:.*]]: i64, %[[RHS:.*]]: i64): 926// CHECK: %[[RES:.*]] = arith.muli 927// CHECK: scf.reduce.return %[[RES]] : i64 928// CHECK: } 929// CHECK: } 930