// RUN: mlir-opt %s -split-input-file -loop-invariant-code-motion | FileCheck %s

// Invariant ops in both the outer and the inner loop are hoisted above the
// loop nest: both addf results must appear before either affine.for.
func.func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %v0, %cf8 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32
  // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store

  return
}

// -----

// The addf only uses values defined outside the nest, so it is hoisted
// above both loops.
func.func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf

  return
}

// -----

// Every op in the body depends (directly or transitively) on the induction
// variable, so nothing may be hoisted.
func.func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<10xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store

  return
}

// -----

// The affine.if condition uses the IV (via the affine.apply result), so the
// if and its body must stay inside the loop.
func.func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
      %cf9 = arith.addf %cf8, %cf8 : f32
      affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.apply
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }


  return
}

// -----

// The affine.if only depends on the outer IV, so it is hoisted out of the
// inner loop; the emptied inner loop is then itself hoisted out of the outer
// loop (it appears first in the output).
func.func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 20 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 20 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if #set(%[[ARG]], %[[ARG]]) {
  // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: }

  return
}

// -----

// An affine.if with a condition that uses no loop-dependent values is
// hoisted out of the affine.for, and its result feeds the loop body.
func.func @hoist_invariant_affine_if_success(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %sum_result = affine.for %i = %lb to %ub iter_args(%acc = %cst_0) -> i32 {
    %conditional_add = affine.if affine_set<() : ()> () -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      affine.yield %add : i32
    } else {
      %poison = ub.poison : i32
      affine.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    affine.yield %sum : i32
  }

  // CHECK-LABEL: hoist_invariant_affine_if_success
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: %[[IF:.*]] = affine.if
  // CHECK-NEXT: arith.addi %[[CST]], %[[CST]] : i32
  // CHECK: affine.for
  // CHECK-NOT: affine.if
  // CHECK-NEXT: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

// The affine.if condition reads the IV, so it cannot be hoisted.
func.func @hoist_variant_affine_if_failure(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %ind_7 = arith.constant 7 : index
  %sum_result = affine.for %i = %lb to %ub iter_args(%acc = %cst_0) -> i32 {
    %conditional_add = affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%i, %ind_7) -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      affine.yield %add : i32
    } else {
      %poison = ub.poison : i32
      affine.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    affine.yield %sum : i32
  }

  // CHECK-LABEL: hoist_variant_affine_if_failure
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: arith.constant 7 : index
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: %[[IF:.*]] = affine.if
  // CHECK: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

// An empty unit-step affine.for with unknown (but loop-invariant) bounds is
// hoisted out of the enclosing loop.
func.func @hoist_affine_for_with_unknown_trip_count(%lb: index, %ub: index) {
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = %lb to %ub {
    }
  }

  // CHECK: @hoist_affine_for_with_unknown_trip_count(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK-NEXT: affine.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]] {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG3:.*]] = 0 to 10 {
  // CHECK-NEXT: }

  return
}

// -----

// With a non-unit step and unknown bounds the inner loop is not hoisted.
func.func @hoist_affine_for_with_unknown_trip_count_non_unit_step(%lb: index, %ub: index) {
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = %lb to %ub step 2 {
    }
  }

  // CHECK: @hoist_affine_for_with_unknown_trip_count_non_unit_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK-NEXT: affine.for %[[ARG2:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]] step 2 {
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// Same as above for scf.for: unit step with unknown trip count is hoisted.
func.func @hoist_scf_for_with_unknown_trip_count_unit_step(%lb: index, %ub: index) {
  %c1 = arith.constant 1 : index
  scf.for %arg0 = %lb to %ub step %c1 {
    scf.for %arg1 = %lb to %ub step %c1 {
    }
  }

  // CHECK: @hoist_scf_for_with_unknown_trip_count_unit_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK: scf.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: }
  // CHECK-NEXT: scf.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: }

  return
}

// -----

// A known non-unit constant step with unknown bounds blocks hoisting.
func.func @hoist_scf_for_with_unknown_trip_count_non_unit_constant_step(%lb: index, %ub: index) {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  scf.for %arg0 = %lb to %ub step %c1 {
    scf.for %arg1 = %lb to %ub step %c2 {
    }
  }

  // CHECK: @hoist_scf_for_with_unknown_trip_count_non_unit_constant_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) {
  // CHECK: scf.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: scf.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// A completely unknown step also blocks hoisting of the inner scf.for.
func.func @hoist_scf_for_with_unknown_trip_count_unknown_step(%lb: index, %ub: index, %step: index) {
  %c1 = arith.constant 1 : index
  scf.for %arg0 = %lb to %ub step %c1 {
    scf.for %arg1 = %lb to %ub step %step {
    }
  }

  // CHECK: @hoist_scf_for_with_unknown_trip_count_unknown_step(%[[ARG0:.*]]: index, %[[ARG1:.*]]: index, %[[STEP:.*]]: index) {
  // CHECK: scf.for %[[ARG2:.*]] = %[[ARG0]] to %[[ARG1]]
  // CHECK-NEXT: scf.for %[[ARG3:.*]] = %[[ARG0]] to %[[ARG1]] step %[[STEP]]
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// The store indexes with the inner IV, so the affine.if body is not
// invariant with respect to the inner loop and nothing moves.
func.func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg1] : memref<10xf32>
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// Nested affine.ifs depending only on the outer IV are hoisted out of the
// inner loop; the emptied inner loop is hoisted out of the outer loop.
func.func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// The else branch stores with the inner IV, so the whole if nest must stay
// inside both loops.
func.func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        } else {
          affine.store %cf9, %m[%arg1] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// Invariant addf inside an scf.for nest is hoisted above both loops.
func.func @invariant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf

  return
}

// -----

// The addi uses both IVs, so it stays inside the scf.for nest.
func.func @variant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addi %arg0, %arg1 : index
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: arith.addi

  return
}

// -----

// In an scf.parallel, the IV-independent addi is hoisted while the
// IV-dependent one remains in the body.
func.func @parallel_loop_with_invariant() {
  %c0 = arith.constant 0 : index
  %c10 = arith.constant 10 : index
  %c1 = arith.constant 1 : index
  %c7 = arith.constant 7 : i32
  %c8 = arith.constant 8 : i32
  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
    %v0 = arith.addi %c7, %c8 : i32
    %v3 = arith.addi %arg0, %arg1 : index
  }

  // CHECK-LABEL: func @parallel_loop_with_invariant
  // CHECK: arith.constant 0 : index
  // CHECK-NEXT: arith.constant 10 : index
  // CHECK-NEXT: arith.constant 1 : index
  // CHECK-NEXT: arith.constant 7 : i32
  // CHECK-NEXT: arith.constant 8 : i32
  // CHECK-NEXT: arith.addi
  // CHECK-NEXT: scf.parallel (%[[A:.*]],{{.*}}) =
  // CHECK-NEXT: arith.addi %[[A]]
  // CHECK-NEXT: reduce
  // CHECK-NEXT: }
  // CHECK-NEXT: return

  return
}

// -----

// An scf.if with an invariant condition is hoisted out of the scf.for and
// its result feeds the loop body.
func.func @hoist_invariant_scf_if_success(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %true = arith.constant true
  %sum_result = scf.for %i = %lb to %ub step %step iter_args(%acc = %cst_0) -> i32 {
    %conditional_add = scf.if %true -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      scf.yield %add : i32
    } else {
      %poison = ub.poison : i32
      scf.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    scf.yield %sum : i32
  }

  // CHECK-LABEL: hoist_invariant_scf_if_success
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: %[[TRUE:.*]] = arith.constant true
  // CHECK-NEXT: %[[IF:.*]] = scf.if %[[TRUE]]
  // CHECK-NEXT: arith.addi %[[CST]], %[[CST]] : i32
  // CHECK: scf.for
  // CHECK-NOT: scf.if
  // CHECK-NEXT: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

// The scf.if condition is computed from the IV, so the if stays in the loop.
func.func @hoist_variant_scf_if_failure(%lb: index, %ub: index, %step: index) -> i32 {
  %cst_0 = arith.constant 0 : i32
  %cst_42 = arith.constant 42 : i32
  %ind_7 = arith.constant 7 : index
  %sum_result = scf.for %i = %lb to %ub step %step iter_args(%acc = %cst_0) -> i32 {
    %cond = arith.cmpi ult, %i, %ind_7 : index
    %conditional_add = scf.if %cond -> (i32) {
      %add = arith.addi %cst_42, %cst_42 : i32
      scf.yield %add : i32
    } else {
      %poison = ub.poison : i32
      scf.yield %poison : i32
    }
    %sum = arith.addi %acc, %conditional_add : i32
    scf.yield %sum : i32
  }

  // CHECK-LABEL: hoist_variant_scf_if_failure
  // CHECK-NEXT: arith.constant 0 : i32
  // CHECK-NEXT: %[[CST_42:.*]] = arith.constant 42 : i32
  // CHECK-NEXT: %[[CST_7:.*]] = arith.constant 7 : index
  // CHECK-NEXT: scf.for %[[IV:.*]] = %{{.*}} to %{{.*}}
  // CHECK-NEXT: %[[CMP:.*]] = arith.cmpi ult, %[[IV]], %[[CST_7]]
  // CHECK-NEXT: %[[IF:.*]] = scf.if %[[CMP]]
  // CHECK-NEXT: arith.addi %[[CST_42]], %[[CST_42]] : i32
  // CHECK: arith.addi %{{.*}}, %[[IF]]

  return %sum_result : i32
}

// -----

func.func private @make_val() -> (index)

// CHECK-LABEL: func @nested_uses_inside
func.func @nested_uses_inside(%lb: index, %ub: index, %step: index) {
  %true = arith.constant true

  // Check that ops that contain nested uses to values not defined outside
  // remain in the loop.
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: call @
  // CHECK-NEXT: call @
  // CHECK-NEXT: scf.if
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: else
  // CHECK-NEXT: scf.yield
  scf.for %i = %lb to %ub step %step {
    %val = func.call @make_val() : () -> (index)
    %val2 = func.call @make_val() : () -> (index)
    %r = scf.if %true -> (index) {
      scf.yield %val: index
    } else {
      scf.yield %val2: index
    }
  }
  return
}

// -----

// Test that two ops that feed into each other are moved without violating
// dominance in non-graph regions.
// CHECK-LABEL: func @invariant_subgraph
// CHECK-SAME: %{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %[[ARG:.*]]: i32
func.func @invariant_subgraph(%lb: index, %ub: index, %step: index, %arg: i32) {
  // Both adds are hoisted, and in dependency order (%v0 before %v1).
  // CHECK: %[[V0:.*]] = arith.addi %[[ARG]], %[[ARG]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[ARG]], %[[V0]]
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    // CHECK-NEXT: "test.sink"(%[[V1]])
    %v0 = arith.addi %arg, %arg : i32
    %v1 = arith.addi %arg, %v0 : i32
    "test.sink"(%v1) : (i32) -> ()
  }
  return
}

// -----

// Test invariant nested loop is hoisted.
// CHECK-LABEL: func @test_invariant_nested_loop
func.func @test_invariant_nested_loop() {
  // CHECK: %[[C:.*]] = arith.constant
  %0 = arith.constant 5 : i32
  // The inner graph loop uses only its own block argument, so it is hoisted
  // out of the outer graph loop along with the two invariant adds.
  // CHECK: %[[V0:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[V0]], %[[C]]
  // CHECK-NEXT: test.graph_loop
  // CHECK-NEXT: ^bb0(%[[ARG0:.*]]: i32)
  // CHECK-NEXT: %[[V2:.*]] = arith.subi %[[ARG0]], %[[ARG0]]
  // CHECK-NEXT: test.region_yield %[[V2]]
  // CHECK: test.graph_loop
  // CHECK-NEXT: test.region_yield %[[V1]]
  test.graph_loop {
    %1 = arith.addi %0, %0 : i32
    %2 = arith.addi %1, %0 : i32
    test.graph_loop {
    ^bb0(%arg0: i32):
      %3 = arith.subi %arg0, %arg0 : i32
      test.region_yield %3 : i32
    } : () -> ()
    test.region_yield %2 : i32
  } : () -> ()
  return
}


// -----

// Test ops in a graph region are hoisted.
// CHECK-LABEL: func @test_invariants_in_graph_region
func.func @test_invariants_in_graph_region() {
  // CHECK: test.single_no_terminator_op
  test.single_no_terminator_op : {
    // Graph-region ops may use values before they are defined; hoisting
    // emits them in topological order (%c0, then %v1, then %v0).
    // CHECK-NEXT: %[[C:.*]] = arith.constant
    // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
    // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
    test.graph_loop {
      %v0 = arith.addi %c0, %v1 : i32
      %v1 = arith.addi %c0, %c0 : i32
      %c0 = arith.constant 5 : i32
      test.region_yield %v0 : i32
    } : () -> ()
  }
  return
}

// -----

// Test ops in a graph region are hoisted in topological order into non-graph
// regions and that dominance is preserved.
// CHECK-LABEL: func @test_invariant_backedge
func.func @test_invariant_backedge() {
  // CHECK-NEXT: %[[C:.*]] = arith.constant
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
  // CHECK-NEXT: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: test.region_yield %[[V0]]
    %v0 = arith.addi %c0, %v1 : i32
    %v1 = arith.addi %c0, %c0 : i32
    %c0 = arith.constant 5 : i32
    test.region_yield %v0 : i32
  } : () -> ()
  return
}

// -----

// Test that cycles aren't hoisted from graph regions to non-graph regions.
// CHECK-LABEL: func @test_invariant_cycle_not_hoisted
func.func @test_invariant_cycle_not_hoisted() {
  // CHECK: test.graph_loop
  test.graph_loop {
    // %a and %b form a use-def cycle, which is only legal inside the graph
    // region, so both must stay put.
    // CHECK-NEXT: %[[A:.*]] = "test.a"(%[[B:.*]]) :
    // CHECK-NEXT: %[[B]] = "test.b"(%[[A]]) :
    // CHECK-NEXT: test.region_yield %[[A]]
    %a = "test.a"(%b) : (i32) -> i32
    %b = "test.b"(%a) : (i32) -> i32
    test.region_yield %a : i32
  } : () -> ()
  return
}

// -----

// Speculatability tests: whether an op may be hoisted out of a loop that
// might execute zero times depends on its declared speculatability.

// CHECK-LABEL: test_always_speculatable_op
func.func @test_always_speculatable_op(%lb: index, %ub: index, %step: index) {
  // CHECK: test.always_speculatable_op
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = "test.always_speculatable_op"() : () -> i32
  }

  return
}

// CHECK-LABEL: test_never_speculatable_op
func.func @test_never_speculatable_op(%lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: test.never_speculatable_op
  scf.for %i = %lb to %ub step %step {
    %val = "test.never_speculatable_op"() : () -> i32
  }

  return
}

// CHECK-LABEL: test_conditionally_speculatable_op_success
func.func @test_conditionally_speculatable_op_success(%lb: index, %ub: index, %step: index) {
  // CHECK: test.conditionally_speculatable_op
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %const_val = arith.constant 5 : i32
    %val = "test.conditionally_speculatable_op"(%const_val) : (i32) -> i32
  }

  return
}

// CHECK-LABEL: test_conditionally_speculatable_op_failure
func.func @test_conditionally_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: scf.for
  // CHECK-NEXT: test.conditionally_speculatable_op
  %const_5 = arith.constant 5 : i32
  %non_const = arith.addi %arg, %const_5 : i32
  scf.for %i = %lb to %ub step %step {
    %val = "test.conditionally_speculatable_op"(%non_const) : (i32) -> i32
  }

  return
}

// CHECK-LABEL: test_recursively_speculatable_op_success
func.func @test_recursively_speculatable_op_success(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: test.recursively_speculatable_op
  // CHECK: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = "test.recursively_speculatable_op"()({
      %result = arith.addi %arg, %arg : i32
      test.region_yield %result : i32
    }) : () -> i32
  }

  return
}

// CHECK-LABEL: test_recursively_speculatable_op_failure
func.func @test_recursively_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: scf.for
  // CHECK-NEXT: test.recursively_speculatable_op
  scf.for %i = %lb to %ub step %step {
    %val = "test.recursively_speculatable_op"()({
      %result = "test.never_speculatable_op"() : () -> i32
      test.region_yield %result : i32
    }) : () -> i32
  }

  return
}

// -----

// tensor.dim is only speculatable (and thus hoistable) when both the rank
// and the dim index are statically known and the index is in bounds.

func.func @speculate_tensor_dim_unknown_rank_unknown_dim(
// CHECK-LABEL: @speculate_tensor_dim_unknown_rank_unknown_dim
    %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %dim_idx : tensor<*xf32>
  }

  return
}

func.func @speculate_tensor_dim_known_rank_unknown_dim(
// CHECK-LABEL: @speculate_tensor_dim_known_rank_unknown_dim
    %t: tensor<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %dim_idx : tensor<?x?x?x?xf32>
  }

  return
}

func.func @speculate_tensor_dim_unknown_rank_known_dim(
// CHECK-LABEL: @speculate_tensor_dim_unknown_rank_known_dim
    %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c0 = arith.constant 0 : index
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %c0 : tensor<*xf32>
  }

  return
}

func.func @speculate_tensor_dim_known_rank_known_dim_inbounds(
// CHECK-LABEL: @speculate_tensor_dim_known_rank_known_dim_inbounds
    %t: tensor<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c1 = arith.constant 1 : index
  // CHECK: tensor.dim
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = tensor.dim %t, %c1 : tensor<?x?x?x?xf32>
  }

  return
}

// -----

// Same matrix of cases for memref.dim.

func.func @speculate_memref_dim_unknown_rank_unknown_dim(
// CHECK-LABEL: @speculate_memref_dim_unknown_rank_unknown_dim
    %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %dim_idx : memref<*xf32>
  }

  return
}

func.func @speculate_memref_dim_known_rank_unknown_dim(
// CHECK-LABEL: @speculate_memref_dim_known_rank_unknown_dim
    %t: memref<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %dim_idx : memref<?x?x?x?xf32>
  }

  return
}

func.func @speculate_memref_dim_unknown_rank_known_dim(
// CHECK-LABEL: @speculate_memref_dim_unknown_rank_known_dim
    %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c0 = arith.constant 0 : index
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %c0 : memref<*xf32>
  }

  return
}

func.func @speculate_memref_dim_known_rank_known_dim_inbounds(
// CHECK-LABEL: @speculate_memref_dim_known_rank_known_dim_inbounds
    %t: memref<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %c1 = arith.constant 1 : index
  // CHECK: memref.dim
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = memref.dim %t, %c1 : memref<?x?x?x?xf32>
  }

  return
}

// -----

// Rank 1, dim index 0: statically in bounds, so the memref.dim is hoisted.
// CHECK-LABEL: @speculate_memref_dim_known_rank_known_dim_inbounds
func.func @speculate_memref_dim_known_rank_known_dim_inbounds() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c22 = arith.constant 22 : index
  %alloc = memref.alloc(%c22) : memref<?xi1>
  scf.for %arg4 = %c0 to %c22 step %c1 {
    %dim = memref.dim %alloc, %c0 : memref<?xi1>
  }
  return
}
// CHECK: memref.dim
// CHECK-NEXT: scf.for

// -----

// Rank 2, dim index 1: statically in bounds, so the tensor.dim is hoisted.
// CHECK-LABEL: @speculate_tensor_dim_known_rank_known_dim_inbounds
func.func @speculate_tensor_dim_known_rank_known_dim_inbounds() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c22 = arith.constant 22 : index
  %t = tensor.empty(%c22, %c22) : tensor<?x?xi1>
  scf.for %arg4 = %c0 to %c22 step %c1 {
    %dim = tensor.dim %t, %c1 : tensor<?x?xi1>
  }
  return
}
// CHECK: tensor.dim
// CHECK-NEXT: scf.for

// -----

// Rank 1, dim index 1: statically out of bounds, so the memref.dim must not
// be speculated out of the loop.
// CHECK-LABEL: @no_speculate_memref_dim_known_rank_known_dim_out_of_bounds
func.func @no_speculate_memref_dim_known_rank_known_dim_out_of_bounds() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c22 = arith.constant 22 : index
  %alloc = memref.alloc(%c22) : memref<?xi1>
  scf.for %arg4 = %c0 to %c22 step %c1 {
    %dim = memref.dim %alloc, %c1 : memref<?xi1>
  }
  return
}
// CHECK: scf.for
// CHECK-NEXT: memref.dim

// -----

// Integer division ops are only speculatable when the divisor is a known
// non-zero constant; otherwise hoisting could introduce a trap.

func.func @no_speculate_divui(
// CHECK-LABEL: @no_speculate_divui(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divui
    %val = arith.divui %num, %denom : i32
  }

  return
}

func.func @no_speculate_divsi(
// CHECK-LABEL: @no_speculate_divsi(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divsi
    %val = arith.divsi %num, %denom : i32
  }

  return
}

func.func @no_speculate_ceildivui(
// CHECK-LABEL: @no_speculate_ceildivui(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivui
    %val = arith.ceildivui %num, %denom : i32
  }

  return
}

func.func @no_speculate_ceildivsi(
// CHECK-LABEL: @no_speculate_ceildivsi(
    %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) {
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivsi
    %val = arith.ceildivsi %num, %denom : i32
  }

  return
}

func.func @no_speculate_divui_const(%num: i32, %lb: index, %ub: index, %step: index) {
// CHECK-LABEL: @no_speculate_divui_const(
  %c0 = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.divui
    %val = arith.divui %num, %c0 : i32
  }

  return
}

func.func @speculate_divui_const(
// CHECK-LABEL: @speculate_divui_const(
    %num: i32, %lb: index, %ub: index, %step: index) {
  %c5 = arith.constant 5 : i32
// CHECK: arith.divui
// CHECK: scf.for
  scf.for %i = %lb to %ub step %step {
    %val = arith.divui %num, %c5 : i32
  }

  return
}

func.func @no_speculate_ceildivui_const(%num: i32, %lb: index, %ub: index, %step: index) {
// CHECK-LABEL: @no_speculate_ceildivui_const(
  %c0 = arith.constant 0 : i32
  scf.for %i = %lb to %ub step %step {
// CHECK: scf.for
// CHECK: arith.ceildivui
    %val = arith.ceildivui %num, %c0 : i32
  }

  return
}

func.func @speculate_ceildivui_const(
// CHECK-LABEL: @speculate_ceildivui_const(
958 %num: i32, %lb: index, %ub: index, %step: index) { 959 %c5 = arith.constant 5 : i32 960// CHECK: arith.ceildivui 961// CHECK: scf.for 962 scf.for %i = %lb to %ub step %step { 963 %val = arith.ceildivui %num, %c5 : i32 964 } 965 966 return 967} 968 969func.func @no_speculate_divsi_const0( 970// CHECK-LABEL: @no_speculate_divsi_const0( 971 %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { 972 %c0 = arith.constant 0 : i32 973 scf.for %i = %lb to %ub step %step { 974// CHECK: scf.for 975// CHECK: arith.divsi 976 %val = arith.divsi %num, %c0 : i32 977 } 978 979 return 980} 981 982func.func @no_speculate_divsi_const_minus1( 983// CHECK-LABEL: @no_speculate_divsi_const_minus1( 984 %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { 985 %cm1 = arith.constant -1 : i32 986 scf.for %i = %lb to %ub step %step { 987// CHECK: scf.for 988// CHECK: arith.divsi 989 %val = arith.divsi %num, %cm1 : i32 990 } 991 992 return 993} 994 995func.func @speculate_divsi_const( 996// CHECK-LABEL: @speculate_divsi_const( 997 %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { 998 %c5 = arith.constant 5 : i32 999 scf.for %i = %lb to %ub step %step { 1000// CHECK: arith.divsi 1001// CHECK: scf.for 1002 %val = arith.divsi %num, %c5 : i32 1003 } 1004 1005 return 1006} 1007 1008func.func @no_speculate_ceildivsi_const0( 1009// CHECK-LABEL: @no_speculate_ceildivsi_const0( 1010 %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { 1011 %c0 = arith.constant 0 : i32 1012 scf.for %i = %lb to %ub step %step { 1013// CHECK: scf.for 1014// CHECK: arith.ceildivsi 1015 %val = arith.ceildivsi %num, %c0 : i32 1016 } 1017 1018 return 1019} 1020 1021func.func @no_speculate_ceildivsi_const_minus1( 1022// CHECK-LABEL: @no_speculate_ceildivsi_const_minus1( 1023 %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { 1024 %cm1 = arith.constant -1 : i32 1025 scf.for %i = %lb to %ub step %step { 1026// CHECK: scf.for 1027// CHECK: arith.ceildivsi 1028 %val = 
arith.ceildivsi %num, %cm1 : i32 1029 } 1030 1031 return 1032} 1033 1034func.func @speculate_ceildivsi_const( 1035// CHECK-LABEL: @speculate_ceildivsi_const( 1036 %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { 1037 %c5 = arith.constant 5 : i32 1038 scf.for %i = %lb to %ub step %step { 1039// CHECK: arith.ceildivsi 1040// CHECK: scf.for 1041 %val = arith.ceildivsi %num, %c5 : i32 1042 } 1043 1044 return 1045} 1046 1047func.func @no_speculate_divui_range( 1048// CHECK-LABEL: @no_speculate_divui_range( 1049 %num: i8, %lb: index, %ub: index, %step: index) { 1050 %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1051 scf.for %i = %lb to %ub step %step { 1052// CHECK: scf.for 1053// CHECK: arith.divui 1054 %val = arith.divui %num, %denom : i8 1055 } 1056 1057 return 1058} 1059 1060func.func @no_speculate_divsi_range( 1061// CHECK-LABEL: @no_speculate_divsi_range( 1062 %num: i8, %lb: index, %ub: index, %step: index) { 1063 %denom0 = test.with_bounds {smax = -1: i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1064 %denom1 = test.with_bounds {smax = 127 : i8, smin = 0 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1065 scf.for %i = %lb to %ub step %step { 1066// CHECK: scf.for 1067// CHECK-COUNT-2: arith.divsi 1068 %val0 = arith.divsi %num, %denom0 : i8 1069 %val1 = arith.divsi %num, %denom1 : i8 1070 } 1071 1072 return 1073} 1074 1075func.func @no_speculate_ceildivui_range( 1076// CHECK-LABEL: @no_speculate_ceildivui_range( 1077 %num: i8, %lb: index, %ub: index, %step: index) { 1078 %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1079 scf.for %i = %lb to %ub step %step { 1080// CHECK: scf.for 1081// CHECK: arith.ceildivui 1082 %val = arith.ceildivui %num, %denom : i8 1083 } 1084 1085 return 1086} 1087 1088func.func @no_speculate_ceildivsi_range( 1089// CHECK-LABEL: @no_speculate_ceildivsi_range( 1090 %num: i8, %lb: index, %ub: index, %step: index) { 
1091 %denom0 = test.with_bounds {smax = -1 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1092 %denom1 = test.with_bounds {smax = 127 : i8, smin = 0 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1093 scf.for %i = %lb to %ub step %step { 1094// CHECK: scf.for 1095// CHECK-COUNT-2: arith.ceildivsi 1096 %val0 = arith.ceildivsi %num, %denom0 : i8 1097 %val1 = arith.ceildivsi %num, %denom1 : i8 1098 } 1099 1100 return 1101} 1102 1103func.func @speculate_divui_range( 1104// CHECK-LABEL: @speculate_divui_range( 1105 %num: i8, %lb: index, %ub: index, %step: index) { 1106 %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 1 : i8} : i8 1107 scf.for %i = %lb to %ub step %step { 1108// CHECK: arith.divui 1109// CHECK: scf.for 1110 %val = arith.divui %num, %denom : i8 1111 } 1112 1113 return 1114} 1115 1116func.func @speculate_divsi_range( 1117// CHECK-LABEL: @speculate_divsi_range( 1118 %num: i8, %lb: index, %ub: index, %step: index) { 1119 %denom0 = test.with_bounds {smax = 127 : i8, smin = 1 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1120 %denom1 = test.with_bounds {smax = -2 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1121 scf.for %i = %lb to %ub step %step { 1122// CHECK-COUNT-2: arith.divsi 1123// CHECK: scf.for 1124 %val0 = arith.divsi %num, %denom0 : i8 1125 %val1 = arith.divsi %num, %denom1 : i8 1126 1127 } 1128 1129 return 1130} 1131 1132func.func @speculate_ceildivui_range( 1133// CHECK-LABEL: @speculate_ceildivui_range( 1134 %num: i8, %lb: index, %ub: index, %step: index) { 1135 %denom = test.with_bounds {smax = 127 : i8, smin = -128 : i8, umax = 255 : i8, umin = 1 : i8} : i8 1136 scf.for %i = %lb to %ub step %step { 1137// CHECK: arith.ceildivui 1138// CHECK: scf.for 1139 %val = arith.ceildivui %num, %denom : i8 1140 } 1141 1142 return 1143} 1144 1145func.func @speculate_ceildivsi_range( 1146// CHECK-LABEL: @speculate_ceildivsi_range( 1147 %num: i8, %lb: index, %ub: index, %step: index) { 1148 %denom0 = 
test.with_bounds {smax = 127 : i8, smin = 1 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1149 %denom1 = test.with_bounds {smax = -2 : i8, smin = -128 : i8, umax = 255 : i8, umin = 0 : i8} : i8 1150 scf.for %i = %lb to %ub step %step { 1151// CHECK-COUNT-2: arith.ceildivsi 1152// CHECK: scf.for 1153 %val0 = arith.ceildivsi %num, %denom0 : i8 1154 %val1 = arith.ceildivsi %num, %denom1 : i8 1155 1156 } 1157 1158 return 1159} 1160 1161// ----- 1162 1163func.func @speculate_static_pack_and_unpack(%source: tensor<128x256xf32>, 1164 %dest: tensor<4x16x32x16xf32>, %lb: index, %ub: index, %step: index) { 1165 1166 // CHECK: tensor.pack 1167 // CHECK-NEXT: scf.for 1168 scf.for %i = %lb to %ub step %step { 1169 %packed = tensor.pack %source 1170 inner_dims_pos = [0, 1] 1171 inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32> 1172 } 1173 1174 // CHECK: tensor.unpack 1175 // CHECK-NEXT: scf.for 1176 scf.for %i = %lb to %ub step %step { 1177 %unpacked = tensor.unpack %dest 1178 inner_dims_pos = [0, 1] 1179 inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32> 1180 } 1181 return 1182} 1183 1184// ----- 1185 1186func.func @speculate_dynamic_pack_and_unpack(%source: tensor<?x?xf32>, 1187 %dest: tensor<?x?x?x?xf32>, %lb: index, %ub: index, %step: index, 1188 %tile_m: index, %tile_n: index, %pad: f32) { 1189 1190 // CHECK: scf.for 1191 // CHECK-NEXT: tensor.pack 1192 scf.for %i = %lb to %ub step %step { 1193 %packed = tensor.pack %source 1194 inner_dims_pos = [0, 1] 1195 inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32> 1196 } 1197 1198 // CHECK: scf.for 1199 // CHECK-NEXT: tensor.unpack 1200 scf.for %i = %lb to %ub step %step { 1201 %unpacked = tensor.unpack %dest 1202 inner_dims_pos = [0, 1] 1203 inner_tiles = [%tile_n, %tile_m] into %source : tensor<?x?x?x?xf32> -> tensor<?x?xf32> 1204 } 1205 1206 // CHECK: tensor.pack 1207 // CHECK-NEXT: scf.for 1208 scf.for %i = %lb to %ub step %step { 
1209 %packed = tensor.pack %source padding_value(%pad : f32) 1210 inner_dims_pos = [0, 1] 1211 inner_tiles = [%tile_n, %tile_m] into %dest : tensor<?x?xf32> -> tensor<?x?x?x?xf32> 1212 } 1213 return 1214} 1215 1216// ----- 1217 1218// CHECK-LABEL: func @hoist_from_scf_while( 1219// CHECK-SAME: %[[arg0:.*]]: i32, %{{.*}}: i32) 1220// CHECK-DAG: arith.constant 1 : i32 1221// CHECK-DAG: %[[c2:.*]] = arith.constant 2 : i32 1222// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : i32 1223// CHECK-DAG: %[[added:.*]] = arith.addi %[[arg0]], %[[c2]] 1224// CHECK: scf.while 1225// CHECK: %[[cmpi:.*]] = arith.cmpi slt, %{{.*}}, %[[added]] 1226// CHECK: scf.condition(%[[cmpi]]) 1227func.func @hoist_from_scf_while(%arg0: i32, %arg1: i32) -> i32 { 1228 %0 = scf.while (%arg2 = %arg1) : (i32) -> (i32) { 1229 %c2 = arith.constant 2 : i32 1230 %c10 = arith.constant 10 : i32 1231 %added = arith.addi %arg0, %c2 : i32 1232 %1 = arith.cmpi slt, %arg2, %added : i32 1233 scf.condition(%1) %arg2 : i32 1234 } do { 1235 ^bb0(%arg2: i32): 1236 %c1 = arith.constant 1 : i32 1237 %added2 = arith.addi %c1, %arg2 : i32 1238 scf.yield %added2 : i32 1239 } 1240 return %0 : i32 1241} 1242 1243// ----- 1244 1245#trait = { 1246 indexing_maps = [ 1247 affine_map<(m, n, k) -> (m, k)>, 1248 affine_map<(m, n, k) -> (k, n)>, 1249 affine_map<(m, n, k) -> (m, n)> 1250 ], 1251 iterator_types = ["parallel", "parallel", "reduction"] 1252} 1253 1254// CHECK-LABEL: func @hoist_linalg_ops 1255// CHECK: linalg.generic 1256// CHECK: scf.for 1257// CHECK-NOT: linalg.generic 1258// CHECK: tensor.insert_slice 1259// CHECK: scf.yield 1260func.func @hoist_linalg_ops(%a : tensor<128x128xf32>, 1261 %b : tensor<128x128xf32>, 1262 %c: tensor<128x128xf32>, 1263 %lb : index, 1264 %ub : index, 1265 %step : index, 1266 %output : tensor<?x128xf32>) -> tensor<?x128xf32> { 1267 %final = 1268 scf.for %i = %lb to %ub step %step iter_args(%acc = %output) 1269 -> tensor<?x128xf32> { 1270 %compute = linalg.generic #trait 1271 ins(%a, %b : 
tensor<128x128xf32>, tensor<128x128xf32>) 1272 outs(%c : tensor<128x128xf32>) { 1273 ^bb0(%in : f32, %in2 : f32, %in3 : f32): 1274 %mul = arith.mulf %in, %in2 : f32 1275 %add = arith.addf %mul, %in3 : f32 1276 linalg.yield %in3 : f32 1277 } -> tensor<128x128xf32> 1278 1279 %newacc = tensor.insert_slice %compute into 1280 %output[%i, 0][128, 128][1, 1] 1281 : tensor<128x128xf32> into tensor<?x128xf32> 1282 scf.yield %newacc : tensor<?x128xf32> 1283 } 1284 1285 func.return %final : tensor<?x128xf32> 1286} 1287 1288// ----- 1289 1290#trait = { 1291 indexing_maps = [ 1292 affine_map<(m, n, k) -> (m, k)>, 1293 affine_map<(m, n, k) -> (k, n)>, 1294 affine_map<(m, n, k) -> (m, n)> 1295 ], 1296 iterator_types = ["parallel", "parallel", "reduction"] 1297} 1298 1299// CHECK-LABEL: func @hoist_linalg_ops_div_by_zero 1300// CHECK-NOT: linalg.generic 1301// CHECK: scf.for 1302// CHECK: linalg.generic 1303// CHECK: tensor.insert_slice 1304// CHECK: scf.yield 1305func.func @hoist_linalg_ops_div_by_zero(%a : tensor<128x128xi32>, 1306 %b : tensor<128x128xi32>, 1307 %c: tensor<128x128xi32>, 1308 %lb : index, 1309 %ub : index, 1310 %step : index, 1311 %output : tensor<?x128xi32>) -> tensor<?x128xi32> { 1312 %cst0 = arith.constant 0 : i32 1313 %final = 1314 scf.for %i = %lb to %ub step %step iter_args(%acc = %output) 1315 -> tensor<?x128xi32> { 1316 %compute = linalg.generic #trait 1317 ins(%a, %b : tensor<128x128xi32>, tensor<128x128xi32>) 1318 outs(%c : tensor<128x128xi32>) { 1319 ^bb0(%in : i32, %in2 : i32, %in3 : i32): 1320 %div = arith.divui %in, %in2 : i32 1321 %add = arith.addi %div, %in3 : i32 1322 linalg.yield %in3 : i32 1323 } -> tensor<128x128xi32> 1324 1325 %newacc = tensor.insert_slice %compute into 1326 %output[%i, 0][128, 128][1, 1] 1327 : tensor<128x128xi32> into tensor<?x128xi32> 1328 scf.yield %newacc : tensor<?x128xi32> 1329 } 1330 1331 func.return %final : tensor<?x128xi32> 1332} 1333 1334// ----- 1335 1336// CHECK-LABEL: func @hoist_vector_transfer_ops 1337// 
CHECK: vector.transfer_read 1338// CHECK: scf.for 1339// CHECK-NOT: vector.transfer_read 1340// CHECK: arith.addf 1341// CHECK: scf.yield 1342func.func @hoist_vector_transfer_ops( 1343 %a : tensor<128x128xf32>, 1344 %lb : index, 1345 %ub : index, 1346 %step : index, 1347 %ida : index, 1348 %idb : index) -> vector<4x4xf32> { 1349 %cst_0 = arith.constant 0.0 : f32 1350 %cst = arith.constant dense<0.0> : vector<4x4xf32> 1351 %final = 1352 scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> { 1353 %read = vector.transfer_read %a[%ida, %idb], %cst_0 : tensor<128x128xf32>, vector<4x4xf32> 1354 %out = arith.addf %read, %acc : vector<4x4xf32> 1355 scf.yield %out : vector<4x4xf32> 1356 } 1357 func.return %final : vector<4x4xf32> 1358} 1359 1360// ----- 1361 1362// CHECK-LABEL: func @hoist_vector_transfer_ops 1363// CHECK: vector.transfer_write 1364// CHECK: vector.transfer_read 1365// CHECK: scf.for 1366// CHECK-NOT: vector.transfer_write 1367// CHECK-NOT: vector.transfer_read 1368// CHECK: arith.addf 1369// CHECK: scf.yield 1370func.func @hoist_vector_transfer_ops( 1371 %lb : index, 1372 %ub : index, 1373 %step : index, 1374 %ida : index, 1375 %idb : index) -> vector<4x4xf32> { 1376 %c0 = arith.constant 0 : index 1377 %cst_0 = arith.constant 0.0 : f32 1378 %cst = arith.constant dense<0.0> : vector<4x4xf32> 1379 %empty = tensor.empty() : tensor<4x4xf32> 1380 %final = 1381 scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> { 1382 %a = vector.transfer_write %cst, %empty[%c0, %c0] : vector<4x4xf32>, tensor<4x4xf32> 1383 %read = vector.transfer_read %a[%c0, %c0], %cst_0 : tensor<4x4xf32>, vector<4x4xf32> 1384 %out = arith.addf %read, %acc : vector<4x4xf32> 1385 scf.yield %out : vector<4x4xf32> 1386 } 1387 func.return %final : vector<4x4xf32> 1388} 1389 1390// ----- 1391 1392// CHECK-LABEL: func @do_not_hoist_vector_transfer_ops_loop_dep 1393// CHECK-NOT: vector.transfer_read 1394// CHECK: scf.for 1395// CHECK: 
vector.transfer_read 1396// CHECK: arith.addf 1397// CHECK: scf.yield 1398func.func @do_not_hoist_vector_transfer_ops_loop_dep( 1399 %a : tensor<128x128xf32>, 1400 %lb : index, 1401 %ub : index, 1402 %step : index, 1403 %ida : index) -> vector<4x4xf32> { 1404 %cst_0 = arith.constant 0.0 : f32 1405 %cst = arith.constant dense<0.0> : vector<4x4xf32> 1406 %final = 1407 scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> { 1408 %read = vector.transfer_read %a[%ida, %i], %cst_0 : tensor<128x128xf32>, vector<4x4xf32> 1409 %out = arith.addf %read, %acc : vector<4x4xf32> 1410 scf.yield %out : vector<4x4xf32> 1411 } 1412 func.return %final : vector<4x4xf32> 1413} 1414 1415// ----- 1416 1417// CHECK-LABEL: func @do_not_hoist_vector_transfer_ops_memref 1418// CHECK-NOT: vector.transfer_read 1419// CHECK: scf.for 1420// CHECK: vector.transfer_read 1421// CHECK: arith.addf 1422// CHECK: scf.yield 1423func.func @do_not_hoist_vector_transfer_ops_memref( 1424 %a : memref<128x128xf32>, 1425 %lb : index, 1426 %ub : index, 1427 %step : index, 1428 %ida : index, 1429 %idb : index) -> vector<4x4xf32> { 1430 %cst_0 = arith.constant 0.0 : f32 1431 %cst = arith.constant dense<0.0> : vector<4x4xf32> 1432 %final = 1433 scf.for %i = %lb to %ub step %step iter_args(%acc = %cst) -> vector<4x4xf32> { 1434 %read = vector.transfer_read %a[%ida, %idb], %cst_0 : memref<128x128xf32>, vector<4x4xf32> 1435 %out = arith.addf %read, %acc : vector<4x4xf32> 1436 scf.yield %out : vector<4x4xf32> 1437 } 1438 func.return %final : vector<4x4xf32> 1439} 1440