// RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s

// The add is invariant w.r.t. both loops; the store only depends on %arg0, so
// the inner loop becomes empty after hoisting.
func.func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store

  return
}

// -----

// The store-load forwarding can see through affine apply's since it relies on
// dependence information.
// CHECK-LABEL: func @store_affine_apply
func.func @store_affine_apply() -> memref<10xf32> {
  %cf7 = arith.constant 7.0 : f32
  %m = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.store %cf7, %m[%t0] : memref<10xf32>
  }
  return %m : memref<10xf32>
// CHECK: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
// CHECK-NEXT: %[[VAR_0:.*]] = memref.alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
// CHECK-NEXT: affine.apply
// CHECK-NEXT: affine.store %[[cst]]
// CHECK-NEXT: }
// CHECK-NEXT: return %[[VAR_0]] : memref<10xf32>
}

// -----

// The loop body has no memory effects at all; the add is hoisted past both
// loops and the loops themselves become empty.
func.func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32

  return
}

// -----

// CHECK-LABEL: func @nested_loops_inner_loops_invariant_to_outermost_loop
func.func @nested_loops_inner_loops_invariant_to_outermost_loop(%m : memref<10xindex>) {
  affine.for %arg0 = 0 to 20 {
    affine.for %arg1 = 0 to 30 {
      %v0 = affine.for %arg2 = 0 to 10 iter_args (%prevAccum = %arg1) -> index {
        %v1 = affine.load %m[%arg2] : memref<10xindex>
        %newAccum = arith.addi %prevAccum, %v1 : index
        affine.yield %newAccum : index
      }
    }
  }

  // CHECK: affine.for %{{.*}} = 0 to 30 {
  // CHECK-NEXT: %{{.*}} = affine.for %{{.*}} = 0 to 10 iter_args(%{{.*}} = %{{.*}}) -> (index) {
  // CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}} : memref<10xindex>
  // CHECK-NEXT: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : index
  // CHECK-NEXT: affine.yield %{{.*}} : index
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 20 {
  // CHECK-NEXT: }

  return
}

// -----

// Every op in the body depends on the induction variable; nothing moves.
func.func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<11xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<11xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<11xf32>
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load %{{.*}} : memref<10xf32>
  // CHECK-NEXT: affine.load %{{.*}} : memref<11xf32>
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store %{{.*}} : memref<10xf32>

  return
}

// -----

// Ops guarded by an affine.if that depends on the IV stay inside the loop.
func.func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
      %cf9 = arith.addf %cf8, %cf8 : f32
      affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.apply #map{{[0-9]*}}(%arg0)
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }


  return
}

// -----

// The two stores alias (%m[%arg1] vs %m[%arg0]); only the pure arith ops hoist.
func.func @dependent_stores() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.mulf %cf7, %cf7 : f32
      affine.store %v1, %m[%arg1] : memref<10xf32>
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: %[[mul:.*]] = arith.mulf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {

  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store %[[mul]]
  // CHECK-NEXT: affine.store

  return
}

// -----

func.func @independent_stores() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.mulf %cf7, %cf7 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
      affine.store %v1, %m[%arg1] : memref<10xf32>
    }

  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[add:.*]] = arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: %[[mul:.*]] = arith.mulf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store %[[add]]
  // CHECK-NEXT: affine.store %[[mul]]
  // CHECK-NEXT: }

  return
}

// -----

// The load reads what the sibling store wrote; neither memory op may move.
func.func @load_dependent_store() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %cf7, %cf7 : f32
      affine.store %v0, %m[%arg1] : memref<10xf32>
      %v2 = affine.load %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.load

  return
}

// -----

// Two loads with no intervening store: the %arg0-indexed load is invariant
// w.r.t. the inner loop and is hoisted out of it.
func.func @load_after_load() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %cf7, %cf7 : f32
      %v3 = affine.load %m[%arg1] : memref<10xf32>
      %v2 = affine.load %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[cst_0:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst_0]] : f32
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: %{{.*}} = affine.load %{{.*}}[%{{.*}}] : memref<10xf32>

  return
}

// -----

// The affine.if and its contents only reference %arg0, so the whole if is
// invariant w.r.t. the inner loop.
func.func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>

      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }


  return
}

// -----

// The store inside the if uses %arg1, so the if cannot leave the inner loop.
func.func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg1] : memref<10xf32>

      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          affine.store %cf9, %m[%arg1] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.store {{.*}}[%[[arg1]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          affine.store %cf9, %m[%arg0] : memref<10xf32>
        } else {
          affine.store %cf9, %m[%arg1] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store {{.*}}[%[[arg1]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if_else2() {
  %m = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        %tload1 = affine.load %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          affine.store %cf9, %m2[%arg0] : memref<10xf32>
        } else {
          %tload2 = affine.load %m[%arg0] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_nested_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        %v1 = affine.load %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %v2 = affine.load %m[%arg0] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: affine.load {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

func.func @invariant_affine_for_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>
        affine.for %arg2 = 0 to 10 {
          affine.store %cf9, %m[%arg2] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[arg0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %[[arg1:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf %[[cst]], %[[cst]] : f32
  // CHECK-NEXT: affine.store {{.*}}[%[[arg0]]] : memref<10xf32>
  // CHECK-NEXT: affine.for %[[arg2:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.store {{.*}}[%[[arg2]]] : memref<10xf32>
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// The constant and the load from %m2 (never written in the loop) both hoist.
func.func @invariant_constant_and_load() {
  %m = memref.alloc() : memref<100xf32>
  %m2 = memref.alloc() : memref<100xf32>
  affine.for %arg0 = 0 to 5 {
    %c0 = arith.constant 0 : index
    %v = affine.load %m2[%c0] : memref<100xf32>
    affine.store %v, %m[%arg0] : memref<100xf32>
  }

  // CHECK: memref.alloc() : memref<100xf32>
  // CHECK-NEXT: memref.alloc() : memref<100xf32>
  // CHECK-NEXT: arith.constant 0 : index
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 5 {
  // CHECK-NEXT: affine.store


  return
}

// -----

// The outer load aliases the inner store on the same memref; it stays put.
func.func @nested_load_store_same_memref() {
  %m = memref.alloc() : memref<10xf32>
  %cst = arith.constant 8.0 : f32
  %c0 = arith.constant 0 : index
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m[%c0] : memref<10xf32>
    affine.for %arg1 = 0 to 10 {
      affine.store %cst, %m[%arg1] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.constant 0 : index
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store %[[cst]]


  return
}

// -----

func.func @nested_load_store_same_memref2() {
  %m = memref.alloc() : memref<10xf32>
  %cst = arith.constant 8.0 : f32
  %c0 = arith.constant 0 : index
  affine.for %arg0 = 0 to 10 {
    affine.store %cst, %m[%c0] : memref<10xf32>
    affine.for %arg1 = 0 to 10 {
      %v0 = affine.load %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[cst:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.constant 0 : index
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store %[[cst]]
  // CHECK-NEXT: affine.load


  return
}

// -----

// CHECK-LABEL: func @do_not_hoist_dependent_side_effect_free_op
func.func @do_not_hoist_dependent_side_effect_free_op(%arg0: memref<10x512xf32>) {
  %0 = memref.alloca() : memref<1xf32>
  %cst = arith.constant 8.0 : f32
  affine.for %i = 0 to 512 {
    affine.for %j = 0 to 10 {
      %5 = affine.load %arg0[%i, %j] : memref<10x512xf32>
      %6 = affine.load %0[0] : memref<1xf32>
      %add = arith.addf %5, %6 : f32
      affine.store %add, %0[0] : memref<1xf32>
    }
    %3 = affine.load %0[0] : memref<1xf32>
    %4 = arith.mulf %3, %cst : f32 // It shouldn't be hoisted.
  }
  return
}

// CHECK: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.load
// CHECK-NEXT: affine.load
// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: affine.load
// CHECK-NEXT: arith.mulf
// CHECK-NEXT: }

// -----

// CHECK-LABEL: func @vector_loop_nothing_invariant
func.func @vector_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<40xf32>
  %m2 = memref.alloc() : memref<40xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.vector_load %m1[%arg0*4] : memref<40xf32>, vector<4xf32>
    %v1 = affine.vector_load %m2[%arg0*4] : memref<40xf32>, vector<4xf32>
    %v2 = arith.addf %v0, %v1 : vector<4xf32>
    affine.vector_store %v2, %m1[%arg0*4] : memref<40xf32>, vector<4xf32>
  }
  return
}

// CHECK: affine.for
// CHECK-NEXT: affine.vector_load
// CHECK-NEXT: affine.vector_load
// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.vector_store
// CHECK-NEXT: }

// -----

// CHECK-LABEL: func @vector_loop_all_invariant
func.func @vector_loop_all_invariant() {
  %m1 = memref.alloc() : memref<4xf32>
  %m2 = memref.alloc() : memref<4xf32>
  %m3 = memref.alloc() : memref<4xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.vector_load %m1[0] : memref<4xf32>, vector<4xf32>
    %v1 = affine.vector_load %m2[0] : memref<4xf32>, vector<4xf32>
    %v2 = arith.addf %v0, %v1 : vector<4xf32>
    affine.vector_store %v2, %m3[0] : memref<4xf32>, vector<4xf32>
  }
  return
}

// CHECK: memref.alloc()
// CHECK-NEXT: memref.alloc()
// CHECK-NEXT: memref.alloc()
// CHECK-NEXT: affine.vector_load
// CHECK-NEXT: affine.vector_load
// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.vector_store
// CHECK-NEXT: affine.for

// -----

#set = affine_set<(d0): (d0 - 10 >= 0)>
// CHECK-LABEL: func @affine_if_not_invariant(
func.func @affine_if_not_invariant(%buffer: memref<1024xf32>) -> f32 {
  %sum_init_0 = arith.constant 0.0 : f32
  %sum_init_1 = arith.constant 1.0 : f32
  %res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
    %t = affine.load %buffer[%i] : memref<1024xf32>
    %sum_next = affine.if #set(%i) -> (f32) {
      %new_sum = arith.addf %sum_iter, %t : f32
      affine.yield %new_sum : f32
    } else {
      affine.yield %sum_iter : f32
    }
    %modified_sum = arith.addf %sum_next, %sum_init_1 : f32
    affine.yield %modified_sum : f32
  }
  return %res : f32
}

// CHECK: arith.constant 0.000000e+00 : f32
// CHECK-NEXT: arith.constant 1.000000e+00 : f32
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.load
// CHECK-NEXT: affine.if
// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.yield
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.yield
// CHECK-NEXT: }
// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.yield
// CHECK-NEXT: }

// -----

// CHECK-LABEL: func @affine_for_not_invariant(
func.func @affine_for_not_invariant(%in : memref<30x512xf32, 1>,
                               %out : memref<30x1xf32, 1>) {
  %sum_0 = arith.constant 0.0 : f32
  %cst_0 = arith.constant 1.1 : f32
  affine.for %j = 0 to 30 {
    %sum = affine.for %i = 0 to 512 iter_args(%sum_iter = %sum_0) -> (f32) {
      %t = affine.load %in[%j,%i] : memref<30x512xf32,1>
      %sum_next = arith.addf %sum_iter, %t : f32
      affine.yield %sum_next : f32
    }
    %mod_sum = arith.mulf %sum, %cst_0 : f32
    affine.store %mod_sum, %out[%j, 0] : memref<30x1xf32, 1>
  }
  return
}

// CHECK: arith.constant 0.000000e+00 : f32
// CHECK-NEXT: arith.constant 1.100000e+00 : f32
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.load
// CHECK-NEXT: arith.addf
// CHECK-NEXT: affine.yield
// CHECK-NEXT: }
// CHECK-NEXT: arith.mulf
// CHECK-NEXT: affine.store

// -----

// CHECK-LABEL: func @use_of_iter_operands_invariant
func.func @use_of_iter_operands_invariant(%m : memref<10xindex>) {
  %sum_1 = arith.constant 0 : index
  %v0 = affine.for %arg1 = 0 to 11 iter_args (%prevAccum = %sum_1) -> index {
    %prod = arith.muli %sum_1, %sum_1 : index
    %newAccum = arith.addi %prevAccum, %prod : index
    affine.yield %newAccum : index
  }
  return
}

// CHECK: constant
// CHECK-NEXT: muli
// CHECK-NEXT: affine.for
// CHECK-NEXT: addi
// CHECK-NEXT: affine.yield

// -----

// CHECK-LABEL: func @use_of_iter_args_not_invariant
func.func @use_of_iter_args_not_invariant(%m : memref<10xindex>) {
  %sum_1 = arith.constant 0 : index
  %v0 = affine.for %arg1 = 0 to 11 iter_args (%prevAccum = %sum_1) -> index {
    %newAccum = arith.addi %prevAccum, %sum_1 : index
    affine.yield %newAccum : index
  }
  return
}

// CHECK: arith.constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: arith.addi
// CHECK-NEXT: affine.yield

#map = affine_map<(d0) -> (64, d0 * -64 + 1020)>
// CHECK-LABEL: func.func @affine_parallel
func.func @affine_parallel(%memref_8: memref<4090x2040xf32>, %x: index) {
  %cst = arith.constant 0.000000e+00 : f32
  affine.parallel (%arg3) = (0) to (32) {
    affine.for %arg4 = 0 to 16 {
      affine.parallel (%arg5, %arg6) = (0, 0) to (min(128, 122), min(64, %arg3 * -64 + 2040)) {
        affine.for %arg7 = 0 to min #map(%arg4) {
          affine.store %cst, %memref_8[%arg5 + 3968, %arg6 + %arg3 * 64] : memref<4090x2040xf32>
        }
      }
    }
  }
  // CHECK: affine.parallel
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.parallel
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.for

  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c32 = arith.constant 32 : index
  scf.parallel (%arg3) = (%c0) to (%c32) step (%c1) {
    affine.for %arg4 = 0 to 16 {
      affine.parallel (%arg5, %arg6) = (0, 0) to (min(128, 122), min(64, %x * -64 + 2040)) {
        affine.for %arg7 = 0 to min #map(%arg4) {
          affine.store %cst, %memref_8[%arg5 + 3968, %arg6] : memref<4090x2040xf32>
        }
      }
    }
  }
  // CHECK: scf.parallel
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.parallel
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.for

  affine.for %arg3 = 0 to 32 {
    affine.for %arg4 = 0 to 16 {
      affine.parallel (%arg5, %arg6) = (0, 0) to (min(128, 122), min(64, %arg3 * -64 + 2040)) {
        // Unknown region-holding op for this pass.
        scf.for %arg7 = %c0 to %x step %c1 {
          affine.store %cst, %memref_8[%arg5 + 3968, %arg6 + %arg3 * 64] : memref<4090x2040xf32>
        }
      }
    }
  }
  // CHECK: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.parallel
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: affine.store

  return
}

// -----

// CHECK-LABEL: func.func @affine_invariant_use_after_dma
#map = affine_map<(d0) -> (d0 * 163840)>
func.func @affine_invariant_use_after_dma(%arg0: memref<10485760xi32>, %arg1: memref<1xi32>, %arg2: memref<10485760xi32>) {
  %c320 = arith.constant 320 : index
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %alloc = memref.alloc() {alignment = 16 : i64} : memref<0xi32, 2>
  %alloc_0 = memref.alloc() : memref<1xi32, 2>
  affine.for %arg3 = 0 to 64 {
    %0 = affine.apply #map(%arg3)
    %alloc_1 = memref.alloc() {alignment = 16 : i64} : memref<0xi32, 2>
    %alloc_2 = memref.alloc() : memref<320xi32, 2>
    affine.dma_start %arg0[%0], %alloc_2[%c0], %alloc_1[%c0], %c320 : memref<10485760xi32>, memref<320xi32, 2>, memref<0xi32, 2>
    affine.dma_start %arg1[%c0], %alloc_0[%c0], %alloc[%c0], %c1 : memref<1xi32>, memref<1xi32, 2>, memref<0xi32, 2>
    affine.dma_wait %alloc_1[%c0], %c320 : memref<0xi32, 2>
    affine.dma_wait %alloc[%c0], %c1 : memref<0xi32, 2>
    %1 = affine.apply #map(%arg3)
    %alloc_3 = memref.alloc() {alignment = 16 : i64} : memref<0xi32, 2>
    %alloc_4 = memref.alloc() : memref<320xi32, 2>
    affine.for %arg4 = 0 to 320 {
      %2 = affine.load %alloc_2[%arg4] : memref<320xi32, 2>
      %3 = affine.load %alloc_0[0] : memref<1xi32, 2>
      %4 = arith.addi %2, %3 : i32
      %5 = arith.addi %4, %2 : i32
      affine.store %5, %alloc_4[%arg4] : memref<320xi32, 2>
    }
    affine.dma_start %alloc_4[%c0], %arg2[%1], %alloc_3[%c0], %c320 : memref<320xi32, 2>, memref<10485760xi32>, memref<0xi32, 2>
    affine.dma_wait %alloc_3[%c0], %c320 : memref<0xi32, 2>
  }
  return
}
// CHECK: %[[zero:.*]] = arith.constant 0 : index
// CHECK: %[[scalar_mem:.*]] = memref.alloc() : memref<1xi32, 2>
// CHECK: affine.dma_start %arg1[%[[zero]]], %alloc_0[%[[zero]]], %alloc[%[[zero]]], %c1
// CHECK: affine.load %[[scalar_mem]][0]

// -----

// CHECK-LABEL: func @affine_prefetch_invariant
func.func @affine_prefetch_invariant() {
  %0 = memref.alloc() : memref<10x10xf32>
  affine.for %i0 = 0 to 10 {
    affine.for %i1 = 0 to 10 {
      %1 = affine.load %0[%i0, %i1] : memref<10x10xf32>
      // A prefetch shouldn't be hoisted.
      affine.prefetch %0[%i0, %i0], write, locality<0>, data : memref<10x10xf32>
    }
  }

  // CHECK: memref.alloc() : memref<10x10xf32>
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.load %{{.*}}[%{{.*}} : memref<10x10xf32>
  // CHECK-NEXT: affine.prefetch
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  return
}

// Side-effecting ops shouldn't be hoisted.

// CHECK-LABEL: func @side_effecting_ops
func.func @side_effecting_ops() {
  %cst = arith.constant 0.0 : f32
  %m0 = memref.alloc(): memref<1x512x16x16xf32>
  %0 = gpu.wait async
  affine.for %arg783 = 0 to 14 {
    affine.for %arg784 = 0 to 14 {
      affine.parallel (%arg785) = (0) to (512) {
        affine.for %arg786 = 0 to 1 {
          affine.for %arg787 = 0 to 1 {
            affine.for %arg788 = 0 to 1 {
              %m1 = memref.alloc() : memref<1xf32, 3>
              %m2 = memref.alloc() : memref<1xf32, 3>
              affine.store %cst, %m1[0] : memref<1xf32, 3>
              affine.store %cst, %m2[0] : memref<1xf32, 3>
              %memref_2897, %asyncToken_2898 = gpu.alloc async [%0] () : memref<1x512x16x16xf32>
              %2432 = gpu.memcpy async [%0] %memref_2897, %m0 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
              affine.for %arg789 = 0 to 16 {
                affine.for %arg790 = 0 to 16 {
                  affine.store %cst, %memref_2897[0, %arg785 + %arg788, %arg789, %arg790] : memref<1x512x16x16xf32>
                }
              }
              memref.dealloc %m2 : memref<1xf32, 3>
              memref.dealloc %m1 : memref<1xf32, 3>
              %2433 = gpu.memcpy async [%0] %m0, %memref_2897 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
              %2434 = gpu.dealloc async [%asyncToken_2898] %memref_2897 : memref<1x512x16x16xf32>
            }
          }
        }
      }
    }
  }
  // CHECK: affine.for %{{.*}} = 0 to 1
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 1
  // CHECK: memref.alloc
  // CHECK: memref.alloc
  // CHECK: gpu.memcpy
  // CHECK: affine.for %{{.*}} = 0 to 16
  // CHECK: affine.for %{{.*}} = 0 to 16
  // CHECK: memref.dealloc
  return
}