1 //===- EliminateBarriers.cpp - Eliminate extra barriers --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Barrier elimination pattern and pass. If a barrier does not enforce any 10 // conflicting pair of memory effects, including a pair that is enforced by 11 // another barrier, it is unnecessary and can be removed. Adapted from 12 // "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level 13 // Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in 14 // PPoPP 2023 and implementation in Polygeist. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "mlir/Dialect/Func/IR/FuncOps.h" 19 #include "mlir/Dialect/GPU/IR/GPUDialect.h" 20 #include "mlir/Dialect/GPU/Transforms/Passes.h" 21 #include "mlir/Dialect/MemRef/IR/MemRef.h" 22 #include "mlir/Dialect/SCF/IR/SCF.h" 23 #include "mlir/Dialect/Vector/IR/VectorOps.h" 24 #include "mlir/IR/Operation.h" 25 #include "mlir/Pass/Pass.h" 26 #include "mlir/Transforms/GreedyPatternRewriteDriver.h" 27 #include "llvm/ADT/TypeSwitch.h" 28 #include "llvm/Support/Debug.h" 29 30 namespace mlir { 31 #define GEN_PASS_DEF_GPUELIMINATEBARRIERS 32 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc" 33 } // namespace mlir 34 35 using namespace mlir; 36 using namespace mlir::gpu; 37 38 #define DEBUG_TYPE "gpu-erase-barriers" 39 #define DEBUG_TYPE_ALIAS "gpu-erase-barries-alias" 40 41 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") 42 #define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ") 43 44 // The functions below provide interface-like verification, but are too specific 45 // to barrier elimination to become interfaces. 
/// Implement the MemoryEffectsOpInterface in the suitable way: returns `true`
/// for ops known to have no memory effects even though they do not implement
/// the interface.
static bool isKnownNoEffectsOpWithoutInterface(Operation *op) {
  // memref::AssumeAlignment is conceptually pure, but marking it as such would
  // make DCE immediately remove it.
  return isa<memref::AssumeAlignmentOp>(op);
}

/// Returns `true` if the op defines the parallel region that is subject to
/// barrier synchronization.
static bool isParallelRegionBoundary(Operation *op) {
  // Test-only escape hatch: lets tests mark an arbitrary op as a region
  // boundary without building a full GPU launch structure.
  if (op->hasAttr("__parallel_region_boundary_for_test"))
    return true;

  return isa<GPUFuncOp, LaunchOp>(op);
}

/// Returns `true` if the op behaves like a sequential loop, e.g., the control
/// flow "wraps around" from the end of the body region back to its start.
static bool isSequentialLoopLike(Operation *op) { return isa<scf::ForOp>(op); }

/// Returns `true` if the regions of the op are guaranteed to be executed at
/// most once. Thus, if an operation in one of the nested regions of `op` is
/// executed then so are all the other operations in this region.
static bool hasSingleExecutionBody(Operation *op) {
  return isa<scf::IfOp, memref::AllocaScopeOp>(op);
}

/// Returns `true` if the operation is known to produce a pointer-like object
/// distinct from any other object produced by a similar operation. For
/// example, an allocation produces such an object. Accepts null (returning
/// `false`) so callers may pass `value.getDefiningOp()` directly.
static bool producesDistinctBase(Operation *op) {
  return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
}

/// Populates `effects` with all memory effects without associating them to a
/// specific value.
static void addAllValuelessEffects(
    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Read>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Write>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Allocate>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Free>());
}

/// Collect the memory effects of the given op in `effects`. Returns `true` if
/// it could extract the effect information from the op, otherwise returns
/// `false` and conservatively populates the list with all possible effects
/// associated with no particular value or symbol.
static bool
collectEffects(Operation *op,
               SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
               bool ignoreBarriers = true) {
  // Skip over barriers to avoid infinite recursion (those barriers would ask
  // this barrier again).
  if (ignoreBarriers && isa<BarrierOp>(op))
    return true;

  // Skip over ops that we know have no effects.
  if (isKnownNoEffectsOpWithoutInterface(op))
    return true;

  // Collect effect instances of the operation. Note that the implementation
  // of getEffects erases all effect instances that have the type other than
  // the template parameter so we collect them first in a local buffer and
  // then copy.
  if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
    SmallVector<MemoryEffects::EffectInstance> localEffects;
    iface.getEffects(localEffects);
    llvm::append_range(effects, localEffects);
    return true;
  }
  // Ops with recursive effects carry no effects of their own; recurse into
  // every operation of every nested block instead.
  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
    for (auto &region : op->getRegions()) {
      for (auto &block : region) {
        for (auto &innerOp : block)
          if (!collectEffects(&innerOp, effects, ignoreBarriers))
            return false;
      }
    }
    return true;
  }

  // We need to be conservative here in case the op doesn't have the interface
  // and assume it can have any possible effect.
  addAllValuelessEffects(effects);
  return false;
}

/// Get all effects before the given operation caused by other operations in
/// the same block. That is, this will not consider operations beyond the
/// block. Stops early at a barrier when `stopAtBarrier` is set, since the
/// barrier already orders anything beyond it. Returns `true` if the collected
/// effects are exact, `false` if they are a conservative over-approximation.
static bool
getEffectsBeforeInBlock(Operation *op,
                        SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                        bool stopAtBarrier) {
  // Nothing precedes the first operation of the block.
  if (op == &op->getBlock()->front())
    return true;

  for (Operation *it = op->getPrevNode(); it != nullptr;
       it = it->getPrevNode()) {
    if (isa<BarrierOp>(it)) {
      if (stopAtBarrier)
        return true;
      continue;
    }

    if (!collectEffects(it, effects))
      return false;
  }
  return true;
}

/// Collects memory effects from operations that may be executed before `op`
/// in a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
/// set. Returns `true` if the memory effects added to `effects` are exact,
/// `false` if they are a conservative over-approximation. The latter means
/// that `effects` contain instances not associated with a specific value.
static bool
getEffectsBefore(Operation *op,
                 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                 bool stopAtBarrier) {
  // Detached ops have nothing before them.
  if (!op->getBlock())
    return true;

  // If there is a non-structured control flow, bail.
  Region *region = op->getBlock()->getParent();
  if (region && !llvm::hasSingleElement(region->getBlocks())) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Collect all effects before the op.
  // NOTE(review): the exactness result is discarded here; an inexact in-block
  // collection has already appended valueless (alias-everything) effects, so
  // the conflict check stays conservative — confirm this is intended.
  getEffectsBeforeInBlock(op, effects, stopAtBarrier);

  // Stop if reached the parallel region boundary.
  if (isParallelRegionBoundary(op->getParentOp()))
    return true;

  // Otherwise, keep collecting above the parent operation.
  if (!getEffectsBefore(op->getParentOp(), effects, stopAtBarrier))
    return false;

  // If the op is loop-like, collect effects from the trailing operations until
  // we hit a barrier because they can be executed before the current operation
  // by the previous iteration of this loop. For example, in the following loop
  //
  //   for i = ... {
  //     op1
  //     ...
  //     barrier
  //     op2
  //   }
  //
  // the operation `op2` at iteration `i` is known to be executed before the
  // operation `op1` at iteration `i+1` and the side effects must be ordered
  // appropriately.
  if (isSequentialLoopLike(op->getParentOp())) {
    // Assuming loop terminators have no side effects.
    return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects,
                                   /*stopAtBarrier=*/true);
  }

  // If the parent operation is not guaranteed to execute its (single-block)
  // region once, walk the block.
  bool conservative = false;
  if (!hasSingleExecutionBody(op->getParentOp()))
    op->getParentOp()->walk([&](Operation *in) {
      if (conservative)
        return WalkResult::interrupt();
      if (!collectEffects(in, effects)) {
        conservative = true;
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

  return !conservative;
}

/// Get all effects after the given operation caused by other operations in
/// the same block. That is, this will not consider operations beyond the
/// block. Stops early at a barrier when `stopAtBarrier` is set. Returns
/// `true` if the collected effects are exact, `false` if they are a
/// conservative over-approximation.
static bool
getEffectsAfterInBlock(Operation *op,
                       SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                       bool stopAtBarrier) {
  // Nothing follows the last operation of the block.
  if (op == &op->getBlock()->back())
    return true;

  for (Operation *it = op->getNextNode(); it != nullptr;
       it = it->getNextNode()) {
    if (isa<BarrierOp>(it)) {
      if (stopAtBarrier)
        return true;
      continue;
    }
    if (!collectEffects(it, effects))
      return false;
  }
  return true;
}

/// Collects memory effects from operations that may be executed after `op`
/// in a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
/// set. Returns `true` if the memory effects added to `effects` are exact,
/// `false` if they are a conservative over-approximation. The latter means
/// that `effects` contain instances not associated with a specific value.
static bool
getEffectsAfter(Operation *op,
                SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                bool stopAtBarrier) {
  // Detached ops have nothing after them.
  if (!op->getBlock())
    return true;

  // If there is a non-structured control flow, bail.
  Region *region = op->getBlock()->getParent();
  if (region && !llvm::hasSingleElement(region->getBlocks())) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Collect all effects after the op.
  // NOTE(review): exactness result discarded, same rationale as in
  // getEffectsBefore — confirm this is intended.
  getEffectsAfterInBlock(op, effects, stopAtBarrier);

  // Stop if reached the parallel region boundary.
  if (isParallelRegionBoundary(op->getParentOp()))
    return true;

  // Otherwise, keep collecting below the parent operation.
  if (!getEffectsAfter(op->getParentOp(), effects, stopAtBarrier))
    return false;

  // If the op is loop-like, collect effects from the leading operations until
  // we hit a barrier because they can be executed after the current operation
  // by the next iteration of this loop. For example, in the following loop
  //
  //   for i = ... {
  //     op1
  //     ...
  //     barrier
  //     op2
  //   }
  //
  // the operation `op1` at iteration `i` is known to be executed after the
  // operation `op2` at iteration `i-1` and the side effects must be ordered
  // appropriately.
  if (isSequentialLoopLike(op->getParentOp())) {
    // A barrier at the very start of the loop body already orders everything.
    if (isa<BarrierOp>(op->getBlock()->front()))
      return true;

    bool exact = collectEffects(&op->getBlock()->front(), effects);
    return getEffectsAfterInBlock(&op->getBlock()->front(), effects,
                                  /*stopAtBarrier=*/true) &&
           exact;
  }

  // If the parent operation is not guaranteed to execute its (single-block)
  // region once, walk the block.
  bool conservative = false;
  if (!hasSingleExecutionBody(op->getParentOp()))
    op->getParentOp()->walk([&](Operation *in) {
      if (conservative)
        return WalkResult::interrupt();
      if (!collectEffects(in, effects)) {
        conservative = true;
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

  return !conservative;
}

/// Looks through known "view-like" ops to find the base memref.
321 static Value getBase(Value v) { 322 while (true) { 323 Operation *definingOp = v.getDefiningOp(); 324 if (!definingOp) 325 break; 326 327 bool shouldContinue = 328 TypeSwitch<Operation *, bool>(v.getDefiningOp()) 329 .Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>( 330 [&](auto op) { 331 v = op.getSource(); 332 return true; 333 }) 334 .Case<memref::TransposeOp>([&](auto op) { 335 v = op.getIn(); 336 return true; 337 }) 338 .Case<memref::CollapseShapeOp, memref::ExpandShapeOp>([&](auto op) { 339 v = op.getSrc(); 340 return true; 341 }) 342 .Default([](Operation *) { return false; }); 343 if (!shouldContinue) 344 break; 345 } 346 return v; 347 } 348 349 /// Returns `true` if the value is defined as a function argument. 350 static bool isFunctionArgument(Value v) { 351 auto arg = dyn_cast<BlockArgument>(v); 352 return arg && isa<FunctionOpInterface>(arg.getOwner()->getParentOp()); 353 } 354 355 /// Returns the operand that the operation "propagates" through it for capture 356 /// purposes. That is, if the value produced by this operation is captured, then 357 /// so is the returned value. 358 static Value propagatesCapture(Operation *op) { 359 return llvm::TypeSwitch<Operation *, Value>(op) 360 .Case( 361 [](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); }) 362 .Case([](CastOpInterface castLike) { return castLike->getOperand(0); }) 363 .Case([](memref::TransposeOp transpose) { return transpose.getIn(); }) 364 .Case<memref::ExpandShapeOp, memref::CollapseShapeOp>( 365 [](auto op) { return op.getSrc(); }) 366 .Default([](Operation *) { return Value(); }); 367 } 368 369 /// Returns `true` if the given operation is known to capture the given value, 370 /// `false` if it is known not to capture the given value, `nullopt` if neither 371 /// is known. 
372 static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) { 373 return llvm::TypeSwitch<Operation *, std::optional<bool>>(op) 374 // Store-like operations don't capture the destination, but do capture 375 // the value. 376 .Case<memref::StoreOp, vector::TransferWriteOp>( 377 [&](auto op) { return op.getValue() == v; }) 378 .Case<vector::StoreOp, vector::MaskedStoreOp>( 379 [&](auto op) { return op.getValueToStore() == v; }) 380 // These operations are known not to capture. 381 .Case([](memref::DeallocOp) { return false; }) 382 // By default, we don't know anything. 383 .Default([](Operation *) { return std::nullopt; }); 384 } 385 386 /// Returns `true` if the value may be captured by any of its users, i.e., if 387 /// the user may be storing this value into memory. This makes aliasing analysis 388 /// more conservative as it cannot assume the pointer-like value is only passed 389 /// around through SSA use-def. 390 static bool maybeCaptured(Value v) { 391 SmallVector<Value> todo = {v}; 392 while (!todo.empty()) { 393 Value v = todo.pop_back_val(); 394 for (Operation *user : v.getUsers()) { 395 // A user that is known to only read cannot capture. 396 auto iface = dyn_cast<MemoryEffectOpInterface>(user); 397 if (iface) { 398 SmallVector<MemoryEffects::EffectInstance> effects; 399 iface.getEffects(effects); 400 if (llvm::all_of(effects, 401 [](const MemoryEffects::EffectInstance &effect) { 402 return isa<MemoryEffects::Read>(effect.getEffect()); 403 })) { 404 continue; 405 } 406 } 407 408 // When an operation is known to create an alias, consider if the 409 // source is captured as well. 410 if (Value v = propagatesCapture(user)) { 411 todo.push_back(v); 412 continue; 413 } 414 415 std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(user, v); 416 if (!knownCaptureStatus || *knownCaptureStatus) 417 return true; 418 } 419 } 420 421 return false; 422 } 423 424 /// Returns true if two values may be referencing aliasing memory. 
/// rather naive and conservative analysis. Values defined by different
/// allocation-like operations as well as values derived from those by casts
/// and views cannot alias each other. Similarly, values defined by
/// allocations inside a function cannot alias function arguments. Global
/// values cannot alias each other or local allocations. Values that are
/// captured, i.e. themselves potentially stored in memory, are considered as
/// aliasing with everything. This seems sufficient to achieve barrier removal
/// in structured control flow; more complex cases would require a proper
/// dataflow analysis.
static bool mayAlias(Value first, Value second) {
  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
    DBGS_ALIAS() << "checking aliasing between ";
    DBGS_ALIAS() << first << "\n";
    DBGS_ALIAS() << " and ";
    DBGS_ALIAS() << second << "\n";
  });

  // Strip view-like ops down to the underlying base memrefs.
  first = getBase(first);
  second = getBase(second);

  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
    DBGS_ALIAS() << "base ";
    DBGS_ALIAS() << first << "\n";
    DBGS_ALIAS() << " and ";
    DBGS_ALIAS() << second << "\n";
  });

  // Values derived from the same base memref do alias (unless we do a more
  // advanced analysis to prove non-overlapping accesses).
  if (first == second) {
    DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
    return true;
  }

  // Different globals cannot alias; two uses of the same global do.
  if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
    if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
      return globFirst.getNameAttr() == globSecond.getNameAttr();
    }
  }

  // Two function arguments marked as noalias do not alias.
  auto isNoaliasFuncArgument = [](Value value) {
    auto bbArg = dyn_cast<BlockArgument>(value);
    if (!bbArg)
      return false;
    auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
    if (!iface)
      return false;
    // TODO: we need a way to not depend on the LLVM dialect here.
    return iface.getArgAttr(bbArg.getArgNumber(), "llvm.noalias") != nullptr;
  };
  if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
    return false;

  bool isDistinct[] = {producesDistinctBase(first.getDefiningOp()),
                       producesDistinctBase(second.getDefiningOp())};
  bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
                     second.getDefiningOp<memref::GetGlobalOp>() != nullptr};

  // Non-equivalent distinct bases and globals cannot alias. At this point, we
  // have already filtered out based on values being equal and global name
  // being equal.
  if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
    return false;

  bool isArg[] = {isFunctionArgument(first), isFunctionArgument(second)};

  // Distinct bases (allocations) cannot have been passed as an argument.
  if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
    return false;

  // Non-captured base distinct values cannot conflict with another base value.
  if (isDistinct[0] && !maybeCaptured(first))
    return false;
  if (isDistinct[1] && !maybeCaptured(second))
    return false;

  // Otherwise, conservatively assume aliasing.
  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
  return true;
}

/// Returns `true` if the effect may be affecting memory aliasing the value.
/// If the effect is not associated with any value, it is assumed to affect
/// all memory and therefore aliases with everything.
510 static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) { 511 if (Value v = a.getValue()) { 512 return mayAlias(v, v2); 513 } 514 return true; 515 } 516 517 /// Returns `true` if the two effects may be affecting aliasing memory. If 518 /// an effect is not associated with any value, it is assumed to affect all 519 /// memory and therefore aliases with everything. Effects on different resources 520 /// cannot alias. 521 static bool mayAlias(MemoryEffects::EffectInstance a, 522 MemoryEffects::EffectInstance b) { 523 if (a.getResource()->getResourceID() != b.getResource()->getResourceID()) 524 return false; 525 if (Value v2 = b.getValue()) { 526 return mayAlias(a, v2); 527 } else if (Value v = a.getValue()) { 528 return mayAlias(b, v); 529 } 530 return true; 531 } 532 533 /// Returns `true` if any of the "before" effect instances has a conflict with 534 /// any "after" instance for the purpose of barrier elimination. The effects are 535 /// supposed to be limited to a barrier synchronization scope. A conflict exists 536 /// if effects instances affect aliasing memory locations and at least on of 537 /// then as a write. As an exception, if the non-write effect is an allocation 538 /// effect, there is no conflict since we are only expected to see the 539 /// allocation happening in the same thread and it cannot be accessed from 540 /// another thread without capture (which we do handle in alias analysis). 541 static bool 542 haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects, 543 ArrayRef<MemoryEffects::EffectInstance> afterEffects) { 544 for (const MemoryEffects::EffectInstance &before : beforeEffects) { 545 for (const MemoryEffects::EffectInstance &after : afterEffects) { 546 // If cannot alias, definitely no conflict. 547 if (!mayAlias(before, after)) 548 continue; 549 550 // Read/read is not a conflict. 
551 if (isa<MemoryEffects::Read>(before.getEffect()) && 552 isa<MemoryEffects::Read>(after.getEffect())) { 553 continue; 554 } 555 556 // Allocate/* is not a conflict since the allocation happens within the 557 // thread context. 558 // TODO: This is not the case for */Free unless the allocation happened in 559 // the thread context, which we could also check for. 560 if (isa<MemoryEffects::Allocate>(before.getEffect()) || 561 isa<MemoryEffects::Allocate>(after.getEffect())) { 562 continue; 563 } 564 565 // In the particular case that the before effect is a free, we only have 2 566 // possibilities: 567 // 1. either the program is well-formed and there must be an interleaved 568 // alloc that must limit the scope of effect lookback and we can 569 // safely ignore the free -> read / free -> write and free -> free 570 // conflicts. 571 // 2. either the program is ill-formed and we are in undefined behavior 572 // territory. 573 if (isa<MemoryEffects::Free>(before.getEffect())) 574 continue; 575 576 // Other kinds of effects create a conflict, e.g. read-after-write. 
577 LLVM_DEBUG( 578 DBGS() << "found a conflict between (before): " << before.getValue() 579 << " read:" << isa<MemoryEffects::Read>(before.getEffect()) 580 << " write:" << isa<MemoryEffects::Write>(before.getEffect()) 581 << " alloc:" 582 << isa<MemoryEffects::Allocate>(before.getEffect()) << " free:" 583 << isa<MemoryEffects::Free>(before.getEffect()) << "\n"); 584 LLVM_DEBUG( 585 DBGS() << "and (after): " << after.getValue() 586 << " read:" << isa<MemoryEffects::Read>(after.getEffect()) 587 << " write:" << isa<MemoryEffects::Write>(after.getEffect()) 588 << " alloc:" << isa<MemoryEffects::Allocate>(after.getEffect()) 589 << " free:" << isa<MemoryEffects::Free>(after.getEffect()) 590 << "\n"); 591 return true; 592 } 593 } 594 595 return false; 596 } 597 598 namespace { 599 class BarrierElimination final : public OpRewritePattern<BarrierOp> { 600 public: 601 using OpRewritePattern<BarrierOp>::OpRewritePattern; 602 603 LogicalResult matchAndRewrite(BarrierOp barrier, 604 PatternRewriter &rewriter) const override { 605 LLVM_DEBUG(DBGS() << "checking the necessity of: " << barrier << " " 606 << barrier.getLoc() << "\n"); 607 608 SmallVector<MemoryEffects::EffectInstance> beforeEffects; 609 getEffectsBefore(barrier, beforeEffects, /*stopAtBarrier=*/true); 610 611 SmallVector<MemoryEffects::EffectInstance> afterEffects; 612 getEffectsAfter(barrier, afterEffects, /*stopAtBarrier=*/true); 613 614 if (!haveConflictingEffects(beforeEffects, afterEffects)) { 615 LLVM_DEBUG(DBGS() << "the surrounding barriers are sufficient, removing " 616 << barrier << "\n"); 617 rewriter.eraseOp(barrier); 618 return success(); 619 } 620 621 LLVM_DEBUG(DBGS() << "barrier is necessary: " << barrier << " " 622 << barrier.getLoc() << "\n"); 623 return failure(); 624 } 625 }; 626 627 class GpuEliminateBarriersPass 628 : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> { 629 void runOnOperation() override { 630 auto funcOp = getOperation(); 631 RewritePatternSet 
patterns(&getContext()); 632 mlir::populateGpuEliminateBarriersPatterns(patterns); 633 if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) { 634 return signalPassFailure(); 635 } 636 } 637 }; 638 639 } // namespace 640 641 void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) { 642 patterns.insert<BarrierElimination>(patterns.getContext()); 643 } 644