//===- EliminateBarriers.cpp - Eliminate extra barriers --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Barrier elimination pattern and pass. If a barrier does not enforce any
// conflicting pair of memory effects beyond those already enforced by another
// barrier, it is unnecessary and can be removed. Adapted from
// "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level
// Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko
// (PPoPP 2023) and from the implementation in Polygeist.
//
//===----------------------------------------------------------------------===//
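
// As a purely illustrative sketch (hypothetical IR, not taken from the test
// suite), the second barrier below is redundant: stopping at the first
// barrier, no effect collected before the second barrier conflicts with
// anything that follows it, so the pattern can erase it. The first barrier
// must stay because it orders the write against the subsequent read of the
// same buffer.
//
//   gpu.launch ... {
//     memref.store %v, %buf[%tid] : memref<32xf32>
//     gpu.barrier
//     %x = memref.load %buf[%other] : memref<32xf32>
//     gpu.barrier   // removable
//     gpu.terminator
//   }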

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

namespace mlir {
#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

using namespace mlir;
using namespace mlir::gpu;

#define DEBUG_TYPE "gpu-erase-barriers"
#define DEBUG_TYPE_ALIAS "gpu-erase-barriers-alias"

#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")

// The functions below provide interface-like verification, but are too specific
// to barrier elimination to become interfaces.

/// Returns `true` if the op is known to be free of memory effects even though
/// it does not implement the MemoryEffectsOpInterface.
static bool isKnownNoEffectsOpWithoutInterface(Operation *op) {
  // memref::AssumeAlignment is conceptually pure, but marking it as such would
  // make DCE immediately remove it.
  return isa<memref::AssumeAlignmentOp>(op);
}

/// Returns `true` if the op defines the parallel region that is subject to
/// barrier synchronization.
static bool isParallelRegionBoundary(Operation *op) {
  if (op->hasAttr("__parallel_region_boundary_for_test"))
    return true;

  return isa<GPUFuncOp, LaunchOp>(op);
}

/// Returns `true` if the op behaves like a sequential loop, i.e., the control
/// flow "wraps around" from the end of the body region back to its start.
static bool isSequentialLoopLike(Operation *op) { return isa<scf::ForOp>(op); }

/// Returns `true` if the regions of the op are guaranteed to be executed at
/// most once. Thus, if an operation in one of the nested regions of `op` is
/// executed, then so are all the other operations in this region.
static bool hasSingleExecutionBody(Operation *op) {
  return isa<scf::IfOp, memref::AllocaScopeOp>(op);
}

/// Returns `true` if the operation is known to produce a pointer-like object
/// distinct from any other object produced by a similar operation. For example,
/// an allocation produces such an object.
static bool producesDistinctBase(Operation *op) {
  return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
}

/// Populates `effects` with all memory effects without associating them to a
/// specific value.
static void addAllValuelessEffects(
    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Read>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Write>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Allocate>());
  effects.emplace_back(MemoryEffects::Effect::get<MemoryEffects::Free>());
}

/// Collect the memory effects of the given op in `effects`. Returns `true` if
/// it could extract the effect information from the op, otherwise returns
/// `false` and conservatively populates the list with all possible effects
/// associated with no particular value or symbol.
static bool
collectEffects(Operation *op,
               SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
               bool ignoreBarriers = true) {
  // Skip over barriers to avoid infinite recursion (those barriers would ask
  // this barrier again).
  if (ignoreBarriers && isa<BarrierOp>(op))
    return true;

  // Skip over ops that we know have no effects.
  if (isKnownNoEffectsOpWithoutInterface(op))
    return true;

  // Collect the effect instances of the operation. Note that the implementation
  // of getEffects erases all effect instances that have a type other than the
  // template parameter so we collect them first in a local buffer and then
  // copy.
  if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
    SmallVector<MemoryEffects::EffectInstance> localEffects;
    iface.getEffects(localEffects);
    llvm::append_range(effects, localEffects);
    return true;
  }
  if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
    for (auto &region : op->getRegions()) {
      for (auto &block : region) {
        for (auto &innerOp : block)
          if (!collectEffects(&innerOp, effects, ignoreBarriers))
            return false;
      }
    }
    return true;
  }

  // We need to be conservative here in case the op doesn't have the interface
  // and assume it can have any possible effect.
  addAllValuelessEffects(effects);
  return false;
}
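
// For example (illustrative, not exhaustive): a `memref.load` reports a Read
// effect on its memref operand through the interface, while an op such as
// `scf.if`, which carries the HasRecursiveMemoryEffects trait, contributes the
// accumulated effects of the operations nested in its regions.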

/// Get all effects before the given operation caused by other operations in the
/// same block. That is, this will not consider operations beyond the block.
static bool
getEffectsBeforeInBlock(Operation *op,
                        SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                        bool stopAtBarrier) {
  if (op == &op->getBlock()->front())
    return true;

  for (Operation *it = op->getPrevNode(); it != nullptr;
       it = it->getPrevNode()) {
    if (isa<BarrierOp>(it)) {
      if (stopAtBarrier)
        return true;
      continue;
    }

    if (!collectEffects(it, effects))
      return false;
  }
  return true;
}

/// Collects memory effects from operations that may be executed before `op` in
/// a trivial structured control flow, i.e., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
/// set. Returns `true` if the memory effects added to `effects` are exact,
/// `false` if they are a conservative over-approximation. The latter means that
/// `effects` contains instances not associated with a specific value.
static bool
getEffectsBefore(Operation *op,
                 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                 bool stopAtBarrier) {
  if (!op->getBlock())
    return true;

  // If there is a non-structured control flow, bail.
  Region *region = op->getBlock()->getParent();
  if (region && !llvm::hasSingleElement(region->getBlocks())) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Collect all effects before the op.
  getEffectsBeforeInBlock(op, effects, stopAtBarrier);

  // Stop if reached the parallel region boundary.
  if (isParallelRegionBoundary(op->getParentOp()))
    return true;

  // Otherwise, keep collecting above the parent operation.
  if (!getEffectsBefore(op->getParentOp(), effects, stopAtBarrier))
    return false;

  // If the op is loop-like, collect effects from the trailing operations until
  // we hit a barrier because they can be executed before the current operation
  // by the previous iteration of this loop. For example, in the following loop
  //
  //   for i = ... {
  //     op1
  //     ...
  //     barrier
  //     op2
  //   }
  //
  // the operation `op2` at iteration `i` is known to be executed before the
  // operation `op1` at iteration `i+1` and the side effects must be ordered
  // appropriately.
  if (isSequentialLoopLike(op->getParentOp())) {
    // Assuming loop terminators have no side effects.
    return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects,
                                   /*stopAtBarrier=*/true);
  }

  // If the parent operation is not guaranteed to execute its (single-block)
  // region once, walk the block.
  bool conservative = false;
  if (!hasSingleExecutionBody(op->getParentOp()))
    op->getParentOp()->walk([&](Operation *in) {
      if (conservative)
        return WalkResult::interrupt();
      if (!collectEffects(in, effects)) {
        conservative = true;
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

  return !conservative;
}

/// Get all effects after the given operation caused by other operations in the
/// same block. That is, this will not consider operations beyond the block.
static bool
getEffectsAfterInBlock(Operation *op,
                       SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                       bool stopAtBarrier) {
  if (op == &op->getBlock()->back())
    return true;

  for (Operation *it = op->getNextNode(); it != nullptr;
       it = it->getNextNode()) {
    if (isa<BarrierOp>(it)) {
      if (stopAtBarrier)
        return true;
      continue;
    }
    if (!collectEffects(it, effects))
      return false;
  }
  return true;
}

/// Collects memory effects from operations that may be executed after `op` in
/// a trivial structured control flow, i.e., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
/// set. Returns `true` if the memory effects added to `effects` are exact,
/// `false` if they are a conservative over-approximation. The latter means that
/// `effects` contains instances not associated with a specific value.
static bool
getEffectsAfter(Operation *op,
                SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
                bool stopAtBarrier) {
  if (!op->getBlock())
    return true;

  // If there is a non-structured control flow, bail.
  Region *region = op->getBlock()->getParent();
  if (region && !llvm::hasSingleElement(region->getBlocks())) {
    addAllValuelessEffects(effects);
    return false;
  }

  // Collect all effects after the op.
  getEffectsAfterInBlock(op, effects, stopAtBarrier);

  // Stop if reached the parallel region boundary.
  if (isParallelRegionBoundary(op->getParentOp()))
    return true;

  // Otherwise, keep collecting below the parent operation.
  if (!getEffectsAfter(op->getParentOp(), effects, stopAtBarrier))
    return false;

  // If the op is loop-like, collect effects from the leading operations until
  // we hit a barrier because they can be executed after the current operation
  // by the next iteration of this loop. For example, in the following loop
  //
  //   for i = ... {
  //     op1
  //     ...
  //     barrier
  //     op2
  //   }
  //
  // the operation `op1` at iteration `i` is known to be executed after the
  // operation `op2` at iteration `i-1` and the side effects must be ordered
  // appropriately.
  if (isSequentialLoopLike(op->getParentOp())) {
    if (isa<BarrierOp>(op->getBlock()->front()))
      return true;

    bool exact = collectEffects(&op->getBlock()->front(), effects);
    return getEffectsAfterInBlock(&op->getBlock()->front(), effects,
                                  /*stopAtBarrier=*/true) &&
           exact;
  }

  // If the parent operation is not guaranteed to execute its (single-block)
  // region once, walk the block.
  bool conservative = false;
  if (!hasSingleExecutionBody(op->getParentOp()))
    op->getParentOp()->walk([&](Operation *in) {
      if (conservative)
        return WalkResult::interrupt();
      if (!collectEffects(in, effects)) {
        conservative = true;
        return WalkResult::interrupt();
      }
      return WalkResult::advance();
    });

  return !conservative;
}

/// Looks through known "view-like" ops to find the base memref.
static Value getBase(Value v) {
  while (true) {
    Operation *definingOp = v.getDefiningOp();
    if (!definingOp)
      break;

    bool shouldContinue =
        TypeSwitch<Operation *, bool>(definingOp)
            .Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>(
                [&](auto op) {
                  v = op.getSource();
                  return true;
                })
            .Case<memref::TransposeOp>([&](auto op) {
              v = op.getIn();
              return true;
            })
            .Case<memref::CollapseShapeOp, memref::ExpandShapeOp>([&](auto op) {
              v = op.getSrc();
              return true;
            })
            .Default([](Operation *) { return false; });
    if (!shouldContinue)
      break;
  }
  return v;
}
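
// Illustrative (hypothetical IR) example for getBase: given
//   %base = memref.alloc() : memref<16x16xf32>
//   %view = memref.subview %base[0, 0] [8, 8] [1, 1]
//             : memref<16x16xf32> to memref<8x8xf32, strided<[16, 1]>>
// getBase(%view) looks through the subview and returns %base.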

/// Returns `true` if the value is defined as a function argument.
static bool isFunctionArgument(Value v) {
  auto arg = dyn_cast<BlockArgument>(v);
  return arg && isa<FunctionOpInterface>(arg.getOwner()->getParentOp());
}

/// Returns the operand that the operation "propagates" through it for capture
/// purposes. That is, if the value produced by this operation is captured, then
/// so is the returned value.
static Value propagatesCapture(Operation *op) {
  return llvm::TypeSwitch<Operation *, Value>(op)
      .Case(
          [](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); })
      .Case([](CastOpInterface castLike) { return castLike->getOperand(0); })
      .Case([](memref::TransposeOp transpose) { return transpose.getIn(); })
      .Case<memref::ExpandShapeOp, memref::CollapseShapeOp>(
          [](auto op) { return op.getSrc(); })
      .Default([](Operation *) { return Value(); });
}
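
// For example, for `%t = memref.transpose %m ...`, propagatesCapture returns
// %m: if the transposed value %t is captured, %m must be considered captured
// as well. Ops with no such source-to-result relation yield a null Value.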

/// Returns `true` if the given operation is known to capture the given value,
/// `false` if it is known not to capture the given value, `nullopt` if neither
/// is known.
static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
  return llvm::TypeSwitch<Operation *, std::optional<bool>>(op)
      // Store-like operations don't capture the destination, but do capture
      // the value.
      .Case<memref::StoreOp, vector::TransferWriteOp>(
          [&](auto op) { return op.getValue() == v; })
      .Case<vector::StoreOp, vector::MaskedStoreOp>(
          [&](auto op) { return op.getValueToStore() == v; })
      // These operations are known not to capture.
      .Case([](memref::DeallocOp) { return false; })
      // By default, we don't know anything.
      .Default([](Operation *) { return std::nullopt; });
}

/// Returns `true` if the value may be captured by any of its users, i.e., if
/// the user may be storing this value into memory. This makes aliasing analysis
/// more conservative as it cannot assume the pointer-like value is only passed
/// around through SSA use-def.
static bool maybeCaptured(Value v) {
  SmallVector<Value> todo = {v};
  while (!todo.empty()) {
    Value v = todo.pop_back_val();
    for (Operation *user : v.getUsers()) {
      // A user that is known to only read cannot capture.
      auto iface = dyn_cast<MemoryEffectOpInterface>(user);
      if (iface) {
        SmallVector<MemoryEffects::EffectInstance> effects;
        iface.getEffects(effects);
        if (llvm::all_of(effects,
                         [](const MemoryEffects::EffectInstance &effect) {
                           return isa<MemoryEffects::Read>(effect.getEffect());
                         })) {
          continue;
        }
      }

      // When an operation is known to create an alias, consider if the
      // source is captured as well.
      if (Value v = propagatesCapture(user)) {
        todo.push_back(v);
        continue;
      }

      std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(user, v);
      if (!knownCaptureStatus || *knownCaptureStatus)
        return true;
    }
  }

  return false;
}
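
// Illustrative (hypothetical IR) example for maybeCaptured: in
//   %slots = memref.alloc() : memref<memref<f32>>
//   memref.store %ptr, %slots[] : memref<memref<f32>>
// the stored value %ptr is reported as possibly captured (the store-like case
// in getKnownCapturingStatus above), while storing *into* %slots does not by
// itself capture %slots.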

/// Returns true if two values may be referencing aliasing memory. This is a
/// rather naive and conservative analysis. Values defined by different
/// allocation-like operations as well as values derived from those by casts and
/// views cannot alias each other. Similarly, values defined by allocations
/// inside a function cannot alias function arguments. Global values cannot
/// alias each other or local allocations. Values that are captured, i.e.,
/// themselves potentially stored in memory, are considered as aliasing with
/// everything. This seems sufficient to achieve barrier removal in structured
/// control flow; more complex cases would require a proper dataflow analysis.
static bool mayAlias(Value first, Value second) {
  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
    DBGS_ALIAS() << "checking aliasing between ";
    DBGS_ALIAS() << first << "\n";
    DBGS_ALIAS() << "                      and ";
    DBGS_ALIAS() << second << "\n";
  });

  first = getBase(first);
  second = getBase(second);

  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
    DBGS_ALIAS() << "base ";
    DBGS_ALIAS() << first << "\n";
    DBGS_ALIAS() << " and ";
    DBGS_ALIAS() << second << "\n";
  });

  // Values derived from the same base memref do alias (unless we do a more
  // advanced analysis to prove non-overlapping accesses).
  if (first == second) {
    DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
    return true;
  }

  // Different globals cannot alias.
  if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
    if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
      return globFirst.getNameAttr() == globSecond.getNameAttr();
    }
  }

  // Two function arguments marked as noalias do not alias.
  auto isNoaliasFuncArgument = [](Value value) {
    auto bbArg = dyn_cast<BlockArgument>(value);
    if (!bbArg)
      return false;
    auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
    if (!iface)
      return false;
    // TODO: we need a way to not depend on the LLVM dialect here.
    return iface.getArgAttr(bbArg.getArgNumber(), "llvm.noalias") != nullptr;
  };
  if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
    return false;

  bool isDistinct[] = {producesDistinctBase(first.getDefiningOp()),
                       producesDistinctBase(second.getDefiningOp())};
  bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
                     second.getDefiningOp<memref::GetGlobalOp>() != nullptr};

  // Non-equivalent distinct bases and globals cannot alias. At this point, we
  // have already handled the cases of equal values and of globals with equal
  // names.
  if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
    return false;

  bool isArg[] = {isFunctionArgument(first), isFunctionArgument(second)};

  // Distinct bases (allocations) cannot have been passed as an argument.
  if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
    return false;

  // Non-captured base distinct values cannot conflict with another base value.
  if (isDistinct[0] && !maybeCaptured(first))
    return false;
  if (isDistinct[1] && !maybeCaptured(second))
    return false;

  // Otherwise, conservatively assume aliasing.
  DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
  return true;
}
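
// Illustrative (hypothetical IR) example for mayAlias: two distinct
// allocations
//   %a = memref.alloc() : memref<32xf32>
//   %b = memref.alloc() : memref<32xf32>
// are reported as non-aliasing, whereas a subview of %a and %a itself share
// the same base after getBase and are conservatively reported as aliasing.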

/// Returns `true` if the effect may affect memory aliasing the value. If
/// the effect is not associated with any value, it is assumed to affect all
/// memory and therefore aliases with everything.
static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) {
  if (Value v = a.getValue()) {
    return mayAlias(v, v2);
  }
  return true;
}

/// Returns `true` if the two effects may affect aliasing memory. If
/// an effect is not associated with any value, it is assumed to affect all
/// memory and therefore aliases with everything. Effects on different resources
/// cannot alias.
static bool mayAlias(MemoryEffects::EffectInstance a,
                     MemoryEffects::EffectInstance b) {
  if (a.getResource()->getResourceID() != b.getResource()->getResourceID())
    return false;
  if (Value v2 = b.getValue()) {
    return mayAlias(a, v2);
  } else if (Value v = a.getValue()) {
    return mayAlias(b, v);
  }
  return true;
}

/// Returns `true` if any of the "before" effect instances has a conflict with
/// any "after" instance for the purpose of barrier elimination. The effects are
/// supposed to be limited to a barrier synchronization scope. A conflict exists
/// if effect instances affect aliasing memory locations and at least one of
/// them is a write. As an exception, if the non-write effect is an allocation
/// effect, there is no conflict since we are only expected to see the
/// allocation happening in the same thread and it cannot be accessed from
/// another thread without capture (which we do handle in alias analysis).
static bool
haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
                       ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
  for (const MemoryEffects::EffectInstance &before : beforeEffects) {
    for (const MemoryEffects::EffectInstance &after : afterEffects) {
      // If the effects cannot alias, there is definitely no conflict.
      if (!mayAlias(before, after))
        continue;

      // Read/read is not a conflict.
      if (isa<MemoryEffects::Read>(before.getEffect()) &&
          isa<MemoryEffects::Read>(after.getEffect())) {
        continue;
      }

      // Allocate/* is not a conflict since the allocation happens within the
      // thread context.
      // TODO: This is not the case for */Free unless the allocation happened in
      // the thread context, which we could also check for.
      if (isa<MemoryEffects::Allocate>(before.getEffect()) ||
          isa<MemoryEffects::Allocate>(after.getEffect())) {
        continue;
      }

      // In the particular case that the before effect is a free, we only have 2
      // possibilities:
      //   1. either the program is well-formed and there must be an interleaved
      //      alloc that limits the scope of effect lookback and we can safely
      //      ignore the free -> read / free -> write and free -> free
      //      conflicts;
      //   2. or the program is ill-formed and we are in undefined behavior
      //      territory.
      if (isa<MemoryEffects::Free>(before.getEffect()))
        continue;

      // Other kinds of effects create a conflict, e.g. read-after-write.
      LLVM_DEBUG(
          DBGS() << "found a conflict between (before): " << before.getValue()
                 << " read:" << isa<MemoryEffects::Read>(before.getEffect())
                 << " write:" << isa<MemoryEffects::Write>(before.getEffect())
                 << " alloc:"
                 << isa<MemoryEffects::Allocate>(before.getEffect()) << " free:"
                 << isa<MemoryEffects::Free>(before.getEffect()) << "\n");
      LLVM_DEBUG(
          DBGS() << "and (after):                " << after.getValue()
                 << " read:" << isa<MemoryEffects::Read>(after.getEffect())
                 << " write:" << isa<MemoryEffects::Write>(after.getEffect())
                 << " alloc:" << isa<MemoryEffects::Allocate>(after.getEffect())
                 << " free:" << isa<MemoryEffects::Free>(after.getEffect())
                 << "\n");
      return true;
    }
  }

  return false;
}
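
// For example, a Write to a buffer before the barrier paired with a Read of an
// aliasing buffer after it is a conflict (the barrier is needed to order the
// write before the read), whereas Read/Read pairs and effects on provably
// non-aliasing values are not conflicts.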

namespace {
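/// Pattern that erases a gpu.barrier if the memory effects that may be
/// executed before it (up to the previous barrier or the parallel region
/// boundary) do not conflict with the effects that may be executed after it,
/// i.e., if the barrier does not actually order any accesses.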
class BarrierElimination final : public OpRewritePattern<BarrierOp> {
public:
  using OpRewritePattern<BarrierOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(BarrierOp barrier,
                                PatternRewriter &rewriter) const override {
    LLVM_DEBUG(DBGS() << "checking the necessity of: " << barrier << " "
                      << barrier.getLoc() << "\n");

    SmallVector<MemoryEffects::EffectInstance> beforeEffects;
    getEffectsBefore(barrier, beforeEffects, /*stopAtBarrier=*/true);

    SmallVector<MemoryEffects::EffectInstance> afterEffects;
    getEffectsAfter(barrier, afterEffects, /*stopAtBarrier=*/true);

    if (!haveConflictingEffects(beforeEffects, afterEffects)) {
      LLVM_DEBUG(DBGS() << "the surrounding barriers are sufficient, removing "
                        << barrier << "\n");
      rewriter.eraseOp(barrier);
      return success();
    }

    LLVM_DEBUG(DBGS() << "barrier is necessary: " << barrier << " "
                      << barrier.getLoc() << "\n");
    return failure();
  }
};

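/// Pass that greedily applies the barrier elimination pattern to the body of
/// the operation it is run on.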
class GpuEliminateBarriersPass
    : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> {
  void runOnOperation() override {
    auto funcOp = getOperation();
    RewritePatternSet patterns(&getContext());
    mlir::populateGpuEliminateBarriersPatterns(patterns);
    if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) {
      return signalPassFailure();
    }
  }
};

} // namespace

void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) {
  patterns.insert<BarrierElimination>(patterns.getContext());
}