1 //===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header file defines prototypes for various loop fusion utility 10 // methods: these are not passes by themselves but are used either by passes, 11 // optimization sequences, or in turn by other transformation utilities. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H 16 #define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H 17 18 #include "mlir/IR/Value.h" 19 #include "mlir/Support/LLVM.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/SmallVector.h" 22 23 namespace mlir { 24 class Operation; 25 26 namespace affine { 27 class AffineForOp; 28 struct ComputationSliceState; 29 30 struct FusionResult { 31 enum ResultEnum { 32 Success, 33 FailPrecondition, // Failed precondition for fusion. (e.g. same block). 34 FailBlockDependence, // Fusion would violate another dependence in block. 35 FailFusionDependence, // Fusion would reverse dependences between loops. 36 FailComputationSlice, // Unable to compute src loop computation slice. 37 FailIncorrectSlice, // Slice is computed, but it is incorrect. 38 } value; FusionResultFusionResult39 FusionResult(ResultEnum v) : value(v) {} 40 }; 41 42 /// Describes the fusion strategy to be used in the Affine loop fusion 43 /// utilities. Currently, it is used to specialized the loop fusion utilities 44 /// with the assumptions made in the AffineLoopFusion pass for producer-consumer 45 /// and sibling fusion, while sharing a single implementation. The latter 46 /// strategies are also limited to scenarios where a single memref is involved 47 /// in the producer-consume or sibling relationship between the candidate 48 /// loops. We use 'memref' to keep track of such a memref. 49 // TODO: Generalize utilities so that producer-consumer and sibling fusion 50 // strategies can be used without the assumptions made in the AffineLoopFusion 51 // pass. 52 class FusionStrategy { 53 public: 54 enum StrategyEnum { 55 // Generic loop fusion: Arbitrary loops are considered for fusion. No 56 // assumptions about a specific fusion strategy from AffineLoopFusion pass 57 // are made. 58 // TODO: Generic fusion is not fully implemented by fusion utilities yet. 59 // It should only be used for testing. 60 Generic, 61 // Producer-consumer fusion: Only loops with a producer-consumer 62 // memref dependence are considered for fusion. Currently, assumptions from 63 // the producer-consumer fusion implementation in AffineLoopFusion pass are 64 // made. See pass for specific details. 65 ProducerConsumer, 66 // Sibling fusion: Only sibling loops with no producer-consumer memref 67 // dependences are considered for fusion. Memref reuse is taken into account 68 // for profitability. Currently, assumptions from the sibling fusion 69 // implementation in AffineLoopFusion pass are made. See pass for specific 70 // details. 71 Sibling 72 }; 73 74 /// Construct a generic or producer-consumer fusion strategy. FusionStrategy(StrategyEnum strategy)75 FusionStrategy(StrategyEnum strategy) : strategy(strategy) { 76 assert(strategy != Sibling && 77 "Sibling fusion strategy requires a specific memref"); 78 } 79 80 /// Construct a sibling fusion strategy targeting 'memref'. This construct 81 /// should only be used for sibling fusion. FusionStrategy(Value memref)82 FusionStrategy(Value memref) : strategy(Sibling), memref(memref) {} 83 84 /// Returns the fusion strategy. getStrategy()85 StrategyEnum getStrategy() const { return strategy; }; 86 87 /// Returns the memref attached to this sibling fusion strategy. getSiblingFusionMemRef()88 Value getSiblingFusionMemRef() const { 89 assert(strategy == Sibling && "Memref is only valid for sibling fusion"); 90 return memref; 91 } 92 93 private: 94 /// Fusion strategy. 95 StrategyEnum strategy; 96 97 /// Target memref for this fusion transformation. Only used for sibling 98 /// fusion. 99 Value memref; 100 }; 101 102 /// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the 103 /// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult 104 /// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are 105 /// in the same block and dependences would not be violated). Otherwise 106 /// returns a FusionResult explaining why fusion is not feasible. 107 /// NOTE: This function is not feature complete and should only be used in 108 /// testing. 109 FusionResult 110 canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth, 111 ComputationSliceState *srcSlice, 112 FusionStrategy fusionStrategy = FusionStrategy::Generic); 113 114 /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion 115 /// point and source slice loop bounds specified in 'srcSlice'. 116 /// `isInnermostSiblingInsertionFusion` enables cleanup of `srcForOp that is a 117 /// single-iteration reduction loop being sibling-fused into a 'dstForOp'. 118 void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, 119 const ComputationSliceState &srcSlice, 120 bool isInnermostSiblingInsertionFusion = false); 121 122 /// LoopNestStats aggregates various per-loop statistics (eg. loop trip count 123 /// and operation count) for a loop nest up until (and including) the innermost 124 /// loop body. 125 struct LoopNestStats { 126 /// Map from AffineForOp to immediate child AffineForOps in its loop body. 127 DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap; 128 /// Map from AffineForOp to count of operations in its loop body. 129 DenseMap<Operation *, uint64_t> opCountMap; 130 /// Map from AffineForOp to its constant trip count. 131 DenseMap<Operation *, uint64_t> tripCountMap; 132 }; 133 134 /// Collect loop nest statistics (eg. loop trip count and operation count) 135 /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success, 136 /// returns false otherwise. 137 // TODO: Consider moving this to LoopUtils. 138 bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats); 139 140 /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'. 141 /// Currently, the total cost is computed by counting the total operation 142 /// instance count (i.e. total number of operations in the loop body * loop 143 /// trip count) for the entire loop nest. 144 int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats); 145 146 /// Computes and returns in 'computeCost', the total compute cost of fusing the 147 /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently, 148 /// the total cost is computed by counting the total operation instance count 149 /// (i.e. total number of operations in the loop body * loop trip count) for 150 /// the entire loop nest. 151 /// Returns true on success, failure otherwise (e.g. non-constant trip counts). 152 bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats, 153 AffineForOp dstForOp, LoopNestStats &dstStats, 154 const ComputationSliceState &slice, 155 int64_t *computeCost); 156 157 /// Returns in 'producerConsumerMemrefs' the memrefs involved in a 158 /// producer-consumer dependence between write ops in 'srcOps' and read ops in 159 /// 'dstOps'. 160 void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps, 161 ArrayRef<Operation *> dstOps, 162 DenseSet<Value> &producerConsumerMemrefs); 163 164 } // namespace affine 165 } // namespace mlir 166 167 #endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H 168