xref: /llvm-project/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h (revision b4785cebfb0a756e19ff4ae3e41d2563f10cd292)
1 //===- LoopFusionUtils.h - Loop fusion utilities ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file defines prototypes for various loop fusion utility
10 // methods: these are not passes by themselves but are used either by passes,
11 // optimization sequences, or in turn by other transformation utilities.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
16 #define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
17 
18 #include "mlir/IR/Value.h"
19 #include "mlir/Support/LLVM.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 
23 namespace mlir {
24 class Operation;
25 
26 namespace affine {
27 class AffineForOp;
28 struct ComputationSliceState;
29 
30 struct FusionResult {
31   enum ResultEnum {
32     Success,
33     FailPrecondition,     // Failed precondition for fusion. (e.g. same block).
34     FailBlockDependence,  // Fusion would violate another dependence in block.
35     FailFusionDependence, // Fusion would reverse dependences between loops.
36     FailComputationSlice, // Unable to compute src loop computation slice.
37     FailIncorrectSlice,   // Slice is computed, but it is incorrect.
38   } value;
FusionResultFusionResult39   FusionResult(ResultEnum v) : value(v) {}
40 };
41 
42 /// Describes the fusion strategy to be used in the Affine loop fusion
43 /// utilities. Currently, it is used to specialized the loop fusion utilities
44 /// with the assumptions made in the AffineLoopFusion pass for producer-consumer
45 /// and sibling fusion, while sharing a single implementation. The latter
46 /// strategies are also limited to scenarios where a single memref is involved
47 /// in the producer-consume or sibling relationship between the candidate
48 /// loops. We use 'memref' to keep track of such a memref.
49 // TODO: Generalize utilities so that producer-consumer and sibling fusion
50 // strategies can be used without the assumptions made in the AffineLoopFusion
51 // pass.
52 class FusionStrategy {
53 public:
54   enum StrategyEnum {
55     // Generic loop fusion: Arbitrary loops are considered for fusion. No
56     // assumptions about a specific fusion strategy from AffineLoopFusion pass
57     // are made.
58     // TODO: Generic fusion is not fully implemented by fusion utilities yet.
59     // It should only be used for testing.
60     Generic,
61     // Producer-consumer fusion: Only loops with a producer-consumer
62     // memref dependence are considered for fusion. Currently, assumptions from
63     // the producer-consumer fusion implementation in AffineLoopFusion pass are
64     // made. See pass for specific details.
65     ProducerConsumer,
66     // Sibling fusion: Only sibling loops with no producer-consumer memref
67     // dependences are considered for fusion. Memref reuse is taken into account
68     // for profitability. Currently, assumptions from the sibling fusion
69     // implementation in AffineLoopFusion pass are made. See pass for specific
70     // details.
71     Sibling
72   };
73 
74   /// Construct a generic or producer-consumer fusion strategy.
FusionStrategy(StrategyEnum strategy)75   FusionStrategy(StrategyEnum strategy) : strategy(strategy) {
76     assert(strategy != Sibling &&
77            "Sibling fusion strategy requires a specific memref");
78   }
79 
80   /// Construct a sibling fusion strategy targeting 'memref'. This construct
81   /// should only be used for sibling fusion.
FusionStrategy(Value memref)82   FusionStrategy(Value memref) : strategy(Sibling), memref(memref) {}
83 
84   /// Returns the fusion strategy.
getStrategy()85   StrategyEnum getStrategy() const { return strategy; };
86 
87   /// Returns the memref attached to this sibling fusion strategy.
getSiblingFusionMemRef()88   Value getSiblingFusionMemRef() const {
89     assert(strategy == Sibling && "Memref is only valid for sibling fusion");
90     return memref;
91   }
92 
93 private:
94   /// Fusion strategy.
95   StrategyEnum strategy;
96 
97   /// Target memref for this fusion transformation. Only used for sibling
98   /// fusion.
99   Value memref;
100 };
101 
102 /// Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the
103 /// loop nest rooted at 'dstForOp' at 'dstLoopDepth'. Returns FusionResult
104 /// 'Success' if fusion of the src/dst loop nests is feasible (i.e. they are
105 /// in the same block and dependences would not be violated). Otherwise
106 /// returns a FusionResult explaining why fusion is not feasible.
107 /// NOTE: This function is not feature complete and should only be used in
108 /// testing.
109 FusionResult
110 canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth,
111              ComputationSliceState *srcSlice,
112              FusionStrategy fusionStrategy = FusionStrategy::Generic);
113 
114 /// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion
115 /// point and source slice loop bounds specified in 'srcSlice'.
116 /// `isInnermostSiblingInsertionFusion` enables cleanup of `srcForOp that is a
117 /// single-iteration reduction loop being sibling-fused into a 'dstForOp'.
118 void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
119                const ComputationSliceState &srcSlice,
120                bool isInnermostSiblingInsertionFusion = false);
121 
122 /// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
123 /// and operation count) for a loop nest up until (and including) the innermost
124 /// loop body.
125 struct LoopNestStats {
126   /// Map from AffineForOp to immediate child AffineForOps in its loop body.
127   DenseMap<Operation *, SmallVector<AffineForOp, 2>> loopMap;
128   /// Map from AffineForOp to count of operations in its loop body.
129   DenseMap<Operation *, uint64_t> opCountMap;
130   /// Map from AffineForOp to its constant trip count.
131   DenseMap<Operation *, uint64_t> tripCountMap;
132 };
133 
134 /// Collect loop nest statistics (eg. loop trip count and operation count)
135 /// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
136 /// returns false otherwise.
137 // TODO: Consider moving this to LoopUtils.
138 bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats);
139 
140 /// Computes the total cost of the loop nest rooted at 'forOp' using 'stats'.
141 /// Currently, the total cost is computed by counting the total operation
142 /// instance count (i.e. total number of operations in the loop body * loop
143 /// trip count) for the entire loop nest.
144 int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats);
145 
146 /// Computes and returns in 'computeCost', the total compute cost of fusing the
147 /// 'slice' of the loop nest rooted at 'srcForOp' into 'dstForOp'. Currently,
148 /// the total cost is computed by counting the total operation instance count
149 /// (i.e. total number of operations in the loop body * loop trip count) for
150 /// the entire loop nest.
151 /// Returns true on success, failure otherwise (e.g. non-constant trip counts).
152 bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats,
153                           AffineForOp dstForOp, LoopNestStats &dstStats,
154                           const ComputationSliceState &slice,
155                           int64_t *computeCost);
156 
157 /// Returns in 'producerConsumerMemrefs' the memrefs involved in a
158 /// producer-consumer dependence between write ops in 'srcOps' and read ops in
159 /// 'dstOps'.
160 void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps,
161                                    ArrayRef<Operation *> dstOps,
162                                    DenseSet<Value> &producerConsumerMemrefs);
163 
164 } // namespace affine
165 } // namespace mlir
166 
167 #endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
168