//===- Utils.h - Affine dialect utilities -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file declares a set of utilities for the affine dialect ops.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_AFFINE_UTILS_H
#define MLIR_DIALECT_AFFINE_UTILS_H

#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/OpDefinition.h"
#include <optional>

namespace mlir {
class DominanceInfo;
class Operation;
class PostDominanceInfo;
class ImplicitLocOpBuilder;

namespace func {
class FuncOp;
} // namespace func

namespace memref {
class AllocOp;
class AllocaOp;
} // namespace memref

namespace affine {
class AffineForOp;
class AffineIfOp;
class AffineParallelOp;

using ReductionLoopMap = DenseMap<Operation *, SmallVector<LoopReduction, 2>>;

/// Replaces a parallel affine.for op with a 1-d affine.parallel op. `forOp`'s
/// body is taken by the affine.parallel op and the former is erased.
/// (mlir::isLoopParallel can be used to detect a parallel affine.for op.) The
/// reductions specified in `parallelReductions` are also parallelized.
/// Parallelization will fail in the presence of loop iteration arguments that
/// are not listed in `parallelReductions`. `resOp`, if non-null, is set to the
/// newly created affine.parallel op.
LogicalResult affineParallelize(AffineForOp forOp,
                                ArrayRef<LoopReduction> parallelReductions = {},
                                AffineParallelOp *resOp = nullptr);

/// Hoists out affine.if/else to as high as possible, i.e., past all invariant
/// affine.fors/parallel's. Returns success if any hoisting happened; `folded`
/// is set to true if the op was folded or erased. This hoisting could lead to
/// significant code expansion in some cases.
LogicalResult hoistAffineIfOp(AffineIfOp ifOp, bool *folded = nullptr);

/// Holds parameters to perform n-D vectorization on a single loop nest.
/// For example, for the following loop nest:
///
/// func @vec2d(%in: memref<64x128x512xf32>, %out: memref<64x128x512xf32>) {
///   affine.for %i0 = 0 to 64 {
///     affine.for %i1 = 0 to 128 {
///       affine.for %i2 = 0 to 512 {
///         %ld = affine.load %in[%i0, %i1, %i2] : memref<64x128x512xf32>
///         affine.store %ld, %out[%i0, %i1, %i2] : memref<64x128x512xf32>
///       }
///     }
///   }
///   return
/// }
///
/// and VectorizationStrategy = 'vectorSizes = {8, 4}', 'loopToVectorDim =
/// {{i1->0}, {i2->1}}', SuperVectorizer will generate:
///
/// func @vec2d(%arg0: memref<64x128x512xf32>, %arg1: memref<64x128x512xf32>) {
///   affine.for %arg2 = 0 to 64 {
///     affine.for %arg3 = 0 to 128 step 8 {
///       affine.for %arg4 = 0 to 512 step 4 {
///         %cst = arith.constant 0.000000e+00 : f32
///         %0 = vector.transfer_read %arg0[%arg2, %arg3, %arg4], %cst : ...
///         vector.transfer_write %0, %arg1[%arg2, %arg3, %arg4] : ...
///       }
///     }
///   }
///   return
/// }
// TODO: Hoist to a VectorizationStrategy.cpp when appropriate.
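//
// As a rough C++ sketch of how the fields below map onto the example above
// (illustrative only; `i1Loop` and `i2Loop` stand for hypothetical AffineForOp
// handles to the loops on %i1 and %i2):
//
//   VectorizationStrategy strategy;
//   strategy.vectorSizes = {8, 4};
//   // %i1 is vectorized along vector dimension 0 (factor 8), %i2 along
//   // vector dimension 1 (factor 4).
//   strategy.loopToVectorDim[i1Loop.getOperation()] = 0;
//   strategy.loopToVectorDim[i2Loop.getOperation()] = 1;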
struct VectorizationStrategy {
  // Vectorization factors to apply to each target vector dimension.
  // Each factor will be applied to a different loop.
  SmallVector<int64_t, 8> vectorSizes;
  // Maps each AffineForOp vectorization candidate with its vector dimension.
  // The candidate will be vectorized using the vectorization factor in
  // 'vectorSizes' for that dimension.
  DenseMap<Operation *, unsigned> loopToVectorDim;
  // Maps loops that implement vectorizable reductions to the corresponding
  // reduction descriptors.
  ReductionLoopMap reductionLoops;
};

/// Replaces affine store and load accesses with scalars by forwarding stores
/// to loads and eliminating invariant affine loads; consequently, also
/// eliminates dead allocs.
void affineScalarReplace(func::FuncOp f, DominanceInfo &domInfo,
                         PostDominanceInfo &postDomInfo,
                         AliasAnalysis &analysis);

/// Vectorizes affine loops in 'loops' using the n-D vectorization factors in
/// 'vectorSizes'. By default, each vectorization factor is applied
/// inner-to-outer to the loops of each loop nest. 'fastestVaryingPattern' can
/// be optionally used to provide a different loop vectorization order.
/// If `reductionLoops` is not empty, the given reduction loops may be
/// vectorized along the reduction dimension.
/// TODO: Vectorizing reductions is supported only for 1-D vectorization.
void vectorizeAffineLoops(
    Operation *parentOp,
    llvm::DenseSet<Operation *, DenseMapInfo<Operation *>> &loops,
    ArrayRef<int64_t> vectorSizes, ArrayRef<int64_t> fastestVaryingPattern,
    const ReductionLoopMap &reductionLoops = ReductionLoopMap());

/// External utility to vectorize affine loops from a single loop nest using an
/// n-D vectorization strategy (see doc in VectorizationStrategy definition).
/// Loops are provided in a 2D vector container. The first dimension represents
/// the nesting level relative to the loops to be vectorized. The second
/// dimension contains the loops. This means that:
///   a) every loop in 'loops[i]' must have a parent loop in 'loops[i-1]',
///   b) a loop in 'loops[i]' may or may not have a child loop in 'loops[i+1]'.
///
/// For example, for the following loop nest:
///
/// func @vec2d(%in0: memref<64x128x512xf32>, %in1: memref<64x128x128xf32>,
///             %out0: memref<64x128x512xf32>,
///             %out1: memref<64x128x128xf32>) {
///   affine.for %i0 = 0 to 64 {
///     affine.for %i1 = 0 to 128 {
///       affine.for %i2 = 0 to 512 {
///         %ld = affine.load %in0[%i0, %i1, %i2] : memref<64x128x512xf32>
///         affine.store %ld, %out0[%i0, %i1, %i2] : memref<64x128x512xf32>
///       }
///       affine.for %i3 = 0 to 128 {
///         %ld = affine.load %in1[%i0, %i1, %i3] : memref<64x128x128xf32>
///         affine.store %ld, %out1[%i0, %i1, %i3] : memref<64x128x128xf32>
///       }
///     }
///   }
///   return
/// }
///
/// loops = {{%i0}, {%i2, %i3}}, to vectorize the outermost and the two
/// innermost loops;
/// loops = {{%i1}, {%i2, %i3}}, to vectorize the middle and the two innermost
/// loops;
/// loops = {{%i2}}, to vectorize only the first innermost loop;
/// loops = {{%i3}}, to vectorize only the second innermost loop;
/// loops = {{%i1}}, to vectorize only the middle loop.
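///
/// As a C++ usage sketch (illustrative only; `i1`, `i2` and `i3` stand for
/// hypothetical AffineForOp handles to the loops above, and `strategy` for a
/// previously populated VectorizationStrategy), the `{{%i1}, {%i2, %i3}}`
/// configuration could be assembled as:
///
///   std::vector<SmallVector<AffineForOp, 2>> loops;
///   loops.push_back({i1});     // Nesting level 0: the middle loop.
///   loops.push_back({i2, i3}); // Nesting level 1: the two innermost loops.
///   if (failed(vectorizeAffineLoopNest(loops, strategy)))
///     ... // The nest could not be vectorized with `strategy`.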
LogicalResult
vectorizeAffineLoopNest(std::vector<SmallVector<AffineForOp, 2>> &loops,
                        const VectorizationStrategy &strategy);

/// Normalize an affine.parallel op so that lower bounds are 0 and steps are 1.
/// As currently implemented, this transformation cannot fail and will return
/// early if the op is already in a normalized form.
void normalizeAffineParallel(AffineParallelOp op);

/// Normalize an affine.for op. An affine.for op is normalized by converting
/// the lower bound to zero and the loop step to one. The upper bound is set to
/// the trip count of the loop. Original loops must have a lower bound with
/// only a single result. There is no such restriction on upper bounds. Returns
/// success if the loop has been normalized (or is already in the normal form).
/// If `promoteSingleIter` is true, the loop is simply promoted if it has a
/// single iteration.
LogicalResult normalizeAffineFor(AffineForOp op,
                                 bool promoteSingleIter = false);

/// Traverse `e` and return an AffineExpr where all occurrences of `dim` have
/// been replaced by either:
///   - `min` if `positivePath` is true when we reach an occurrence of `dim`
///   - `max` if `positivePath` is false when we reach an occurrence of `dim`
/// `positivePath` is negated each time we hit a multiplicative or divisive
/// binary op with a constant negative coefficient.
AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
                        AffineExpr max, bool positivePath = true);

/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
/// optionally remapping the old memref's indices using the supplied affine
/// map, `indexRemap`. The new memref could be of a different shape or rank.
/// `extraIndices` provides any additional access indices to be added to the
/// start.
///
/// `indexRemap` remaps indices of the old memref access to a new set of
/// indices that are used to index the memref. Additional input operands to
/// indexRemap can be optionally provided in `extraOperands`, and they occupy
/// the start of its input list. `indexRemap`'s dimensional inputs are expected
/// to correspond to the memref's indices, and its symbolic inputs, if any,
/// should be provided in `symbolOperands`.
///
/// `domOpFilter`, if non-null, restricts the replacement to only those
/// operations that are dominated by the former; similarly, `postDomOpFilter`
/// restricts replacement to only those operations that are postdominated by
/// it.
///
/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
/// uses of a memref without any requirement for access index rewrites as long
/// as the user operation has the MemRefsNormalizable trait. The default value
/// of this flag is false.
///
/// 'replaceInDeallocOp', if set, allows DeallocOp, a non-dereferencing user,
/// to also be a candidate for replacement. The default value of this flag is
/// false.
///
/// Returns success if the replacement was performed, and failure if it is not
/// possible, i.e., whenever a memref is used as an operand in a
/// non-dereferencing context and 'allowNonDereferencingOps' is false;
/// dealloc's on the memref are an exception and are left untouched. See
/// comments at function definition for an example.
//
// Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
// The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
// the index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap =
// (d0, d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without
// any extra operands, note that 'indexRemap' would just be applied to the
// existing indices (%i, %j).
// TODO: allow extraIndices to be added at any position.
LogicalResult replaceAllMemRefUsesWith(
    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
    bool replaceInDeallocOp = false);

/// Performs the same replacement as the other version above but only for the
/// dereferencing uses of `oldMemRef` in `op`, except in cases where
/// 'allowNonDereferencingOps' is set to true, in which case the
/// non-dereferencing uses are replaced as well.
LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
                                       Operation *op,
                                       ArrayRef<Value> extraIndices = {},
                                       AffineMap indexRemap = AffineMap(),
                                       ArrayRef<Value> extraOperands = {},
                                       ArrayRef<Value> symbolOperands = {},
                                       bool allowNonDereferencingOps = false);

/// Rewrites the memref defined by this alloc op to have an identity layout map
/// and updates all its indexing uses. Returns failure if any of its uses
/// escape (while leaving the IR in a valid state).
template <typename AllocLikeOp>
LogicalResult normalizeMemRef(AllocLikeOp *op);
extern template LogicalResult
normalizeMemRef<memref::AllocaOp>(memref::AllocaOp *op);
extern template LogicalResult
normalizeMemRef<memref::AllocOp>(memref::AllocOp *op);

/// Normalizes `memrefType` so that the affine layout map of the memref is
/// transformed to an identity map with a new shape being computed for the
/// normalized memref type, and returns it. The old memref type is simply
/// returned if the normalization failed.
MemRefType normalizeMemRefType(MemRefType memrefType);

/// Given an operation, inserts one or more single result affine apply
/// operations, results of which are exclusively used by this operation.
/// The operands of these newly created affine apply ops are
/// guaranteed to be loop iterators or terminal symbols of a function.
///
/// Before
///
/// affine.for %i = 0 to #map(%N)
///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
///   send %A[%idx], ...
///   %v = "compute"(%idx, ...)
///
/// After
///
/// affine.for %i = 0 to #map(%N)
///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
///   send %A[%idx], ...
///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
///   %v = "compute"(%idx_, ...)
///
/// This allows the application of different transformations on send and
/// compute (e.g., different shifts/delays).
///
/// Fills `sliceOps` with the list of affine.apply operations.
/// In the following cases, `sliceOps` remains empty:
///   1. If none of opInst's operands were the result of an affine.apply
///      (i.e., there was no affine computation slice to create).
///   2. If all the affine.apply op's supplying operands to this opInst did not
///      have any uses other than those in this opInst.
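///
/// A minimal C++ usage sketch (illustrative only; `computeOp` stands for a
/// hypothetical handle to the "compute" operation above):
///
///   SmallVector<AffineApplyOp, 4> sliceOps;
///   createAffineComputationSlice(computeOp, &sliceOps);
///   for (AffineApplyOp applyOp : sliceOps)
///     ... // Shift/delay the apply ops feeding `computeOp` independently.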
void createAffineComputationSlice(Operation *opInst,
                                  SmallVectorImpl<AffineApplyOp> *sliceOps);

/// Emit code that computes the given affine expression using standard
/// arithmetic operations applied to the provided dimension and symbol values.
Value expandAffineExpr(OpBuilder &builder, Location loc, AffineExpr expr,
                       ValueRange dimValues, ValueRange symbolValues);

/// Create a sequence of operations that implement the `affineMap` applied to
/// the given `operands` (as if it were an AffineApplyOp).
std::optional<SmallVector<Value, 8>> expandAffineMap(OpBuilder &builder,
                                                     Location loc,
                                                     AffineMap affineMap,
                                                     ValueRange operands);

/// Holds the results of (div a, b) and (mod a, b).
struct DivModValue {
  Value quotient;
  Value remainder;
};

/// Create IR to calculate (div lhs, rhs) and (mod lhs, rhs).
DivModValue getDivMod(OpBuilder &b, Location loc, Value lhs, Value rhs);

/// Generate the IR to delinearize `linearIndex` given the `basis` and return
/// the multi-index. `hasOuterBound` indicates whether `basis` has an entry
/// giving the size of the first (outermost) multi-index result - if it is
/// true, the function will return `basis.size()` values, otherwise it will
/// return `basis.size() + 1`. For example, with `basis = [3, 4, 5]`,
/// delinearizing 23 yields the multi-index (1, 0, 3); the same result is
/// produced for `basis = [4, 5]` with `hasOuterBound = false`, where the
/// outermost value is unbounded.
FailureOr<SmallVector<Value>> delinearizeIndex(OpBuilder &b, Location loc,
                                               Value linearIndex,
                                               ArrayRef<Value> basis,
                                               bool hasOuterBound = true);

FailureOr<SmallVector<Value>> delinearizeIndex(OpBuilder &b, Location loc,
                                               Value linearIndex,
                                               ArrayRef<OpFoldResult> basis,
                                               bool hasOuterBound = true);

/// Generate IR that extracts the linear index from a multi-index according to
/// a basis/shape. The basis may contain either `multiIndex.size()` or
/// `multiIndex.size() - 1` elements.
OpFoldResult linearizeIndex(ArrayRef<OpFoldResult> multiIndex,
                            ArrayRef<OpFoldResult> basis,
                            ImplicitLocOpBuilder &builder);

OpFoldResult linearizeIndex(OpBuilder &builder, Location loc,
                            ArrayRef<OpFoldResult> multiIndex,
                            ArrayRef<OpFoldResult> basis);

/// Ensure that all operations that could be executed after `start`
/// (noninclusive) and prior to `memOp` (e.g. on a control flow/op path
/// between the operations) do not have the potential memory effect
/// `EffectType` on `memOp`. `memOp` is an operation that reads or writes to
/// a memref. For example, if `EffectType` is MemoryEffects::Write, this method
/// will check if there is no write to the memory between `start` and `memOp`
/// that would change the read within `memOp`.
template <typename EffectType, typename T>
bool hasNoInterveningEffect(Operation *start, T memOp,
                            llvm::function_ref<bool(Value, Value)> mayAlias);

struct AffineValueExpr {
  explicit AffineValueExpr(AffineExpr e) : e(e) {}
  AffineValueExpr bind(Value v) {
    this->v = v;
    return *this;
  }
  AffineValueExpr bind(OpFoldResult v) {
    this->v = v;
    return *this;
  }
  operator AffineExpr() const { return e; }
  operator OpFoldResult() const { return v; }
  AffineExpr e;
  OpFoldResult v;
};

/// Helper struct to build simple AffineValueExprs with minimal type inference
/// support.
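///
/// A minimal usage sketch (illustrative only; `v0` and `v1` stand for
/// hypothetical index-typed values, `builder` for an OpBuilder and `loc` for a
/// Location):
///
///   AffineExpr d0, d1;
///   bindDims(builder.getContext(), d0, d1);
///   AffineBuilder ab(builder, loc);
///   // Emits a composed and folded affine.apply computing v0 + v1.
///   OpFoldResult sum = ab.add(AffineValueExpr(d0).bind(v0),
///                             AffineValueExpr(d1).bind(v1));
///   // Emits an affine.min over the provided values.
///   OpFoldResult smallest = ab.min({sum, OpFoldResult(v1)});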
struct AffineBuilder {
  AffineBuilder(OpBuilder &b, Location loc) : b(b), loc(loc) {}
  OpFoldResult add(AffineValueExpr lhs, AffineValueExpr rhs) {
    return makeComposedFoldedAffineApply(b, loc, {lhs.e + rhs.e}, {lhs, rhs});
  }
  OpFoldResult sub(AffineValueExpr lhs, AffineValueExpr rhs) {
    return makeComposedFoldedAffineApply(b, loc, {lhs.e - rhs.e}, {lhs, rhs});
  }
  OpFoldResult mul(AffineValueExpr lhs, AffineValueExpr rhs) {
    return makeComposedFoldedAffineApply(b, loc, {lhs.e * rhs.e}, {lhs, rhs});
  }
  OpFoldResult floor(AffineValueExpr lhs, AffineValueExpr rhs) {
    return makeComposedFoldedAffineApply(b, loc, {lhs.e.floorDiv(rhs.e)},
                                         {lhs, rhs});
  }
  OpFoldResult ceil(AffineValueExpr lhs, AffineValueExpr rhs) {
    return makeComposedFoldedAffineApply(b, loc, {lhs.e.ceilDiv(rhs.e)},
                                         {lhs, rhs});
  }
  OpFoldResult min(ArrayRef<OpFoldResult> vals) {
    return makeComposedFoldedAffineMin(
        b, loc, AffineMap::getMultiDimIdentityMap(vals.size(), b.getContext()),
        vals);
  }
  OpFoldResult max(ArrayRef<OpFoldResult> vals) {
    return makeComposedFoldedAffineMax(
        b, loc, AffineMap::getMultiDimIdentityMap(vals.size(), b.getContext()),
        vals);
  }

private:
  OpBuilder &b;
  Location loc;
};

} // namespace affine
} // namespace mlir

#endif // MLIR_DIALECT_AFFINE_UTILS_H