//===- LoopAnalysis.cpp - Misc loop analysis routines ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "llvm/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
#include <numeric>
#include <optional>
#include <type_traits>

using namespace mlir;
using namespace mlir::affine;

#define DEBUG_TYPE "affine-loop-analysis"

/// Populates `tripCountMap` with the trip count of `forOp` expressed as an
/// affine map (with `tripCountOperands` as its operands) if the trip count is
/// expressible as an affine expression, and with a null map otherwise. The
/// trip count expression is simplified before returning. This method only
/// utilizes map composition to construct lower and upper bounds before
/// computing the trip count expressions.
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(
        llvm::divideCeilSigned(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}
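// Illustrative example (hypothetical loop, for exposition only): for
//   affine.for %i = 0 to 129 step 8 { ... }
// the constant-bound path above produces the constant map () -> (17), since
// ceildiv(129 - 0, 8) = 17. With a non-constant, multi-result upper bound
// map, one trip count expression is produced per upper bound result: that
// result minus the single lower bound expression, ceil-divided by the step.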
/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine constant trip counts
/// in non-trivial cases.
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}

/// Given an affine.for `iv` and an access `index` of type index, returns true
/// if `index` is independent of `iv` and false otherwise.
///
/// Prerequisite: `iv` is an affine.for induction variable and `index` is of
/// 'index' type.
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}
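// Illustrative example (hypothetical IR, for exposition only): in
//   affine.for %i = 0 to 100 {
//     %v = affine.load %A[%j, %i] : memref<100x100xf32>
//   }
// the first index %j (defined above the loop) is invariant with respect to
// %i, while the second one is not. Since the index expression is composed and
// simplified before the check, indices produced by chains of affine.apply ops
// over loop-invariant values are recognized as invariant as well.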
// Pre-requisite: Loop bounds should be in canonical form.
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the templates so that the compiler knows we need
// them.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

// TODO: check access stride.
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);
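// Illustrative example (hypothetical IR, for exposition only): with %i the
// induction variable of the enclosing affine.for, the access
//   affine.load %A[%j, %i] : memref<64x64xf32>
// varies only along the trailing (fastest-varying) memref dimension, so
// isContiguousAccess returns true and sets *memRefDim to 0. For
//   affine.load %A[%i, %j] : memref<64x64xf32>
// it sets *memRefDim to 1, and for an access that does not use %i at all it
// sets *memRefDim to -1.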
template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}
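// Illustrative example (hypothetical IR, for exposition only): in
//   affine.for %i = 0 to 128 {
//     %0 = affine.load %A[%i, %c0] : memref<128x128xf32>
//     affine.store %0, %B[%c0, %i] : memref<128x128xf32>
//   }
// the load varies along memref dimension 0 (memRefDim 1) while the store
// varies along dimension 1 (memRefDim 0), so the memRefDim overload above
// reports the loop as non-vectorizable: the two accesses disagree on the
// fastest-varying dimension.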
/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}

bool mlir::affine::isTilingValid(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no original loops provided");

  // First gather all the load and store ops whose pairwise dependences we
  // intend to check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  loops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = loops.size();
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);

        // Skip if there is no dependence in this case.
        if (!hasDependence(result))
          continue;

        // Check whether there is any negative direction vector in the
        // dependence components found above, which means that the dependence
        // is violated by the default hyper-rectangular tiling method.
        LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality is "
                                   "violated for dependence at depth: "
                                << Twine(d) << " between:\n";);
        LLVM_DEBUG(srcAccess.opInst->dump());
        LLVM_DEBUG(dstAccess.opInst->dump());
        for (const DependenceComponent &depComp : depComps) {
          if (depComp.lb.has_value() && depComp.ub.has_value() &&
              *depComp.lb < *depComp.ub && *depComp.ub < 0) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Dependence component lb = " << Twine(*depComp.lb)
                       << " ub = " << Twine(*depComp.ub)
                       << " is negative at depth: " << Twine(d)
                       << " and thus violates the legality rule.\n");
            return false;
          }
        }
      }
    }
  }

  return true;
}
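// Illustrative reading of the check above (hypothetical numbers): if
// checkMemrefAccessDependence reports a dependence at depth d whose component
// for some loop has lb = -3 and ub = -1, then lb < ub and ub < 0 both hold,
// so isTilingValid returns false and the band is reported as invalid to tile
// with the default hyper-rectangular method.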