//===- LoopAnalysis.cpp - Misc loop analysis routines ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "llvm/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
#include <numeric>
#include <optional>
#include <type_traits>

using namespace mlir;
using namespace mlir::affine;

#define DEBUG_TYPE "affine-loop-analysis"

/// Computes the trip count of the loop as an affine map, along with its
/// operands, if the trip count is expressible as an affine expression; sets
/// `*tripCountMap` to a null map otherwise. The trip count expression is
/// simplified before returning. This method only utilizes map composition to
/// construct lower and upper bounds before computing the trip count
/// expressions.
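///
/// For illustration (names are for exposition only): given a loop of the form
///   affine.for %i = %lb to %ub step 2 { ... }
/// the trip count map computes (%ub - %lb) ceildiv 2 over the bound operands;
/// with constant bounds 0 to 9 and step 2, it is the constant map () -> (5).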
void mlir::affine::getTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value> *tripCountOperands) {
  MLIRContext *context = forOp.getContext();
  int64_t step = forOp.getStepAsInt();
  int64_t loopSpan;
  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *tripCountMap = AffineMap::getConstantMap(
        llvm::divideCeilSigned(loopSpan, step), context);
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *tripCountMap = AffineMap();
    return;
  }

  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, forOp.getUpperBoundOperands());

  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat = AffineMap::get(lbMap.getNumDims(), lbMap.getNumSymbols(),
                                   lbSplatExpr, context);
  AffineValueMap lbSplatValueMap(lbMapSplat, forOp.getLowerBoundOperands());

  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, std::nullopt
/// otherwise. This method uses affine expression analysis (in turn using
/// getTripCountMapAndOperands) and is able to determine the constant trip
/// count in non-trivial cases.
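///
/// For instance, for a loop whose upper bound is the minimum of the constants
/// 8 and 16 (with a zero lower bound and unit step), the trip count map has
/// results (8, 16) and the returned constant trip count is 8, the minimum
/// across all map results.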
std::optional<uint64_t> mlir::affine::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return std::nullopt;

  // Take the min if all trip counts are constant.
  std::optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      if (tripCount.has_value())
        tripCount =
            std::min(*tripCount, static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return std::nullopt;
  }
  return tripCount;
}

/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through
/// getTripCountMapAndOperands), and this method is thus able to determine
/// non-trivial divisors.
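///
/// For example, a trip count map whose single result is s0 * 4 yields a
/// divisor of 4 even though the trip count itself is unknown; a known
/// zero-trip-count loop yields std::numeric_limits<uint64_t>::max().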
uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value, 4> operands;
  AffineMap map;
  getTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  std::optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = dyn_cast<AffineConstantExpr>(resultExpr)) {
      uint64_t tripCount = constExpr.getValue();
      // 0 iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; use its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.has_value())
      gcd = std::gcd(*gcd, thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.has_value() && "value expected per above logic");
  return *gcd;
}

/// Given an affine.for `iv` and an access `index` of type index, returns
/// `true` if `index` is independent of `iv` and `false` otherwise.
///
/// Prerequisite: `iv` is an affine.for induction variable and `index` is of
/// 'index' type.
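// The check composes the chain of affine.apply ops feeding `index` (if any)
// into a single value map and then tests whether the composed result is still
// a function of `iv`.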
static bool isAccessIndexInvariant(Value iv, Value index) {
  assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
  assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
  auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
  SmallVector<Value> operands = {index};
  AffineValueMap avm(map, operands);
  avm.composeSimplifyAndCanonicalize();
  return !avm.isFunctionOf(0, iv);
}

// Pre-requisite: Loop bounds should be in canonical form.
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
  AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
  avm.composeSimplifyAndCanonicalize();
  return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the template so that the compiler emits the
// instantiations we need.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
                                              AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

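// Returns the subset of `indices` that are invariant with respect to `iv`,
// i.e., that do not vary as `iv` iterates.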
DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
                                                   ArrayRef<Value> indices) {
  DenseSet<Value> res;
  for (auto val : indices) {
    if (isAccessIndexInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}

// TODO: check access stride.
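// Returns true if the access is invariant along `iv` or varies along `iv` in
// exactly one memref dimension; `*memRefDim` is then set to that dimension
// counted from the fastest-varying (innermost) one, or to -1 if the access is
// fully invariant. Returns false if two or more dimensions vary along `iv` or
// if the memref has a non-identity layout.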
template <typename LoadOrStoreOp>
bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
                                      int *memRefDim) {
  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
                                AffineWriteOpInterface>::value,
                "Must be called on either an affine read or write op");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  if (!memRefType.getLayout().isIdentity())
    return memoryOp.emitError("NYI: non-trivial layout map"), false;

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = dyn_cast<AffineDimExpr>(expr))
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = dyn_cast<AffineSymbolExpr>(expr))
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (Value exprOperand : exprOperands) {
      if (!isAccessIndexInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineReadOpInterface loadOp,
                                               int *memRefDim);
template bool mlir::affine::isContiguousAccess(Value iv,
                                               AffineWriteOpInterface storeOp,
                                               int *memRefDim);

template <typename LoadOrStoreOp>
static bool isVectorElement(LoadOrStoreOp memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return isa<VectorType>(memRefType.getElementType());
}

using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

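// Returns true if the body of `loop` is vectorizable: no conditionals, no ops
// with non-vectorizable operand or result types, no ops with unknown regions,
// no pre-existing vector transfers, and no loads/stores of vector elements;
// additionally, every load/store must satisfy `isVectorizableOp` when that
// callback is provided.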
static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 const VectorizableOpFun &isVectorizableOp,
                                 NestedPattern &vectorTransferMatcher) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization for ops with operand or result types that are not
  // vectorizable.
  auto types = matcher::Op([](Operation &op) -> bool {
    if (llvm::any_of(op.getOperandTypes(), [](Type type) {
          if (MemRefType t = dyn_cast<MemRefType>(type))
            return !VectorType::isValidElementType(t.getElementType());
          return !VectorType::isValidElementType(type);
        }))
      return true;
    return llvm::any_of(op.getResultTypes(), [](Type type) {
      return !VectorType::isValidElementType(type);
    });
  });
  SmallVector<NestedMatch, 8> opsMatched;
  types.match(forOp, &opsMatched);
  if (!opsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 && !isa<AffineIfOp, AffineForOp>(op);
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransferMatcher.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar element types are considered vectorizable; every load/store
    // in the body must be vectorizable for the loop to qualify.
    // TODO: ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

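// Vectorizability check that additionally requires all varying memory accesses
// in the loop body to vary along the same memref dimension; that common
// dimension (or -1 if every access is invariant) is reported through
// `*memRefDim`.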
bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, int *memRefDim, NestedPattern &vectorTransferMatcher) {
  *memRefDim = -1;
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    int thisOpMemRefDim = -1;
    bool isContiguous =
        load ? isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineReadOpInterface>(*load),
                                  &thisOpMemRefDim)
             : isContiguousAccess(loop.getInductionVar(),
                                  cast<AffineWriteOpInterface>(*store),
                                  &thisOpMemRefDim);
    if (thisOpMemRefDim != -1) {
      // If memory accesses vary across different dimensions, then the loop is
      // not vectorizable.
      if (*memRefDim != -1 && *memRefDim != thisOpMemRefDim)
        return false;
      *memRefDim = thisOpMemRefDim;
    }
    return isContiguous;
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun, vectorTransferMatcher);
}

bool mlir::affine::isVectorizableLoopBody(
    AffineForOp loop, NestedPattern &vectorTransferMatcher) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr, vectorTransferMatcher);
}

/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO: extend this to check for memory-based dependence violation when we
// have the support.
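// For example, if the result of an operation A is used by an operation B in
// the same block, shifting A by 2 while shifting B by 1 is rejected, since the
// def and its use no longer share the same shift factor.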
bool mlir::affine::isOpwiseShiftValid(AffineForOp forOp,
                                      ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (const auto &it :
       llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value result = op.getResult(i);
      for (auto *user : result.getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancOp = forBody->findAncestorOpInBlock(*user)) {
          assert(forBodyShift.count(ancOp) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancOp])
            return false;
        }
      }
    }
  }
  return true;
}

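// Checks whether the default hyper-rectangular tiling of the band `loops`
// preserves all dependences: a dependence with an entirely negative component
// at some depth would be reordered illegally by tiling, making the band
// non-tilable.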
bool mlir::affine::isTilingValid(ArrayRef<AffineForOp> loops) {
  assert(!loops.empty() && "no original loops provided");

  // First, collect all the load and store ops whose dependences we intend to
  // check.
  SmallVector<Operation *, 8> loadAndStoreOps;
  loops[0]->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  unsigned numLoops = loops.size();
  for (unsigned d = 1; d <= numLoops + 1; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      Operation *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        Operation *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);

        // Skip if there is no dependence in this case.
        if (!hasDependence(result))
          continue;

        // Check whether there is any negative direction vector in the
        // dependence components found above, which would mean the dependence
        // is violated by the default hyper-rectangular tiling method.
        LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality is "
                                   "violated for dependence at depth: "
                                << Twine(d) << " between:\n";);
        LLVM_DEBUG(srcAccess.opInst->dump());
        LLVM_DEBUG(dstAccess.opInst->dump());
        for (const DependenceComponent &depComp : depComps) {
          if (depComp.lb.has_value() && depComp.ub.has_value() &&
              *depComp.lb < *depComp.ub && *depComp.ub < 0) {
            LLVM_DEBUG(llvm::dbgs()
                       << "Dependence component lb = " << Twine(*depComp.lb)
                       << " ub = " << Twine(*depComp.ub)
                       << " is negative at depth: " << Twine(d)
                       << " and thus violates the legality rule.\n");
            return false;
          }
        }
      }
    }
  }

  return true;
}