//===- AffineAnalysis.cpp - Affine structures analysis routines -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous analysis routines for affine structures
// (expressions, maps, sets), and other utilities relying on such analysis.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Presburger/IntegerRelation.h"
#include "mlir/Analysis/Presburger/PresburgerSpace.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

#define DEBUG_TYPE "affine-analysis"

using namespace mlir;
using namespace affine;
using namespace presburger;

/// Get the value that is being reduced by the `pos`-th reduction in the loop
/// if such a reduction can be performed by affine parallel loops. This assumes
/// floating-point operations are commutative. On success, `kind` will be the
/// reduction kind suitable for use in the affine parallel loop builder. If the
/// reduction is not supported, returns null.
static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
                                   arith::AtomicRMWKind &kind) {
  SmallVector<Operation *> combinerOps;
  Value reducedVal =
      matchReduction(forOp.getRegionIterArgs(), pos, combinerOps);
  if (!reducedVal)
    return nullptr;

  // Expected only one combiner operation.
  if (combinerOps.size() > 1)
    return nullptr;

  Operation *combinerOp = combinerOps.back();
  std::optional<arith::AtomicRMWKind> maybeKind =
      TypeSwitch<Operation *, std::optional<arith::AtomicRMWKind>>(combinerOp)
          .Case([](arith::AddFOp) { return arith::AtomicRMWKind::addf; })
          .Case([](arith::MulFOp) { return arith::AtomicRMWKind::mulf; })
          .Case([](arith::AddIOp) { return arith::AtomicRMWKind::addi; })
          .Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
          .Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
          .Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
          .Case(
              [](arith::MinimumFOp) { return arith::AtomicRMWKind::minimumf; })
          .Case(
              [](arith::MaximumFOp) { return arith::AtomicRMWKind::maximumf; })
          .Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
          .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
          .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
          .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
          .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
            // TODO: AtomicRMW supports other kinds of reductions that are not
            // detected here yet; add them when the need arises.
            return std::nullopt;
          });
  if (!maybeKind)
    return nullptr;

  kind = *maybeKind;
  return reducedVal;
}

/// Populate `supportedReductions` with descriptors of the supported reductions.
void mlir::affine::getSupportedReductions(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> &supportedReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();
  if (numIterArgs == 0)
    return;
  supportedReductions.reserve(numIterArgs);
  for (unsigned i = 0; i < numIterArgs; ++i) {
    arith::AtomicRMWKind kind;
    if (Value value = getSupportedReduction(forOp, i, kind))
      supportedReductions.emplace_back(LoopReduction{kind, i, value});
  }
}
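
// Illustrative example (not part of the upstream sources): for a loop such as
//
//   %red = affine.for %i = 0 to 128 iter_args(%acc = %cst) -> (f32) {
//     %v = affine.load %buf[%i] : memref<128xf32>
//     %sum = arith.addf %acc, %v : f32
//     affine.yield %sum : f32
//   }
//
// getSupportedReductions would record a single LoopReduction with kind
// arith::AtomicRMWKind::addf at iter_arg position 0, with %v as the reduced
// value.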

/// Returns true if `forOp' is a parallel loop. If `parallelReductions` is
/// provided, populates it with descriptors of the parallelizable reductions and
/// treats them as not preventing parallelization.
bool mlir::affine::isLoopParallel(
    AffineForOp forOp, SmallVectorImpl<LoopReduction> *parallelReductions) {
  unsigned numIterArgs = forOp.getNumIterOperands();

  // Loop is not parallel if it has SSA loop-carried dependences and reduction
  // detection is not requested.
  if (numIterArgs > 0 && !parallelReductions)
    return false;

  // Find supported reductions if requested.
  if (parallelReductions) {
    getSupportedReductions(forOp, *parallelReductions);
    // Return only after all supported reductions have been identified, so that
    // callers see the full list even if the loop turns out not to be parallel.
    if (parallelReductions->size() != numIterArgs)
      return false;
  }

  // Check memory dependences.
  return isLoopMemoryParallel(forOp);
}
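
// A minimal usage sketch (hypothetical caller, not part of this file): a
// parallelization pass would typically query the loop and its reductions
// together, e.g.:
//
//   SmallVector<LoopReduction> reductions;
//   if (isLoopParallel(forOp, &reductions)) {
//     // `forOp` can be rewritten as an affine.parallel op; `reductions`
//     // describes how each iter_arg is to be combined.
//   }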

/// Returns true if `v` is allocated locally to `enclosingOp` -- i.e., it is
/// allocated by an operation nested within `enclosingOp`.
static bool isLocallyDefined(Value v, Operation *enclosingOp) {
  Operation *defOp = v.getDefiningOp();
  if (!defOp)
    return false;

  if (hasSingleEffect<MemoryEffects::Allocate>(defOp, v) &&
      enclosingOp->isProperAncestor(defOp))
    return true;

  // Aliasing ops.
  auto viewOp = dyn_cast<ViewLikeOpInterface>(defOp);
  return viewOp && isLocallyDefined(viewOp.getViewSource(), enclosingOp);
}

bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) {
  // Any memref-typed iteration arguments are treated as serializing.
  if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred<BaseMemRefType>))
    return false;

  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  auto walkResult = forOp.walk([&](Operation *op) -> WalkResult {
    if (auto readOp = dyn_cast<AffineReadOpInterface>(op)) {
      // Memrefs that are allocated inside `forOp` need not be considered.
      if (!isLocallyDefined(readOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (auto writeOp = dyn_cast<AffineWriteOpInterface>(op)) {
      // Filter out stores the same way as above.
      if (!isLocallyDefined(writeOp.getMemRef(), forOp))
        loadAndStoreOps.push_back(op);
    } else if (!isa<AffineForOp, AffineYieldOp, AffineIfOp>(op) &&
               !hasSingleEffect<MemoryEffects::Allocate>(op) &&
               !isMemoryEffectFree(op)) {
      // Alloc-like ops inside `forOp` are fine (they don't impact parallelism)
      // as long as they don't escape the loop (which has been checked above).
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });

  // Stop early if the loop has unknown ops with side effects.
  if (walkResult.wasInterrupted())
    return false;

  // Dep check depth would be number of enclosing loops + 1.
  unsigned depth = getNestingDepth(forOp) + 1;

  // Check dependences between all pairs of ops in 'loadAndStoreOps'.
  for (auto *srcOp : loadAndStoreOps) {
    MemRefAccess srcAccess(srcOp);
    for (auto *dstOp : loadAndStoreOps) {
      MemRefAccess dstAccess(dstOp);
      DependenceResult result =
          checkMemrefAccessDependence(srcAccess, dstAccess, depth);
      if (result.value != DependenceResult::NoDependence)
        return false;
    }
  }
  return true;
}

/// Populates 'affineApplyOps' with the sequence of AffineApplyOp operations
/// reachable via a search starting from 'operands' and ending at operands that
/// are not defined by AffineApplyOps.
// TODO: Add a method to AffineApplyOp which forward substitutes the
// AffineApplyOp into any user AffineApplyOps.
void mlir::affine::getReachableAffineApplyOps(
    ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) {
  struct State {
    // The SSA value for this node in the DFS traversal.
    Value value;
    // The operand index of 'value' to explore next during DFS traversal.
    unsigned operandIndex;
  };
  SmallVector<State, 4> worklist;
  for (auto operand : operands) {
    worklist.push_back({operand, 0});
  }

  while (!worklist.empty()) {
    State &state = worklist.back();
    auto *opInst = state.value.getDefiningOp();
    // Note: getDefiningOp will return nullptr if the operand is not an
    // Operation (i.e. block argument), which is a terminator for the search.
    if (!isa_and_nonnull<AffineApplyOp>(opInst)) {
      worklist.pop_back();
      continue;
    }

    if (state.operandIndex == 0) {
      // Pre-visit: Add 'opInst' to reachable sequence.
      affineApplyOps.push_back(opInst);
    }
    if (state.operandIndex < opInst->getNumOperands()) {
      // Visit: Add next 'affineApplyOp' operand to worklist.
      // Get next operand to visit at 'operandIndex'.
      auto nextOperand = opInst->getOperand(state.operandIndex);
      // Increment 'operandIndex' in 'state'.
      ++state.operandIndex;
      // Add 'nextOperand' to worklist.
      worklist.push_back({nextOperand, 0});
    } else {
      // Post-visit: done visiting this AffineApplyOp's operands; pop it off
      // the stack.
      worklist.pop_back();
    }
  }
}
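
// Illustrative example (hypothetical IR, not from this file): starting from
// the load index %a1 below, the traversal collects the op defining %a1 and
// then the op defining %a0, stopping at the block argument %i:
//
//   %a0 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%i)
//   %a1 = affine.apply affine_map<(d0) -> (d0 * 2)>(%a0)
//   %v = memref.load %buf[%a1] : memref<?xf32>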

// Builds a system of constraints with dimensional variables corresponding to
// the loop IVs of the forOps appearing in that order. Any symbols found in
// the bound operands are added as symbols in the system. Returns failure for
// the yet unimplemented cases.
// TODO: Handle non-unit steps through local variables or stride information in
// FlatAffineValueConstraints. (E.g., by using iv - lb % step = 0 and/or by
// introducing a method in FlatAffineValueConstraints
// setExprStride(ArrayRef<int64_t> expr, int64_t stride).)
LogicalResult mlir::affine::getIndexSet(MutableArrayRef<Operation *> ops,
                                        FlatAffineValueConstraints *domain) {
  SmallVector<Value, 4> indices;
  SmallVector<Operation *, 8> loopOps;
  size_t numDims = 0;
  for (Operation *op : ops) {
    if (!isa<AffineForOp, AffineIfOp, AffineParallelOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "getIndexSet only handles affine.for/if/"
                                 "parallel ops");
      return failure();
    }
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      loopOps.push_back(forOp);
      // An AffineForOp has exactly one induction variable.
      numDims += 1;
    } else if (AffineParallelOp parallelOp = dyn_cast<AffineParallelOp>(op)) {
      loopOps.push_back(parallelOp);
      numDims += parallelOp.getNumDims();
    }
  }
  extractInductionVars(loopOps, indices);
  // Reset while associating Values in 'indices' to the domain.
  *domain = FlatAffineValueConstraints(numDims, /*numSymbols=*/0,
                                       /*numLocals=*/0, indices);
  for (Operation *op : ops) {
    // Add constraints from forOp's bounds.
    if (AffineForOp forOp = dyn_cast<AffineForOp>(op)) {
      if (failed(domain->addAffineForOpDomain(forOp)))
        return failure();
    } else if (auto ifOp = dyn_cast<AffineIfOp>(op)) {
      domain->addAffineIfOpDomain(ifOp);
    } else if (auto parallelOp = dyn_cast<AffineParallelOp>(op))
      if (failed(domain->addAffineParallelOpDomain(parallelOp)))
        return failure();
  }
  return success();
}
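
// Illustrative example (hypothetical IR, not from this file): for the nest
//
//   affine.for %i = 0 to %N {
//     affine.for %j = 0 to 128 {
//       ...
//     }
//   }
//
// getIndexSet would produce a system with dimensions (%i, %j), symbol %N, and
// the constraints 0 <= %i <= %N - 1 and 0 <= %j <= 127.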

/// Computes the iteration domain for 'op' and populates 'indexSet', which
/// encapsulates the constraints involving loops surrounding 'op' and
/// potentially involving any Function symbols. The dimensional variables in
/// 'indexSet' correspond to the loops surrounding 'op' from outermost to
/// innermost.
static LogicalResult getOpIndexSet(Operation *op,
                                   FlatAffineValueConstraints *indexSet) {
  SmallVector<Operation *, 4> ops;
  getEnclosingAffineOps(*op, &ops);
  return getIndexSet(ops, indexSet);
}

// Returns the number of outer loops common to 'src/dstDomain'.
// Loops common to 'src/dst' domains are added to 'commonLoops' if non-null.
static unsigned
getNumCommonLoops(const FlatAffineValueConstraints &srcDomain,
                  const FlatAffineValueConstraints &dstDomain,
                  SmallVectorImpl<AffineForOp> *commonLoops = nullptr) {
  // Find the number of common loops shared by src and dst accesses.
  unsigned minNumLoops =
      std::min(srcDomain.getNumDimVars(), dstDomain.getNumDimVars());
  unsigned numCommonLoops = 0;
  for (unsigned i = 0; i < minNumLoops; ++i) {
    if ((!isAffineForInductionVar(srcDomain.getValue(i)) &&
         !isAffineParallelInductionVar(srcDomain.getValue(i))) ||
        (!isAffineForInductionVar(dstDomain.getValue(i)) &&
         !isAffineParallelInductionVar(dstDomain.getValue(i))) ||
        srcDomain.getValue(i) != dstDomain.getValue(i))
      break;
    if (commonLoops != nullptr)
      commonLoops->push_back(getForInductionVarOwner(srcDomain.getValue(i)));
    ++numCommonLoops;
  }
  if (commonLoops != nullptr)
    assert(commonLoops->size() == numCommonLoops);
  return numCommonLoops;
}

/// Returns the closest surrounding block common to `opA` and `opB`. `opA` and
/// `opB` should be in the same affine scope. Returns nullptr if such a block
/// does not exist (when the two ops are in different blocks of an op starting
/// an `AffineScope`).
static Block *getCommonBlockInAffineScope(Operation *opA, Operation *opB) {
  // Get the chain of ancestor blocks for the given op. The chain extends up to
  // and including the block whose parent operation starts an affine scope.
  auto getChainOfAncestorBlocks =
      [&](Operation *op, SmallVectorImpl<Block *> &ancestorBlocks) {
        Block *currBlock = op->getBlock();
        // The loop terminates when currBlock is nullptr or its parent operation
        // holds an affine scope.
        while (currBlock &&
               !currBlock->getParentOp()->hasTrait<OpTrait::AffineScope>()) {
          ancestorBlocks.push_back(currBlock);
          currBlock = currBlock->getParentOp()->getBlock();
        }
        assert(currBlock &&
               "parent op starting an affine scope is always expected");
        ancestorBlocks.push_back(currBlock);
      };

  // Find the closest common block.
  SmallVector<Block *, 4> srcAncestorBlocks, dstAncestorBlocks;
  getChainOfAncestorBlocks(opA, srcAncestorBlocks);
  getChainOfAncestorBlocks(opB, dstAncestorBlocks);

  Block *commonBlock = nullptr;
  for (int i = srcAncestorBlocks.size() - 1, j = dstAncestorBlocks.size() - 1;
       i >= 0 && j >= 0 && srcAncestorBlocks[i] == dstAncestorBlocks[j];
       i--, j--)
    commonBlock = srcAncestorBlocks[i];

  return commonBlock;
}

/// Returns true if the ancestor operation of 'srcAccess' appears before the
/// ancestor operation of 'dstAccess' in their common ancestral block. The
/// operations for `srcAccess` and `dstAccess` are expected to be in the same
/// affine scope and have a common surrounding block within it.
static bool srcAppearsBeforeDstInAncestralBlock(const MemRefAccess &srcAccess,
                                                const MemRefAccess &dstAccess) {
  // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'.
  Block *commonBlock =
      getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst);
  assert(commonBlock &&
         "ops expected to have a common surrounding block in affine scope");

  // Check the dominance relationship between the respective ancestors of the
  // src and dst in the Block of the innermost among the common loops.
  Operation *srcOp = commonBlock->findAncestorOpInBlock(*srcAccess.opInst);
  assert(srcOp && "src access op must lie in common block");
  Operation *dstOp = commonBlock->findAncestorOpInBlock(*dstAccess.opInst);
  assert(dstOp && "dest access op must lie in common block");

  // Determine whether dstOp comes after srcOp.
  return srcOp->isBeforeInBlock(dstOp);
}

// Adds ordering constraints to 'dependenceDomain' based on the number of loops
// common to 'src/dstDomain' and the requested 'loopDepth'.
// Note that 'loopDepth' cannot exceed the number of common loops plus one.
// EX: Given a loop nest of depth 2 with IVs 'i' and 'j':
// *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1
// *) If 'loopDepth == 2' then two constraints are added: i == i' and j' >= j + 1
// *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j'
static void addOrderingConstraints(const FlatAffineValueConstraints &srcDomain,
                                   const FlatAffineValueConstraints &dstDomain,
                                   unsigned loopDepth,
                                   IntegerRelation *dependenceDomain) {
  unsigned numCols = dependenceDomain->getNumCols();
  SmallVector<int64_t, 4> eq(numCols);
  unsigned numSrcDims = srcDomain.getNumDimVars();
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth);
  for (unsigned i = 0; i < numCommonLoopConstraints; ++i) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[i] = -1;
    eq[i + numSrcDims] = 1;
    if (i == loopDepth - 1) {
      eq[numCols - 1] = -1;
      dependenceDomain->addInequality(eq);
    } else {
      dependenceDomain->addEquality(eq);
    }
  }
}

// Computes distance and direction vectors in 'dependenceComponents', by adding
// variables to 'dependenceDomain' which represent the difference of the IVs,
// eliminating all other variables, and reading off distance vectors from
// equality constraints (if possible), and direction vectors from inequalities.
static void computeDirectionVector(
    const FlatAffineValueConstraints &srcDomain,
    const FlatAffineValueConstraints &dstDomain, unsigned loopDepth,
    IntegerPolyhedron *dependenceDomain,
    SmallVector<DependenceComponent, 2> *dependenceComponents) {
  // Find the number of common loops shared by src and dst accesses.
  SmallVector<AffineForOp, 4> commonLoops;
  unsigned numCommonLoops =
      getNumCommonLoops(srcDomain, dstDomain, &commonLoops);
  if (numCommonLoops == 0)
    return;
  // Compute direction vectors for requested loop depth.
  unsigned numIdsToEliminate = dependenceDomain->getNumVars();
  // Add new variables to 'dependenceDomain' to represent the direction
  // constraints for each shared loop.
  dependenceDomain->insertVar(VarKind::SetDim, /*pos=*/0,
                              /*num=*/numCommonLoops);

  // Add equality constraints for each common loop, setting the newly
  // introduced variable at column 'j' to the 'dst' IV minus the 'src' IV.
  SmallVector<int64_t, 4> eq;
  eq.resize(dependenceDomain->getNumCols());
  unsigned numSrcDims = srcDomain.getNumDimVars();
  // Constraint variables format:
  // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant]
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    std::fill(eq.begin(), eq.end(), 0);
    eq[j] = 1;
    eq[j + numCommonLoops] = 1;
    eq[j + numCommonLoops + numSrcDims] = -1;
    dependenceDomain->addEquality(eq);
  }

  // Eliminate all variables other than the direction variables just added.
  dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate);

  // Scan each common loop variable column and set direction vectors based
  // on the eliminated constraint system.
  dependenceComponents->resize(numCommonLoops);
  for (unsigned j = 0; j < numCommonLoops; ++j) {
    (*dependenceComponents)[j].op = commonLoops[j].getOperation();
    auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j);
    (*dependenceComponents)[j].lb =
        lbConst.value_or(std::numeric_limits<int64_t>::min());
    auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j);
    (*dependenceComponents)[j].ub =
        ubConst.value_or(std::numeric_limits<int64_t>::max());
  }
}
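
// Illustrative example (hypothetical IR, not from this file): for
//
//   affine.for %i = 0 to 99 {
//     %v = affine.load %A[%i] : memref<100xf32>
//     affine.store %v, %A[%i + 1] : memref<100xf32>
//   }
//
// the store in iteration %i and the load in iteration %i + 1 touch the same
// element, so the RAW dependence from the store to the load yields a single
// component for %i with lb = ub = 1 (a constant distance of one iteration).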

LogicalResult MemRefAccess::getAccessRelation(IntegerRelation &rel) const {
  // Create set corresponding to domain of access.
  FlatAffineValueConstraints domain;
  if (failed(getOpIndexSet(opInst, &domain)))
    return failure();

  // Get access relation from access map.
  AffineValueMap accessValueMap;
  getAccessMap(&accessValueMap);
  if (failed(getRelationFromMap(accessValueMap, rel)))
    return failure();

  // Merge and align domain ids of `rel` with ids of `domain`. Since the domain
  // of the access map is a subset of the domain of access, the domain ids of
  // `rel` are guaranteed to be a subset of ids of `domain`.
  unsigned inserts = 0;
  for (unsigned i = 0, e = domain.getNumDimVars(); i < e; ++i) {
    const Identifier domainIdi = Identifier(domain.getValue(i));
    const Identifier *findBegin = rel.getIds(VarKind::SetDim).begin() + i;
    const Identifier *findEnd = rel.getIds(VarKind::SetDim).end();
    const Identifier *itr = std::find(findBegin, findEnd, domainIdi);
    if (itr != findEnd) {
      rel.swapVar(i, i + std::distance(findBegin, itr));
    } else {
      ++inserts;
      rel.insertVar(VarKind::SetDim, i);
      rel.setId(VarKind::SetDim, i, domainIdi);
    }
  }

  // Append domain constraints to `rel`.
  IntegerRelation domainRel = domain;
  if (rel.getSpace().isUsingIds() && !domainRel.getSpace().isUsingIds())
    domainRel.resetIds();
  domainRel.appendVar(VarKind::Range, accessValueMap.getNumResults());
  domainRel.mergeAndAlignSymbols(rel);
  domainRel.mergeLocalVars(rel);
  rel.append(domainRel);

  rel.convertVarKind(VarKind::SetDim, 0, accessValueMap.getNumDims() + inserts,
                     VarKind::Domain);

  return success();
}

// Populates 'accessMap' with the composition of the AffineApplyOps reachable
// from the indices of the MemRefAccess.
void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const {
  // Get affine map from AffineLoad/Store.
  AffineMap map;
  if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst))
    map = loadOp.getAffineMap();
  else
    map = cast<AffineWriteOpInterface>(opInst).getAffineMap();

  SmallVector<Value, 8> operands(indices.begin(), indices.end());
  fullyComposeAffineMapAndOperands(&map, &operands);
  map = simplifyAffineMap(map);
  canonicalizeMapAndOperands(&map, &operands);
  accessMap->reset(map, operands);
}
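
// Illustrative example (hypothetical IR, not from this file): for an access
//
//   %a = affine.apply affine_map<(d0) -> (d0 * 2)>(%i)
//   %v = affine.load %A[%a + 1] : memref<?xf32>
//
// the composed and simplified access map would be (d0) -> (d0 * 2 + 1) with
// the single operand %i.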

// Builds a flat affine constraint system to check if there exists a dependence
// between memref accesses 'srcAccess' and 'dstAccess'.
// Returns 'NoDependence' if the accesses can be definitively shown not to
// access the same element.
// Returns 'HasDependence' if the accesses do access the same element.
// Returns 'Failure' if an error or unsupported case was encountered.
// If a dependence exists, returns in 'dependenceComponents' a direction
// vector for the dependence, with a component for each loop IV in loops
// common to both accesses (see Dependence in AffineAnalysis.h for details).
//
// The memref access dependence check consists of the following steps:
// *) Build an access relation for each access. An access relation maps
//    elements of an iteration domain to the element(s) of an array domain
//    accessed by that iteration of the associated statement through some array
//    reference.
// *) Compute the dependence relation by composing the access relation of
//    `srcAccess` with the inverse of the access relation of `dstAccess`.
//    Doing this builds a relation between the iteration domain of `srcAccess`
//    and the iteration domain of `dstAccess` that access the same memory
//    location.
// *) Add ordering constraints for `srcAccess` to be accessed before
//    `dstAccess`.
//
// This method builds a constraint system with the following column format:
//
//  [src-dim-variables, dst-dim-variables, symbols, constant]
//
// For example, given the following MLIR code with "source" and "destination"
// accesses to the same memref %m, and symbols %M, %N, %K:
//
//   affine.for %i0 = 0 to 100 {
//     affine.for %i1 = 0 to 50 {
//       %a0 = affine.apply
//         (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N]
//       // Source memref access.
//       store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32>
//     }
//   }
//
//   affine.for %i2 = 0 to 100 {
//     affine.for %i3 = 0 to 50 {
//       %a1 = affine.apply
//         (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M]
//       // Destination memref access.
//       %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32>
//     }
//   }
//
// The access relation for `srcAccess` would be the following:
//
//   [src_dim0, src_dim1, mem_dim0, mem_dim1,  %N,   %M,  const]
//       2        -4       -1         0         1     0     0     = 0
//       0         3        0        -1         0    -1     0     = 0
//       1         0        0         0         0     0     0    >= 0
//      -1         0        0         0         0     0     100  >= 0
//       0         1        0         0         0     0     0    >= 0
//       0        -1        0         0         0     0     50   >= 0
//
// The access relation for `dstAccess` would be the following:
//
//   [dst_dim0, dst_dim1, mem_dim0, mem_dim1,  %M,   %K,  const]
//       7         9       -1         0        -1     0     0     = 0
//       0         11       0        -1         0    -1     0     = 0
//       1         0        0         0         0     0     0    >= 0
//      -1         0        0         0         0     0     100  >= 0
//       0         1        0         0         0     0     0    >= 0
//       0        -1        0         0         0     0     50   >= 0
//
// The equalities in the above relations correspond to the access maps while
// the inequalities correspond to the iteration domain constraints.
//
// The dependence relation formed:
//
//   [src_dim0, src_dim1, dst_dim0, dst_dim1,  %M,   %N,   %K,  const]
//       2        -4        -7        -9        1     1     0     0     = 0
//       0         3         0        -11      -1     0     1     0     = 0
//       1         0         0         0        0     0     0     0    >= 0
//      -1         0         0         0        0     0     0     100  >= 0
//       0         1         0         0        0     0     0     0    >= 0
//       0        -1         0         0        0     0     0     50   >= 0
//       0         0         1         0        0     0     0     0    >= 0
//       0         0        -1         0        0     0     0     100  >= 0
//       0         0         0         1        0     0     0     0    >= 0
//       0         0         0        -1        0     0     0     50   >= 0
//
// TODO: Support AffineExprs mod/floordiv/ceildiv.
DependenceResult mlir::affine::checkMemrefAccessDependence(
    const MemRefAccess &srcAccess, const MemRefAccess &dstAccess,
    unsigned loopDepth, FlatAffineValueConstraints *dependenceConstraints,
    SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) {
  LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: "
                          << Twine(loopDepth) << " between:\n";);
  LLVM_DEBUG(srcAccess.opInst->dump());
  LLVM_DEBUG(dstAccess.opInst->dump());

  // Return 'NoDependence' if these accesses do not access the same memref.
  if (srcAccess.memref != dstAccess.memref)
    return DependenceResult::NoDependence;

  // Return 'NoDependence' if neither of these accesses is an
  // AffineWriteOpInterface (a read-after-read pair) and RAR dependences were
  // not requested.
  if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) &&
      !isa<AffineWriteOpInterface>(dstAccess.opInst))
    return DependenceResult::NoDependence;

  // We can't analyze further if the ops lie in different affine scopes or have
  // no common block in an affine scope.
  if (getAffineScope(srcAccess.opInst) != getAffineScope(dstAccess.opInst))
    return DependenceResult::Failure;
  if (!getCommonBlockInAffineScope(srcAccess.opInst, dstAccess.opInst))
    return DependenceResult::Failure;

  // Create access relation from each MemRefAccess.
  PresburgerSpace space = PresburgerSpace::getRelationSpace();
  IntegerRelation srcRel(space), dstRel(space);
  if (failed(srcAccess.getAccessRelation(srcRel)))
    return DependenceResult::Failure;
  if (failed(dstAccess.getAccessRelation(dstRel)))
    return DependenceResult::Failure;

  FlatAffineValueConstraints srcDomain(srcRel.getDomainSet());
  FlatAffineValueConstraints dstDomain(dstRel.getDomainSet());

  // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor
  // operation of 'srcAccess' does not properly dominate the ancestor
  // operation of 'dstAccess' in the same common operation block.
  // Note: this check is skipped if 'allowRAR' is true, because RAR deps
  // can exist irrespective of lexicographic ordering between src and dst.
  unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain);
  assert(loopDepth <= numCommonLoops + 1);
  if (!allowRAR && loopDepth > numCommonLoops &&
      !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess)) {
    return DependenceResult::NoDependence;
  }

  // Compute the dependence relation by composing `srcRel` with the inverse of
  // `dstRel`. Doing this builds a relation between the iteration domain of
  // `srcAccess` and the iteration domain of `dstAccess` that access the same
  // memory locations.
  dstRel.inverse();
  dstRel.mergeAndCompose(srcRel);
  dstRel.convertVarKind(VarKind::Domain, 0, dstRel.getNumDomainVars(),
                        VarKind::Range, 0);
  IntegerPolyhedron dependenceDomain(dstRel);

  // Add 'src' happens before 'dst' ordering constraints.
  addOrderingConstraints(srcDomain, dstDomain, loopDepth, &dependenceDomain);

  // Return 'NoDependence' if the solution space is empty: no dependence.
  if (dependenceDomain.isEmpty())
    return DependenceResult::NoDependence;

  // Compute the dependence direction vector if requested.
  if (dependenceComponents != nullptr)
    computeDirectionVector(srcDomain, dstDomain, loopDepth, &dependenceDomain,
                           dependenceComponents);

  LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n");
  LLVM_DEBUG(dependenceDomain.dump());

  FlatAffineValueConstraints result(dependenceDomain);
  if (dependenceConstraints)
    *dependenceConstraints = result;
  return DependenceResult::HasDependence;
}
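
// A minimal usage sketch (hypothetical caller, not part of this file):
//
//   MemRefAccess srcAccess(storeOp), dstAccess(loadOp);
//   SmallVector<DependenceComponent, 2> depComps;
//   DependenceResult res = checkMemrefAccessDependence(
//       srcAccess, dstAccess, /*loopDepth=*/1,
//       /*dependenceConstraints=*/nullptr, &depComps);
//   if (hasDependence(res)) {
//     // A dependence exists at depth 1; depComps holds one component per
//     // common surrounding loop.
//   }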

/// Gathers dependence components for dependences between all ops in loop nest
/// rooted at 'forOp' at loop depths in range [1, maxLoopDepth].
void mlir::affine::getDependenceComponents(
    AffineForOp forOp, unsigned maxLoopDepth,
    std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) {
  // Collect all load and store ops in loop nest rooted at 'forOp'.
  SmallVector<Operation *, 8> loadAndStoreOps;
  forOp->walk([&](Operation *op) {
    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
      loadAndStoreOps.push_back(op);
  });

  unsigned numOps = loadAndStoreOps.size();
  for (unsigned d = 1; d <= maxLoopDepth; ++d) {
    for (unsigned i = 0; i < numOps; ++i) {
      auto *srcOp = loadAndStoreOps[i];
      MemRefAccess srcAccess(srcOp);
      for (unsigned j = 0; j < numOps; ++j) {
        auto *dstOp = loadAndStoreOps[j];
        MemRefAccess dstAccess(dstOp);

        SmallVector<DependenceComponent, 2> depComps;
        // TODO: Explore whether it would be profitable to pre-compute and
        // store deps instead of repeatedly checking.
        DependenceResult result = checkMemrefAccessDependence(
            srcAccess, dstAccess, d, /*dependenceConstraints=*/nullptr,
            &depComps);
        if (hasDependence(result))
          depCompsVec->push_back(depComps);
      }
    }
  }
}