xref: /llvm-project/llvm/lib/Analysis/Delinearization.cpp (revision 236fda550d36d35a00785938c3e38b0f402aeda6)
1 //===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements an analysis pass that tries to delinearize all GEP
10 // instructions in all loops using the SCEV analysis functionality. This pass is
11 // only used for testing purposes: if your pass needs delinearization, please
12 // use the on-demand SCEVAddRecExpr::delinearize() function.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Analysis/Delinearization.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionDivision.h"
20 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/DerivedTypes.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/InstIterator.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 
30 using namespace llvm;
31 
32 #define DL_NAME "delinearize"
33 #define DEBUG_TYPE DL_NAME
34 
35 // Return true when S contains at least an undef value.
36 static inline bool containsUndefs(const SCEV *S) {
37   return SCEVExprContains(S, [](const SCEV *S) {
38     if (const auto *SU = dyn_cast<SCEVUnknown>(S))
39       return isa<UndefValue>(SU->getValue());
40     return false;
41   });
42 }
43 
44 namespace {
45 
46 // Collect all steps of SCEV expressions.
47 struct SCEVCollectStrides {
48   ScalarEvolution &SE;
49   SmallVectorImpl<const SCEV *> &Strides;
50 
51   SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
52       : SE(SE), Strides(S) {}
53 
54   bool follow(const SCEV *S) {
55     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
56       Strides.push_back(AR->getStepRecurrence(SE));
57     return true;
58   }
59 
60   bool isDone() const { return false; }
61 };
62 
63 // Collect all SCEVUnknown and SCEVMulExpr expressions.
64 struct SCEVCollectTerms {
65   SmallVectorImpl<const SCEV *> &Terms;
66 
67   SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
68 
69   bool follow(const SCEV *S) {
70     if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
71         isa<SCEVSignExtendExpr>(S)) {
72       if (!containsUndefs(S))
73         Terms.push_back(S);
74 
75       // Stop recursion: once we collected a term, do not walk its operands.
76       return false;
77     }
78 
79     // Keep looking.
80     return true;
81   }
82 
83   bool isDone() const { return false; }
84 };
85 
86 // Check if a SCEV contains an AddRecExpr.
87 struct SCEVHasAddRec {
88   bool &ContainsAddRec;
89 
90   SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
91     ContainsAddRec = false;
92   }
93 
94   bool follow(const SCEV *S) {
95     if (isa<SCEVAddRecExpr>(S)) {
96       ContainsAddRec = true;
97 
98       // Stop recursion: once we collected a term, do not walk its operands.
99       return false;
100     }
101 
102     // Keep looking.
103     return true;
104   }
105 
106   bool isDone() const { return false; }
107 };
108 
109 // Find factors that are multiplied with an expression that (possibly as a
110 // subexpression) contains an AddRecExpr. In the expression:
111 //
112 //  8 * (100 +  %p * %q * (%a + {0, +, 1}_loop))
113 //
114 // "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
115 // that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
116 // parameters as they form a product with an induction variable.
117 //
118 // This collector expects all array size parameters to be in the same MulExpr.
119 // It might be necessary to later add support for collecting parameters that are
120 // spread over different nested MulExpr.
121 struct SCEVCollectAddRecMultiplies {
122   SmallVectorImpl<const SCEV *> &Terms;
123   ScalarEvolution &SE;
124 
125   SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
126                               ScalarEvolution &SE)
127       : Terms(T), SE(SE) {}
128 
129   bool follow(const SCEV *S) {
130     if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
131       bool HasAddRec = false;
132       SmallVector<const SCEV *, 0> Operands;
133       for (const SCEV *Op : Mul->operands()) {
134         const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
135         if (Unknown && !isa<CallInst>(Unknown->getValue())) {
136           Operands.push_back(Op);
137         } else if (Unknown) {
138           HasAddRec = true;
139         } else {
140           bool ContainsAddRec = false;
141           SCEVHasAddRec ContiansAddRec(ContainsAddRec);
142           visitAll(Op, ContiansAddRec);
143           HasAddRec |= ContainsAddRec;
144         }
145       }
146       if (Operands.size() == 0)
147         return true;
148 
149       if (!HasAddRec)
150         return false;
151 
152       Terms.push_back(SE.getMulExpr(Operands));
153       // Stop recursion: once we collected a term, do not walk its operands.
154       return false;
155     }
156 
157     // Keep looking.
158     return true;
159   }
160 
161   bool isDone() const { return false; }
162 };
163 
164 } // end anonymous namespace
165 
166 /// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
167 /// two places:
168 ///   1) The strides of AddRec expressions.
169 ///   2) Unknowns that are multiplied with AddRec expressions.
170 void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
171                                   SmallVectorImpl<const SCEV *> &Terms) {
172   SmallVector<const SCEV *, 4> Strides;
173   SCEVCollectStrides StrideCollector(SE, Strides);
174   visitAll(Expr, StrideCollector);
175 
176   LLVM_DEBUG({
177     dbgs() << "Strides:\n";
178     for (const SCEV *S : Strides)
179       dbgs() << *S << "\n";
180   });
181 
182   for (const SCEV *S : Strides) {
183     SCEVCollectTerms TermCollector(Terms);
184     visitAll(S, TermCollector);
185   }
186 
187   LLVM_DEBUG({
188     dbgs() << "Terms:\n";
189     for (const SCEV *T : Terms)
190       dbgs() << *T << "\n";
191   });
192 
193   SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
194   visitAll(Expr, MulCollector);
195 }
196 
197 static bool findArrayDimensionsRec(ScalarEvolution &SE,
198                                    SmallVectorImpl<const SCEV *> &Terms,
199                                    SmallVectorImpl<const SCEV *> &Sizes) {
200   int Last = Terms.size() - 1;
201   const SCEV *Step = Terms[Last];
202 
203   // End of recursion.
204   if (Last == 0) {
205     if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
206       SmallVector<const SCEV *, 2> Qs;
207       for (const SCEV *Op : M->operands())
208         if (!isa<SCEVConstant>(Op))
209           Qs.push_back(Op);
210 
211       Step = SE.getMulExpr(Qs);
212     }
213 
214     Sizes.push_back(Step);
215     return true;
216   }
217 
218   for (const SCEV *&Term : Terms) {
219     // Normalize the terms before the next call to findArrayDimensionsRec.
220     const SCEV *Q, *R;
221     SCEVDivision::divide(SE, Term, Step, &Q, &R);
222 
223     // Bail out when GCD does not evenly divide one of the terms.
224     if (!R->isZero())
225       return false;
226 
227     Term = Q;
228   }
229 
230   // Remove all SCEVConstants.
231   erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
232 
233   if (Terms.size() > 0)
234     if (!findArrayDimensionsRec(SE, Terms, Sizes))
235       return false;
236 
237   Sizes.push_back(Step);
238   return true;
239 }
240 
241 // Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
242 static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
243   for (const SCEV *T : Terms)
244     if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
245       return true;
246 
247   return false;
248 }
249 
250 // Return the number of product terms in S.
251 static inline int numberOfTerms(const SCEV *S) {
252   if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
253     return Expr->getNumOperands();
254   return 1;
255 }
256 
257 static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
258   if (isa<SCEVConstant>(T))
259     return nullptr;
260 
261   if (isa<SCEVUnknown>(T))
262     return T;
263 
264   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
265     SmallVector<const SCEV *, 2> Factors;
266     for (const SCEV *Op : M->operands())
267       if (!isa<SCEVConstant>(Op))
268         Factors.push_back(Op);
269 
270     return SE.getMulExpr(Factors);
271   }
272 
273   return T;
274 }
275 
276 void llvm::findArrayDimensions(ScalarEvolution &SE,
277                                SmallVectorImpl<const SCEV *> &Terms,
278                                SmallVectorImpl<const SCEV *> &Sizes,
279                                const SCEV *ElementSize) {
280   if (Terms.size() < 1 || !ElementSize)
281     return;
282 
283   // Early return when Terms do not contain parameters: we do not delinearize
284   // non parametric SCEVs.
285   if (!containsParameters(Terms))
286     return;
287 
288   LLVM_DEBUG({
289     dbgs() << "Terms:\n";
290     for (const SCEV *T : Terms)
291       dbgs() << *T << "\n";
292   });
293 
294   // Remove duplicates.
295   array_pod_sort(Terms.begin(), Terms.end());
296   Terms.erase(llvm::unique(Terms), Terms.end());
297 
298   // Put larger terms first.
299   llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
300     return numberOfTerms(LHS) > numberOfTerms(RHS);
301   });
302 
303   // Try to divide all terms by the element size. If term is not divisible by
304   // element size, proceed with the original term.
305   for (const SCEV *&Term : Terms) {
306     const SCEV *Q, *R;
307     SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
308     if (!Q->isZero())
309       Term = Q;
310   }
311 
312   SmallVector<const SCEV *, 4> NewTerms;
313 
314   // Remove constant factors.
315   for (const SCEV *T : Terms)
316     if (const SCEV *NewT = removeConstantFactors(SE, T))
317       NewTerms.push_back(NewT);
318 
319   LLVM_DEBUG({
320     dbgs() << "Terms after sorting:\n";
321     for (const SCEV *T : NewTerms)
322       dbgs() << *T << "\n";
323   });
324 
325   if (NewTerms.empty() || !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
326     Sizes.clear();
327     return;
328   }
329 
330   // The last element to be pushed into Sizes is the size of an element.
331   Sizes.push_back(ElementSize);
332 
333   LLVM_DEBUG({
334     dbgs() << "Sizes:\n";
335     for (const SCEV *S : Sizes)
336       dbgs() << *S << "\n";
337   });
338 }
339 
340 void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
341                                   SmallVectorImpl<const SCEV *> &Subscripts,
342                                   SmallVectorImpl<const SCEV *> &Sizes) {
343   // Early exit in case this SCEV is not an affine multivariate function.
344   if (Sizes.empty())
345     return;
346 
347   if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
348     if (!AR->isAffine())
349       return;
350 
351   const SCEV *Res = Expr;
352   int Last = Sizes.size() - 1;
353   for (int i = Last; i >= 0; i--) {
354     const SCEV *Q, *R;
355     SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
356 
357     LLVM_DEBUG({
358       dbgs() << "Res: " << *Res << "\n";
359       dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
360       dbgs() << "Res divided by Sizes[i]:\n";
361       dbgs() << "Quotient: " << *Q << "\n";
362       dbgs() << "Remainder: " << *R << "\n";
363     });
364 
365     Res = Q;
366 
367     // Do not record the last subscript corresponding to the size of elements in
368     // the array.
369     if (i == Last) {
370 
371       // Bail out if the byte offset is non-zero.
372       if (!R->isZero()) {
373         Subscripts.clear();
374         Sizes.clear();
375         return;
376       }
377 
378       continue;
379     }
380 
381     // Record the access function for the current subscript.
382     Subscripts.push_back(R);
383   }
384 
385   // Also push in last position the remainder of the last division: it will be
386   // the access function of the innermost dimension.
387   Subscripts.push_back(Res);
388 
389   std::reverse(Subscripts.begin(), Subscripts.end());
390 
391   LLVM_DEBUG({
392     dbgs() << "Subscripts:\n";
393     for (const SCEV *S : Subscripts)
394       dbgs() << *S << "\n";
395   });
396 }
397 
398 /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
399 /// sizes of an array access. Returns the remainder of the delinearization that
400 /// is the offset start of the array.  The SCEV->delinearize algorithm computes
401 /// the multiples of SCEV coefficients: that is a pattern matching of sub
402 /// expressions in the stride and base of a SCEV corresponding to the
403 /// computation of a GCD (greatest common divisor) of base and stride.  When
404 /// SCEV->delinearize fails, it returns the SCEV unchanged.
405 ///
406 /// For example: when analyzing the memory access A[i][j][k] in this loop nest
407 ///
408 ///  void foo(long n, long m, long o, double A[n][m][o]) {
409 ///
410 ///    for (long i = 0; i < n; i++)
411 ///      for (long j = 0; j < m; j++)
412 ///        for (long k = 0; k < o; k++)
413 ///          A[i][j][k] = 1.0;
414 ///  }
415 ///
416 /// the delinearization input is the following AddRec SCEV:
417 ///
418 ///  AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
419 ///
420 /// From this SCEV, we are able to say that the base offset of the access is %A
421 /// because it appears as an offset that does not divide any of the strides in
422 /// the loops:
423 ///
424 ///  CHECK: Base offset: %A
425 ///
426 /// and then SCEV->delinearize determines the size of some of the dimensions of
427 /// the array as these are the multiples by which the strides are happening:
428 ///
429 ///  CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
430 ///  bytes.
431 ///
432 /// Note that the outermost dimension remains of UnknownSize because there are
433 /// no strides that would help identifying the size of the last dimension: when
434 /// the array has been statically allocated, one could compute the size of that
435 /// dimension by dividing the overall size of the array by the size of the known
436 /// dimensions: %m * %o * 8.
437 ///
438 /// Finally delinearize provides the access functions for the array reference
439 /// that does correspond to A[i][j][k] of the above C testcase:
440 ///
441 ///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
442 ///
443 /// The testcases are checking the output of a function pass:
444 /// DelinearizationPass that walks through all loads and stores of a function
445 /// asking for the SCEV of the memory access with respect to all enclosing
446 /// loops, calling SCEV->delinearize on that and printing the results.
447 void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
448                        SmallVectorImpl<const SCEV *> &Subscripts,
449                        SmallVectorImpl<const SCEV *> &Sizes,
450                        const SCEV *ElementSize) {
451   // First step: collect parametric terms.
452   SmallVector<const SCEV *, 4> Terms;
453   collectParametricTerms(SE, Expr, Terms);
454 
455   if (Terms.empty())
456     return;
457 
458   // Second step: find subscript sizes.
459   findArrayDimensions(SE, Terms, Sizes, ElementSize);
460 
461   if (Sizes.empty())
462     return;
463 
464   // Third step: compute the access functions for each subscript.
465   computeAccessFunctions(SE, Expr, Subscripts, Sizes);
466 
467   if (Subscripts.empty())
468     return;
469 
470   LLVM_DEBUG({
471     dbgs() << "succeeded to delinearize " << *Expr << "\n";
472     dbgs() << "ArrayDecl[UnknownSize]";
473     for (const SCEV *S : Sizes)
474       dbgs() << "[" << *S << "]";
475 
476     dbgs() << "\nArrayRef";
477     for (const SCEV *S : Subscripts)
478       dbgs() << "[" << *S << "]";
479     dbgs() << "\n";
480   });
481 }
482 
483 bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
484                                       const GetElementPtrInst *GEP,
485                                       SmallVectorImpl<const SCEV *> &Subscripts,
486                                       SmallVectorImpl<int> &Sizes) {
487   assert(Subscripts.empty() && Sizes.empty() &&
488          "Expected output lists to be empty on entry to this function.");
489   assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
490   Type *Ty = nullptr;
491   bool DroppedFirstDim = false;
492   for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
493     const SCEV *Expr = SE.getSCEV(GEP->getOperand(i));
494     if (i == 1) {
495       Ty = GEP->getSourceElementType();
496       if (auto *Const = dyn_cast<SCEVConstant>(Expr))
497         if (Const->getValue()->isZero()) {
498           DroppedFirstDim = true;
499           continue;
500         }
501       Subscripts.push_back(Expr);
502       continue;
503     }
504 
505     auto *ArrayTy = dyn_cast<ArrayType>(Ty);
506     if (!ArrayTy) {
507       Subscripts.clear();
508       Sizes.clear();
509       return false;
510     }
511 
512     Subscripts.push_back(Expr);
513     if (!(DroppedFirstDim && i == 2))
514       Sizes.push_back(ArrayTy->getNumElements());
515 
516     Ty = ArrayTy->getElementType();
517   }
518   return !Subscripts.empty();
519 }
520 
521 bool llvm::tryDelinearizeFixedSizeImpl(
522     ScalarEvolution *SE, Instruction *Inst, const SCEV *AccessFn,
523     SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<int> &Sizes) {
524   Value *SrcPtr = getLoadStorePointerOperand(Inst);
525 
526   // Check the simple case where the array dimensions are fixed size.
527   auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
528   if (!SrcGEP)
529     return false;
530 
531   getIndexExpressionsFromGEP(*SE, SrcGEP, Subscripts, Sizes);
532 
533   // Check that the two size arrays are non-empty and equal in length and
534   // value.
535   // TODO: it would be better to let the caller to clear Subscripts, similar
536   // to how we handle Sizes.
537   if (Sizes.empty() || Subscripts.size() <= 1) {
538     Subscripts.clear();
539     return false;
540   }
541 
542   // Check that for identical base pointers we do not miss index offsets
543   // that have been added before this GEP is applied.
544   Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts();
545   const SCEVUnknown *SrcBase =
546       dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
547   if (!SrcBase || SrcBasePtr != SrcBase->getValue()) {
548     Subscripts.clear();
549     return false;
550   }
551 
552   assert(Subscripts.size() == Sizes.size() + 1 &&
553          "Expected equal number of entries in the list of size and "
554          "subscript.");
555 
556   return true;
557 }
558 
559 namespace {
560 
561 void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
562                           ScalarEvolution *SE) {
563   O << "Delinearization on function " << F->getName() << ":\n";
564   for (Instruction &Inst : instructions(F)) {
565     // Only analyze loads and stores.
566     if (!isa<StoreInst>(&Inst) && !isa<LoadInst>(&Inst) &&
567         !isa<GetElementPtrInst>(&Inst))
568       continue;
569 
570     const BasicBlock *BB = Inst.getParent();
571     // Delinearize the memory access as analyzed in all the surrounding loops.
572     // Do not analyze memory accesses outside loops.
573     for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
574       const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(&Inst), L);
575 
576       const SCEVUnknown *BasePointer =
577           dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
578       // Do not delinearize if we cannot find the base pointer.
579       if (!BasePointer)
580         break;
581       AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
582 
583       O << "\n";
584       O << "Inst:" << Inst << "\n";
585       O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
586       O << "AccessFunction: " << *AccessFn << "\n";
587 
588       SmallVector<const SCEV *, 3> Subscripts, Sizes;
589       delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
590       if (Subscripts.size() == 0 || Sizes.size() == 0 ||
591           Subscripts.size() != Sizes.size()) {
592         O << "failed to delinearize\n";
593         continue;
594       }
595 
596       O << "Base offset: " << *BasePointer << "\n";
597       O << "ArrayDecl[UnknownSize]";
598       int Size = Subscripts.size();
599       for (int i = 0; i < Size - 1; i++)
600         O << "[" << *Sizes[i] << "]";
601       O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
602 
603       O << "ArrayRef";
604       for (int i = 0; i < Size; i++)
605         O << "[" << *Subscripts[i] << "]";
606       O << "\n";
607     }
608   }
609 }
610 
611 } // end anonymous namespace
612 
613 DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
614     : OS(OS) {}
615 PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
616                                                   FunctionAnalysisManager &AM) {
617   printDelinearization(OS, &F, &AM.getResult<LoopAnalysis>(F),
618                        &AM.getResult<ScalarEvolutionAnalysis>(F));
619   return PreservedAnalyses::all();
620 }
621