xref: /llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp (revision 474512576ef75d3e400f8ac3d6109524e9cc053b)
1 //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //! \file
11 //! \brief This pass performs merges of loads and stores on both sides of a
12 //  diamond (hammock). It hoists the loads and sinks the stores.
13 //
14 // The algorithm iteratively hoists two loads to the same address out of a
15 // diamond (hammock) and merges them into a single load in the header. Likewise,
16 // it sinks and merges two stores to the tail block (footer). The algorithm
17 // iterates over the instructions of one side of the diamond and attempts to
18 // find a matching load/store on the other side. It hoists / sinks when it
19 // thinks it is safe to do so. This optimization helps with e.g. hiding load
20 // latencies, triggering if-conversion, and reducing static code size.
21 //
22 //===----------------------------------------------------------------------===//
23 //
24 //
25 // Example:
26 // Diamond shaped code before merge:
27 //
28 //            header:
29 //                     br %cond, label %if.then, label %if.else
30 //                        +                    +
31 //                       +                      +
32 //                      +                        +
33 //            if.then:                         if.else:
34 //               %lt = load %addr_l               %le = load %addr_l
35 //               <use %lt>                        <use %le>
36 //               <...>                            <...>
37 //               store %st, %addr_s               store %se, %addr_s
38 //               br label %if.end                 br label %if.end
39 //                     +                         +
40 //                      +                       +
41 //                       +                     +
42 //            if.end ("footer"):
43 //                     <...>
44 //
45 // Diamond shaped code after merge:
46 //
47 //            header:
48 //                     %l = load %addr_l
49 //                     br %cond, label %if.then, label %if.else
50 //                        +                    +
51 //                       +                      +
52 //                      +                        +
53 //            if.then:                         if.else:
54 //               <use %l>                         <use %l>
55 //               <...>                            <...>
56 //               br label %if.end                 br label %if.end
57 //                      +                        +
58 //                       +                      +
59 //                        +                    +
60 //            if.end ("footer"):
61 //                     %s.sink = phi [%st, if.then], [%se, if.else]
62 //                     <...>
63 //                     store %s.sink, %addr_s
64 //                     <...>
65 //
66 //
67 //===----------------------- TODO -----------------------------------------===//
68 //
69 // 1) Generalize to regions other than diamonds
70 // 2) Be more aggressive merging memory operations
71 // Note that both changes require register pressure control
72 //
73 //===----------------------------------------------------------------------===//
74 
75 #include "llvm/ADT/Statistic.h"
76 #include "llvm/Analysis/AliasAnalysis.h"
77 #include "llvm/Analysis/CFG.h"
78 #include "llvm/Analysis/GlobalsModRef.h"
79 #include "llvm/Analysis/Loads.h"
80 #include "llvm/Analysis/MemoryBuiltins.h"
81 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
82 #include "llvm/IR/Metadata.h"
83 #include "llvm/IR/PatternMatch.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/raw_ostream.h"
86 #include "llvm/Transforms/Scalar.h"
87 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
88 #include "llvm/Transforms/Utils/SSAUpdater.h"
89 
90 using namespace llvm;
91 
92 #define DEBUG_TYPE "mldst-motion"
93 
94 //===----------------------------------------------------------------------===//
95 //                         MergedLoadStoreMotion Pass
96 //===----------------------------------------------------------------------===//
97 
98 namespace {
99 class MergedLoadStoreMotion : public FunctionPass {
100   AliasAnalysis *AA;
101   MemoryDependenceResults *MD;
102 
103 public:
104   static char ID; // Pass identification, replacement for typeid
105   MergedLoadStoreMotion()
106       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
107     initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
108   }
109 
110   bool runOnFunction(Function &F) override;
111 
112 private:
113   // This transformation requires dominator postdominator info
114   void getAnalysisUsage(AnalysisUsage &AU) const override {
115     AU.setPreservesCFG();
116     AU.addRequired<AAResultsWrapperPass>();
117     AU.addPreserved<GlobalsAAWrapperPass>();
118     AU.addPreserved<MemoryDependenceWrapperPass>();
119   }
120 
121   // Helper routines
122 
123   ///
124   /// \brief Remove instruction from parent and update memory dependence
125   /// analysis.
126   ///
127   void removeInstruction(Instruction *Inst);
128   BasicBlock *getDiamondTail(BasicBlock *BB);
129   bool isDiamondHead(BasicBlock *BB);
130   // Routines for hoisting loads
131   bool isLoadHoistBarrierInRange(const Instruction &Start,
132                                  const Instruction &End, LoadInst *LI,
133                                  bool SafeToLoadUnconditionally);
134   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
135   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
136                         Instruction *ElseInst);
137   bool isSafeToHoist(Instruction *I) const;
138   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
139   bool mergeLoads(BasicBlock *BB);
140   // Routines for sinking stores
141   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
142   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
143   bool isStoreSinkBarrierInRange(const Instruction &Start,
144                                  const Instruction &End, MemoryLocation Loc);
145   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
146   bool mergeStores(BasicBlock *BB);
147   // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
148   // where Size0 and Size1 are the #instructions on the two sides of
149   // the diamond. The constant chosen here is arbitrary. Compiler Time
150   // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
151   const int MagicCompileTimeControl;
152 };
153 
154 char MergedLoadStoreMotion::ID = 0;
155 } // anonymous namespace
156 
157 ///
158 /// \brief createMergedLoadStoreMotionPass - The public interface to this file.
159 ///
160 FunctionPass *llvm::createMergedLoadStoreMotionPass() {
161   return new MergedLoadStoreMotion();
162 }
163 
// Pass registration. "mldst-motion" is the argument string and
// "MergedLoadStoreMotion" the descriptive name handed to the registration
// macros; the DEPENDENCY lines list the passes initialized along with it.
// NOTE(review): the two trailing `false` flags presumably mean "not CFG-only"
// and "not an analysis" -- confirm against the INITIALIZE_PASS macros.
164 INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
165                       "MergedLoadStoreMotion", false, false)
166 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
167 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
168 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
169 INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
170                     "MergedLoadStoreMotion", false, false)
171 
172 ///
173 /// \brief Remove instruction from parent and update memory dependence analysis.
174 ///
175 void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
176   // Notify the memory dependence analysis.
177   if (MD) {
178     MD->removeInstruction(Inst);
179     if (auto *LI = dyn_cast<LoadInst>(Inst))
180       MD->invalidateCachedPointerInfo(LI->getPointerOperand());
181     if (Inst->getType()->isPtrOrPtrVectorTy()) {
182       MD->invalidateCachedPointerInfo(Inst);
183     }
184   }
185   Inst->eraseFromParent();
186 }
187 
188 ///
189 /// \brief Return tail block of a diamond.
190 ///
191 BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
192   assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
193   return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor();
194 }
195 
196 ///
197 /// \brief True when BB is the head of a diamond (hammock)
198 ///
199 bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
200   if (!BB)
201     return false;
202   auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
203   if (!BI || !BI->isConditional())
204     return false;
205 
206   BasicBlock *Succ0 = BI->getSuccessor(0);
207   BasicBlock *Succ1 = BI->getSuccessor(1);
208 
209   if (!Succ0->getSinglePredecessor())
210     return false;
211   if (!Succ1->getSinglePredecessor())
212     return false;
213 
214   BasicBlock *Succ0Succ = Succ0->getSingleSuccessor();
215   BasicBlock *Succ1Succ = Succ1->getSingleSuccessor();
216   // Ignore triangles.
217   if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ)
218     return false;
219   return true;
220 }
221 
222 ///
223 /// \brief True when instruction is a hoist barrier for a load
224 ///
225 /// Whenever an instruction could possibly modify the value
226 /// being loaded or protect against the load from happening
227 /// it is considered a hoist barrier.
228 ///
229 bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
230     const Instruction &Start, const Instruction &End, LoadInst *LI,
231     bool SafeToLoadUnconditionally) {
232   if (!SafeToLoadUnconditionally)
233     for (const Instruction &Inst :
234          make_range(Start.getIterator(), End.getIterator()))
235       if (Inst.mayThrow())
236         return true;
237   MemoryLocation Loc = MemoryLocation::get(LI);
238   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
239 }
240 
241 ///
242 /// \brief Decide if a load can be hoisted
243 ///
244 /// When there is a load in \p BB to the same address as \p LI
245 /// and it can be hoisted from \p BB, return that load.
246 /// Otherwise return Null.
247 ///
248 LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
249                                                    LoadInst *Load0) {
250   BasicBlock *BB0 = Load0->getParent();
251   BasicBlock *Head = BB0->getSinglePredecessor();
252   bool SafeToLoadUnconditionally = isSafeToLoadUnconditionally(
253       Load0->getPointerOperand(), Load0->getAlignment(),
254       Load0->getModule()->getDataLayout(),
255       /*ScanFrom=*/Head->getTerminator());
256   for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
257        ++BBI) {
258     Instruction *Inst = &*BBI;
259 
260     // Only merge and hoist loads when their result in used only in BB
261     auto *Load1 = dyn_cast<LoadInst>(Inst);
262     if (!Load1 || Inst->isUsedOutsideOfBlock(BB1))
263       continue;
264 
265     MemoryLocation Loc0 = MemoryLocation::get(Load0);
266     MemoryLocation Loc1 = MemoryLocation::get(Load1);
267     if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
268         !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
269                                    SafeToLoadUnconditionally) &&
270         !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
271                                    SafeToLoadUnconditionally)) {
272       return Load1;
273     }
274   }
275   return nullptr;
276 }
277 
278 ///
279 /// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
280 /// \p BB
281 ///
282 /// BB is the head of a diamond
283 ///
284 void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
285                                              Instruction *HoistCand,
286                                              Instruction *ElseInst) {
287   DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
288         dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
289         dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
290   // Hoist the instruction.
291   assert(HoistCand->getParent() != BB);
292 
293   // Intersect optional metadata.
294   HoistCand->intersectOptionalDataWith(ElseInst);
295   HoistCand->dropUnknownNonDebugMetadata();
296 
297   // Prepend point for instruction insert
298   Instruction *HoistPt = BB->getTerminator();
299 
300   // Merged instruction
301   Instruction *HoistedInst = HoistCand->clone();
302 
303   // Hoist instruction.
304   HoistedInst->insertBefore(HoistPt);
305 
306   HoistCand->replaceAllUsesWith(HoistedInst);
307   removeInstruction(HoistCand);
308   // Replace the else block instruction.
309   ElseInst->replaceAllUsesWith(HoistedInst);
310   removeInstruction(ElseInst);
311 }
312 
313 ///
314 /// \brief Return true if no operand of \p I is defined in I's parent block
315 ///
316 bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
317   BasicBlock *Parent = I->getParent();
318   for (Use &U : I->operands())
319     if (auto *Instr = dyn_cast<Instruction>(&U))
320       if (Instr->getParent() == Parent)
321         return false;
322   return true;
323 }
324 
325 ///
326 /// \brief Merge two equivalent loads and GEPs and hoist into diamond head
327 ///
328 bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
329                                       LoadInst *L1) {
330   // Only one definition?
331   auto *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
332   auto *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
333   if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
334       A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
335       A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
336       isa<GetElementPtrInst>(A0)) {
337     DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
338           dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
339           dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
340     hoistInstruction(BB, A0, A1);
341     hoistInstruction(BB, L0, L1);
342     return true;
343   }
344   return false;
345 }
346 
347 ///
348 /// \brief Try to hoist two loads to same address into diamond header
349 ///
350 /// Starting from a diamond head block, iterate over the instructions in one
351 /// successor block and try to match a load in the second successor.
352 ///
353 bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
354   bool MergedLoads = false;
355   assert(isDiamondHead(BB));
356   BranchInst *BI = cast<BranchInst>(BB->getTerminator());
357   BasicBlock *Succ0 = BI->getSuccessor(0);
358   BasicBlock *Succ1 = BI->getSuccessor(1);
359   // #Instructions in Succ1 for Compile Time Control
360   int Size1 = Succ1->size();
361   int NLoads = 0;
362   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
363        BBI != BBE;) {
364     Instruction *I = &*BBI;
365     ++BBI;
366 
367     // Don't move non-simple (atomic, volatile) loads.
368     auto *L0 = dyn_cast<LoadInst>(I);
369     if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
370       continue;
371 
372     ++NLoads;
373     if (NLoads * Size1 >= MagicCompileTimeControl)
374       break;
375     if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
376       bool Res = hoistLoad(BB, L0, L1);
377       MergedLoads |= Res;
378       // Don't attempt to hoist above loads that had not been hoisted.
379       if (!Res)
380         break;
381     }
382   }
383   return MergedLoads;
384 }
385 
386 ///
387 /// \brief True when instruction is a sink barrier for a store
388 /// located in Loc
389 ///
390 /// Whenever an instruction could possibly read or modify the
391 /// value being stored or protect against the store from
392 /// happening it is considered a sink barrier.
393 ///
394 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
395                                                       const Instruction &End,
396                                                       MemoryLocation Loc) {
397   for (const Instruction &Inst :
398        make_range(Start.getIterator(), End.getIterator()))
399     if (Inst.mayThrow())
400       return true;
401   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
402 }
403 
404 ///
405 /// \brief Check if \p BB contains a store to the same address as \p SI
406 ///
407 /// \return The store in \p  when it is safe to sink. Otherwise return Null.
408 ///
409 StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
410                                                    StoreInst *Store0) {
411   DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
412   BasicBlock *BB0 = Store0->getParent();
413   for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
414        RBI != RBE; ++RBI) {
415     Instruction *Inst = &*RBI;
416 
417     auto *Store1 = dyn_cast<StoreInst>(Inst);
418     if (!Store1)
419       continue;
420 
421     MemoryLocation Loc0 = MemoryLocation::get(Store0);
422     MemoryLocation Loc1 = MemoryLocation::get(Store1);
423     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
424         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
425         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
426       return Store1;
427     }
428   }
429   return nullptr;
430 }
431 
432 ///
433 /// \brief Create a PHI node in BB for the operands of S0 and S1
434 ///
435 PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
436                                               StoreInst *S1) {
437   // Create a phi if the values mismatch.
438   Value *Opd1 = S0->getValueOperand();
439   Value *Opd2 = S1->getValueOperand();
440   if (Opd1 == Opd2)
441     return nullptr;
442 
443   auto *NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
444                                 &BB->front());
445   NewPN->addIncoming(Opd1, S0->getParent());
446   NewPN->addIncoming(Opd2, S1->getParent());
447   if (MD && NewPN->getType()->getScalarType()->isPointerTy())
448     MD->invalidateCachedPointerInfo(NewPN);
449   return NewPN;
450 }
451 
452 ///
453 /// \brief Merge two stores to same address and sink into \p BB
454 ///
455 /// Also sinks GEP instruction computing the store address
456 ///
457 bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
458                                       StoreInst *S1) {
459   // Only one definition?
460   auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
461   auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
462   if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
463       (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
464       (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
465     DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
466           dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
467           dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
468     // Hoist the instruction.
469     BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
470     // Intersect optional metadata.
471     S0->intersectOptionalDataWith(S1);
472     S0->dropUnknownNonDebugMetadata();
473 
474     // Create the new store to be inserted at the join point.
475     StoreInst *SNew = cast<StoreInst>(S0->clone());
476     Instruction *ANew = A0->clone();
477     SNew->insertBefore(&*InsertPt);
478     ANew->insertBefore(SNew);
479 
480     assert(S0->getParent() == A0->getParent());
481     assert(S1->getParent() == A1->getParent());
482 
483     // New PHI operand? Use it.
484     if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
485       SNew->setOperand(0, NewPN);
486     removeInstruction(S0);
487     removeInstruction(S1);
488     A0->replaceAllUsesWith(ANew);
489     removeInstruction(A0);
490     A1->replaceAllUsesWith(ANew);
491     removeInstruction(A1);
492     return true;
493   }
494   return false;
495 }
496 
497 ///
498 /// \brief True when two stores are equivalent and can sink into the footer
499 ///
500 /// Starting from a diamond tail block, iterate over the instructions in one
501 /// predecessor block and try to match a store in the second predecessor.
502 ///
503 bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
504 
505   bool MergedStores = false;
506   assert(T && "Footer of a diamond cannot be empty");
507 
508   pred_iterator PI = pred_begin(T), E = pred_end(T);
509   assert(PI != E);
510   BasicBlock *Pred0 = *PI;
511   ++PI;
512   BasicBlock *Pred1 = *PI;
513   ++PI;
514   // tail block  of a diamond/hammock?
515   if (Pred0 == Pred1)
516     return false; // No.
517   if (PI != E)
518     return false; // No. More than 2 predecessors.
519 
520   // #Instructions in Succ1 for Compile Time Control
521   int Size1 = Pred1->size();
522   int NStores = 0;
523 
524   for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
525        RBI != RBE;) {
526 
527     Instruction *I = &*RBI;
528     ++RBI;
529 
530     // Don't sink non-simple (atomic, volatile) stores.
531     auto *S0 = dyn_cast<StoreInst>(I);
532     if (!S0 || !S0->isSimple())
533       continue;
534 
535     ++NStores;
536     if (NStores * Size1 >= MagicCompileTimeControl)
537       break;
538     if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
539       bool Res = sinkStore(T, S0, S1);
540       MergedStores |= Res;
541       // Don't attempt to sink below stores that had to stick around
542       // But after removal of a store and some of its feeding
543       // instruction search again from the beginning since the iterator
544       // is likely stale at this point.
545       if (!Res)
546         break;
547       RBI = Pred0->rbegin();
548       RBE = Pred0->rend();
549       DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
550     }
551   }
552   return MergedStores;
553 }
554 
555 ///
556 /// \brief Run the transformation for each function
557 ///
558 bool MergedLoadStoreMotion::runOnFunction(Function &F) {
559   if (skipFunction(F))
560     return false;
561 
562   auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
563   MD = MDWP ? &MDWP->getMemDep() : nullptr;
564   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
565 
566   bool Changed = false;
567   DEBUG(dbgs() << "Instruction Merger\n");
568 
569   // Merge unconditional branches, allowing PRE to catch more
570   // optimization opportunities.
571   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
572     BasicBlock *BB = &*FI++;
573 
574     // Hoist equivalent loads and sink stores
575     // outside diamonds when possible
576     if (isDiamondHead(BB)) {
577       Changed |= mergeLoads(BB);
578       Changed |= mergeStores(getDiamondTail(BB));
579     }
580   }
581   return Changed;
582 }
583