xref: /llvm-project/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp (revision aa641a51719eed9509566e8352bf59e75e2c81b4)
1 //===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //! \file
11 //! \brief This pass performs merges of loads and stores on both sides of a
12 //  diamond (hammock). It hoists the loads and sinks the stores.
13 //
14 // The algorithm iteratively hoists two loads to the same address out of a
15 // diamond (hammock) and merges them into a single load in the header.
16 // Similarly, it sinks and merges two stores to the tail block (footer). The
17 // algorithm iterates over the instructions of one side of the diamond and
18 // attempts to find a matching load/store on the other side. It hoists / sinks
19 // when it thinks it is safe to do so. This optimization helps with e.g. hiding
20 // load latencies, triggering if-conversion, and reducing static code size.
21 //
22 //===----------------------------------------------------------------------===//
23 //
24 //
25 // Example:
26 // Diamond shaped code before merge:
27 //
28 //            header:
29 //                     br %cond, label %if.then, label %if.else
30 //                        +                    +
31 //                       +                      +
32 //                      +                        +
33 //            if.then:                         if.else:
34 //               %lt = load %addr_l               %le = load %addr_l
35 //               <use %lt>                        <use %le>
36 //               <...>                            <...>
37 //               store %st, %addr_s               store %se, %addr_s
38 //               br label %if.end                 br label %if.end
39 //                     +                         +
40 //                      +                       +
41 //                       +                     +
42 //            if.end ("footer"):
43 //                     <...>
44 //
45 // Diamond shaped code after merge:
46 //
47 //            header:
48 //                     %l = load %addr_l
49 //                     br %cond, label %if.then, label %if.else
50 //                        +                    +
51 //                       +                      +
52 //                      +                        +
53 //            if.then:                         if.else:
54 //               <use %l>                         <use %l>
55 //               <...>                            <...>
56 //               br label %if.end                 br label %if.end
57 //                      +                        +
58 //                       +                      +
59 //                        +                    +
60 //            if.end ("footer"):
61 //                     %s.sink = phi [%st, if.then], [%se, if.else]
62 //                     <...>
63 //                     store %s.sink, %addr_s
64 //                     <...>
65 //
66 //
67 //===----------------------- TODO -----------------------------------------===//
68 //
69 // 1) Generalize to regions other than diamonds
70 // 2) Be more aggressive merging memory operations
71 // Note that both changes require register pressure control
72 //
73 //===----------------------------------------------------------------------===//
74 
75 #include "llvm/ADT/Statistic.h"
76 #include "llvm/Analysis/AliasAnalysis.h"
77 #include "llvm/Analysis/CFG.h"
78 #include "llvm/Analysis/GlobalsModRef.h"
79 #include "llvm/Analysis/Loads.h"
80 #include "llvm/Analysis/MemoryBuiltins.h"
81 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
82 #include "llvm/Analysis/TargetLibraryInfo.h"
83 #include "llvm/IR/Metadata.h"
84 #include "llvm/IR/PatternMatch.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/raw_ostream.h"
87 #include "llvm/Transforms/Scalar.h"
88 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
89 #include "llvm/Transforms/Utils/SSAUpdater.h"
90 
91 using namespace llvm;
92 
93 #define DEBUG_TYPE "mldst-motion"
94 
95 //===----------------------------------------------------------------------===//
96 //                         MergedLoadStoreMotion Pass
97 //===----------------------------------------------------------------------===//
98 
99 namespace {
100 class MergedLoadStoreMotion : public FunctionPass {
101   AliasAnalysis *AA;
102   MemoryDependenceResults *MD;
103 
104 public:
105   static char ID; // Pass identification, replacement for typeid
106   MergedLoadStoreMotion()
107       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
108     initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
109   }
110 
111   bool runOnFunction(Function &F) override;
112 
113 private:
114   // This transformation requires dominator postdominator info
115   void getAnalysisUsage(AnalysisUsage &AU) const override {
116     AU.setPreservesCFG();
117     AU.addRequired<TargetLibraryInfoWrapperPass>();
118     AU.addRequired<AAResultsWrapperPass>();
119     AU.addPreserved<GlobalsAAWrapperPass>();
120     AU.addPreserved<MemoryDependenceWrapperPass>();
121   }
122 
123   // Helper routines
124 
125   ///
126   /// \brief Remove instruction from parent and update memory dependence
127   /// analysis.
128   ///
129   void removeInstruction(Instruction *Inst);
130   BasicBlock *getDiamondTail(BasicBlock *BB);
131   bool isDiamondHead(BasicBlock *BB);
132   // Routines for hoisting loads
133   bool isLoadHoistBarrierInRange(const Instruction& Start,
134                                  const Instruction& End,
135                                  LoadInst* LI);
136   LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
137   void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
138                         Instruction *ElseInst);
139   bool isSafeToHoist(Instruction *I) const;
140   bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
141   bool mergeLoads(BasicBlock *BB);
142   // Routines for sinking stores
143   StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
144   PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
145   bool isStoreSinkBarrierInRange(const Instruction &Start,
146                                  const Instruction &End, MemoryLocation Loc);
147   bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
148   bool mergeStores(BasicBlock *BB);
149   // The mergeLoad/Store algorithms could have Size0 * Size1 complexity,
150   // where Size0 and Size1 are the #instructions on the two sides of
151   // the diamond. The constant chosen here is arbitrary. Compiler Time
152   // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl.
153   const int MagicCompileTimeControl;
154 };
155 
156 char MergedLoadStoreMotion::ID = 0;
157 } // anonymous namespace
158 
159 ///
160 /// \brief createMergedLoadStoreMotionPass - The public interface to this file.
161 ///
162 FunctionPass *llvm::createMergedLoadStoreMotionPass() {
163   return new MergedLoadStoreMotion();
164 }
165 
166 INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
167                       "MergedLoadStoreMotion", false, false)
168 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
169 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
170 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
171 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
172 INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
173                     "MergedLoadStoreMotion", false, false)
174 
175 ///
176 /// \brief Remove instruction from parent and update memory dependence analysis.
177 ///
178 void MergedLoadStoreMotion::removeInstruction(Instruction *Inst) {
179   // Notify the memory dependence analysis.
180   if (MD) {
181     MD->removeInstruction(Inst);
182     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
183       MD->invalidateCachedPointerInfo(LI->getPointerOperand());
184     if (Inst->getType()->getScalarType()->isPointerTy()) {
185       MD->invalidateCachedPointerInfo(Inst);
186     }
187   }
188   Inst->eraseFromParent();
189 }
190 
191 ///
192 /// \brief Return tail block of a diamond.
193 ///
194 BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) {
195   assert(isDiamondHead(BB) && "Basic block is not head of a diamond");
196   BranchInst *BI = (BranchInst *)(BB->getTerminator());
197   BasicBlock *Succ0 = BI->getSuccessor(0);
198   BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
199   return Tail;
200 }
201 
202 ///
203 /// \brief True when BB is the head of a diamond (hammock)
204 ///
205 bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
206   if (!BB)
207     return false;
208   if (!isa<BranchInst>(BB->getTerminator()))
209     return false;
210   if (BB->getTerminator()->getNumSuccessors() != 2)
211     return false;
212 
213   BranchInst *BI = (BranchInst *)(BB->getTerminator());
214   BasicBlock *Succ0 = BI->getSuccessor(0);
215   BasicBlock *Succ1 = BI->getSuccessor(1);
216 
217   if (!Succ0->getSinglePredecessor() ||
218       Succ0->getTerminator()->getNumSuccessors() != 1)
219     return false;
220   if (!Succ1->getSinglePredecessor() ||
221       Succ1->getTerminator()->getNumSuccessors() != 1)
222     return false;
223 
224   BasicBlock *Tail = Succ0->getTerminator()->getSuccessor(0);
225   // Ignore triangles.
226   if (Succ1->getTerminator()->getSuccessor(0) != Tail)
227     return false;
228   return true;
229 }
230 
231 ///
232 /// \brief True when instruction is a hoist barrier for a load
233 ///
234 /// Whenever an instruction could possibly modify the value
235 /// being loaded or protect against the load from happening
236 /// it is considered a hoist barrier.
237 ///
238 bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
239                                                       const Instruction& End,
240                                                       LoadInst* LI) {
241   MemoryLocation Loc = MemoryLocation::get(LI);
242   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
243 }
244 
245 ///
246 /// \brief Decide if a load can be hoisted
247 ///
248 /// When there is a load in \p BB to the same address as \p LI
249 /// and it can be hoisted from \p BB, return that load.
250 /// Otherwise return Null.
251 ///
252 LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
253                                                    LoadInst *Load0) {
254 
255   for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
256        ++BBI) {
257     Instruction *Inst = &*BBI;
258 
259     // Only merge and hoist loads when their result in used only in BB
260     if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
261       continue;
262 
263     LoadInst *Load1 = dyn_cast<LoadInst>(Inst);
264     BasicBlock *BB0 = Load0->getParent();
265 
266     MemoryLocation Loc0 = MemoryLocation::get(Load0);
267     MemoryLocation Loc1 = MemoryLocation::get(Load1);
268     if (AA->isMustAlias(Loc0, Loc1) && Load0->isSameOperationAs(Load1) &&
269         !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1) &&
270         !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0)) {
271       return Load1;
272     }
273   }
274   return nullptr;
275 }
276 
277 ///
278 /// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
279 /// \p BB
280 ///
281 /// BB is the head of a diamond
282 ///
283 void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
284                                              Instruction *HoistCand,
285                                              Instruction *ElseInst) {
286   DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
287         dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
288         dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
289   // Hoist the instruction.
290   assert(HoistCand->getParent() != BB);
291 
292   // Intersect optional metadata.
293   HoistCand->intersectOptionalDataWith(ElseInst);
294   HoistCand->dropUnknownNonDebugMetadata();
295 
296   // Prepend point for instruction insert
297   Instruction *HoistPt = BB->getTerminator();
298 
299   // Merged instruction
300   Instruction *HoistedInst = HoistCand->clone();
301 
302   // Hoist instruction.
303   HoistedInst->insertBefore(HoistPt);
304 
305   HoistCand->replaceAllUsesWith(HoistedInst);
306   removeInstruction(HoistCand);
307   // Replace the else block instruction.
308   ElseInst->replaceAllUsesWith(HoistedInst);
309   removeInstruction(ElseInst);
310 }
311 
312 ///
313 /// \brief Return true if no operand of \p I is defined in I's parent block
314 ///
315 bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
316   BasicBlock *Parent = I->getParent();
317   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
318     Instruction *Instr = dyn_cast<Instruction>(I->getOperand(i));
319     if (Instr && Instr->getParent() == Parent)
320       return false;
321   }
322   return true;
323 }
324 
325 ///
326 /// \brief Merge two equivalent loads and GEPs and hoist into diamond head
327 ///
328 bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
329                                       LoadInst *L1) {
330   // Only one definition?
331   Instruction *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
332   Instruction *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
333   if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
334       A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
335       A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
336       isa<GetElementPtrInst>(A0)) {
337     DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
338           dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
339           dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
340     hoistInstruction(BB, A0, A1);
341     hoistInstruction(BB, L0, L1);
342     return true;
343   } else
344     return false;
345 }
346 
347 ///
348 /// \brief Try to hoist two loads to same address into diamond header
349 ///
350 /// Starting from a diamond head block, iterate over the instructions in one
351 /// successor block and try to match a load in the second successor.
352 ///
353 bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
354   bool MergedLoads = false;
355   assert(isDiamondHead(BB));
356   BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
357   BasicBlock *Succ0 = BI->getSuccessor(0);
358   BasicBlock *Succ1 = BI->getSuccessor(1);
359   // #Instructions in Succ1 for Compile Time Control
360   int Size1 = Succ1->size();
361   int NLoads = 0;
362   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
363        BBI != BBE;) {
364     Instruction *I = &*BBI;
365     ++BBI;
366 
367     // Only move non-simple (atomic, volatile) loads.
368     LoadInst *L0 = dyn_cast<LoadInst>(I);
369     if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
370       continue;
371 
372     ++NLoads;
373     if (NLoads * Size1 >= MagicCompileTimeControl)
374       break;
375     if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
376       bool Res = hoistLoad(BB, L0, L1);
377       MergedLoads |= Res;
378       // Don't attempt to hoist above loads that had not been hoisted.
379       if (!Res)
380         break;
381     }
382   }
383   return MergedLoads;
384 }
385 
386 ///
387 /// \brief True when instruction is a sink barrier for a store
388 /// located in Loc
389 ///
390 /// Whenever an instruction could possibly read or modify the
391 /// value being stored or protect against the store from
392 /// happening it is considered a sink barrier.
393 ///
394 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
395                                                       const Instruction &End,
396                                                       MemoryLocation Loc) {
397   return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
398 }
399 
400 ///
401 /// \brief Check if \p BB contains a store to the same address as \p SI
402 ///
403 /// \return The store in \p  when it is safe to sink. Otherwise return Null.
404 ///
405 StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
406                                                    StoreInst *Store0) {
407   DEBUG(dbgs() << "can Sink? : "; Store0->dump(); dbgs() << "\n");
408   BasicBlock *BB0 = Store0->getParent();
409   for (BasicBlock::reverse_iterator RBI = BB1->rbegin(), RBE = BB1->rend();
410        RBI != RBE; ++RBI) {
411     Instruction *Inst = &*RBI;
412 
413     if (!isa<StoreInst>(Inst))
414        continue;
415 
416     StoreInst *Store1 = cast<StoreInst>(Inst);
417 
418     MemoryLocation Loc0 = MemoryLocation::get(Store0);
419     MemoryLocation Loc1 = MemoryLocation::get(Store1);
420     if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
421       !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store1))),
422                                  BB1->back(), Loc1) &&
423       !isStoreSinkBarrierInRange(*(std::next(BasicBlock::iterator(Store0))),
424                                  BB0->back(), Loc0)) {
425       return Store1;
426     }
427   }
428   return nullptr;
429 }
430 
431 ///
432 /// \brief Create a PHI node in BB for the operands of S0 and S1
433 ///
434 PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
435                                               StoreInst *S1) {
436   // Create a phi if the values mismatch.
437   PHINode *NewPN = nullptr;
438   Value *Opd1 = S0->getValueOperand();
439   Value *Opd2 = S1->getValueOperand();
440   if (Opd1 != Opd2) {
441     NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
442                             &BB->front());
443     NewPN->addIncoming(Opd1, S0->getParent());
444     NewPN->addIncoming(Opd2, S1->getParent());
445     if (MD && NewPN->getType()->getScalarType()->isPointerTy())
446       MD->invalidateCachedPointerInfo(NewPN);
447   }
448   return NewPN;
449 }
450 
451 ///
452 /// \brief Merge two stores to same address and sink into \p BB
453 ///
454 /// Also sinks GEP instruction computing the store address
455 ///
456 bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
457                                       StoreInst *S1) {
458   // Only one definition?
459   Instruction *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
460   Instruction *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
461   if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() &&
462       (A0->getParent() == S0->getParent()) && A1->hasOneUse() &&
463       (A1->getParent() == S1->getParent()) && isa<GetElementPtrInst>(A0)) {
464     DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
465           dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
466           dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
467     // Hoist the instruction.
468     BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
469     // Intersect optional metadata.
470     S0->intersectOptionalDataWith(S1);
471     S0->dropUnknownNonDebugMetadata();
472 
473     // Create the new store to be inserted at the join point.
474     StoreInst *SNew = (StoreInst *)(S0->clone());
475     Instruction *ANew = A0->clone();
476     SNew->insertBefore(&*InsertPt);
477     ANew->insertBefore(SNew);
478 
479     assert(S0->getParent() == A0->getParent());
480     assert(S1->getParent() == A1->getParent());
481 
482     PHINode *NewPN = getPHIOperand(BB, S0, S1);
483     // New PHI operand? Use it.
484     if (NewPN)
485       SNew->setOperand(0, NewPN);
486     removeInstruction(S0);
487     removeInstruction(S1);
488     A0->replaceAllUsesWith(ANew);
489     removeInstruction(A0);
490     A1->replaceAllUsesWith(ANew);
491     removeInstruction(A1);
492     return true;
493   }
494   return false;
495 }
496 
497 ///
498 /// \brief True when two stores are equivalent and can sink into the footer
499 ///
500 /// Starting from a diamond tail block, iterate over the instructions in one
501 /// predecessor block and try to match a store in the second predecessor.
502 ///
503 bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
504 
505   bool MergedStores = false;
506   assert(T && "Footer of a diamond cannot be empty");
507 
508   pred_iterator PI = pred_begin(T), E = pred_end(T);
509   assert(PI != E);
510   BasicBlock *Pred0 = *PI;
511   ++PI;
512   BasicBlock *Pred1 = *PI;
513   ++PI;
514   // tail block  of a diamond/hammock?
515   if (Pred0 == Pred1)
516     return false; // No.
517   if (PI != E)
518     return false; // No. More than 2 predecessors.
519 
520   // #Instructions in Succ1 for Compile Time Control
521   int Size1 = Pred1->size();
522   int NStores = 0;
523 
524   for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend();
525        RBI != RBE;) {
526 
527     Instruction *I = &*RBI;
528     ++RBI;
529 
530     // Sink move non-simple (atomic, volatile) stores
531     if (!isa<StoreInst>(I))
532       continue;
533     StoreInst *S0 = (StoreInst *)I;
534     if (!S0->isSimple())
535       continue;
536 
537     ++NStores;
538     if (NStores * Size1 >= MagicCompileTimeControl)
539       break;
540     if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
541       bool Res = sinkStore(T, S0, S1);
542       MergedStores |= Res;
543       // Don't attempt to sink below stores that had to stick around
544       // But after removal of a store and some of its feeding
545       // instruction search again from the beginning since the iterator
546       // is likely stale at this point.
547       if (!Res)
548         break;
549       else {
550         RBI = Pred0->rbegin();
551         RBE = Pred0->rend();
552         DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
553       }
554     }
555   }
556   return MergedStores;
557 }
558 
559 ///
560 /// \brief Run the transformation for each function
561 ///
562 bool MergedLoadStoreMotion::runOnFunction(Function &F) {
563   if (skipFunction(F))
564     return false;
565 
566   auto *MDWP = getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
567   MD = MDWP ? &MDWP->getMemDep() : nullptr;
568   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
569 
570   bool Changed = false;
571   DEBUG(dbgs() << "Instruction Merger\n");
572 
573   // Merge unconditional branches, allowing PRE to catch more
574   // optimization opportunities.
575   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
576     BasicBlock *BB = &*FI++;
577 
578     // Hoist equivalent loads and sink stores
579     // outside diamonds when possible
580     if (isDiamondHead(BB)) {
581       Changed |= mergeLoads(BB);
582       Changed |= mergeStores(getDiamondTail(BB));
583     }
584   }
585   return Changed;
586 }
587