1 //===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements a CFG stacking pass. 12 /// 13 /// This pass reorders the blocks in a function to put them into a reverse 14 /// post-order [0], with special care to keep the order as similar as possible 15 /// to the original order, and to keep loops contiguous even in the case of 16 /// split backedges. 17 /// 18 /// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since 19 /// scope boundaries serve as the labels for WebAssembly's control transfers. 20 /// 21 /// This is sufficient to convert arbitrary CFGs into a form that works on 22 /// WebAssembly, provided that all loops are single-entry. 23 /// 24 /// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings 25 /// 26 //===----------------------------------------------------------------------===// 27 28 #include "WebAssembly.h" 29 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 30 #include "WebAssemblySubtarget.h" 31 #include "llvm/ADT/SCCIterator.h" 32 #include "llvm/ADT/SetVector.h" 33 #include "llvm/CodeGen/MachineDominators.h" 34 #include "llvm/CodeGen/MachineFunction.h" 35 #include "llvm/CodeGen/MachineInstrBuilder.h" 36 #include "llvm/CodeGen/MachineLoopInfo.h" 37 #include "llvm/CodeGen/Passes.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/raw_ostream.h" 40 using namespace llvm; 41 42 #define DEBUG_TYPE "wasm-cfg-stackify" 43 44 namespace { 45 class WebAssemblyCFGStackify final : public MachineFunctionPass { 46 const char *getPassName() const override { 47 return "WebAssembly CFG Stackify"; 48 } 49 50 void getAnalysisUsage(AnalysisUsage &AU) const override { 51 AU.setPreservesCFG(); 52 AU.addRequired<MachineDominatorTree>(); 53 AU.addPreserved<MachineDominatorTree>(); 54 AU.addRequired<MachineLoopInfo>(); 55 AU.addPreserved<MachineLoopInfo>(); 56 MachineFunctionPass::getAnalysisUsage(AU); 57 } 58 59 bool runOnMachineFunction(MachineFunction &MF) override; 60 61 public: 62 static char ID; // Pass identification, replacement for typeid 63 WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} 64 }; 65 } // end anonymous namespace 66 67 char WebAssemblyCFGStackify::ID = 0; 68 FunctionPass *llvm::createWebAssemblyCFGStackify() { 69 return new WebAssemblyCFGStackify(); 70 } 71 72 static void EliminateMultipleEntryLoops(MachineFunction &MF, 73 const MachineLoopInfo &MLI) { 74 SmallPtrSet<MachineBasicBlock *, 8> InSet; 75 for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF); 76 I != E; ++I) { 77 const std::vector<MachineBasicBlock *> &CurrentSCC = *I; 78 79 // Skip trivial SCCs. 80 if (CurrentSCC.size() == 1) 81 continue; 82 83 InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); 84 MachineBasicBlock *Header = nullptr; 85 for (MachineBasicBlock *MBB : CurrentSCC) { 86 for (MachineBasicBlock *Pred : MBB->predecessors()) { 87 if (InSet.count(Pred)) 88 continue; 89 if (!Header) { 90 Header = MBB; 91 break; 92 } 93 // TODO: Implement multiple-entry loops. 94 report_fatal_error("multiple-entry loops are not supported yet"); 95 } 96 } 97 assert(MLI.isLoopHeader(Header)); 98 99 InSet.clear(); 100 } 101 } 102 103 namespace { 104 /// Post-order traversal stack entry. 105 struct POStackEntry { 106 MachineBasicBlock *MBB; 107 SmallVector<MachineBasicBlock *, 0> Succs; 108 109 POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, 110 const MachineLoopInfo &MLI); 111 }; 112 } // end anonymous namespace 113 114 static bool LoopContains(const MachineLoop *Loop, 115 const MachineBasicBlock *MBB) { 116 return Loop ? Loop->contains(MBB) : true; 117 } 118 119 POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, 120 const MachineLoopInfo &MLI) 121 : MBB(MBB), Succs(MBB->successors()) { 122 // RPO is not a unique form, since at every basic block with multiple 123 // successors, the DFS has to pick which order to visit the successors in. 124 // Sort them strategically (see below). 125 MachineLoop *Loop = MLI.getLoopFor(MBB); 126 MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); 127 MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; 128 std::stable_sort( 129 Succs.begin(), Succs.end(), 130 [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { 131 if (A == B) 132 return false; 133 134 // Keep loops contiguous by preferring the block that's in the same 135 // loop. 136 bool LoopContainsA = LoopContains(Loop, A); 137 bool LoopContainsB = LoopContains(Loop, B); 138 if (LoopContainsA && !LoopContainsB) 139 return true; 140 if (!LoopContainsA && LoopContainsB) 141 return false; 142 143 // Minimize perturbation by preferring the block which is the immediate 144 // layout successor. 145 if (A == LayoutSucc) 146 return true; 147 if (B == LayoutSucc) 148 return false; 149 150 // TODO: More sophisticated orderings may be profitable here. 151 152 return false; 153 }); 154 } 155 156 /// Return the "bottom" block of a loop. This differs from 157 /// MachineLoop::getBottomBlock in that it works even if the loop is 158 /// discontiguous. 159 static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { 160 MachineBasicBlock *Bottom = Loop->getHeader(); 161 for (MachineBasicBlock *MBB : Loop->blocks()) 162 if (MBB->getNumber() > Bottom->getNumber()) 163 Bottom = MBB; 164 return Bottom; 165 } 166 167 /// Sort the blocks in RPO, taking special care to make sure that loops are 168 /// contiguous even in the case of split backedges. 169 /// 170 /// TODO: Determine whether RPO is actually worthwhile, or whether we should 171 /// move to just a stable-topological-sort-based approach that would preserve 172 /// more of the original order. 173 static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { 174 // Note that we do our own RPO rather than using 175 // "llvm/ADT/PostOrderIterator.h" because we want control over the order that 176 // successors are visited in (see above). Also, we can sort the blocks in the 177 // MachineFunction as we go. 178 SmallPtrSet<MachineBasicBlock *, 16> Visited; 179 SmallVector<POStackEntry, 16> Stack; 180 181 MachineBasicBlock *EntryBlock = &*MF.begin(); 182 Visited.insert(EntryBlock); 183 Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); 184 185 for (;;) { 186 POStackEntry &Entry = Stack.back(); 187 SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; 188 if (!Succs.empty()) { 189 MachineBasicBlock *Succ = Succs.pop_back_val(); 190 if (Visited.insert(Succ).second) 191 Stack.push_back(POStackEntry(Succ, MF, MLI)); 192 continue; 193 } 194 195 // Put the block in its position in the MachineFunction. 196 MachineBasicBlock &MBB = *Entry.MBB; 197 MBB.moveBefore(&*MF.begin()); 198 199 // Branch instructions may utilize a fallthrough, so update them if a 200 // fallthrough has been added or removed. 201 if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && 202 !MBB.back().isBarrier()) 203 report_fatal_error( 204 "Non-branch terminator with fallthrough cannot yet be rewritten"); 205 if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) 206 MBB.updateTerminator(); 207 208 Stack.pop_back(); 209 if (Stack.empty()) 210 break; 211 } 212 213 // Now that we've sorted the blocks in RPO, renumber them. 214 MF.RenumberBlocks(); 215 216 #ifndef NDEBUG 217 SmallSetVector<MachineLoop *, 8> OnStack; 218 219 // Insert a sentinel representing the degenerate loop that starts at the 220 // function entry block and includes the entire function as a "loop" that 221 // executes once. 222 OnStack.insert(nullptr); 223 224 for (auto &MBB : MF) { 225 assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); 226 227 MachineLoop *Loop = MLI.getLoopFor(&MBB); 228 if (Loop && &MBB == Loop->getHeader()) { 229 // Loop header. The loop predecessor should be sorted above, and the other 230 // predecessors should be backedges below. 231 for (auto Pred : MBB.predecessors()) 232 assert( 233 (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && 234 "Loop header predecessors must be loop predecessors or backedges"); 235 assert(OnStack.insert(Loop) && "Loops should be declared at most once."); 236 } else { 237 // Not a loop header. All predecessors should be sorted above. 238 for (auto Pred : MBB.predecessors()) 239 assert(Pred->getNumber() < MBB.getNumber() && 240 "Non-loop-header predecessors should be topologically sorted"); 241 assert(OnStack.count(MLI.getLoopFor(&MBB)) && 242 "Blocks must be nested in their loops"); 243 } 244 while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) 245 OnStack.pop_back(); 246 } 247 assert(OnStack.pop_back_val() == nullptr && 248 "The function entry block shouldn't actually be a loop header"); 249 assert(OnStack.empty() && 250 "Control flow stack pushes and pops should be balanced."); 251 #endif 252 } 253 254 /// Test whether Pred has any terminators explicitly branching to MBB, as 255 /// opposed to falling through. Note that it's possible (eg. in unoptimized 256 /// code) for a branch instruction to both branch to a block and fallthrough 257 /// to it, so we check the actual branch operands to see if there are any 258 /// explicit mentions. 259 static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, 260 MachineBasicBlock *MBB) { 261 for (MachineInstr &MI : Pred->terminators()) 262 for (MachineOperand &MO : MI.explicit_operands()) 263 if (MO.isMBB() && MO.getMBB() == MBB) 264 return true; 265 return false; 266 } 267 268 /// Insert a BLOCK marker for branches to MBB (if needed). 269 static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, 270 SmallVectorImpl<MachineBasicBlock *> &ScopeTops, 271 const WebAssemblyInstrInfo &TII, 272 const MachineLoopInfo &MLI, 273 MachineDominatorTree &MDT) { 274 // First compute the nearest common dominator of all forward non-fallthrough 275 // predecessors so that we minimize the time that the BLOCK is on the stack, 276 // which reduces overall stack height. 277 MachineBasicBlock *Header = nullptr; 278 bool IsBranchedTo = false; 279 int MBBNumber = MBB.getNumber(); 280 for (MachineBasicBlock *Pred : MBB.predecessors()) 281 if (Pred->getNumber() < MBBNumber) { 282 Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred; 283 if (ExplicitlyBranchesTo(Pred, &MBB)) 284 IsBranchedTo = true; 285 } 286 if (!Header) 287 return; 288 if (!IsBranchedTo) 289 return; 290 291 assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); 292 MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); 293 294 // If the nearest common dominator is inside a more deeply nested context, 295 // walk out to the nearest scope which isn't more deeply nested. 296 for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { 297 if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { 298 if (ScopeTop->getNumber() > Header->getNumber()) { 299 // Skip over an intervening scope. 300 I = next(MachineFunction::iterator(ScopeTop)); 301 } else { 302 // We found a scope level at an appropriate depth. 303 Header = ScopeTop; 304 break; 305 } 306 } 307 } 308 309 // If there's a loop which ends just before MBB which contains Header, we can 310 // reuse its label instead of inserting a new BLOCK. 311 for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); 312 Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) 313 if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) 314 return; 315 316 // Decide where in Header to put the BLOCK. 317 MachineBasicBlock::iterator InsertPos; 318 MachineLoop *HeaderLoop = MLI.getLoopFor(Header); 319 if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { 320 // Header is the header of a loop that does not lexically contain MBB, so 321 // the BLOCK needs to be above the LOOP. 322 InsertPos = Header->begin(); 323 } else { 324 // Otherwise, insert the BLOCK as late in Header as we can, but before the 325 // beginning of the local expression tree and any nested BLOCKs. 326 InsertPos = Header->getFirstTerminator(); 327 while (InsertPos != Header->begin() && 328 prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && 329 prev(InsertPos)->getOpcode() != WebAssembly::LOOP) 330 --InsertPos; 331 } 332 333 // Add the BLOCK. 334 BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) 335 .addMBB(&MBB); 336 337 // Track the farthest-spanning scope that ends at this point. 338 int Number = MBB.getNumber(); 339 if (!ScopeTops[Number] || 340 ScopeTops[Number]->getNumber() > Header->getNumber()) 341 ScopeTops[Number] = Header; 342 } 343 344 /// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). 345 static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF, 346 SmallVectorImpl<MachineBasicBlock *> &ScopeTops, 347 const WebAssemblyInstrInfo &TII, 348 const MachineLoopInfo &MLI) { 349 MachineLoop *Loop = MLI.getLoopFor(&MBB); 350 if (!Loop || Loop->getHeader() != &MBB) 351 return; 352 353 // The operand of a LOOP is the first block after the loop. If the loop is the 354 // bottom of the function, insert a dummy block at the end. 355 MachineBasicBlock *Bottom = LoopBottom(Loop); 356 auto Iter = next(MachineFunction::iterator(Bottom)); 357 if (Iter == MF.end()) { 358 MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); 359 // Give it a fake predecessor so that AsmPrinter prints its label. 360 Label->addSuccessor(Label); 361 MF.push_back(Label); 362 Iter = next(MachineFunction::iterator(Bottom)); 363 } 364 MachineBasicBlock *AfterLoop = &*Iter; 365 BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP)) 366 .addMBB(AfterLoop); 367 368 // Emit a special no-op telling the asm printer that we need a label to close 369 // the loop scope, even though the destination is only reachable by 370 // fallthrough. 371 if (!Bottom->back().isBarrier()) 372 BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END)); 373 374 assert((!ScopeTops[AfterLoop->getNumber()] || 375 ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && 376 "With RPO we should visit the outer-most loop for a block first."); 377 if (!ScopeTops[AfterLoop->getNumber()]) 378 ScopeTops[AfterLoop->getNumber()] = &MBB; 379 } 380 381 /// Insert LOOP and BLOCK markers at appropriate places. 382 static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, 383 const WebAssemblyInstrInfo &TII, 384 MachineDominatorTree &MDT) { 385 // For each block whose label represents the end of a scope, record the block 386 // which holds the beginning of the scope. This will allow us to quickly skip 387 // over scoped regions when walking blocks. We allocate one more than the 388 // number of blocks in the function to accommodate for the possible fake block 389 // we may insert at the end. 390 SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); 391 392 for (auto &MBB : MF) { 393 // Place the LOOP for MBB if MBB is the header of a loop. 394 PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI); 395 396 // Place the BLOCK for MBB if MBB is branched to from above. 397 PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT); 398 } 399 } 400 401 #ifndef NDEBUG 402 static bool 403 IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack, 404 const MachineBasicBlock *MBB) { 405 for (const auto &Pair : Stack) 406 if (Pair.first == MBB) 407 return true; 408 return false; 409 } 410 #endif 411 412 bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { 413 DEBUG(dbgs() << "********** CFG Stackifying **********\n" 414 "********** Function: " 415 << MF.getName() << '\n'); 416 417 const auto &MLI = getAnalysis<MachineLoopInfo>(); 418 auto &MDT = getAnalysis<MachineDominatorTree>(); 419 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); 420 421 // RPO sorting needs all loops to be single-entry. 422 EliminateMultipleEntryLoops(MF, MLI); 423 424 // Sort the blocks in RPO, with contiguous loops. 425 SortBlocks(MF, MLI); 426 427 // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. 428 PlaceMarkers(MF, MLI, TII, MDT); 429 430 #ifndef NDEBUG 431 // Verify that block and loop beginnings and endings are in LIFO order, and 432 // that all references to blocks are to blocks on the stack at the point of 433 // the reference. 434 SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack; 435 for (auto &MBB : MF) { 436 while (!Stack.empty() && Stack.back().first == &MBB) 437 if (Stack.back().second) { 438 assert(Stack.size() >= 2); 439 Stack.pop_back(); 440 Stack.pop_back(); 441 } else { 442 assert(Stack.size() >= 1); 443 Stack.pop_back(); 444 } 445 for (auto &MI : MBB) 446 switch (MI.getOpcode()) { 447 case WebAssembly::LOOP: 448 Stack.push_back(std::make_pair(&MBB, false)); 449 Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); 450 break; 451 case WebAssembly::BLOCK: 452 Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); 453 break; 454 default: 455 // Verify that all referenced blocks are in scope. A reference to a 456 // block with a negative number is invalid, but can happen with inline 457 // asm, so we shouldn't assert on it, but instead let CodeGen properly 458 // fail on it. 459 for (const MachineOperand &MO : MI.explicit_operands()) 460 if (MO.isMBB() && MO.getMBB()->getNumber() >= 0) 461 assert(IsOnStack(Stack, MO.getMBB())); 462 break; 463 } 464 } 465 assert(Stack.empty()); 466 #endif 467 468 return true; 469 } 470