1 //===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass is responsible for finalizing the functions frame layout, saving 10 // callee saved registers, and for emitting prolog & epilog code for the 11 // function. 12 // 13 // This pass must be run after register allocation. After this pass is 14 // executed, it is illegal to construct MO_FrameIndex operands. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/BitVector.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/SmallSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 27 #include "llvm/CodeGen/MachineBasicBlock.h" 28 #include "llvm/CodeGen/MachineDominators.h" 29 #include "llvm/CodeGen/MachineFrameInfo.h" 30 #include "llvm/CodeGen/MachineFunction.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineLoopInfo.h" 35 #include "llvm/CodeGen/MachineModuleInfo.h" 36 #include "llvm/CodeGen/MachineOperand.h" 37 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/CodeGen/RegisterScavenging.h" 40 #include "llvm/CodeGen/TargetFrameLowering.h" 41 #include "llvm/CodeGen/TargetInstrInfo.h" 42 #include "llvm/CodeGen/TargetOpcodes.h" 43 #include "llvm/CodeGen/TargetRegisterInfo.h" 44 #include "llvm/CodeGen/TargetSubtargetInfo.h" 45 
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "prologepilog"

using MBBVector = SmallVector<MachineBasicBlock *, 4>;

STATISTIC(NumLeafFuncWithSpills, "Number of leaf functions with CSRs");
STATISTIC(NumFuncSeen, "Number of functions seen in PEI");


namespace {

/// Machine function pass that finalizes the frame layout, spills and restores
/// callee-saved registers, and emits the prolog and epilog of each function.
class PEI : public MachineFunctionPass {
public:
  static char ID;

  PEI() : MachineFunctionPass(ID) {
    initializePEIPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Insert prolog/epilog code into \p MF and rewrite abstract frame indexes
  /// into concrete references.
  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  // Register scavenger for the function being processed; created in
  // runOnMachineFunction only when the target asks for register scavenging.
  RegScavenger *RS = nullptr;

  // Inclusive range of frame indexes that hold callee-saved registers. The
  // initial values (Min > Max) denote an empty range.
  unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max();
  unsigned MaxCSFrameIndex = 0;

  // Blocks receiving the save code and the restore code of the current
  // function. There is usually one save block; Windows EH funclet entries add
  // more.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

  // Whether the register scavenger resolves the registers that materialize
  // frame indexes. Mirrors TRI->requiresFrameIndexScavenging() for the
  // current function.
  bool FrameIndexVirtualScavenging = false;

  // Whether the scavenger is still handed to frame index elimination even
  // though FrameIndexVirtualScavenging is in use.
  bool FrameIndexEliminationScavenging = false;

  // Sink for optimization remarks.
  MachineOptimizationRemarkEmitter *ORE = nullptr;

  void calculateCallFrameInfo(MachineFunction &MF);
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  void spillCalleeSavedRegs(MachineFunction &MF);

  void calculateFrameObjectOffsets(MachineFunction &MF);
  void replaceFrameIndices(MachineFunction &MF);
  void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
                           int &SPAdj);
  // Debug values carry frame indices in a target independent form (plain
  // frame index plus offset) instead of a target-specific addressing mode.
  bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
                                   unsigned OpIdx, int SPAdj = 0);
  // Same job as replaceFrameIndices, but walking the MIR and the register
  // scavenger backward.
  void replaceFrameIndicesBackward(MachineFunction &MF);
  void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF,
                                   int &SPAdj);

  void insertPrologEpilogCode(MachineFunction &MF);
  void insertZeroCallUsedRegs(MachineFunction &MF);
};

} // end anonymous namespace

char PEI::ID = 0;

char &llvm::PrologEpilogCodeInserterID = PEI::ID;

INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(PEI, DEBUG_TYPE,
                    "Prologue/Epilogue Insertion & Frame Finalization", false,
                    false)

/// Factory for the prolog/epilog insertion pass.
MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
  return new PEI();
}

STATISTIC(NumBytesStackSpace,
          "Number of bytes used for stack in all functions");

/// The pass keeps the CFG, loop info and dominator tree intact and needs the
/// machine optimization remark emitter.
void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addPreserved<MachineLoopInfo>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// StackObjSet - Ordered set of stack object indexes.
using StackObjSet = SmallSetVector<int, 8>;

/// Per-block list of DBG_VALUEs taken out of the block by
/// stashEntryDbgValues.
using SavedDbgValuesMap =
    SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>;

/// Stash DBG_VALUEs that describe parameters and which are placed at the start
/// of the block. Later on, after the prologue code has been emitted, the
/// stashed DBG_VALUEs will be reinserted at the start of the block.
184 static void stashEntryDbgValues(MachineBasicBlock &MBB, 185 SavedDbgValuesMap &EntryDbgValues) { 186 SmallVector<const MachineInstr *, 4> FrameIndexValues; 187 188 for (auto &MI : MBB) { 189 if (!MI.isDebugInstr()) 190 break; 191 if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter()) 192 continue; 193 if (any_of(MI.debug_operands(), 194 [](const MachineOperand &MO) { return MO.isFI(); })) { 195 // We can only emit valid locations for frame indices after the frame 196 // setup, so do not stash away them. 197 FrameIndexValues.push_back(&MI); 198 continue; 199 } 200 const DILocalVariable *Var = MI.getDebugVariable(); 201 const DIExpression *Expr = MI.getDebugExpression(); 202 auto Overlaps = [Var, Expr](const MachineInstr *DV) { 203 return Var == DV->getDebugVariable() && 204 Expr->fragmentsOverlap(DV->getDebugExpression()); 205 }; 206 // See if the debug value overlaps with any preceding debug value that will 207 // not be stashed. If that is the case, then we can't stash this value, as 208 // we would then reorder the values at reinsertion. 209 if (llvm::none_of(FrameIndexValues, Overlaps)) 210 EntryDbgValues[&MBB].push_back(&MI); 211 } 212 213 // Remove stashed debug values from the block. 214 if (EntryDbgValues.count(&MBB)) 215 for (auto *MI : EntryDbgValues[&MBB]) 216 MI->removeFromParent(); 217 } 218 219 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract 220 /// frame indexes with appropriate references. 221 bool PEI::runOnMachineFunction(MachineFunction &MF) { 222 NumFuncSeen++; 223 const Function &F = MF.getFunction(); 224 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 225 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 226 227 RS = TRI->requiresRegisterScavenging(MF) ? 
new RegScavenger() : nullptr; 228 FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); 229 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); 230 231 // Calculate the MaxCallFrameSize value for the function's frame 232 // information. Also eliminates call frame pseudo instructions. 233 calculateCallFrameInfo(MF); 234 235 // Determine placement of CSR spill/restore code and prolog/epilog code: 236 // place all spills in the entry block, all restores in return blocks. 237 calculateSaveRestoreBlocks(MF); 238 239 // Stash away DBG_VALUEs that should not be moved by insertion of prolog code. 240 SavedDbgValuesMap EntryDbgValues; 241 for (MachineBasicBlock *SaveBlock : SaveBlocks) 242 stashEntryDbgValues(*SaveBlock, EntryDbgValues); 243 244 // Handle CSR spilling and restoring, for targets that need it. 245 if (MF.getTarget().usesPhysRegsForValues()) 246 spillCalleeSavedRegs(MF); 247 248 // Allow the target machine to make final modifications to the function 249 // before the frame layout is finalized. 250 TFI->processFunctionBeforeFrameFinalized(MF, RS); 251 252 // Calculate actual frame offsets for all abstract stack objects... 253 calculateFrameObjectOffsets(MF); 254 255 // Add prolog and epilog code to the function. This function is required 256 // to align the stack frame as necessary for any stack variables or 257 // called functions. Because of this, calculateCalleeSavedRegisters() 258 // must be called before this function in order to set the AdjustsStack 259 // and MaxCallFrameSize variables. 260 if (!F.hasFnAttribute(Attribute::Naked)) 261 insertPrologEpilogCode(MF); 262 263 // Reinsert stashed debug values at the start of the entry blocks. 264 for (auto &I : EntryDbgValues) 265 I.first->insert(I.first->begin(), I.second.begin(), I.second.end()); 266 267 // Allow the target machine to make final modifications to the function 268 // before the frame layout is finalized. 
269 TFI->processFunctionBeforeFrameIndicesReplaced(MF, RS); 270 271 // Replace all MO_FrameIndex operands with physical register references 272 // and actual offsets. 273 if (TFI->needsFrameIndexResolution(MF)) { 274 // Allow the target to determine this after knowing the frame size. 275 FrameIndexEliminationScavenging = 276 (RS && !FrameIndexVirtualScavenging) || 277 TRI->requiresFrameIndexReplacementScavenging(MF); 278 279 if (TRI->eliminateFrameIndicesBackwards()) 280 replaceFrameIndicesBackward(MF); 281 else 282 replaceFrameIndices(MF); 283 } 284 285 // If register scavenging is needed, as we've enabled doing it as a 286 // post-pass, scavenge the virtual registers that frame index elimination 287 // inserted. 288 if (TRI->requiresRegisterScavenging(MF) && FrameIndexVirtualScavenging) 289 scavengeFrameVirtualRegs(MF, *RS); 290 291 // Warn on stack size when we exceeds the given limit. 292 MachineFrameInfo &MFI = MF.getFrameInfo(); 293 uint64_t StackSize = MFI.getStackSize(); 294 295 uint64_t Threshold = TFI->getStackThreshold(); 296 if (MF.getFunction().hasFnAttribute("warn-stack-size")) { 297 bool Failed = MF.getFunction() 298 .getFnAttribute("warn-stack-size") 299 .getValueAsString() 300 .getAsInteger(10, Threshold); 301 // Verifier should have caught this. 
302 assert(!Failed && "Invalid warn-stack-size fn attr value"); 303 (void)Failed; 304 } 305 uint64_t UnsafeStackSize = MFI.getUnsafeStackSize(); 306 if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) 307 StackSize += UnsafeStackSize; 308 309 if (StackSize > Threshold) { 310 DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning); 311 F.getContext().diagnose(DiagStackSize); 312 int64_t SpillSize = 0; 313 for (int Idx = MFI.getObjectIndexBegin(), End = MFI.getObjectIndexEnd(); 314 Idx != End; ++Idx) { 315 if (MFI.isSpillSlotObjectIndex(Idx)) 316 SpillSize += MFI.getObjectSize(Idx); 317 } 318 319 [[maybe_unused]] float SpillPct = 320 static_cast<float>(SpillSize) / static_cast<float>(StackSize); 321 LLVM_DEBUG( 322 dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables", 323 SpillSize, StackSize, StackSize - SpillSize, SpillPct, 324 1.0f - SpillPct)); 325 if (UnsafeStackSize != 0) { 326 LLVM_DEBUG(dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack", 327 UnsafeStackSize, 328 static_cast<float>(UnsafeStackSize) / 329 static_cast<float>(StackSize), 330 StackSize)); 331 } 332 LLVM_DEBUG(dbgs() << "\n"); 333 } 334 335 ORE->emit([&]() { 336 return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", 337 MF.getFunction().getSubprogram(), 338 &MF.front()) 339 << ore::NV("NumStackBytes", StackSize) 340 << " stack bytes in function '" 341 << ore::NV("Function", MF.getFunction().getName()) << "'"; 342 }); 343 344 delete RS; 345 SaveBlocks.clear(); 346 RestoreBlocks.clear(); 347 MFI.setSavePoint(nullptr); 348 MFI.setRestorePoint(nullptr); 349 return true; 350 } 351 352 /// Calculate the MaxCallFrameSize variable for the function's frame 353 /// information and eliminate call frame pseudo instructions. 
354 void PEI::calculateCallFrameInfo(MachineFunction &MF) { 355 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 356 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 357 MachineFrameInfo &MFI = MF.getFrameInfo(); 358 359 // Get the function call frame set-up and tear-down instruction opcode 360 unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); 361 unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); 362 363 // Early exit for targets which have no call frame setup/destroy pseudo 364 // instructions. 365 if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) 366 return; 367 368 // (Re-)Compute the MaxCallFrameSize. 369 [[maybe_unused]] uint32_t MaxCFSIn = 370 MFI.isMaxCallFrameSizeComputed() ? MFI.getMaxCallFrameSize() : UINT32_MAX; 371 std::vector<MachineBasicBlock::iterator> FrameSDOps; 372 MFI.computeMaxCallFrameSize(MF, &FrameSDOps); 373 assert(MFI.getMaxCallFrameSize() <= MaxCFSIn && 374 "Recomputing MaxCFS gave a larger value."); 375 376 if (TFI->canSimplifyCallFramePseudos(MF)) { 377 // If call frames are not being included as part of the stack frame, and 378 // the target doesn't indicate otherwise, remove the call frame pseudos 379 // here. The sub/add sp instruction pairs are still inserted, but we don't 380 // need to track the SP adjustment for frame index elimination. 381 for (MachineBasicBlock::iterator I : FrameSDOps) 382 TFI->eliminateCallFramePseudoInstr(MF, *I->getParent(), I); 383 384 // We can't track the call frame size after call frame pseudos have been 385 // eliminated. Set it to zero everywhere to keep MachineVerifier happy. 386 for (MachineBasicBlock &MBB : MF) 387 MBB.setCallFrameSize(0); 388 } 389 } 390 391 /// Compute the sets of entry and return blocks for saving and restoring 392 /// callee-saved registers, and placing prolog and epilog code. 
393 void PEI::calculateSaveRestoreBlocks(MachineFunction &MF) { 394 const MachineFrameInfo &MFI = MF.getFrameInfo(); 395 396 // Even when we do not change any CSR, we still want to insert the 397 // prologue and epilogue of the function. 398 // So set the save points for those. 399 400 // Use the points found by shrink-wrapping, if any. 401 if (MFI.getSavePoint()) { 402 SaveBlocks.push_back(MFI.getSavePoint()); 403 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 404 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 405 // If RestoreBlock does not have any successor and is not a return block 406 // then the end point is unreachable and we do not need to insert any 407 // epilogue. 408 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 409 RestoreBlocks.push_back(RestoreBlock); 410 return; 411 } 412 413 // Save refs to entry and return blocks. 414 SaveBlocks.push_back(&MF.front()); 415 for (MachineBasicBlock &MBB : MF) { 416 if (MBB.isEHFuncletEntry()) 417 SaveBlocks.push_back(&MBB); 418 if (MBB.isReturnBlock()) 419 RestoreBlocks.push_back(&MBB); 420 } 421 } 422 423 static void assignCalleeSavedSpillSlots(MachineFunction &F, 424 const BitVector &SavedRegs, 425 unsigned &MinCSFrameIndex, 426 unsigned &MaxCSFrameIndex) { 427 if (SavedRegs.empty()) 428 return; 429 430 const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); 431 const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs(); 432 BitVector CSMask(SavedRegs.size()); 433 434 for (unsigned i = 0; CSRegs[i]; ++i) 435 CSMask.set(CSRegs[i]); 436 437 std::vector<CalleeSavedInfo> CSI; 438 for (unsigned i = 0; CSRegs[i]; ++i) { 439 unsigned Reg = CSRegs[i]; 440 if (SavedRegs.test(Reg)) { 441 bool SavedSuper = false; 442 for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) { 443 // Some backends set all aliases for some registers as saved, such as 444 // Mips's $fp, so they appear in SavedRegs but not CSRegs. 
445 if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) { 446 SavedSuper = true; 447 break; 448 } 449 } 450 451 if (!SavedSuper) 452 CSI.push_back(CalleeSavedInfo(Reg)); 453 } 454 } 455 456 const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); 457 MachineFrameInfo &MFI = F.getFrameInfo(); 458 if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI, MinCSFrameIndex, 459 MaxCSFrameIndex)) { 460 // If target doesn't implement this, use generic code. 461 462 if (CSI.empty()) 463 return; // Early exit if no callee saved registers are modified! 464 465 unsigned NumFixedSpillSlots; 466 const TargetFrameLowering::SpillSlot *FixedSpillSlots = 467 TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); 468 469 // Now that we know which registers need to be saved and restored, allocate 470 // stack slots for them. 471 for (auto &CS : CSI) { 472 // If the target has spilled this register to another register, we don't 473 // need to allocate a stack slot. 474 if (CS.isSpilledToReg()) 475 continue; 476 477 unsigned Reg = CS.getReg(); 478 const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); 479 480 int FrameIdx; 481 if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { 482 CS.setFrameIdx(FrameIdx); 483 continue; 484 } 485 486 // Check to see if this physreg must be spilled to a particular stack slot 487 // on this target. 488 const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; 489 while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && 490 FixedSlot->Reg != Reg) 491 ++FixedSlot; 492 493 unsigned Size = RegInfo->getSpillSize(*RC); 494 if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { 495 // Nope, just spill it anywhere convenient. 496 Align Alignment = RegInfo->getSpillAlign(*RC); 497 // We may not be able to satisfy the desired alignment specification of 498 // the TargetRegisterClass if the stack alignment is smaller. Use the 499 // min. 
500 Alignment = std::min(Alignment, TFI->getStackAlign()); 501 FrameIdx = MFI.CreateStackObject(Size, Alignment, true); 502 if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; 503 if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; 504 } else { 505 // Spill it to the stack where we must. 506 FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset); 507 } 508 509 CS.setFrameIdx(FrameIdx); 510 } 511 } 512 513 MFI.setCalleeSavedInfo(CSI); 514 } 515 516 /// Helper function to update the liveness information for the callee-saved 517 /// registers. 518 static void updateLiveness(MachineFunction &MF) { 519 MachineFrameInfo &MFI = MF.getFrameInfo(); 520 // Visited will contain all the basic blocks that are in the region 521 // where the callee saved registers are alive: 522 // - Anything that is not Save or Restore -> LiveThrough. 523 // - Save -> LiveIn. 524 // - Restore -> LiveOut. 525 // The live-out is not attached to the block, so no need to keep 526 // Restore in this set. 527 SmallPtrSet<MachineBasicBlock *, 8> Visited; 528 SmallVector<MachineBasicBlock *, 8> WorkList; 529 MachineBasicBlock *Entry = &MF.front(); 530 MachineBasicBlock *Save = MFI.getSavePoint(); 531 532 if (!Save) 533 Save = Entry; 534 535 if (Entry != Save) { 536 WorkList.push_back(Entry); 537 Visited.insert(Entry); 538 } 539 Visited.insert(Save); 540 541 MachineBasicBlock *Restore = MFI.getRestorePoint(); 542 if (Restore) 543 // By construction Restore cannot be visited, otherwise it 544 // means there exists a path to Restore that does not go 545 // through Save. 546 WorkList.push_back(Restore); 547 548 while (!WorkList.empty()) { 549 const MachineBasicBlock *CurBB = WorkList.pop_back_val(); 550 // By construction, the region that is after the save point is 551 // dominated by the Save and post-dominated by the Restore. 552 if (CurBB == Save && Save != Restore) 553 continue; 554 // Enqueue all the successors not already visited. 
555 // Those are by construction either before Save or after Restore. 556 for (MachineBasicBlock *SuccBB : CurBB->successors()) 557 if (Visited.insert(SuccBB).second) 558 WorkList.push_back(SuccBB); 559 } 560 561 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 562 563 MachineRegisterInfo &MRI = MF.getRegInfo(); 564 for (const CalleeSavedInfo &I : CSI) { 565 for (MachineBasicBlock *MBB : Visited) { 566 MCPhysReg Reg = I.getReg(); 567 // Add the callee-saved register as live-in. 568 // It's killed at the spill. 569 if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) 570 MBB->addLiveIn(Reg); 571 } 572 // If callee-saved register is spilled to another register rather than 573 // spilling to stack, the destination register has to be marked as live for 574 // each MBB between the prologue and epilogue so that it is not clobbered 575 // before it is reloaded in the epilogue. The Visited set contains all 576 // blocks outside of the region delimited by prologue/epilogue. 577 if (I.isSpilledToReg()) { 578 for (MachineBasicBlock &MBB : MF) { 579 if (Visited.count(&MBB)) 580 continue; 581 MCPhysReg DstReg = I.getDstReg(); 582 if (!MBB.isLiveIn(DstReg)) 583 MBB.addLiveIn(DstReg); 584 } 585 } 586 } 587 } 588 589 /// Insert spill code for the callee-saved registers used in the function. 590 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 591 ArrayRef<CalleeSavedInfo> CSI) { 592 MachineFunction &MF = *SaveBlock.getParent(); 593 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 594 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 595 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 596 597 MachineBasicBlock::iterator I = SaveBlock.begin(); 598 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 599 for (const CalleeSavedInfo &CS : CSI) { 600 // Insert the spill to the stack frame. 
601 unsigned Reg = CS.getReg(); 602 603 if (CS.isSpilledToReg()) { 604 BuildMI(SaveBlock, I, DebugLoc(), 605 TII.get(TargetOpcode::COPY), CS.getDstReg()) 606 .addReg(Reg, getKillRegState(true)); 607 } else { 608 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 609 TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, 610 TRI, Register()); 611 } 612 } 613 } 614 } 615 616 /// Insert restore code for the callee-saved registers used in the function. 617 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 618 std::vector<CalleeSavedInfo> &CSI) { 619 MachineFunction &MF = *RestoreBlock.getParent(); 620 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 621 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 622 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 623 624 // Restore all registers immediately before the return and any 625 // terminators that precede it. 626 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 627 628 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 629 for (const CalleeSavedInfo &CI : reverse(CSI)) { 630 unsigned Reg = CI.getReg(); 631 if (CI.isSpilledToReg()) { 632 BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg) 633 .addReg(CI.getDstReg(), getKillRegState(true)); 634 } else { 635 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 636 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, 637 TRI, Register()); 638 assert(I != RestoreBlock.begin() && 639 "loadRegFromStackSlot didn't insert any code!"); 640 // Insert in reverse order. loadRegFromStackSlot can insert 641 // multiple instructions. 
642 } 643 } 644 } 645 } 646 647 void PEI::spillCalleeSavedRegs(MachineFunction &MF) { 648 // We can't list this requirement in getRequiredProperties because some 649 // targets (WebAssembly) use virtual registers past this point, and the pass 650 // pipeline is set up without giving the passes a chance to look at the 651 // TargetMachine. 652 // FIXME: Find a way to express this in getRequiredProperties. 653 assert(MF.getProperties().hasProperty( 654 MachineFunctionProperties::Property::NoVRegs)); 655 656 const Function &F = MF.getFunction(); 657 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 658 MachineFrameInfo &MFI = MF.getFrameInfo(); 659 MinCSFrameIndex = std::numeric_limits<unsigned>::max(); 660 MaxCSFrameIndex = 0; 661 662 // Determine which of the registers in the callee save list should be saved. 663 BitVector SavedRegs; 664 TFI->determineCalleeSaves(MF, SavedRegs, RS); 665 666 // Assign stack slots for any callee-saved registers that must be spilled. 667 assignCalleeSavedSpillSlots(MF, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); 668 669 // Add the code to save and restore the callee saved registers. 670 if (!F.hasFnAttribute(Attribute::Naked)) { 671 MFI.setCalleeSavedInfoValid(true); 672 673 std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 674 if (!CSI.empty()) { 675 if (!MFI.hasCalls()) 676 NumLeafFuncWithSpills++; 677 678 for (MachineBasicBlock *SaveBlock : SaveBlocks) 679 insertCSRSaves(*SaveBlock, CSI); 680 681 // Update the live-in information of all the blocks up to the save point. 682 updateLiveness(MF); 683 684 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 685 insertCSRRestores(*RestoreBlock, CSI); 686 } 687 } 688 } 689 690 /// AdjustStackOffset - Helper function used to adjust the stack frame offset. 
691 static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, 692 bool StackGrowsDown, int64_t &Offset, 693 Align &MaxAlign) { 694 // If the stack grows down, add the object size to find the lowest address. 695 if (StackGrowsDown) 696 Offset += MFI.getObjectSize(FrameIdx); 697 698 Align Alignment = MFI.getObjectAlign(FrameIdx); 699 700 // If the alignment of this object is greater than that of the stack, then 701 // increase the stack alignment to match. 702 MaxAlign = std::max(MaxAlign, Alignment); 703 704 // Adjust to alignment boundary. 705 Offset = alignTo(Offset, Alignment); 706 707 if (StackGrowsDown) { 708 LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset 709 << "]\n"); 710 MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset 711 } else { 712 LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset 713 << "]\n"); 714 MFI.setObjectOffset(FrameIdx, Offset); 715 Offset += MFI.getObjectSize(FrameIdx); 716 } 717 } 718 719 /// Compute which bytes of fixed and callee-save stack area are unused and keep 720 /// track of them in StackBytesFree. 721 static inline void 722 computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, 723 unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, 724 int64_t FixedCSEnd, BitVector &StackBytesFree) { 725 // Avoid undefined int64_t -> int conversion below in extreme case. 726 if (FixedCSEnd > std::numeric_limits<int>::max()) 727 return; 728 729 StackBytesFree.resize(FixedCSEnd, true); 730 731 SmallVector<int, 16> AllocatedFrameSlots; 732 // Add fixed objects. 733 for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) 734 // StackSlot scavenging is only implemented for the default stack. 735 if (MFI.getStackID(i) == TargetStackID::Default) 736 AllocatedFrameSlots.push_back(i); 737 // Add callee-save objects if there are any. 
738 if (MinCSFrameIndex <= MaxCSFrameIndex) { 739 for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) 740 if (MFI.getStackID(i) == TargetStackID::Default) 741 AllocatedFrameSlots.push_back(i); 742 } 743 744 for (int i : AllocatedFrameSlots) { 745 // These are converted from int64_t, but they should always fit in int 746 // because of the FixedCSEnd check above. 747 int ObjOffset = MFI.getObjectOffset(i); 748 int ObjSize = MFI.getObjectSize(i); 749 int ObjStart, ObjEnd; 750 if (StackGrowsDown) { 751 // ObjOffset is negative when StackGrowsDown is true. 752 ObjStart = -ObjOffset - ObjSize; 753 ObjEnd = -ObjOffset; 754 } else { 755 ObjStart = ObjOffset; 756 ObjEnd = ObjOffset + ObjSize; 757 } 758 // Ignore fixed holes that are in the previous stack frame. 759 if (ObjEnd > 0) 760 StackBytesFree.reset(ObjStart, ObjEnd); 761 } 762 } 763 764 /// Assign frame object to an unused portion of the stack in the fixed stack 765 /// object range. Return true if the allocation was successful. 766 static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, 767 bool StackGrowsDown, Align MaxAlign, 768 BitVector &StackBytesFree) { 769 if (MFI.isVariableSizedObjectIndex(FrameIdx)) 770 return false; 771 772 if (StackBytesFree.none()) { 773 // clear it to speed up later scavengeStackSlot calls to 774 // StackBytesFree.none() 775 StackBytesFree.clear(); 776 return false; 777 } 778 779 Align ObjAlign = MFI.getObjectAlign(FrameIdx); 780 if (ObjAlign > MaxAlign) 781 return false; 782 783 int64_t ObjSize = MFI.getObjectSize(FrameIdx); 784 int FreeStart; 785 for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; 786 FreeStart = StackBytesFree.find_next(FreeStart)) { 787 788 // Check that free space has suitable alignment. 789 unsigned ObjStart = StackGrowsDown ? 
FreeStart + ObjSize : FreeStart; 790 if (alignTo(ObjStart, ObjAlign) != ObjStart) 791 continue; 792 793 if (FreeStart + ObjSize > StackBytesFree.size()) 794 return false; 795 796 bool AllBytesFree = true; 797 for (unsigned Byte = 0; Byte < ObjSize; ++Byte) 798 if (!StackBytesFree.test(FreeStart + Byte)) { 799 AllBytesFree = false; 800 break; 801 } 802 if (AllBytesFree) 803 break; 804 } 805 806 if (FreeStart == -1) 807 return false; 808 809 if (StackGrowsDown) { 810 int ObjStart = -(FreeStart + ObjSize); 811 LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" 812 << ObjStart << "]\n"); 813 MFI.setObjectOffset(FrameIdx, ObjStart); 814 } else { 815 LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" 816 << FreeStart << "]\n"); 817 MFI.setObjectOffset(FrameIdx, FreeStart); 818 } 819 820 StackBytesFree.reset(FreeStart, FreeStart + ObjSize); 821 return true; 822 } 823 824 /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., 825 /// those required to be close to the Stack Protector) to stack offsets. 826 static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, 827 SmallSet<int, 16> &ProtectedObjs, 828 MachineFrameInfo &MFI, bool StackGrowsDown, 829 int64_t &Offset, Align &MaxAlign) { 830 831 for (int i : UnassignedObjs) { 832 AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); 833 ProtectedObjs.insert(i); 834 } 835 } 836 837 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the 838 /// abstract stack objects. 839 void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { 840 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 841 842 bool StackGrowsDown = 843 TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; 844 845 // Loop over all of the stack objects, assigning sequential addresses... 846 MachineFrameInfo &MFI = MF.getFrameInfo(); 847 848 // Start at the beginning of the local area. 
849 // The Offset is the distance from the stack top in the direction 850 // of stack growth -- so it's always nonnegative. 851 int LocalAreaOffset = TFI.getOffsetOfLocalArea(); 852 if (StackGrowsDown) 853 LocalAreaOffset = -LocalAreaOffset; 854 assert(LocalAreaOffset >= 0 855 && "Local area offset should be in direction of stack growth"); 856 int64_t Offset = LocalAreaOffset; 857 858 #ifdef EXPENSIVE_CHECKS 859 for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) 860 if (!MFI.isDeadObjectIndex(i) && 861 MFI.getStackID(i) == TargetStackID::Default) 862 assert(MFI.getObjectAlign(i) <= MFI.getMaxAlign() && 863 "MaxAlignment is invalid"); 864 #endif 865 866 // If there are fixed sized objects that are preallocated in the local area, 867 // non-fixed objects can't be allocated right at the start of local area. 868 // Adjust 'Offset' to point to the end of last fixed sized preallocated 869 // object. 870 for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { 871 // Only allocate objects on the default stack. 872 if (MFI.getStackID(i) != TargetStackID::Default) 873 continue; 874 875 int64_t FixedOff; 876 if (StackGrowsDown) { 877 // The maximum distance from the stack pointer is at lower address of 878 // the object -- which is given by offset. For down growing stack 879 // the offset is negative, so we negate the offset to get the distance. 880 FixedOff = -MFI.getObjectOffset(i); 881 } else { 882 // The maximum distance from the start pointer is at the upper 883 // address of the object. 884 FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i); 885 } 886 if (FixedOff > Offset) Offset = FixedOff; 887 } 888 889 Align MaxAlign = MFI.getMaxAlign(); 890 // First assign frame offsets to stack objects that are used to spill 891 // callee saved registers. 892 if (MaxCSFrameIndex >= MinCSFrameIndex) { 893 for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) { 894 unsigned FrameIndex = 895 StackGrowsDown ? 
MinCSFrameIndex + i : MaxCSFrameIndex - i; 896 897 // Only allocate objects on the default stack. 898 if (MFI.getStackID(FrameIndex) != TargetStackID::Default) 899 continue; 900 901 // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex)) 902 if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex)) 903 continue; 904 905 AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign); 906 } 907 } 908 909 assert(MaxAlign == MFI.getMaxAlign() && 910 "MFI.getMaxAlign should already account for all callee-saved " 911 "registers without a fixed stack slot"); 912 913 // FixedCSEnd is the stack offset to the end of the fixed and callee-save 914 // stack area. 915 int64_t FixedCSEnd = Offset; 916 917 // Make sure the special register scavenging spill slot is closest to the 918 // incoming stack pointer if a frame pointer is required and is closer 919 // to the incoming rather than the final stack pointer. 920 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); 921 bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF); 922 if (RS && EarlyScavengingSlots) { 923 SmallVector<int, 2> SFIs; 924 RS->getScavengingFrameIndices(SFIs); 925 for (int SFI : SFIs) 926 AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); 927 } 928 929 // FIXME: Once this is working, then enable flag will change to a target 930 // check for whether the frame is large enough to want to use virtual 931 // frame index registers. Functions which don't want/need this optimization 932 // will continue to use the existing code path. 933 if (MFI.getUseLocalStackAllocationBlock()) { 934 Align Alignment = MFI.getLocalFrameMaxAlign(); 935 936 // Adjust to alignment boundary. 937 Offset = alignTo(Offset, Alignment); 938 939 LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); 940 941 // Resolve offsets for objects in the local block. 
942 for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) { 943 std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i); 944 int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; 945 LLVM_DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << FIOffset 946 << "]\n"); 947 MFI.setObjectOffset(Entry.first, FIOffset); 948 } 949 // Allocate the local block 950 Offset += MFI.getLocalFrameSize(); 951 952 MaxAlign = std::max(Alignment, MaxAlign); 953 } 954 955 // Retrieve the Exception Handler registration node. 956 int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); 957 if (const WinEHFuncInfo *FuncInfo = MF.getWinEHFuncInfo()) 958 EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; 959 960 // Make sure that the stack protector comes before the local variables on the 961 // stack. 962 SmallSet<int, 16> ProtectedObjs; 963 if (MFI.hasStackProtectorIndex()) { 964 int StackProtectorFI = MFI.getStackProtectorIndex(); 965 StackObjSet LargeArrayObjs; 966 StackObjSet SmallArrayObjs; 967 StackObjSet AddrOfObjs; 968 969 // If we need a stack protector, we need to make sure that 970 // LocalStackSlotPass didn't already allocate a slot for it. 971 // If we are told to use the LocalStackAllocationBlock, the stack protector 972 // is expected to be already pre-allocated. 973 if (MFI.getStackID(StackProtectorFI) != TargetStackID::Default) { 974 // If the stack protector isn't on the default stack then it's up to the 975 // target to set the stack offset. 
976 assert(MFI.getObjectOffset(StackProtectorFI) != 0 && 977 "Offset of stack protector on non-default stack expected to be " 978 "already set."); 979 assert(!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()) && 980 "Stack protector on non-default stack expected to not be " 981 "pre-allocated by LocalStackSlotPass."); 982 } else if (!MFI.getUseLocalStackAllocationBlock()) { 983 AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, 984 MaxAlign); 985 } else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) { 986 llvm_unreachable( 987 "Stack protector not pre-allocated by LocalStackSlotPass."); 988 } 989 990 // Assign large stack objects first. 991 for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { 992 if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock()) 993 continue; 994 if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) 995 continue; 996 if (RS && RS->isScavengingFrameIndex((int)i)) 997 continue; 998 if (MFI.isDeadObjectIndex(i)) 999 continue; 1000 if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i) 1001 continue; 1002 // Only allocate objects on the default stack. 1003 if (MFI.getStackID(i) != TargetStackID::Default) 1004 continue; 1005 1006 switch (MFI.getObjectSSPLayout(i)) { 1007 case MachineFrameInfo::SSPLK_None: 1008 continue; 1009 case MachineFrameInfo::SSPLK_SmallArray: 1010 SmallArrayObjs.insert(i); 1011 continue; 1012 case MachineFrameInfo::SSPLK_AddrOf: 1013 AddrOfObjs.insert(i); 1014 continue; 1015 case MachineFrameInfo::SSPLK_LargeArray: 1016 LargeArrayObjs.insert(i); 1017 continue; 1018 } 1019 llvm_unreachable("Unexpected SSPLayoutKind."); 1020 } 1021 1022 // We expect **all** the protected stack objects to be pre-allocated by 1023 // LocalStackSlotPass. If it turns out that PEI still has to allocate some 1024 // of them, we may end up messing up the expected order of the objects. 
1025 if (MFI.getUseLocalStackAllocationBlock() && 1026 !(LargeArrayObjs.empty() && SmallArrayObjs.empty() && 1027 AddrOfObjs.empty())) 1028 llvm_unreachable("Found protected stack objects not pre-allocated by " 1029 "LocalStackSlotPass."); 1030 1031 AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, 1032 Offset, MaxAlign); 1033 AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, 1034 Offset, MaxAlign); 1035 AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, 1036 Offset, MaxAlign); 1037 } 1038 1039 SmallVector<int, 8> ObjectsToAllocate; 1040 1041 // Then prepare to assign frame offsets to stack objects that are not used to 1042 // spill callee saved registers. 1043 for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { 1044 if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock()) 1045 continue; 1046 if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) 1047 continue; 1048 if (RS && RS->isScavengingFrameIndex((int)i)) 1049 continue; 1050 if (MFI.isDeadObjectIndex(i)) 1051 continue; 1052 if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i) 1053 continue; 1054 if (ProtectedObjs.count(i)) 1055 continue; 1056 // Only allocate objects on the default stack. 1057 if (MFI.getStackID(i) != TargetStackID::Default) 1058 continue; 1059 1060 // Add the objects that we need to allocate to our working set. 1061 ObjectsToAllocate.push_back(i); 1062 } 1063 1064 // Allocate the EH registration node first if one is present. 1065 if (EHRegNodeFrameIndex != std::numeric_limits<int>::max()) 1066 AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset, 1067 MaxAlign); 1068 1069 // Give the targets a chance to order the objects the way they like it. 
1070 if (MF.getTarget().getOptLevel() != CodeGenOptLevel::None && 1071 MF.getTarget().Options.StackSymbolOrdering) 1072 TFI.orderFrameObjects(MF, ObjectsToAllocate); 1073 1074 // Keep track of which bytes in the fixed and callee-save range are used so we 1075 // can use the holes when allocating later stack objects. Only do this if 1076 // stack protector isn't being used and the target requests it and we're 1077 // optimizing. 1078 BitVector StackBytesFree; 1079 if (!ObjectsToAllocate.empty() && 1080 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && 1081 MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(MF)) 1082 computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, 1083 FixedCSEnd, StackBytesFree); 1084 1085 // Now walk the objects and actually assign base offsets to them. 1086 for (auto &Object : ObjectsToAllocate) 1087 if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign, 1088 StackBytesFree)) 1089 AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign); 1090 1091 // Make sure the special register scavenging spill slot is closest to the 1092 // stack pointer. 1093 if (RS && !EarlyScavengingSlots) { 1094 SmallVector<int, 2> SFIs; 1095 RS->getScavengingFrameIndices(SFIs); 1096 for (int SFI : SFIs) 1097 AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); 1098 } 1099 1100 if (!TFI.targetHandlesStackFrameRounding()) { 1101 // If we have reserved argument space for call sites in the function 1102 // immediately on entry to the current function, count it as part of the 1103 // overall stack size. 1104 if (MFI.adjustsStack() && TFI.hasReservedCallFrame(MF)) 1105 Offset += MFI.getMaxCallFrameSize(); 1106 1107 // Round up the size to a multiple of the alignment. 
If the function has 1108 // any calls or alloca's, align to the target's StackAlignment value to 1109 // ensure that the callee's frame or the alloca data is suitably aligned; 1110 // otherwise, for leaf functions, align to the TransientStackAlignment 1111 // value. 1112 Align StackAlign; 1113 if (MFI.adjustsStack() || MFI.hasVarSizedObjects() || 1114 (RegInfo->hasStackRealignment(MF) && MFI.getObjectIndexEnd() != 0)) 1115 StackAlign = TFI.getStackAlign(); 1116 else 1117 StackAlign = TFI.getTransientStackAlign(); 1118 1119 // If the frame pointer is eliminated, all frame offsets will be relative to 1120 // SP not FP. Align to MaxAlign so this works. 1121 StackAlign = std::max(StackAlign, MaxAlign); 1122 int64_t OffsetBeforeAlignment = Offset; 1123 Offset = alignTo(Offset, StackAlign); 1124 1125 // If we have increased the offset to fulfill the alignment constrants, 1126 // then the scavenging spill slots may become harder to reach from the 1127 // stack pointer, float them so they stay close. 1128 if (StackGrowsDown && OffsetBeforeAlignment != Offset && RS && 1129 !EarlyScavengingSlots) { 1130 SmallVector<int, 2> SFIs; 1131 RS->getScavengingFrameIndices(SFIs); 1132 LLVM_DEBUG(if (!SFIs.empty()) llvm::dbgs() 1133 << "Adjusting emergency spill slots!\n";); 1134 int64_t Delta = Offset - OffsetBeforeAlignment; 1135 for (int SFI : SFIs) { 1136 LLVM_DEBUG(llvm::dbgs() 1137 << "Adjusting offset of emergency spill slot #" << SFI 1138 << " from " << MFI.getObjectOffset(SFI);); 1139 MFI.setObjectOffset(SFI, MFI.getObjectOffset(SFI) - Delta); 1140 LLVM_DEBUG(llvm::dbgs() << " to " << MFI.getObjectOffset(SFI) << "\n";); 1141 } 1142 } 1143 } 1144 1145 // Update frame info to pretend that this is part of the stack... 
1146 int64_t StackSize = Offset - LocalAreaOffset; 1147 MFI.setStackSize(StackSize); 1148 NumBytesStackSpace += StackSize; 1149 } 1150 1151 /// insertPrologEpilogCode - Scan the function for modified callee saved 1152 /// registers, insert spill code for these callee saved registers, then add 1153 /// prolog and epilog code to the function. 1154 void PEI::insertPrologEpilogCode(MachineFunction &MF) { 1155 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 1156 1157 // Add prologue to the function... 1158 for (MachineBasicBlock *SaveBlock : SaveBlocks) 1159 TFI.emitPrologue(MF, *SaveBlock); 1160 1161 // Add epilogue to restore the callee-save registers in each exiting block. 1162 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 1163 TFI.emitEpilogue(MF, *RestoreBlock); 1164 1165 // Zero call used registers before restoring callee-saved registers. 1166 insertZeroCallUsedRegs(MF); 1167 1168 for (MachineBasicBlock *SaveBlock : SaveBlocks) 1169 TFI.inlineStackProbe(MF, *SaveBlock); 1170 1171 // Emit additional code that is required to support segmented stacks, if 1172 // we've been asked for it. This, when linked with a runtime with support 1173 // for segmented stacks (libgcc is one), will result in allocating stack 1174 // space in small chunks instead of one large contiguous block. 1175 if (MF.shouldSplitStack()) { 1176 for (MachineBasicBlock *SaveBlock : SaveBlocks) 1177 TFI.adjustForSegmentedStacks(MF, *SaveBlock); 1178 } 1179 1180 // Emit additional code that is required to explicitly handle the stack in 1181 // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The 1182 // approach is rather similar to that of Segmented Stacks, but it uses a 1183 // different conditional check and another BIF for allocating more stack 1184 // space. 
1185 if (MF.getFunction().getCallingConv() == CallingConv::HiPE) 1186 for (MachineBasicBlock *SaveBlock : SaveBlocks) 1187 TFI.adjustForHiPEPrologue(MF, *SaveBlock); 1188 } 1189 1190 /// insertZeroCallUsedRegs - Zero out call used registers. 1191 void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { 1192 const Function &F = MF.getFunction(); 1193 1194 if (!F.hasFnAttribute("zero-call-used-regs")) 1195 return; 1196 1197 using namespace ZeroCallUsedRegs; 1198 1199 ZeroCallUsedRegsKind ZeroRegsKind = 1200 StringSwitch<ZeroCallUsedRegsKind>( 1201 F.getFnAttribute("zero-call-used-regs").getValueAsString()) 1202 .Case("skip", ZeroCallUsedRegsKind::Skip) 1203 .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg) 1204 .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR) 1205 .Case("used-arg", ZeroCallUsedRegsKind::UsedArg) 1206 .Case("used", ZeroCallUsedRegsKind::Used) 1207 .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg) 1208 .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR) 1209 .Case("all-arg", ZeroCallUsedRegsKind::AllArg) 1210 .Case("all", ZeroCallUsedRegsKind::All); 1211 1212 if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip) 1213 return; 1214 1215 const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR; 1216 const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED; 1217 const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG; 1218 1219 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 1220 const BitVector AllocatableSet(TRI.getAllocatableSet(MF)); 1221 1222 // Mark all used registers. 
1223 BitVector UsedRegs(TRI.getNumRegs()); 1224 if (OnlyUsed) 1225 for (const MachineBasicBlock &MBB : MF) 1226 for (const MachineInstr &MI : MBB) { 1227 // skip debug instructions 1228 if (MI.isDebugInstr()) 1229 continue; 1230 1231 for (const MachineOperand &MO : MI.operands()) { 1232 if (!MO.isReg()) 1233 continue; 1234 1235 MCRegister Reg = MO.getReg(); 1236 if (AllocatableSet[Reg] && !MO.isImplicit() && 1237 (MO.isDef() || MO.isUse())) 1238 UsedRegs.set(Reg); 1239 } 1240 } 1241 1242 // Get a list of registers that are used. 1243 BitVector LiveIns(TRI.getNumRegs()); 1244 for (const MachineBasicBlock::RegisterMaskPair &LI : MF.front().liveins()) 1245 LiveIns.set(LI.PhysReg); 1246 1247 BitVector RegsToZero(TRI.getNumRegs()); 1248 for (MCRegister Reg : AllocatableSet.set_bits()) { 1249 // Skip over fixed registers. 1250 if (TRI.isFixedRegister(MF, Reg)) 1251 continue; 1252 1253 // Want only general purpose registers. 1254 if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg)) 1255 continue; 1256 1257 // Want only used registers. 1258 if (OnlyUsed && !UsedRegs[Reg]) 1259 continue; 1260 1261 // Want only registers used for arguments. 1262 if (OnlyArg) { 1263 if (OnlyUsed) { 1264 if (!LiveIns[Reg]) 1265 continue; 1266 } else if (!TRI.isArgumentRegister(MF, Reg)) { 1267 continue; 1268 } 1269 } 1270 1271 RegsToZero.set(Reg); 1272 } 1273 1274 // Don't clear registers that are live when leaving the function. 1275 for (const MachineBasicBlock &MBB : MF) 1276 for (const MachineInstr &MI : MBB.terminators()) { 1277 if (!MI.isReturn()) 1278 continue; 1279 1280 for (const auto &MO : MI.operands()) { 1281 if (!MO.isReg()) 1282 continue; 1283 1284 MCRegister Reg = MO.getReg(); 1285 if (!Reg) 1286 continue; 1287 1288 // This picks up sibling registers (e.q. %al -> %ah). 
1289 for (MCRegUnit Unit : TRI.regunits(Reg)) 1290 RegsToZero.reset(Unit); 1291 1292 for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg)) 1293 RegsToZero.reset(SReg); 1294 } 1295 } 1296 1297 // Don't need to clear registers that are used/clobbered by terminating 1298 // instructions. 1299 for (const MachineBasicBlock &MBB : MF) { 1300 if (!MBB.isReturnBlock()) 1301 continue; 1302 1303 MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); 1304 for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E; 1305 ++I) { 1306 for (const MachineOperand &MO : I->operands()) { 1307 if (!MO.isReg()) 1308 continue; 1309 1310 MCRegister Reg = MO.getReg(); 1311 if (!Reg) 1312 continue; 1313 1314 for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg)) 1315 RegsToZero.reset(Reg); 1316 } 1317 } 1318 } 1319 1320 // Don't clear registers that must be preserved. 1321 for (const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); 1322 MCPhysReg CSReg = *CSRegs; ++CSRegs) 1323 for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSReg)) 1324 RegsToZero.reset(Reg); 1325 1326 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 1327 for (MachineBasicBlock &MBB : MF) 1328 if (MBB.isReturnBlock()) 1329 TFI.emitZeroCallUsedRegs(RegsToZero, MBB); 1330 } 1331 1332 /// Replace all FrameIndex operands with physical register references and actual 1333 /// offsets. 1334 void PEI::replaceFrameIndicesBackward(MachineFunction &MF) { 1335 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 1336 1337 for (auto &MBB : MF) { 1338 int SPAdj = 0; 1339 if (!MBB.succ_empty()) { 1340 // Get the SP adjustment for the end of MBB from the start of any of its 1341 // successors. They should all be the same. 
1342 assert(all_of(MBB.successors(), [&MBB](const MachineBasicBlock *Succ) { 1343 return Succ->getCallFrameSize() == 1344 (*MBB.succ_begin())->getCallFrameSize(); 1345 })); 1346 const MachineBasicBlock &FirstSucc = **MBB.succ_begin(); 1347 SPAdj = TFI.alignSPAdjust(FirstSucc.getCallFrameSize()); 1348 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) 1349 SPAdj = -SPAdj; 1350 } 1351 1352 replaceFrameIndicesBackward(&MBB, MF, SPAdj); 1353 1354 // We can't track the call frame size after call frame pseudos have been 1355 // eliminated. Set it to zero everywhere to keep MachineVerifier happy. 1356 MBB.setCallFrameSize(0); 1357 } 1358 } 1359 1360 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical 1361 /// register references and actual offsets. 1362 void PEI::replaceFrameIndices(MachineFunction &MF) { 1363 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 1364 1365 for (auto &MBB : MF) { 1366 int SPAdj = TFI.alignSPAdjust(MBB.getCallFrameSize()); 1367 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) 1368 SPAdj = -SPAdj; 1369 1370 replaceFrameIndices(&MBB, MF, SPAdj); 1371 1372 // We can't track the call frame size after call frame pseudos have been 1373 // eliminated. Set it to zero everywhere to keep MachineVerifier happy. 
1374 MBB.setCallFrameSize(0); 1375 } 1376 } 1377 1378 bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, 1379 unsigned OpIdx, int SPAdj) { 1380 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 1381 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 1382 if (MI.isDebugValue()) { 1383 1384 MachineOperand &Op = MI.getOperand(OpIdx); 1385 assert(MI.isDebugOperand(&Op) && 1386 "Frame indices can only appear as a debug operand in a DBG_VALUE*" 1387 " machine instruction"); 1388 Register Reg; 1389 unsigned FrameIdx = Op.getIndex(); 1390 unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx); 1391 1392 StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg); 1393 Op.ChangeToRegister(Reg, false /*isDef*/); 1394 1395 const DIExpression *DIExpr = MI.getDebugExpression(); 1396 1397 // If we have a direct DBG_VALUE, and its location expression isn't 1398 // currently complex, then adding an offset will morph it into a 1399 // complex location that is interpreted as being a memory address. 1400 // This changes a pointer-valued variable to dereference that pointer, 1401 // which is incorrect. Fix by adding DW_OP_stack_value. 1402 1403 if (MI.isNonListDebugValue()) { 1404 unsigned PrependFlags = DIExpression::ApplyOffset; 1405 if (!MI.isIndirectDebugValue() && !DIExpr->isComplex()) 1406 PrependFlags |= DIExpression::StackValue; 1407 1408 // If we have DBG_VALUE that is indirect and has a Implicit location 1409 // expression need to insert a deref before prepending a Memory 1410 // location expression. Also after doing this we change the DBG_VALUE 1411 // to be direct. 1412 if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) { 1413 SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; 1414 bool WithStackValue = true; 1415 DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); 1416 // Make the DBG_VALUE direct. 
1417 MI.getDebugOffset().ChangeToRegister(0, false); 1418 } 1419 DIExpr = TRI.prependOffsetExpression(DIExpr, PrependFlags, Offset); 1420 } else { 1421 // The debug operand at DebugOpIndex was a frame index at offset 1422 // `Offset`; now the operand has been replaced with the frame 1423 // register, we must add Offset with `register x, plus Offset`. 1424 unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op); 1425 SmallVector<uint64_t, 3> Ops; 1426 TRI.getOffsetOpcodes(Offset, Ops); 1427 DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex); 1428 } 1429 MI.getDebugExpressionOp().setMetadata(DIExpr); 1430 return true; 1431 } 1432 1433 if (MI.isDebugPHI()) { 1434 // Allow stack ref to continue onwards. 1435 return true; 1436 } 1437 1438 // TODO: This code should be commoned with the code for 1439 // PATCHPOINT. There's no good reason for the difference in 1440 // implementation other than historical accident. The only 1441 // remaining difference is the unconditional use of the stack 1442 // pointer as the base register. 
1443 if (MI.getOpcode() == TargetOpcode::STATEPOINT) { 1444 assert((!MI.isDebugValue() || OpIdx == 0) && 1445 "Frame indicies can only appear as the first operand of a " 1446 "DBG_VALUE machine instruction"); 1447 Register Reg; 1448 MachineOperand &Offset = MI.getOperand(OpIdx + 1); 1449 StackOffset refOffset = TFI->getFrameIndexReferencePreferSP( 1450 MF, MI.getOperand(OpIdx).getIndex(), Reg, /*IgnoreSPUpdates*/ false); 1451 assert(!refOffset.getScalable() && 1452 "Frame offsets with a scalable component are not supported"); 1453 Offset.setImm(Offset.getImm() + refOffset.getFixed() + SPAdj); 1454 MI.getOperand(OpIdx).ChangeToRegister(Reg, false /*isDef*/); 1455 return true; 1456 } 1457 return false; 1458 } 1459 1460 void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB, 1461 MachineFunction &MF, int &SPAdj) { 1462 assert(MF.getSubtarget().getRegisterInfo() && 1463 "getRegisterInfo() must be implemented!"); 1464 1465 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1466 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 1467 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 1468 1469 RegScavenger *LocalRS = FrameIndexEliminationScavenging ? RS : nullptr; 1470 if (LocalRS) 1471 LocalRS->enterBasicBlockEnd(*BB); 1472 1473 for (MachineBasicBlock::iterator I = BB->end(); I != BB->begin();) { 1474 MachineInstr &MI = *std::prev(I); 1475 1476 if (TII.isFrameInstr(MI)) { 1477 SPAdj -= TII.getSPAdjust(MI); 1478 TFI.eliminateCallFramePseudoInstr(MF, *BB, &MI); 1479 continue; 1480 } 1481 1482 // Step backwards to get the liveness state at (immedately after) MI. 1483 if (LocalRS) 1484 LocalRS->backward(I); 1485 1486 bool RemovedMI = false; 1487 for (const auto &[Idx, Op] : enumerate(MI.operands())) { 1488 if (!Op.isFI()) 1489 continue; 1490 1491 if (replaceFrameIndexDebugInstr(MF, MI, Idx, SPAdj)) 1492 continue; 1493 1494 // Eliminate this FrameIndex operand. 
1495 RemovedMI = TRI.eliminateFrameIndex(MI, SPAdj, Idx, LocalRS); 1496 if (RemovedMI) 1497 break; 1498 } 1499 1500 if (!RemovedMI) 1501 --I; 1502 } 1503 } 1504 1505 void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, 1506 int &SPAdj) { 1507 assert(MF.getSubtarget().getRegisterInfo() && 1508 "getRegisterInfo() must be implemented!"); 1509 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 1510 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); 1511 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 1512 1513 bool InsideCallSequence = false; 1514 1515 for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { 1516 if (TII.isFrameInstr(*I)) { 1517 InsideCallSequence = TII.isFrameSetup(*I); 1518 SPAdj += TII.getSPAdjust(*I); 1519 I = TFI->eliminateCallFramePseudoInstr(MF, *BB, I); 1520 continue; 1521 } 1522 1523 MachineInstr &MI = *I; 1524 bool DoIncr = true; 1525 bool DidFinishLoop = true; 1526 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 1527 if (!MI.getOperand(i).isFI()) 1528 continue; 1529 1530 if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj)) 1531 continue; 1532 1533 // Some instructions (e.g. inline asm instructions) can have 1534 // multiple frame indices and/or cause eliminateFrameIndex 1535 // to insert more than one instruction. We need the register 1536 // scavenger to go through all of these instructions so that 1537 // it can update its register information. We keep the 1538 // iterator at the point before insertion so that we can 1539 // revisit them in full. 1540 bool AtBeginning = (I == BB->begin()); 1541 if (!AtBeginning) --I; 1542 1543 // If this instruction has a FrameIndex operand, we need to 1544 // use that target machine register info object to eliminate 1545 // it. 1546 TRI.eliminateFrameIndex(MI, SPAdj, i); 1547 1548 // Reset the iterator if we were at the beginning of the BB. 
1549 if (AtBeginning) { 1550 I = BB->begin(); 1551 DoIncr = false; 1552 } 1553 1554 DidFinishLoop = false; 1555 break; 1556 } 1557 1558 // If we are looking at a call sequence, we need to keep track of 1559 // the SP adjustment made by each instruction in the sequence. 1560 // This includes both the frame setup/destroy pseudos (handled above), 1561 // as well as other instructions that have side effects w.r.t the SP. 1562 // Note that this must come after eliminateFrameIndex, because 1563 // if I itself referred to a frame index, we shouldn't count its own 1564 // adjustment. 1565 if (DidFinishLoop && InsideCallSequence) 1566 SPAdj += TII.getSPAdjust(MI); 1567 1568 if (DoIncr && I != BB->end()) 1569 ++I; 1570 } 1571 } 1572