xref: /llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp (revision dd437d3a26265eaf3ceb7c47f9cae5dc8cd35e0b)
1 //===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass is responsible for finalizing the functions frame layout, saving
11 // callee saved registers, and for emitting prolog & epilog code for the
12 // function.
13 //
14 // This pass must be run after register allocation.  After this pass is
15 // executed, it is illegal to construct MO_FrameIndex operands.
16 //
17 // This pass implements a shrink wrapping variant of prolog/epilog insertion:
18 // - Places callee saved register (CSR) spills and restores in the CFG to
19 //   tightly surround uses so that execution paths that do not use CSRs do not
20 //   pay the spill/restore penalty.
21 //
22 // - Avoiding placment of spills/restores in loops: if a CSR is used inside a
23 //   loop(nest), the spills are placed in the loop preheader, and restores are
24 //   placed in the loop exit nodes (the successors of the loop _exiting_ nodes).
25 //
26 // - Covering paths without CSR uses: e.g. if a restore is placed in a join
27 //   block, a matching spill is added to the end of all immediate predecessor
28 //   blocks that are not reached by a spill. Similarly for saves placed in
29 //   branch blocks.
30 //
31 // Shrink wrapping uses an analysis similar to the one in GVNPRE to determine
32 // which basic blocks require callee-saved register save/restore code.
33 //
34 // This pass uses MachineDominators and MachineLoopInfo. Loop information
35 // is used to prevent shrink wrapping of callee-saved register save/restore
36 // code into loops.
37 //
38 //===----------------------------------------------------------------------===//
39 
40 #define DEBUG_TYPE "shrink-wrap"
41 
42 #include "llvm/CodeGen/Passes.h"
43 #include "llvm/CodeGen/MachineDominators.h"
44 #include "llvm/CodeGen/MachineLoopInfo.h"
45 #include "llvm/CodeGen/MachineFunctionPass.h"
46 #include "llvm/CodeGen/MachineInstr.h"
47 #include "llvm/CodeGen/MachineFrameInfo.h"
48 #include "llvm/CodeGen/MachineModuleInfo.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RegisterScavenging.h"
51 #include "llvm/Target/TargetMachine.h"
52 #include "llvm/Target/TargetRegisterInfo.h"
53 #include "llvm/Target/TargetFrameInfo.h"
54 #include "llvm/Target/TargetInstrInfo.h"
55 #include "llvm/ADT/SparseBitVector.h"
56 #include "llvm/ADT/DenseMap.h"
57 #include "llvm/ADT/PostOrderIterator.h"
58 #include "llvm/ADT/Statistic.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/ADT/STLExtras.h"
63 #include "llvm/ADT/Statistic.h"
64 #include <climits>
65 #include <sstream>
66 
67 using namespace llvm;
68 
69 STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
70 
71 // Shrink Wrapping:
72 static cl::opt<bool>
73 ShrinkWrapping("shrink-wrap",
74                cl::desc("Shrink wrap callee-saved register spills/restores"));
75 
76 // Shrink wrap only the specified function, a debugging aid.
77 static cl::opt<std::string>
78 ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
79                cl::desc("Shrink wrap the specified function"),
80                cl::value_desc("funcname"),
81                cl::init(""));
82 
83 // Debugging level for shrink wrapping.
84 enum ShrinkWrapDebugLevel {
85   None, BasicInfo, Iterations, Details
86 };
87 
88 static cl::opt<enum ShrinkWrapDebugLevel>
89 ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
90   cl::desc("Print shrink wrapping debugging information"),
91   cl::values(
92     clEnumVal(None      , "disable debug output"),
93     clEnumVal(BasicInfo , "print basic DF sets"),
94     clEnumVal(Iterations, "print SR sets for each iteration"),
95     clEnumVal(Details   , "print all DF sets"),
96     clEnumValEnd));
97 
98 
99 namespace {
100   struct VISIBILITY_HIDDEN PEI : public MachineFunctionPass {
101     static char ID;
102     PEI() : MachineFunctionPass(&ID) {}
103 
104     const char *getPassName() const {
105       return "Prolog/Epilog Insertion & Frame Finalization";
106     }
107 
108     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
109       AU.setPreservesCFG();
110       if (ShrinkWrapping || ShrinkWrapFunc != "") {
111         AU.addRequired<MachineLoopInfo>();
112         AU.addRequired<MachineDominatorTree>();
113       }
114       AU.addPreserved<MachineLoopInfo>();
115       AU.addPreserved<MachineDominatorTree>();
116       MachineFunctionPass::getAnalysisUsage(AU);
117     }
118 
119     /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
120     /// frame indexes with appropriate references.
121     ///
122     bool runOnMachineFunction(MachineFunction &Fn) {
123       const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
124       RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
125 
126       DEBUG(MF = &Fn);
127 
128       // Get MachineModuleInfo so that we can track the construction of the
129       // frame.
130       if (MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>())
131         Fn.getFrameInfo()->setMachineModuleInfo(MMI);
132 
133       // Allow the target machine to make some adjustments to the function
134       // e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
135       TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
136 
137       // Scan the function for modified callee saved registers and insert spill
138       // code for any callee saved registers that are modified.  Also calculate
139       // the MaxCallFrameSize and HasCalls variables for the function's frame
140       // information and eliminates call frame pseudo instructions.
141       calculateCalleeSavedRegisters(Fn);
142 
143       // Determine placement of CSR spill/restore code:
144       //  - with shrink wrapping, place spills and restores to tightly
145       //    enclose regions in the Machine CFG of the function where
146       //    they are used. Without shrink wrapping
147       //  - default (no shrink wrapping), place all spills in the
148       //    entry block, all restores in return blocks.
149       placeCSRSpillsAndRestores(Fn);
150 
151       // Add the code to save and restore the callee saved registers
152       insertCSRSpillsAndRestores(Fn);
153 
154       // Allow the target machine to make final modifications to the function
155       // before the frame layout is finalized.
156       TRI->processFunctionBeforeFrameFinalized(Fn);
157 
158       // Calculate actual frame offsets for all abstract stack objects...
159       calculateFrameObjectOffsets(Fn);
160 
161       // Add prolog and epilog code to the function.  This function is required
162       // to align the stack frame as necessary for any stack variables or
163       // called functions.  Because of this, calculateCalleeSavedRegisters
164       // must be called before this function in order to set the HasCalls
165       // and MaxCallFrameSize variables.
166       insertPrologEpilogCode(Fn);
167 
168       // Replace all MO_FrameIndex operands with physical register references
169       // and actual offsets.
170       //
171       replaceFrameIndices(Fn);
172 
173       delete RS;
174       clearAllSets();
175       return true;
176     }
177 
178   private:
179     RegScavenger *RS;
180 
181     // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
182     // stack frame indexes.
183     unsigned MinCSFrameIndex, MaxCSFrameIndex;
184 
185     // Analysis info for spill/restore placement.
186     // "CSR": "callee saved register".
187 
188     // CSRegSet contains indices into the Callee Saved Register Info
189     // vector built by calculateCalleeSavedRegisters() and accessed
190     // via MF.getFrameInfo()->getCalleeSavedInfo().
191     typedef SparseBitVector<> CSRegSet;
192 
193     // CSRegBlockMap maps MachineBasicBlocks to sets of callee
194     // saved register indices.
195     typedef DenseMap<MachineBasicBlock*, CSRegSet> CSRegBlockMap;
196 
197     // Set and maps for computing CSR spill/restore placement:
198     //  used in function (UsedCSRegs)
199     //  used in a basic block (CSRUsed)
200     //  anticipatable in a basic block (Antic{In,Out})
201     //  available in a basic block (Avail{In,Out})
202     //  to be spilled at the entry to a basic block (CSRSave)
203     //  to be restored at the end of a basic block (CSRRestore)
204     CSRegSet UsedCSRegs;
205     CSRegBlockMap CSRUsed;
206     CSRegBlockMap AnticIn, AnticOut;
207     CSRegBlockMap AvailIn, AvailOut;
208     CSRegBlockMap CSRSave;
209     CSRegBlockMap CSRRestore;
210 
211     // Entry and return blocks of the current function.
212     MachineBasicBlock* EntryBlock;
213     SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
214 
215     // Map of MBBs to top level MachineLoops.
216     DenseMap<MachineBasicBlock*, MachineLoop*> TLLoops;
217 
218     // Flag to control shrink wrapping per-function:
219     // may choose to skip shrink wrapping for certain
220     // functions.
221     bool ShrinkWrapThisFunction;
222 
223 #ifndef NDEBUG
224     // Machine function handle.
225     MachineFunction* MF;
226 
227     // Flag indicating that the current function
228     // has at least one "short" path in the machine
229     // CFG from the entry block to an exit block.
230     bool HasFastExitPath;
231 #endif
232 
233     bool calculateSets(MachineFunction &Fn);
234     bool calcAnticInOut(MachineBasicBlock* MBB);
235     bool calcAvailInOut(MachineBasicBlock* MBB);
236     void calculateAnticAvail(MachineFunction &Fn);
237     bool addUsesForMEMERegion(MachineBasicBlock* MBB,
238                               SmallVector<MachineBasicBlock*, 4>& blks);
239     bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
240     bool calcSpillPlacements(MachineBasicBlock* MBB,
241                              SmallVector<MachineBasicBlock*, 4> &blks,
242                              CSRegBlockMap &prevSpills);
243     bool calcRestorePlacements(MachineBasicBlock* MBB,
244                                SmallVector<MachineBasicBlock*, 4> &blks,
245                                CSRegBlockMap &prevRestores);
246     void placeSpillsAndRestores(MachineFunction &Fn);
247     void placeCSRSpillsAndRestores(MachineFunction &Fn);
248     void calculateCalleeSavedRegisters(MachineFunction &Fn);
249     void insertCSRSpillsAndRestores(MachineFunction &Fn);
250     void calculateFrameObjectOffsets(MachineFunction &Fn);
251     void replaceFrameIndices(MachineFunction &Fn);
252     void insertPrologEpilogCode(MachineFunction &Fn);
253 
254     // Initialize DFA sets, called before iterations.
255     void clearAnticAvailSets();
256     // Clear all sets constructed by shrink wrapping.
257     void clearAllSets();
258 
259     // Initialize all shrink wrapping data.
260     void initShrinkWrappingInfo();
261 
262     // Convienences for dealing with machine loops.
263     MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP) {
264       assert(LP && "Machine loop is NULL.");
265       MachineBasicBlock* PHDR = LP->getLoopPreheader();
266       MachineLoop* PLP = LP->getParentLoop();
267       while (PLP) {
268         PHDR = PLP->getLoopPreheader();
269         PLP = PLP->getParentLoop();
270       }
271       return PHDR;
272     }
273 
274     MachineLoop* getTopLevelLoopParent(MachineLoop *LP) {
275       if (LP == 0)
276         return 0;
277       MachineLoop* PLP = LP->getParentLoop();
278       while (PLP) {
279         LP = PLP;
280         PLP = PLP->getParentLoop();
281       }
282       return LP;
283     }
284 
285     // Propgate CSRs used in MBB to all MBBs of loop LP.
286     void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
287 
288     // Convenience for recognizing return blocks.
289     bool isReturnBlock(MachineBasicBlock* MBB) {
290       return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
291     }
292 
293 #ifndef NDEBUG
294     // Debugging methods.
295 
296     // Mark this function as having fast exit paths.
297     void findFastExitPath();
298 
299     // Verify placement of spills/restores.
300     void verifySpillRestorePlacement();
301 
302     std::string getBasicBlockName(const MachineBasicBlock* MBB);
303     std::string stringifyCSRegSet(const CSRegSet& s);
304     void dumpSet(const CSRegSet& s);
305     void dumpUsed(MachineBasicBlock* MBB);
306     void dumpAllUsed();
307     void dumpSets(MachineBasicBlock* MBB);
308     void dumpSets1(MachineBasicBlock* MBB);
309     void dumpAllSets();
310     void dumpSRSets();
311 #endif
312 
313   };
314   char PEI::ID = 0;
315 }
316 
317 // Initialize shrink wrapping DFA sets, called before iterations.
318 void PEI::clearAnticAvailSets() {
319   AnticIn.clear();
320   AnticOut.clear();
321   AvailIn.clear();
322   AvailOut.clear();
323 }
324 
325 // Clear all sets constructed by shrink wrapping.
326 void PEI::clearAllSets() {
327   ReturnBlocks.clear();
328   clearAnticAvailSets();
329   UsedCSRegs.clear();
330   CSRUsed.clear();
331   TLLoops.clear();
332   CSRSave.clear();
333   CSRRestore.clear();
334 }
335 
336 // Initialize all shrink wrapping data.
337 void PEI::initShrinkWrappingInfo() {
338   clearAllSets();
339   EntryBlock = 0;
340   HasFastExitPath = false;
341   ShrinkWrapThisFunction = ShrinkWrapping;
342   // DEBUG: enable or disable shrink wrapping for the current function
343   // via --shrink-wrap-func=<funcname>.
344 #ifndef NDEBUG
345   if (ShrinkWrapFunc != "") {
346     std::string MFName = MF->getFunction()->getName();
347     ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
348   }
349 #endif
350 }
351 
352 
353 /// createPrologEpilogCodeInserter - This function returns a pass that inserts
354 /// prolog and epilog code, and eliminates abstract frame references.
355 ///
356 FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
357 
358 /// placeCSRSpillsAndRestores - determine which MBBs of the function
359 /// need save, restore code for callee-saved registers by doing a DF analysis
360 /// similar to the one used in code motion (GVNPRE). This produces maps of MBBs
361 /// to sets of registers (CSRs) for saves and restores. MachineLoopInfo
362 /// is used to ensure that CSR save/restore code is not placed inside loops.
363 /// This function computes the maps of MBBs -> CSRs to spill and restore
364 /// in CSRSave, CSRRestore.
365 ///
366 /// If shrink wrapping is not being performed, place all spills in
367 /// the entry block, all restores in return blocks. In this case,
368 /// CSRSave has a single mapping, CSRRestore has mappings for each
369 /// return block.
370 ///
371 void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
372 
373   initShrinkWrappingInfo();
374 
375   DEBUG(if (ShrinkWrapThisFunction) {
376       DOUT << "Place CSR spills/restores for "
377            << MF->getFunction()->getName() << "\n";
378     });
379 
380   if (calculateSets(Fn))
381     placeSpillsAndRestores(Fn);
382 }
383 
384 /// calcAnticInOut - calculate the anticipated in/out reg sets
385 /// for the given MBB by looking forward in the MCFG at MBB's
386 /// successors.
387 ///
388 bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
389   bool changed = false;
390 
391   // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
392   SmallVector<MachineBasicBlock*, 4> successors;
393   for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
394          SE = MBB->succ_end(); SI != SE; ++SI) {
395     MachineBasicBlock* SUCC = *SI;
396     if (SUCC != MBB)
397       successors.push_back(SUCC);
398   }
399 
400   unsigned i = 0, e = successors.size();
401   if (i != e) {
402     CSRegSet prevAnticOut = AnticOut[MBB];
403     MachineBasicBlock* SUCC = successors[i];
404 
405     AnticOut[MBB] = AnticIn[SUCC];
406     for (++i; i != e; ++i) {
407       SUCC = successors[i];
408       AnticOut[MBB] &= AnticIn[SUCC];
409     }
410     if (prevAnticOut != AnticOut[MBB])
411       changed = true;
412   }
413 
414   // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
415   CSRegSet prevAnticIn = AnticIn[MBB];
416   AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
417   if (prevAnticIn |= AnticIn[MBB])
418     changed = true;
419   return changed;
420 }
421 
422 /// calcAvailInOut - calculate the available in/out reg sets
423 /// for the given MBB by looking backward in the MCFG at MBB's
424 /// predecessors.
425 ///
426 bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
427   bool changed = false;
428 
429   // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
430   SmallVector<MachineBasicBlock*, 4> predecessors;
431   for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
432          PE = MBB->pred_end(); PI != PE; ++PI) {
433     MachineBasicBlock* PRED = *PI;
434     if (PRED != MBB)
435       predecessors.push_back(PRED);
436   }
437 
438   unsigned i = 0, e = predecessors.size();
439   if (i != e) {
440     CSRegSet prevAvailIn = AvailIn[MBB];
441     MachineBasicBlock* PRED = predecessors[i];
442 
443     AvailIn[MBB] = AvailOut[PRED];
444     for (++i; i != e; ++i) {
445       PRED = predecessors[i];
446       AvailIn[MBB] &= AvailOut[PRED];
447     }
448     if (prevAvailIn != AvailIn[MBB])
449       changed = true;
450   }
451 
452   // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
453   CSRegSet prevAvailOut = AvailOut[MBB];
454   AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
455   if (prevAvailOut |= AvailOut[MBB])
456     changed = true;
457   return changed;
458 }
459 
460 /// calculateAnticAvail - build the sets anticipated and available
461 /// registers in the MCFG of the current function iteratively,
462 /// doing a combined forward and backward analysis.
463 ///
464 void PEI::calculateAnticAvail(MachineFunction &Fn) {
465   // Initialize data flow sets.
466   clearAnticAvailSets();
467 
468   // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
469   bool changed = true;
470   unsigned iterations = 0;
471   while (changed) {
472     changed = false;
473     ++iterations;
474     for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
475          MBBI != MBBE; ++MBBI) {
476       MachineBasicBlock* MBB = MBBI;
477 
478       // Calculate anticipated in, out regs at MBB from
479       // anticipated at successors of MBB.
480       changed |= calcAnticInOut(MBB);
481 
482       // Calculate available in, out regs at MBB from
483       // available at predecessors of MBB.
484       changed |= calcAvailInOut(MBB);
485     }
486   }
487 
488   DEBUG(if (ShrinkWrapDebugging >= Details) {
489       DOUT << "-----------------------------------------------------------\n";
490       DOUT << " Antic/Avail Sets:\n";
491       DOUT << "-----------------------------------------------------------\n";
492       DOUT << "iterations = " << iterations << "\n";
493       DOUT << "-----------------------------------------------------------\n";
494       DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n";
495       DOUT << "-----------------------------------------------------------\n";
496       for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
497            MBBI != MBBE; ++MBBI) {
498         MachineBasicBlock* MBB = MBBI;
499         dumpSets(MBB);
500       }
501       DOUT << "-----------------------------------------------------------\n";
502     });
503 }
504 
505 /// propagateUsesAroundLoop - copy used register info from MBB to all blocks
506 /// of the loop given by LP and its parent loops. This prevents spills/restores
507 /// from being placed in the bodies of loops.
508 ///
509 void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
510   if (! MBB || !LP)
511     return;
512 
513   std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
514   for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
515     MachineBasicBlock* LBB = loopBlocks[i];
516     if (LBB == MBB)
517       continue;
518     if (CSRUsed[LBB].contains(CSRUsed[MBB]))
519       continue;
520     CSRUsed[LBB] |= CSRUsed[MBB];
521   }
522 }
523 
524 /// calculateSets - collect the CSRs used in this function, compute
525 /// the DF sets that describe the initial minimal regions in the
526 /// Machine CFG around which CSR spills and restores must be placed.
527 ///
528 /// Additionally, this function decides if shrink wrapping should
529 /// be disabled for the current function, checking the following:
530 ///  1. the current function has more than 500 MBBs: heuristic limit
531 ///     on function size to reduce compile time impact of the current
532 ///     iterative algorithm.
533 ///  2. all CSRs are used in the entry block.
534 ///  3. all CSRs are used in all immediate successors of the entry block.
535 ///  4. all CSRs are used in a subset of blocks, each of which dominates
536 ///     all return blocks. These blocks, taken as a subgraph of the MCFG,
537 ///     are equivalent to the entry block since all execution paths pass
538 ///     through them.
539 ///
540 bool PEI::calculateSets(MachineFunction &Fn) {
541   // Sets used to compute spill, restore placement sets.
542   const std::vector<CalleeSavedInfo> CSI =
543     Fn.getFrameInfo()->getCalleeSavedInfo();
544 
545   // If no CSRs used, we are done.
546   if (CSI.empty()) {
547     DEBUG(if (ShrinkWrapThisFunction)
548             DOUT << "DISABLED: " << Fn.getFunction()->getName()
549                  << ": uses no callee-saved registers\n");
550     return false;
551   }
552 
553   // Save refs to entry and return blocks.
554   EntryBlock = Fn.begin();
555   for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
556        MBB != E; ++MBB)
557     if (isReturnBlock(MBB))
558       ReturnBlocks.push_back(MBB);
559 
560   // Determine if this function has fast exit paths.
561   DEBUG(if (ShrinkWrapThisFunction)
562           findFastExitPath());
563 
564   // Limit shrink wrapping via the current iterative bit vector
565   // implementation to functions with <= 500 MBBs.
566   if (Fn.size() > 500) {
567     DEBUG(if (ShrinkWrapThisFunction)
568             DOUT << "DISABLED: " << Fn.getFunction()->getName()
569                  << ": too large (" << Fn.size() << " MBBs)\n");
570     ShrinkWrapThisFunction = false;
571   }
572 
573   // Return now if not shrink wrapping.
574   if (! ShrinkWrapThisFunction)
575     return false;
576 
577   // Collect set of used CSRs.
578   for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
579     UsedCSRegs.set(inx);
580   }
581 
582   // Walk instructions in all MBBs, create CSRUsed[] sets, choose
583   // whether or not to shrink wrap this function.
584   MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
585   MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
586   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
587 
588   bool allCSRUsesInEntryBlock = true;
589   for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
590        MBBI != MBBE; ++MBBI) {
591     MachineBasicBlock* MBB = MBBI;
592     for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) {
593       for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) {
594         unsigned Reg = CSI[inx].getReg();
595         // If instruction I reads or modifies Reg, add it to UsedCSRegs,
596         // CSRUsed map for the current block.
597         for (unsigned opInx = 0, opEnd = I->getNumOperands();
598              opInx != opEnd; ++opInx) {
599           const MachineOperand &MO = I->getOperand(opInx);
600           if (! (MO.isReg() && (MO.isUse() || MO.isDef())))
601             continue;
602           unsigned MOReg = MO.getReg();
603           if (!MOReg)
604             continue;
605           if (MOReg == Reg ||
606               (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
607                TargetRegisterInfo::isPhysicalRegister(Reg) &&
608                TRI->isSubRegister(Reg, MOReg))) {
609             // CSR Reg is defined/used in block MBB.
610             CSRUsed[MBB].set(inx);
611             // Check for uses in EntryBlock.
612             if (MBB != EntryBlock)
613               allCSRUsesInEntryBlock = false;
614           }
615         }
616       }
617     }
618 
619     if (CSRUsed[MBB].empty())
620       continue;
621 
622     // Propagate CSRUsed[MBB] in loops
623     if (MachineLoop* LP = LI.getLoopFor(MBB)) {
624       // Add top level loop to work list.
625       MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP);
626       MachineLoop* PLP = getTopLevelLoopParent(LP);
627 
628       if (! HDR) {
629         HDR = PLP->getHeader();
630         assert(HDR->pred_size() > 0 && "Loop header has no predecessors?");
631         MachineBasicBlock::pred_iterator PI = HDR->pred_begin();
632         HDR = *PI;
633       }
634       TLLoops[HDR] = PLP;
635 
636       // Push uses from inside loop to its parent loops,
637       // or to all other MBBs in its loop.
638       if (LP->getLoopDepth() > 1) {
639         for (MachineLoop* PLP = LP->getParentLoop(); PLP;
640              PLP = PLP->getParentLoop()) {
641           propagateUsesAroundLoop(MBB, PLP);
642         }
643       } else {
644         propagateUsesAroundLoop(MBB, LP);
645       }
646     }
647   }
648 
649   if (allCSRUsesInEntryBlock) {
650     DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
651           << ": all CSRs used in EntryBlock\n");
652     ShrinkWrapThisFunction = false;
653   } else {
654     bool allCSRsUsedInEntryFanout = true;
655     for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
656            SE = EntryBlock->succ_end(); SI != SE; ++SI) {
657       MachineBasicBlock* SUCC = *SI;
658       if (CSRUsed[SUCC] != UsedCSRegs)
659         allCSRsUsedInEntryFanout = false;
660     }
661     if (allCSRsUsedInEntryFanout) {
662       DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
663             << ": all CSRs used in imm successors of EntryBlock\n");
664       ShrinkWrapThisFunction = false;
665     }
666   }
667 
668   if (ShrinkWrapThisFunction) {
669     // Check if MBB uses CSRs and dominates all exit nodes.
670     // Such nodes are equiv. to the entry node w.r.t.
671     // CSR uses: every path through the function must
672     // pass through this node. If each CSR is used at least
673     // once by these nodes, shrink wrapping is disabled.
674     CSRegSet CSRUsedInChokePoints;
675     for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
676          MBBI != MBBE; ++MBBI) {
677       MachineBasicBlock* MBB = MBBI;
678       if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1)
679         continue;
680       bool dominatesExitNodes = true;
681       for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
682         if (! DT.dominates(MBB, ReturnBlocks[ri])) {
683           dominatesExitNodes = false;
684           break;
685         }
686       if (dominatesExitNodes) {
687         CSRUsedInChokePoints |= CSRUsed[MBB];
688         if (CSRUsedInChokePoints == UsedCSRegs) {
689           DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName()
690                 << ": all CSRs used in choke point(s) at "
691                 << getBasicBlockName(MBB) << "\n");
692           ShrinkWrapThisFunction = false;
693           break;
694         }
695       }
696     }
697   }
698 
699   // Return now if we have decided not to apply shrink wrapping
700   // to the current function.
701   if (! ShrinkWrapThisFunction)
702     return false;
703 
704   DEBUG({
705       DOUT << "ENABLED: " << Fn.getFunction()->getName();
706       if (HasFastExitPath)
707         DOUT << " (fast exit path)";
708       DOUT << "\n";
709       if (ShrinkWrapDebugging >= BasicInfo) {
710         DOUT << "------------------------------"
711              << "-----------------------------\n";
712         DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
713         if (ShrinkWrapDebugging >= Details) {
714           DOUT << "------------------------------"
715                << "-----------------------------\n";
716           dumpAllUsed();
717         }
718       }
719     });
720 
721   // Build initial DF sets to determine minimal regions in the
722   // Machine CFG around which CSRs must be spilled and restored.
723   calculateAnticAvail(Fn);
724 
725   return true;
726 }
727 
728 /// addUsesForMEMERegion - add uses of CSRs spilled or restored in
729 /// multi-entry, multi-exit (MEME) regions so spill and restore
730 /// placement will not break code that enters or leaves a
731 /// shrink-wrapped region by inducing spills with no matching
732 /// restores or restores with no matching spills. A MEME region
733 /// is a subgraph of the MCFG with multiple entry edges, multiple
734 /// exit edges, or both. This code propagates use information
735 /// through the MCFG until all paths requiring spills and restores
736 /// _outside_ the computed minimal placement regions have been covered.
737 ///
738 bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
739                                SmallVector<MachineBasicBlock*, 4>& blks) {
740   if (MBB->succ_size() < 2 && MBB->pred_size() < 2) {
741     bool processThisBlock = false;
742     for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
743            SE = MBB->succ_end(); SI != SE; ++SI) {
744       MachineBasicBlock* SUCC = *SI;
745       if (SUCC->pred_size() > 1) {
746         processThisBlock = true;
747         break;
748       }
749     }
750     if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) {
751       for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
752              PE = MBB->pred_end(); PI != PE; ++PI) {
753         MachineBasicBlock* PRED = *PI;
754         if (PRED->succ_size() > 1) {
755           processThisBlock = true;
756           break;
757         }
758       }
759     }
760     if (! processThisBlock)
761       return false;
762   }
763 
764   CSRegSet prop;
765   if (!CSRSave[MBB].empty())
766     prop = CSRSave[MBB];
767   else if (!CSRRestore[MBB].empty())
768     prop = CSRRestore[MBB];
769   else
770     prop = CSRUsed[MBB];
771   if (prop.empty())
772     return false;
773 
774   // Propagate selected bits to successors, predecessors of MBB.
775   bool addedUses = false;
776   for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
777          SE = MBB->succ_end(); SI != SE; ++SI) {
778     MachineBasicBlock* SUCC = *SI;
779     // Self-loop
780     if (SUCC == MBB)
781       continue;
782     if (! CSRUsed[SUCC].contains(prop)) {
783       CSRUsed[SUCC] |= prop;
784       addedUses = true;
785       blks.push_back(SUCC);
786       DEBUG(if (ShrinkWrapDebugging >= Iterations)
787               DOUT << getBasicBlockName(MBB)
788                    << "(" << stringifyCSRegSet(prop) << ")->"
789                    << "successor " << getBasicBlockName(SUCC) << "\n");
790     }
791   }
792   for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
793          PE = MBB->pred_end(); PI != PE; ++PI) {
794     MachineBasicBlock* PRED = *PI;
795     // Self-loop
796     if (PRED == MBB)
797       continue;
798     if (! CSRUsed[PRED].contains(prop)) {
799       CSRUsed[PRED] |= prop;
800       addedUses = true;
801       blks.push_back(PRED);
802       DEBUG(if (ShrinkWrapDebugging >= Iterations)
803               DOUT << getBasicBlockName(MBB)
804                    << "(" << stringifyCSRegSet(prop) << ")->"
805                    << "predecessor " << getBasicBlockName(PRED) << "\n");
806     }
807   }
808   return addedUses;
809 }
810 
811 /// addUsesForTopLevelLoops - add uses for CSRs used inside top
812 /// level loops to the exit blocks of those loops.
813 ///
814 bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
815   bool addedUses = false;
816 
817   // Place restores for top level loops where needed.
818   for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator
819          I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) {
820     MachineBasicBlock* MBB = I->first;
821     MachineLoop* LP = I->second;
822     MachineBasicBlock* HDR = LP->getHeader();
823     SmallVector<MachineBasicBlock*, 4> exitBlocks;
824     CSRegSet loopSpills;
825 
826     loopSpills = CSRSave[MBB];
827     if (CSRSave[MBB].empty()) {
828       loopSpills = CSRUsed[HDR];
829       assert(!loopSpills.empty() && "No CSRs used in loop?");
830     } else if (CSRRestore[MBB].contains(CSRSave[MBB]))
831       continue;
832 
833     LP->getExitBlocks(exitBlocks);
834     assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?");
835     for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) {
836       MachineBasicBlock* EXB = exitBlocks[i];
837       if (! CSRUsed[EXB].contains(loopSpills)) {
838         CSRUsed[EXB] |= loopSpills;
839         addedUses = true;
840         DEBUG(if (ShrinkWrapDebugging >= Iterations)
841                 DOUT << "LOOP " << getBasicBlockName(MBB)
842                      << "(" << stringifyCSRegSet(loopSpills) << ")->"
843                      << getBasicBlockName(EXB) << "\n");
844         if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
845           blks.push_back(EXB);
846       }
847     }
848   }
849   return addedUses;
850 }
851 
852 /// calcSpillPlacements - determine which CSRs should be spilled
853 /// in MBB using AnticIn sets of MBB's predecessors, keeping track
854 /// of changes to spilled reg sets. Add MBB to the set of blocks
855 /// that need to be processed for propagating use info to cover
856 /// multi-entry/exit regions.
857 ///
858 bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
859                               SmallVector<MachineBasicBlock*, 4> &blks,
860                               CSRegBlockMap &prevSpills) {
861   bool placedSpills = false;
862   // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
863   CSRegSet anticInPreds;
864   SmallVector<MachineBasicBlock*, 4> predecessors;
865   for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
866          PE = MBB->pred_end(); PI != PE; ++PI) {
867     MachineBasicBlock* PRED = *PI;
868     if (PRED != MBB)
869       predecessors.push_back(PRED);
870   }
871   unsigned i = 0, e = predecessors.size();
872   if (i != e) {
873     MachineBasicBlock* PRED = predecessors[i];
874     anticInPreds = UsedCSRegs - AnticIn[PRED];
875     for (++i; i != e; ++i) {
876       PRED = predecessors[i];
877       anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
878     }
879   } else {
880     // Handle uses in entry blocks (which have no predecessors).
881     // This is necessary because the DFA formulation assumes the
882     // entry and (multiple) exit nodes cannot have CSR uses, which
883     // is not the case in the real world.
884     anticInPreds = UsedCSRegs;
885   }
886   // Compute spills required at MBB:
887   CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
888 
889   if (! CSRSave[MBB].empty()) {
890     if (MBB == EntryBlock) {
891       for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
892         CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
893     } else {
894       // Reset all regs spilled in MBB that are also spilled in EntryBlock.
895       if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
896         CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
897       }
898     }
899   }
900   placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
901   prevSpills[MBB] = CSRSave[MBB];
902   // Remember this block for adding restores to successor
903   // blocks for multi-entry region.
904   if (placedSpills)
905     blks.push_back(MBB);
906 
907   DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
908           DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
909                << stringifyCSRegSet(CSRSave[MBB]) << "\n");
910 
911   return placedSpills;
912 }
913 
914 /// calcRestorePlacements - determine which CSRs should be restored
915 /// in MBB using AvailOut sets of MBB's succcessors, keeping track
916 /// of changes to restored reg sets. Add MBB to the set of blocks
917 /// that need to be processed for propagating use info to cover
918 /// multi-entry/exit regions.
919 ///
920 bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
921                                 SmallVector<MachineBasicBlock*, 4> &blks,
922                                 CSRegBlockMap &prevRestores) {
923   bool placedRestores = false;
924   // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB)
925   CSRegSet availOutSucc;
926   SmallVector<MachineBasicBlock*, 4> successors;
927   for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
928          SE = MBB->succ_end(); SI != SE; ++SI) {
929     MachineBasicBlock* SUCC = *SI;
930     if (SUCC != MBB)
931       successors.push_back(SUCC);
932   }
933   unsigned i = 0, e = successors.size();
934   if (i != e) {
935     MachineBasicBlock* SUCC = successors[i];
936     availOutSucc = UsedCSRegs - AvailOut[SUCC];
937     for (++i; i != e; ++i) {
938       SUCC = successors[i];
939       availOutSucc &= (UsedCSRegs - AvailOut[SUCC]);
940     }
941   } else {
942     if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) {
943       // Handle uses in return blocks (which have no successors).
944       // This is necessary because the DFA formulation assumes the
945       // entry and (multiple) exit nodes cannot have CSR uses, which
946       // is not the case in the real world.
947       availOutSucc = UsedCSRegs;
948     }
949   }
950   // Compute restores required at MBB:
951   CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc;
952 
953   // Postprocess restore placements at MBB.
954   // Remove the CSRs that are restored in the return blocks.
955   // Lest this be confusing, note that:
956   // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks.
957   if (MBB->succ_size() && ! CSRRestore[MBB].empty()) {
958     if (! CSRSave[EntryBlock].empty())
959       CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock];
960   }
961   placedRestores = (CSRRestore[MBB] != prevRestores[MBB]);
962   prevRestores[MBB] = CSRRestore[MBB];
963   // Remember this block for adding saves to predecessor
964   // blocks for multi-entry region.
965   if (placedRestores)
966     blks.push_back(MBB);
967 
968   DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
969           DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
970                << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
971 
972   return placedRestores;
973 }
974 
975 /// placeSpillsAndRestores - place spills and restores of CSRs
976 /// used in MBBs in minimal regions that contain the uses.
977 ///
978 void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
979   CSRegBlockMap prevCSRSave;
980   CSRegBlockMap prevCSRRestore;
981   SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks;
982   bool changed = true;
983   unsigned iterations = 0;
984 
985   // Iterate computation of spill and restore placements in the MCFG until:
986   //   1. CSR use info has been fully propagated around the MCFG, and
987   //   2. computation of CSRSave[], CSRRestore[] reach fixed points.
988   while (changed) {
989     changed = false;
990     ++iterations;
991 
992     DEBUG(if (ShrinkWrapDebugging >= Iterations)
993             DOUT << "iter " << iterations
994                  << " --------------------------------------------------\n");
995 
996     // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
997     // which determines the placements of spills and restores.
998     // Keep track of changes to spills, restores in each iteration to
999     // minimize the total iterations.
1000     bool SRChanged = false;
1001     for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
1002          MBBI != MBBE; ++MBBI) {
1003       MachineBasicBlock* MBB = MBBI;
1004 
1005       // Place spills for CSRs in MBB.
1006       SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave);
1007 
1008       // Place restores for CSRs in MBB.
1009       SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore);
1010     }
1011 
1012     // Add uses of CSRs used inside loops where needed.
1013     changed |= addUsesForTopLevelLoops(cvBlocks);
1014 
1015     // Add uses for CSRs spilled or restored at branch, join points.
1016     if (changed || SRChanged) {
1017       while (! cvBlocks.empty()) {
1018         MachineBasicBlock* MBB = cvBlocks.pop_back_val();
1019         changed |= addUsesForMEMERegion(MBB, ncvBlocks);
1020       }
1021       if (! ncvBlocks.empty()) {
1022         cvBlocks = ncvBlocks;
1023         ncvBlocks.clear();
1024       }
1025     }
1026 
1027     if (changed) {
1028       calculateAnticAvail(Fn);
1029       CSRSave.clear();
1030       CSRRestore.clear();
1031     }
1032   }
1033 
1034   // Check for effectiveness:
1035   //  SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
1036   //  numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
1037   // Gives a measure of how many CSR spills have been moved from EntryBlock
1038   // to minimal regions enclosing their uses.
1039   CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
1040   unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
1041   numSRReduced += numSRReducedThisFunc;
1042   DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
1043       DOUT << "-----------------------------------------------------------\n";
1044       DOUT << "total iterations = " << iterations << " ( "
1045            << Fn.getFunction()->getName()
1046            << " " << numSRReducedThisFunc
1047            << " " << Fn.size()
1048            << " )\n";
1049       DOUT << "-----------------------------------------------------------\n";
1050       dumpSRSets();
1051       DOUT << "-----------------------------------------------------------\n";
1052       if (numSRReducedThisFunc)
1053         verifySpillRestorePlacement();
1054     });
1055 }
1056 
1057 /// calculateCalleeSavedRegisters - Scan the function for modified callee saved
1058 /// registers.  Also calculate the MaxCallFrameSize and HasCalls variables for
1059 /// the function's frame information and eliminates call frame pseudo
1060 /// instructions.
1061 ///
1062 void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
1063   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1064   const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
1065 
1066   // Get the callee saved register list...
1067   const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
1068 
1069   // Get the function call frame set-up and tear-down instruction opcode
1070   int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
1071   int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
1072 
1073   // These are used to keep track the callee-save area. Initialize them.
1074   MinCSFrameIndex = INT_MAX;
1075   MaxCSFrameIndex = 0;
1076 
1077   // Early exit for targets which have no callee saved registers and no call
1078   // frame setup/destroy pseudo instructions.
1079   if ((CSRegs == 0 || CSRegs[0] == 0) &&
1080       FrameSetupOpcode == -1 && FrameDestroyOpcode == -1)
1081     return;
1082 
1083   unsigned MaxCallFrameSize = 0;
1084   bool HasCalls = false;
1085 
1086   std::vector<MachineBasicBlock::iterator> FrameSDOps;
1087   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
1088     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
1089       if (I->getOpcode() == FrameSetupOpcode ||
1090           I->getOpcode() == FrameDestroyOpcode) {
1091         assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
1092                " instructions should have a single immediate argument!");
1093         unsigned Size = I->getOperand(0).getImm();
1094         if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
1095         HasCalls = true;
1096         FrameSDOps.push_back(I);
1097       }
1098 
1099   MachineFrameInfo *FFI = Fn.getFrameInfo();
1100   FFI->setHasCalls(HasCalls);
1101   FFI->setMaxCallFrameSize(MaxCallFrameSize);
1102 
1103   for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) {
1104     MachineBasicBlock::iterator I = FrameSDOps[i];
1105     // If call frames are not being included as part of the stack frame,
1106     // and there is no dynamic allocation (therefore referencing frame slots
1107     // off sp), leave the pseudo ops alone. We'll eliminate them later.
1108     if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
1109       RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
1110   }
1111 
1112   // Now figure out which *callee saved* registers are modified by the current
1113   // function, thus needing to be saved and restored in the prolog/epilog.
1114   //
1115   const TargetRegisterClass* const *CSRegClasses =
1116     RegInfo->getCalleeSavedRegClasses(&Fn);
1117   std::vector<CalleeSavedInfo> CSI;
1118   for (unsigned i = 0; CSRegs[i]; ++i) {
1119     unsigned Reg = CSRegs[i];
1120     if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
1121         // If the reg is modified, save it!
1122       CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1123     } else {
1124       for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
1125            *AliasSet; ++AliasSet) {  // Check alias registers too.
1126         if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
1127           CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
1128           break;
1129         }
1130       }
1131     }
1132   }
1133 
1134   if (CSI.empty())
1135     return;   // Early exit if no callee saved registers are modified!
1136 
1137   unsigned NumFixedSpillSlots;
1138   const std::pair<unsigned,int> *FixedSpillSlots =
1139     TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
1140 
1141   // Now that we know which registers need to be saved and restored, allocate
1142   // stack slots for them.
1143   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1144     unsigned Reg = CSI[i].getReg();
1145     const TargetRegisterClass *RC = CSI[i].getRegClass();
1146 
1147     // Check to see if this physreg must be spilled to a particular stack slot
1148     // on this target.
1149     const std::pair<unsigned,int> *FixedSlot = FixedSpillSlots;
1150     while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
1151            FixedSlot->first != Reg)
1152       ++FixedSlot;
1153 
1154     int FrameIdx;
1155     if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) {
1156       // Nope, just spill it anywhere convenient.
1157       unsigned Align = RC->getAlignment();
1158       unsigned StackAlign = TFI->getStackAlignment();
1159       // We may not be able to sastify the desired alignment specification of
1160       // the TargetRegisterClass if the stack alignment is smaller.
1161       // Use the min.
1162       Align = std::min(Align, StackAlign);
1163       FrameIdx = FFI->CreateStackObject(RC->getSize(), Align);
1164       if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
1165       if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
1166     } else {
1167       // Spill it to the stack where we must.
1168       FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second);
1169     }
1170     CSI[i].setFrameIdx(FrameIdx);
1171   }
1172 
1173   FFI->setCalleeSavedInfo(CSI);
1174 }
1175 
1176 /// insertCSRSpillsAndRestores - Insert spill and restore code for
1177 /// callee saved registers used in the function, handling shrink wrapping.
1178 ///
1179 void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
1180   // Get callee saved register information.
1181   MachineFrameInfo *FFI = Fn.getFrameInfo();
1182   const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
1183 
1184   // Early exit if no callee saved registers are modified!
1185   if (CSI.empty())
1186     return;
1187 
1188   const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
1189   MachineBasicBlock::iterator I;
1190 
1191   DEBUG(if (ShrinkWrapThisFunction && ShrinkWrapDebugging >= Details)
1192           DOUT << "Inserting CSR spills/restores in function "
1193                << Fn.getFunction()->getName() << "\n");
1194 
1195   if (! ShrinkWrapThisFunction) {
1196     // Spill using target interface.
1197     I = EntryBlock->begin();
1198     if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
1199       for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1200         // Add the callee-saved register as live-in.
1201         // It's killed at the spill.
1202         EntryBlock->addLiveIn(CSI[i].getReg());
1203 
1204         // Insert the spill to the stack frame.
1205         TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
1206                                 CSI[i].getFrameIdx(), CSI[i].getRegClass());
1207       }
1208     }
1209 
1210     // Restore using target interface.
1211     for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) {
1212       MachineBasicBlock* MBB = ReturnBlocks[ri];
1213       I = MBB->end(); --I;
1214 
1215       // Skip over all terminator instructions, which are part of the return
1216       // sequence.
1217       MachineBasicBlock::iterator I2 = I;
1218       while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1219         I = I2;
1220 
1221       bool AtStart = I == MBB->begin();
1222       MachineBasicBlock::iterator BeforeI = I;
1223       if (!AtStart)
1224         --BeforeI;
1225 
1226       // Restore all registers immediately before the return and any
1227       // terminators that preceed it.
1228       if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
1229         for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1230           TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
1231                                    CSI[i].getFrameIdx(),
1232                                    CSI[i].getRegClass());
1233           assert(I != MBB->begin() &&
1234                  "loadRegFromStackSlot didn't insert any code!");
1235           // Insert in reverse order.  loadRegFromStackSlot can insert
1236           // multiple instructions.
1237           if (AtStart)
1238             I = MBB->begin();
1239           else {
1240             I = BeforeI;
1241             ++I;
1242           }
1243         }
1244       }
1245     }
1246     return;
1247   }
1248 
1249   // Insert spills.
1250   std::vector<CalleeSavedInfo> blockCSI;
1251   for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1252          BE = CSRSave.end(); BI != BE; ++BI) {
1253     MachineBasicBlock* MBB = BI->first;
1254     CSRegSet save = BI->second;
1255 
1256     if (save.empty())
1257       continue;
1258 
1259     DEBUG(if (ShrinkWrapDebugging >= Details)
1260             DOUT << "Spilling " << stringifyCSRegSet(save)
1261                  << " in " << getBasicBlockName(MBB) << "\n");
1262 
1263     blockCSI.clear();
1264     for (CSRegSet::iterator RI = save.begin(),
1265            RE = save.end(); RI != RE; ++RI) {
1266       blockCSI.push_back(CSI[*RI]);
1267     }
1268     assert(blockCSI.size() > 0 &&
1269            "Could not collect callee saved register info");
1270 
1271     I = MBB->begin();
1272 
1273     // When shrink wrapping, use stack slot stores/loads.
1274     for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1275       // Add the callee-saved register as live-in.
1276       // It's killed at the spill.
1277       MBB->addLiveIn(blockCSI[i].getReg());
1278 
1279       // Insert the spill to the stack frame.
1280       TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
1281                               true,
1282                               blockCSI[i].getFrameIdx(),
1283                               blockCSI[i].getRegClass());
1284     }
1285   }
1286 
1287   DEBUG(if (ShrinkWrapDebugging >= Details)
1288           DOUT << "------------------------------"
1289                << "-----------------------------\n");
1290 
1291   for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1292          BE = CSRRestore.end(); BI != BE; ++BI) {
1293     MachineBasicBlock* MBB = BI->first;
1294     CSRegSet restore = BI->second;
1295 
1296     if (restore.empty())
1297       continue;
1298 
1299     DEBUG(if (ShrinkWrapDebugging >= Details)
1300             DOUT << "Restoring " << stringifyCSRegSet(restore)
1301                  << " in " << getBasicBlockName(MBB) << "\n");
1302 
1303     blockCSI.clear();
1304     for (CSRegSet::iterator RI = restore.begin(),
1305            RE = restore.end(); RI != RE; ++RI) {
1306       blockCSI.push_back(CSI[*RI]);
1307     }
1308     assert(blockCSI.size() > 0 &&
1309            "Could not find callee saved register info");
1310 
1311     // If MBB is empty and needs restores, insert at the _beginning_.
1312     if (MBB->empty()) {
1313       I = MBB->begin();
1314     } else {
1315       I = MBB->end();
1316       --I;
1317 
1318       // Skip over all terminator instructions, which are part of the
1319       // return sequence.
1320       if (! I->getDesc().isTerminator()) {
1321         ++I;
1322       } else {
1323         MachineBasicBlock::iterator I2 = I;
1324         while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
1325           I = I2;
1326       }
1327     }
1328 
1329     bool AtStart = I == MBB->begin();
1330     MachineBasicBlock::iterator BeforeI = I;
1331     if (!AtStart)
1332       --BeforeI;
1333 
1334     // Restore all registers immediately before the return and any
1335     // terminators that preceed it.
1336     for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
1337       TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
1338                                blockCSI[i].getFrameIdx(),
1339                                blockCSI[i].getRegClass());
1340       assert(I != MBB->begin() &&
1341              "loadRegFromStackSlot didn't insert any code!");
1342       // Insert in reverse order.  loadRegFromStackSlot can insert
1343       // multiple instructions.
1344       if (AtStart)
1345         I = MBB->begin();
1346       else {
1347         I = BeforeI;
1348         ++I;
1349       }
1350     }
1351   }
1352 
1353   DEBUG(if (ShrinkWrapDebugging >= Details)
1354           DOUT << "------------------------------"
1355                << "-----------------------------\n");
1356 }
1357 
1358 /// AdjustStackOffset - Helper function used to adjust the stack frame offset.
1359 static inline void
1360 AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx,
1361                   bool StackGrowsDown, int64_t &Offset,
1362                   unsigned &MaxAlign) {
1363   // If stack grows down, we need to add size of find the lowest address of the
1364   // object.
1365   if (StackGrowsDown)
1366     Offset += FFI->getObjectSize(FrameIdx);
1367 
1368   unsigned Align = FFI->getObjectAlignment(FrameIdx);
1369 
1370   // If the alignment of this object is greater than that of the stack, then
1371   // increase the stack alignment to match.
1372   MaxAlign = std::max(MaxAlign, Align);
1373 
1374   // Adjust to alignment boundary.
1375   Offset = (Offset + Align - 1) / Align * Align;
1376 
1377   if (StackGrowsDown) {
1378     FFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
1379   } else {
1380     FFI->setObjectOffset(FrameIdx, Offset);
1381     Offset += FFI->getObjectSize(FrameIdx);
1382   }
1383 }
1384 
1385 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
1386 /// abstract stack objects.
1387 ///
1388 void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
1389   const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
1390 
1391   bool StackGrowsDown =
1392     TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1393 
1394   // Loop over all of the stack objects, assigning sequential addresses...
1395   MachineFrameInfo *FFI = Fn.getFrameInfo();
1396 
1397   unsigned MaxAlign = FFI->getMaxAlignment();
1398 
1399   // Start at the beginning of the local area.
1400   // The Offset is the distance from the stack top in the direction
1401   // of stack growth -- so it's always nonnegative.
1402   int64_t Offset = TFI.getOffsetOfLocalArea();
1403   if (StackGrowsDown)
1404     Offset = -Offset;
1405   assert(Offset >= 0
1406          && "Local area offset should be in direction of stack growth");
1407 
1408   // If there are fixed sized objects that are preallocated in the local area,
1409   // non-fixed objects can't be allocated right at the start of local area.
1410   // We currently don't support filling in holes in between fixed sized
1411   // objects, so we adjust 'Offset' to point to the end of last fixed sized
1412   // preallocated object.
1413   for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
1414     int64_t FixedOff;
1415     if (StackGrowsDown) {
1416       // The maximum distance from the stack pointer is at lower address of
1417       // the object -- which is given by offset. For down growing stack
1418       // the offset is negative, so we negate the offset to get the distance.
1419       FixedOff = -FFI->getObjectOffset(i);
1420     } else {
1421       // The maximum distance from the start pointer is at the upper
1422       // address of the object.
1423       FixedOff = FFI->getObjectOffset(i) + FFI->getObjectSize(i);
1424     }
1425     if (FixedOff > Offset) Offset = FixedOff;
1426   }
1427 
1428   // First assign frame offsets to stack objects that are used to spill
1429   // callee saved registers.
1430   if (StackGrowsDown) {
1431     for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
1432       // If stack grows down, we need to add size of find the lowest
1433       // address of the object.
1434       Offset += FFI->getObjectSize(i);
1435 
1436       unsigned Align = FFI->getObjectAlignment(i);
1437       // If the alignment of this object is greater than that of the stack,
1438       // then increase the stack alignment to match.
1439       MaxAlign = std::max(MaxAlign, Align);
1440       // Adjust to alignment boundary
1441       Offset = (Offset+Align-1)/Align*Align;
1442 
1443       FFI->setObjectOffset(i, -Offset);        // Set the computed offset
1444     }
1445   } else {
1446     int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
1447     for (int i = MaxCSFI; i >= MinCSFI ; --i) {
1448       unsigned Align = FFI->getObjectAlignment(i);
1449       // If the alignment of this object is greater than that of the stack,
1450       // then increase the stack alignment to match.
1451       MaxAlign = std::max(MaxAlign, Align);
1452       // Adjust to alignment boundary
1453       Offset = (Offset+Align-1)/Align*Align;
1454 
1455       FFI->setObjectOffset(i, Offset);
1456       Offset += FFI->getObjectSize(i);
1457     }
1458   }
1459 
1460   // Make sure the special register scavenging spill slot is closest to the
1461   // frame pointer if a frame pointer is required.
1462   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
1463   if (RS && RegInfo->hasFP(Fn)) {
1464     int SFI = RS->getScavengingFrameIndex();
1465     if (SFI >= 0)
1466       AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1467   }
1468 
1469   // Make sure that the stack protector comes before the local variables on the
1470   // stack.
1471   if (FFI->getStackProtectorIndex() >= 0)
1472     AdjustStackOffset(FFI, FFI->getStackProtectorIndex(), StackGrowsDown,
1473                       Offset, MaxAlign);
1474 
1475   // Then assign frame offsets to stack objects that are not used to spill
1476   // callee saved registers.
1477   for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
1478     if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
1479       continue;
1480     if (RS && (int)i == RS->getScavengingFrameIndex())
1481       continue;
1482     if (FFI->isDeadObjectIndex(i))
1483       continue;
1484     if (FFI->getStackProtectorIndex() == (int)i)
1485       continue;
1486 
1487     AdjustStackOffset(FFI, i, StackGrowsDown, Offset, MaxAlign);
1488   }
1489 
1490   // Make sure the special register scavenging spill slot is closest to the
1491   // stack pointer.
1492   if (RS && !RegInfo->hasFP(Fn)) {
1493     int SFI = RS->getScavengingFrameIndex();
1494     if (SFI >= 0)
1495       AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign);
1496   }
1497 
1498   // Round up the size to a multiple of the alignment, but only if there are
1499   // calls or alloca's in the function.  This ensures that any calls to
1500   // subroutines have their stack frames suitable aligned.
1501   // Also do this if we need runtime alignment of the stack.  In this case
1502   // offsets will be relative to SP not FP; round up the stack size so this
1503   // works.
1504   if (!RegInfo->targetHandlesStackFrameRounding() &&
1505       (FFI->hasCalls() || FFI->hasVarSizedObjects() ||
1506        (RegInfo->needsStackRealignment(Fn) &&
1507         FFI->getObjectIndexEnd() != 0))) {
1508     // If we have reserved argument space for call sites in the function
1509     // immediately on entry to the current function, count it as part of the
1510     // overall stack size.
1511     if (RegInfo->hasReservedCallFrame(Fn))
1512       Offset += FFI->getMaxCallFrameSize();
1513 
1514     unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1;
1515     Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
1516   }
1517 
1518   // Update frame info to pretend that this is part of the stack...
1519   FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea());
1520 
1521   // Remember the required stack alignment in case targets need it to perform
1522   // dynamic stack alignment.
1523   FFI->setMaxAlignment(MaxAlign);
1524 }
1525 
1526 
1527 /// insertPrologEpilogCode - Scan the function for modified callee saved
1528 /// registers, insert spill code for these callee saved registers, then add
1529 /// prolog and epilog code to the function.
1530 ///
1531 void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
1532   const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
1533 
1534   // Add prologue to the function...
1535   TRI->emitPrologue(Fn);
1536 
1537   // Add epilogue to restore the callee-save registers in each exiting block
1538   for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
1539     // If last instruction is a return instruction, add an epilogue
1540     if (!I->empty() && I->back().getDesc().isReturn())
1541       TRI->emitEpilogue(Fn, *I);
1542   }
1543 }
1544 
1545 
1546 /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
1547 /// register references and actual offsets.
1548 ///
1549 void PEI::replaceFrameIndices(MachineFunction &Fn) {
1550   if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
1551 
1552   const TargetMachine &TM = Fn.getTarget();
1553   assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
1554   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
1555   const TargetFrameInfo *TFI = TM.getFrameInfo();
1556   bool StackGrowsDown =
1557     TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
1558   int FrameSetupOpcode   = TRI.getCallFrameSetupOpcode();
1559   int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
1560 
1561   for (MachineFunction::iterator BB = Fn.begin(),
1562          E = Fn.end(); BB != E; ++BB) {
1563     int SPAdj = 0;  // SP offset due to call frame setup / destroy.
1564     if (RS) RS->enterBasicBlock(BB);
1565 
1566     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
1567       if (I->getOpcode() == TargetInstrInfo::DECLARE) {
1568         // Ignore it.
1569         ++I;
1570         continue;
1571       }
1572 
1573       if (I->getOpcode() == FrameSetupOpcode ||
1574           I->getOpcode() == FrameDestroyOpcode) {
1575         // Remember how much SP has been adjusted to create the call
1576         // frame.
1577         int Size = I->getOperand(0).getImm();
1578 
1579         if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) ||
1580             (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode))
1581           Size = -Size;
1582 
1583         SPAdj += Size;
1584 
1585         MachineBasicBlock::iterator PrevI = BB->end();
1586         if (I != BB->begin()) PrevI = prior(I);
1587         TRI.eliminateCallFramePseudoInstr(Fn, *BB, I);
1588 
1589         // Visit the instructions created by eliminateCallFramePseudoInstr().
1590         if (PrevI == BB->end())
1591           I = BB->begin();     // The replaced instr was the first in the block.
1592         else
1593           I = next(PrevI);
1594         continue;
1595       }
1596 
1597       MachineInstr *MI = I;
1598       bool DoIncr = true;
1599       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
1600         if (MI->getOperand(i).isFI()) {
1601           // Some instructions (e.g. inline asm instructions) can have
1602           // multiple frame indices and/or cause eliminateFrameIndex
1603           // to insert more than one instruction. We need the register
1604           // scavenger to go through all of these instructions so that
1605           // it can update its register information. We keep the
1606           // iterator at the point before insertion so that we can
1607           // revisit them in full.
1608           bool AtBeginning = (I == BB->begin());
1609           if (!AtBeginning) --I;
1610 
1611           // If this instruction has a FrameIndex operand, we need to
1612           // use that target machine register info object to eliminate
1613           // it.
1614 
1615           TRI.eliminateFrameIndex(MI, SPAdj, RS);
1616 
1617           // Reset the iterator if we were at the beginning of the BB.
1618           if (AtBeginning) {
1619             I = BB->begin();
1620             DoIncr = false;
1621           }
1622 
1623           MI = 0;
1624           break;
1625         }
1626 
1627       if (DoIncr && I != BB->end()) ++I;
1628 
1629       // Update register states.
1630       if (RS && MI) RS->forward(MI);
1631     }
1632 
1633     assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
1634   }
1635 }
1636 
1637 // Debugging methods for shrink wrapping.
1638 #ifndef NDEBUG
1639 /// findFastExitPath - debugging method used to detect functions
1640 /// with at least one path from the entry block to a return block
1641 /// directly or which has a very small number of edges.
1642 ///
1643 void PEI::findFastExitPath() {
1644   if (! EntryBlock)
1645     return;
1646   // Fina a path from EntryBlock to any return block that does not branch:
1647   //        Entry
1648   //          |     ...
1649   //          v      |
1650   //         B1<-----+
1651   //          |
1652   //          v
1653   //       Return
1654   for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
1655          SE = EntryBlock->succ_end(); SI != SE; ++SI) {
1656     MachineBasicBlock* SUCC = *SI;
1657 
1658     // Assume positive, disprove existence of fast path.
1659     HasFastExitPath = true;
1660 
1661     // Check the immediate successors.
1662     if (isReturnBlock(SUCC)) {
1663       if (ShrinkWrapDebugging >= BasicInfo)
1664         DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1665              << "->" << getBasicBlockName(SUCC) << "\n";
1666       break;
1667     }
1668     // Traverse df from SUCC, look for a branch block.
1669     std::string exitPath = getBasicBlockName(SUCC);
1670     for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
1671            BE = df_end(SUCC); BI != BE; ++BI) {
1672       MachineBasicBlock* SBB = *BI;
1673       // Reject paths with branch nodes.
1674       if (SBB->succ_size() > 1) {
1675         HasFastExitPath = false;
1676         break;
1677       }
1678       exitPath += "->" + getBasicBlockName(SBB);
1679     }
1680     if (HasFastExitPath) {
1681       if (ShrinkWrapDebugging >= BasicInfo)
1682         DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock)
1683              << "->" << exitPath << "\n";
1684       break;
1685     }
1686   }
1687 }
1688 
1689 /// verifySpillRestorePlacement - check the current spill/restore
1690 /// sets for safety. Attempt to find spills without restores or
1691 /// restores without spills.
1692 /// Spills: walk df from each MBB in spill set ensuring that
1693 ///         all CSRs spilled at MMBB are restored on all paths
1694 ///         from MBB to all exit blocks.
1695 /// Restores: walk idf from each MBB in restore set ensuring that
1696 ///           all CSRs restored at MBB are spilled on all paths
1697 ///           reaching MBB.
1698 ///
1699 void PEI::verifySpillRestorePlacement() {
1700   unsigned numReturnBlocks = 0;
1701   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1702        MBBI != MBBE; ++MBBI) {
1703     MachineBasicBlock* MBB = MBBI;
1704     if (isReturnBlock(MBB) || MBB->succ_size() == 0)
1705       ++numReturnBlocks;
1706   }
1707   for (CSRegBlockMap::iterator BI = CSRSave.begin(),
1708          BE = CSRSave.end(); BI != BE; ++BI) {
1709     MachineBasicBlock* MBB = BI->first;
1710     CSRegSet spilled = BI->second;
1711     CSRegSet restored;
1712 
1713     if (spilled.empty())
1714       continue;
1715 
1716     DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1717          << stringifyCSRegSet(spilled)
1718          << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1719          << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1720 
1721     if (CSRRestore[MBB].intersects(spilled)) {
1722       restored |= (CSRRestore[MBB] & spilled);
1723     }
1724 
1725     // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
1726     // we must find restores for all spills w/no intervening spills on all
1727     // paths from MBB to all return blocks.
1728     for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
1729            BE = df_end(MBB); BI != BE; ++BI) {
1730       MachineBasicBlock* SBB = *BI;
1731       if (SBB == MBB)
1732         continue;
1733       // Stop when we encounter spills of any CSRs spilled at MBB that
1734       // have not yet been seen to be restored.
1735       if (CSRSave[SBB].intersects(spilled) &&
1736           !restored.contains(CSRSave[SBB] & spilled))
1737         break;
1738       // Collect the CSRs spilled at MBB that are restored
1739       // at this DF successor of MBB.
1740       if (CSRRestore[SBB].intersects(spilled))
1741         restored |= (CSRRestore[SBB] & spilled);
1742       // If we are at a retun block, check that the restores
1743       // we have seen so far exhaust the spills at MBB, then
1744       // reset the restores.
1745       if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
1746         if (restored != spilled) {
1747           CSRegSet notRestored = (spilled - restored);
1748           DOUT << MF->getFunction()->getName() << ": "
1749                << stringifyCSRegSet(notRestored)
1750                << " spilled at " << getBasicBlockName(MBB)
1751                << " are never restored on path to return "
1752                << getBasicBlockName(SBB) << "\n";
1753         }
1754         restored.clear();
1755       }
1756     }
1757   }
1758 
1759   // Check restore placements.
1760   for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
1761          BE = CSRRestore.end(); BI != BE; ++BI) {
1762     MachineBasicBlock* MBB = BI->first;
1763     CSRegSet restored = BI->second;
1764     CSRegSet spilled;
1765 
1766     if (restored.empty())
1767       continue;
1768 
1769     DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1770          << stringifyCSRegSet(CSRSave[MBB])
1771          << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
1772          << stringifyCSRegSet(restored) << "\n";
1773 
1774     if (CSRSave[MBB].intersects(restored)) {
1775       spilled |= (CSRSave[MBB] & restored);
1776     }
1777     // Walk inverse depth first from MBB to find spills of all
1778     // CSRs restored at MBB:
1779     for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
1780            BE = idf_end(MBB); BI != BE; ++BI) {
1781       MachineBasicBlock* PBB = *BI;
1782       if (PBB == MBB)
1783         continue;
1784       // Stop when we encounter restores of any CSRs restored at MBB that
1785       // have not yet been seen to be spilled.
1786       if (CSRRestore[PBB].intersects(restored) &&
1787           !spilled.contains(CSRRestore[PBB] & restored))
1788         break;
1789       // Collect the CSRs restored at MBB that are spilled
1790       // at this DF predecessor of MBB.
1791       if (CSRSave[PBB].intersects(restored))
1792         spilled |= (CSRSave[PBB] & restored);
1793     }
1794     if (spilled != restored) {
1795       CSRegSet notSpilled = (restored - spilled);
1796       DOUT << MF->getFunction()->getName() << ": "
1797            << stringifyCSRegSet(notSpilled)
1798            << " restored at " << getBasicBlockName(MBB)
1799            << " are never spilled\n";
1800     }
1801   }
1802 }
1803 
1804 // Debugging print methods.
1805 std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
1806   std::ostringstream name;
1807   if (MBB) {
1808     if (MBB->getBasicBlock())
1809       name << MBB->getBasicBlock()->getName();
1810     else
1811       name << "_MBB_" << MBB->getNumber();
1812   }
1813   return name.str();
1814 }
1815 
1816 std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
1817   const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
1818   const std::vector<CalleeSavedInfo> CSI =
1819     MF->getFrameInfo()->getCalleeSavedInfo();
1820 
1821   std::ostringstream srep;
1822   if (CSI.size() == 0) {
1823     srep << "[]";
1824     return srep.str();
1825   }
1826   srep << "[";
1827   CSRegSet::iterator I = s.begin(), E = s.end();
1828   if (I != E) {
1829     unsigned reg = CSI[*I].getReg();
1830     srep << TRI->getName(reg);
1831     for (++I; I != E; ++I) {
1832       reg = CSI[*I].getReg();
1833       srep << ",";
1834       srep << TRI->getName(reg);
1835     }
1836   }
1837   srep << "]";
1838   return srep.str();
1839 }
1840 
1841 void PEI::dumpSet(const CSRegSet& s) {
1842   DOUT << stringifyCSRegSet(s) << "\n";
1843 }
1844 
1845 void PEI::dumpUsed(MachineBasicBlock* MBB) {
1846   if (MBB) {
1847     DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
1848          << stringifyCSRegSet(CSRUsed[MBB])  << "\n";
1849   }
1850 }
1851 
1852 void PEI::dumpAllUsed() {
1853     for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1854          MBBI != MBBE; ++MBBI) {
1855       MachineBasicBlock* MBB = MBBI;
1856       dumpUsed(MBB);
1857     }
1858 }
1859 
1860 void PEI::dumpSets(MachineBasicBlock* MBB) {
1861   if (MBB) {
1862     DOUT << getBasicBlockName(MBB)           << " | "
1863          << stringifyCSRegSet(CSRUsed[MBB])  << " | "
1864          << stringifyCSRegSet(AnticIn[MBB])  << " | "
1865          << stringifyCSRegSet(AnticOut[MBB]) << " | "
1866          << stringifyCSRegSet(AvailIn[MBB])  << " | "
1867          << stringifyCSRegSet(AvailOut[MBB]) << "\n";
1868   }
1869 }
1870 
1871 void PEI::dumpSets1(MachineBasicBlock* MBB) {
1872   if (MBB) {
1873     DOUT << getBasicBlockName(MBB)             << " | "
1874          << stringifyCSRegSet(CSRUsed[MBB])    << " | "
1875          << stringifyCSRegSet(AnticIn[MBB])    << " | "
1876          << stringifyCSRegSet(AnticOut[MBB])   << " | "
1877          << stringifyCSRegSet(AvailIn[MBB])    << " | "
1878          << stringifyCSRegSet(AvailOut[MBB])   << " | "
1879          << stringifyCSRegSet(CSRSave[MBB])    << " | "
1880          << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1881   }
1882 }
1883 
1884 void PEI::dumpAllSets() {
1885     for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
1886          MBBI != MBBE; ++MBBI) {
1887       MachineBasicBlock* MBB = MBBI;
1888       dumpSets1(MBB);
1889     }
1890 }
1891 
1892 void PEI::dumpSRSets() {
1893   for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
1894        MBB != E; ++MBB) {
1895     if (! CSRSave[MBB].empty()) {
1896       DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = "
1897            << stringifyCSRegSet(CSRSave[MBB]);
1898       if (CSRRestore[MBB].empty())
1899         DOUT << "\n";
1900     }
1901     if (! CSRRestore[MBB].empty()) {
1902       if (! CSRSave[MBB].empty())
1903         DOUT << "    ";
1904       DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = "
1905            << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
1906     }
1907   }
1908 }
1909 #endif
1910