//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "RegAllocBase.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocPriorityAdvisor.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/SpillPlacement.h"
#include "llvm/CodeGen/Spiller.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
              cl::desc("Cost for first time use of callee-saved register."),
              cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);
static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register class "
             "more important than whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static cl::opt<unsigned> SplitThresholdForRegWithHint(
    "split-threshold-for-reg-with-hint",
    cl::desc("The threshold for splitting a virtual register with a hint, in "
             "percentage"),
    cl::init(75), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariablesWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(EdgeBundlesWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(SpillPlacementWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
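// For example, a comparison of the form
//   NewCost < Hysteresis * BestCost
// only accepts a new candidate when it is clearly cheaper, so near-ties keep
// the earlier choice instead of flipping on floating-point noise.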

FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegAllocFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegAllocFilterFunc F)
    : MachineFunctionPass(ID), RegAllocBase(F) {}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
  AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
  AU.addRequired<LiveIntervalsWrapperPass>();
  AU.addPreserved<LiveIntervalsWrapperPass>();
  AU.addRequired<SlotIndexesWrapperPass>();
  AU.addPreserved<SlotIndexesWrapperPass>();
  AU.addRequired<LiveDebugVariablesWrapperLegacy>();
  AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
  AU.addRequired<LiveStacksWrapperLegacy>();
  AU.addPreserved<LiveStacksWrapperLegacy>();
  AU.addRequired<MachineDominatorTreeWrapperPass>();
  AU.addPreserved<MachineDominatorTreeWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addPreserved<MachineLoopInfoWrapperPass>();
  AU.addRequired<VirtRegMapWrapperLegacy>();
  AU.addPreserved<VirtRegMapWrapperLegacy>();
  AU.addRequired<LiveRegMatrixWrapperLegacy>();
  AU.addPreserved<LiveRegMatrixWrapperLegacy>();
  AU.addRequired<EdgeBundlesWrapperLegacy>();
  AU.addRequired<SpillPlacementWrapperLegacy>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
  AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  RegAllocBase::enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet?  Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned, in
  // the same stage as the parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Ret, ~Reg));
}
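// Note on the tie break above: when two ranges have equal priority, the
// max-heap compares the second pair element, ~Reg. A lower virtual register
// number complements to a larger value, so it is dequeued first.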

unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference.  Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31 RS_Assign priority
    // 30 Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24 GlobalBit
    // else
    //   29 Global bit
    //   28-24 AllocPriority
    // 0-23 Size/Instr distance

    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}
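// Worked example of the packing above, with RegClassPriorityTrumpsGlobalness
// unset: a global range (GlobalBit = 1) of Size 100 in a class with
// AllocationPriority 2 and no hint yields
//   Prio = (1u << 31) | (1u << 29) | (2u << 24) | 100 = 0xA2000064.
// An otherwise identical hinted range would also set bit 30 and sort earlier.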

unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  // Prioritize by virtual register number, lowest first.
  Register Reg = LI.reg();
  return ~Reg.virtRegIndex();
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }

      // We can also split the virtual register in cold blocks.
      if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
        return 0;

      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg.id()];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

bool RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
                                          MCRegister FromReg) const {
  auto HasRegUnitInterference = [&](MCRegUnit Unit) {
    // Instantiate a "subquery", not to be confused with the Queries array.
    LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
    return SubQ.checkInterference();
  };

  for (MCRegister Reg :
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
    if (Reg == FromReg)
      continue;
    // If no units have interference, reassignment is possible.
    if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
      LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                        << printReg(FromReg, TRI) << " to "
                        << printReg(Reg, TRI) << '\n');
      return true;
    }
  }
  return false;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // We usually have the interfering VRegs cached, so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, so we had different SubRanges queried
    // against it.
    ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}
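// Example of the cascade invariant enforced by the assert above: if VirtReg
// carries cascade 4, every range it evicts here is stamped with cascade 4 and
// can subsequently be displaced only by an evictor with a larger cascade
// number, so eviction chains cannot cycle.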

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}
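// Numeric sketch (hypothetical costs): with per-use costs {0, 0, 1, 1, 1} in
// allocation order and CostPerUseLimit == 1, the cost-1 tail can never
// qualify, so the scan stops at the last cost change and only the two cost-0
// registers are examined.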

bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg.id()] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param  VirtReg Currently unassigned virtual register.
/// @param  Order   Physregs to try.
/// @return         Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = BlockFrequency(0);
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}
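// Example of the Ins accounting above: a live-in value whose interference
// starts before the first use gets Entry = PrefSpill plus one spill
// instruction; if the value is also live-out with interference past the last
// use, Exit becomes PrefSpill and a second instruction is counted. Each one
// is charged at the block's frequency.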

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}
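// Note on the batching above: constraints and links are flushed to the spill
// placer in groups of GroupSize (8) as the fixed arrays fill up, with a final
// flush of any partial group after the loop, so the buffers never overflow.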

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else {
      // Providing that the variable being spilled does not look like a loop
      // induction variable, which is expensive to spill around and better
      // pushed into a condition inside the loop if possible, provide a strong
      // negative bias on through blocks to prevent unwanted liveness on loop
      // backedges.
      bool PrefSpill = true;
      if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
        // Check that the current bundle is adding a Header + start+end of
        // loop-internal blocks. If the block is indeed a header, don't mark
        // the NewBlocks as PrefSpill, to allow the variable to be live in
        // Header<->Latch.
        MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
        if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
            all_of(NewBlocks.drop_front(), [&](unsigned Block) {
              return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
            }))
          PrefSpill = false;
      }
      if (PrefSpill)
        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    }
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions.  Compact
/// regions can be computed without looking at interference.  They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = BlockFrequency(0);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}
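// Worked example: a use block normally contributes its frequency once (one
// load or store). A block where the value is live-in, live-out, and redefined
// contributes twice, since both a reload and a store are needed.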

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = BlockFrequency(0);
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}
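// Example for the live-through loop above: a block in-register on both edges
// with interference inside pays the block frequency twice (the value must be
// spilled and reloaded around the interference); a block in-register on
// exactly one edge pays once for the single register/stack transition.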

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region", &errs());
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::max();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
                      << printBlockFreq(*MBFI, BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned
RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands+1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
                    << "\tstatic = " << printBlockFreq(*MBFI, Cost));
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
                                      BestCand);
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

// VirtReg has a physical Hint; this function tries to split VirtReg around
// the Hint if we can place new COPY instructions in cold blocks.
bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  // Splitting VirtReg may generate COPY instructions in multiple cold basic
  // blocks and increase code size, so we avoid it when the function is
  // optimized for size.
  if (MF->getFunction().hasOptSize())
    return false;

  // Don't allow repeated splitting as a safeguard against looping.
  if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
    return false;

  BlockFrequency Cost = BlockFrequency(0);
  Register Reg = VirtReg.reg();

  // Compute the cost of assigning a non-Hint physical register to VirtReg.
  // We define it as the total frequency of broken COPY instructions to/from
  // the Hint register; after the split, they can be deleted.
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!TII->isFullCopyInstr(Instr))
      continue;
    Register OtherReg = Instr.getOperand(1).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(0).getReg();
      if (OtherReg == Reg)
        continue;
      // Check if VirtReg interferes with OtherReg after this COPY instruction.
      if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
        continue;
    }
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    if (OtherPhysReg == Hint)
      Cost += MBFI->getBlockFreq(Instr.getParent());
  }

  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
  if (Cost == BlockFrequency(0))
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
  return true;
}
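// Arithmetic note on the threshold above: with the default
// split-threshold-for-reg-with-hint of 75, the accumulated frequency of the
// hint copies is scaled by 75/100 before being used as the cost budget, so
// the split is accepted only if it costs less than 75% of the copy frequency
// it stands to eliminate.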

//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
1296 unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
1297                                  AllocationOrder &Order,
1298                                  SmallVectorImpl<Register> &NewVRegs) {
1299   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1300   Register Reg = VirtReg.reg();
1301   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1302   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1303   SE->reset(LREdit, SplitSpillMode);
1304   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1305   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1306     if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1307       SE->splitSingleBlock(BI);
1308   }
1309   // No blocks were split.
1310   if (LREdit.empty())
1311     return 0;
1312 
1313   // We did split for some blocks.
1314   SmallVector<unsigned, 8> IntvMap;
1315   SE->finish(&IntvMap);
1316 
1317   // Tell LiveDebugVariables about the new ranges.
1318   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1319 
1320   // Sort out the new intervals created by splitting. The remainder interval
1321   // goes straight to spilling, the new local ranges get to stay RS_New.
1322   for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1323     const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
1324     if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
1325       ExtraInfo->setStage(LI, RS_Spill);
1326   }
1327 
1328   if (VerifyEnabled)
1329     MF->verify(this, "After splitting live range around basic blocks", &errs());
1330   return 0;
1331 }
1332 
1333 //===----------------------------------------------------------------------===//
1334 //                         Per-Instruction Splitting
1335 //===----------------------------------------------------------------------===//
1336 
1337 /// Get the number of allocatable registers that match the constraints of \p Reg
1338 /// on \p MI and that are also in \p SuperRC.
1339 static unsigned getNumAllocatableRegsForConstraints(
1340     const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
1341     const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1342     const RegisterClassInfo &RCI) {
1343   assert(SuperRC && "Invalid register class");
1344 
1345   const TargetRegisterClass *ConstrainedRC =
1346       MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1347                                              /* ExploreBundle */ true);
1348   if (!ConstrainedRC)
1349     return 0;
1350   return RCI.getNumAllocatableRegs(ConstrainedRC);
1351 }
1352 
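/// Compute the union of lanes of \p Reg read by the bundle containing
/// \p FirstMI. A full use without a subregister index reads all lanes of the
/// vreg, and a subregister def that is not marked undef is treated as reading
/// the lanes it leaves untouched.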
1353 static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
1354                                        const TargetRegisterInfo &TRI,
1355                                        const MachineInstr &FirstMI,
1356                                        Register Reg) {
1357   LaneBitmask Mask;
1358   SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
1359   (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
1360 
1361   for (auto [MI, OpIdx] : Ops) {
1362     const MachineOperand &MO = MI->getOperand(OpIdx);
1363     assert(MO.isReg() && MO.getReg() == Reg);
1364     unsigned SubReg = MO.getSubReg();
1365     if (SubReg == 0 && MO.isUse()) {
1366       if (MO.isUndef())
1367         continue;
1368       return MRI.getMaxLaneMaskForVReg(Reg);
1369     }
1370 
1371     LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
1372     if (MO.isDef()) {
1373       if (!MO.isUndef())
1374         Mask |= ~SubRegMask;
1375     } else
1376       Mask |= SubRegMask;
1377   }
1378 
1379   return Mask;
1380 }
1381 
1382 /// Return true if \p MI at \p Use reads a subset of the lanes live in \p
1383 /// VirtReg.
1384 static bool readsLaneSubset(const MachineRegisterInfo &MRI,
1385                             const MachineInstr *MI, const LiveInterval &VirtReg,
1386                             const TargetRegisterInfo *TRI, SlotIndex Use,
1387                             const TargetInstrInfo *TII) {
1388   // Check the common case early. Beware of the semi-formed bundles SplitKit
1389   // creates by setting the bundle flag on copies without a matching BUNDLE.
1390 
1391   auto DestSrc = TII->isCopyInstr(*MI);
1392   if (DestSrc && !MI->isBundled() &&
1393       DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
1394     return false;
1395 
1396   // FIXME: We're only considering uses, but should we consider defs too?
1397   LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
1398 
1399   LaneBitmask LiveAtMask;
1400   for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
1401     if (S.liveAt(Use))
1402       LiveAtMask |= S.LaneMask;
1403   }
1404 
1405   // If the live lanes aren't different from the lanes used by the instruction,
1406   // this doesn't help.
1407   return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
1408 }
1409 
1410 /// tryInstructionSplit - Split a live range around individual instructions.
1411 /// This is normally not worthwhile since the spiller is doing essentially the
1412 /// same thing. However, when the live range is in a constrained register
1413 /// class, it may help to insert copies such that parts of the live range can
1414 /// be moved to a larger register class.
1415 ///
1416 /// This is similar to spilling to a larger register class.
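/// For example, a use constrained to a small class such as x86's GR32_ABCD
/// can be isolated with copies so that the remainder of the range is free to
/// live in any GR32 register.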
1417 unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
1418                                        AllocationOrder &Order,
1419                                        SmallVectorImpl<Register> &NewVRegs) {
1420   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1421   // There is no point in this if there are no larger sub-classes.
1422 
1423   bool SplitSubClass = true;
1424   if (!RegClassInfo.isProperSubClass(CurRC)) {
1425     if (!VirtReg.hasSubRanges())
1426       return 0;
1427     SplitSubClass = false;
1428   }
1429 
1430   // Always enable split spill mode, since we're effectively spilling to a
1431   // register.
1432   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1433   SE->reset(LREdit, SplitEditor::SM_Size);
1434 
1435   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1436   if (Uses.size() <= 1)
1437     return 0;
1438 
1439   LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
1440                     << " individual instrs.\n");
1441 
1442   const TargetRegisterClass *SuperRC =
1443       TRI->getLargestLegalSuperClass(CurRC, *MF);
1444   unsigned SuperRCNumAllocatableRegs =
1445       RegClassInfo.getNumAllocatableRegs(SuperRC);
1446   // Split around every non-copy instruction if this split will relax
1447   // the constraints on the virtual register.
1448   // Otherwise, splitting just inserts uncoalescable copies that do not help
1449   // the allocation.
1450   for (const SlotIndex Use : Uses) {
1451     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
1452       if (TII->isFullCopyInstr(*MI) ||
1453           (SplitSubClass &&
1454            SuperRCNumAllocatableRegs ==
1455                getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
1456                                                    TII, TRI, RegClassInfo)) ||
1457           // TODO: Handle split for subranges with subclass constraints?
1458           (!SplitSubClass && VirtReg.hasSubRanges() &&
1459            !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
1460         LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
1461         continue;
1462       }
1463     }
1464     SE->openIntv();
1465     SlotIndex SegStart = SE->enterIntvBefore(Use);
1466     SlotIndex SegStop = SE->leaveIntvAfter(Use);
1467     SE->useIntv(SegStart, SegStop);
1468   }
1469 
1470   if (LREdit.empty()) {
1471     LLVM_DEBUG(dbgs() << "All uses were copies.\n");
1472     return 0;
1473   }
1474 
1475   SmallVector<unsigned, 8> IntvMap;
1476   SE->finish(&IntvMap);
1477   DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1478   // Assign all new registers to RS_Spill. This was the last chance.
1479   ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
1480   return 0;
1481 }
1482 
1483 //===----------------------------------------------------------------------===//
1484 //                             Local Splitting
1485 //===----------------------------------------------------------------------===//
1486 
1487 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1488 /// in order to use PhysReg between two entries in SA->UseSlots.
1489 ///
1490 /// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
1491 ///
1492 void RAGreedy::calcGapWeights(MCRegister PhysReg,
1493                               SmallVectorImpl<float> &GapWeight) {
1494   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1495   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1496   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1497   const unsigned NumGaps = Uses.size() - 1;
1498 
1499   // Start and end points for the interference check.
1500   SlotIndex StartIdx =
1501     BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
1502   SlotIndex StopIdx =
1503     BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
1504 
1505   GapWeight.assign(NumGaps, 0.0f);
1506 
1507   // Add interference from each overlapping register.
1508   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1509     if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
1510              .checkInterference())
1511       continue;
1512 
1513     // We know that VirtReg is a continuous interval from FirstInstr to
1514     // LastInstr, so we don't need InterferenceQuery.
1515     //
1516     // Interference that overlaps an instruction is counted in both gaps
1517     // surrounding the instruction. The exception is interference before
1518     // StartIdx and after StopIdx.
1519     //
1520     LiveIntervalUnion::SegmentIter IntI =
1521         Matrix->getLiveUnions()[Unit].find(StartIdx);
1522     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1523       // Skip the gaps before IntI.
1524       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1525         if (++Gap == NumGaps)
1526           break;
1527       if (Gap == NumGaps)
1528         break;
1529 
1530       // Update the gaps covered by IntI.
1531       const float weight = IntI.value()->weight();
1532       for (; Gap != NumGaps; ++Gap) {
1533         GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1534         if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1535           break;
1536       }
1537       if (Gap == NumGaps)
1538         break;
1539     }
1540   }
1541 
1542   // Add fixed interference.
1543   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1544     const LiveRange &LR = LIS->getRegUnit(Unit);
1545     LiveRange::const_iterator I = LR.find(StartIdx);
1546     LiveRange::const_iterator E = LR.end();
1547 
1548     // Same loop as above. Mark any overlapped gaps as huge_valf.
1549     for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1550       while (Uses[Gap+1].getBoundaryIndex() < I->start)
1551         if (++Gap == NumGaps)
1552           break;
1553       if (Gap == NumGaps)
1554         break;
1555 
1556       for (; Gap != NumGaps; ++Gap) {
1557         GapWeight[Gap] = huge_valf;
1558         if (Uses[Gap+1].getBaseIndex() >= I->end)
1559           break;
1560       }
1561       if (Gap == NumGaps)
1562         break;
1563     }
1564   }
1565 }
1566 
1567 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1568 /// basic block.
1569 ///
1570 unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1571                                  AllocationOrder &Order,
1572                                  SmallVectorImpl<Register> &NewVRegs) {
1573   // TODO: the function currently only handles a single UseBlock; it should be
1574   // possible to generalize.
1575   if (SA->getUseBlocks().size() != 1)
1576     return 0;
1577 
1578   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1579 
1580   // Note that it is possible to have an interval that is live-in or live-out
1581   // while only covering a single block: a phi-def can use undef values from
1582   // predecessors, and the block could be a single-block loop.
1583   // We don't bother doing anything clever about such a case; we simply assume
1584   // that the interval is continuous from FirstInstr to LastInstr. We should
1585   // make sure that we don't do anything illegal to such an interval, though.
1586 
1587   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1588   if (Uses.size() <= 2)
1589     return 0;
1590   const unsigned NumGaps = Uses.size() - 1;
1591 
1592   LLVM_DEBUG({
1593     dbgs() << "tryLocalSplit: ";
1594     for (const auto &Use : Uses)
1595       dbgs() << ' ' << Use;
1596     dbgs() << '\n';
1597   });
1598 
1599   // If VirtReg is live across any register mask operands, compute a list of
1600   // gaps with register masks.
1601   SmallVector<unsigned, 8> RegMaskGaps;
1602   if (Matrix->checkRegMaskInterference(VirtReg)) {
1603     // Get regmask slots for the whole block.
1604     ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1605     LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1606     // Constrain to VirtReg's live range.
1607     unsigned RI =
1608         llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1609     unsigned RE = RMS.size();
1610     for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1611       // Look for Uses[I] <= RMS <= Uses[I + 1].
1612       assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
1613       if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1614         continue;
1615       // Skip a regmask on the same instruction as the last use. It doesn't
1616       // overlap the live range.
1617       if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1618         break;
1619       LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1620                         << Uses[I + 1]);
1621       RegMaskGaps.push_back(I);
1622       // Advance RI to the next gap. A regmask on one of the uses counts in
1623       // both gaps.
1624       while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1625         ++RI;
1626     }
1627     LLVM_DEBUG(dbgs() << '\n');
1628   }
1629 
1630   // Since we allow local split results to be split again, there is a risk of
1631   // creating infinite loops. It is tempting to require that the new live
1632   // ranges have fewer instructions than the original. That would guarantee
1633   // convergence, but it is too strict. A live range with 3 instructions can be
1634   // split into 2+3 instructions (including the COPY), and we want to allow that.
1635   //
1636   // Instead we use these rules:
1637   //
1638   // 1. Allow any split for ranges with getStage() < RS_Split2 (except for the
1639   //    no-op split, of course).
1640   // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1641   //    the new ranges must have fewer instructions than before the split.
1642   // 3. New ranges with the same number of instructions are marked RS_Split2,
1643   //    smaller ranges are marked RS_New.
1644   //
1645   // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1646   // excessive splitting and infinite loops.
1647   //
1648   bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
1649 
1650   // Best split candidate.
1651   unsigned BestBefore = NumGaps;
1652   unsigned BestAfter = 0;
1653   float BestDiff = 0;
1654 
1655   const float blockFreq =
1656       SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1657       (1.0f / MBFI->getEntryFreq().getFrequency());
1658   SmallVector<float, 8> GapWeight;
1659 
1660   for (MCPhysReg PhysReg : Order) {
1661     assert(PhysReg);
1662     // Keep track of the largest spill weight that would need to be evicted in
1663     // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
1664     calcGapWeights(PhysReg, GapWeight);
1665 
1666     // Remove any gaps with regmask clobbers.
1667     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1668       for (unsigned Gap : RegMaskGaps)
1669         GapWeight[Gap] = huge_valf;
1670 
1671     // Try to find the best sequence of gaps to close.
1672     // The new spill weight must be larger than any gap interference.
1673 
1674     // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1675     unsigned SplitBefore = 0, SplitAfter = 1;
1676 
1677     // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1678     // It is the spill weight that needs to be evicted.
1679     float MaxGap = GapWeight[0];
1680 
1681     while (true) {
1682       // Live before/after split?
1683       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1684       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1685 
1686       LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
1687                         << '-' << Uses[SplitAfter] << " I=" << MaxGap);
1688 
1689       // Stop before the interval gets so big we wouldn't be making progress.
1690       if (!LiveBefore && !LiveAfter) {
1691         LLVM_DEBUG(dbgs() << " all\n");
1692         break;
1693       }
1694       // Should the interval be extended or shrunk?
1695       bool Shrink = true;
1696 
1697       // How many gaps would the new range have?
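      // The new range spans Uses[SplitBefore..SplitAfter]; a copy is inserted
      // on each side where the value is live (LiveBefore/LiveAfter), and each
      // copy adds one instruction and therefore one gap: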
1698       unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1699 
1700       // Legally, without causing looping?
1701       bool Legal = !ProgressRequired || NewGaps < NumGaps;
1702 
1703       if (Legal && MaxGap < huge_valf) {
1704         // Estimate the new spill weight. Each instruction reads or writes the
1705         // register. Conservatively assume there are no read-modify-write
1706         // instructions.
1707         //
1708         // Try to guess the size of the new interval.
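        // Roughly, this is use frequency divided by size: NewGaps + 1
        // instructions, each executing at blockFreq, over the distance between
        // the split points plus one InstrDist per copy inserted on a live
        // side.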
1709         const float EstWeight = normalizeSpillWeight(
1710             blockFreq * (NewGaps + 1),
1711             Uses[SplitBefore].distance(Uses[SplitAfter]) +
1712                 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1713             1);
1714         // Would this split be possible to allocate?
1715         // Never allocate all gaps; we wouldn't be making progress.
1716         LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1717         if (EstWeight * Hysteresis >= MaxGap) {
1718           Shrink = false;
1719           float Diff = EstWeight - MaxGap;
1720           if (Diff > BestDiff) {
1721             LLVM_DEBUG(dbgs() << " (best)");
1722             BestDiff = Hysteresis * Diff;
1723             BestBefore = SplitBefore;
1724             BestAfter = SplitAfter;
1725           }
1726         }
1727       }
1728 
1729       // Try to shrink.
1730       if (Shrink) {
1731         if (++SplitBefore < SplitAfter) {
1732           LLVM_DEBUG(dbgs() << " shrink\n");
1733           // Recompute the max when necessary.
1734           if (GapWeight[SplitBefore - 1] >= MaxGap) {
1735             MaxGap = GapWeight[SplitBefore];
1736             for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1737               MaxGap = std::max(MaxGap, GapWeight[I]);
1738           }
1739           continue;
1740         }
1741         MaxGap = 0;
1742       }
1743 
1744       // Try to extend the interval.
1745       if (SplitAfter >= NumGaps) {
1746         LLVM_DEBUG(dbgs() << " end\n");
1747         break;
1748       }
1749 
1750       LLVM_DEBUG(dbgs() << " extend\n");
1751       MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1752     }
1753   }
1754 
1755   // Didn't find any candidates?
1756   if (BestBefore == NumGaps)
1757     return 0;
1758 
1759   LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1760                     << Uses[BestAfter] << ", " << BestDiff << ", "
1761                     << (BestAfter - BestBefore + 1) << " instrs\n");
1762 
1763   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1764   SE->reset(LREdit);
1765 
1766   SE->openIntv();
1767   SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1768   SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
1769   SE->useIntv(SegStart, SegStop);
1770   SmallVector<unsigned, 8> IntvMap;
1771   SE->finish(&IntvMap);
1772   DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1773   // If the new range has the same number of instructions as before, mark it as
1774   // RS_Split2 so the next split will be forced to make progress. Otherwise,
1775   // leave the new intervals as RS_New so they can compete.
1776   bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1777   bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1778   unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1779   if (NewGaps >= NumGaps) {
1780     LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1781     assert(!ProgressRequired && "Didn't make progress when it was required.");
1782     for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1783       if (IntvMap[I] == 1) {
1784         ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1785         LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1786       }
1787     LLVM_DEBUG(dbgs() << '\n');
1788   }
1789   ++NumLocalSplits;
1790 
1791   return 0;
1792 }
1793 
1794 //===----------------------------------------------------------------------===//
1795 //                          Live Range Splitting
1796 //===----------------------------------------------------------------------===//
1797 
1798 /// trySplit - Try to split VirtReg or one of its interferences, making it
1799 /// assignable.
1800 /// \return PhysReg when VirtReg may be assigned, and/or new registers in NewVRegs.
1801 unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1802                             SmallVectorImpl<Register> &NewVRegs,
1803                             const SmallVirtRegSet &FixedRegisters) {
1804   // Ranges must be Split2 or less.
1805   if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1806     return 0;
1807 
1808   // Local intervals are handled separately.
1809   if (LIS->intervalIsInOneMBB(VirtReg)) {
1810     NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1811                        TimerGroupDescription, TimePassesIsEnabled);
1812     SA->analyze(&VirtReg);
1813     Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1814     if (PhysReg || !NewVRegs.empty())
1815       return PhysReg;
1816     return tryInstructionSplit(VirtReg, Order, NewVRegs);
1817   }
1818 
1819   NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1820                      TimerGroupDescription, TimePassesIsEnabled);
1821 
1822   SA->analyze(&VirtReg);
1823 
1824   // First try to split around a region spanning multiple blocks. RS_Split2
1825   // ranges already made dubious progress with region splitting, so they go
1826   // straight to single block splitting.
1827   if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1828     MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1829     if (PhysReg || !NewVRegs.empty())
1830       return PhysReg;
1831   }
1832 
1833   // Then isolate blocks.
1834   return tryBlockSplit(VirtReg, Order, NewVRegs);
1835 }
1836 
1837 //===----------------------------------------------------------------------===//
1838 //                          Last Chance Recoloring
1839 //===----------------------------------------------------------------------===//
1840 
1841 /// Return true if \p reg has any tied def operand.
1842 static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1843   for (const MachineOperand &MO : MRI->def_operands(reg))
1844     if (MO.isTied())
1845       return true;
1846 
1847   return false;
1848 }
1849 
1850 /// Return true if the existing assignment of \p Intf overlaps \p PhysReg but
1851 /// is not the same register.
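/// This situation arises with overlapping register tuples, e.g. when \p Intf
/// is assigned a tuple that shares a register unit with \p PhysReg without
/// being equal to it.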
1852 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1853                                          const VirtRegMap &VRM,
1854                                          MCRegister PhysReg,
1855                                          const LiveInterval &Intf) {
1856   MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1857   if (PhysReg == AssignedReg)
1858     return false;
1859   return TRI.regsOverlap(PhysReg, AssignedReg);
1860 }
1861 
1862 /// mayRecolorAllInterferences - Check if the virtual registers that
1863 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1864 /// recolored to free \p PhysReg.
1865 /// When true is returned, \p RecoloringCandidates has been augmented with all
1866 /// the live intervals that need to be recolored in order to free \p PhysReg
1867 /// for \p VirtReg.
1868 /// \p FixedRegisters contains all the virtual registers that cannot be
1869 /// recolored.
1870 bool RAGreedy::mayRecolorAllInterferences(
1871     MCRegister PhysReg, const LiveInterval &VirtReg,
1872     SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1873   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1874 
1875   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1876     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
1877     // If there is LastChanceRecoloringMaxInterference or more interferences,
1878     // chances are one would not be recolorable.
1879     if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1880             LastChanceRecoloringMaxInterference &&
1881         !ExhaustiveSearch) {
1882       LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1883       CutOffInfo |= CO_Interf;
1884       return false;
1885     }
1886     for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1887       // If Intf is done and sits on the same register class as VirtReg, it
1888       // would not be recolorable as it is in the same state as
1889       // VirtReg. However, there are at least two exceptions.
1890       //
1891       // If VirtReg has tied defs and Intf doesn't, then
1892       // there is still a point in examining whether it can be recolored.
1893       //
1894       // Additionally, if the register class has overlapping tuple members, it
1895       // may still be recolorable using a different tuple. This is more likely
1896       // if the existing assignment aliases with the candidate.
1897       //
1898       if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1899             MRI->getRegClass(Intf->reg()) == CurRC &&
1900             !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1901            !(hasTiedDef(MRI, VirtReg.reg()) &&
1902              !hasTiedDef(MRI, Intf->reg()))) ||
1903           FixedRegisters.count(Intf->reg())) {
1904         LLVM_DEBUG(
1905             dbgs() << "Early abort: the interference is not recolorable.\n");
1906         return false;
1907       }
1908       RecoloringCandidates.insert(Intf);
1909     }
1910   }
1911   return true;
1912 }
1913 
1914 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1915 /// its interferences.
1916 /// Last chance recoloring chooses a color for \p VirtReg and recolors every
1917 /// virtual register that was using it. The recoloring process may recursively
1918 /// use the last chance recoloring. Therefore, when a virtual register has been
1919 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1920 /// be last-chance-recolored again during this recoloring "session".
1921 /// E.g.,
1922 /// Let
1923 /// vA can use {R1, R2    }
1924 /// vB can use {    R2, R3}
1925 /// vC can use {R1        }
1926 /// Where vA, vB, and vC cannot be split anymore (they are reloads for
1927 /// instance) and they all interfere.
1928 ///
1929 /// vA is assigned R1
1930 /// vB is assigned R2
1931 /// vC tries to evict vA but vA is already done.
1932 /// Regular register allocation fails.
1933 ///
1934 /// Last chance recoloring kicks in:
1935 /// vC acts as if vA was evicted => vC uses R1.
1936 /// vC is marked as fixed.
1937 /// vA needs to find a color.
1938 /// None are available.
1939 /// vA cannot evict vC: vC is a fixed virtual register now.
1940 /// vA acts as if vB was evicted => vA uses R2.
1941 /// vB needs to find a color.
1942 /// R3 is available.
1943 /// Recoloring => vC = R1, vA = R2, vB = R3
1944 ///
1945 /// \p Order defines the preferred allocation order for \p VirtReg.
1946 /// \p NewRegs will contain any new virtual registers that have been created
1947 /// (split, spill) during the process and that must be assigned.
1948 /// \p FixedRegisters contains all the virtual registers that cannot be
1949 /// recolored.
1950 ///
1951 /// \p RecolorStack tracks the original assignments of successfully recolored
1952 /// registers.
1953 ///
1954 /// \p Depth gives the current depth of the last chance recoloring.
1955 /// \return a physical register that can be used for VirtReg or ~0u if none
1956 /// exists.
1957 unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1958                                            AllocationOrder &Order,
1959                                            SmallVectorImpl<Register> &NewVRegs,
1960                                            SmallVirtRegSet &FixedRegisters,
1961                                            RecoloringStack &RecolorStack,
1962                                            unsigned Depth) {
1963   if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1964     return ~0u;
1965 
1966   LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1967 
1968   const ssize_t EntryStackSize = RecolorStack.size();
1969 
1970   // Ranges must be Done.
1971   assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1972          "Last chance recoloring should really be last chance");
1973   // Set the max depth to LastChanceRecoloringMaxDepth.
1974   // We may want to reconsider that if we end up with a search space that is
1975   // too large for targets with hundreds of registers.
1976   // Indeed, in that case we may want to cut the search space earlier.
1977   if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1978     LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1979     CutOffInfo |= CO_Depth;
1980     return ~0u;
1981   }
1982 
1983   // Set of live intervals that will need to be recolored.
1984   SmallLISet RecoloringCandidates;
1985 
1986   // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
1987   // this recoloring "session".
1988   assert(!FixedRegisters.count(VirtReg.reg()));
1989   FixedRegisters.insert(VirtReg.reg());
1990   SmallVector<Register, 4> CurrentNewVRegs;
1991 
1992   for (MCRegister PhysReg : Order) {
1993     assert(PhysReg.isValid());
1994     LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1995                       << printReg(PhysReg, TRI) << '\n');
1996     RecoloringCandidates.clear();
1997     CurrentNewVRegs.clear();
1998 
1999     // It is only possible to recolor virtual register interference.
2000     if (Matrix->checkInterference(VirtReg, PhysReg) >
2001         LiveRegMatrix::IK_VirtReg) {
2002       LLVM_DEBUG(
2003           dbgs() << "Some interferences are not with virtual registers.\n");
2004 
2005       continue;
2006     }
2007 
2008     // Give up early on this PhysReg if it is obvious we cannot recolor all
2009     // the interferences.
2010     if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2011                                     FixedRegisters)) {
2012       LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2013       continue;
2014     }
2015 
2016     // RecoloringCandidates contains all the virtual registers that interfere
2017     // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
2018     // recoloring and perform the actual recoloring.
2019     PQueue RecoloringQueue;
2020     for (const LiveInterval *RC : RecoloringCandidates) {
2021       Register ItVirtReg = RC->reg();
2022       enqueue(RecoloringQueue, RC);
2023       assert(VRM->hasPhys(ItVirtReg) &&
2024              "Interferences are supposed to be with allocated variables");
2025 
2026       // Record the current allocation.
2027       RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
2028 
2029       // Unassign the interfering live range from the matrix.
2030       Matrix->unassign(*RC);
2031     }
2032 
2033     // Act as if VirtReg was assigned to PhysReg so that the underlying
2034     // recoloring has the right information about the interferences and
2035     // available colors.
2036     Matrix->assign(VirtReg, PhysReg);
2037 
2038     // VirtReg may be deleted during tryRecoloringCandidates, so save its register.
2039     Register ThisVirtReg = VirtReg.reg();
2040 
2041     // Save the current recoloring state.
2042     // If we cannot recolor all the interferences, we will have to start again
2043     // at this point for the next physical register.
2044     SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2045     if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2046                                 FixedRegisters, RecolorStack, Depth)) {
2047       // Push the queued vregs into the main queue.
2048       for (Register NewVReg : CurrentNewVRegs)
2049         NewVRegs.push_back(NewVReg);
2050       // Do not mess with the global assignment process.
2051       // I.e., VirtReg must be unassigned.
2052       if (VRM->hasPhys(ThisVirtReg)) {
2053         Matrix->unassign(VirtReg);
2054         return PhysReg;
2055       }
2056 
2057       // It is possible VirtReg will be deleted during tryRecoloringCandidates.
2058       LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register "
2059                         << printReg(ThisVirtReg) << '\n');
2060       FixedRegisters.erase(ThisVirtReg);
2061       return 0;
2062     }
2063 
2064     LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2065                       << printReg(PhysReg, TRI) << '\n');
2066 
2067     // The recoloring attempt failed, undo the changes.
2068     FixedRegisters = SaveFixedRegisters;
2069     Matrix->unassign(VirtReg);
2070 
2071     // For a newly created vreg which is also in RecoloringCandidates,
2072     // don't add it to NewVRegs because its physical register will be restored
2073     // below. Other vregs in CurrentNewVRegs are created by calling
2074     // selectOrSplit and should be added to NewVRegs.
2075     for (Register R : CurrentNewVRegs) {
2076       if (RecoloringCandidates.count(&LIS->getInterval(R)))
2077         continue;
2078       NewVRegs.push_back(R);
2079     }
2080 
2081     // Roll back our unsuccessful recoloring. Also roll back any successful
2082     // recolorings in any recursive recoloring attempts, since it's possible
2083     // they would have introduced conflicts with assignments we will be
2084     // restoring further up the stack. Perform all unassignments prior to
2085     // reassigning, since sub-recolorings may have conflicted with the registers
2086     // we are going to restore to their original assignments.
2087     for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2088       const LiveInterval *LI;
2089       MCRegister PhysReg;
2090       std::tie(LI, PhysReg) = RecolorStack[I];
2091 
2092       if (VRM->hasPhys(LI->reg()))
2093         Matrix->unassign(*LI);
2094     }
2095 
2096     for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2097       const LiveInterval *LI;
2098       MCRegister PhysReg;
2099       std::tie(LI, PhysReg) = RecolorStack[I];
2100       if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2101         Matrix->assign(*LI, PhysReg);
2102     }
2103 
2104     // Pop the stack of recoloring attempts.
2105     RecolorStack.resize(EntryStackSize);
2106   }
2107 
2108   // Last chance recoloring did not work either; give up.
2109   return ~0u;
2110 }
2111 
2112 /// tryRecoloringCandidates - Try to assign a new color to every register
2113 /// in \p RecoloringQueue.
2114 /// \p NewRegs will contain any new virtual register created during the
2115 /// recoloring process.
2116 /// \p FixedRegisters[in/out] contains all the registers that have been
2117 /// recolored.
2118 /// \return true if all virtual registers in RecoloringQueue were successfully
2119 /// recolored, false otherwise.
2120 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2121                                        SmallVectorImpl<Register> &NewVRegs,
2122                                        SmallVirtRegSet &FixedRegisters,
2123                                        RecoloringStack &RecolorStack,
2124                                        unsigned Depth) {
2125   while (!RecoloringQueue.empty()) {
2126     const LiveInterval *LI = dequeue(RecoloringQueue);
2127     LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2128     MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2129                                            RecolorStack, Depth + 1);
2130     // When splitting happens, the live-range may actually be empty.
2131     // In that case, it is okay to continue the recoloring even
2132     // if we did not find an alternative color for it. Indeed,
2133     // there will not be anything to color for LI in the end.
2134     if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2135       return false;
2136 
2137     if (!PhysReg) {
2138       assert(LI->empty() && "Only empty live-ranges do not require a register");
2139       LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2140                         << " succeeded. Empty LI.\n");
2141       continue;
2142     }
2143     LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2144                       << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2145 
2146     Matrix->assign(*LI, PhysReg);
2147     FixedRegisters.insert(LI->reg());
2148   }
2149   return true;
2150 }
2151 
2152 //===----------------------------------------------------------------------===//
2153 //                            Main Entry Point
2154 //===----------------------------------------------------------------------===//
2155 
2156 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2157                                    SmallVectorImpl<Register> &NewVRegs) {
2158   CutOffInfo = CO_None;
2159   LLVMContext &Ctx = MF->getFunction().getContext();
2160   SmallVirtRegSet FixedRegisters;
2161   RecoloringStack RecolorStack;
2162   MCRegister Reg =
2163       selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2164   if (Reg == ~0U && (CutOffInfo != CO_None)) {
2165     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2166     if (CutOffEncountered == CO_Depth)
2167       Ctx.emitError("register allocation failed: maximum depth for recoloring "
2168                     "reached. Use -fexhaustive-register-search to skip "
2169                     "cutoffs");
2170     else if (CutOffEncountered == CO_Interf)
2171       Ctx.emitError("register allocation failed: maximum interference for "
2172                     "recoloring reached. Use -fexhaustive-register-search "
2173                     "to skip cutoffs");
2174     else if (CutOffEncountered == (CO_Depth | CO_Interf))
2175       Ctx.emitError("register allocation failed: maximum interference and "
2176                     "depth for recoloring reached. Use "
2177                     "-fexhaustive-register-search to skip cutoffs");
2178   }
2179   return Reg;
2180 }
2181 
2182 /// Using a CSR for the first time has a cost because it causes push|pop
2183 /// to be added to prologue|epilogue. Splitting a cold section of the live
2184 /// range can have a lower cost than using the CSR for the first time;
2185 /// spilling a live range in the cold path can likewise have a lower cost
2186 /// than using the CSR for the first time. Returns the physical register if
2187 /// we decide to use the CSR; otherwise returns 0.
2188 MCRegister RAGreedy::tryAssignCSRFirstTime(
2189     const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2190     uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2191   if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2192     // We choose spill over using the CSR for the first time if the spill cost
2193     // is lower than CSRCost.
2194     SA->analyze(&VirtReg);
2195     if (calcSpillCost() >= CSRCost)
2196       return PhysReg;
2197 
2198     // We are going to spill; set CostPerUseLimit to 1 to make sure that
2199     // we will not use a callee-saved register in tryEvict.
2200     CostPerUseLimit = 1;
2201     return 0;
2202   }
2203   if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2204     // We choose pre-splitting over using the CSR for the first time if
2205     // the cost of splitting is lower than CSRCost.
2206     SA->analyze(&VirtReg);
2207     unsigned NumCands = 0;
2208     BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2209     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2210                                                  NumCands, true /*IgnoreCSR*/);
2211     if (BestCand == NoCand)
2212       // Use the CSR if we can't find a region split below CSRCost.
2213       return PhysReg;
2214 
2215     // Perform the actual pre-splitting.
2216     doRegionSplit(VirtReg, BestCand, /*HasCompact=*/false, NewVRegs);
2217     return 0;
2218   }
2219   return PhysReg;
2220 }
2221 
2222 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2223   // Do not keep invalid information around.
2224   SetOfBrokenHints.remove(&LI);
2225 }
2226 
2227 void RAGreedy::initializeCSRCost() {
2228   // We use the larger of the command-line option and the value reported
2229   // by TRI.
2230   CSRCost = BlockFrequency(
2231       std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2232   if (!CSRCost.getFrequency())
2233     return;
2234 
2235   // Raw cost is relative to Entry == 2^14; scale it appropriately.
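  // For example, if the entry block's frequency is 2^13 the cost is halved,
  // and if it is 2^16 the cost is multiplied by 4.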
2236   uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2237   if (!ActualEntry) {
2238     CSRCost = BlockFrequency(0);
2239     return;
2240   }
2241   uint64_t FixedEntry = 1 << 14;
2242   if (ActualEntry < FixedEntry)
2243     CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2244   else if (ActualEntry <= UINT32_MAX)
2245     // Invert the fraction and divide.
2246     CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2247   else
2248     // Can't use BranchProbability in general, since it takes 32-bit numbers.
2249     CSRCost =
2250         BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2251 }
2252 
2253 /// Collect the hint info for \p Reg.
2254 /// The results are stored into \p Out.
2255 /// \p Out is not cleared before being populated.
2256 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2257   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2258     if (!TII->isFullCopyInstr(Instr))
2259       continue;
2260     // Look for the other end of the copy.
2261     Register OtherReg = Instr.getOperand(0).getReg();
2262     if (OtherReg == Reg) {
2263       OtherReg = Instr.getOperand(1).getReg();
2264       if (OtherReg == Reg)
2265         continue;
2266     }
2267     // Get the current assignment.
2268     MCRegister OtherPhysReg =
2269         OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2270     // Push the collected information.
2271     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2272                            OtherPhysReg));
2273   }
2274 }
2275 
2276 /// Using the given \p List, compute the cost of the broken hints if
2277 /// \p PhysReg was used.
2278 /// \return The cost of \p List for \p PhysReg.
2279 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2280                                            MCRegister PhysReg) {
2281   BlockFrequency Cost = BlockFrequency(0);
2282   for (const HintInfo &Info : List) {
2283     if (Info.PhysReg != PhysReg)
2284       Cost += Info.Freq;
2285   }
2286   return Cost;
2287 }
2288 
2289 /// Using the register assigned to \p VirtReg, try to recolor
2290 /// all the live ranges that are copy-related with \p VirtReg.
2291 /// The recoloring is then propagated to all the live-ranges that have
2292 /// been recolored and so on, until no more copies can be coalesced or
2293 /// it is not profitable.
2294 /// For a given live range, profitability is determined by the sum of the
2295 /// frequencies of the non-identity copies it would introduce with the old
2296 /// and new register.
2297 void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2298   // We have a broken hint; check if it is possible to fix it by
2299   // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2300   // some register and PhysReg may be available for the other live-ranges.
2301   SmallSet<Register, 4> Visited;
2302   SmallVector<unsigned, 2> RecoloringCandidates;
2303   HintsInfo Info;
2304   Register Reg = VirtReg.reg();
2305   MCRegister PhysReg = VRM->getPhys(Reg);
2306   // Start the recoloring algorithm from the input live-interval; it will
2307   // then propagate to the ones that are copy-related with it.
2308   Visited.insert(Reg);
2309   RecoloringCandidates.push_back(Reg);
2310 
2311   LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2312                     << '(' << printReg(PhysReg, TRI) << ")\n");
2313 
2314   do {
2315     Reg = RecoloringCandidates.pop_back_val();
2316 
2317     // We cannot recolor physical registers.
2318     if (Reg.isPhysical())
2319       continue;
2320 
2321     // This may be a skipped register.
2322     if (!VRM->hasPhys(Reg)) {
2323       assert(!shouldAllocateRegister(Reg) &&
2324              "We have an unallocated variable which should have been handled");
2325       continue;
2326     }
2327 
2328     // Get the live interval mapped to this virtual register to be able
2329     // to check for interference with the new color.
2330     LiveInterval &LI = LIS->getInterval(Reg);
2331     MCRegister CurrPhys = VRM->getPhys(Reg);
2332     // Check that the new color matches the register class constraints and
2333     // that it is free for this live range.
2334     if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2335                                 Matrix->checkInterference(LI, PhysReg)))
2336       continue;
2337 
2338     LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2339                       << ") is recolorable.\n");
2340 
2341     // Gather the hint info.
2342     Info.clear();
2343     collectHintInfo(Reg, Info);
2344     // Check if recoloring the live-range will increase the cost of the
2345     // non-identity copies.
2346     if (CurrPhys != PhysReg) {
2347       LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2348       BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2349       BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2350       LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
2351                         << "\nNew Cost: "
2352                         << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
2353       if (OldCopiesCost < NewCopiesCost) {
2354         LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2355         continue;
2356       }
2357       // At this point, the cost is lower or equal. If it is
2358       // equal, we consider this profitable because it may expose
2359       // more recoloring opportunities.
2360       LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2361       // Recolor the live-range.
2362       Matrix->unassign(LI);
2363       Matrix->assign(LI, PhysReg);
2364     }
2365     // Push all copy-related live-ranges to keep reconciling the broken
2366     // hints.
2367     for (const HintInfo &HI : Info) {
2368       if (Visited.insert(HI.Reg).second)
2369         RecoloringCandidates.push_back(HI.Reg);
2370     }
2371   } while (!RecoloringCandidates.empty());
2372 }
2373 
2374 /// Try to recolor broken hints.
2375 /// Broken hints may be repaired by recoloring when an evicted variable
2376 /// freed up a register for a larger live-range.
2377 /// Consider the following example:
2378 /// BB1:
2379 ///   a =
2380 ///   b =
2381 /// BB2:
2382 ///   ...
2383 ///   = b
2384 ///   = a
2385 /// Let us assume b gets split:
2386 /// BB1:
2387 ///   a =
2388 ///   b =
2389 /// BB2:
2390 ///   c = b
2391 ///   ...
2392 ///   d = c
2393 ///   = d
2394 ///   = a
2395 /// Because of how the allocation works, b, c, and d may be assigned different
2396 /// colors. Now, if a gets evicted later:
2397 /// BB1:
2398 ///   a =
2399 ///   st a, SpillSlot
2400 ///   b =
2401 /// BB2:
2402 ///   c = b
2403 ///   ...
2404 ///   d = c
2405 ///   = d
2406 ///   e = ld SpillSlot
2407 ///   = e
2408 /// It is likely that we can assign the same register to b, c, and d,
2409 /// getting rid of 2 copies.
2410 void RAGreedy::tryHintsRecoloring() {
2411   for (const LiveInterval *LI : SetOfBrokenHints) {
2412     assert(LI->reg().isVirtual() &&
2413            "Recoloring is possible only for virtual registers");
2414     // Some dead defs may be around (e.g., because of debug uses).
2415     // Ignore those.
2416     if (!VRM->hasPhys(LI->reg()))
2417       continue;
2418     tryHintRecoloring(*LI);
2419   }
2420 }
2421 
2422 MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2423                                        SmallVectorImpl<Register> &NewVRegs,
2424                                        SmallVirtRegSet &FixedRegisters,
2425                                        RecoloringStack &RecolorStack,
2426                                        unsigned Depth) {
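  // The allocation attempt cascades through the live range stages: first try
  // to assign a free register, then try eviction, then splitting, and finally
  // spilling or last chance recoloring.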
2427   uint8_t CostPerUseLimit = uint8_t(~0u);
2428   // First try assigning a free register.
2429   auto Order =
2430       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2431   if (MCRegister PhysReg =
2432           tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2433     // When NewVRegs is not empty, we may have made decisions such as evicting
2434     // a virtual register; go with the earlier decisions and use the physical
2435     // register.
2436     if (CSRCost.getFrequency() &&
2437         EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2438       MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2439                                                 CostPerUseLimit, NewVRegs);
2440       if (CSRReg || !NewVRegs.empty())
2441         // Return now if we decide to use a CSR or create new vregs due to
2442         // pre-splitting.
2443         return CSRReg;
2444     } else
2445       return PhysReg;
2446   }
2447   // Non-empty NewVRegs means VirtReg has been split.
2448   if (!NewVRegs.empty())
2449     return 0;
2450 
2451   LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2452   LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2453                     << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2454 
2455   // Try to evict a less worthy live range, but only for ranges from the primary
2456   // queue. The RS_Split ranges already failed to do this, and they should not
2457   // get a second chance until they have been split.
2458   if (Stage != RS_Split)
2459     if (Register PhysReg =
2460             tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2461                      FixedRegisters)) {
2462       Register Hint = MRI->getSimpleHint(VirtReg.reg());
2463       // If VirtReg has a hint and that hint is broken, record this
2464       // virtual register as a recoloring candidate for broken hints.
2465       // Indeed, since we evicted a variable in its neighborhood, it is
2466       // likely we can at least partially recolor some of the
2467       // copy-related live-ranges.
2468       if (Hint && Hint != PhysReg)
2469         SetOfBrokenHints.insert(&VirtReg);
2470       return PhysReg;
2471     }
2472 
2473   assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2474 
2475   // The first time we see a live range, don't try to split or spill.
2476   // Wait until the second time, when all smaller ranges have been allocated.
2477   // This gives a better picture of the interference to split around.
2478   if (Stage < RS_Split) {
2479     ExtraInfo->setStage(VirtReg, RS_Split);
2480     LLVM_DEBUG(dbgs() << "wait for second round\n");
2481     NewVRegs.push_back(VirtReg.reg());
2482     return 0;
2483   }
2484 
2485   if (Stage < RS_Spill && !VirtReg.empty()) {
2486     // Try splitting VirtReg or interferences.
2487     unsigned NewVRegSizeBefore = NewVRegs.size();
2488     Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2489     if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2490       return PhysReg;
2491   }
2492 
2493   // If we couldn't allocate a register from spilling, there is probably some
2494   // invalid inline assembly. The base class will report it.
2495   if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2496     return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2497                                    RecolorStack, Depth);
2498   }
2499 
2500   // Finally spill VirtReg itself.
2501   if ((EnableDeferredSpilling ||
2502        TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2503       ExtraInfo->getStage(VirtReg) < RS_Memory) {
2504     // TODO: This is experimental; in particular, we do not model
2505     // the live range splitting done by spilling correctly.
2506     // We would need a deep integration with the spiller to do the
2507     // right thing here. Still, this is good enough for early testing.
2508     ExtraInfo->setStage(VirtReg, RS_Memory);
2509     LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2510     NewVRegs.push_back(VirtReg.reg());
2511   } else {
2512     NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2513                        TimerGroupDescription, TimePassesIsEnabled);
2514     LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2515     spiller().spill(LRE);
2516     ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2517 
2518     // Tell LiveDebugVariables about the new ranges. Ranges not covered by
2519     // the new regs are kept in LDV (still mapped to the old register) until
2520     // we rewrite spilled locations in LDV at a later stage.
2521     for (Register r : spiller().getSpilledRegs())
2522       DebugVars->splitRegister(r, LRE.regs(), *LIS);
2523     for (Register r : spiller().getReplacedRegs())
2524       DebugVars->splitRegister(r, LRE.regs(), *LIS);
2525 
2526     if (VerifyEnabled)
2527       MF->verify(this, "After spilling", &errs());
2528   }
2529 
2530   // The live virtual register requesting allocation was spilled, so tell
2531   // the caller not to allocate anything during this round.
2532   return 0;
2533 }
2534 
2535 void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2536   using namespace ore;
2537   if (Spills) {
2538     R << NV("NumSpills", Spills) << " spills ";
2539     R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2540   }
2541   if (FoldedSpills) {
2542     R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2543     R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2544       << " total folded spills cost ";
2545   }
2546   if (Reloads) {
2547     R << NV("NumReloads", Reloads) << " reloads ";
2548     R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2549   }
2550   if (FoldedReloads) {
2551     R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2552     R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2553       << " total folded reloads cost ";
2554   }
2555   if (ZeroCostFoldedReloads)
2556     R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2557       << " zero cost folded reloads ";
2558   if (Copies) {
2559     R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2560     R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2561   }
2562 }
2563 
2564 RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2565   RAGreedyStats Stats;
2566   const MachineFrameInfo &MFI = MF->getFrameInfo();
2567   int FI;
2568 
2569   auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2570     return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2571         A->getPseudoValue())->getFrameIndex());
2572   };
2573   auto isPatchpointInstr = [](const MachineInstr &MI) {
2574     return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2575            MI.getOpcode() == TargetOpcode::STACKMAP ||
2576            MI.getOpcode() == TargetOpcode::STATEPOINT;
2577   };
2578   for (MachineInstr &MI : MBB) {
2579     auto DestSrc = TII->isCopyInstr(MI);
2580     if (DestSrc) {
2581       const MachineOperand &Dest = *DestSrc->Destination;
2582       const MachineOperand &Src = *DestSrc->Source;
2583       Register SrcReg = Src.getReg();
2584       Register DestReg = Dest.getReg();
2585       // Only count `COPY`s with a virtual register as source or destination.
2586       if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2587         if (SrcReg.isVirtual()) {
2588           SrcReg = VRM->getPhys(SrcReg);
2589           if (SrcReg && Src.getSubReg())
2590             SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2591         }
2592         if (DestReg.isVirtual()) {
2593           DestReg = VRM->getPhys(DestReg);
2594           if (DestReg && Dest.getSubReg())
2595             DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2596         }
2597         if (SrcReg != DestReg)
2598           ++Stats.Copies;
2599       }
2600       continue;
2601     }
2602 
2603     SmallVector<const MachineMemOperand *, 2> Accesses;
2604     if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2605       ++Stats.Reloads;
2606       continue;
2607     }
2608     if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2609       ++Stats.Spills;
2610       continue;
2611     }
2612     if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2613         llvm::any_of(Accesses, isSpillSlotAccess)) {
2614       if (!isPatchpointInstr(MI)) {
2615         Stats.FoldedReloads += Accesses.size();
2616         continue;
2617       }
2618       // For statepoints there may be folded and zero-cost folded stack reloads.
2619       std::pair<unsigned, unsigned> NonZeroCostRange =
2620           TII->getPatchpointUnfoldableRange(MI);
2621       SmallSet<unsigned, 16> FoldedReloads;
2622       SmallSet<unsigned, 16> ZeroCostFoldedReloads;
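           // Operands inside the unfoldable range are genuine folded reloads;
           // frame-index operands outside it (e.g. statepoint deopt and GC
           // operands) are treated as zero cost.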
2623       for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2624         MachineOperand &MO = MI.getOperand(Idx);
2625         if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2626           continue;
2627         if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2628           FoldedReloads.insert(MO.getIndex());
2629         else
2630           ZeroCostFoldedReloads.insert(MO.getIndex());
2631       }
2632       // A stack slot that is also used by a folded reload is not zero cost.
2633       for (unsigned Slot : FoldedReloads)
2634         ZeroCostFoldedReloads.erase(Slot);
2635       Stats.FoldedReloads += FoldedReloads.size();
2636       Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2637       continue;
2638     }
2639     Accesses.clear();
2640     if (TII->hasStoreToStackSlot(MI, Accesses) &&
2641         llvm::any_of(Accesses, isSpillSlotAccess)) {
2642       Stats.FoldedSpills += Accesses.size();
2643     }
2644   }
2645   // Weight each collected statistic by the relative execution frequency of
2646   // this basic block.
2647   float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
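       // Example: a block with RelFreq = 8.0 containing 2 reloads contributes
       // ReloadsCost = 16.0.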
2648   Stats.ReloadsCost = RelFreq * Stats.Reloads;
2649   Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2650   Stats.SpillsCost = RelFreq * Stats.Spills;
2651   Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2652   Stats.CopiesCost = RelFreq * Stats.Copies;
2653   return Stats;
2654 }
2655 
2656 RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2657   RAGreedyStats Stats;
2658 
2659   // Sum up the spill, reload, and copy statistics from subloops.
2660   for (MachineLoop *SubLoop : *L)
2661     Stats.add(reportStats(SubLoop));
2662 
2663   for (MachineBasicBlock *MBB : L->getBlocks())
2664     // Only count blocks owned directly by this loop; subloops were handled above.
2665     if (Loops->getLoopFor(MBB) == L)
2666       Stats.add(computeStats(*MBB));
2667 
2668   if (!Stats.isEmpty()) {
2669     using namespace ore;
2670 
2671     ORE->emit([&]() {
2672       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2673                                         L->getStartLoc(), L->getHeader());
2674       Stats.report(R);
2675       R << "generated in loop";
2676       return R;
2677     });
2678   }
2679   return Stats;
2680 }
2681 
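     // Report whole-function spill/reload/copy statistics. Remarks are only
     // emitted when extra analysis is enabled for DEBUG_TYPE ("regalloc"),
     // e.g. with illustrative invocations such as:
     //   llc -pass-remarks-missed=regalloc foo.ll
     //   clang -O2 -Rpass-missed=regalloc foo.c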
2682 void RAGreedy::reportStats() {
2683   if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2684     return;
2685   RAGreedyStats Stats;
2686   for (MachineLoop *L : *Loops)
2687     Stats.add(reportStats(L));
2688   // Process non-loop blocks.
2689   for (MachineBasicBlock &MBB : *MF)
2690     if (!Loops->getLoopFor(&MBB))
2691       Stats.add(computeStats(MBB));
2692   if (!Stats.isEmpty()) {
2693     using namespace ore;
2694 
2695     ORE->emit([&]() {
2696       DebugLoc Loc;
2697       if (auto *SP = MF->getFunction().getSubprogram())
2698         Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2699       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2700                                         &MF->front());
2701       Stats.report(R);
2702       R << "generated in function";
2703       return R;
2704     });
2705   }
2706 }
2707 
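     // Return true if at least one virtual register in the function should be
     // allocated by this pass; used below to skip functions with nothing to do.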
2708 bool RAGreedy::hasVirtRegAlloc() {
2709   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2710     Register Reg = Register::index2VirtReg(I);
2711     if (MRI->reg_nodbg_empty(Reg))
2712       continue;
2713     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2714     if (!RC)
2715       continue;
2716     if (shouldAllocateRegister(Reg))
2717       return true;
2718   }
2719 
2720   return false;
2721 }
2722 
2723 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2724   LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2725                     << "********** Function: " << mf.getName() << '\n');
2726 
2727   MF = &mf;
2728   TII = MF->getSubtarget().getInstrInfo();
2729 
2730   if (VerifyEnabled)
2731     MF->verify(this, "Before greedy register allocator", &errs());
2732 
2733   RegAllocBase::init(getAnalysis<VirtRegMapWrapperLegacy>().getVRM(),
2734                      getAnalysis<LiveIntervalsWrapperPass>().getLIS(),
2735                      getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM());
2736 
2737   // Return early if there are no virtual registers to allocate to
2738   // physical registers.
2739   if (!hasVirtRegAlloc())
2740     return false;
2741 
2742   Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
2743   // Renumber to get accurate and consistent results from
2744   // SlotIndexes::getApproxInstrDistance.
2745   Indexes->packIndexes();
2746   MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
2747   DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
2748   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2749   Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2750   Bundles = &getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
2751   SpillPlacer = &getAnalysis<SpillPlacementWrapperLegacy>().getResult();
2752   DebugVars = &getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
2753   auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
2754 
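       // Compute the one-time cost of using a callee-saved register for the
       // first time; the eviction and split heuristics consult this value.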
2755   initializeCSRCost();
2756 
2757   RegCosts = TRI->getRegisterCosts(*MF);
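       // For the following tuning knobs, an explicit command-line flag takes
       // precedence over the corresponding target hook.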
2758   RegClassPriorityTrumpsGlobalness =
2759       GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2760           ? GreedyRegClassPriorityTrumpsGlobalness
2761           : TRI->regClassPriorityTrumpsGlobalness(*MF);
2762 
2763   ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2764                                ? GreedyReverseLocalAssignment
2765                                : TRI->reverseLocalAssignment();
2766 
2767   ExtraInfo.emplace();
2768   EvictAdvisor =
2769       getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2770   PriorityAdvisor =
2771       getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2772 
2773   VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2774   SpillerInstance.reset(
2775       createInlineSpiller({*LIS, LSS, *DomTree, *MBFI}, *MF, *VRM, *VRAI));
2776 
2777   VRAI->calculateSpillWeightsAndHints();
2778 
2779   LLVM_DEBUG(LIS->dump());
2780 
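       // SplitAnalysis inspects how a live range is used across basic blocks;
       // SplitEditor carries out the chosen splitting strategy.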
2781   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2782   SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2783 
2784   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2785   GlobalCand.resize(32);  // This will grow as needed.
2786   SetOfBrokenHints.clear();
2787 
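       // Run the main priority-queue allocation loop, then try to recolor
       // assignments to satisfy the copy hints recorded in SetOfBrokenHints.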
2788   allocatePhysRegs();
2789   tryHintsRecoloring();
2790 
2791   if (VerifyEnabled)
2792     MF->verify(this, "Before post optimization", &errs());
2793   postOptimization();
2794   reportStats();
2795 
2796   releaseMemory();
2797   return true;
2798 }
2799