1 //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the RAGreedy function pass for register allocation in
10 // optimized builds.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AllocationOrder.h"
15 #include "InterferenceCache.h"
16 #include "LiveDebugVariables.h"
17 #include "RegAllocBase.h"
18 #include "SpillPlacement.h"
19 #include "SplitKit.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/IndexedMap.h"
24 #include "llvm/ADT/MapVector.h"
25 #include "llvm/ADT/SetVector.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/StringRef.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/CodeGen/CalcSpillWeights.h"
34 #include "llvm/CodeGen/EdgeBundles.h"
35 #include "llvm/CodeGen/LiveInterval.h"
36 #include "llvm/CodeGen/LiveIntervalUnion.h"
37 #include "llvm/CodeGen/LiveIntervals.h"
38 #include "llvm/CodeGen/LiveRangeEdit.h"
39 #include "llvm/CodeGen/LiveRegMatrix.h"
40 #include "llvm/CodeGen/LiveStacks.h"
41 #include "llvm/CodeGen/MachineBasicBlock.h"
42 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
43 #include "llvm/CodeGen/MachineDominators.h"
44 #include "llvm/CodeGen/MachineFrameInfo.h"
45 #include "llvm/CodeGen/MachineFunction.h"
46 #include "llvm/CodeGen/MachineFunctionPass.h"
47 #include "llvm/CodeGen/MachineInstr.h"
48 #include "llvm/CodeGen/MachineLoopInfo.h"
49 #include "llvm/CodeGen/MachineOperand.h"
50 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
51 #include "llvm/CodeGen/MachineRegisterInfo.h"
52 #include "llvm/CodeGen/RegAllocRegistry.h"
53 #include "llvm/CodeGen/RegisterClassInfo.h"
54 #include "llvm/CodeGen/SlotIndexes.h"
55 #include "llvm/CodeGen/Spiller.h"
56 #include "llvm/CodeGen/TargetInstrInfo.h"
57 #include "llvm/CodeGen/TargetRegisterInfo.h"
58 #include "llvm/CodeGen/TargetSubtargetInfo.h"
59 #include "llvm/CodeGen/VirtRegMap.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/LLVMContext.h"
62 #include "llvm/MC/MCRegisterInfo.h"
63 #include "llvm/Pass.h"
64 #include "llvm/Support/BlockFrequency.h"
65 #include "llvm/Support/BranchProbability.h"
66 #include "llvm/Support/CommandLine.h"
67 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/MathExtras.h"
69 #include "llvm/Support/Timer.h"
70 #include "llvm/Support/raw_ostream.h"
71 #include "llvm/Target/TargetMachine.h"
72 #include "llvm/IR/DebugInfoMetadata.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstdint>
76 #include <memory>
77 #include <queue>
78 #include <tuple>
79 #include <utility>
80
81 using namespace llvm;
82
83 #define DEBUG_TYPE "regalloc"
84
85 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
86 STATISTIC(NumLocalSplits, "Number of split local live ranges");
87 STATISTIC(NumEvicted, "Number of interferences evicted");
88
89 static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
90 "split-spill-mode", cl::Hidden,
91 cl::desc("Spill mode for splitting live ranges"),
92 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
93 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
94 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
95 cl::init(SplitEditor::SM_Speed));
96
97 static cl::opt<unsigned>
98 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
99 cl::desc("Last chance recoloring max depth"),
100 cl::init(5));
101
102 static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
103 "lcr-max-interf", cl::Hidden,
104 cl::desc("Last chance recoloring maximum number of considered"
105 " interference at a time"),
106 cl::init(8));
107
108 static cl::opt<bool> ExhaustiveSearch(
109 "exhaustive-register-search", cl::NotHidden,
110 cl::desc("Exhaustive Search for registers bypassing the depth "
111 "and interference cutoffs of last chance recoloring"),
112 cl::Hidden);
113
114 static cl::opt<bool> EnableLocalReassignment(
115 "enable-local-reassign", cl::Hidden,
116 cl::desc("Local reassignment can yield better allocation decisions, but "
117 "may be compile time intensive"),
118 cl::init(false));
119
120 static cl::opt<bool> EnableDeferredSpilling(
121 "enable-deferred-spilling", cl::Hidden,
122 cl::desc("Instead of spilling a variable right away, defer the actual "
123 "code insertion to the end of the allocation. That way the "
124 "allocator might still find a suitable coloring for this "
125 "variable because of other evicted variables."),
126 cl::init(false));
127
128 // FIXME: Find a good default for this flag and remove the flag.
129 static cl::opt<unsigned>
130 CSRFirstTimeCost("regalloc-csr-first-time-cost",
131 cl::desc("Cost for first time use of callee-saved register."),
132 cl::init(0), cl::Hidden);
133
134 static cl::opt<bool> ConsiderLocalIntervalCost(
135 "consider-local-interval-cost", cl::Hidden,
136 cl::desc("Consider the cost of local intervals created by a split "
137 "candidate when choosing the best split candidate."),
138 cl::init(false));
139
140 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
141 createGreedyRegisterAllocator);
142
143 namespace {
144
/// The greedy register allocator: assigns virtual registers to physical
/// registers in priority order, falling back to interference eviction,
/// live-range splitting, and finally spilling when direct assignment fails.
class RAGreedy : public MachineFunctionPass,
                 public RegAllocBase,
                 private LiveRangeEdit::Delegate {
  // Convenient shortcuts.
  // PQueue holds (priority, ~vreg-number) pairs; see enqueue()/dequeue().
  using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
  using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
  using SmallVirtRegSet = SmallSet<Register, 16>;

  // context
  MachineFunction *MF;

  // Shortcuts to some useful interface.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  RegisterClassInfo RCI;

  // analyses
  SlotIndexes *Indexes;
  MachineBlockFrequencyInfo *MBFI;
  MachineDominatorTree *DomTree;
  MachineLoopInfo *Loops;
  MachineOptimizationRemarkEmitter *ORE;
  EdgeBundles *Bundles;
  SpillPlacement *SpillPlacer;
  LiveDebugVariables *DebugVars;
  AliasAnalysis *AA;

  // state
  std::unique_ptr<Spiller> SpillerInstance;
  PQueue Queue;
  // Monotonically increasing id used to break eviction cycles; see the
  // Cascade field in RegInfo below.
  unsigned NextCascade;
  std::unique_ptr<VirtRegAuxInfo> VRAI;

  // Live ranges pass through a number of stages as we try to allocate them.
  // Some of the stages may also create new live ranges:
  //
  // - Region splitting.
  // - Per-block splitting.
  // - Local splitting.
  // - Spilling.
  //
  // Ranges produced by one of the stages skip the previous stages when they are
  // dequeued. This improves performance because we can skip interference checks
  // that are unlikely to give any results. It also guarantees that the live
  // range splitting algorithm terminates, something that is otherwise hard to
  // ensure.
  enum LiveRangeStage {
    /// Newly created live range that has never been queued.
    RS_New,

    /// Only attempt assignment and eviction. Then requeue as RS_Split.
    RS_Assign,

    /// Attempt live range splitting if assignment is impossible.
    RS_Split,

    /// Attempt more aggressive live range splitting that is guaranteed to make
    /// progress. This is used for split products that may not be making
    /// progress.
    RS_Split2,

    /// Live range will be spilled. No more splitting will be attempted.
    RS_Spill,


    /// Live range is in memory. Because of other evictions, it might get moved
    /// in a register in the end.
    RS_Memory,

    /// There is nothing more we can do to this live range. Abort compilation
    /// if it can't be assigned.
    RS_Done
  };

  // Enum CutOffStage to keep a track whether the register allocation failed
  // because of the cutoffs encountered in last chance recoloring.
  // Note: This is used as bitmask. New value should be next power of 2.
  enum CutOffStage {
    // No cutoffs encountered
    CO_None = 0,

    // lcr-max-depth cutoff encountered
    CO_Depth = 1,

    // lcr-max-interf cutoff encountered
    CO_Interf = 2
  };

  // Bitmask of CutOffStage values accumulated during allocation.
  uint8_t CutOffInfo;

#ifndef NDEBUG
  static const char *const StageName[];
#endif

  // RegInfo - Keep additional information about each live range.
  struct RegInfo {
    LiveRangeStage Stage = RS_New;

    // Cascade - Eviction loop prevention. See canEvictInterference().
    unsigned Cascade = 0;

    RegInfo() = default;
  };

  // Per-virtual-register stage/cascade info, indexed by vreg number.
  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;

  /// Return the current allocation stage of \p VirtReg.
  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
    return ExtraRegInfo[VirtReg.reg()].Stage;
  }

  /// Set the allocation stage of \p VirtReg, growing the map if needed.
  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    ExtraRegInfo[VirtReg.reg()].Stage = Stage;
  }

  /// Set the stage of every RS_New register in [Begin, End) to \p NewStage.
  /// Registers that already progressed past RS_New are left untouched.
  template<typename Iterator>
  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    for (;Begin != End; ++Begin) {
      Register Reg = *Begin;
      if (ExtraRegInfo[Reg].Stage == RS_New)
        ExtraRegInfo[Reg].Stage = NewStage;
    }
  }

  /// Cost of evicting interference.
  struct EvictionCost {
    unsigned BrokenHints = 0; ///< Total number of broken hints.
    float MaxWeight = 0;      ///< Maximum spill weight evicted.

    EvictionCost() = default;

    // BrokenHints == ~0u is the sentinel for "infinite" cost.
    bool isMax() const { return BrokenHints == ~0u; }

    void setMax() { BrokenHints = ~0u; }

    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }

    // Lexicographic order: broken hints dominate, weight breaks ties.
    bool operator<(const EvictionCost &O) const {
      return std::tie(BrokenHints, MaxWeight) <
             std::tie(O.BrokenHints, O.MaxWeight);
    }
  };

  /// EvictionTrack - Keeps track of past evictions in order to optimize region
  /// split decision.
  class EvictionTrack {

  public:
    using EvictorInfo =
        std::pair<Register /* evictor */, MCRegister /* physreg */>;
    using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>;

  private:
    /// Each Vreg that has been evicted in the last stage of selectOrSplit will
    /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
    EvicteeInfo Evictees;

  public:
    /// Clear all eviction information.
    void clear() { Evictees.clear(); }

    /// Clear eviction information for the given evictee Vreg.
    /// E.g. when Vreg get's a new allocation, the old eviction info is no
    /// longer relevant.
    /// \param Evictee The evictee Vreg for whom we want to clear collected
    ///        eviction info.
    void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); }

    /// Track new eviction.
    /// The Evictor vreg has evicted the Evictee vreg from Physreg.
    /// \param PhysReg The physical register Evictee was evicted from.
    /// \param Evictor The evictor Vreg that evicted Evictee.
    /// \param Evictee The evictee Vreg.
    void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) {
      Evictees[Evictee].first = Evictor;
      Evictees[Evictee].second = PhysReg;
    }

    /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
    /// \param Evictee The evictee vreg.
    /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
    /// nobody has evicted Evictee from PhysReg.
    EvictorInfo getEvictor(Register Evictee) {
      if (Evictees.count(Evictee)) {
        return Evictees[Evictee];
      }

      return EvictorInfo(0, 0);
    }
  };

  // Keeps track of past evictions in order to optimize region split decision.
  EvictionTrack LastEvicted;

  // splitting state.
  std::unique_ptr<SplitAnalysis> SA;
  std::unique_ptr<SplitEditor> SE;

  /// Cached per-block interference maps
  InterferenceCache IntfCache;

  /// All basic blocks where the current register has uses.
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;

  /// Global live range splitting candidate info.
  struct GlobalSplitCandidate {
    // Register intended for assignment, or 0.
    MCRegister PhysReg;

    // SplitKit interval index for this candidate.
    unsigned IntvIdx;

    // Interference for PhysReg.
    InterferenceCache::Cursor Intf;

    // Bundles where this candidate should be live.
    BitVector LiveBundles;
    SmallVector<unsigned, 8> ActiveBlocks;

    // Reinitialize this candidate for a new physical register.
    void reset(InterferenceCache &Cache, MCRegister Reg) {
      PhysReg = Reg;
      IntvIdx = 0;
      Intf.setPhysReg(Cache, Reg);
      LiveBundles.clear();
      ActiveBlocks.clear();
    }

    // Set B[I] = C for every live bundle where B[I] was NoCand.
    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (unsigned I : LiveBundles.set_bits())
        if (B[I] == NoCand) {
          B[I] = C;
          Count++;
        }
      return Count;
    }
  };

  /// Candidate info for each PhysReg in AllocationOrder.
  /// This vector never shrinks, but grows to the size of the largest register
  /// class.
  SmallVector<GlobalSplitCandidate, 32> GlobalCand;

  enum : unsigned { NoCand = ~0u };

  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
  /// NoCand which indicates the stack interval.
  SmallVector<unsigned, 32> BundleCand;

  /// Callee-save register cost, calculated once per machine function.
  BlockFrequency CSRCost;

  /// Run or not the local reassignment heuristic. This information is
  /// obtained from the TargetSubtargetInfo.
  bool EnableLocalReassign;

  /// Enable or not the consideration of the cost of local intervals created
  /// by a split candidate when choosing the best split candidate.
  bool EnableAdvancedRASplitCost;

  /// Set of broken hints that may be reconciled later because of eviction.
  SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;

  /// The register cost values. This list will be recreated for each Machine
  /// Function
  ArrayRef<uint8_t> RegCosts;

public:
  RAGreedy();

  /// Return the pass name.
  StringRef getPassName() const override { return "Greedy Register Allocator"; }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  void releaseMemory() override;
  Spiller &spiller() override { return *SpillerInstance; }
  void enqueue(LiveInterval *LI) override;
  LiveInterval *dequeue() override;
  MCRegister selectOrSplit(LiveInterval &,
                           SmallVectorImpl<Register> &) override;
  void aboutToRemoveInterval(LiveInterval &) override;

  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoPHIs);
  }

  MachineFunctionProperties getClearedProperties() const override {
    return MachineFunctionProperties().set(
      MachineFunctionProperties::Property::IsSSA);
  }

  static char ID;

private:
  MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &,
                               SmallVirtRegSet &, unsigned = 0);

  bool LRE_CanEraseVirtReg(Register) override;
  void LRE_WillShrinkVirtReg(Register) override;
  void LRE_DidCloneVirtReg(Register, Register) override;
  void enqueue(PQueue &CurQueue, LiveInterval *LI);
  LiveInterval *dequeue(PQueue &CurQueue);

  BlockFrequency calcSpillCost();
  bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
  bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
  bool growRegion(GlobalSplitCandidate &Cand);
  bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand,
                                  unsigned BBNumber,
                                  const AllocationOrder &Order);
  bool splitCanCauseLocalSpill(unsigned VirtRegToSplit,
                               GlobalSplitCandidate &Cand, unsigned BBNumber,
                               const AllocationOrder &Order);
  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
                                     const AllocationOrder &Order,
                                     bool *CanCauseEvictionChain);
  bool calcCompactRegion(GlobalSplitCandidate&);
  void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
  void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
  Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
  bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
                            const SmallVirtRegSet &) const;
  bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
                                   MCRegister PhysReg, SlotIndex Start,
                                   SlotIndex End, EvictionCost &MaxCost) const;
  MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
                                      const LiveInterval &VirtReg,
                                      SlotIndex Start, SlotIndex End,
                                      float *BestEvictWeight) const;
  void evictInterference(LiveInterval &, MCRegister,
                         SmallVectorImpl<Register> &);
  bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
                                  SmallLISet &RecoloringCandidates,
                                  const SmallVirtRegSet &FixedRegisters);

  MCRegister tryAssign(LiveInterval&, AllocationOrder&,
                       SmallVectorImpl<Register>&,
                       const SmallVirtRegSet&);
  MCRegister tryEvict(LiveInterval &, AllocationOrder &,
                      SmallVectorImpl<Register> &, uint8_t,
                      const SmallVirtRegSet &);
  MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &,
                            SmallVectorImpl<Register> &);
  /// Calculate cost of region splitting.
  unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    BlockFrequency &BestCost,
                                    unsigned &NumCands, bool IgnoreCSR,
                                    bool *CanCauseEvictionChain = nullptr);
  /// Perform region splitting.
  unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                         bool HasCompact,
                         SmallVectorImpl<Register> &NewVRegs);
  /// Check other options before using a callee-saved register for the first
  /// time.
  MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg,
                                   AllocationOrder &Order, MCRegister PhysReg,
                                   uint8_t &CostPerUseLimit,
                                   SmallVectorImpl<Register> &NewVRegs);
  void initializeCSRCost();
  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<Register>&);
  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
                               SmallVectorImpl<Register>&);
  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<Register>&);
  unsigned trySplit(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<Register>&,
                    const SmallVirtRegSet&);
  unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
                                   SmallVectorImpl<Register> &,
                                   SmallVirtRegSet &, unsigned);
  bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &,
                               SmallVirtRegSet &, unsigned);
  void tryHintRecoloring(LiveInterval &);
  void tryHintsRecoloring();

  /// Model the information carried by one end of a copy.
  struct HintInfo {
    /// The frequency of the copy.
    BlockFrequency Freq;
    /// The virtual register or physical register.
    Register Reg;
    /// Its currently assigned register.
    /// In case of a physical register Reg == PhysReg.
    MCRegister PhysReg;

    HintInfo(BlockFrequency Freq, Register Reg, MCRegister PhysReg)
        : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
  };
  using HintsInfo = SmallVector<HintInfo, 4>;

  BlockFrequency getBrokenHintFreq(const HintsInfo &, MCRegister);
  void collectHintInfo(Register, HintsInfo &);

  bool isUnusedCalleeSavedReg(MCRegister PhysReg) const;

  /// Greedy RA statistic to remark.
  struct RAGreedyStats {
    unsigned Reloads = 0;
    unsigned FoldedReloads = 0;
    unsigned ZeroCostFoldedReloads = 0;
    unsigned Spills = 0;
    unsigned FoldedSpills = 0;
    unsigned Copies = 0;
    float ReloadsCost = 0.0f;
    float FoldedReloadsCost = 0.0f;
    float SpillsCost = 0.0f;
    float FoldedSpillsCost = 0.0f;
    float CopiesCost = 0.0f;

    // True when no countable event was recorded (costs are not checked).
    bool isEmpty() {
      return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
               ZeroCostFoldedReloads || Copies);
    }

    // Accumulate another block's / loop's statistics into this one.
    void add(RAGreedyStats other) {
      Reloads += other.Reloads;
      FoldedReloads += other.FoldedReloads;
      ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
      Spills += other.Spills;
      FoldedSpills += other.FoldedSpills;
      Copies += other.Copies;
      ReloadsCost += other.ReloadsCost;
      FoldedReloadsCost += other.FoldedReloadsCost;
      SpillsCost += other.SpillsCost;
      FoldedSpillsCost += other.FoldedSpillsCost;
      CopiesCost += other.CopiesCost;
    }

    void report(MachineOptimizationRemarkMissed &R);
  };

  /// Compute statistic for a basic block.
  RAGreedyStats computeStats(MachineBasicBlock &MBB);

  /// Compute and report statistic through a remark.
  RAGreedyStats reportStats(MachineLoop *L);

  /// Report the statistic for each loop.
  void reportStats();
};
595
596 } // end anonymous namespace
597
// Pass identification and registration with the LLVM pass infrastructure.
char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                "Greedy Register Allocator", false, false)

#ifndef NDEBUG
// Debug names for the LiveRangeStage enum; must stay in sync with its order.
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
634
createGreedyRegisterAllocator()635 FunctionPass* llvm::createGreedyRegisterAllocator() {
636 return new RAGreedy();
637 }
638
// Constructor only registers the pass ID; all per-function state is set up
// in runOnMachineFunction().
RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
}
641
// Declare the analyses this pass requires and preserves. The allocator does
// not change the CFG, so most machine-level analyses survive it.
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
669
670 //===----------------------------------------------------------------------===//
671 // LiveRangeEdit delegate methods
672 //===----------------------------------------------------------------------===//
673
LRE_CanEraseVirtReg(Register VirtReg)674 bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
675 LiveInterval &LI = LIS->getInterval(VirtReg);
676 if (VRM->hasPhys(VirtReg)) {
677 Matrix->unassign(LI);
678 aboutToRemoveInterval(LI);
679 return true;
680 }
681 // Unassigned virtreg is probably in the priority queue.
682 // RegAllocBase will erase it after dequeueing.
683 // Nonetheless, clear the live-range so that the debug
684 // dump will show the right state for that VirtReg.
685 LI.clear();
686 return false;
687 }
688
LRE_WillShrinkVirtReg(Register VirtReg)689 void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
690 if (!VRM->hasPhys(VirtReg))
691 return;
692
693 // Register is assigned, put it back on the queue for reassignment.
694 LiveInterval &LI = LIS->getInterval(VirtReg);
695 Matrix->unassign(LI);
696 enqueue(&LI);
697 }
698
// LiveRangeEdit delegate: called after \p Old was cloned into \p New.
void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!ExtraRegInfo.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // reset Old to RS_Assign first, then copy its info (stage and cascade) to
  // New, so both components restart at the assignment stage.
  ExtraRegInfo[Old].Stage = RS_Assign;
  ExtraRegInfo.grow(New);
  ExtraRegInfo[New] = ExtraRegInfo[Old];
}
712
releaseMemory()713 void RAGreedy::releaseMemory() {
714 SpillerInstance.reset();
715 ExtraRegInfo.clear();
716 GlobalCand.clear();
717 }
718
enqueue(LiveInterval * LI)719 void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
720
// Compute an allocation priority for \p LI and push it onto \p CurQueue.
// Higher priorities are dequeued first (std::priority_queue is a max-heap).
void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");
  unsigned Prio;

  // First-time registers advance from RS_New to RS_Assign on enqueue.
  ExtraRegInfo.grow(Reg);
  if (ExtraRegInfo[Reg].Stage == RS_New)
    ExtraRegInfo[Reg].Stage = RS_Assign;

  if (ExtraRegInfo[Reg].Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
    // Memory operand should be considered last.
    // Change the priority such that Memory operand are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    // NOTE: function-local static, so this counter persists across machine
    // functions and compilations within the same process.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = !ReverseLocal &&
      (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());

    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
      // Register-class allocation priority occupies bits 24+.
      Prio |= RC.AllocationPriority << 24;
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference.  Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Prio, ~Reg));
}
783
dequeue()784 LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
785
dequeue(PQueue & CurQueue)786 LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
787 if (CurQueue.empty())
788 return nullptr;
789 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
790 CurQueue.pop();
791 return LI;
792 }
793
794 //===----------------------------------------------------------------------===//
795 // Direct Assignment
796 //===----------------------------------------------------------------------===//
797
798 /// tryAssign - Try to assign VirtReg to an available register.
/// tryAssign - Try to assign VirtReg to an available register.
/// Scans the allocation order for the first interference-free register,
/// preferring hints; if only a costly register is free, it may instead try to
/// evict interference from a cheaper one. Returns 0 (invalid) when no
/// register is available.
MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      // A free hinted register is taken immediately; otherwise remember the
      // first free register and keep scanning for a hint.
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
      // Allow at most one broken hint as the eviction budget.
      EvictionCost MaxCost;
      MaxCost.setBrokenHints(1);
      if (canEvictInterference(VirtReg, PhysHint, true, MaxCost,
                               FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }
      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << Cost << '\n');
  // Eviction is only worthwhile if it frees a register cheaper than Cost;
  // otherwise fall back to the free-but-costly PhysReg.
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}
848
//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

canReassign(LiveInterval & VirtReg,Register PrevReg) const853 Register RAGreedy::canReassign(LiveInterval &VirtReg, Register PrevReg) const {
854 auto Order =
855 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
856 MCRegister PhysReg;
857 for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
858 if ((*I).id() == PrevReg.id())
859 continue;
860
861 MCRegUnitIterator Units(*I, TRI);
862 for (; Units.isValid(); ++Units) {
863 // Instantiate a "subquery", not to be confused with the Queries array.
864 LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
865 if (subQ.checkInterference())
866 break;
867 }
868 // If no units have interference, break out with the current PhysReg.
869 if (!Units.isValid())
870 PhysReg = *I;
871 }
872 if (PhysReg)
873 LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
874 << printReg(PrevReg, TRI) << " to "
875 << printReg(PhysReg, TRI) << '\n');
876 return PhysReg;
877 }
878
879 /// shouldEvict - determine if A should evict the assigned live range B. The
880 /// eviction policy defined by this function together with the allocation order
881 /// defined by enqueue() decides which registers ultimately end up being split
882 /// and spilled.
883 ///
884 /// Cascade numbers are used to prevent infinite loops if this function is a
885 /// cyclic relation.
886 ///
887 /// @param A The live range to be assigned.
888 /// @param IsHint True when A is about to be assigned to its preferred
889 /// register.
890 /// @param B The live range to be evicted.
891 /// @param BreaksHint True when B is already assigned to its preferred register.
shouldEvict(LiveInterval & A,bool IsHint,LiveInterval & B,bool BreaksHint) const892 bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
893 LiveInterval &B, bool BreaksHint) const {
894 bool CanSplit = getStage(B) < RS_Spill;
895
896 // Be fairly aggressive about following hints as long as the evictee can be
897 // split.
898 if (CanSplit && IsHint && !BreaksHint)
899 return true;
900
901 if (A.weight() > B.weight()) {
902 LLVM_DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight() << '\n');
903 return true;
904 }
905 return false;
906 }
907
/// canEvictInterference - Return true if all interferences between VirtReg and
/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
/// @param IsHint  True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// @param FixedRegisters Virtual registers for which a physical register was
///                scavenged during last-chance recoloring; these must never
///                be evicted.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(
    LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
    EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
    return false;

  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);

  // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
  // involved in an eviction before. If a cascade number was assigned, deny
  // evicting anything with the same or a newer cascade number. This prevents
  // infinite eviction loops.
  //
  // This works out so a register without a cascade number is allowed to evict
  // anything, and it can be evicted by anything.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
  if (!Cascade)
    Cascade = NextCascade;

  EvictionCost Cost;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      assert(Register::isVirtualRegister(Intf->reg()) &&
             "Only expecting virtual register interference from query");

      // Do not allow eviction of a virtual register if we are in the middle
      // of last-chance recoloring and this virtual register is one that we
      // have scavenged a physical register for.
      if (FixedRegisters.count(Intf->reg()))
        return false;

      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;
      // Once a live range becomes small enough, it is urgent that we find a
      // register for it. This is indicated by an infinite spill weight. These
      // urgent live ranges get to evict almost anything.
      //
      // Also allow urgent evictions of unspillable ranges from a strictly
      // larger allocation order.
      bool Urgent =
          !VirtReg.isSpillable() &&
          (Intf->isSpillable() ||
           RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg())) <
               RegClassInfo.getNumAllocatableRegs(
                   MRI->getRegClass(Intf->reg())));
      // Only evict older cascades or live ranges without a cascade.
      unsigned IntfCascade = ExtraRegInfo[Intf->reg()].Cascade;
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        // We permit breaking cascades for urgent evictions. It should be the
        // last resort, though, so make it really expensive.
        Cost.BrokenHints += 10;
      }
      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
      // Urgent evictions bypass the weight-based policy below.
      if (Urgent)
        continue;
      // Apply the eviction policy for non-urgent evictions.
      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
        return false;
      // If !MaxCost.isMax(), then we're just looking for a cheap register.
      // Evicting another local live range in this case could lead to suboptimal
      // coloring.
      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
          (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
        return false;
      }
    }
  }
  // All interferences survived the checks; report the accumulated cost.
  MaxCost = Cost;
  return true;
}
1004
/// Return true if all interferences between VirtReg and PhysReg between
/// Start and End can be evicted.
///
/// \param VirtReg Live range that is about to be assigned.
/// \param PhysReg Desired register for assignment.
/// \param Start   Start of range to look for interferences.
/// \param End     End of range to look for interferences.
/// \param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
                                           MCRegister PhysReg, SlotIndex Start,
                                           SlotIndex End,
                                           EvictionCost &MaxCost) const {
  EvictionCost Cost;

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    Q.collectInterferingVRegs();

    // Check if any interfering live range is heavier than MaxWeight.
    for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // Check if interference overlaps the segment of interest; interference
      // outside [Start, End) is irrelevant here.
      if (!Intf->overlaps(Start, End))
        continue;

      // Cannot evict non virtual reg interference.
      if (!Register::isVirtualRegister(Intf->reg()))
        return false;
      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;

      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg());
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight());
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
    }
  }

  // A zero weight means nothing in the range actually interfered, so there is
  // nothing to evict.
  if (Cost.MaxWeight == 0)
    return false;

  MaxCost = Cost;
  return true;
}
1055
1056 /// Return the physical register that will be best
1057 /// candidate for eviction by a local split interval that will be created
1058 /// between Start and End.
1059 ///
1060 /// \param Order The allocation order
1061 /// \param VirtReg Live range that is about to be assigned.
1062 /// \param Start Start of range to look for interferences
1063 /// \param End End of range to look for interferences
1064 /// \param BestEvictweight The eviction cost of that eviction
1065 /// \return The PhysReg which is the best candidate for eviction and the
1066 /// eviction cost in BestEvictweight
getCheapestEvicteeWeight(const AllocationOrder & Order,const LiveInterval & VirtReg,SlotIndex Start,SlotIndex End,float * BestEvictweight) const1067 MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
1068 const LiveInterval &VirtReg,
1069 SlotIndex Start, SlotIndex End,
1070 float *BestEvictweight) const {
1071 EvictionCost BestEvictCost;
1072 BestEvictCost.setMax();
1073 BestEvictCost.MaxWeight = VirtReg.weight();
1074 MCRegister BestEvicteePhys;
1075
1076 // Go over all physical registers and find the best candidate for eviction
1077 for (MCRegister PhysReg : Order.getOrder()) {
1078
1079 if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
1080 BestEvictCost))
1081 continue;
1082
1083 // Best so far.
1084 BestEvicteePhys = PhysReg;
1085 }
1086 *BestEvictweight = BestEvictCost.MaxWeight;
1087 return BestEvicteePhys;
1088 }
1089
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
  if (!Cascade)
    Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++;

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast, we may need to recalculate when different physregs
    // overlap the same register unit so we had different SubRanges queried
    // against it.
    Q.collectInterferingVRegs();
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    // Record the eviction so region splitting can later detect bad eviction
    // chains involving this evictor/evictee pair.
    LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());

    Matrix->unassign(*Intf);
    // Cascades must strictly increase, except when an unspillable range
    // urgently evicts a spillable one.
    assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg()].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}
1135
1136 /// Returns true if the given \p PhysReg is a callee saved register and has not
1137 /// been used for allocation yet.
isUnusedCalleeSavedReg(MCRegister PhysReg) const1138 bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
1139 MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
1140 if (!CSR)
1141 return false;
1142
1143 return !Matrix->isPhysRegUsed(PhysReg);
1144 }
1145
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
/// @param NewVRegs Receives the evicted virtual registers for re-queueing.
/// @param CostPerUseLimit Only consider physregs strictly cheaper than this.
/// @param FixedRegisters Virtual registers that must not be evicted.
/// @return Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
  EvictionCost BestCost;
  BestCost.setMax();
  MCRegister BestPhys;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < uint8_t(~0u)) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight();

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }

  for (auto I = Order.begin(), E = Order.getOrderLimitEnd(OrderLimit); I != E;
       ++I) {
    MCRegister PhysReg = *I;
    assert(PhysReg);
    // Registers at or above the cost limit cannot be an improvement.
    if (RegCosts[PhysReg] >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
      LLVM_DEBUG(
          dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
                 << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
                 << '\n');
      continue;
    }

    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
                              FixedRegisters))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (I.isHint())
      break;
  }

  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}
1219
//===----------------------------------------------------------------------===//
//                            Region Splitting
//===----------------------------------------------------------------------===//
1223
/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    // Prefer a register where the value is live in/out; an implicit def at the
    // end of the block does not count as a real live-out.
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        // Interference is live-in itself, so the value must enter on the
        // stack.
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        // Interference begins before the first use: prefer spilling on entry.
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        // Interference begins between uses: a spill is needed, but the entry
        // preference is unaffected.
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value (mirror of the live-in logic).
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}
1299
/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  // Constraints and links are handed to SpillPlacer in groups of GroupSize to
  // amortize call overhead.
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      // Transparent block: no constraint, just link its in/out bundles.
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  // Flush the remaining partial groups.
  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
  return true;
}
1354
/// growRegion - Iteratively grow the split region for Cand: repeatedly pull
/// the live-through blocks on the periphery of the recently-positive bundles
/// into the region and feed their constraints back into SpillPlacer until the
/// placement converges. Returns false if a through block's interference makes
/// the required spilling impossible.
bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  // ActiveBlocks[0..AddedTo) have already been handed to SpillPlacer.
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}
1403
/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  // No live bundles at all means the split would degenerate into single-block
  // regions, which local splitting handles better.
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}
1451
1452 /// calcSpillCost - Compute how expensive it would be to split the live range in
1453 /// SA around all use blocks instead of forming bundle regions.
calcSpillCost()1454 BlockFrequency RAGreedy::calcSpillCost() {
1455 BlockFrequency Cost = 0;
1456 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1457 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1458 unsigned Number = BI.MBB->getNumber();
1459 // We normally only need one spill instruction - a load or a store.
1460 Cost += SpillPlacer->getBlockFrequency(Number);
1461
1462 // Unless the value is redefined in the block.
1463 if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
1464 Cost += SpillPlacer->getBlockFrequency(Number);
1465 }
1466 return Cost;
1467 }
1468
/// Check if splitting Evictee will create a local split interval in
/// basic block number BBNumber that may cause a bad eviction chain. This is
/// intended to prevent bad eviction sequences like:
/// movl	%ebp, 8(%esp)           # 4-byte Spill
/// movl	%ecx, %ebp
/// movl	%ebx, %ecx
/// movl	%edi, %ebx
/// movl	%edx, %edi
/// cltd
/// idivl	%esi
/// movl	%edi, %edx
/// movl	%ebx, %edi
/// movl	%ecx, %ebx
/// movl	%ebp, %ecx
/// movl	16(%esp), %ebp          # 4 - byte Reload
///
/// Such sequences are created in 2 scenarios:
///
/// Scenario #1:
/// %0 is evicted from physreg0 by %1.
/// Evictee %0 is intended for region splitting with split candidate
/// physreg0 (the reg %0 was evicted from).
/// Region splitting creates a local interval because of interference with the
/// evictor %1 (normally region splitting creates 2 interval, the "by reg"
/// and "by stack" intervals and local interval created when interference
/// occurs).
/// One of the split intervals ends up evicting %2 from physreg1.
/// Evictee %2 is intended for region splitting with split candidate
/// physreg1.
/// One of the split intervals ends up evicting %3 from physreg2, etc.
///
/// Scenario #2
/// %0 is evicted from physreg0 by %1.
/// %2 is evicted from physreg2 by %3 etc.
/// Evictee %0 is intended for region splitting with split candidate
/// physreg1.
/// Region splitting creates a local interval because of interference with the
/// evictor %1.
/// One of the split intervals ends up evicting back original evictor %1
/// from physreg0 (the reg %0 was evicted from).
/// Another evictee %2 is intended for region splitting with split candidate
/// physreg1.
/// One of the split intervals ends up evicting %3 from physreg2, etc.
///
/// \param Evictee  The register considered to be split.
/// \param Cand     The split candidate that determines the physical register
///                 we are splitting for and the interferences.
/// \param BBNumber The number of a BB for which the region split process will
///                 create a local split interval.
/// \param Order    The physical registers that may get evicted by a split
///                 artifact of Evictee.
/// \return True if splitting Evictee may cause a bad eviction chain, false
/// otherwise.
bool RAGreedy::splitCanCauseEvictionChain(Register Evictee,
                                          GlobalSplitCandidate &Cand,
                                          unsigned BBNumber,
                                          const AllocationOrder &Order) {
  EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
  unsigned Evictor = VregEvictorInfo.first;
  MCRegister PhysReg = VregEvictorInfo.second;

  // No actual evictor.
  if (!Evictor || !PhysReg)
    return false;

  // Find the cheapest register a split artifact of Evictee could evict over
  // the interference range.
  float MaxWeight = 0;
  MCRegister FutureEvictedPhysReg =
      getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
                               Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);

  // The bad eviction chain occurs when either the split candidate is the
  // evicting reg or one of the split artifact will evict the evicting reg.
  if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
    return false;

  Cand.Intf.moveToBlock(BBNumber);

  // Check to see if the Evictor contains interference (with Evictee) in the
  // given BB. If so, this interference caused the eviction of Evictee from
  // PhysReg. This suggests that we will create a local interval during the
  // region split to avoid this interference. This local interval may cause a
  // bad eviction chain.
  if (!LIS->hasInterval(Evictor))
    return false;
  LiveInterval &EvictorLI = LIS->getInterval(Evictor);
  if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
    return false;

  // Now, check to see if the local interval we will create is going to be
  // expensive enough to evict somebody. If so, this may cause a bad eviction
  // chain.
  float splitArtifactWeight =
      VRAI->futureWeight(LIS->getInterval(Evictee),
                         Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
  if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
    return false;

  return true;
}
1568
1569 /// Check if splitting VirtRegToSplit will create a local split interval
1570 /// in basic block number BBNumber that may cause a spill.
1571 ///
1572 /// \param VirtRegToSplit The register considered to be split.
1573 /// \param Cand The split candidate that determines the physical
1574 /// register we are splitting for and the interferences.
1575 /// \param BBNumber The number of a BB for which the region split process
1576 /// will create a local split interval.
1577 /// \param Order The physical registers that may get evicted by a
1578 /// split artifact of VirtRegToSplit.
1579 /// \return True if splitting VirtRegToSplit may cause a spill, false
1580 /// otherwise.
splitCanCauseLocalSpill(unsigned VirtRegToSplit,GlobalSplitCandidate & Cand,unsigned BBNumber,const AllocationOrder & Order)1581 bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
1582 GlobalSplitCandidate &Cand,
1583 unsigned BBNumber,
1584 const AllocationOrder &Order) {
1585 Cand.Intf.moveToBlock(BBNumber);
1586
1587 // Check if the local interval will find a non interfereing assignment.
1588 for (auto PhysReg : Order.getOrder()) {
1589 if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(),
1590 Cand.Intf.last(), PhysReg))
1591 return false;
1592 }
1593
1594 // The local interval is not able to find non interferencing assignment
1595 // and not able to evict a less worthy interval, therfore, it can cause a
1596 // spill.
1597 return true;
1598 }
1599
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
/// \param Cand  Split candidate whose bundle assignment (LiveBundles) and
///              interference cursor are used.
/// \param Order Allocation order used when probing local-interval placement.
/// \param CanCauseEvictionChain If non-null, set to true when the split
///        pattern may start a bad eviction chain.
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order,
                                             bool *CanCauseEvictionChain) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  Register VirtRegToSplit = SA->getParent().reg();
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);
    // Check whether a local interval is going to be created during the region
    // split. Calculate the advanced split cost (cost of local intervals) if
    // the option is enabled.
    if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn &&
        BI.LiveOut && RegIn && RegOut) {

      if (CanCauseEvictionChain &&
          splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
        // This interference causes our eviction from this assignment, we might
        // evict somebody else and eventually someone will spill, add that cost.
        // See splitCanCauseEvictionChain for detailed description of scenarios.
        // The cost is charged twice: once for the eviction itself and once for
        // the eventual spill it may trigger.
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);

        *CanCauseEvictionChain = true;

      } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number,
                                         Order)) {
        // This interference causes the local interval to spill, add that cost
        // (charged twice: spill store plus reload).
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
        GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
      }
    }

    // Charge one copy on entry/exit whenever the bundle assignment disagrees
    // with the spill placement preference for this block.
    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  // Live-through blocks without uses: cost depends on whether the register is
  // live on entry, exit, both, or neither.
  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);

        // Check whether a local interval is going to be created during the
        // region split.
        if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
            splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
          // This interference causes our eviction from this assignment, we
          // might evict somebody else, add that cost.
          // See splitCanCauseEvictionChain for detailed description of
          // scenarios.
          GlobalCost += SpillPlacer->getBlockFrequency(Number);
          GlobalCost += SpillPlacer->getBlockFrequency(Number);

          *CanCauseEvictionChain = true;
        }
      }
      continue;
    }
    // live-in / stack-out or stack-in live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}
1684
/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // IntvIn/IntvOut are the interval indexes assigned on entry/exit of the
    // block (0 means stack-bound); IntfIn/IntfOut bound the interference the
    // split editor must avoid.
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned c = 0; c != UsedCands.size(); ++c) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      // Entirely stack-bound through this block: nothing to do.
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    // NOTE: this local LiveInterval deliberately shadows the Register 'Reg'
    // declared above.
    LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (getStage(Reg) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}
1835
tryRegionSplit(LiveInterval & VirtReg,AllocationOrder & Order,SmallVectorImpl<Register> & NewVRegs)1836 MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg,
1837 AllocationOrder &Order,
1838 SmallVectorImpl<Register> &NewVRegs) {
1839 if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
1840 return MCRegister::NoRegister;
1841 unsigned NumCands = 0;
1842 BlockFrequency SpillCost = calcSpillCost();
1843 BlockFrequency BestCost;
1844
1845 // Check if we can split this live range around a compact region.
1846 bool HasCompact = calcCompactRegion(GlobalCand.front());
1847 if (HasCompact) {
1848 // Yes, keep GlobalCand[0] as the compact region candidate.
1849 NumCands = 1;
1850 BestCost = BlockFrequency::getMaxFrequency();
1851 } else {
1852 // No benefit from the compact region, our fallback will be per-block
1853 // splitting. Make sure we find a solution that is cheaper than spilling.
1854 BestCost = SpillCost;
1855 LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
1856 MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
1857 }
1858
1859 bool CanCauseEvictionChain = false;
1860 unsigned BestCand =
1861 calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
1862 false /*IgnoreCSR*/, &CanCauseEvictionChain);
1863
1864 // Split candidates with compact regions can cause a bad eviction sequence.
1865 // See splitCanCauseEvictionChain for detailed description of scenarios.
1866 // To avoid it, we need to comapre the cost with the spill cost and not the
1867 // current max frequency.
1868 if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
1869 CanCauseEvictionChain) {
1870 return MCRegister::NoRegister;
1871 }
1872
1873 // No solutions found, fall back to single block splitting.
1874 if (!HasCompact && BestCand == NoCand)
1875 return MCRegister::NoRegister;
1876
1877 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1878 }
1879
/// Evaluate every physreg in \p Order as a region-split candidate and record
/// the cheapest one in GlobalCand.
///
/// \param VirtReg   The live range being split (used for diagnostics).
/// \param Order     Allocation order of candidate physical registers.
/// \param BestCost  In: the cost to beat. Out: cost of the best candidate.
/// \param NumCands  In/out: number of GlobalCand entries currently in use.
/// \param IgnoreCSR Skip unused callee-saved registers as candidates.
/// \param CanCauseEvictionChain If non-null, set to whether the returned best
///        candidate may start a bad eviction chain.
/// \return Index into GlobalCand of the best candidate, or NoCand.
unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands, bool IgnoreCSR,
                                            bool *CanCauseEvictionChain) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      // Evict the candidate with the fewest live bundles (least promising),
      // never the current best.
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
          continue;
        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = CandIndex;
          WorstCount = Count;
        }
      }
      --NumCands;
      // The last entry moves into the evicted slot; fix up BestCand if it was
      // the one that moved.
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
               MBFI->printBlockFreq(dbgs(), Cost));
    // The static (per-block) cost alone already exceeds the best found so
    // far; the total can only be worse.
    if (Cost >= BestCost) {
      LLVM_DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    if (!growRegion(Cand)) {
      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
      continue;
    }

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      LLVM_DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    bool HasEvictionChain = false;
    Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
    LLVM_DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int I : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << I;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
      // See splitCanCauseEvictionChain for detailed description of bad
      // eviction chain scenarios.
      if (CanCauseEvictionChain)
        *CanCauseEvictionChain = HasEvictionChain;
    }
    ++NumCands;
  }

  if (CanCauseEvictionChain && BestCand != NoCand) {
    // See splitCanCauseEvictionChain for detailed description of bad
    // eviction chain scenarios.
    LLVM_DEBUG(dbgs() << "Best split candidate of vreg "
                      << printReg(VirtReg.reg(), TRI) << " may ");
    if (!(*CanCauseEvictionChain))
      LLVM_DEBUG(dbgs() << "not ");
    LLVM_DEBUG(dbgs() << "cause bad eviction chain\n");
  }

  return BestCand;
}
1979
doRegionSplit(LiveInterval & VirtReg,unsigned BestCand,bool HasCompact,SmallVectorImpl<Register> & NewVRegs)1980 unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
1981 bool HasCompact,
1982 SmallVectorImpl<Register> &NewVRegs) {
1983 SmallVector<unsigned, 8> UsedCands;
1984 // Prepare split editor.
1985 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1986 SE->reset(LREdit, SplitSpillMode);
1987
1988 // Assign all edge bundles to the preferred candidate, or NoCand.
1989 BundleCand.assign(Bundles->getNumBundles(), NoCand);
1990
1991 // Assign bundles for the best candidate region.
1992 if (BestCand != NoCand) {
1993 GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1994 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1995 UsedCands.push_back(BestCand);
1996 Cand.IntvIdx = SE->openIntv();
1997 LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
1998 << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1999 (void)B;
2000 }
2001 }
2002
2003 // Assign bundles for the compact region.
2004 if (HasCompact) {
2005 GlobalSplitCandidate &Cand = GlobalCand.front();
2006 assert(!Cand.PhysReg && "Compact region has no physreg");
2007 if (unsigned B = Cand.getBundles(BundleCand, 0)) {
2008 UsedCands.push_back(0);
2009 Cand.IntvIdx = SE->openIntv();
2010 LLVM_DEBUG(dbgs() << "Split for compact region in " << B
2011 << " bundles, intv " << Cand.IntvIdx << ".\n");
2012 (void)B;
2013 }
2014 }
2015
2016 splitAroundRegion(LREdit, UsedCands);
2017 return 0;
2018 }
2019
2020 //===----------------------------------------------------------------------===//
2021 // Per-Block Splitting
2022 //===----------------------------------------------------------------------===//
2023
2024 /// tryBlockSplit - Split a global live range around every block with uses. This
2025 /// creates a lot of local live ranges, that will be split by tryLocalSplit if
2026 /// they don't allocate.
tryBlockSplit(LiveInterval & VirtReg,AllocationOrder & Order,SmallVectorImpl<Register> & NewVRegs)2027 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
2028 SmallVectorImpl<Register> &NewVRegs) {
2029 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
2030 Register Reg = VirtReg.reg();
2031 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
2032 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2033 SE->reset(LREdit, SplitSpillMode);
2034 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
2035 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
2036 if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
2037 SE->splitSingleBlock(BI);
2038 }
2039 // No blocks were split.
2040 if (LREdit.empty())
2041 return 0;
2042
2043 // We did split for some blocks.
2044 SmallVector<unsigned, 8> IntvMap;
2045 SE->finish(&IntvMap);
2046
2047 // Tell LiveDebugVariables about the new ranges.
2048 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
2049
2050 ExtraRegInfo.resize(MRI->getNumVirtRegs());
2051
2052 // Sort out the new intervals created by splitting. The remainder interval
2053 // goes straight to spilling, the new local ranges get to stay RS_New.
2054 for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
2055 LiveInterval &LI = LIS->getInterval(LREdit.get(I));
2056 if (getStage(LI) == RS_New && IntvMap[I] == 0)
2057 setStage(LI, RS_Spill);
2058 }
2059
2060 if (VerifyEnabled)
2061 MF->verify(this, "After splitting live range around basic blocks");
2062 return 0;
2063 }
2064
2065 //===----------------------------------------------------------------------===//
2066 // Per-Instruction Splitting
2067 //===----------------------------------------------------------------------===//
2068
2069 /// Get the number of allocatable registers that match the constraints of \p Reg
2070 /// on \p MI and that are also in \p SuperRC.
getNumAllocatableRegsForConstraints(const MachineInstr * MI,Register Reg,const TargetRegisterClass * SuperRC,const TargetInstrInfo * TII,const TargetRegisterInfo * TRI,const RegisterClassInfo & RCI)2071 static unsigned getNumAllocatableRegsForConstraints(
2072 const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
2073 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
2074 const RegisterClassInfo &RCI) {
2075 assert(SuperRC && "Invalid register class");
2076
2077 const TargetRegisterClass *ConstrainedRC =
2078 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
2079 /* ExploreBundle */ true);
2080 if (!ConstrainedRC)
2081 return 0;
2082 return RCI.getNumAllocatableRegs(ConstrainedRC);
2083 }
2084
2085 /// tryInstructionSplit - Split a live range around individual instructions.
2086 /// This is normally not worthwhile since the spiller is doing essentially the
2087 /// same thing. However, when the live range is in a constrained register
2088 /// class, it may help to insert copies such that parts of the live range can
2089 /// be moved to a larger register class.
2090 ///
2091 /// This is similar to spilling to a larger register class.
2092 unsigned
tryInstructionSplit(LiveInterval & VirtReg,AllocationOrder & Order,SmallVectorImpl<Register> & NewVRegs)2093 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
2094 SmallVectorImpl<Register> &NewVRegs) {
2095 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
2096 // There is no point to this if there are no larger sub-classes.
2097 if (!RegClassInfo.isProperSubClass(CurRC))
2098 return 0;
2099
2100 // Always enable split spill mode, since we're effectively spilling to a
2101 // register.
2102 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2103 SE->reset(LREdit, SplitEditor::SM_Size);
2104
2105 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
2106 if (Uses.size() <= 1)
2107 return 0;
2108
2109 LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
2110 << " individual instrs.\n");
2111
2112 const TargetRegisterClass *SuperRC =
2113 TRI->getLargestLegalSuperClass(CurRC, *MF);
2114 unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
2115 // Split around every non-copy instruction if this split will relax
2116 // the constraints on the virtual register.
2117 // Otherwise, splitting just inserts uncoalescable copies that do not help
2118 // the allocation.
2119 for (const auto &Use : Uses) {
2120 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
2121 if (MI->isFullCopy() ||
2122 SuperRCNumAllocatableRegs ==
2123 getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
2124 TII, TRI, RCI)) {
2125 LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
2126 continue;
2127 }
2128 SE->openIntv();
2129 SlotIndex SegStart = SE->enterIntvBefore(Use);
2130 SlotIndex SegStop = SE->leaveIntvAfter(Use);
2131 SE->useIntv(SegStart, SegStop);
2132 }
2133
2134 if (LREdit.empty()) {
2135 LLVM_DEBUG(dbgs() << "All uses were copies.\n");
2136 return 0;
2137 }
2138
2139 SmallVector<unsigned, 8> IntvMap;
2140 SE->finish(&IntvMap);
2141 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
2142 ExtraRegInfo.resize(MRI->getNumVirtRegs());
2143
2144 // Assign all new registers to RS_Spill. This was the last chance.
2145 setStage(LREdit.begin(), LREdit.end(), RS_Spill);
2146 return 0;
2147 }
2148
2149 //===----------------------------------------------------------------------===//
2150 // Local Splitting
2151 //===----------------------------------------------------------------------===//
2152
/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
/// Only valid for local intervals (a single use block); asserts otherwise.
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
          .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
      Matrix->getLiveUnions()[*Units] .find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      // Record the heaviest interfering interval in each covered gap; it is
      // the weight that would need to be evicted to use this gap.
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    // Fixed (reg-unit) interference can never be evicted, so the gap is
    // effectively unusable.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}
2232
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
/// \param VirtReg  The local live range to split.
/// \param Order    Allocation order of candidate physical registers.
/// \param NewVRegs Receives the virtual registers created by the split.
/// \return Always 0; new ranges (if any) are reported through NewVRegs.
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  // With two or fewer uses there is no interior gap worth splitting around.
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have less instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            // Scale the best diff by Hysteresis so a later candidate must be
            // clearly better (not just marginally) to displace this one.
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}
2459
2460 //===----------------------------------------------------------------------===//
2461 // Live Range Splitting
2462 //===----------------------------------------------------------------------===//
2463
/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<Register> &NewVRegs,
                            const SmallVirtRegSet &FixedRegisters) {
  // Ranges must be Split2 or less. Anything at RS_Spill or beyond has already
  // exhausted its splitting opportunities.
  if (getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately: they live entirely inside one
  // basic block, so the cheaper local/instruction splitters apply.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    // Fall back to splitting around individual instructions.
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // FIXME: SplitAnalysis may repair broken live ranges coming from the
  // coalescer. That may cause the range to become allocatable which means that
  // tryRegionSplit won't be making progress. This check should be replaced with
  // an assertion when the coalescer is fixed.
  if (SA->didRepairRange()) {
    // VirtReg has changed, so all cached queries are invalid.
    Matrix->invalidateVirtRegs();
    if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
      return PhysReg;
  }

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (getStage(VirtReg) < RS_Split2) {
    MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}
2513
2514 //===----------------------------------------------------------------------===//
2515 // Last Chance Recoloring
2516 //===----------------------------------------------------------------------===//
2517
2518 /// Return true if \p reg has any tied def operand.
hasTiedDef(MachineRegisterInfo * MRI,unsigned reg)2519 static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
2520 for (const MachineOperand &MO : MRI->def_operands(reg))
2521 if (MO.isTied())
2522 return true;
2523
2524 return false;
2525 }
2526
/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool RAGreedy::mayRecolorAllInterferences(
    MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates,
    const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());

  // Check each register unit of PhysReg independently; interference is
  // tracked per unit.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable. Bail out early unless the
    // exhaustive-search option disables cutoffs.
    if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
        LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
      LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg,
      // it would not be recolorable as it is in the same state as VirtReg.
      // However, if VirtReg has tied defs and Intf doesn't, then
      // there is still a point in examining if it can be recolorable.
      if (((getStage(*Intf) == RS_Done &&
            MRI->getRegClass(Intf->reg()) == CurRC) &&
           !(hasTiedDef(MRI, VirtReg.reg()) &&
             !hasTiedDef(MRI, Intf->reg()))) ||
          FixedRegisters.count(Intf->reg())) {
        LLVM_DEBUG(
            dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}
2569
/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual register that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<Register> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           unsigned Depth) {
  if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
    return ~0u;

  LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
  // Ranges must be Done.
  assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a too large search space
  // for target with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of Live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;
  // Record the original mapping virtual register to physical register in case
  // the recoloring fails.
  DenseMap<Register, MCRegister> VirtRegToPhysReg;
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  assert(!FixedRegisters.count(VirtReg.reg()));
  FixedRegisters.insert(VirtReg.reg());
  SmallVector<Register, 4> CurrentNewVRegs;

  // Try each candidate physical register in allocation-order preference.
  for (MCRegister PhysReg : Order) {
    assert(PhysReg.isValid());
    LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    VirtRegToPhysReg.clear();
    CurrentNewVRegs.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      LLVM_DEBUG(
          dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Early give up on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases).
    // Enqueue them for recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      enqueue(RecoloringQueue, RC);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
      // unset the related struct.
      Matrix->unassign(*RC);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferes and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
                                FixedRegisters, Depth)) {
      // Push the queued vregs into the main queue.
      for (Register NewVReg : CurrentNewVRegs)
        NewVRegs.push_back(NewVReg);
      // Do not mess up with the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
                      << printReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    // For a newly created vreg which is also in RecoloringCandidates,
    // don't add it to NewVRegs because its physical register will be restored
    // below. Other vregs in CurrentNewVRegs are created by calling
    // selectOrSplit and should be added into NewVRegs.
    for (Register &R : CurrentNewVRegs) {
      if (RecoloringCandidates.count(&LIS->getInterval(R)))
        continue;
      NewVRegs.push_back(R);
    }

    // Restore every candidate to the assignment it had before this attempt.
    for (LiveInterval *RC : RecoloringCandidates) {
      Register ItVirtReg = RC->reg();
      if (VRM->hasPhys(ItVirtReg))
        Matrix->unassign(*RC);
      MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg];
      Matrix->assign(*RC, ItPhysReg);
    }
  }

  // Last chance recoloring did not work either, give up.
  return ~0u;
}
2732
/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    LiveInterval *LI = dequeue(RecoloringQueue);
    LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    // Recurse into the allocator one level deeper for this interval.
    MCRegister PhysReg =
        selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
    // When splitting happens, the live-range may actually be empty.
    // In that case, this is okay to continue the recoloring even
    // if we did not find an alternative color for it. Indeed,
    // there will not be anything to color for LI in the end.
    if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
      return false;

    if (!PhysReg) {
      assert(LI->empty() && "Only empty live-range do not require a register");
      LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                        << " succeeded. Empty LI.\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
                      << " succeeded with: " << printReg(PhysReg, TRI) << '\n');

    // Commit the new color and freeze it for the rest of this session.
    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg());
  }
  return true;
}
2771
2772 //===----------------------------------------------------------------------===//
2773 // Main Entry Point
2774 //===----------------------------------------------------------------------===//
2775
selectOrSplit(LiveInterval & VirtReg,SmallVectorImpl<Register> & NewVRegs)2776 MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg,
2777 SmallVectorImpl<Register> &NewVRegs) {
2778 CutOffInfo = CO_None;
2779 LLVMContext &Ctx = MF->getFunction().getContext();
2780 SmallVirtRegSet FixedRegisters;
2781 MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
2782 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2783 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2784 if (CutOffEncountered == CO_Depth)
2785 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2786 "reached. Use -fexhaustive-register-search to skip "
2787 "cutoffs");
2788 else if (CutOffEncountered == CO_Interf)
2789 Ctx.emitError("register allocation failed: maximum interference for "
2790 "recoloring reached. Use -fexhaustive-register-search "
2791 "to skip cutoffs");
2792 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2793 Ctx.emitError("register allocation failed: maximum interference and "
2794 "depth for recoloring reached. Use "
2795 "-fexhaustive-register-search to skip cutoffs");
2796 }
2797 return Reg;
2798 }
2799
/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// Spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
MCRegister
RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
                                MCRegister PhysReg, uint8_t &CostPerUseLimit,
                                SmallVectorImpl<Register> &NewVRegs) {
  if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    // Returning 0 tells the caller not to use the CSR.
    return 0;
  }
  if (getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  // Neither spilling nor pre-splitting is cheaper: accept the CSR.
  return PhysReg;
}
2840
/// Forget about \p LI before it goes away so SetOfBrokenHints never holds a
/// dangling pointer to a deleted interval.
void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}
2845
/// Compute CSRCost, the block-frequency cost charged for the first use of a
/// callee-saved register, scaled to this function's entry frequency.
void RAGreedy::initializeCSRCost() {
  // We use the larger one out of the command-line option and the value report
  // by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  // A zero cost disables the CSR-first-time heuristic entirely.
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    // Scale down: multiply by ActualEntry/FixedEntry (< 1).
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}
2870
2871 /// Collect the hint info for \p Reg.
2872 /// The results are stored into \p Out.
2873 /// \p Out is not cleared before being populated.
collectHintInfo(Register Reg,HintsInfo & Out)2874 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2875 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2876 if (!Instr.isFullCopy())
2877 continue;
2878 // Look for the other end of the copy.
2879 Register OtherReg = Instr.getOperand(0).getReg();
2880 if (OtherReg == Reg) {
2881 OtherReg = Instr.getOperand(1).getReg();
2882 if (OtherReg == Reg)
2883 continue;
2884 }
2885 // Get the current assignment.
2886 MCRegister OtherPhysReg =
2887 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2888 // Push the collected information.
2889 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2890 OtherPhysReg));
2891 }
2892 }
2893
2894 /// Using the given \p List, compute the cost of the broken hints if
2895 /// \p PhysReg was used.
2896 /// \return The cost of \p List for \p PhysReg.
getBrokenHintFreq(const HintsInfo & List,MCRegister PhysReg)2897 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2898 MCRegister PhysReg) {
2899 BlockFrequency Cost = 0;
2900 for (const HintInfo &Info : List) {
2901 if (Info.PhysReg != PhysReg)
2902 Cost += Info.Freq;
2903 }
2904 return Cost;
2905 }
2906
/// Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<Register, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  Register Reg = VirtReg.reg();
  MCRegister PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
                    << '(' << printReg(PhysReg, TRI) << ")\n");

  // Worklist loop: pop a candidate, try to give it PhysReg, then push its
  // copy-related neighbors.
  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical register.
    if (Register::isPhysicalRegister(Reg))
      continue;

    assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    MCRegister CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
                      << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      LLVM_DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                        << "\nNew Cost: " << NewCopiesCost.getFrequency()
                        << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider this is profitable because it may expose
      // more recoloring opportunities.
      LLVM_DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}
2986
2987 /// Try to recolor broken hints.
2988 /// Broken hints may be repaired by recoloring when an evicted variable
2989 /// freed up a register for a larger live-range.
2990 /// Consider the following example:
2991 /// BB1:
2992 /// a =
2993 /// b =
2994 /// BB2:
2995 /// ...
2996 /// = b
2997 /// = a
2998 /// Let us assume b gets split:
2999 /// BB1:
3000 /// a =
3001 /// b =
3002 /// BB2:
3003 /// c = b
3004 /// ...
3005 /// d = c
3006 /// = d
3007 /// = a
3008 /// Because of how the allocation work, b, c, and d may be assigned different
3009 /// colors. Now, if a gets evicted later:
3010 /// BB1:
3011 /// a =
3012 /// st a, SpillSlot
3013 /// b =
3014 /// BB2:
3015 /// c = b
3016 /// ...
3017 /// d = c
3018 /// = d
3019 /// e = ld SpillSlot
3020 /// = e
3021 /// This is likely that we can assign the same register for b, c, and d,
3022 /// getting rid of 2 copies.
tryHintsRecoloring()3023 void RAGreedy::tryHintsRecoloring() {
3024 for (LiveInterval *LI : SetOfBrokenHints) {
3025 assert(Register::isVirtualRegister(LI->reg()) &&
3026 "Recoloring is possible only for virtual registers");
3027 // Some dead defs may be around (e.g., because of debug uses).
3028 // Ignore those.
3029 if (!VRM->hasPhys(LI->reg()))
3030 continue;
3031 tryHintRecoloring(*LI);
3032 }
3033 }
3034
/// Core allocation driver for a single virtual register: try assignment,
/// then eviction, then splitting, then last-chance recoloring, and finally
/// spilling. \p Depth tracks recursion from last-chance recoloring.
/// \return the selected physical register, 0 when the caller should not
/// allocate this round (deferred/split/spilled), or ~0u on failure.
MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
                                       SmallVectorImpl<Register> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  uint8_t CostPerUseLimit = uint8_t(~0u);
  // First try assigning a free register.
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  if (MCRegister PhysReg =
          tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
    // If VirtReg got an assignment, the eviction info is no longer relevant.
    LastEvicted.clearEvicteeInfo(VirtReg.reg());
    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register, go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) &&
        NewVRegs.empty()) {
      MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                                CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = getStage(VirtReg);
  LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
                    << ExtraRegInfo[VirtReg.reg()].Cascade << '\n');

  // Try to evict a less worthy live range, but only for ranges from the primary
  // queue. The RS_Split ranges already failed to do this, and they should not
  // get a second chance until they have been split.
  if (Stage != RS_Split)
    if (Register PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
                     FixedRegisters)) {
      Register Hint = MRI->getSimpleHint(VirtReg.reg());
      // If VirtReg has a hint and that hint is broken record this
      // virtual register as a recoloring candidate for broken hint.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      // If VirtReg evicted someone, the eviction info for it as an evictee is
      // no longer relevant.
      LastEvicted.clearEvicteeInfo(VirtReg.reg());
      return PhysReg;
    }

  assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    setStage(VirtReg, RS_Split);
    LLVM_DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg());
    return 0;
  }

  if (Stage < RS_Spill) {
    // Try splitting VirtReg or interferences.
    unsigned NewVRegSizeBefore = NewVRegs.size();
    Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
    if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
      // If VirtReg got split, the eviction info is no longer relevant.
      LastEvicted.clearEvicteeInfo(VirtReg.reg());
      return PhysReg;
    }
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable())
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   Depth);

  // Finally spill VirtReg itself.
  if ((EnableDeferredSpilling ||
       TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
      getStage(VirtReg) < RS_Memory) {
    // TODO: This is experimental and in particular, we do not model
    // the live range splitting done by spilling correctly.
    // We would need a deep integration with the spiller to do the
    // right thing here. Anyway, that is still good for early testing.
    setStage(VirtReg, RS_Memory);
    LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
    NewVRegs.push_back(VirtReg.reg());
  } else {
    NamedRegionTimer T("spill", "Spiller", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
    spiller().spill(LRE);
    setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

    // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
    // the new regs are kept in LDV (still mapping to the old register), until
    // we rewrite spilled locations in LDV at a later stage.
    DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);

    if (VerifyEnabled)
      MF->verify(this, "After spilling");
  }

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}
3147
report(MachineOptimizationRemarkMissed & R)3148 void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
3149 using namespace ore;
3150 if (Spills) {
3151 R << NV("NumSpills", Spills) << " spills ";
3152 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
3153 }
3154 if (FoldedSpills) {
3155 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
3156 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
3157 << " total folded spills cost ";
3158 }
3159 if (Reloads) {
3160 R << NV("NumReloads", Reloads) << " reloads ";
3161 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
3162 }
3163 if (FoldedReloads) {
3164 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
3165 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
3166 << " total folded reloads cost ";
3167 }
3168 if (ZeroCostFoldedReloads)
3169 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
3170 << " zero cost folded reloads ";
3171 if (Copies) {
3172 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
3173 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
3174 }
3175 }
3176
computeStats(MachineBasicBlock & MBB)3177 RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
3178 RAGreedyStats Stats;
3179 const MachineFrameInfo &MFI = MF->getFrameInfo();
3180 int FI;
3181
3182 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
3183 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
3184 A->getPseudoValue())->getFrameIndex());
3185 };
3186 auto isPatchpointInstr = [](const MachineInstr &MI) {
3187 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
3188 MI.getOpcode() == TargetOpcode::STACKMAP ||
3189 MI.getOpcode() == TargetOpcode::STATEPOINT;
3190 };
3191 for (MachineInstr &MI : MBB) {
3192 if (MI.isCopy()) {
3193 MachineOperand &Dest = MI.getOperand(0);
3194 MachineOperand &Src = MI.getOperand(1);
3195 if (Dest.isReg() && Src.isReg() && Dest.getReg().isVirtual() &&
3196 Src.getReg().isVirtual())
3197 ++Stats.Copies;
3198 continue;
3199 }
3200
3201 SmallVector<const MachineMemOperand *, 2> Accesses;
3202 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
3203 ++Stats.Reloads;
3204 continue;
3205 }
3206 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
3207 ++Stats.Spills;
3208 continue;
3209 }
3210 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
3211 llvm::any_of(Accesses, isSpillSlotAccess)) {
3212 if (!isPatchpointInstr(MI)) {
3213 Stats.FoldedReloads += Accesses.size();
3214 continue;
3215 }
3216 // For statepoint there may be folded and zero cost folded stack reloads.
3217 std::pair<unsigned, unsigned> NonZeroCostRange =
3218 TII->getPatchpointUnfoldableRange(MI);
3219 SmallSet<unsigned, 16> FoldedReloads;
3220 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
3221 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
3222 MachineOperand &MO = MI.getOperand(Idx);
3223 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
3224 continue;
3225 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
3226 FoldedReloads.insert(MO.getIndex());
3227 else
3228 ZeroCostFoldedReloads.insert(MO.getIndex());
3229 }
3230 // If stack slot is used in folded reload it is not zero cost then.
3231 for (unsigned Slot : FoldedReloads)
3232 ZeroCostFoldedReloads.erase(Slot);
3233 Stats.FoldedReloads += FoldedReloads.size();
3234 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
3235 continue;
3236 }
3237 Accesses.clear();
3238 if (TII->hasStoreToStackSlot(MI, Accesses) &&
3239 llvm::any_of(Accesses, isSpillSlotAccess)) {
3240 Stats.FoldedSpills += Accesses.size();
3241 }
3242 }
3243 // Set cost of collected statistic by multiplication to relative frequency of
3244 // this basic block.
3245 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
3246 Stats.ReloadsCost = RelFreq * Stats.Reloads;
3247 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
3248 Stats.SpillsCost = RelFreq * Stats.Spills;
3249 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
3250 Stats.CopiesCost = RelFreq * Stats.Copies;
3251 return Stats;
3252 }
3253
reportStats(MachineLoop * L)3254 RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
3255 RAGreedyStats Stats;
3256
3257 // Sum up the spill and reloads in subloops.
3258 for (MachineLoop *SubLoop : *L)
3259 Stats.add(reportStats(SubLoop));
3260
3261 for (MachineBasicBlock *MBB : L->getBlocks())
3262 // Handle blocks that were not included in subloops.
3263 if (Loops->getLoopFor(MBB) == L)
3264 Stats.add(computeStats(*MBB));
3265
3266 if (!Stats.isEmpty()) {
3267 using namespace ore;
3268
3269 ORE->emit([&]() {
3270 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
3271 L->getStartLoc(), L->getHeader());
3272 Stats.report(R);
3273 R << "generated in loop";
3274 return R;
3275 });
3276 }
3277 return Stats;
3278 }
3279
reportStats()3280 void RAGreedy::reportStats() {
3281 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
3282 return;
3283 RAGreedyStats Stats;
3284 for (MachineLoop *L : *Loops)
3285 Stats.add(reportStats(L));
3286 // Process non-loop blocks.
3287 for (MachineBasicBlock &MBB : *MF)
3288 if (!Loops->getLoopFor(&MBB))
3289 Stats.add(computeStats(MBB));
3290 if (!Stats.isEmpty()) {
3291 using namespace ore;
3292
3293 ORE->emit([&]() {
3294 DebugLoc Loc;
3295 if (auto *SP = MF->getFunction().getSubprogram())
3296 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
3297 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
3298 &MF->front());
3299 Stats.report(R);
3300 R << "generated in function";
3301 return R;
3302 });
3303 }
3304 }
3305
runOnMachineFunction(MachineFunction & mf)3306 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
3307 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
3308 << "********** Function: " << mf.getName() << '\n');
3309
3310 MF = &mf;
3311 TRI = MF->getSubtarget().getRegisterInfo();
3312 TII = MF->getSubtarget().getInstrInfo();
3313 RCI.runOnMachineFunction(mf);
3314
3315 EnableLocalReassign = EnableLocalReassignment ||
3316 MF->getSubtarget().enableRALocalReassignment(
3317 MF->getTarget().getOptLevel());
3318
3319 EnableAdvancedRASplitCost =
3320 ConsiderLocalIntervalCost.getNumOccurrences()
3321 ? ConsiderLocalIntervalCost
3322 : MF->getSubtarget().enableAdvancedRASplitCost();
3323
3324 if (VerifyEnabled)
3325 MF->verify(this, "Before greedy register allocator");
3326
3327 RegAllocBase::init(getAnalysis<VirtRegMap>(),
3328 getAnalysis<LiveIntervals>(),
3329 getAnalysis<LiveRegMatrix>());
3330 Indexes = &getAnalysis<SlotIndexes>();
3331 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
3332 DomTree = &getAnalysis<MachineDominatorTree>();
3333 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
3334 Loops = &getAnalysis<MachineLoopInfo>();
3335 Bundles = &getAnalysis<EdgeBundles>();
3336 SpillPlacer = &getAnalysis<SpillPlacement>();
3337 DebugVars = &getAnalysis<LiveDebugVariables>();
3338 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3339
3340 initializeCSRCost();
3341
3342 RegCosts = TRI->getRegisterCosts(*MF);
3343
3344 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
3345 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
3346
3347 VRAI->calculateSpillWeightsAndHints();
3348
3349 LLVM_DEBUG(LIS->dump());
3350
3351 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
3352 SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
3353 ExtraRegInfo.clear();
3354 ExtraRegInfo.resize(MRI->getNumVirtRegs());
3355 NextCascade = 1;
3356 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
3357 GlobalCand.resize(32); // This will grow as needed.
3358 SetOfBrokenHints.clear();
3359 LastEvicted.clear();
3360
3361 allocatePhysRegs();
3362 tryHintsRecoloring();
3363
3364 if (VerifyEnabled)
3365 MF->verify(this, "Before post optimization");
3366 postOptimization();
3367 reportStats();
3368
3369 releaseMemory();
3370 return true;
3371 }
3372