xref: /llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp (revision a3aa452a21f983237873fa85c866b9f0224789bd)
1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the generic RegisterCoalescer interface which
10 // is used as the common interface used by all clients and
11 // implementations of register coalescing.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "RegisterCoalescer.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/LiveInterval.h"
24 #include "llvm/CodeGen/LiveIntervals.h"
25 #include "llvm/CodeGen/LiveRangeEdit.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineLoopInfo.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/Passes.h"
35 #include "llvm/CodeGen/RegisterClassInfo.h"
36 #include "llvm/CodeGen/SlotIndexes.h"
37 #include "llvm/CodeGen/TargetInstrInfo.h"
38 #include "llvm/CodeGen/TargetOpcodes.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/CodeGen/TargetSubtargetInfo.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/MC/LaneBitmask.h"
44 #include "llvm/MC/MCInstrDesc.h"
45 #include "llvm/MC/MCRegisterInfo.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/Compiler.h"
49 #include "llvm/Support/Debug.h"
50 #include "llvm/Support/ErrorHandling.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include <algorithm>
53 #include <cassert>
54 #include <iterator>
55 #include <limits>
56 #include <tuple>
57 #include <utility>
58 #include <vector>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "regalloc"
63 
// Statistics reported under -stats for this pass.
STATISTIC(numJoins, "Number of interval joins performed");
STATISTIC(numCrossRCs, "Number of cross class joins performed");
STATISTIC(numCommutes, "Number of instruction commuting performed");
STATISTIC(numExtends, "Number of copies extended");
STATISTIC(NumReMats, "Number of instructions re-materialized");
STATISTIC(NumInflated, "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
STATISTIC(NumShrinkToUses, "Number of shrinkToUses called");

// Command-line knobs controlling coalescer behavior; several exist mainly
// for testing and compile-time tuning.
static cl::opt<bool> EnableJoining("join-liveintervals",
                                   cl::desc("Coalesce copies (default=true)"),
                                   cl::init(true), cl::Hidden);

static cl::opt<bool> UseTerminalRule("terminal-rule",
                                     cl::desc("Apply the terminal rule"),
                                     cl::init(false), cl::Hidden);

/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool> EnableJoinSplits(
    "join-splitedges",
    cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);

/// Temporary flag to test global copy optimization.
static cl::opt<cl::boolOrDefault> EnableGlobalCopies(
    "join-globalcopies",
    cl::desc("Coalesce copies that span blocks (default=subtarget)"),
    cl::init(cl::BOU_UNSET), cl::Hidden);

static cl::opt<bool> VerifyCoalescing(
    "verify-coalescing",
    cl::desc("Verify machine instrs before and after register coalescing"),
    cl::Hidden);

// Compile-time guard: batch live-interval updates when one def feeds many
// rematerialized copies.
static cl::opt<unsigned> LateRematUpdateThreshold(
    "late-remat-update-threshold", cl::Hidden,
    cl::desc("During rematerialization for a copy, if the def instruction has "
             "many other copy uses to be rematerialized, delay the multiple "
             "separate live interval update work and do them all at once after "
             "all those rematerialization are done. It will save a lot of "
             "repeated work. "),
    cl::init(100));

// Compile-time guard: valno count above which an interval counts as "large".
static cl::opt<unsigned> LargeIntervalSizeThreshold(
    "large-interval-size-threshold", cl::Hidden,
    cl::desc("If the valnos size of an interval is larger than the threshold, "
             "it is regarded as a large interval. "),
    cl::init(100));

// Compile-time guard: stop coalescing a large interval after this many visits.
static cl::opt<unsigned> LargeIntervalFreqThreshold(
    "large-interval-freq-threshold", cl::Hidden,
    cl::desc("For a large interval, if it is coalesced with other live "
             "intervals many times more than the threshold, stop its "
             "coalescing to control the compile time. "),
    cl::init(256));
119 
120 namespace {
121 
122 class JoinVals;
123 
/// Machine-function pass implementing generic copy coalescing on top of
/// LiveIntervals. It also acts as a LiveRangeEdit delegate so it is notified
/// when dead-def elimination erases instructions it may still reference.
class RegisterCoalescer : public MachineFunctionPass,
                          private LiveRangeEdit::Delegate {
  // Cached per-function analyses/objects; set up in runOnMachineFunction.
  MachineFunction *MF = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  LiveIntervals *LIS = nullptr;
  const MachineLoopInfo *Loops = nullptr;
  RegisterClassInfo RegClassInfo;

  /// Position and VReg of a PHI instruction during coalescing.
  struct PHIValPos {
    SlotIndex SI;    ///< Slot where this PHI occurs.
    Register Reg;    ///< VReg the PHI occurs in.
    unsigned SubReg; ///< Qualifying subregister for Reg.
  };

  /// Map from debug instruction number to PHI position during coalescing.
  DenseMap<unsigned, PHIValPos> PHIValToPos;
  /// Index of, for each VReg, which debug instruction numbers and
  /// corresponding PHIs are sensitive to coalescing. Each VReg may have
  /// multiple PHI defs, at different positions.
  DenseMap<Register, SmallVector<unsigned, 2>> RegToPHIIdx;

  /// Debug variable location tracking -- for each VReg, maintain an
  /// ordered-by-slot-index set of DBG_VALUEs, to help quick
  /// identification of whether coalescing may change location validity.
  using DbgValueLoc = std::pair<SlotIndex, MachineInstr *>;
  DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;

  /// A LaneMask to remember on which subregister live ranges we need to call
  /// shrinkToUses() later.
  LaneBitmask ShrinkMask;

  /// True if the main range of the currently coalesced intervals should be
  /// checked for smaller live intervals.
  bool ShrinkMainRange = false;

  /// True if the coalescer should aggressively coalesce global copies
  /// in favor of keeping local copies.
  bool JoinGlobalCopies = false;

  /// True if the coalescer should aggressively coalesce fall-thru
  /// blocks exclusively containing copies.
  bool JoinSplitEdges = false;

  /// Copy instructions yet to be coalesced.
  SmallVector<MachineInstr *, 8> WorkList;
  SmallVector<MachineInstr *, 8> LocalWorkList;

  /// Set of instruction pointers that have been erased, and
  /// that may be present in WorkList.
  SmallPtrSet<MachineInstr *, 8> ErasedInstrs;

  /// Dead instructions that are about to be deleted.
  SmallVector<MachineInstr *, 8> DeadDefs;

  /// Virtual registers to be considered for register class inflation.
  SmallVector<Register, 8> InflateRegs;

  /// The collection of live intervals which should have been updated
  /// immediately after rematerialiation but delayed until
  /// lateLiveIntervalUpdate is called.
  DenseSet<Register> ToBeUpdated;

  /// Record how many times the large live interval with many valnos
  /// has been tried to join with other live interval.
  DenseMap<Register, unsigned long> LargeLIVisitCounter;

  /// Recursively eliminate dead defs in DeadDefs.
  void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);

  /// LiveRangeEdit callback for eliminateDeadDefs().
  void LRE_WillEraseInstruction(MachineInstr *MI) override;

  /// Coalesce the LocalWorkList.
  void coalesceLocals();

  /// Join compatible live intervals
  void joinAllIntervals();

  /// Coalesce copies in the specified MBB, putting
  /// copies that cannot yet be coalesced into WorkList.
  void copyCoalesceInMBB(MachineBasicBlock *MBB);

  /// Tries to coalesce all copies in CurrList. Returns true if any progress
  /// was made.
  bool copyCoalesceWorkList(MutableArrayRef<MachineInstr *> CurrList);

  /// If one def has many copy like uses, and those copy uses are all
  /// rematerialized, the live interval update needed for those
  /// rematerializations will be delayed and done all at once instead
  /// of being done multiple times. This is to save compile cost because
  /// live interval update is costly.
  void lateLiveIntervalUpdate();

  /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
  /// has no value defined in the predecessors. If the incoming value is the
  /// same as defined by the copy itself, the value is considered undefined.
  bool copyValueUndefInPredecessors(LiveRange &S, const MachineBasicBlock *MBB,
                                    LiveQueryResult SLRQ);

  /// Set necessary undef flags on subregister uses after pruning out undef
  /// lane segments from the subrange.
  void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
                                  LaneBitmask PrunedLanes);

  /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
  /// src/dst of the copy instruction CopyMI.  This returns true if the copy
  /// was successfully coalesced away. If it is not currently possible to
  /// coalesce this interval, but it may be possible if other things get
  /// coalesced, then it returns true by reference in 'Again'.
  bool joinCopy(MachineInstr *CopyMI, bool &Again,
                SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs);

  /// Attempt to join these two intervals.  On failure, this
  /// returns false.  The output "SrcInt" will not have been modified, so we
  /// can use this information below to update aliases.
  bool joinIntervals(CoalescerPair &CP);

  /// Attempt joining two virtual registers. Return true on success.
  bool joinVirtRegs(CoalescerPair &CP);

  /// If a live interval has many valnos and is coalesced with other
  /// live intervals many times, we regard such live interval as having
  /// high compile time cost.
  bool isHighCostLiveInterval(LiveInterval &LI);

  /// Attempt joining with a reserved physreg.
  bool joinReservedPhysReg(CoalescerPair &CP);

  /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
  /// Subranges in @p LI which only partially interfere with the desired
  /// LaneMask are split as necessary. @p LaneMask are the lanes that
  /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
  /// lanemasks already adjusted to the coalesced register.
  void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
                         LaneBitmask LaneMask, CoalescerPair &CP,
                         unsigned DstIdx);

  /// Join the liveranges of two subregisters. Joins @p RRange into
  /// @p LRange, @p RRange may be invalid afterwards.
  void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                        LaneBitmask LaneMask, const CoalescerPair &CP);

  /// We found a non-trivially-coalescable copy. If the source value number is
  /// defined by a copy from the destination reg see if we can merge these two
  /// destination reg valno# into a single value number, eliminating a copy.
  /// This returns true if an interval was modified.
  bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);

  /// Return true if there are definitions of IntB
  /// other than BValNo val# that can reach uses of AValno val# of IntA.
  bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
                            VNInfo *AValNo, VNInfo *BValNo);

  /// We found a non-trivially-coalescable copy.
  /// If the source value number is defined by a commutable instruction and
  /// its other operand is coalesced to the copy dest register, see if we
  /// can transform the copy into a noop by commuting the definition.
  /// This returns a pair of two flags:
  /// - the first element is true if an interval was modified,
  /// - the second element is true if the destination interval needs
  ///   to be shrunk after deleting the copy.
  std::pair<bool, bool> removeCopyByCommutingDef(const CoalescerPair &CP,
                                                 MachineInstr *CopyMI);

  /// We found a copy which can be moved to its less frequent predecessor.
  bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);

  /// If the source of a copy is defined by a
  /// trivial computation, replace the copy by rematerialize the definition.
  bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
                               bool &IsDefCopy);

  /// Return true if a copy involving a physreg should be joined.
  bool canJoinPhys(const CoalescerPair &CP);

  /// Replace all defs and uses of SrcReg to DstReg and update the subregister
  /// number if it is not zero. If DstReg is a physical register and the
  /// existing subregister number of the def / use being updated is not zero,
  /// make sure to set it to the correct physical subregister.
  void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);

  /// If the given machine operand reads only undefined lanes add an undef
  /// flag.
  /// This can happen when undef uses were previously concealed by a copy
  /// which we coalesced. Example:
  ///    %0:sub0<def,read-undef> = ...
  ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
  ///       = use %1:sub1       <-- hidden undef use
  void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
                    MachineOperand &MO, unsigned SubRegIdx);

  /// Handle copies of undef values. If the undef value is an incoming
  /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
  /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
  /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
  MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);

  /// Check whether or not we should apply the terminal rule on the
  /// destination (Dst) of \p Copy.
  /// When the terminal rule applies, Copy is not profitable to
  /// coalesce.
  /// Dst is terminal if it has exactly one affinity (Dst, Src) and
  /// at least one interference (Dst, Dst2). If Dst is terminal, the
  /// terminal rule consists in checking that at least one of
  /// interfering node, say Dst2, has an affinity of equal or greater
  /// weight with Src.
  /// In that case, Dst2 and Dst will not be able to be both coalesced
  /// with Src. Since Dst2 exposes more coalescing opportunities than
  /// Dst, we can drop \p Copy.
  bool applyTerminalRule(const MachineInstr &Copy) const;

  /// Wrapper method for \see LiveIntervals::shrinkToUses.
  /// This method does the proper fixing of the live-ranges when the afore
  /// mentioned method returns true.
  void shrinkToUses(LiveInterval *LI,
                    SmallVectorImpl<MachineInstr *> *Dead = nullptr) {
    NumShrinkToUses++;
    if (LIS->shrinkToUses(LI, Dead)) {
      /// Check whether or not \p LI is composed by multiple connected
      /// components and if that is the case, fix that.
      SmallVector<LiveInterval *, 8> SplitLIs;
      LIS->splitSeparateComponents(*LI, SplitLIs);
    }
  }

  /// Wrapper Method to do all the necessary work when an Instruction is
  /// deleted.
  /// Optimizations should use this to make sure that deleted instructions
  /// are always accounted for.
  void deleteInstr(MachineInstr *MI) {
    ErasedInstrs.insert(MI);
    LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }

  /// Walk over function and initialize the DbgVRegToValues map.
  void buildVRegToDbgValueMap(MachineFunction &MF);

  /// Test whether, after merging, any DBG_VALUEs would refer to a
  /// different value number than before merging, and whether this can
  /// be resolved. If not, mark the DBG_VALUE as being undef.
  void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
                                    JoinVals &LHSVals, LiveRange &RHS,
                                    JoinVals &RHSVals);

  void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
                                        LiveRange &RegRange, JoinVals &Vals2);

public:
  static char ID; ///< Class identification, replacement for typeinfo

  RegisterCoalescer() : MachineFunctionPass(ID) {
    initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  // Coalescing merges vreg defs, so the function is no longer in SSA form.
  MachineFunctionProperties getClearedProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void releaseMemory() override;

  /// This is the pass entry point.
  bool runOnMachineFunction(MachineFunction &) override;

  /// Implement the dump method.
  void print(raw_ostream &O, const Module * = nullptr) const override;
};
397 
398 } // end anonymous namespace
399 
// Pass identification: the address of ID is the unique pass identity.
char RegisterCoalescer::ID = 0;

// Exported handle other passes use to refer to this pass by ID.
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;

// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
                      "Register Coalescer", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
                    "Register Coalescer", false, false)
411 
412 [[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
413                                       const MachineInstr *MI, Register &Src,
414                                       Register &Dst, unsigned &SrcSub,
415                                       unsigned &DstSub) {
416   if (MI->isCopy()) {
417     Dst = MI->getOperand(0).getReg();
418     DstSub = MI->getOperand(0).getSubReg();
419     Src = MI->getOperand(1).getReg();
420     SrcSub = MI->getOperand(1).getSubReg();
421   } else if (MI->isSubregToReg()) {
422     Dst = MI->getOperand(0).getReg();
423     DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
424                                       MI->getOperand(3).getImm());
425     Src = MI->getOperand(2).getReg();
426     SrcSub = MI->getOperand(2).getSubReg();
427   } else
428     return false;
429   return true;
430 }
431 
432 /// Return true if this block should be vacated by the coalescer to eliminate
433 /// branches. The important cases to handle in the coalescer are critical edges
434 /// split during phi elimination which contain only copies. Simple blocks that
435 /// contain non-branches should also be vacated, but this can be handled by an
436 /// earlier pass similar to early if-conversion.
437 static bool isSplitEdge(const MachineBasicBlock *MBB) {
438   if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
439     return false;
440 
441   for (const auto &MI : *MBB) {
442     if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
443       return false;
444   }
445   return true;
446 }
447 
/// Initialize this CoalescerPair from the copy-like instruction \p MI,
/// normalizing so that any physreg ends up as DstReg (setting Flipped if the
/// operands had to be swapped) and computing the common register class and
/// sub-register indices. Returns false if \p MI is not a coalescable move.
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
  // Reset all state before analyzing the new instruction.
  SrcReg = DstReg = Register();
  SrcIdx = DstIdx = 0;
  NewRC = nullptr;
  Flipped = CrossClass = false;

  Register Src, Dst;
  unsigned SrcSub = 0, DstSub = 0;
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    return false;
  Partial = SrcSub || DstSub;

  // If one register is a physreg, it must be Dst.
  if (Src.isPhysical()) {
    if (Dst.isPhysical())
      return false;
    std::swap(Src, Dst);
    std::swap(SrcSub, DstSub);
    Flipped = true;
  }

  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();

  if (Dst.isPhysical()) {
    // Eliminate DstSub on a physreg.
    if (DstSub) {
      Dst = TRI.getSubReg(Dst, DstSub);
      if (!Dst)
        return false;
      DstSub = 0;
    }

    // Eliminate SrcSub by picking a corresponding Dst superregister.
    if (SrcSub) {
      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
      if (!Dst)
        return false;
    } else if (!MRI.getRegClass(Src)->contains(Dst)) {
      // A full copy requires Dst to be a member of Src's register class.
      return false;
    }
  } else {
    // Both registers are virtual.
    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);

    // Both registers have subreg indices.
    if (SrcSub && DstSub) {
      // Copies between different sub-registers are never coalescable.
      if (Src == Dst && SrcSub != DstSub)
        return false;

      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, SrcIdx,
                                         DstIdx);
      if (!NewRC)
        return false;
    } else if (DstSub) {
      // SrcReg will be merged with a sub-register of DstReg.
      SrcIdx = DstSub;
      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
    } else if (SrcSub) {
      // DstReg will be merged with a sub-register of SrcReg.
      DstIdx = SrcSub;
      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
    } else {
      // This is a straight copy without sub-registers.
      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
    }

    // The combined constraint may be impossible to satisfy.
    if (!NewRC)
      return false;

    // Prefer SrcReg to be a sub-register of DstReg.
    // FIXME: Coalescer should support subregs symmetrically.
    if (DstIdx && !SrcIdx) {
      std::swap(Src, Dst);
      std::swap(SrcIdx, DstIdx);
      Flipped = !Flipped;
    }

    CrossClass = NewRC != DstRC || NewRC != SrcRC;
  }
  // Check our invariants
  assert(Src.isVirtual() && "Src must be virtual");
  assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
  SrcReg = Src;
  DstReg = Dst;
  return true;
}
537 
538 bool CoalescerPair::flip() {
539   if (DstReg.isPhysical())
540     return false;
541   std::swap(SrcReg, DstReg);
542   std::swap(SrcIdx, DstIdx);
543   Flipped = !Flipped;
544   return true;
545 }
546 
547 bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
548   if (!MI)
549     return false;
550   Register Src, Dst;
551   unsigned SrcSub = 0, DstSub = 0;
552   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
553     return false;
554 
555   // Find the virtual register that is SrcReg.
556   if (Dst == SrcReg) {
557     std::swap(Src, Dst);
558     std::swap(SrcSub, DstSub);
559   } else if (Src != SrcReg) {
560     return false;
561   }
562 
563   // Now check that Dst matches DstReg.
564   if (DstReg.isPhysical()) {
565     if (!Dst.isPhysical())
566       return false;
567     assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
568     // DstSub could be set for a physreg from INSERT_SUBREG.
569     if (DstSub)
570       Dst = TRI.getSubReg(Dst, DstSub);
571     // Full copy of Src.
572     if (!SrcSub)
573       return DstReg == Dst;
574     // This is a partial register copy. Check that the parts match.
575     return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
576   } else {
577     // DstReg is virtual.
578     if (DstReg != Dst)
579       return false;
580     // Registers match, do the subregisters line up?
581     return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
582            TRI.composeSubRegIndices(DstIdx, DstSub);
583   }
584 }
585 
/// Declare the analyses this pass requires and preserves. Coalescing keeps
/// LiveIntervals, SlotIndexes, loop info, and dominators up to date, and
/// never changes the CFG.
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<LiveIntervalsWrapperPass>();
  AU.addPreserved<LiveIntervalsWrapperPass>();
  AU.addPreserved<SlotIndexesWrapperPass>();
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addPreserved<MachineLoopInfoWrapperPass>();
  AU.addPreservedID(MachineDominatorsID);
  MachineFunctionPass::getAnalysisUsage(AU);
}
596 
597 void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
598   if (Edit) {
599     Edit->eliminateDeadDefs(DeadDefs);
600     return;
601   }
602   SmallVector<Register, 8> NewRegs;
603   LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
604       .eliminateDeadDefs(DeadDefs);
605 }
606 
607 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
608   // MI may be in WorkList. Make sure we don't visit it.
609   ErasedInstrs.insert(MI);
610 }
611 
/// Given a non-trivially-coalescable copy A -> B whose source value was itself
/// defined by a full copy from B, extend B's earlier value over the gap and
/// merge the two B value numbers, making the copy redundant. Returns true if
/// IntB (and possibly IntA) was modified. Requires a full-register, virtual
/// register pair (asserted below).
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
                                             MachineInstr *CopyMI) {
  assert(!CP.isPartial() && "This doesn't work for partial copies.");
  assert(!CP.isPhys() && "This doesn't work for physreg copies.");

  // Orient A = source interval, B = destination interval, regardless of
  // whether the pair was flipped when it was set up.
  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();

  // We have a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB.  If the
  // source value number (in IntA) is defined by a copy from B, see if we can
  // merge these two pieces of B into a single value number, eliminating a copy.
  // For example:
  //
  //  A3 = B0
  //    ...
  //  B1 = A3      <- this copy
  //
  // In this case, B0 can be extended to where the B1 copy lives, allowing the
  // B1 value number to be replaced with B0 (which simplifies the B
  // liveinterval).

  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
  // the example above.
  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
  if (BS == IntB.end())
    return false;
  VNInfo *BValNo = BS->valno;

  // Get the location that B is defined at.  Two options: either this value has
  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
  // can't process it.
  if (BValNo->def != CopyIdx)
    return false;

  // AValNo is the value number in A that defines the copy, A3 in the example.
  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
  // The live segment might not exist after fun with physreg coalescing.
  if (AS == IntA.end())
    return false;
  VNInfo *AValNo = AS->valno;

  // If AValNo is defined as a copy from IntB, we can potentially process this.
  // Get the instruction that defines this value number.
  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
  // Don't allow any partial copies, even if isCoalescable() allows them.
  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
    return false;

  // Get the Segment in IntB that this value number starts with.
  LiveInterval::iterator ValS =
      IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
  if (ValS == IntB.end())
    return false;

  // Make sure that the end of the live segment is inside the same block as
  // CopyMI.
  MachineInstr *ValSEndInst =
      LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
    return false;

  // Okay, we now know that ValS ends in the same block that the CopyMI
  // live-range starts.  If there are no intervening live segments between them
  // in IntB, we can merge them.
  if (ValS + 1 != BS)
    return false;

  LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));

  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
  // We are about to delete CopyMI, so need to remove it as the 'instruction
  // that defines this value #'. Update the valnum with the new defining
  // instruction #.
  BValNo->def = FillerStart;

  // Okay, we can merge them.  We need to insert a new liverange:
  // [ValS.end, BS.begin) of either value number, then we merge the
  // two value numbers.
  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));

  // Okay, merge "B1" into the same value number as "B0".
  if (BValNo != ValS->valno)
    IntB.MergeValueNumberInto(BValNo, ValS->valno);

  // Do the same for the subregister segments.
  for (LiveInterval::SubRange &S : IntB.subranges()) {
    // Check for SubRange Segments of the form [1234r,1234d:0) which can be
    // removed to prevent creating bogus SubRange Segments.
    LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
    if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
      S.removeSegment(*SS, true);
      continue;
    }
    // The subrange may have ended before FillerStart. If so, extend it.
    if (!S.getVNInfoAt(FillerStart)) {
      SlotIndex BBStart =
          LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
      S.extendInBlock(BBStart, FillerStart);
    }
    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
    if (SubBValNo != SubValSNo)
      S.MergeValueNumberInto(SubBValNo, SubValSNo);
  }

  LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');

  // If the source instruction was killing the source register before the
  // merge, unset the isKill marker given the live range has been extended.
  int UIdx =
      ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), /*TRI=*/nullptr, true);
  if (UIdx != -1) {
    ValSEndInst->getOperand(UIdx).setIsKill(false);
  }

  // Rewrite the copy.
  CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
  // If the copy instruction was killing the destination register or any
  // subrange before the merge trim the live range.
  bool RecomputeLiveRange = AS->end == CopyIdx;
  if (!RecomputeLiveRange) {
    for (LiveInterval::SubRange &S : IntA.subranges()) {
      LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
      if (SS != S.end() && SS->end == CopyIdx) {
        RecomputeLiveRange = true;
        break;
      }
    }
  }
  if (RecomputeLiveRange)
    shrinkToUses(&IntA);

  ++numExtends;
  return true;
}
753 
754 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
755                                              LiveInterval &IntB, VNInfo *AValNo,
756                                              VNInfo *BValNo) {
757   // If AValNo has PHI kills, conservatively assume that IntB defs can reach
758   // the PHI values.
759   if (LIS->hasPHIKill(IntA, AValNo))
760     return true;
761 
762   for (LiveRange::Segment &ASeg : IntA.segments) {
763     if (ASeg.valno != AValNo)
764       continue;
765     LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
766     if (BI != IntB.begin())
767       --BI;
768     for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
769       if (BI->valno == BValNo)
770         continue;
771       if (BI->start <= ASeg.start && BI->end > ASeg.start)
772         return true;
773       if (BI->start > ASeg.start && BI->start < ASeg.end)
774         return true;
775     }
776   }
777   return false;
778 }
779 
780 /// Copy segments with value number @p SrcValNo from liverange @p Src to live
781 /// range @Dst and use value number @p DstValNo there.
782 static std::pair<bool, bool> addSegmentsWithValNo(LiveRange &Dst,
783                                                   VNInfo *DstValNo,
784                                                   const LiveRange &Src,
785                                                   const VNInfo *SrcValNo) {
786   bool Changed = false;
787   bool MergedWithDead = false;
788   for (const LiveRange::Segment &S : Src.segments) {
789     if (S.valno != SrcValNo)
790       continue;
791     // This is adding a segment from Src that ends in a copy that is about
792     // to be removed. This segment is going to be merged with a pre-existing
793     // segment in Dst. This works, except in cases when the corresponding
794     // segment in Dst is dead. For example: adding [192r,208r:1) from Src
795     // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
796     // Recognized such cases, so that the segments can be shrunk.
797     LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
798     LiveRange::Segment &Merged = *Dst.addSegment(Added);
799     if (Merged.end.isDead())
800       MergedWithDead = true;
801     Changed = true;
802   }
803   return std::make_pair(Changed, MergedWithDead);
804 }
805 
// Try to turn the copy B = A into an identity copy by commuting the
// instruction that defines A so that it writes B instead.
// Returns {Success, ShrinkB}: Success is true when the transformation was
// performed; ShrinkB is true when segments merged into IntB ended in a dead
// def (see addSegmentsWithValNo), so the caller should shrink IntB's live
// range afterwards.
std::pair<bool, bool>
RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
                                            MachineInstr *CopyMI) {
  assert(!CP.isPhys());

  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // We found a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB.  If the
  // source value number (in IntA) is defined by a commutable instruction and
  // its other operand is coalesced to the copy dest register, see if we can
  // transform the copy into a noop by commuting the definition. For example,
  //
  //  A3 = op A2 killed B0
  //    ...
  //  B1 = A3      <- this copy
  //    ...
  //     = op A3   <- more uses
  //
  // ==>
  //
  //  B2 = op B0 killed A2
  //    ...
  //  B1 = B2      <- now an identity copy
  //    ...
  //     = op B2   <- more uses

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
  assert(BValNo != nullptr && BValNo->def == CopyIdx);

  // AValNo is the value number in A that defines the copy, A3 in the example.
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  // A PHI def has no single defining instruction to commute.
  if (AValNo->isPHIDef())
    return {false, false};
  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
  if (!DefMI)
    return {false, false};
  if (!DefMI->isCommutable())
    return {false, false};
  // If DefMI is a two-address instruction then commuting it will change the
  // destination register.
  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg(), /*TRI=*/nullptr);
  assert(DefIdx != -1);
  unsigned UseOpIdx;
  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
    return {false, false};

  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
  // commutable operand which is expressed by the 'CommuteAnyOperandIndex'
  // value passed to the method. That _other_ operand is chosen by
  // the findCommutedOpIndices() method.
  //
  // That is obviously an area for improvement in case of instructions having
  // more than 2 operands. For example, if some instruction has 3 commutable
  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
  // op#2<->op#3) of commute transformation should be considered/tried here.
  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
  if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
    return {false, false};

  // The commuted-in operand must be IntB itself, and IntB's value must die at
  // DefMI (the 'killed B0' in the example) or the commute would clobber a
  // live value.
  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
  Register NewReg = NewDstMO.getReg();
  if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
    return {false, false};

  // Make sure there are no other definitions of IntB that would reach the
  // uses which the new definition can reach.
  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
    return {false, false};

  // If some of the uses of IntA.reg are already coalesced away, return false.
  // It's not possible to determine whether it's safe to perform the coalescing.
  for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
    MachineInstr *UseMI = MO.getParent();
    unsigned OpNo = &MO - &UseMI->getOperand(0);
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    if (US == IntA.end() || US->valno != AValNo)
      continue;
    // If this use is tied to a def, we can't rewrite the register.
    if (UseMI->isRegTiedToDefOperand(OpNo))
      return {false, false};
  }

  LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
                    << *DefMI);

  // At this point we have decided that it is legal to do this
  // transformation.  Start by commuting the instruction.
  MachineBasicBlock *MBB = DefMI->getParent();
  MachineInstr *NewMI =
      TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
  if (!NewMI)
    return {false, false};
  if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
      !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
    return {false, false};
  // commuteInstruction may return a fresh instruction rather than mutating
  // DefMI in place; if so, splice the new one in at the same position and
  // transfer the slot index.
  if (NewMI != DefMI) {
    LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
    MachineBasicBlock::iterator Pos = DefMI;
    MBB->insert(Pos, NewMI);
    MBB->erase(DefMI);
  }

  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
  // A = or A, B
  // ...
  // B = A
  // ...
  // C = killed A
  // ...
  //   = B

  // Update uses of IntA of the specific Val# with IntB.
  // NOTE: uses early-inc iteration because rewritten identity copies are
  // deleted inside the loop.
  for (MachineOperand &UseMO :
       llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
    if (UseMO.isUndef())
      continue;
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI->isDebugInstr()) {
      // FIXME These don't have an instruction index.  Not clear we have enough
      // info to decide whether to do this replacement or not.  For now do it.
      UseMO.setReg(NewReg);
      continue;
    }
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    assert(US != IntA.end() && "Use must be live");
    // Only rewrite uses reached by the commuted def's value number.
    if (US->valno != AValNo)
      continue;
    // Kill flags are no longer accurate. They are recomputed after RA.
    UseMO.setIsKill(false);
    if (NewReg.isPhysical())
      UseMO.substPhysReg(NewReg, *TRI);
    else
      UseMO.setReg(NewReg);
    if (UseMI == CopyMI)
      continue;
    if (!UseMI->isCopy())
      continue;
    if (UseMI->getOperand(0).getReg() != IntB.reg() ||
        UseMI->getOperand(0).getSubReg())
      continue;

    // This copy will become a noop. If it's defining a new val#, merge it into
    // BValNo.
    SlotIndex DefIdx = UseIdx.getRegSlot();
    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
    if (!DVNI)
      continue;
    LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
    assert(DVNI->def == DefIdx);
    BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
    // Mirror the value-number merge on every subrange of IntB.
    for (LiveInterval::SubRange &S : IntB.subranges()) {
      VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
      if (!SubDVNI)
        continue;
      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
      assert(SubBValNo->def == CopyIdx);
      S.MergeValueNumberInto(SubDVNI, SubBValNo);
    }

    deleteInstr(UseMI);
  }

  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
  // is updated.
  bool ShrinkB = false;
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
  if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
    // Keep both intervals' subrange structure in sync: if only one side
    // tracks subranges, give the other a single full-mask subrange.
    if (!IntA.hasSubRanges()) {
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
      IntA.createSubRangeFrom(Allocator, Mask, IntA);
    } else if (!IntB.hasSubRanges()) {
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
      IntB.createSubRangeFrom(Allocator, Mask, IntB);
    }
    SlotIndex AIdx = CopyIdx.getRegSlot(true);
    LaneBitmask MaskA;
    const SlotIndexes &Indexes = *LIS->getSlotIndexes();
    for (LiveInterval::SubRange &SA : IntA.subranges()) {
      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
      // Even if we are dealing with a full copy, some lanes can
      // still be undefined.
      // E.g.,
      // undef A.subLow = ...
      // B = COPY A <== A.subHigh is undefined here and does
      //                not have a value number.
      if (!ASubValNo)
        continue;
      MaskA |= SA.LaneMask;

      // Split IntB's subranges as needed so each matching lane set receives
      // the segments of SA's value, mirroring the main-range merge below.
      IntB.refineSubRanges(
          Allocator, SA.LaneMask,
          [&Allocator, &SA, CopyIdx, ASubValNo,
           &ShrinkB](LiveInterval::SubRange &SR) {
            VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
                                           : SR.getVNInfoAt(CopyIdx);
            assert(BSubValNo != nullptr);
            auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
            ShrinkB |= P.second;
            if (P.first)
              BSubValNo->def = ASubValNo->def;
          },
          Indexes, *TRI);
    }
    // Go over all subranges of IntB that have not been covered by IntA,
    // and delete the segments starting at CopyIdx. This can happen if
    // IntA has undef lanes that are defined in IntB.
    for (LiveInterval::SubRange &SB : IntB.subranges()) {
      if ((SB.LaneMask & MaskA).any())
        continue;
      if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
        if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
          SB.removeSegment(*S, true);
    }
  }

  // Move B's def to where A was defined and pull A's segments into B.
  BValNo->def = AValNo->def;
  auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
  ShrinkB |= P.second;
  LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');

  LIS->removeVRegDefAt(IntA, AValNo->def);

  LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
  ++numCommutes;
  return {true, ShrinkB};
}
1042 
1043 /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
1044 /// predecessor of BB2, and if B is not redefined on the way from A = B
1045 /// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
1046 /// execution goes through the path from BB0 to BB2. We may move B = A
1047 /// to the predecessor without such reversed copy.
1048 /// So we will transform the program from:
1049 ///   BB0:
1050 ///      A = B;    BB1:
1051 ///       ...         ...
1052 ///     /     \      /
1053 ///             BB2:
1054 ///               ...
1055 ///               B = A;
1056 ///
1057 /// to:
1058 ///
1059 ///   BB0:         BB1:
1060 ///      A = B;        ...
1061 ///       ...          B = A;
1062 ///     /     \       /
1063 ///             BB2:
1064 ///               ...
1065 ///
1066 /// A special case is when BB0 and BB2 are the same BB which is the only
1067 /// BB in a loop:
1068 ///   BB1:
1069 ///        ...
1070 ///   BB0/BB2:  ----
1071 ///        B = A;   |
1072 ///        ...      |
1073 ///        A = B;   |
1074 ///          |-------
1075 ///          |
1076 /// We may hoist B = A from BB0/BB2 to BB1.
1077 ///
1078 /// The major preconditions for correctness to remove such partial
1079 /// redundancy include:
1080 /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1081 ///    the PHI is defined by the reversed copy A = B in BB0.
1082 /// 2. No B is referenced from the start of BB2 to B = A.
1083 /// 3. No B is defined from A = B to the end of BB0.
1084 /// 4. BB1 has only one successor.
1085 ///
1086 /// 2 and 4 implicitly ensure B is not live at the end of BB1.
1087 /// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
1088 /// colder place, which not only prevent endless loop, but also make sure
1089 /// the movement of copy is beneficial.
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
                                                MachineInstr &CopyMI) {
  assert(!CP.isPhys());
  if (!CopyMI.isFullCopy())
    return false;

  MachineBasicBlock &MBB = *CopyMI.getParent();
  // If this block is the target of an invoke/inlineasm_br, moving the copy into
  // the predecessor is trickier, and we don't handle it.
  if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget())
    return false;

  // The transform only handles the two-predecessor shape described in the
  // function comment.
  if (MBB.pred_size() != 2)
    return false;

  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // A is defined by PHI at the entry of MBB (precondition 1).
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  if (!AValNo->isPHIDef())
    return false;

  // No B is referenced before CopyMI in MBB (precondition 2).
  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
    return false;

  // MBB has two predecessors: one contains A = B so no copy will be inserted
  // for it. The other one will have a copy moved from MBB.
  bool FoundReverseCopy = false;
  MachineBasicBlock *CopyLeftBB = nullptr;
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
    if (!DefMI || !DefMI->isFullCopy()) {
      CopyLeftBB = Pred;
      continue;
    }
    // Check DefMI is a reverse copy and it is in BB Pred.
    if (DefMI->getOperand(0).getReg() != IntA.reg() ||
        DefMI->getOperand(1).getReg() != IntB.reg() ||
        DefMI->getParent() != Pred) {
      CopyLeftBB = Pred;
      continue;
    }
    // If there is any other def of B after DefMI and before the end of Pred,
    // we need to keep the copy of B = A at the end of Pred if we remove
    // B = A from MBB (precondition 3).
    bool ValB_Changed = false;
    for (auto *VNI : IntB.valnos) {
      if (VNI->isUnused())
        continue;
      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
        ValB_Changed = true;
        break;
      }
    }
    if (ValB_Changed) {
      CopyLeftBB = Pred;
      continue;
    }
    FoundReverseCopy = true;
  }

  // If no reverse copy is found in predecessors, nothing to do.
  if (!FoundReverseCopy)
    return false;

  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
  // update IntA/IntB.
  //
  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
  // MBB is hotter than CopyLeftBB (precondition 4).
  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
    return false;

  // Now (almost sure it's) ok to move copy.
  if (CopyLeftBB) {
    // Position in CopyLeftBB where we should insert new copy.
    auto InsPos = CopyLeftBB->getFirstTerminator();

    // Make sure that B isn't referenced in the terminators (if any) at the end
    // of the predecessor since we're about to insert a new definition of B
    // before them.
    if (InsPos != CopyLeftBB->end()) {
      SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
      if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
        return false;
    }

    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
                      << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);

    // Insert new copy to CopyLeftBB.
    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
                                      TII->get(TargetOpcode::COPY), IntB.reg())
                                  .addReg(IntA.reg());
    SlotIndex NewCopyIdx =
        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
    // Start the new def as dead; liveness is recomputed by the prune/extend
    // sequence below.
    IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
    for (LiveInterval::SubRange &SR : IntB.subranges())
      SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());

    // If the newly created Instruction has an address of an instruction that
    // was deleted before (object recycled by the allocator) it needs to be
    // removed from the deleted list.
    ErasedInstrs.erase(NewCopyMI);
  } else {
    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
                      << printMBBReference(MBB) << '\t' << CopyMI);
  }

  // Remember before deletion: an undef source needs extra cleanup below.
  const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();

  // Remove CopyMI.
  // Note: This is fine to remove the copy before updating the live-ranges.
  // While updating the live-ranges, we only look at slot indices and
  // never go back to the instruction.
  // Mark instructions as deleted.
  deleteInstr(&CopyMI);

  // Update the liveness.
  SmallVector<SlotIndex, 8> EndPoints;
  VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
  LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
                  &EndPoints);
  BValNo->markUnused();

  if (IsUndefCopy) {
    // We're introducing an undef phi def, and need to set undef on any users of
    // the previously local def to avoid artificially extending the lifetime
    // through the block.
    for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
      const MachineInstr &MI = *MO.getParent();
      SlotIndex UseIdx = LIS->getInstructionIndex(MI);
      if (!IntB.liveAt(UseIdx))
        MO.setIsUndef(true);
    }
  }

  // Extend IntB to the EndPoints of its original live interval.
  LIS->extendToIndices(IntB, EndPoints);

  // Now, do the same for its subranges.
  for (LiveInterval::SubRange &SR : IntB.subranges()) {
    EndPoints.clear();
    VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
    assert(BValNo && "All sublanes should be live");
    LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
    BValNo->markUnused();
    // We can have a situation where the result of the original copy is live,
    // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
    // the copy appear as an endpoint from pruneValue(), but we don't want it
    // to because the copy has been removed.  We can go ahead and remove that
    // endpoint; there is no other situation here that there could be a use at
    // the same place as we know that the copy is a full copy.
    // Swap-with-back erase keeps this O(n) without preserving order.
    for (unsigned I = 0; I != EndPoints.size();) {
      if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
        EndPoints[I] = EndPoints.back();
        EndPoints.pop_back();
        continue;
      }
      ++I;
    }
    SmallVector<SlotIndex, 8> Undefs;
    IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
                               *LIS->getSlotIndexes());
    LIS->extendToIndices(SR, EndPoints, Undefs);
  }
  // If any dead defs were extended, truncate them.
  shrinkToUses(&IntB);

  // Finally, update the live-range of IntA.
  shrinkToUses(&IntA);
  return true;
}
1272 
1273 /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1274 /// defining a subregister.
1275 static bool definesFullReg(const MachineInstr &MI, Register Reg) {
1276   assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
1277 
1278   for (const MachineOperand &Op : MI.all_defs()) {
1279     if (Op.getReg() != Reg)
1280       continue;
1281     // Return true if we define the full register or don't care about the value
1282     // inside other subregisters.
1283     if (Op.getSubReg() == 0 || Op.isUndef())
1284       return true;
1285   }
1286   return false;
1287 }
1288 
1289 bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1290                                                 MachineInstr *CopyMI,
1291                                                 bool &IsDefCopy) {
1292   IsDefCopy = false;
1293   Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1294   unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1295   Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1296   unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1297   if (SrcReg.isPhysical())
1298     return false;
1299 
1300   LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1301   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1302   VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1303   if (!ValNo)
1304     return false;
1305   if (ValNo->isPHIDef() || ValNo->isUnused())
1306     return false;
1307   MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
1308   if (!DefMI)
1309     return false;
1310   if (DefMI->isCopyLike()) {
1311     IsDefCopy = true;
1312     return false;
1313   }
1314   if (!TII->isAsCheapAsAMove(*DefMI))
1315     return false;
1316 
1317   SmallVector<Register, 8> NewRegs;
1318   LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
1319   if (!Edit.checkRematerializable(ValNo, DefMI))
1320     return false;
1321 
1322   if (!definesFullReg(*DefMI, SrcReg))
1323     return false;
1324   bool SawStore = false;
1325   if (!DefMI->isSafeToMove(SawStore))
1326     return false;
1327   const MCInstrDesc &MCID = DefMI->getDesc();
1328   if (MCID.getNumDefs() != 1)
1329     return false;
1330 
1331   // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1332   // the register substantially (beyond both source and dest size). This is bad
1333   // for performance since it can cascade through a function, introducing many
1334   // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1335   // around after a few subreg copies).
1336   if (SrcIdx && DstIdx)
1337     return false;
1338 
1339   // Only support subregister destinations when the def is read-undef.
1340   MachineOperand &DstOperand = CopyMI->getOperand(0);
1341   Register CopyDstReg = DstOperand.getReg();
1342   if (DstOperand.getSubReg() && !DstOperand.isUndef())
1343     return false;
1344 
1345   // In the physical register case, checking that the def is read-undef is not
1346   // enough. We're widening the def and need to avoid clobbering other live
1347   // values in the unused register pieces.
1348   //
1349   // TODO: Targets may support rewriting the rematerialized instruction to only
1350   // touch relevant lanes, in which case we don't need any liveness check.
1351   if (CopyDstReg.isPhysical() && CP.isPartial()) {
1352     for (MCRegUnit Unit : TRI->regunits(DstReg)) {
1353       // Ignore the register units we are writing anyway.
1354       if (is_contained(TRI->regunits(CopyDstReg), Unit))
1355         continue;
1356 
1357       // Check if the other lanes we are defining are live at the
1358       // rematerialization point.
1359       LiveRange &LR = LIS->getRegUnit(Unit);
1360       if (LR.liveAt(CopyIdx))
1361         return false;
1362     }
1363   }
1364 
1365   const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
1366   const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1367   if (!DefMI->isImplicitDef()) {
1368     if (DstReg.isPhysical()) {
1369       Register NewDstReg = DstReg;
1370 
1371       unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), DefSubIdx);
1372       if (NewDstIdx)
1373         NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1374 
1375       // Finally, make sure that the physical subregister that will be
1376       // constructed later is permitted for the instruction.
1377       if (!DefRC->contains(NewDstReg))
1378         return false;
1379     } else {
1380       // Theoretically, some stack frame reference could exist. Just make sure
1381       // it hasn't actually happened.
1382       assert(DstReg.isVirtual() &&
1383              "Only expect to deal with virtual or physical registers");
1384     }
1385   }
1386 
1387   LiveRangeEdit::Remat RM(ValNo);
1388   RM.OrigMI = DefMI;
1389   if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
1390     return false;
1391 
1392   DebugLoc DL = CopyMI->getDebugLoc();
1393   MachineBasicBlock *MBB = CopyMI->getParent();
1394   MachineBasicBlock::iterator MII =
1395       std::next(MachineBasicBlock::iterator(CopyMI));
1396   Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
1397   MachineInstr &NewMI = *std::prev(MII);
1398   NewMI.setDebugLoc(DL);
1399 
1400   // In a situation like the following:
1401   //     %0:subreg = instr              ; DefMI, subreg = DstIdx
1402   //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
1403   // instead of widening %1 to the register class of %0 simply do:
1404   //     %1 = instr
1405   const TargetRegisterClass *NewRC = CP.getNewRC();
1406   if (DstIdx != 0) {
1407     MachineOperand &DefMO = NewMI.getOperand(0);
1408     if (DefMO.getSubReg() == DstIdx) {
1409       assert(SrcIdx == 0 && CP.isFlipped() &&
1410              "Shouldn't have SrcIdx+DstIdx at this point");
1411       const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1412       const TargetRegisterClass *CommonRC =
1413           TRI->getCommonSubClass(DefRC, DstRC);
1414       if (CommonRC != nullptr) {
1415         NewRC = CommonRC;
1416 
1417         // Instruction might contain "undef %0:subreg" as use operand:
1418         //   %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
1419         //
1420         // Need to check all operands.
1421         for (MachineOperand &MO : NewMI.operands()) {
1422           if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
1423             MO.setSubReg(0);
1424           }
1425         }
1426 
1427         DstIdx = 0;
1428         DefMO.setIsUndef(false); // Only subregs can have def+undef.
1429       }
1430     }
1431   }
1432 
1433   // CopyMI may have implicit operands, save them so that we can transfer them
1434   // over to the newly materialized instruction after CopyMI is removed.
1435   SmallVector<MachineOperand, 4> ImplicitOps;
1436   ImplicitOps.reserve(CopyMI->getNumOperands() -
1437                       CopyMI->getDesc().getNumOperands());
1438   for (unsigned I = CopyMI->getDesc().getNumOperands(),
1439                 E = CopyMI->getNumOperands();
1440        I != E; ++I) {
1441     MachineOperand &MO = CopyMI->getOperand(I);
1442     if (MO.isReg()) {
1443       assert(MO.isImplicit() &&
1444              "No explicit operands after implicit operands.");
1445       assert((MO.getReg().isPhysical() ||
1446               (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
1447              "unexpected implicit virtual register def");
1448       ImplicitOps.push_back(MO);
1449     }
1450   }
1451 
1452   CopyMI->eraseFromParent();
1453   ErasedInstrs.insert(CopyMI);
1454 
1455   // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1456   // We need to remember these so we can add intervals once we insert
1457   // NewMI into SlotIndexes.
1458   //
1459   // We also expect to have tied implicit-defs of super registers originating
1460   // from SUBREG_TO_REG, such as:
1461   // $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
1462   // undef %0.sub_32bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
1463   //
1464   // The implicit-def of the super register may have been reduced to
1465   // subregisters depending on the uses.
1466 
1467   bool NewMIDefinesFullReg = false;
1468 
1469   SmallVector<MCRegister, 4> NewMIImplDefs;
1470   for (unsigned i = NewMI.getDesc().getNumOperands(),
1471                 e = NewMI.getNumOperands();
1472        i != e; ++i) {
1473     MachineOperand &MO = NewMI.getOperand(i);
1474     if (MO.isReg() && MO.isDef()) {
1475       assert(MO.isImplicit());
1476       if (MO.getReg().isPhysical()) {
1477         if (MO.getReg() == DstReg)
1478           NewMIDefinesFullReg = true;
1479 
1480         assert(MO.isImplicit() && MO.getReg().isPhysical() &&
1481                (MO.isDead() ||
1482                 (DefSubIdx &&
1483                  ((TRI->getSubReg(MO.getReg(), DefSubIdx) ==
1484                    MCRegister((unsigned)NewMI.getOperand(0).getReg())) ||
1485                   TRI->isSubRegisterEq(NewMI.getOperand(0).getReg(),
1486                                        MO.getReg())))));
1487         NewMIImplDefs.push_back(MO.getReg().asMCReg());
1488       } else {
1489         assert(MO.getReg() == NewMI.getOperand(0).getReg());
1490 
1491         // We're only expecting another def of the main output, so the range
1492         // should get updated with the regular output range.
1493         //
1494         // FIXME: The range updating below probably needs updating to look at
1495         // the super register if subranges are tracked.
1496         assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1497                "subrange update for implicit-def of super register may not be "
1498                "properly handled");
1499       }
1500     }
1501   }
1502 
1503   if (DstReg.isVirtual()) {
1504     unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1505 
1506     if (DefRC != nullptr) {
1507       if (NewIdx)
1508         NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1509       else
1510         NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1511       assert(NewRC && "subreg chosen for remat incompatible with instruction");
1512     }
1513 
1514     // Remap subranges to new lanemask and change register class.
1515     LiveInterval &DstInt = LIS->getInterval(DstReg);
1516     for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1517       SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1518     }
1519     MRI->setRegClass(DstReg, NewRC);
1520 
1521     // Update machine operands and add flags.
1522     updateRegDefsUses(DstReg, DstReg, DstIdx);
1523     NewMI.getOperand(0).setSubReg(NewIdx);
1524     // updateRegDefUses can add an "undef" flag to the definition, since
1525     // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1526     // sure that "undef" is not set.
1527     if (NewIdx == 0)
1528       NewMI.getOperand(0).setIsUndef(false);
1529 
1530     // In a situation like the following:
1531     //
1532     //    undef %2.subreg:reg = INST %1:reg    ; DefMI (rematerializable),
1533     //                                         ; Defines only some of lanes,
1534     //                                         ; so DefSubIdx = NewIdx = subreg
1535     //    %3:reg = COPY %2                     ; Copy full reg
1536     //    .... = SOMEINSTR %3:reg              ; Use full reg
1537     //
1538     // there are no subranges for %3 so after rematerialization we need
1539     // to explicitly create them. Undefined subranges are removed later on.
1540     if (NewIdx && !DstInt.hasSubRanges() &&
1541         MRI->shouldTrackSubRegLiveness(DstReg)) {
1542       LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstReg);
1543       LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(NewIdx);
1544       LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
1545       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
1546       DstInt.createSubRangeFrom(Alloc, UsedLanes, DstInt);
1547       DstInt.createSubRangeFrom(Alloc, UnusedLanes, DstInt);
1548     }
1549 
1550     // Add dead subregister definitions if we are defining the whole register
1551     // but only part of it is live.
1552     // This could happen if the rematerialization instruction is rematerializing
1553     // more than actually is used in the register.
1554     // An example would be:
1555     // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1556     // ; Copying only part of the register here, but the rest is undef.
1557     // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1558     // ==>
1559     // ; Materialize all the constants but only using one
1560     // %2 = LOAD_CONSTANTS 5, 8
1561     //
1562     // at this point for the part that wasn't defined before we could have
1563     // subranges missing the definition.
1564     if (NewIdx == 0 && DstInt.hasSubRanges()) {
1565       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1566       SlotIndex DefIndex =
1567           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1568       LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1569       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
1570       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1571         if (!SR.liveAt(DefIndex))
1572           SR.createDeadDef(DefIndex, Alloc);
1573         MaxMask &= ~SR.LaneMask;
1574       }
1575       if (MaxMask.any()) {
1576         LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1577         SR->createDeadDef(DefIndex, Alloc);
1578       }
1579     }
1580 
1581     // Make sure that the subrange for resultant undef is removed
1582     // For example:
1583     //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
1584     //   %2 = COPY %1
1585     // ==>
1586     //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
1587     //     ; Correct but need to remove the subrange for %2:sub0
1588     //     ; as it is now undef
1589     if (NewIdx != 0 && DstInt.hasSubRanges()) {
1590       // The affected subregister segments can be removed.
1591       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1592       LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1593       bool UpdatedSubRanges = false;
1594       SlotIndex DefIndex =
1595           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1596       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
1597       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1598         if ((SR.LaneMask & DstMask).none()) {
1599           LLVM_DEBUG(dbgs()
1600                      << "Removing undefined SubRange "
1601                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1602 
1603           if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1604             // VNI is in ValNo - remove any segments in this SubRange that have
1605             // this ValNo
1606             SR.removeValNo(RmValNo);
1607           }
1608 
1609           // We may not have a defined value at this point, but still need to
1610           // clear out any empty subranges tentatively created by
1611           // updateRegDefUses. The original subrange def may have only undefed
1612           // some lanes.
1613           UpdatedSubRanges = true;
1614         } else {
1615           // We know that this lane is defined by this instruction,
1616           // but at this point it may be empty because it is not used by
1617           // anything. This happens when updateRegDefUses adds the missing
1618           // lanes. Assign that lane a dead def so that the interferences
1619           // are properly modeled.
1620           if (SR.empty())
1621             SR.createDeadDef(DefIndex, Alloc);
1622         }
1623       }
1624       if (UpdatedSubRanges)
1625         DstInt.removeEmptySubRanges();
1626     }
1627   } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1628     // The New instruction may be defining a sub-register of what's actually
1629     // been asked for. If so it must implicitly define the whole thing.
1630     assert(DstReg.isPhysical() &&
1631            "Only expect virtual or physical registers in remat");
1632     NewMI.getOperand(0).setIsDead(true);
1633 
1634     if (!NewMIDefinesFullReg) {
1635       NewMI.addOperand(MachineOperand::CreateReg(
1636           CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1637     }
1638 
1639     // Record small dead def live-ranges for all the subregisters
1640     // of the destination register.
1641     // Otherwise, variables that live through may miss some
1642     // interferences, thus creating invalid allocation.
1643     // E.g., i386 code:
1644     // %1 = somedef ; %1 GR8
1645     // %2 = remat ; %2 GR32
1646     // CL = COPY %2.sub_8bit
1647     // = somedef %1 ; %1 GR8
1648     // =>
1649     // %1 = somedef ; %1 GR8
1650     // dead ECX = remat ; implicit-def CL
1651     // = somedef %1 ; %1 GR8
1652     // %1 will see the interferences with CL but not with CH since
1653     // no live-ranges would have been created for ECX.
1654     // Fix that!
1655     SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1656     for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
1657       if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1658         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1659   }
1660 
1661   NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
1662 
1663   // Transfer over implicit operands to the rematerialized instruction.
1664   for (MachineOperand &MO : ImplicitOps)
1665     NewMI.addOperand(MO);
1666 
1667   SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1668   for (MCRegister Reg : NewMIImplDefs) {
1669     for (MCRegUnit Unit : TRI->regunits(Reg))
1670       if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1671         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1672   }
1673 
1674   LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1675   ++NumReMats;
1676 
1677   // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1678   // to describe DstReg instead.
1679   if (MRI->use_nodbg_empty(SrcReg)) {
1680     for (MachineOperand &UseMO :
1681          llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
1682       MachineInstr *UseMI = UseMO.getParent();
1683       if (UseMI->isDebugInstr()) {
1684         if (DstReg.isPhysical())
1685           UseMO.substPhysReg(DstReg, *TRI);
1686         else
1687           UseMO.setReg(DstReg);
1688         // Move the debug value directly after the def of the rematerialized
1689         // value in DstReg.
1690         MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1691         LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1692       }
1693     }
1694   }
1695 
1696   if (ToBeUpdated.count(SrcReg))
1697     return true;
1698 
1699   unsigned NumCopyUses = 0;
1700   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1701     if (UseMO.getParent()->isCopyLike())
1702       NumCopyUses++;
1703   }
1704   if (NumCopyUses < LateRematUpdateThreshold) {
1705     // The source interval can become smaller because we removed a use.
1706     shrinkToUses(&SrcInt, &DeadDefs);
1707     if (!DeadDefs.empty())
1708       eliminateDeadDefs(&Edit);
1709   } else {
1710     ToBeUpdated.insert(SrcReg);
1711   }
1712   return true;
1713 }
1714 
/// Detect a copy whose source value is <undef> and eliminate it.
/// Returns the (possibly morphed) instruction if the copy was an undef copy
/// that has been handled, or nullptr if CopyMI is a normal copy that must be
/// coalesced the usual way. On success the caller is expected to delete the
/// returned instruction unless it was turned into an IMPLICIT_DEF.
MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
  // ProcessImplicitDefs may leave some copies of <undef> values, it only
  // removes local variables. When we have a copy like:
  //
  //   %1 = COPY undef %2
  //
  // We delete the copy and remove the corresponding value number from %1.
  // Any uses of that value number are marked as <undef>.

  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
  // CoalescerPair may have a new register class with adjusted subreg indices
  // at this point.
  Register SrcReg, DstReg;
  unsigned SrcSubIdx = 0, DstSubIdx = 0;
  if (!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
    return nullptr;

  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
  // CopyMI is undef iff SrcReg is not live before the instruction.
  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
    // With subrange tracking, only the lanes the copy actually reads matter:
    // if any of them is live at the copy, this is not an undef copy.
    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
      if ((SR.LaneMask & SrcMask).none())
        continue;
      if (SR.liveAt(Idx))
        return nullptr;
    }
  } else if (SrcLI.liveAt(Idx))
    return nullptr;

  // If the undef copy defines a live-out value (i.e. an input to a PHI def),
  // then replace it with an IMPLICIT_DEF.
  LiveInterval &DstLI = LIS->getInterval(DstReg);
  SlotIndex RegIndex = Idx.getRegSlot();
  LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
  assert(Seg != nullptr && "No segment for defining instruction");
  VNInfo *V = DstLI.getVNInfoAt(Seg->end);

  // The source interval may also have been on an undef use, in which case the
  // copy introduced a live value.
  if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
    // Strip all use operands (and, for SUBREG_TO_REG, the immediate subreg
    // index) so the instruction can be morphed into an IMPLICIT_DEF of the
    // destination. Iterate backwards so removeOperand does not shift the
    // indices of operands not yet visited.
    for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
      MachineOperand &MO = CopyMI->getOperand(i - 1);
      if (MO.isReg()) {
        if (MO.isUse())
          CopyMI->removeOperand(i - 1);
      } else {
        assert(MO.isImm() &&
               CopyMI->getOpcode() == TargetOpcode::SUBREG_TO_REG);
        CopyMI->removeOperand(i - 1);
      }
    }

    CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
    LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
                         "implicit def\n");
    return CopyMI;
  }

  // Remove any DstReg segments starting at the instruction.
  LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");

  // Remove value or merge with previous one in case of a subregister def.
  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
    VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
    DstLI.MergeValueNumberInto(VNI, PrevVNI);

    // The affected subregister segments can be removed.
    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
      if ((SR.LaneMask & DstMask).none())
        continue;

      VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
      SR.removeValNo(SVNI);
    }
    DstLI.removeEmptySubRanges();
  } else
    LIS->removeVRegDefAt(DstLI, RegIndex);

  // Mark uses as undef.
  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
    if (MO.isDef() /*|| MO.isUndef()*/)
      continue;
    const MachineInstr &MI = *MO.getParent();
    SlotIndex UseIdx = LIS->getInstructionIndex(MI);
    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
    bool isLive;
    // A subregister use is still live if any overlapping subrange is live at
    // the use; a full-register use falls back to the main range query.
    if (!UseMask.all() && DstLI.hasSubRanges()) {
      isLive = false;
      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
        if ((SR.LaneMask & UseMask).none())
          continue;
        if (SR.liveAt(UseIdx)) {
          isLive = true;
          break;
        }
      }
    } else
      isLive = DstLI.liveAt(UseIdx);
    if (isLive)
      continue;
    MO.setIsUndef(true);
    LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
  }

  // A def of a subregister may be a use of the other subregisters, so
  // deleting a def of a subregister may also remove uses. Since CopyMI
  // is still part of the function (but about to be erased), mark all
  // defs of DstReg in it as <undef>, so that shrinkToUses would
  // ignore them.
  for (MachineOperand &MO : CopyMI->all_defs())
    if (MO.getReg() == DstReg)
      MO.setIsUndef(true);
  LIS->shrinkToUses(&DstLI);

  return CopyMI;
}
1835 
1836 void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1837                                      MachineOperand &MO, unsigned SubRegIdx) {
1838   LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1839   if (MO.isDef())
1840     Mask = ~Mask;
1841   bool IsUndef = true;
1842   for (const LiveInterval::SubRange &S : Int.subranges()) {
1843     if ((S.LaneMask & Mask).none())
1844       continue;
1845     if (S.liveAt(UseIdx)) {
1846       IsUndef = false;
1847       break;
1848     }
1849   }
1850   if (IsUndef) {
1851     MO.setIsUndef(true);
1852     // We found out some subregister use is actually reading an undefined
1853     // value. In some cases the whole vreg has become undefined at this
1854     // point so we have to potentially shrink the main range if the
1855     // use was ending a live segment there.
1856     LiveQueryResult Q = Int.Query(UseIdx);
1857     if (Q.valueOut() == nullptr)
1858       ShrinkMainRange = true;
1859   }
1860 }
1861 
/// Rewrite every operand mentioning SrcReg to mention DstReg instead,
/// composing SubIdx into subregister indices, and fix up <undef> flags so
/// that full defs do not silently become read-modify-write subregister defs
/// (or vice versa). When DstReg is virtual and subreg liveness is tracked,
/// missing subranges are created on demand so undef flags can be computed.
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
                                          unsigned SubIdx) {
  bool DstIsPhys = DstReg.isPhysical();
  LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);

  // First pass over existing DstReg operands: subregister reads of DstReg may
  // now be reading undefined lanes and need the <undef> flag.
  if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
    for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
      if (MO.isUndef())
        continue;
      unsigned SubReg = MO.getSubReg();
      // A full def writes every lane; it cannot read an undef value.
      if (SubReg == 0 && MO.isDef())
        continue;

      MachineInstr &MI = *MO.getParent();
      if (MI.isDebugInstr())
        continue;
      SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
      addUndefFlag(*DstInt, UseIdx, MO, SubReg);
    }
  }

  SmallPtrSet<MachineInstr *, 8> Visited;
  // Advance the iterator before rewriting: substVirtReg/substPhysReg moves
  // the operand off SrcReg's use-def chain, invalidating the current slot.
  for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg),
                                               E = MRI->reg_instr_end();
       I != E;) {
    MachineInstr *UseMI = &*(I++);

    // Each instruction can only be rewritten once because sub-register
    // composition is not always idempotent. When SrcReg != DstReg, rewriting
    // the UseMI operands removes them from the SrcReg use-def chain, but when
    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
    // operands mentioning the virtual register.
    if (SrcReg == DstReg && !Visited.insert(UseMI).second)
      continue;

    SmallVector<unsigned, 8> Ops;
    bool Reads, Writes;
    std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);

    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
    // because SrcReg is a sub-register.
    if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
      Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));

    // Replace SrcReg with DstReg in all UseMI operands.
    for (unsigned Op : Ops) {
      MachineOperand &MO = UseMI->getOperand(Op);

      // Adjust <undef> flags in case of sub-register joins. We don't want to
      // turn a full def into a read-modify-write sub-register def and vice
      // versa.
      if (SubIdx && MO.isDef())
        MO.setIsUndef(!Reads);

      // A subreg use of a partially undef (super) register may be a complete
      // undef use now and then has to be marked that way.
      if (MO.isUse() && !MO.isUndef() && !DstIsPhys) {
        unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
        if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
          if (!DstInt->hasSubRanges()) {
            // Lazily create the subranges: the lanes covered by SubIdx get a
            // copy of the main range, the rest start out empty.
            BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
            LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
            LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
            LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
            DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
            // The unused lanes are just empty live-ranges at this point.
            // It is the caller responsibility to set the proper
            // dead segments if there is an actual dead def of the
            // unused lanes. This may happen with rematerialization.
            DstInt->createSubRange(Allocator, UnusedLanes);
          }
          SlotIndex MIIdx = UseMI->isDebugInstr()
                                ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
                                : LIS->getInstructionIndex(*UseMI);
          SlotIndex UseIdx = MIIdx.getRegSlot(true);
          addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
        }
      }

      if (DstIsPhys)
        MO.substPhysReg(DstReg, *TRI);
      else
        MO.substVirtReg(DstReg, SubIdx, *TRI);
    }

    LLVM_DEBUG({
      dbgs() << "\t\tupdated: ";
      if (!UseMI->isDebugInstr())
        dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
      dbgs() << *UseMI;
    });
  }
}
1955 
1956 bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1957   // Always join simple intervals that are defined by a single copy from a
1958   // reserved register. This doesn't increase register pressure, so it is
1959   // always beneficial.
1960   if (!MRI->isReserved(CP.getDstReg())) {
1961     LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1962     return false;
1963   }
1964 
1965   LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1966   if (JoinVInt.containsOneValue())
1967     return true;
1968 
1969   LLVM_DEBUG(
1970       dbgs() << "\tCannot join complex intervals into reserved register.\n");
1971   return false;
1972 }
1973 
1974 bool RegisterCoalescer::copyValueUndefInPredecessors(
1975     LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
1976   for (const MachineBasicBlock *Pred : MBB->predecessors()) {
1977     SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
1978     if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
1979       // If this is a self loop, we may be reading the same value.
1980       if (V->id != SLRQ.valueOutOrDead()->id)
1981         return false;
1982     }
1983   }
1984 
1985   return true;
1986 }
1987 
1988 void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
1989                                                    Register Reg,
1990                                                    LaneBitmask PrunedLanes) {
1991   // If we had other instructions in the segment reading the undef sublane
1992   // value, we need to mark them with undef.
1993   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
1994     unsigned SubRegIdx = MO.getSubReg();
1995     if (SubRegIdx == 0 || MO.isUndef())
1996       continue;
1997 
1998     LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1999     SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
2000     for (LiveInterval::SubRange &S : LI.subranges()) {
2001       if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
2002         MO.setIsUndef();
2003         break;
2004       }
2005     }
2006   }
2007 
2008   LI.removeEmptySubRanges();
2009 
2010   // A def of a subregister may be a use of other register lanes. Replacing
2011   // such a def with a def of a different register will eliminate the use,
2012   // and may cause the recorded live range to be larger than the actual
2013   // liveness in the program IR.
2014   LIS->shrinkToUses(&LI);
2015 }
2016 
/// Attempt to coalesce the copy CopyMI. Returns true when the copy was acted
/// upon (joined, deleted as dead/identity, or rematerialized away). Sets
/// Again=true when joining failed in a way that might succeed on a later
/// round over the work list. Successfully joined copies previously recorded
/// in ErasedInstrs are moved into CurrentErasedInstrs so the caller can
/// detect re-encounters within the current iteration.
bool RegisterCoalescer::joinCopy(
    MachineInstr *CopyMI, bool &Again,
    SmallPtrSetImpl<MachineInstr *> &CurrentErasedInstrs) {
  Again = false;
  LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);

  CoalescerPair CP(*TRI);
  if (!CP.setRegisters(CopyMI)) {
    LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
    return false;
  }

  // Give the target a chance to veto the coalesce for this pair of register
  // classes and subreg indices.
  if (CP.getNewRC()) {
    auto SrcRC = MRI->getRegClass(CP.getSrcReg());
    auto DstRC = MRI->getRegClass(CP.getDstReg());
    unsigned SrcIdx = CP.getSrcIdx();
    unsigned DstIdx = CP.getDstIdx();
    if (CP.isFlipped()) {
      std::swap(SrcIdx, DstIdx);
      std::swap(SrcRC, DstRC);
    }
    if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
                             CP.getNewRC(), *LIS)) {
      LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
      return false;
    }
  }

  // Dead code elimination. This really should be handled by MachineDCE, but
  // sometimes dead copies slip through, and we can't generate invalid live
  // ranges.
  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
    LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
    DeadDefs.push_back(CopyMI);
    eliminateDeadDefs();
    return true;
  }

  // Eliminate undefs.
  if (!CP.isPhys()) {
    // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
    if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
      if (UndefMI->isImplicitDef())
        return false;
      deleteInstr(CopyMI);
      return false; // Not coalescable.
    }
  }

  // Coalesced copies are normally removed immediately, but transformations
  // like removeCopyByCommutingDef() can inadvertently create identity copies.
  // When that happens, just join the values and remove the copy.
  if (CP.getSrcReg() == CP.getDstReg()) {
    LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
    LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
    const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
    LiveQueryResult LRQ = LI.Query(CopyIdx);
    if (VNInfo *DefVNI = LRQ.valueDefined()) {
      VNInfo *ReadVNI = LRQ.valueIn();
      assert(ReadVNI && "No value before copy and no <undef> flag.");
      assert(ReadVNI != DefVNI && "Cannot read and define the same value.");

      // Track incoming undef lanes we need to eliminate from the subrange.
      LaneBitmask PrunedLanes;
      MachineBasicBlock *MBB = CopyMI->getParent();

      // Process subregister liveranges.
      for (LiveInterval::SubRange &S : LI.subranges()) {
        LiveQueryResult SLRQ = S.Query(CopyIdx);
        if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
          if (VNInfo *SReadVNI = SLRQ.valueIn())
            SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);

          // If this copy introduced an undef subrange from an incoming value,
          // we need to eliminate the undef live in values from the subrange.
          if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
            LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
            PrunedLanes |= S.LaneMask;
            S.removeValNo(SDefVNI);
          }
        }
      }

      LI.MergeValueNumberInto(DefVNI, ReadVNI);
      if (PrunedLanes.any()) {
        LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: " << PrunedLanes
                          << '\n');
        setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
      }

      LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
    }
    deleteInstr(CopyMI);
    return true;
  }

  // Enforce policies.
  if (CP.isPhys()) {
    LLVM_DEBUG(dbgs() << "\tConsidering merging "
                      << printReg(CP.getSrcReg(), TRI) << " with "
                      << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
    if (!canJoinPhys(CP)) {
      // Before giving up coalescing, if definition of source is defined by
      // trivial computation, try rematerializing it.
      bool IsDefCopy = false;
      if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
        return true;
      if (IsDefCopy)
        Again = true; // May be possible to coalesce later.
      return false;
    }
  } else {
    // When possible, let DstReg be the larger interval.
    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
                               LIS->getInterval(CP.getDstReg()).size())
      CP.flip();

    LLVM_DEBUG({
      dbgs() << "\tConsidering merging to "
             << TRI->getRegClassName(CP.getNewRC()) << " with ";
      if (CP.getDstIdx() && CP.getSrcIdx())
        dbgs() << printReg(CP.getDstReg()) << " in "
               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
               << printReg(CP.getSrcReg()) << " in "
               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
      else
        dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
               << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
    });
  }

  // Reset per-join state; joinIntervals and updateRegDefsUses may set these
  // to request shrinking below.
  ShrinkMask = LaneBitmask::getNone();
  ShrinkMainRange = false;

  // Okay, attempt to join these two intervals.  On failure, this returns false.
  // Otherwise, if one of the intervals being joined is a physreg, this method
  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
  // been modified, so we can use this information below to update aliases.
  if (!joinIntervals(CP)) {
    // Coalescing failed.

    // If definition of source is defined by trivial computation, try
    // rematerializing it.
    bool IsDefCopy = false;
    if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
      return true;

    // If we can eliminate the copy without merging the live segments, do so
    // now.
    if (!CP.isPartial() && !CP.isPhys()) {
      bool Changed = adjustCopiesBackFrom(CP, CopyMI);
      bool Shrink = false;
      if (!Changed)
        std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
      if (Changed) {
        deleteInstr(CopyMI);
        if (Shrink) {
          Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
          LiveInterval &DstLI = LIS->getInterval(DstReg);
          shrinkToUses(&DstLI);
          LLVM_DEBUG(dbgs() << "\t\tshrunk:   " << DstLI << '\n');
        }
        LLVM_DEBUG(dbgs() << "\tTrivial!\n");
        return true;
      }
    }

    // Try and see if we can partially eliminate the copy by moving the copy to
    // its predecessor.
    if (!CP.isPartial() && !CP.isPhys())
      if (removePartialRedundancy(CP, *CopyMI))
        return true;

    // Otherwise, we are unable to join the intervals.
    LLVM_DEBUG(dbgs() << "\tInterference!\n");
    Again = true; // May be possible to coalesce later.
    return false;
  }

  // Coalescing to a virtual register that is of a sub-register class of the
  // other. Make sure the resulting register is set to the right register class.
  if (CP.isCrossClass()) {
    ++numCrossRCs;
    MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
  }

  // Removing sub-register copies can ease the register class constraints.
  // Make sure we attempt to inflate the register class of DstReg.
  if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
    InflateRegs.push_back(CP.getDstReg());

  // CopyMI has been erased by joinIntervals at this point. Remove it from
  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
  // to the work list. This keeps ErasedInstrs from growing needlessly.
  if (ErasedInstrs.erase(CopyMI))
    // But we may encounter the instruction again in this iteration.
    CurrentErasedInstrs.insert(CopyMI);

  // Rewrite all SrcReg operands to DstReg.
  // Also update DstReg operands to include DstIdx if it is set.
  if (CP.getDstIdx())
    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());

  // Shrink subregister ranges if necessary.
  if (ShrinkMask.any()) {
    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
    for (LiveInterval::SubRange &S : LI.subranges()) {
      if ((S.LaneMask & ShrinkMask).none())
        continue;
      LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
                        << ")\n");
      LIS->shrinkToUses(S, LI.reg());
      ShrinkMainRange = true;
    }
    LI.removeEmptySubRanges();
  }

  // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
  // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
  // is not up-to-date, need to update the merged live interval here.
  if (ToBeUpdated.count(CP.getSrcReg()))
    ShrinkMainRange = true;

  if (ShrinkMainRange) {
    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
    shrinkToUses(&LI);
  }

  // SrcReg is guaranteed to be the register whose live interval that is
  // being merged.
  LIS->removeInterval(CP.getSrcReg());

  // Update regalloc hint.
  TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);

  LLVM_DEBUG({
    dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
           << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
    dbgs() << "\tResult = ";
    if (CP.isPhys())
      dbgs() << printReg(CP.getDstReg(), TRI);
    else
      dbgs() << LIS->getInterval(CP.getDstReg());
    dbgs() << '\n';
  });

  ++numJoins;
  return true;
}
2267 
// Attempt to coalesce a copy between a virtual register and a *reserved*
// physical register (CP.isPhys()). Reserved registers do not have accurate
// live ranges - their regunit ranges look like sets of dead defs - so this
// uses conservative, special-cased interference checks instead of the normal
// value-mapping join. Returns true and deletes the copy on success; returns
// false (leaving everything unchanged) if the join is unsafe.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
  Register DstReg = CP.getDstReg();
  Register SrcReg = CP.getSrcReg();
  assert(CP.isPhys() && "Must be a physreg copy");
  assert(MRI->isReserved(DstReg) && "Not a reserved register");
  LiveInterval &RHS = LIS->getInterval(SrcReg);
  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');

  assert(RHS.containsOneValue() && "Invalid join with reserved register");

  // Optimization for reserved registers like ESP. We can only merge with a
  // reserved physreg if RHS has a single value that is a copy of DstReg.
  // The live range of the reserved register will look like a set of dead defs
  // - we don't properly track the live range of reserved registers.

  // Deny any overlapping intervals.  This depends on all the reserved
  // register live ranges to look like dead defs.
  // A constant physreg (never modified, e.g. a zero register) can skip these
  // checks entirely: nothing can interfere with it.
  if (!MRI->isConstantPhysReg(DstReg)) {
    for (MCRegUnit Unit : TRI->regunits(DstReg)) {
      // Abort if not all the regunits are reserved.
      for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
        if (!MRI->isReserved(*RI))
          return false;
      }
      if (RHS.overlaps(LIS->getRegUnit(Unit))) {
        LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
                          << '\n');
        return false;
      }
    }

    // We must also check for overlaps with regmask clobbers.
    BitVector RegMaskUsable;
    if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
        !RegMaskUsable.test(DstReg)) {
      LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
      return false;
    }
  }

  // Skip any value computations, we are not adding new values to the
  // reserved register.  Also skip merging the live ranges, the reserved
  // register live range doesn't need to be accurate as long as all the
  // defs are there.

  // Delete the identity copy.
  MachineInstr *CopyMI;
  if (CP.isFlipped()) {
    // Physreg is copied into vreg
    //   %y = COPY %physreg_x
    //   ...  //< no other def of %physreg_x here
    //   use %y
    // =>
    //   ...
    //   use %physreg_x
    CopyMI = MRI->getVRegDef(SrcReg);
    deleteInstr(CopyMI);
  } else {
    // VReg is copied into physreg:
    //   %y = def
    //   ... //< no other def or use of %physreg_x here
    //   %physreg_x = COPY %y
    // =>
    //   %physreg_x = def
    //   ...
    // The vreg must have exactly one use (the copy itself); otherwise the
    // other uses would see the physreg, which may be redefined between them.
    if (!MRI->hasOneNonDBGUse(SrcReg)) {
      LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
      return false;
    }

    // Moving the physreg def across basic blocks is not handled; require the
    // def and the copy to be in the same block.
    if (!LIS->intervalIsInOneMBB(RHS)) {
      LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
      return false;
    }

    MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
    CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
    SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
    SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();

    if (!MRI->isConstantPhysReg(DstReg)) {
      // We checked above that there are no interfering defs of the physical
      // register. However, for this case, where we intend to move up the def of
      // the physical register, we also need to check for interfering uses.
      SlotIndexes *Indexes = LIS->getSlotIndexes();
      // Walk every instruction strictly between the vreg def and the copy and
      // reject the join if any of them reads DstReg.
      for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
           SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
        MachineInstr *MI = LIS->getInstructionFromIndex(SI);
        if (MI->readsRegister(DstReg, TRI)) {
          LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
          return false;
        }
      }
    }

    // We're going to remove the copy which defines a physical reserved
    // register, so remove its valno, etc.
    LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
                      << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");

    LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
    deleteInstr(CopyMI);

    // Create a new dead def at the new def location.
    // The physreg now originates at the old vreg def; give each of its
    // regunits a dead def there to keep the (approximate) ranges consistent.
    for (MCRegUnit Unit : TRI->regunits(DstReg)) {
      LiveRange &LR = LIS->getRegUnit(Unit);
      LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
    }
  }

  // We don't track kills for reserved registers.
  MRI->clearKillFlags(CP.getSrcReg());

  return true;
}
2383 
2384 //===----------------------------------------------------------------------===//
2385 //                 Interference checking and interval joining
2386 //===----------------------------------------------------------------------===//
2387 //
2388 // In the easiest case, the two live ranges being joined are disjoint, and
2389 // there is no interference to consider. It is quite common, though, to have
2390 // overlapping live ranges, and we need to check if the interference can be
2391 // resolved.
2392 //
2393 // The live range of a single SSA value forms a sub-tree of the dominator tree.
2394 // This means that two SSA values overlap if and only if the def of one value
2395 // is contained in the live range of the other value. As a special case, the
2396 // overlapping values can be defined at the same index.
2397 //
2398 // The interference from an overlapping def can be resolved in these cases:
2399 //
2400 // 1. Coalescable copies. The value is defined by a copy that would become an
2401 //    identity copy after joining SrcReg and DstReg. The copy instruction will
2402 //    be removed, and the value will be merged with the source value.
2403 //
2404 //    There can be several copies back and forth, causing many values to be
2405 //    merged into one. We compute a list of ultimate values in the joined live
2406 //    range as well as a mappings from the old value numbers.
2407 //
2408 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2409 //    predecessors have a live out value. It doesn't cause real interference,
2410 //    and can be merged into the value it overlaps. Like a coalescable copy, it
2411 //    can be erased after joining.
2412 //
2413 // 3. Copy of external value. The overlapping def may be a copy of a value that
2414 //    is already in the other register. This is like a coalescable copy, but
2415 //    the live range of the source register must be trimmed after erasing the
2416 //    copy instruction:
2417 //
2418 //      %src = COPY %ext
2419 //      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
2420 //
2421 // 4. Clobbering undefined lanes. Vector registers are sometimes built by
2422 //    defining one lane at a time:
2423 //
2424 //      %dst:ssub0<def,read-undef> = FOO
2425 //      %src = BAR
2426 //      %dst:ssub1 = COPY %src
2427 //
2428 //    The live range of %src overlaps the %dst value defined by FOO, but
2429 //    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2430 //    which was undef anyway.
2431 //
2432 //    The value mapping is more complicated in this case. The final live range
2433 //    will have different value numbers for both FOO and BAR, but there is no
2434 //    simple mapping from old to new values. It may even be necessary to add
2435 //    new PHI values.
2436 //
2437 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2438 //    is live, but never read. This can happen because we don't compute
2439 //    individual live ranges per lane.
2440 //
2441 //      %dst = FOO
2442 //      %src = BAR
2443 //      %dst:ssub1 = COPY %src
2444 //
2445 //    This kind of interference is only resolved locally. If the clobbered
2446 //    lane value escapes the block, the join is aborted.
2447 
namespace {

/// Track information about values in a single virtual register about to be
/// joined. Objects of this class are always created in pairs - one for each
/// side of the CoalescerPair (or one for each lane of a side of the coalescer
/// pair)
class JoinVals {
  /// Live range we work on.
  LiveRange &LR;

  /// (Main) register we work on.
  const Register Reg;

  /// Reg (and therefore the values in this liverange) will end up as
  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
  /// CP.SrcIdx.
  const unsigned SubIdx;

  /// The LaneMask that this liverange will occupy the coalesced register. May
  /// be smaller than the lanemask produced by SubIdx when merging subranges.
  const LaneBitmask LaneMask;

  /// This is true when joining sub register ranges, false when joining main
  /// ranges.
  const bool SubRangeJoin;

  /// Whether the current LiveInterval tracks subregister liveness.
  const bool TrackSubRegLiveness;

  /// Values that will be present in the final live range.
  SmallVectorImpl<VNInfo *> &NewVNInfo;

  /// The coalescer pair driving this join.
  const CoalescerPair &CP;
  /// Cached analyses and target info, valid for the duration of the join.
  LiveIntervals *LIS;
  SlotIndexes *Indexes;
  const TargetRegisterInfo *TRI;

  /// Value number assignments. Maps value numbers in LI to entries in
  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
  SmallVector<int, 8> Assignments;

public:
  /// Conflict resolution for overlapping values.
  enum ConflictResolution {
    /// No overlap, simply keep this value.
    CR_Keep,

    /// Merge this value into OtherVNI and erase the defining instruction.
    /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
    /// values.
    CR_Erase,

    /// Merge this value into OtherVNI but keep the defining instruction.
    /// This is for the special case where OtherVNI is defined by the same
    /// instruction.
    CR_Merge,

    /// Keep this value, and have it replace OtherVNI where possible. This
    /// complicates value mapping since OtherVNI maps to two different values
    /// before and after this def.
    /// Used when clobbering undefined or dead lanes.
    CR_Replace,

    /// Unresolved conflict. Visit later when all values have been mapped.
    CR_Unresolved,

    /// Unresolvable conflict. Abort the join.
    CR_Impossible
  };

private:
  /// Per-value info for LI. The lane bit masks are all relative to the final
  /// joined register, so they can be compared directly between SrcReg and
  /// DstReg.
  struct Val {
    ConflictResolution Resolution = CR_Keep;

    /// Lanes written by this def, 0 for unanalyzed values.
    LaneBitmask WriteLanes;

    /// Lanes with defined values in this register. Other lanes are undef and
    /// safe to clobber.
    LaneBitmask ValidLanes;

    /// Value in LI being redefined by this def.
    VNInfo *RedefVNI = nullptr;

    /// Value in the other live range that overlaps this def, if any.
    VNInfo *OtherVNI = nullptr;

    /// Is this value an IMPLICIT_DEF that can be erased?
    ///
    /// IMPLICIT_DEF values should only exist at the end of a basic block that
    /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
    /// safely erased if they are overlapping a live value in the other live
    /// interval.
    ///
    /// Weird control flow graphs and incomplete PHI handling in
    /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
    /// longer live ranges. Such IMPLICIT_DEF values should be treated like
    /// normal values.
    bool ErasableImplicitDef = false;

    /// True when the live range of this value will be pruned because of an
    /// overlapping CR_Replace value in the other live range.
    bool Pruned = false;

    /// True once Pruned above has been computed.
    bool PrunedComputed = false;

    /// True if this value is determined to be identical to OtherVNI
    /// (in valuesIdentical). This is used with CR_Erase where the erased
    /// copy is redundant, i.e. the source value is already the same as
    /// the destination. In such cases the subranges need to be updated
    /// properly. See comment at pruneSubRegValues for more info.
    bool Identical = false;

    Val() = default;

    /// A value counts as analyzed once WriteLanes is non-zero (analyzeValue
    /// records the written lanes, using the full mask for unused values).
    bool isAnalyzed() const { return WriteLanes.any(); }

    /// Mark this value as an IMPLICIT_DEF which must be kept as if it were an
    /// ordinary value.
    void mustKeepImplicitDef(const TargetRegisterInfo &TRI,
                             const MachineInstr &ImpDef) {
      assert(ImpDef.isImplicitDef());
      ErasableImplicitDef = false;
      ValidLanes = TRI.getSubRegIndexLaneMask(ImpDef.getOperand(0).getSubReg());
    }
  };

  /// One entry per value number in LI.
  SmallVector<Val, 8> Vals;

  /// Compute the bitmask of lanes actually written by DefMI.
  /// Set Redef if there are any partial register definitions that depend on the
  /// previous value of the register.
  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;

  /// Find the ultimate value that VNI was copied from.
  std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;

  /// Return true if Value0 (in this range) and Value1 (in Other's range) can
  /// be proven to be the same value by following their full-copy chains back
  /// to a common def, or to a common undefined source.
  bool valuesIdentical(VNInfo *Value0, VNInfo *Value1,
                       const JoinVals &Other) const;

  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
  /// Return a conflict resolution when possible, but leave the hard cases as
  /// CR_Unresolved.
  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
  /// The recursion always goes upwards in the dominator tree, making loops
  /// impossible.
  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);

  /// Compute the value assignment for ValNo in RI.
  /// This may be called recursively by analyzeValue(), but never for a ValNo on
  /// the stack.
  void computeAssignment(unsigned ValNo, JoinVals &Other);

  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
  /// the extent of the tainted lanes in the block.
  ///
  /// Multiple values in Other.LR can be affected since partial redefinitions
  /// can preserve previously tainted lanes.
  ///
  ///   1 %dst = VLOAD           <-- Define all lanes in %dst
  ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
  ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
  ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
  ///
  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
  /// entry to TaintedVals.
  ///
  /// Returns false if the tainted lanes extend beyond the basic block.
  bool
  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);

  /// Return true if MI uses any of the given Lanes from Reg.
  /// This does not include partial redefinitions of Reg.
  bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;

  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
  /// be pruned:
  ///
  ///   %dst = COPY %src
  ///   %src = COPY %dst  <-- This value to be pruned.
  ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
  bool isPrunedValue(unsigned ValNo, JoinVals &Other);

public:
  /// Build the per-value table for LR. Assignments starts out all -1
  /// (unassigned); Vals gets one default (CR_Keep) entry per value number.
  JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
           SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
           bool TrackSubRegLiveness)
      : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
        SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
        NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
        TRI(TRI), Assignments(LR.getNumValNums(), -1),
        Vals(LR.getNumValNums()) {}

  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
  /// Returns false if any conflicts were impossible to resolve.
  bool mapValues(JoinVals &Other);

  /// Try to resolve conflicts that require all values to be mapped.
  /// Returns false if any conflicts were impossible to resolve.
  bool resolveConflicts(JoinVals &Other);

  /// Prune the live range of values in Other.LR where they would conflict with
  /// CR_Replace values in LR. Collect end points for restoring the live range
  /// after joining.
  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
                   bool changeInstrs);

  /// Removes subranges starting at copies that get removed. This sometimes
  /// happens when undefined subranges are copied around. These ranges contain
  /// no useful information and can be removed.
  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);

  /// Pruning values in subranges can lead to removing segments in these
  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
  /// the main range also need to be removed. This function will mark
  /// the corresponding values in the main range as pruned, so that
  /// eraseInstrs can do the final cleanup.
  /// The parameter @p LI must be the interval whose main range is the
  /// live range LR.
  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);

  /// Erase any machine instructions that have been coalesced away.
  /// Add erased instructions to ErasedInstrs.
  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
  /// the erased instrs.
  void eraseInstrs(SmallPtrSetImpl<MachineInstr *> &ErasedInstrs,
                   SmallVectorImpl<Register> &ShrinkRegs,
                   LiveInterval *LI = nullptr);

  /// Remove liverange defs at places where implicit defs will be removed.
  void removeImplicitDefs();

  /// Get the value assignments suitable for passing to LiveInterval::join.
  const int *getAssignments() const { return Assignments.data(); }

  /// Get the conflict resolution for a value number.
  ConflictResolution getResolution(unsigned Num) const {
    return Vals[Num].Resolution;
  }
};

} // end anonymous namespace
2698 
2699 LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI,
2700                                         bool &Redef) const {
2701   LaneBitmask L;
2702   for (const MachineOperand &MO : DefMI->all_defs()) {
2703     if (MO.getReg() != Reg)
2704       continue;
2705     L |= TRI->getSubRegIndexLaneMask(
2706         TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2707     if (MO.readsReg())
2708       Redef = true;
2709   }
2710   return L;
2711 }
2712 
// Walk backwards through full-copy instructions from VNI to find the
// ultimate (non-copy) defining value, returning that value together with the
// register it lives in. Stops at PHI defs, non-full copies, and physreg
// sources. Returns {nullptr, SrcReg} when the chain reaches an undefined
// value (see the inline example below).
std::pair<const VNInfo *, Register>
JoinVals::followCopyChain(const VNInfo *VNI) const {
  // Register currently holding the value; updated each time we step through
  // a copy.
  Register TrackReg = Reg;

  while (!VNI->isPHIDef()) {
    SlotIndex Def = VNI->def;
    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
    assert(MI && "No defining instruction");
    // Only full copies are followed; partial copies don't move the whole
    // value.
    if (!MI->isFullCopy())
      return std::make_pair(VNI, TrackReg);
    Register SrcReg = MI->getOperand(1).getReg();
    // Physical register sources are not tracked further.
    if (!SrcReg.isVirtual())
      return std::make_pair(VNI, TrackReg);

    const LiveInterval &LI = LIS->getInterval(SrcReg);
    const VNInfo *ValueIn;
    // No subrange involved.
    if (!SubRangeJoin || !LI.hasSubRanges()) {
      LiveQueryResult LRQ = LI.Query(Def);
      ValueIn = LRQ.valueIn();
    } else {
      // Query subranges. Ensure that all matching ones take us to the same def
      // (allowing some of them to be undef).
      ValueIn = nullptr;
      for (const LiveInterval::SubRange &S : LI.subranges()) {
        // Transform lanemask to a mask in the joined live interval.
        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
        if ((SMask & LaneMask).none())
          continue;
        LiveQueryResult LRQ = S.Query(Def);
        if (!ValueIn) {
          ValueIn = LRQ.valueIn();
          continue;
        }
        // Two relevant subranges disagree about the incoming value: give up
        // and report the value we already have.
        if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
          return std::make_pair(VNI, TrackReg);
      }
    }
    if (ValueIn == nullptr) {
      // Reaching an undefined value is legitimate, for example:
      //
      // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
      // 2   %1 = COPY %0         ;; %1 is defined here.
      // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
      //                          ;; but it's equivalent to "undef".
      return std::make_pair(nullptr, SrcReg);
    }
    // Step through the copy and keep walking.
    VNI = ValueIn;
    TrackReg = SrcReg;
  }
  return std::make_pair(VNI, TrackReg);
}
2765 
2766 bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2767                                const JoinVals &Other) const {
2768   const VNInfo *Orig0;
2769   Register Reg0;
2770   std::tie(Orig0, Reg0) = followCopyChain(Value0);
2771   if (Orig0 == Value1 && Reg0 == Other.Reg)
2772     return true;
2773 
2774   const VNInfo *Orig1;
2775   Register Reg1;
2776   std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2777   // If both values are undefined, and the source registers are the same
2778   // register, the values are identical. Filter out cases where only one
2779   // value is defined.
2780   if (Orig0 == nullptr || Orig1 == nullptr)
2781     return Orig0 == Orig1 && Reg0 == Reg1;
2782 
2783   // The values are equal if they are defined at the same place and use the
2784   // same register. Note that we cannot compare VNInfos directly as some of
2785   // them might be from a copy created in mergeSubRangeInto()  while the other
2786   // is from the original LiveInterval.
2787   return Orig0->def == Orig1->def && Reg0 == Reg1;
2788 }
2789 
2790 JoinVals::ConflictResolution JoinVals::analyzeValue(unsigned ValNo,
2791                                                     JoinVals &Other) {
2792   Val &V = Vals[ValNo];
2793   assert(!V.isAnalyzed() && "Value has already been analyzed!");
2794   VNInfo *VNI = LR.getValNumInfo(ValNo);
2795   if (VNI->isUnused()) {
2796     V.WriteLanes = LaneBitmask::getAll();
2797     return CR_Keep;
2798   }
2799 
2800   // Get the instruction defining this value, compute the lanes written.
2801   const MachineInstr *DefMI = nullptr;
2802   if (VNI->isPHIDef()) {
2803     // Conservatively assume that all lanes in a PHI are valid.
2804     LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2805                                      : TRI->getSubRegIndexLaneMask(SubIdx);
2806     V.ValidLanes = V.WriteLanes = Lanes;
2807   } else {
2808     DefMI = Indexes->getInstructionFromIndex(VNI->def);
2809     assert(DefMI != nullptr);
2810     if (SubRangeJoin) {
2811       // We don't care about the lanes when joining subregister ranges.
2812       V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2813       if (DefMI->isImplicitDef()) {
2814         V.ValidLanes = LaneBitmask::getNone();
2815         V.ErasableImplicitDef = true;
2816       }
2817     } else {
2818       bool Redef = false;
2819       V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2820 
2821       // If this is a read-modify-write instruction, there may be more valid
2822       // lanes than the ones written by this instruction.
2823       // This only covers partial redef operands. DefMI may have normal use
2824       // operands reading the register. They don't contribute valid lanes.
2825       //
2826       // This adds ssub1 to the set of valid lanes in %src:
2827       //
2828       //   %src:ssub1 = FOO
2829       //
2830       // This leaves only ssub1 valid, making any other lanes undef:
2831       //
2832       //   %src:ssub1<def,read-undef> = FOO %src:ssub2
2833       //
2834       // The <read-undef> flag on the def operand means that old lane values are
2835       // not important.
2836       if (Redef) {
2837         V.RedefVNI = LR.Query(VNI->def).valueIn();
2838         assert((TrackSubRegLiveness || V.RedefVNI) &&
2839                "Instruction is reading nonexistent value");
2840         if (V.RedefVNI != nullptr) {
2841           computeAssignment(V.RedefVNI->id, Other);
2842           V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2843         }
2844       }
2845 
2846       // An IMPLICIT_DEF writes undef values.
2847       if (DefMI->isImplicitDef()) {
2848         // We normally expect IMPLICIT_DEF values to be live only until the end
2849         // of their block. If the value is really live longer and gets pruned in
2850         // another block, this flag is cleared again.
2851         //
2852         // Clearing the valid lanes is deferred until it is sure this can be
2853         // erased.
2854         V.ErasableImplicitDef = true;
2855       }
2856     }
2857   }
2858 
2859   // Find the value in Other that overlaps VNI->def, if any.
2860   LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2861 
2862   // It is possible that both values are defined by the same instruction, or
2863   // the values are PHIs defined in the same block. When that happens, the two
2864   // values should be merged into one, but not into any preceding value.
2865   // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2866   if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2867     assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2868 
2869     // One value stays, the other is merged. Keep the earlier one, or the first
2870     // one we see.
2871     if (OtherVNI->def < VNI->def)
2872       Other.computeAssignment(OtherVNI->id, *this);
2873     else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2874       // This is an early-clobber def overlapping a live-in value in the other
2875       // register. Not mergeable.
2876       V.OtherVNI = OtherLRQ.valueIn();
2877       return CR_Impossible;
2878     }
2879     V.OtherVNI = OtherVNI;
2880     Val &OtherV = Other.Vals[OtherVNI->id];
2881     // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
2882     // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
2883     // is assigned.
2884     if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
2885       return CR_Keep;
2886     // Both sides have been analyzed now.
2887     // Allow overlapping PHI values. Any real interference would show up in a
2888     // predecessor, the PHI itself can't introduce any conflicts.
2889     if (VNI->isPHIDef())
2890       return CR_Merge;
2891     if ((V.ValidLanes & OtherV.ValidLanes).any())
2892       // Overlapping lanes can't be resolved.
2893       return CR_Impossible;
2894     else
2895       return CR_Merge;
2896   }
2897 
2898   // No simultaneous def. Is Other live at the def?
2899   V.OtherVNI = OtherLRQ.valueIn();
2900   if (!V.OtherVNI)
2901     // No overlap, no conflict.
2902     return CR_Keep;
2903 
2904   assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2905 
2906   // We have overlapping values, or possibly a kill of Other.
2907   // Recursively compute assignments up the dominator tree.
2908   Other.computeAssignment(V.OtherVNI->id, *this);
2909   Val &OtherV = Other.Vals[V.OtherVNI->id];
2910 
2911   if (OtherV.ErasableImplicitDef) {
2912     // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2913     // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2914     // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2915     // technically.
2916     //
2917     // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2918     // to erase the IMPLICIT_DEF instruction.
2919     //
2920     // Additionally we must keep an IMPLICIT_DEF if we're redefining an incoming
2921     // value.
2922 
2923     MachineInstr *OtherImpDef =
2924         Indexes->getInstructionFromIndex(V.OtherVNI->def);
2925     MachineBasicBlock *OtherMBB = OtherImpDef->getParent();
2926     if (DefMI &&
2927         (DefMI->getParent() != OtherMBB || LIS->isLiveInToMBB(LR, OtherMBB))) {
2928       LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2929                         << " extends into "
2930                         << printMBBReference(*DefMI->getParent())
2931                         << ", keeping it.\n");
2932       OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2933     } else if (OtherMBB->hasEHPadSuccessor()) {
2934       // If OtherV is defined in a basic block that has EH pad successors then
2935       // we get the same problem not just if OtherV is live beyond its basic
2936       // block, but beyond the last call instruction in its basic block. Handle
2937       // this case conservatively.
2938       LLVM_DEBUG(
2939           dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2940                  << " may be live into EH pad successors, keeping it.\n");
2941       OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2942     } else {
2943       // We deferred clearing these lanes in case we needed to save them
2944       OtherV.ValidLanes &= ~OtherV.WriteLanes;
2945     }
2946   }
2947 
2948   // Allow overlapping PHI values. Any real interference would show up in a
2949   // predecessor, the PHI itself can't introduce any conflicts.
2950   if (VNI->isPHIDef())
2951     return CR_Replace;
2952 
2953   // Check for simple erasable conflicts.
2954   if (DefMI->isImplicitDef())
2955     return CR_Erase;
2956 
2957   // Include the non-conflict where DefMI is a coalescable copy that kills
2958   // OtherVNI. We still want the copy erased and value numbers merged.
2959   if (CP.isCoalescable(DefMI)) {
2960     // Some of the lanes copied from OtherVNI may be undef, making them undef
2961     // here too.
2962     V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2963     return CR_Erase;
2964   }
2965 
2966   // This may not be a real conflict if DefMI simply kills Other and defines
2967   // VNI.
2968   if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2969     return CR_Keep;
2970 
2971   // Handle the case where VNI and OtherVNI can be proven to be identical:
2972   //
2973   //   %other = COPY %ext
2974   //   %this  = COPY %ext <-- Erase this copy
2975   //
2976   if (DefMI->isFullCopy() && !CP.isPartial() &&
2977       valuesIdentical(VNI, V.OtherVNI, Other)) {
2978     V.Identical = true;
2979     return CR_Erase;
2980   }
2981 
2982   // The remaining checks apply to the lanes, which aren't tracked here.  This
2983   // was already decided to be OK via the following CR_Replace condition.
2984   // CR_Replace.
2985   if (SubRangeJoin)
2986     return CR_Replace;
2987 
2988   // If the lanes written by this instruction were all undef in OtherVNI, it is
2989   // still safe to join the live ranges. This can't be done with a simple value
2990   // mapping, though - OtherVNI will map to multiple values:
2991   //
2992   //   1 %dst:ssub0 = FOO                <-- OtherVNI
2993   //   2 %src = BAR                      <-- VNI
2994   //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
2995   //   4 BAZ killed %dst
2996   //   5 QUUX killed %src
2997   //
2998   // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2999   // handles this complex value mapping.
3000   if ((V.WriteLanes & OtherV.ValidLanes).none())
3001     return CR_Replace;
3002 
3003   // If the other live range is killed by DefMI and the live ranges are still
3004   // overlapping, it must be because we're looking at an early clobber def:
3005   //
3006   //   %dst<def,early-clobber> = ASM killed %src
3007   //
3008   // In this case, it is illegal to merge the two live ranges since the early
3009   // clobber def would clobber %src before it was read.
3010   if (OtherLRQ.isKill()) {
3011     // This case where the def doesn't overlap the kill is handled above.
3012     assert(VNI->def.isEarlyClobber() &&
3013            "Only early clobber defs can overlap a kill");
3014     return CR_Impossible;
3015   }
3016 
3017   // VNI is clobbering live lanes in OtherVNI, but there is still the
3018   // possibility that no instructions actually read the clobbered lanes.
3019   // If we're clobbering all the lanes in OtherVNI, at least one must be read.
3020   // Otherwise Other.RI wouldn't be live here.
3021   if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
3022     return CR_Impossible;
3023 
3024   if (TrackSubRegLiveness) {
3025     auto &OtherLI = LIS->getInterval(Other.Reg);
3026     // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
3027     // share the same live range, so we just need to check whether they have
3028     // any conflict bit in their LaneMask.
3029     if (!OtherLI.hasSubRanges()) {
3030       LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
3031       return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
3032     }
3033 
3034     // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
3035     // impossible to resolve the conflict. Otherwise, we can just replace
3036     // OtherVNI because of no real conflict.
3037     for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
3038       LaneBitmask OtherMask =
3039           TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
3040       if ((OtherMask & V.WriteLanes).none())
3041         continue;
3042 
3043       auto OtherSRQ = OtherSR.Query(VNI->def);
3044       if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
3045         // VNI is clobbering some lanes of OtherVNI, they have real conflict.
3046         return CR_Impossible;
3047       }
3048     }
3049 
3050     // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
3051     return CR_Replace;
3052   }
3053 
3054   // We need to verify that no instructions are reading the clobbered lanes.
3055   // To save compile time, we'll only check that locally. Don't allow the
3056   // tainted value to escape the basic block.
3057   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3058   if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
3059     return CR_Impossible;
3060 
3061   // There are still some things that could go wrong besides clobbered lanes
3062   // being read, for example OtherVNI may be only partially redefined in MBB,
3063   // and some clobbered lanes could escape the block. Save this analysis for
3064   // resolveConflicts() when all values have been mapped. We need to know
3065   // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
3066   // that now - the recursive analyzeValue() calls must go upwards in the
3067   // dominator tree.
3068   return CR_Unresolved;
3069 }
3070 
/// Compute the resolution and the final value-number assignment for ValNo,
/// recording the result in Vals[ValNo].Resolution and Assignments[ValNo].
/// analyzeValue() recurses into the other side via computeAssignment(), so by
/// the time a CR_Erase/CR_Merge case is reached here, the value it merges
/// into has already been assigned.
void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  if (V.isAnalyzed()) {
    // Recursion should always move up the dominator tree, so ValNo is not
    // supposed to reappear before it has been assigned.
    assert(Assignments[ValNo] != -1 && "Bad recursion?");
    return;
  }
  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
  case CR_Erase:
  case CR_Merge:
    // Merge this ValNo into OtherVNI: reuse the value number Other already
    // received instead of creating a new one.
    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
    LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
                      << LR.getValNumInfo(ValNo)->def << " into "
                      << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
                      << V.OtherVNI->def << " --> @"
                      << NewVNInfo[Assignments[ValNo]]->def << '\n');
    break;
  case CR_Replace:
  case CR_Unresolved: {
    // The other value is going to be pruned if this join is successful.
    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
    Val &OtherV = Other.Vals[V.OtherVNI->id];
    OtherV.Pruned = true;
    [[fallthrough]];
  }
  default:
    // This value number needs to go in the final joined live range.
    Assignments[ValNo] = NewVNInfo.size();
    NewVNInfo.push_back(LR.getValNumInfo(ValNo));
    break;
  }
}
3107 
3108 bool JoinVals::mapValues(JoinVals &Other) {
3109   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3110     computeAssignment(i, Other);
3111     if (Vals[i].Resolution == CR_Impossible) {
3112       LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
3113                         << '@' << LR.getValNumInfo(i)->def << '\n');
3114       return false;
3115     }
3116   }
3117   return true;
3118 }
3119 
/// Determine how far the given TaintedLanes of Other.LR stay tainted after
/// the def of ValNo, scanning Other.LR only within ValNo's basic block. Each
/// segment of Other.LR that remains tainted is recorded in TaintExtent as an
/// (end index, tainted lanes) pair. Returns false if the tainted lanes are
/// still live at the end of the block, in which case the join must be
/// aborted.
bool JoinVals::taintExtent(
    unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
    SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
  VNInfo *VNI = LR.getValNumInfo(ValNo);
  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);

  // Scan Other.LR from VNI.def to MBBEnd.
  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
  assert(OtherI != Other.LR.end() && "No conflict?");
  do {
    // OtherI is pointing to a tainted value. Abort the join if the tainted
    // lanes escape the block.
    SlotIndex End = OtherI->end;
    if (End >= MBBEnd) {
      LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
                        << OtherI->valno->id << '@' << OtherI->start << '\n');
      return false;
    }
    LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
                      << OtherI->valno->id << '@' << OtherI->start << " to "
                      << End << '\n');
    // A dead def is not a problem.
    if (End.isDead())
      break;
    TaintExtent.push_back(std::make_pair(End, TaintedLanes));

    // Check for another def in the MBB.
    if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
      break;

    // Lanes written by the new def are no longer tainted.
    const Val &OV = Other.Vals[OtherI->valno->id];
    TaintedLanes &= ~OV.WriteLanes;
    // NOTE(review): when the new def is not a redefinition of the earlier
    // value (no RedefVNI), the scan stops here — the taint chain is broken.
    if (!OV.RedefVNI)
      break;
  } while (TaintedLanes.any());
  return true;
}
3159 
3160 bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
3161                          LaneBitmask Lanes) const {
3162   if (MI.isDebugOrPseudoInstr())
3163     return false;
3164   for (const MachineOperand &MO : MI.all_uses()) {
3165     if (MO.getReg() != Reg)
3166       continue;
3167     if (!MO.readsReg())
3168       continue;
3169     unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
3170     if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
3171       return true;
3172   }
3173   return false;
3174 }
3175 
/// Attempt to resolve the remaining CR_Unresolved conflicts by proving that
/// no instruction in the defining block actually reads the tainted lanes of
/// the other value. Conflicts that can be proven harmless are downgraded to
/// CR_Replace. Returns false if a real conflict remains and the join must be
/// aborted.
bool JoinVals::resolveConflicts(JoinVals &Other) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
    if (V.Resolution != CR_Unresolved)
      continue;
    LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
                      << LR.getValNumInfo(i)->def << ' '
                      << PrintLaneMask(LaneMask) << '\n');
    // Lane-level analysis is not available during a subrange join.
    if (SubRangeJoin)
      return false;

    ++NumLaneConflicts;
    assert(V.OtherVNI && "Inconsistent conflict resolution.");
    VNInfo *VNI = LR.getValNumInfo(i);
    const Val &OtherV = Other.Vals[V.OtherVNI->id];

    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
    // join, those lanes will be tainted with a wrong value. Get the extent of
    // the tainted lanes.
    LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
    SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
      // Tainted lanes would extend beyond the basic block.
      return false;

    assert(!TaintExtent.empty() && "There should be at least one conflict.");

    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    MachineBasicBlock::iterator MI = MBB->begin();
    if (!VNI->isPHIDef()) {
      MI = Indexes->getInstructionFromIndex(VNI->def);
      if (!VNI->def.isEarlyClobber()) {
        // No need to check the instruction defining VNI for reads.
        ++MI;
      }
    }
    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
           "Interference ends on VNI->def. Should have been handled earlier");
    MachineInstr *LastMI =
        Indexes->getInstructionFromIndex(TaintExtent.front().first);
    assert(LastMI && "Range must end at a proper instruction");
    unsigned TaintNum = 0;
    // Walk the instructions covered by TaintExtent; any read of a tainted
    // lane makes the conflict real.
    while (true) {
      assert(MI != MBB->end() && "Bad LastMI");
      if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
        LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
        return false;
      }
      // LastMI is the last instruction to use the current value.
      if (&*MI == LastMI) {
        if (++TaintNum == TaintExtent.size())
          break;
        // Advance to the next tainted segment; the set of tainted lanes may
        // have shrunk after an intervening def.
        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
        assert(LastMI && "Range must end at a proper instruction");
        TaintedLanes = TaintExtent[TaintNum].second;
      }
      ++MI;
    }

    // The tainted lanes are unused.
    V.Resolution = CR_Replace;
    ++NumLaneResolves;
  }
  return true;
}
3243 
3244 bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
3245   Val &V = Vals[ValNo];
3246   if (V.Pruned || V.PrunedComputed)
3247     return V.Pruned;
3248 
3249   if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
3250     return V.Pruned;
3251 
3252   // Follow copies up the dominator tree and check if any intermediate value
3253   // has been pruned.
3254   V.PrunedComputed = true;
3255   V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
3256   return V.Pruned;
3257 }
3258 
/// Prune conflicting liveness ahead of the final join. For each CR_Replace
/// value, the overlapping liveness in Other.LR is truncated at this value's
/// def; for erased/merged values that ultimately copy a pruned value, this
/// range itself is pruned. Every point where liveness must be restored after
/// joining is appended to EndPoints. Instruction operand flags are rewritten
/// only when changeInstrs is true (the subrange join passes false).
void JoinVals::pruneValues(JoinVals &Other,
                           SmallVectorImpl<SlotIndex> &EndPoints,
                           bool changeInstrs) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    SlotIndex Def = LR.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      break;
    case CR_Replace: {
      // This value takes precedence over the value in Other.LR.
      LIS->pruneValue(Other.LR, Def, &EndPoints);
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
      // instructions are only inserted to provide a live-out value for PHI
      // predecessors, so the instruction should simply go away once its value
      // has been replaced.
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
      bool EraseImpDef =
          OtherV.ErasableImplicitDef && OtherV.Resolution == CR_Keep;
      if (!Def.isBlock()) {
        if (changeInstrs) {
          // Remove <def,read-undef> flags. This def is now a partial redef.
          // Also remove dead flags since the joined live range will
          // continue past this instruction.
          for (MachineOperand &MO :
               Indexes->getInstructionFromIndex(Def)->all_defs()) {
            if (MO.getReg() == Reg) {
              if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
                MO.setIsUndef(false);
              MO.setIsDead(false);
            }
          }
        }
        // This value will reach instructions below, but we need to make sure
        // the live range also reaches the instruction at Def.
        if (!EraseImpDef)
          EndPoints.push_back(Def);
      }
      LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
                        << ": " << Other.LR << '\n');
      break;
    }
    case CR_Erase:
    case CR_Merge:
      if (isPrunedValue(i, Other)) {
        // This value is ultimately a copy of a pruned value in LR or Other.LR.
        // We can no longer trust the value mapping computed by
        // computeAssignment(), the value that was originally copied could have
        // been replaced.
        LIS->pruneValue(LR, Def, &EndPoints);
        LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
                          << Def << ": " << LR << '\n');
      }
      break;
    case CR_Unresolved:
    case CR_Impossible:
      llvm_unreachable("Unresolved conflicts");
    }
  }
}
3318 
3319 // Check if the segment consists of a copied live-through value (i.e. the copy
3320 // in the block only extended the liveness, of an undef value which we may need
3321 // to handle).
3322 static bool isLiveThrough(const LiveQueryResult Q) {
3323   return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
3324 }
3325 
/// Consider the following situation when coalescing the copy between
/// %31 and %45 at 800. (The vertical lines represent live range segments.)
///
///                              Main range         Subrange 0004 (sub2)
///                              %31    %45           %31    %45
///  544    %45 = COPY %28               +                    +
///                                      | v1                 | v1
///  560B bb.1:                          +                    +
///  624        = %45.sub2               | v2                 | v2
///  800    %31 = COPY %45        +      +             +      +
///                               | v0                 | v0
///  816    %31.sub1 = ...        +                    |
///  880    %30 = COPY %31        | v1                 +
///  928    %45 = COPY %30        |      +                    +
///                               |      | v0                 | v0  <--+
///  992B   ; backedge -> bb.1    |      +                    +        |
/// 1040        = %31.sub0        +                                    |
///                                                 This value must remain
///                                                 live-out!
///
/// Assuming that %31 is coalesced into %45, the copy at 928 becomes
/// redundant, since it copies the value from %45 back into it. The
/// conflict resolution for the main range determines that %45.v0 is
/// to be erased, which is ok since %31.v1 is identical to it.
/// The problem happens with the subrange for sub2: it has to be live
/// on exit from the block, but since 928 was actually a point of
/// definition of %45.sub2, %45.sub2 was not live immediately prior
/// to that definition. As a result, when 928 was erased, the value v0
/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
/// providing an incorrect value to the use at 624.
///
/// Since the main-range values %31.v1 and %45.v0 were proved to be
/// identical, the corresponding values in subranges must also be the
/// same. A redundant copy is removed because it's not needed, and not
/// because it copied an undefined value, so any liveness that originated
/// from that copy cannot disappear. When pruning a value that started
/// at the removed copy, the corresponding identical value must be
/// extended to replace it.
void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
  // Look for values being erased.
  bool DidPrune = false;
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    // Process exactly the values whose instruction eraseInstrs() will remove:
    // CR_Erase values, and pruned erasable IMPLICIT_DEFs, which eraseInstrs()
    // also drops in its CR_Keep case.
    if (V.Resolution != CR_Erase &&
        (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
      continue;

    // Check subranges at the point where the copy will be removed.
    SlotIndex Def = LR.getValNumInfo(i)->def;
    SlotIndex OtherDef;
    if (V.Identical)
      OtherDef = V.OtherVNI->def;

    // Print message so mismatches with eraseInstrs() can be diagnosed.
    LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
                      << '\n');
    for (LiveInterval::SubRange &S : LI.subranges()) {
      LiveQueryResult Q = S.Query(Def);

      // If a subrange starts at the copy then an undefined value has been
      // copied and we must remove that subrange value as well.
      VNInfo *ValueOut = Q.valueOutOrDead();
      if (ValueOut != nullptr &&
          (Q.valueIn() == nullptr ||
           (V.Identical && V.Resolution == CR_Erase && ValueOut->def == Def))) {
        LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
                          << " at " << Def << "\n");
        SmallVector<SlotIndex, 8> EndPoints;
        LIS->pruneValue(S, Def, &EndPoints);
        DidPrune = true;
        // Mark value number as unused.
        ValueOut->markUnused();

        if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
          // If V is identical to V.OtherVNI (and S was live at OtherDef),
          // then we can't simply prune V from S. V needs to be replaced
          // with V.OtherVNI.
          LIS->extendToIndices(S, EndPoints);
        }

        // We may need to eliminate the subrange if the copy introduced a live
        // out undef value.
        if (ValueOut->isPHIDef())
          ShrinkMask |= S.LaneMask;
        continue;
      }

      // If a subrange ends at the copy, then a value was copied but only
      // partially used later. Shrink the subregister range appropriately.
      //
      // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
      // conservatively correct.
      if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
          (V.Resolution == CR_Erase && isLiveThrough(Q))) {
        LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
                          << PrintLaneMask(S.LaneMask) << " at " << Def
                          << "\n");
        ShrinkMask |= S.LaneMask;
      }
    }
  }
  if (DidPrune)
    LI.removeEmptySubRanges();
}
3433 
3434 /// Check if any of the subranges of @p LI contain a definition at @p Def.
3435 static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
3436   for (LiveInterval::SubRange &SR : LI.subranges()) {
3437     if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3438       if (VNI->def == Def)
3439         return true;
3440   }
3441   return false;
3442 }
3443 
3444 void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
3445   assert(&static_cast<LiveRange &>(LI) == &LR);
3446 
3447   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3448     if (Vals[i].Resolution != CR_Keep)
3449       continue;
3450     VNInfo *VNI = LR.getValNumInfo(i);
3451     if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
3452       continue;
3453     Vals[i].Pruned = true;
3454     ShrinkMainRange = true;
3455   }
3456 }
3457 
3458 void JoinVals::removeImplicitDefs() {
3459   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3460     Val &V = Vals[i];
3461     if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
3462       continue;
3463 
3464     VNInfo *VNI = LR.getValNumInfo(i);
3465     VNI->markUnused();
3466     LR.removeValNo(VNI);
3467   }
3468 }
3469 
/// Erase the machine instructions made dead by the join: instructions whose
/// value resolved to CR_Erase, and pruned erasable IMPLICIT_DEFs that were
/// kept only as placeholders. Erased instructions are recorded in
/// ErasedInstrs; source registers of erased copies that may need shrinking
/// are added to ShrinkRegs. LI, if non-null, is the joined interval and is
/// used to patch up the main range around a removed IMPLICIT_DEF when
/// subranges are present.
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr *> &ErasedInstrs,
                           SmallVectorImpl<Register> &ShrinkRegs,
                           LiveInterval *LI) {
  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
    // Get the def location before markUnused() below invalidates it.
    VNInfo *VNI = LR.getValNumInfo(i);
    SlotIndex Def = VNI->def;
    switch (Vals[i].Resolution) {
    case CR_Keep: {
      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
      // longer. The IMPLICIT_DEF instructions are only inserted by
      // PHIElimination to guarantee that all PHI predecessors have a value.
      if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
        break;
      // Remove value number i from LR.
      // For intervals with subranges, removing a segment from the main range
      // may require extending the previous segment: for each definition of
      // a subregister, there will be a corresponding def in the main range.
      // That def may fall in the middle of a segment from another subrange.
      // In such cases, removing this def from the main range must be
      // complemented by extending the main range to account for the liveness
      // of the other subrange.
      // The new end point of the main range segment to be extended.
      SlotIndex NewEnd;
      if (LI != nullptr) {
        LiveRange::iterator I = LR.FindSegmentContaining(Def);
        assert(I != LR.end());
        // Do not extend beyond the end of the segment being removed.
        // The segment may have been pruned in preparation for joining
        // live ranges.
        NewEnd = I->end;
      }

      LR.removeValNo(VNI);
      // Note that this VNInfo is reused and still referenced in NewVNInfo,
      // make it appear like an unused value number.
      VNI->markUnused();

      if (LI != nullptr && LI->hasSubRanges()) {
        assert(static_cast<LiveRange *>(LI) == &LR);
        // Determine the end point based on the subrange information:
        // minimum of (earliest def of next segment,
        //             latest end point of containing segment)
        SlotIndex ED, LE;
        for (LiveInterval::SubRange &SR : LI->subranges()) {
          LiveRange::iterator I = SR.find(Def);
          if (I == SR.end())
            continue;
          if (I->start > Def)
            ED = ED.isValid() ? std::min(ED, I->start) : I->start;
          else
            LE = LE.isValid() ? std::max(LE, I->end) : I->end;
        }
        if (LE.isValid())
          NewEnd = std::min(NewEnd, LE);
        if (ED.isValid())
          NewEnd = std::min(NewEnd, ED);

        // We only want to do the extension if there was a subrange that
        // was live across Def.
        if (LE.isValid()) {
          LiveRange::iterator S = LR.find(Def);
          if (S != LR.begin())
            std::prev(S)->end = NewEnd;
        }
      }
      LLVM_DEBUG({
        dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
        if (LI != nullptr)
          dbgs() << "\t\t  LHS = " << *LI << '\n';
      });
      // The pruned IMPLICIT_DEF instruction itself is erased below.
      [[fallthrough]];
    }

    case CR_Erase: {
      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
      assert(MI && "No instruction to erase");
      if (MI->isCopy()) {
        // The source of an erased copy (other than the registers being
        // coalesced) may have dead segments left behind; remember it for
        // shrinking.
        Register Reg = MI->getOperand(1).getReg();
        if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
          ShrinkRegs.push_back(Reg);
      }
      ErasedInstrs.insert(MI);
      LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
      break;
    }
    default:
      break;
    }
  }
}
3563 
/// Join the subregister live range RRange into LRange for the lanes in
/// LaneMask. This mirrors joinVirtRegs(): map values, resolve conflicts,
/// prune, then join — but since the main range already joined successfully,
/// any failure here is fatal.
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                                         LaneBitmask LaneMask,
                                         const CoalescerPair &CP) {
  SmallVector<VNInfo *, 16> NewVNInfo;
  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, NewVNInfo,
                   CP, LIS, TRI, true, true);
  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, NewVNInfo,
                   CP, LIS, TRI, true, true);

  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
  // We should be able to resolve all conflicts here as we could successfully do
  // it on the mainrange already. There is however a problem when multiple
  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
  // interferences.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n");
  }
  if (!LHSVals.resolveConflicts(RHSVals) ||
      !RHSVals.resolveConflicts(LHSVals)) {
    // We already determined that it is legal to merge the intervals, so this
    // should never fail.
    llvm_unreachable("*** Couldn't join subrange!\n");
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints, false);
  RHSVals.pruneValues(LHSVals, EndPoints, false);

  LHSVals.removeImplicitDefs();
  RHSVals.removeImplicitDefs();

  assert(LRange.verify() && RRange.verify());

  // Join RRange into LHS.
  LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
              NewVNInfo);

  LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask) << ' '
                    << LRange << "\n");
  if (EndPoints.empty())
    return;

  // Recompute the parts of the live range we had to remove because of
  // CR_Replace conflicts.
  LLVM_DEBUG({
    dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
    for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
      dbgs() << EndPoints[i];
      if (i != n - 1)
        dbgs() << ',';
    }
    dbgs() << ":  " << LRange << '\n';
  });
  LIS->extendToIndices(LRange, EndPoints);
}
3625 
3626 void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3627                                           const LiveRange &ToMerge,
3628                                           LaneBitmask LaneMask,
3629                                           CoalescerPair &CP,
3630                                           unsigned ComposeSubRegIdx) {
3631   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
3632   LI.refineSubRanges(
3633       Allocator, LaneMask,
3634       [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
3635         if (SR.empty()) {
3636           SR.assign(ToMerge, Allocator);
3637         } else {
3638           // joinSubRegRange() destroys the merged range, so we need a copy.
3639           LiveRange RangeCopy(ToMerge, Allocator);
3640           joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3641         }
3642       },
3643       *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
3644 }
3645 
3646 bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
3647   if (LI.valnos.size() < LargeIntervalSizeThreshold)
3648     return false;
3649   auto &Counter = LargeLIVisitCounter[LI.reg()];
3650   if (Counter < LargeIntervalFreqThreshold) {
3651     Counter++;
3652     return false;
3653   }
3654   return true;
3655 }
3656 
// Join the live ranges of the two virtual registers in \p CP (source merged
// into destination). Returns false if the join is impossible (unresolvable
// value conflicts, or intervals deemed too expensive to analyze). On success,
// RHS has been joined into LHS, debug values/PHIs have been retargeted, and
// kill flags cleared.
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
  SmallVector<VNInfo *, 16> NewVNInfo;
  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
  // JoinVals analyzes each value number of one side against the other side.
  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);

  LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');

  // Compile-time guard: give up on very large intervals that have already
  // been revisited too often (see isHighCostLiveInterval).
  if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
    return false;

  // First compute NewVNInfo and the simple value mappings.
  // Detect impossible conflicts early.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
    return false;

  // Some conflicts can only be resolved after all values have been mapped.
  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
    return false;

  // All clear, the live ranges can be merged.
  if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
    BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();

    // Transform lanemasks from the LHS to masks in the coalesced register and
    // create initial subranges if necessary.
    unsigned DstIdx = CP.getDstIdx();
    if (!LHS.hasSubRanges()) {
      LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(DstIdx);
      // LHS must support subregs or we wouldn't be in this codepath.
      assert(Mask.any());
      LHS.createSubRangeFrom(Allocator, Mask, LHS);
    } else if (DstIdx != 0) {
      // Transform LHS lanemasks to new register class if necessary.
      for (LiveInterval::SubRange &R : LHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
        R.LaneMask = Mask;
      }
    }
    LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
                      << '\n');

    // Determine lanemasks of RHS in the coalesced register and merge subranges.
    unsigned SrcIdx = CP.getSrcIdx();
    if (!RHS.hasSubRanges()) {
      LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
                                     : TRI->getSubRegIndexLaneMask(SrcIdx);
      mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
    } else {
      // Pair up subranges and merge.
      for (LiveInterval::SubRange &R : RHS.subranges()) {
        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
        mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
      }
    }
    LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");

    // Pruning implicit defs from subranges may result in the main range
    // having stale segments.
    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);

    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
  } else if (TrackSubRegLiveness && !CP.getDstIdx() && CP.getSrcIdx()) {
    // Neither side has subranges, but the source is accessed through a
    // nonzero subregister index: create subranges on LHS so the lanes
    // covered by the source are tracked after the merge.
    LHS.createSubRangeFrom(LIS->getVNInfoAllocator(),
                           CP.getNewRC()->getLaneMask(), LHS);
    mergeSubRangeInto(LHS, RHS, TRI->getSubRegIndexLaneMask(CP.getSrcIdx()), CP,
                      CP.getDstIdx());
    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
  }

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints, true);
  RHSVals.pruneValues(LHSVals, EndPoints, true);

  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
  // registers to require trimming.
  SmallVector<Register, 8> ShrinkRegs;
  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
  while (!ShrinkRegs.empty())
    shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));

  // Scan and mark undef any DBG_VALUEs that would refer to a different value.
  checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);

  // If the RHS covers any PHI locations that were tracked for debug-info, we
  // must update tracking information to reflect the join.
  auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
  if (RegIt != RegToPHIIdx.end()) {
    // Iterate over all the debug instruction numbers assigned this register.
    for (unsigned InstID : RegIt->second) {
      auto PHIIt = PHIValToPos.find(InstID);
      assert(PHIIt != PHIValToPos.end());
      const SlotIndex &SI = PHIIt->second.SI;

      // Does the RHS cover the position of this PHI?
      auto LII = RHS.find(SI);
      if (LII == RHS.end() || LII->start > SI)
        continue;

      // Accept two kinds of subregister movement:
      //  * When we merge from one register class into a larger register:
      //        %1:gr16 = some-inst
      //                ->
      //        %2:gr32.sub_16bit = some-inst
      //  * When the PHI is already in a subregister, and the larger class
      //    is coalesced:
      //        %2:gr32.sub_16bit = some-inst
      //        %3:gr32 = COPY %2
      //                ->
      //        %3:gr32.sub_16bit = some-inst
      // Test for subregister move:
      if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
        // If we're moving between different subregisters, ignore this join.
        // The PHI will not get a location, dropping variable locations.
        if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
          continue;

      // Update our tracking of where the PHI is.
      PHIIt->second.Reg = CP.getDstReg();

      // If we merge into a sub-register of a larger class (test above),
      // update SubReg.
      if (CP.getSrcIdx() != 0)
        PHIIt->second.SubReg = CP.getSrcIdx();
    }

    // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
    // different VRegs now. Copy old collection of debug instruction numbers and
    // erase the old one:
    auto InstrNums = RegIt->second;
    RegToPHIIdx.erase(RegIt);

    // There might already be PHIs being tracked in the destination VReg. Insert
    // into an existing tracking collection, or insert a new one.
    RegIt = RegToPHIIdx.find(CP.getDstReg());
    if (RegIt != RegToPHIIdx.end())
      RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
                           InstrNums.end());
    else
      RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
  }

  // Join RHS into LHS.
  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);

  // Kill flags are going to be wrong if the live ranges were overlapping.
  // Eventually, we should simply clear all kill flags when computing live
  // ranges. They are reinserted after register allocation.
  MRI->clearKillFlags(LHS.reg());
  MRI->clearKillFlags(RHS.reg());

  if (!EndPoints.empty()) {
    // Recompute the parts of the live range we had to remove because of
    // CR_Replace conflicts.
    LLVM_DEBUG({
      dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
      for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
        dbgs() << EndPoints[i];
        if (i != n - 1)
          dbgs() << ',';
      }
      dbgs() << ":  " << LHS << '\n';
    });
    LIS->extendToIndices((LiveRange &)LHS, EndPoints);
  }

  return true;
}
3837 
3838 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3839   return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3840 }
3841 
3842 void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) {
3843   const SlotIndexes &Slots = *LIS->getSlotIndexes();
3844   SmallVector<MachineInstr *, 8> ToInsert;
3845 
3846   // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
3847   // vreg => DbgValueLoc map.
3848   auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
3849     for (auto *X : ToInsert) {
3850       for (const auto &Op : X->debug_operands()) {
3851         if (Op.isReg() && Op.getReg().isVirtual())
3852           DbgVRegToValues[Op.getReg()].push_back({Slot, X});
3853       }
3854     }
3855 
3856     ToInsert.clear();
3857   };
3858 
3859   // Iterate over all instructions, collecting them into the ToInsert vector.
3860   // Once a non-debug instruction is found, record the slot index of the
3861   // collected DBG_VALUEs.
3862   for (auto &MBB : MF) {
3863     SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
3864 
3865     for (auto &MI : MBB) {
3866       if (MI.isDebugValue()) {
3867         if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
3868               return MO.isReg() && MO.getReg().isVirtual();
3869             }))
3870           ToInsert.push_back(&MI);
3871       } else if (!MI.isDebugOrPseudoInstr()) {
3872         CurrentSlot = Slots.getInstructionIndex(MI);
3873         CloseNewDVRange(CurrentSlot);
3874       }
3875     }
3876 
3877     // Close range of DBG_VALUEs at the end of blocks.
3878     CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
3879   }
3880 
3881   // Sort all DBG_VALUEs we've seen by slot number.
3882   for (auto &Pair : DbgVRegToValues)
3883     llvm::sort(Pair.second);
3884 }
3885 
3886 void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
3887                                                      LiveRange &LHS,
3888                                                      JoinVals &LHSVals,
3889                                                      LiveRange &RHS,
3890                                                      JoinVals &RHSVals) {
3891   auto ScanForDstReg = [&](Register Reg) {
3892     checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
3893   };
3894 
3895   auto ScanForSrcReg = [&](Register Reg) {
3896     checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
3897   };
3898 
3899   // Scan for unsound updates of both the source and destination register.
3900   ScanForSrcReg(CP.getSrcReg());
3901   ScanForDstReg(CP.getDstReg());
3902 }
3903 
// For each DBG_VALUE of \p Reg whose slot falls inside a live segment of
// \p OtherLR, decide whether the join could make the DBG_VALUE refer to a
// different value; if so, mark it undef. \p RegLR and \p RegVals describe
// Reg's own liveness and the conflict resolutions computed by JoinVals.
void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
                                                         LiveRange &OtherLR,
                                                         LiveRange &RegLR,
                                                         JoinVals &RegVals) {
  // Are there any DBG_VALUEs to examine?
  auto VRegMapIt = DbgVRegToValues.find(Reg);
  if (VRegMapIt == DbgVRegToValues.end())
    return;

  auto &DbgValueSet = VRegMapIt->second;
  auto DbgValueSetIt = DbgValueSet.begin();
  auto SegmentIt = OtherLR.begin();

  // Cache of the most recent ShouldUndef query (see below).
  bool LastUndefResult = false;
  SlotIndex LastUndefIdx;

  // If the "Other" register is live at a slot Idx, test whether Reg can
  // safely be merged with it, or should be marked undef.
  auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
                      &LastUndefIdx](SlotIndex Idx) -> bool {
    // Our worst-case performance typically happens with asan, causing very
    // many DBG_VALUEs of the same location. Cache a copy of the most recent
    // result for this edge-case.
    if (LastUndefIdx == Idx)
      return LastUndefResult;

    // If the other range was live, and Reg's was not, the register coalescer
    // will not have tried to resolve any conflicts. We don't know whether
    // the DBG_VALUE will refer to the same value number, so it must be made
    // undef.
    auto OtherIt = RegLR.find(Idx);
    if (OtherIt == RegLR.end())
      return true;

    // Both the registers were live: examine the conflict resolution record for
    // the value number Reg refers to. CR_Keep meant that this value number
    // "won" and the merged register definitely refers to that value. CR_Erase
    // means the value number was a redundant copy of the other value, which
    // was coalesced and Reg deleted. It's safe to refer to the other register
    // (which will be the source of the copy).
    auto Resolution = RegVals.getResolution(OtherIt->valno->id);
    LastUndefResult =
        Resolution != JoinVals::CR_Keep && Resolution != JoinVals::CR_Erase;
    LastUndefIdx = Idx;
    return LastUndefResult;
  };

  // Iterate over both the live-range of the "Other" register, and the set of
  // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
  // slot index. This relies on the DbgValueSet being ordered.
  while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
    if (DbgValueSetIt->first < SegmentIt->end) {
      // "Other" is live and there is a DBG_VALUE of Reg: test if we should
      // set it undef.
      if (DbgValueSetIt->first >= SegmentIt->start) {
        bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
        bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
        if (HasReg && ShouldUndefReg) {
          // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
          DbgValueSetIt->second->setDebugValueUndef();
          // NOTE(review): the iterator is deliberately not advanced here;
          // presumably setDebugValueUndef() removes Reg from the operands so
          // HasReg is false on the revisit and the entry is then skipped.
          continue;
        }
      }
      ++DbgValueSetIt;
    } else {
      ++SegmentIt;
    }
  }
}
3973 
namespace {

/// Information concerning MBB coalescing priority.
struct MBBPriorityInfo {
  MachineBasicBlock *MBB; // The block to coalesce.
  unsigned Depth;         // Loop depth of MBB; deeper blocks sort first.
  bool IsSplit;           // Whether MBB is a split edge (see isSplitEdge).

  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
      : MBB(mbb), Depth(depth), IsSplit(issplit) {}
};

} // end anonymous namespace
3987 
3988 /// C-style comparator that sorts first based on the loop depth of the basic
3989 /// block (the unsigned), and then on the MBB number.
3990 ///
3991 /// EnableGlobalCopies assumes that the primary sort key is loop depth.
3992 static int compareMBBPriority(const MBBPriorityInfo *LHS,
3993                               const MBBPriorityInfo *RHS) {
3994   // Deeper loops first
3995   if (LHS->Depth != RHS->Depth)
3996     return LHS->Depth > RHS->Depth ? -1 : 1;
3997 
3998   // Try to unsplit critical edges next.
3999   if (LHS->IsSplit != RHS->IsSplit)
4000     return LHS->IsSplit ? -1 : 1;
4001 
4002   // Prefer blocks that are more connected in the CFG. This takes care of
4003   // the most difficult copies first while intervals are short.
4004   unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
4005   unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
4006   if (cl != cr)
4007     return cl > cr ? -1 : 1;
4008 
4009   // As a last resort, sort by block number.
4010   return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
4011 }
4012 
4013 /// \returns true if the given copy uses or defines a local live range.
4014 static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
4015   if (!Copy->isCopy())
4016     return false;
4017 
4018   if (Copy->getOperand(1).isUndef())
4019     return false;
4020 
4021   Register SrcReg = Copy->getOperand(1).getReg();
4022   Register DstReg = Copy->getOperand(0).getReg();
4023   if (SrcReg.isPhysical() || DstReg.isPhysical())
4024     return false;
4025 
4026   return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) ||
4027          LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
4028 }
4029 
4030 void RegisterCoalescer::lateLiveIntervalUpdate() {
4031   for (Register reg : ToBeUpdated) {
4032     if (!LIS->hasInterval(reg))
4033       continue;
4034     LiveInterval &LI = LIS->getInterval(reg);
4035     shrinkToUses(&LI, &DeadDefs);
4036     if (!DeadDefs.empty())
4037       eliminateDeadDefs();
4038   }
4039   ToBeUpdated.clear();
4040 }
4041 
// Attempt to join every copy in \p CurrList. Returns true if any copy was
// successfully coalesced. Handled (or erased) entries are nulled out in
// place so callers can compact their worklists afterwards.
bool RegisterCoalescer::copyCoalesceWorkList(
    MutableArrayRef<MachineInstr *> CurrList) {
  bool Progress = false;
  // Instructions erased during this invocation only; ErasedInstrs persists
  // across calls.
  SmallPtrSet<MachineInstr *, 4> CurrentErasedInstrs;
  for (MachineInstr *&MI : CurrList) {
    if (!MI)
      continue;
    // Skip instruction pointers that have already been erased, for example by
    // dead code elimination.
    if (ErasedInstrs.count(MI) || CurrentErasedInstrs.count(MI)) {
      MI = nullptr;
      continue;
    }
    bool Again = false;
    bool Success = joinCopy(MI, Again, CurrentErasedInstrs);
    Progress |= Success;
    // Keep the entry only when joinCopy failed but asked for a retry later.
    if (Success || !Again)
      MI = nullptr;
  }
  // Clear instructions not recorded in `ErasedInstrs` but erased.
  if (!CurrentErasedInstrs.empty()) {
    for (MachineInstr *&MI : CurrList) {
      if (MI && CurrentErasedInstrs.count(MI))
        MI = nullptr;
    }
    for (MachineInstr *&MI : WorkList) {
      if (MI && CurrentErasedInstrs.count(MI))
        MI = nullptr;
    }
  }
  return Progress;
}
4074 
4075 /// Check if DstReg is a terminal node.
4076 /// I.e., it does not have any affinity other than \p Copy.
4077 static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
4078                           const MachineRegisterInfo *MRI) {
4079   assert(Copy.isCopyLike());
4080   // Check if the destination of this copy as any other affinity.
4081   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
4082     if (&MI != &Copy && MI.isCopyLike())
4083       return false;
4084   return true;
4085 }
4086 
4087 bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
4088   assert(Copy.isCopyLike());
4089   if (!UseTerminalRule)
4090     return false;
4091   Register SrcReg, DstReg;
4092   unsigned SrcSubReg = 0, DstSubReg = 0;
4093   if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
4094     return false;
4095   // Check if the destination of this copy has any other affinity.
4096   if (DstReg.isPhysical() ||
4097       // If SrcReg is a physical register, the copy won't be coalesced.
4098       // Ignoring it may have other side effect (like missing
4099       // rematerialization). So keep it.
4100       SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
4101     return false;
4102 
4103   // DstReg is a terminal node. Check if it interferes with any other
4104   // copy involving SrcReg.
4105   const MachineBasicBlock *OrigBB = Copy.getParent();
4106   const LiveInterval &DstLI = LIS->getInterval(DstReg);
4107   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
4108     // Technically we should check if the weight of the new copy is
4109     // interesting compared to the other one and update the weight
4110     // of the copies accordingly. However, this would only work if
4111     // we would gather all the copies first then coalesce, whereas
4112     // right now we interleave both actions.
4113     // For now, just consider the copies that are in the same block.
4114     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
4115       continue;
4116     Register OtherSrcReg, OtherReg;
4117     unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
4118     if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
4119                      OtherSubReg))
4120       return false;
4121     if (OtherReg == SrcReg)
4122       OtherReg = OtherSrcReg;
4123     // Check if OtherReg is a non-terminal.
4124     if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
4125       continue;
4126     // Check that OtherReg interfere with DstReg.
4127     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
4128       LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
4129                         << '\n');
4130       return true;
4131     }
4132   }
4133   return false;
4134 }
4135 
// Collect all copy-like instructions in \p MBB into the work lists and then
// try to coalesce them. Copies caught by the terminal rule are appended last
// so they are attempted only after all other copies in the block.
void RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");

  // Collect all copy-like instructions in MBB. Don't start coalescing anything
  // yet, it might invalidate the iterator.
  const unsigned PrevSize = WorkList.size();
  if (JoinGlobalCopies) {
    SmallVector<MachineInstr *, 2> LocalTerminals;
    SmallVector<MachineInstr *, 2> GlobalTerminals;
    // Coalesce copies bottom-up to coalesce local defs before local uses. They
    // are not inherently easier to resolve, but slightly preferable until we
    // have local live range splitting. In particular this is required by
    // cmp+jmp macro fusion.
    for (MachineInstr &MI : *MBB) {
      if (!MI.isCopyLike())
        continue;
      bool ApplyTerminalRule = applyTerminalRule(MI);
      // Local copies go on their own list so they are coalesced separately.
      if (isLocalCopy(&MI, LIS)) {
        if (ApplyTerminalRule)
          LocalTerminals.push_back(&MI);
        else
          LocalWorkList.push_back(&MI);
      } else {
        if (ApplyTerminalRule)
          GlobalTerminals.push_back(&MI);
        else
          WorkList.push_back(&MI);
      }
    }
    // Append the copies evicted by the terminal rule at the end of the list.
    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
  } else {
    SmallVector<MachineInstr *, 2> Terminals;
    for (MachineInstr &MII : *MBB)
      if (MII.isCopyLike()) {
        if (applyTerminalRule(MII))
          Terminals.push_back(&MII);
        else
          WorkList.push_back(&MII);
      }
    // Append the copies evicted by the terminal rule at the end of the list.
    WorkList.append(Terminals.begin(), Terminals.end());
  }
  // Try coalescing the collected copies immediately, and remove the nulls.
  // This prevents the WorkList from getting too large since most copies are
  // joinable on the first attempt.
  MutableArrayRef<MachineInstr *> CurrList(WorkList.begin() + PrevSize,
                                           WorkList.end());
  if (copyCoalesceWorkList(CurrList))
    WorkList.erase(
        std::remove(WorkList.begin() + PrevSize, WorkList.end(), nullptr),
        WorkList.end());
}
4190 
4191 void RegisterCoalescer::coalesceLocals() {
4192   copyCoalesceWorkList(LocalWorkList);
4193   for (MachineInstr *MI : LocalWorkList) {
4194     if (MI)
4195       WorkList.push_back(MI);
4196   }
4197   LocalWorkList.clear();
4198 }
4199 
4200 void RegisterCoalescer::joinAllIntervals() {
4201   LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
4202   assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
4203 
4204   std::vector<MBBPriorityInfo> MBBs;
4205   MBBs.reserve(MF->size());
4206   for (MachineBasicBlock &MBB : *MF) {
4207     MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
4208                                    JoinSplitEdges && isSplitEdge(&MBB)));
4209   }
4210   array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
4211 
4212   // Coalesce intervals in MBB priority order.
4213   unsigned CurrDepth = std::numeric_limits<unsigned>::max();
4214   for (MBBPriorityInfo &MBB : MBBs) {
4215     // Try coalescing the collected local copies for deeper loops.
4216     if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
4217       coalesceLocals();
4218       CurrDepth = MBB.Depth;
4219     }
4220     copyCoalesceInMBB(MBB.MBB);
4221   }
4222   lateLiveIntervalUpdate();
4223   coalesceLocals();
4224 
4225   // Joining intervals can allow other intervals to be joined.  Iteratively join
4226   // until we make no progress.
4227   while (copyCoalesceWorkList(WorkList))
4228     /* empty */;
4229   lateLiveIntervalUpdate();
4230 }
4231 
4232 void RegisterCoalescer::releaseMemory() {
4233   ErasedInstrs.clear();
4234   WorkList.clear();
4235   DeadDefs.clear();
4236   InflateRegs.clear();
4237   LargeLIVisitCounter.clear();
4238 }
4239 
// Pass entry point: set up analyses, run interval joining over the whole
// function, then perform cleanup (register-class inflation and debug-info
// PHI retargeting). Returns true since the function is (potentially)
// modified.
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
  LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
                    << "********** Function: " << fn.getName() << '\n');

  // Variables changed between a setjmp and a longjump can have undefined value
  // after the longjmp. This behaviour can be observed if such a variable is
  // spilled, so longjmp won't restore the value in the spill slot.
  // RegisterCoalescer should not run in functions with a setjmp to avoid
  // merging such undefined variables with predictable ones.
  //
  // TODO: Could specifically disable coalescing registers live across setjmp
  // calls
  if (fn.exposesReturnsTwice()) {
    LLVM_DEBUG(
        dbgs() << "* Skipped as it exposes functions that returns twice.\n");
    return false;
  }

  // Cache the analyses and subtarget hooks used throughout the pass.
  MF = &fn;
  MRI = &fn.getRegInfo();
  const TargetSubtargetInfo &STI = fn.getSubtarget();
  TRI = STI.getRegisterInfo();
  TII = STI.getInstrInfo();
  LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  // The command-line flag overrides the subtarget preference when set.
  if (EnableGlobalCopies == cl::BOU_UNSET)
    JoinGlobalCopies = STI.enableJoinGlobalCopies();
  else
    JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);

  // If there are PHIs tracked by debug-info, they will need updating during
  // coalescing. Build an index of those PHIs to ease updating.
  SlotIndexes *Slots = LIS->getSlotIndexes();
  for (const auto &DebugPHI : MF->DebugPHIPositions) {
    MachineBasicBlock *MBB = DebugPHI.second.MBB;
    Register Reg = DebugPHI.second.Reg;
    unsigned SubReg = DebugPHI.second.SubReg;
    SlotIndex SI = Slots->getMBBStartIdx(MBB);
    PHIValPos P = {SI, Reg, SubReg};
    PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
    RegToPHIIdx[Reg].push_back(DebugPHI.first);
  }

  // The MachineScheduler does not currently require JoinSplitEdges. This will
  // either be enabled unconditionally or replaced by a more general live range
  // splitting optimization.
  JoinSplitEdges = EnableJoinSplits;

  if (VerifyCoalescing)
    MF->verify(this, "Before register coalescing", &errs());

  // Index DBG_VALUEs by virtual register so joins can invalidate them.
  DbgVRegToValues.clear();
  buildVRegToDbgValueMap(fn);

  RegClassInfo.runOnMachineFunction(fn);

  // Join (coalesce) intervals if requested.
  if (EnableJoining)
    joinAllIntervals();

  // After deleting a lot of copies, register classes may be less constrained.
  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
  // DPR inflation.
  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
  InflateRegs.erase(llvm::unique(InflateRegs), InflateRegs.end());
  LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
                    << " regs.\n");
  for (Register Reg : InflateRegs) {
    // Registers with no remaining non-debug uses need no inflation.
    if (MRI->reg_nodbg_empty(Reg))
      continue;
    if (MRI->recomputeRegClass(Reg)) {
      LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
                        << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
      ++NumInflated;

      LiveInterval &LI = LIS->getInterval(Reg);
      if (LI.hasSubRanges()) {
        // If the inflated register class does not support subregisters anymore
        // remove the subranges.
        if (!MRI->shouldTrackSubRegLiveness(Reg)) {
          LI.clearSubRanges();
        } else {
#ifndef NDEBUG
          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
          // If subranges are still supported, then the same subregs
          // should still be supported.
          for (LiveInterval::SubRange &S : LI.subranges()) {
            assert((S.LaneMask & ~MaxMask).none());
          }
#endif
        }
      }
    }
  }

  // After coalescing, update any PHIs that are being tracked by debug-info
  // with their new VReg locations.
  for (auto &p : MF->DebugPHIPositions) {
    auto it = PHIValToPos.find(p.first);
    assert(it != PHIValToPos.end());
    p.second.Reg = it->second.Reg;
    p.second.SubReg = it->second.SubReg;
  }

  // Per-function debug-info indexes are no longer needed.
  PHIValToPos.clear();
  RegToPHIIdx.clear();

  LLVM_DEBUG(dump());
  if (VerifyCoalescing)
    MF->verify(this, "After register coalescing", &errs());
  return true;
}
4352 
// Print the pass state by delegating to LiveIntervals, which holds the
// intervals this pass has been merging.
void RegisterCoalescer::print(raw_ostream &O, const Module *m) const {
  LIS->print(O);
}
4356