1 //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the X86 implementation of the TargetRegisterInfo class.
10 // This file is responsible for the frame pointer elimination optimization
11 // on X86.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86RegisterInfo.h"
16 #include "X86FrameLowering.h"
17 #include "X86MachineFunctionInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/CodeGen/LiveRegMatrix.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetFrameLowering.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/CodeGen/TileShapeInfo.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/MC/MCContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Target/TargetOptions.h"
37 
38 using namespace llvm;
39 
40 #define GET_REGINFO_TARGET_DESC
41 #include "X86GenRegisterInfo.inc"
42 
43 static cl::opt<bool>
44 EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
45           cl::desc("Enable use of a base pointer for complex stack frames"));
46 
47 static cl::opt<bool>
48     DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
49                             cl::init(false),
50                             cl::desc("Disable two address hints for register "
51                                      "allocation"));
52 
53 X86RegisterInfo::X86RegisterInfo(const Triple &TT)
54     : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
55                          X86_MC::getDwarfRegFlavour(TT, false),
56                          X86_MC::getDwarfRegFlavour(TT, true),
57                          (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
58   X86_MC::initLLVMToSEHAndCVRegMapping(this);
59 
60   // Cache some information.
61   Is64Bit = TT.isArch64Bit();
62   IsWin64 = Is64Bit && TT.isOSWindows();
63 
64   // Use a callee-saved register as the base pointer.  These registers must
65   // not conflict with any ABI requirements.  For example, in 32-bit PIC mode
66   // the GOT pointer must be in EBX before any call made through the PLT.
67   if (Is64Bit) {
68     SlotSize = 8;
69     // This matches the simplified 32-bit pointer code in the data layout
70     // computation.
71     // FIXME: Should use the data layout?
72     bool Use64BitReg = !TT.isX32();
73     StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
74     FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
75     BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
76   } else {
77     SlotSize = 4;
78     StackPtr = X86::ESP;
79     FramePtr = X86::EBP;
80     BasePtr = X86::ESI;
81   }
82 }
83 
84 const TargetRegisterClass *
85 X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
86                                        unsigned Idx) const {
87   // The sub_8bit sub-register index is more constrained in 32-bit mode.
88   // It behaves just like the sub_8bit_hi index.
89   if (!Is64Bit && Idx == X86::sub_8bit)
90     Idx = X86::sub_8bit_hi;
91 
92   // Forward to TableGen's default version.
93   return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
94 }
95 
96 const TargetRegisterClass *
97 X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
98                                           const TargetRegisterClass *B,
99                                           unsigned SubIdx) const {
100   // The sub_8bit sub-register index is more constrained in 32-bit mode.
101   if (!Is64Bit && SubIdx == X86::sub_8bit) {
102     A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
103     if (!A)
104       return nullptr;
105   }
106   return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
107 }
108 
109 const TargetRegisterClass *
110 X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
111                                            const MachineFunction &MF) const {
112   // Don't allow super-classes of GR8_NOREX.  This class is only used after
113   // extracting sub_8bit_hi sub-registers.  The H sub-registers cannot be copied
114   // to the full GR8 register class in 64-bit mode, so we cannot allow the
115   // register class inflation.
116   //
117   // The GR8_NOREX class is always used in a way that won't be constrained to a
118   // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
119   // full GR8 class.
120   if (RC == &X86::GR8_NOREXRegClass)
121     return RC;
122 
123   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
124 
125   const TargetRegisterClass *Super = RC;
126   auto I = RC->superclasses().begin();
127   auto E = RC->superclasses().end();
128   do {
129     switch (Super->getID()) {
130     case X86::FR32RegClassID:
131     case X86::FR64RegClassID:
132       // If AVX-512 isn't supported we should only inflate to these classes.
133       if (!Subtarget.hasAVX512() &&
134           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
135         return Super;
136       break;
137     case X86::VR128RegClassID:
138     case X86::VR256RegClassID:
139       // If VLX isn't supported we should only inflate to these classes.
140       if (!Subtarget.hasVLX() &&
141           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
142         return Super;
143       break;
144     case X86::VR128XRegClassID:
145     case X86::VR256XRegClassID:
146       // If VLX isn't supported we shouldn't inflate to these classes.
147       if (Subtarget.hasVLX() &&
148           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
149         return Super;
150       break;
151     case X86::FR32XRegClassID:
152     case X86::FR64XRegClassID:
153       // If AVX-512 isn't supported we shouldn't inflate to these classes.
154       if (Subtarget.hasAVX512() &&
155           getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
156         return Super;
157       break;
158     case X86::GR8RegClassID:
159     case X86::GR16RegClassID:
160     case X86::GR32RegClassID:
161     case X86::GR64RegClassID:
162     case X86::GR8_NOREX2RegClassID:
163     case X86::GR16_NOREX2RegClassID:
164     case X86::GR32_NOREX2RegClassID:
165     case X86::GR64_NOREX2RegClassID:
166     case X86::RFP32RegClassID:
167     case X86::RFP64RegClassID:
168     case X86::RFP80RegClassID:
169     case X86::VR512_0_15RegClassID:
170     case X86::VR512RegClassID:
171       // Don't return a super-class that would shrink the spill size.
172       // That can happen with the vector and float classes.
173       if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
174         return Super;
175     }
176     if (I != E) {
177       Super = getRegClass(*I);
178       ++I;
179     } else {
180       Super = nullptr;
181     }
182   } while (Super);
183   return RC;
184 }
185 
186 const TargetRegisterClass *
187 X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
188                                     unsigned Kind) const {
189   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
190   switch (Kind) {
191   default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
192   case 0: // Normal GPRs.
193     if (Subtarget.isTarget64BitLP64())
194       return &X86::GR64RegClass;
195     // If the target is 64-bit but we have been told to use 32-bit addresses,
196     // we can still use a 64-bit register as long as we know the high bits
197     // are zero.
198     // Reflect that in the returned register class.
199     if (Is64Bit) {
200       // When the target also allows a 64-bit frame pointer and we do have a
201       // frame, it is fine to use it for address accesses as well.
202       const X86FrameLowering *TFI = getFrameLowering(MF);
203       return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
204                  ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
205                  : &X86::LOW32_ADDR_ACCESSRegClass;
206     }
207     return &X86::GR32RegClass;
208   case 1: // Normal GPRs except the stack pointer (for encoding reasons).
209     if (Subtarget.isTarget64BitLP64())
210       return &X86::GR64_NOSPRegClass;
211     // NOSP does not contain RIP, so no special case here.
212     return &X86::GR32_NOSPRegClass;
213   case 2: // NOREX GPRs.
214     if (Subtarget.isTarget64BitLP64())
215       return &X86::GR64_NOREXRegClass;
216     return &X86::GR32_NOREXRegClass;
217   case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
218     if (Subtarget.isTarget64BitLP64())
219       return &X86::GR64_NOREX_NOSPRegClass;
220     // NOSP does not contain RIP, so no special case here.
221     return &X86::GR32_NOREX_NOSPRegClass;
222   case 4: // Available for tailcall (not callee-saved GPRs).
223     return getGPRsForTailCall(MF);
224   }
225 }
226 
227 bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
228                                            unsigned DefSubReg,
229                                            const TargetRegisterClass *SrcRC,
230                                            unsigned SrcSubReg) const {
231   // Prevent rewriting a copy where the destination size is larger than the
232   // input size. See PR41619.
233   // FIXME: Should this be factored into the base implementation somehow?
234   if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
235       SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
236     return false;
237 
238   return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
239                                                   SrcRC, SrcSubReg);
240 }
241 
242 const TargetRegisterClass *
243 X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
244   const Function &F = MF.getFunction();
245   if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
246     return &X86::GR64_TCW64RegClass;
247   else if (Is64Bit)
248     return &X86::GR64_TCRegClass;
249 
250   bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
251   if (hasHipeCC)
252     return &X86::GR32RegClass;
253   return &X86::GR32_TCRegClass;
254 }
255 
256 const TargetRegisterClass *
257 X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
258   if (RC == &X86::CCRRegClass) {
259     if (Is64Bit)
260       return &X86::GR64RegClass;
261     else
262       return &X86::GR32RegClass;
263   }
264   return RC;
265 }
266 
267 unsigned
268 X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
269                                      MachineFunction &MF) const {
270   const X86FrameLowering *TFI = getFrameLowering(MF);
271 
272   unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
273   switch (RC->getID()) {
274   default:
275     return 0;
276   case X86::GR32RegClassID:
277     return 4 - FPDiff;
278   case X86::GR64RegClassID:
279     return 12 - FPDiff;
280   case X86::VR128RegClassID:
281     return Is64Bit ? 10 : 4;
282   case X86::VR64RegClassID:
283     return 4;
284   }
285 }
286 
287 const MCPhysReg *
288 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
289   assert(MF && "MachineFunction required");
290 
291   const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
292   const Function &F = MF->getFunction();
293   bool HasSSE = Subtarget.hasSSE1();
294   bool HasAVX = Subtarget.hasAVX();
295   bool HasAVX512 = Subtarget.hasAVX512();
296   bool CallsEHReturn = MF->callsEHReturn();
297 
298   CallingConv::ID CC = F.getCallingConv();
299 
300   // If the no_caller_saved_registers attribute is present, switch to the
301   // X86_INTR calling convention because it has the CSR list we need.
302   if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
303     CC = CallingConv::X86_INTR;
304 
305   // If the attribute is specified, override the CSRs normally specified by
306   // the calling convention and use the empty set instead.
307   if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
308     return CSR_NoRegs_SaveList;
309 
310   switch (CC) {
311   case CallingConv::GHC:
312   case CallingConv::HiPE:
313     return CSR_NoRegs_SaveList;
314   case CallingConv::AnyReg:
315     if (HasAVX)
316       return CSR_64_AllRegs_AVX_SaveList;
317     return CSR_64_AllRegs_SaveList;
318   case CallingConv::PreserveMost:
319     return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
320                    : CSR_64_RT_MostRegs_SaveList;
321   case CallingConv::PreserveAll:
322     if (HasAVX)
323       return CSR_64_RT_AllRegs_AVX_SaveList;
324     return CSR_64_RT_AllRegs_SaveList;
325   case CallingConv::PreserveNone:
326     return CSR_64_NoneRegs_SaveList;
327   case CallingConv::CXX_FAST_TLS:
328     if (Is64Bit)
329       return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
330              CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
331     break;
332   case CallingConv::Intel_OCL_BI: {
333     if (HasAVX512 && IsWin64)
334       return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
335     if (HasAVX512 && Is64Bit)
336       return CSR_64_Intel_OCL_BI_AVX512_SaveList;
337     if (HasAVX && IsWin64)
338       return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
339     if (HasAVX && Is64Bit)
340       return CSR_64_Intel_OCL_BI_AVX_SaveList;
341     if (!HasAVX && !IsWin64 && Is64Bit)
342       return CSR_64_Intel_OCL_BI_SaveList;
343     break;
344   }
345   case CallingConv::X86_RegCall:
346     if (Is64Bit) {
347       if (IsWin64) {
348         return (HasSSE ? CSR_Win64_RegCall_SaveList :
349                          CSR_Win64_RegCall_NoSSE_SaveList);
350       } else {
351         return (HasSSE ? CSR_SysV64_RegCall_SaveList :
352                          CSR_SysV64_RegCall_NoSSE_SaveList);
353       }
354     } else {
355       return (HasSSE ? CSR_32_RegCall_SaveList :
356                        CSR_32_RegCall_NoSSE_SaveList);
357     }
358   case CallingConv::CFGuard_Check:
359     assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
360     return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
361                    : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
362   case CallingConv::Cold:
363     if (Is64Bit)
364       return CSR_64_MostRegs_SaveList;
365     break;
366   case CallingConv::Win64:
367     if (!HasSSE)
368       return CSR_Win64_NoSSE_SaveList;
369     return CSR_Win64_SaveList;
370   case CallingConv::SwiftTail:
371     if (!Is64Bit)
372       return CSR_32_SaveList;
373     return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
374   case CallingConv::X86_64_SysV:
375     if (CallsEHReturn)
376       return CSR_64EHRet_SaveList;
377     return CSR_64_SaveList;
378   case CallingConv::X86_INTR:
379     if (Is64Bit) {
380       if (HasAVX512)
381         return CSR_64_AllRegs_AVX512_SaveList;
382       if (HasAVX)
383         return CSR_64_AllRegs_AVX_SaveList;
384       if (HasSSE)
385         return CSR_64_AllRegs_SaveList;
386       return CSR_64_AllRegs_NoSSE_SaveList;
387     } else {
388       if (HasAVX512)
389         return CSR_32_AllRegs_AVX512_SaveList;
390       if (HasAVX)
391         return CSR_32_AllRegs_AVX_SaveList;
392       if (HasSSE)
393         return CSR_32_AllRegs_SSE_SaveList;
394       return CSR_32_AllRegs_SaveList;
395     }
396   default:
397     break;
398   }
399 
400   if (Is64Bit) {
401     bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
402                      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
403     if (IsSwiftCC)
404       return IsWin64 ? CSR_Win64_SwiftError_SaveList
405                      : CSR_64_SwiftError_SaveList;
406 
407     if (IsWin64)
408       return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
409     if (CallsEHReturn)
410       return CSR_64EHRet_SaveList;
411     return CSR_64_SaveList;
412   }
413 
414   return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
415 }
416 
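// Callee-saved register list to use when inter-procedural register allocation
// (IPRA) is enabled.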
417 const MCPhysReg *
418 X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
419   return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
420 }
421 
422 const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
423     const MachineFunction *MF) const {
424   assert(MF && "Invalid MachineFunction pointer.");
425   if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
426       MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
427     return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
428   return nullptr;
429 }
430 
431 const uint32_t *
432 X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
433                                       CallingConv::ID CC) const {
434   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
435   bool HasSSE = Subtarget.hasSSE1();
436   bool HasAVX = Subtarget.hasAVX();
437   bool HasAVX512 = Subtarget.hasAVX512();
438 
439   switch (CC) {
440   case CallingConv::GHC:
441   case CallingConv::HiPE:
442     return CSR_NoRegs_RegMask;
443   case CallingConv::AnyReg:
444     if (HasAVX)
445       return CSR_64_AllRegs_AVX_RegMask;
446     return CSR_64_AllRegs_RegMask;
447   case CallingConv::PreserveMost:
448     return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
449   case CallingConv::PreserveAll:
450     if (HasAVX)
451       return CSR_64_RT_AllRegs_AVX_RegMask;
452     return CSR_64_RT_AllRegs_RegMask;
453   case CallingConv::PreserveNone:
454     return CSR_64_NoneRegs_RegMask;
455   case CallingConv::CXX_FAST_TLS:
456     if (Is64Bit)
457       return CSR_64_TLS_Darwin_RegMask;
458     break;
459   case CallingConv::Intel_OCL_BI: {
460     if (HasAVX512 && IsWin64)
461       return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
462     if (HasAVX512 && Is64Bit)
463       return CSR_64_Intel_OCL_BI_AVX512_RegMask;
464     if (HasAVX && IsWin64)
465       return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
466     if (HasAVX && Is64Bit)
467       return CSR_64_Intel_OCL_BI_AVX_RegMask;
468     if (!HasAVX && !IsWin64 && Is64Bit)
469       return CSR_64_Intel_OCL_BI_RegMask;
470     break;
471   }
472   case CallingConv::X86_RegCall:
473     if (Is64Bit) {
474       if (IsWin64) {
475         return (HasSSE ? CSR_Win64_RegCall_RegMask :
476                          CSR_Win64_RegCall_NoSSE_RegMask);
477       } else {
478         return (HasSSE ? CSR_SysV64_RegCall_RegMask :
479                          CSR_SysV64_RegCall_NoSSE_RegMask);
480       }
481     } else {
482       return (HasSSE ? CSR_32_RegCall_RegMask :
483                        CSR_32_RegCall_NoSSE_RegMask);
484     }
485   case CallingConv::CFGuard_Check:
486     assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
487     return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
488                    : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
489   case CallingConv::Cold:
490     if (Is64Bit)
491       return CSR_64_MostRegs_RegMask;
492     break;
493   case CallingConv::Win64:
494     return CSR_Win64_RegMask;
495   case CallingConv::SwiftTail:
496     if (!Is64Bit)
497       return CSR_32_RegMask;
498     return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
499   case CallingConv::X86_64_SysV:
500     return CSR_64_RegMask;
501   case CallingConv::X86_INTR:
502     if (Is64Bit) {
503       if (HasAVX512)
504         return CSR_64_AllRegs_AVX512_RegMask;
505       if (HasAVX)
506         return CSR_64_AllRegs_AVX_RegMask;
507       if (HasSSE)
508         return CSR_64_AllRegs_RegMask;
509       return CSR_64_AllRegs_NoSSE_RegMask;
510     } else {
511       if (HasAVX512)
512         return CSR_32_AllRegs_AVX512_RegMask;
513       if (HasAVX)
514         return CSR_32_AllRegs_AVX_RegMask;
515       if (HasSSE)
516         return CSR_32_AllRegs_SSE_RegMask;
517       return CSR_32_AllRegs_RegMask;
518     }
519   default:
520     break;
521   }
522 
523   // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
524   // callsEHReturn().
525   if (Is64Bit) {
526     const Function &F = MF.getFunction();
527     bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
528                      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
529     if (IsSwiftCC)
530       return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;
531 
532     return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
533   }
534 
535   return CSR_32_RegMask;
536 }
537 
538 const uint32_t*
539 X86RegisterInfo::getNoPreservedMask() const {
540   return CSR_NoRegs_RegMask;
541 }
542 
543 const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
544   return CSR_64_TLS_Darwin_RegMask;
545 }
546 
547 BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
548   BitVector Reserved(getNumRegs());
549   const X86FrameLowering *TFI = getFrameLowering(MF);
550 
551   // Set the floating point control register as reserved.
552   Reserved.set(X86::FPCW);
553 
554   // Set the floating point status register as reserved.
555   Reserved.set(X86::FPSW);
556 
557   // Set the SIMD floating point control register as reserved.
558   Reserved.set(X86::MXCSR);
559 
560   // Set the stack-pointer register and its aliases as reserved.
561   for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
562     Reserved.set(SubReg);
563 
564   // Set the Shadow Stack Pointer as reserved.
565   Reserved.set(X86::SSP);
566 
567   // Set the instruction pointer register and its aliases as reserved.
568   for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
569     Reserved.set(SubReg);
570 
571   // Set the frame-pointer register and its aliases as reserved if needed.
572   if (TFI->hasFP(MF)) {
573     if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
574       MF.getContext().reportError(
575           SMLoc(),
576           "Frame pointer clobbered by function invoke is not supported.");
577 
578     for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
579       Reserved.set(SubReg);
580   }
581 
582   // Set the base-pointer register and its aliases as reserved if needed.
583   if (hasBasePointer(MF)) {
584     if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
585       MF.getContext().reportError(SMLoc(),
586                                   "Stack realignment in presence of dynamic "
587                                   "allocas is not supported with "
588                                   "this calling convention.");
589 
590     Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
591     for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
592       Reserved.set(SubReg);
593   }
594 
595   // Mark the segment registers as reserved.
596   Reserved.set(X86::CS);
597   Reserved.set(X86::SS);
598   Reserved.set(X86::DS);
599   Reserved.set(X86::ES);
600   Reserved.set(X86::FS);
601   Reserved.set(X86::GS);
602 
603   // Mark the floating point stack registers as reserved.
604   for (unsigned n = 0; n != 8; ++n)
605     Reserved.set(X86::ST0 + n);
606 
607   // Reserve the registers that only exist in 64-bit mode.
608   if (!Is64Bit) {
609     // These 8-bit registers are part of the x86-64 extension even though their
610     // super-registers are the old 32-bit registers.
611     Reserved.set(X86::SIL);
612     Reserved.set(X86::DIL);
613     Reserved.set(X86::BPL);
614     Reserved.set(X86::SPL);
615     Reserved.set(X86::SIH);
616     Reserved.set(X86::DIH);
617     Reserved.set(X86::BPH);
618     Reserved.set(X86::SPH);
619 
620     for (unsigned n = 0; n != 8; ++n) {
621       // R8, R9, ...
622       for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
623         Reserved.set(*AI);
624 
625       // XMM8, XMM9, ...
626       for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
627         Reserved.set(*AI);
628     }
629   }
630   if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
631     for (unsigned n = 0; n != 16; ++n) {
632       for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
633            ++AI)
634         Reserved.set(*AI);
635     }
636   }
637 
638   // Reserve the extended general purpose registers.
639   if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
640     Reserved.set(X86::R16, X86::R31WH + 1);
641 
642   if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
643     for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
644       Reserved.set(*AI);
645     for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
646       Reserved.set(*AI);
647   }
648 
649   // Reserve the low tile pair registers so RA does not use them aggressively.
650   Reserved.set(X86::TMM0_TMM1);
651   Reserved.set(X86::TMM2_TMM3);
652 
653   assert(checkAllSuperRegsMarked(Reserved,
654                                  {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
655                                   X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
656   return Reserved;
657 }
658 
659 unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
660   // All existing Intel CPUs that support AMX support AVX512 and all existing
661   // Intel CPUs that support APX support AMX. AVX512 implies AVX.
662   //
663   // We enumerate the registers in X86GenRegisterInfo.inc in this order:
664   //
665   // Registers before AVX512,
666   // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
667   // AMX registers (TMM)
668   // APX registers (R16-R31)
669   //
670   // and try to return the minimum number of registers supported by the target.
671   static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
672                     (X86::K6_K7 + 1 == X86::TMMCFG) &&
673                     (X86::TMM6_TMM7 + 1 == X86::R16) &&
674                     (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
675                 "Register number may be incorrect");
676 
677   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
678   if (ST.hasEGPR())
679     return X86::NUM_TARGET_REGS;
680   if (ST.hasAMXTILE())
681     return X86::TMM7 + 1;
682   if (ST.hasAVX512())
683     return X86::K6_K7 + 1;
684   if (ST.hasAVX())
685     return X86::YMM15 + 1;
686   return X86::R15WH + 1;
687 }
688 
689 bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
690                                          MCRegister Reg) const {
691   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
692   const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
693   auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
694     return TRI.isSuperOrSubRegisterEq(RegA, RegB);
695   };
696 
697   if (!ST.is64Bit())
698     return llvm::any_of(
699                SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
700                [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
701            (ST.hasMMX() && X86::VR64RegClass.contains(Reg));
702 
703   CallingConv::ID CC = MF.getFunction().getCallingConv();
704 
705   if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
706     return true;
707 
708   if (llvm::any_of(
709           SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
710           [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
711     return true;
712 
713   if (CC != CallingConv::Win64 &&
714       llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
715                    [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
716     return true;
717 
718   if (ST.hasSSE1() &&
719       llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
720                                            X86::XMM3, X86::XMM4, X86::XMM5,
721                                            X86::XMM6, X86::XMM7},
722                    [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
723     return true;
724 
725   return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
726 }
727 
728 bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
729                                       MCRegister PhysReg) const {
730   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
731   const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
732 
733   // Stack pointer.
734   if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
735     return true;
736 
737   // The frame pointer is fixed whenever the function uses one.
738   const X86FrameLowering &TFI = *getFrameLowering(MF);
739   if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
740     return true;
741 
742   return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
743 }
744 
745 bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
746   return RC->getID() == X86::TILERegClassID ||
747          RC->getID() == X86::TILEPAIRRegClassID;
748 }
749 
750 void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
751   // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
752   // because the calling convention defines the EFLAGS register as NOT
753   // preserved.
754   //
755   // Unfortunately, EFLAGS can show up as live-out after branch folding. We add
756   // an assert to track this and clear the register afterwards to avoid
757   // unnecessary crashes during release builds.
758   assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
759          "EFLAGS are not live-out from a patchpoint.");
760 
761   // Also clear other registers that don't need preserving (the IP registers).
762   for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
763     Mask[Reg / 32] &= ~(1U << (Reg % 32));
764 }
765 
766 //===----------------------------------------------------------------------===//
767 // Stack Frame Processing methods
768 //===----------------------------------------------------------------------===//
769 
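// The stack pointer cannot be used to address the stack when the frame has
// variable-sized objects or opaque stack-pointer adjustments.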
770 static bool CantUseSP(const MachineFrameInfo &MFI) {
771   return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
772 }
773 
774 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
775   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
776   // We have a virtual register to reference the argument, and don't need a
777   // base pointer.
778   if (X86FI->getStackPtrSaveMI() != nullptr)
779     return false;
780 
781   if (X86FI->hasPreallocatedCall())
782     return true;
783 
784   const MachineFrameInfo &MFI = MF.getFrameInfo();
785 
786   if (!EnableBasePointer)
787     return false;
788 
789   // When we need stack realignment, we can't address the stack from the frame
790   // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
791   // can't address variables from the stack pointer.  MS inline asm can
792   // reference locals while also adjusting the stack pointer.  When we can't
793   // use both the SP and the FP, we need a separate base pointer register.
794   bool CantUseFP = hasStackRealignment(MF);
795   return CantUseFP && CantUseSP(MFI);
796 }
797 
798 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
799   if (!TargetRegisterInfo::canRealignStack(MF))
800     return false;
801 
802   const MachineFrameInfo &MFI = MF.getFrameInfo();
803   const MachineRegisterInfo *MRI = &MF.getRegInfo();
804 
805   // Stack realignment requires a frame pointer.  If we already started
806   // register allocation with frame pointer elimination, it is too late now.
807   if (!MRI->canReserveReg(FramePtr))
808     return false;
809 
810   // If a base pointer is necessary, check that it isn't too late to reserve
811   // it.
812   if (CantUseSP(MFI))
813     return MRI->canReserveReg(BasePtr);
814   return true;
815 }
816 
817 bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
818   if (TargetRegisterInfo::shouldRealignStack(MF))
819     return true;
820 
821   return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
822 }
823 
824 // tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
825 // of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
826 // TODO: In this case we should really try first to entirely eliminate this
827 // instruction, which is a plain copy.
828 static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
829   MachineInstr &MI = *II;
830   unsigned Opc = II->getOpcode();
831   // Check if this is a LEA of the form 'lea (%esp), %ebx'
832   if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
833       MI.getOperand(2).getImm() != 1 ||
834       MI.getOperand(3).getReg() != X86::NoRegister ||
835       MI.getOperand(4).getImm() != 0 ||
836       MI.getOperand(5).getReg() != X86::NoRegister)
837     return false;
838   Register BasePtr = MI.getOperand(1).getReg();
839   // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
840   // be replaced with a 32-bit operand MOV, which implicitly zeroes the upper
841   // 32 bits of the super register.
842   if (Opc == X86::LEA64_32r)
843     BasePtr = getX86SubSuperRegister(BasePtr, 32);
844   Register NewDestReg = MI.getOperand(0).getReg();
845   const X86InstrInfo *TII =
846       MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
847   TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
848                    MI.getOperand(1).isKill());
849   MI.eraseFromParent();
850   return true;
851 }
852 
853 static bool isFuncletReturnInstr(MachineInstr &MI) {
854   switch (MI.getOpcode()) {
855   case X86::CATCHRET:
856   case X86::CLEANUPRET:
857     return true;
858   default:
859     return false;
860   }
861   llvm_unreachable("impossible");
862 }
863 
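// Resolve the frame-index operand of MI to BaseReg plus FIOffset, handling the
// special operand layouts of LOCAL_ESCAPE, STACKMAP and PATCHPOINT; everything
// else is treated as a standard X86 memory operand.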
864 void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
865                                           unsigned FIOperandNum,
866                                           Register BaseReg,
867                                           int FIOffset) const {
868   MachineInstr &MI = *II;
869   unsigned Opc = MI.getOpcode();
870   if (Opc == TargetOpcode::LOCAL_ESCAPE) {
871     MachineOperand &FI = MI.getOperand(FIOperandNum);
872     FI.ChangeToImmediate(FIOffset);
873     return;
874   }
875 
876   MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
877 
878   // The frame index format for stackmaps and patchpoints is different from the
879   // X86 format. It only has a FI and an offset.
880   if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
881     assert(BasePtr == FramePtr && "Expected the FP as base register");
882     int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
883     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
884     return;
885   }
886 
887   if (MI.getOperand(FIOperandNum + 3).isImm()) {
888     // Offset is a 32-bit integer.
889     int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
890     int Offset = FIOffset + Imm;
891     assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
892            "Requesting 64-bit offset in 32-bit immediate!");
893     if (Offset != 0)
894       MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
895   } else {
896     // Offset is symbolic. This is extremely rare.
897     uint64_t Offset =
898         FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
899     MI.getOperand(FIOperandNum + 3).setOffset(Offset);
900   }
901 }
902 
903 bool
904 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
905                                      int SPAdj, unsigned FIOperandNum,
906                                      RegScavenger *RS) const {
907   MachineInstr &MI = *II;
908   MachineBasicBlock &MBB = *MI.getParent();
909   MachineFunction &MF = *MBB.getParent();
910   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
911   bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
912                                                : isFuncletReturnInstr(*MBBI);
913   const X86FrameLowering *TFI = getFrameLowering(MF);
914   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
915 
916   // Determine base register and offset.
917   int FIOffset;
918   Register BasePtr;
919   if (MI.isReturn()) {
920     assert((!hasStackRealignment(MF) ||
921             MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
922            "Return instruction can only reference SP relative frame objects");
923     FIOffset =
924         TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
925   } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
926     FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
927   } else {
928     FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
929   }
930 
931   // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
932   // simple FP case, and doesn't work with stack realignment. On 32-bit, the
933   // offset is from the traditional base pointer location.  On 64-bit, the
934   // offset is from the SP at the end of the prologue, not the FP location. This
935   // matches the behavior of llvm.frameaddress.
936   unsigned Opc = MI.getOpcode();
937   if (Opc == TargetOpcode::LOCAL_ESCAPE) {
938     MachineOperand &FI = MI.getOperand(FIOperandNum);
939     FI.ChangeToImmediate(FIOffset);
940     return false;
941   }
942 
943   // For LEA64_32r, when BasePtr is 32 bits (X32) we can use the full-size
944   // 64-bit register as the source operand; the semantics are the same and the
945   // destination is 32 bits. This saves one byte per LEA (no 0x67 prefix).
946   // Don't change BasePtr since it is used later for stack adjustment.
947   Register MachineBasePtr = BasePtr;
948   if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
949     MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);
950 
951   // This must be part of a four operand memory reference.  Replace the
952   // FrameIndex with base register.  Add an offset to the offset.
953   MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);
954 
955   if (BasePtr == StackPtr)
956     FIOffset += SPAdj;
957 
958   // The frame index format for stackmaps and patchpoints is different from the
959   // X86 format. It only has a FI and an offset.
960   if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
961     assert(BasePtr == FramePtr && "Expected the FP as base register");
962     int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
963     MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
964     return false;
965   }
966 
967   if (MI.getOperand(FIOperandNum+3).isImm()) {
968     // Offset is a 32-bit integer.
969     int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
970     int Offset = FIOffset + Imm;
971     assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
972            "Requesting 64-bit offset in 32-bit immediate!");
973     if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
974       MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
975   } else {
976     // Offset is symbolic. This is extremely rare.
977     uint64_t Offset = FIOffset +
978       (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
979     MI.getOperand(FIOperandNum + 3).setOffset(Offset);
980   }
981   return false;
982 }
983 
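// Look for a caller-saved GPR that is not used by the return or tail-call
// instruction at MBBI and can therefore be clobbered, or return 0 if none is
// available (functions that call eh.return cannot spare one).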
984 unsigned X86RegisterInfo::findDeadCallerSavedReg(
985     MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
986   const MachineFunction *MF = MBB.getParent();
987   if (MF->callsEHReturn())
988     return 0;
989 
990   const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);
991 
992   if (MBBI == MBB.end())
993     return 0;
994 
995   switch (MBBI->getOpcode()) {
996   default:
997     return 0;
998   case TargetOpcode::PATCHABLE_RET:
999   case X86::RET:
1000   case X86::RET32:
1001   case X86::RET64:
1002   case X86::RETI32:
1003   case X86::RETI64:
1004   case X86::TCRETURNdi:
1005   case X86::TCRETURNri:
1006   case X86::TCRETURNmi:
1007   case X86::TCRETURNdi64:
1008   case X86::TCRETURNri64:
1009   case X86::TCRETURNmi64:
1010   case X86::EH_RETURN:
1011   case X86::EH_RETURN64: {
1012     SmallSet<uint16_t, 8> Uses;
1013     for (MachineOperand &MO : MBBI->operands()) {
1014       if (!MO.isReg() || MO.isDef())
1015         continue;
1016       Register Reg = MO.getReg();
1017       if (!Reg)
1018         continue;
1019       for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
1020         Uses.insert(*AI);
1021     }
1022 
1023     for (auto CS : AvailableRegs)
1024       if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
1025         return CS;
1026   }
1027   }
1028 
1029   return 0;
1030 }
1031 
1032 Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
1033   const X86FrameLowering *TFI = getFrameLowering(MF);
1034   return TFI->hasFP(MF) ? FramePtr : StackPtr;
1035 }
1036 
1037 unsigned
1038 X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
1039   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
1040   Register FrameReg = getFrameRegister(MF);
1041   if (Subtarget.isTarget64BitILP32())
1042     FrameReg = getX86SubSuperRegister(FrameReg, 32);
1043   return FrameReg;
1044 }
1045 
1046 unsigned
1047 X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
1048   const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
1049   Register StackReg = getStackRegister();
1050   if (Subtarget.isTarget64BitILP32())
1051     StackReg = getX86SubSuperRegister(StackReg, 32);
1052   return StackReg;
1053 }
1054 
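// Look up (or compute and cache) the AMX tile shape associated with VirtReg.
// The shape comes from the row/column operands of the defining instruction,
// following COPYs back to their source.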
1055 static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
1056                            const MachineRegisterInfo *MRI) {
1057   if (VRM->hasShape(VirtReg))
1058     return VRM->getShape(VirtReg);
1059 
1060   const MachineOperand &Def = *MRI->def_begin(VirtReg);
1061   MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
1062   unsigned OpCode = MI->getOpcode();
1063   switch (OpCode) {
1064   default:
1065     llvm_unreachable("Unexpected machine instruction on tile register!");
1066     break;
1067   case X86::COPY: {
1068     Register SrcReg = MI->getOperand(1).getReg();
1069     ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
1070     VRM->assignVirt2Shape(VirtReg, Shape);
1071     return Shape;
1072   }
1073   // We only collect the tile shape that is defined.
1074   case X86::PTILELOADDV:
1075   case X86::PTILELOADDT1V:
1076   case X86::PTDPBSSDV:
1077   case X86::PTDPBSUDV:
1078   case X86::PTDPBUSDV:
1079   case X86::PTDPBUUDV:
1080   case X86::PTILEZEROV:
1081   case X86::PTDPBF16PSV:
1082   case X86::PTDPFP16PSV:
1083   case X86::PTCMMIMFP16PSV:
1084   case X86::PTCMMRLFP16PSV:
1085   case X86::PTTRANSPOSEDV:
1086   case X86::PTTDPBF16PSV:
1087   case X86::PTTDPFP16PSV:
1088   case X86::PTTCMMIMFP16PSV:
1089   case X86::PTTCMMRLFP16PSV:
1090   case X86::PTCONJTCMMIMFP16PSV:
1091   case X86::PTCONJTFP16V:
1092   case X86::PTILELOADDRSV:
1093   case X86::PTILELOADDRST1V:
1094   case X86::PTMMULTF32PSV:
1095   case X86::PTTMMULTF32PSV:
1096   case X86::PTDPBF8PSV:
1097   case X86::PTDPBHF8PSV:
1098   case X86::PTDPHBF8PSV:
1099   case X86::PTDPHF8PSV: {
1100     MachineOperand &MO1 = MI->getOperand(1);
1101     MachineOperand &MO2 = MI->getOperand(2);
1102     ShapeT Shape(&MO1, &MO2, MRI);
1103     VRM->assignVirt2Shape(VirtReg, Shape);
1104     return Shape;
1105   }
1106   case X86::PT2RPNTLVWZ0V:
1107   case X86::PT2RPNTLVWZ0T1V:
1108   case X86::PT2RPNTLVWZ1V:
1109   case X86::PT2RPNTLVWZ1T1V:
1110   case X86::PT2RPNTLVWZ0RSV:
1111   case X86::PT2RPNTLVWZ0RST1V:
1112   case X86::PT2RPNTLVWZ1RSV:
1113   case X86::PT2RPNTLVWZ1RST1V: {
1114     MachineOperand &MO1 = MI->getOperand(1);
1115     MachineOperand &MO2 = MI->getOperand(2);
1116     MachineOperand &MO3 = MI->getOperand(3);
1117     ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
1118     VRM->assignVirt2Shape(VirtReg, Shape);
1119     return Shape;
1120   }
1121   }
1122 }
1123 
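// Return true if a physical tile register already carrying PhysShape can be
// hinted for a virtual register that requires VirtShape. A multi-tile physical
// shape can also satisfy a single-tile virtual shape if one of its component
// shapes matches.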
1124 static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
1125   unsigned PhysShapeNum = PhysShape.getShapeNum();
1126   unsigned VirtShapeNum = VirtShape.getShapeNum();
1127 
1128   if (PhysShapeNum < VirtShapeNum)
1129     return false;
1130 
1131   if (PhysShapeNum == VirtShapeNum) {
1132     if (PhysShapeNum == 1)
1133       return PhysShape == VirtShape;
1134 
1135     for (unsigned I = 0; I < PhysShapeNum; I++) {
1136       ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
1137       ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
1138       if (VShape != PShape)
1139         return false;
1140     }
1141     return true;
1142   }
1143 
1144   // Hint a sub-register of a multi-tile register to a single tile register.
1145   if (VirtShapeNum == 1) {
1146     for (unsigned I = 0; I < PhysShapeNum; I++) {
1147       ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
1148       if (VirtShape == PShape)
1149         return true;
1150     }
1151   }
1152 
1153   // Note: Currently we have no requirement for the case of
1154   // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum).
1155   return false;
1156 }
1157 
1158 bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
1159                                             ArrayRef<MCPhysReg> Order,
1160                                             SmallVectorImpl<MCPhysReg> &Hints,
1161                                             const MachineFunction &MF,
1162                                             const VirtRegMap *VRM,
1163                                             const LiveRegMatrix *Matrix) const {
1164   const MachineRegisterInfo *MRI = &MF.getRegInfo();
1165   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
1166   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
1167       VirtReg, Order, Hints, MF, VRM, Matrix);
1168   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
1169   const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
1170 
1171   unsigned ID = RC.getID();
1172 
1173   if (!VRM)
1174     return BaseImplRetVal;
1175 
1176   if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
1177     if (DisableRegAllocNDDHints || !ST.hasNDD() ||
1178         !TRI.isGeneralPurposeRegisterClass(&RC))
1179       return BaseImplRetVal;
1180 
1181     // Add any two address hints after any copy hints.
1182     SmallSet<unsigned, 4> TwoAddrHints;
1183 
1184     auto TryAddNDDHint = [&](const MachineOperand &MO) {
1185       Register Reg = MO.getReg();
1186       Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
1187       if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
1188         TwoAddrHints.insert(PhysReg);
1189     };
1190 
1191     // An NDD instruction is compressible when Op0 is allocated to the same
1192     // physical register as Op1 (or Op2 if it's commutable).
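    // For example, an NDD add of the form 'add dst, src1, src2' can be encoded
    // in the shorter legacy two-operand form when dst is assigned the same
    // physical register as src1 (or src2, since add is commutable).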
1193     for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
1194       const MachineInstr &MI = *MO.getParent();
1195       if (!X86::getNonNDVariant(MI.getOpcode()))
1196         continue;
1197       unsigned OpIdx = MI.getOperandNo(&MO);
1198       if (OpIdx == 0) {
1199         assert(MI.getOperand(1).isReg());
1200         TryAddNDDHint(MI.getOperand(1));
1201         if (MI.isCommutable()) {
1202           assert(MI.getOperand(2).isReg());
1203           TryAddNDDHint(MI.getOperand(2));
1204         }
1205       } else if (OpIdx == 1) {
1206         TryAddNDDHint(MI.getOperand(0));
1207       } else if (MI.isCommutable() && OpIdx == 2) {
1208         TryAddNDDHint(MI.getOperand(0));
1209       }
1210     }
1211 
1212     for (MCPhysReg OrderReg : Order)
1213       if (TwoAddrHints.count(OrderReg))
1214         Hints.push_back(OrderReg);
1215 
1216     return BaseImplRetVal;
1217   }
1218 
1219   ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
1220   auto AddHint = [&](MCPhysReg PhysReg) {
1221     Register VReg = Matrix->getOneVReg(PhysReg);
1222     if (VReg == MCRegister::NoRegister) { // Not allocated yet
1223       Hints.push_back(PhysReg);
1224       return;
1225     }
1226     ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
1227     if (canHintShape(PhysShape, VirtShape))
1228       Hints.push_back(PhysReg);
1229   };
1230 
1231   SmallSet<MCPhysReg, 4> CopyHints;
1232   CopyHints.insert(Hints.begin(), Hints.end());
1233   Hints.clear();
1234   for (auto Hint : CopyHints) {
1235     if (RC.contains(Hint) && !MRI->isReserved(Hint))
1236       AddHint(Hint);
1237   }
1238   for (MCPhysReg PhysReg : Order) {
1239     if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
1240         !MRI->isReserved(PhysReg))
1241       AddHint(PhysReg);
1242   }
1243 
1244 #define DEBUG_TYPE "tile-hint"
1245   LLVM_DEBUG({
1246     dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
1247     for (auto Hint : Hints) {
1248       dbgs() << "tmm" << Hint << ",";
1249     }
1250     dbgs() << "\n";
1251   });
1252 #undef DEBUG_TYPE
1253 
1254   return true;
1255 }
1256