//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
    EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                      cl::desc("Enable use of a base pointer for complex "
                               "stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // code requires the GOT pointer in EBX before function calls made via the
  // PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
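  // (Outside 64-bit mode only the ABCD registers have addressable 8-bit
  // sub-registers, so treating sub_8bit like sub_8bit_hi keeps the super
  // class constrained accordingly; this mirrors getSubClassWithSubReg above.)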
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default:
    llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zeros.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg, SrcRC,
                                                  SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                              MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the "no_caller_saved_registers" attribute, switch to
  // the X86_INTR calling convention because it has the matching CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()
                 ? CSR_64_CXX_TLS_Darwin_PE_SaveList
                 : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList
                       : CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList
                       : CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList
                     : CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;

    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}
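// For CXX_FAST_TLS functions that use split CSR saving, the registers in the
// list below are expected to be saved and restored via register copies in the
// entry and return blocks instead of prologue/epilogue spills.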
const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask
                   : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask
                       : CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask
                       : CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask
                     : CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }
  return CSR_32_RegMask;
}

const uint32_t *X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  // Reserve the low-half tile pair registers in case they are used by the
  // register allocator aggressively.
  Reserved.set(X86::TMM0_TMM1);
  Reserved.set(X86::TMM2_TMM3);

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM6_TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // Don't use the frame pointer if it's being used.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID ||
         RC->getID() == X86::TILEPAIRRegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately the EFLAGS can show up as live-out after branch folding. Add
  // an assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // If a virtual register is used to reference the arguments, we don't need a
  // base pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue =
      MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is 32 bits. It saves one byte per lea in code since the 0x67
  // prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
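// The "pointer-sized" variants below return the 32-bit sub-register of the
// frame/stack pointer when the target uses 32-bit pointers in 64-bit mode
// (X32/ILP32), so callers building address operands get a register that
// matches the pointer width.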
unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTTRANSPOSEDV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTCONJTFP16V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    MachineOperand &MO3 = MI->getOperand(3);
    ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}

static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
  unsigned PhysShapeNum = PhysShape.getShapeNum();
  unsigned VirtShapeNum = VirtShape.getShapeNum();

  if (PhysShapeNum < VirtShapeNum)
    return false;

  if (PhysShapeNum == VirtShapeNum) {
    if (PhysShapeNum == 1)
      return PhysShape == VirtShape;

    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
      if (VShape != PShape)
        return false;
    }
    return true;
  }

  // Hint the subreg of a multi-tile reg to a single tile reg.
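  // e.g. a single-tile virtual register can be hinted to an already-shaped
  // tile pair as long as one of the pair's shapes matches.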
  if (VirtShapeNum == 1) {
    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      if (VirtShape == PShape)
        return true;
    }
  }

  // Note: Currently we have no requirement for the case of
  // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum).
  return false;
}

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // An NDD instruction is compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;

      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (canHintShape(PhysShape, VirtShape))
      Hints.push_back(PhysReg);
  };

  SmallSet<unsigned, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}