//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
          cl::desc("Enable use of a base pointer for complex stack frames"));

static cl::opt<bool>
    DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable two address hints for register "
                                     "allocation"));

X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit mode PIC
  // code requires the GOT pointer to be in EBX before making function calls
  // through the PLT.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  auto I = RC->superclasses().begin();
  auto E = RC->superclasses().end();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    if (I != E) {
      Super = getRegClass(*I);
      ++I;
    } else {
      Super = nullptr;
    }
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the NoCallerSavedRegisters attribute, switch to the
  // X86_INTR calling convention because it has the CSR list we want.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by
  // the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *
X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getFPClobberedByInvoke())
      MF.getContext().reportError(
          SMLoc(),
          "Frame pointer clobbered by function invoke is not supported.");

    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
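  // (A base pointer is only needed when the stack must be realigned, which
  // rules out FP-relative addressing, while dynamic allocas or opaque SP
  // adjustments rule out SP-relative addressing; see hasBasePointer() below
  // for the exact conditions.)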
  if (hasBasePointer(MF)) {
    if (MF.getInfo<X86MachineFunctionInfo>()->getBPClobberedByInvoke())
      MF.getContext().reportError(SMLoc(),
                                  "Stack realignment in presence of dynamic "
                                  "allocas is not supported with "
                                  "this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though
    // their super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 0; n != 16; ++n) {
      for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid();
           ++AI)
        Reserved.set(*AI);
    }
  }

  // Reserve the extended general purpose registers.
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR())
    Reserved.set(X86::R16, X86::R31WH + 1);

  if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
    for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
    for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI)
      Reserved.set(*AI);
  }

  // Reserve the low tile pair registers in case they would otherwise be used
  // aggressively by the register allocator.
  Reserved.set(X86::TMM0_TMM1);
  Reserved.set(X86::TMM2_TMM3);

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
  // All existing Intel CPUs that support AMX support AVX512 and all existing
  // Intel CPUs that support APX support AMX. AVX512 implies AVX.
  //
  // We enumerate the registers in X86GenRegisterInfo.inc in this order:
  //
  // Registers before AVX512,
  // AVX512 registers (X/YMM16-31, ZMM0-31, K registers)
  // AMX registers (TMM)
  // APX registers (R16-R31)
  //
  // and try to return the minimum number of registers supported by the target.
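  //
  // The static_assert below pins that enumeration order, so the early-exit
  // bounds used here stay valid if the register file is ever reordered.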
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM6_TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed whenever it is actually being used as the
  // frame pointer.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID ||
         RC->getID() == X86::TILEPAIRRegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS shows up as live-out after branch folding. Add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
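  // Each register's bit lives at Mask[Reg / 32], bit (Reg % 32), so clearing
  // that bit removes the register from the stack map's live-out set.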
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference the arguments, so we don't need a
  // base pointer.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV, which will zero extend the upper
  // 32 bits of the super register.
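  // (For example, a 64-bit %rsp base operand becomes %esp, so copyPhysReg
  // emits a 32-bit register-to-register move below.)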
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          unsigned FIOperandNum,
                                          Register BaseReg,
                                          int FIOffset) const {
  MachineInstr &MI = *II;
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return;
  }

  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0)
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset =
        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
}

bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location.
  // On 64-bit, the offset is from the SP at the end of the prologue, not the
  // FP location. This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r, when BasePtr is 32-bit (X32), we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is 32-bit. This saves one byte per lea in code since the 0x67
  // prefix is avoided.
  // Don't change BasePtr since it is used later for stack adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with the base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum + 3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
        (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (MachineOperand &MO : MBBI->operands()) {
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
          CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
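  // For the single-tile AMX pseudos below, operands 1 and 2 are the row and
  // column operands of the destination tile, so they fully describe its shape.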
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTTRANSPOSEDV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTCONJTFP16V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    MachineOperand &MO3 = MI->getOperand(3);
    ShapeT Shape({&MO1, &MO2, &MO1, &MO3}, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  }
}

static bool canHintShape(ShapeT &PhysShape, ShapeT &VirtShape) {
  unsigned PhysShapeNum = PhysShape.getShapeNum();
  unsigned VirtShapeNum = VirtShape.getShapeNum();

  if (PhysShapeNum < VirtShapeNum)
    return false;

  if (PhysShapeNum == VirtShapeNum) {
    if (PhysShapeNum == 1)
      return PhysShape == VirtShape;

    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      ShapeT VShape(VirtShape.getRow(I), VirtShape.getCol(I));
      if (VShape != PShape)
        return false;
    }
    return true;
  }

  // Hint a subreg of a multi-tile register to a single tile register.
  if (VirtShapeNum == 1) {
    for (unsigned I = 0; I < PhysShapeNum; I++) {
      ShapeT PShape(PhysShape.getRow(I), PhysShape.getCol(I));
      if (VirtShape == PShape)
        return true;
    }
  }

  // Note: Currently we have no requirement for the case of
  // (VirtShapeNum > 1 and PhysShapeNum > VirtShapeNum).
  return false;
}

bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  unsigned ID = RC.getID();

  if (!VRM)
    return BaseImplRetVal;

  if (ID != X86::TILERegClassID && ID != X86::TILEPAIRRegClassID) {
    if (DisableRegAllocNDDHints || !ST.hasNDD() ||
        !TRI.isGeneralPurposeRegisterClass(&RC))
      return BaseImplRetVal;

    // Add any two address hints after any copy hints.
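    // For example, an NDD instruction whose destination ends up in the same
    // physical register as one of its sources can later be compressed to the
    // shorter legacy two-operand encoding, so prefer such assignments.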
    SmallSet<unsigned, 4> TwoAddrHints;

    auto TryAddNDDHint = [&](const MachineOperand &MO) {
      Register Reg = MO.getReg();
      Register PhysReg = Reg.isPhysical() ? Reg : Register(VRM->getPhys(Reg));
      if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    };

    // NDD instructions are compressible when Op0 is allocated to the same
    // physical register as Op1 (or Op2 if the instruction is commutable).
    for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
      const MachineInstr &MI = *MO.getParent();
      if (!X86::getNonNDVariant(MI.getOpcode()))
        continue;
      unsigned OpIdx = MI.getOperandNo(&MO);
      if (OpIdx == 0) {
        assert(MI.getOperand(1).isReg());
        TryAddNDDHint(MI.getOperand(1));
        if (MI.isCommutable()) {
          assert(MI.getOperand(2).isReg());
          TryAddNDDHint(MI.getOperand(2));
        }
      } else if (OpIdx == 1) {
        TryAddNDDHint(MI.getOperand(0));
      } else if (MI.isCommutable() && OpIdx == 2) {
        TryAddNDDHint(MI.getOperand(0));
      }
    }

    for (MCPhysReg OrderReg : Order)
      if (TwoAddrHints.count(OrderReg))
        Hints.push_back(OrderReg);

    return BaseImplRetVal;
  }

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (canHintShape(PhysShape, VirtShape))
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}