1 //===-- X86RegisterInfo.cpp - X86 Register Information --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the X86 implementation of the TargetRegisterInfo class. 10 // This file is responsible for the frame pointer elimination optimization 11 // on X86. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86RegisterInfo.h" 16 #include "X86FrameLowering.h" 17 #include "X86MachineFunctionInfo.h" 18 #include "X86Subtarget.h" 19 #include "llvm/ADT/BitVector.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/CodeGen/LiveRegMatrix.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/CodeGen/TargetFrameLowering.h" 28 #include "llvm/CodeGen/TargetInstrInfo.h" 29 #include "llvm/CodeGen/TileShapeInfo.h" 30 #include "llvm/CodeGen/VirtRegMap.h" 31 #include "llvm/IR/Constants.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Target/TargetMachine.h" 37 #include "llvm/Target/TargetOptions.h" 38 39 using namespace llvm; 40 41 #define GET_REGINFO_TARGET_DESC 42 #include "X86GenRegisterInfo.inc" 43 44 static cl::opt<bool> 45 EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), 46 cl::desc("Enable use of a base pointer for complex stack frames")); 47 48 X86RegisterInfo::X86RegisterInfo(const Triple &TT) 49 : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP), 50 X86_MC::getDwarfRegFlavour(TT, false), 51 X86_MC::getDwarfRegFlavour(TT, true), 52 (TT.isArch64Bit() ? X86::RIP : X86::EIP)) { 53 X86_MC::initLLVMToSEHAndCVRegMapping(this); 54 55 // Cache some information. 56 Is64Bit = TT.isArch64Bit(); 57 IsWin64 = Is64Bit && TT.isOSWindows(); 58 59 // Use a callee-saved register as the base pointer. These registers must 60 // not conflict with any ABI requirements. For example, in 32-bit mode PIC 61 // requires GOT in the EBX register before function calls via PLT GOT pointer. 62 if (Is64Bit) { 63 SlotSize = 8; 64 // This matches the simplified 32-bit pointer code in the data layout 65 // computation. 66 // FIXME: Should use the data layout? 67 bool Use64BitReg = !TT.isX32(); 68 StackPtr = Use64BitReg ? X86::RSP : X86::ESP; 69 FramePtr = Use64BitReg ? X86::RBP : X86::EBP; 70 BasePtr = Use64BitReg ? X86::RBX : X86::EBX; 71 } else { 72 SlotSize = 4; 73 StackPtr = X86::ESP; 74 FramePtr = X86::EBP; 75 BasePtr = X86::ESI; 76 } 77 } 78 79 int 80 X86RegisterInfo::getSEHRegNum(unsigned i) const { 81 return getEncodingValue(i); 82 } 83 84 const TargetRegisterClass * 85 X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, 86 unsigned Idx) const { 87 // The sub_8bit sub-register index is more constrained in 32-bit mode. 88 // It behaves just like the sub_8bit_hi index. 89 if (!Is64Bit && Idx == X86::sub_8bit) 90 Idx = X86::sub_8bit_hi; 91 92 // Forward to TableGen's default version. 
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::GR8_NOREX2RegClassID:
    case X86::GR16_NOREX2RegClassID:
    case X86::GR32_NOREX2RegClassID:
    case X86::GR64_NOREX2RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
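    // For LP64 targets any 64-bit GPR can hold a pointer. The x32 (ILP32 on a
    // 64-bit target) handling below instead picks classes that are safe for
    // 32-bit address accesses.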
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know the high bits
    // are zero.
    // Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also allows a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for the address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the NoCallerSavedRegisters attribute is present, switch to the X86_INTR
  // calling convention because it has the CSR list.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by the
  // calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return IsWin64 ? CSR_Win64_RT_MostRegs_SaveList
                   : CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::PreserveNone:
    return CSR_64_NoneRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ?
CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList; 368 case CallingConv::X86_64_SysV: 369 if (CallsEHReturn) 370 return CSR_64EHRet_SaveList; 371 return CSR_64_SaveList; 372 case CallingConv::X86_INTR: 373 if (Is64Bit) { 374 if (HasAVX512) 375 return CSR_64_AllRegs_AVX512_SaveList; 376 if (HasAVX) 377 return CSR_64_AllRegs_AVX_SaveList; 378 if (HasSSE) 379 return CSR_64_AllRegs_SaveList; 380 return CSR_64_AllRegs_NoSSE_SaveList; 381 } else { 382 if (HasAVX512) 383 return CSR_32_AllRegs_AVX512_SaveList; 384 if (HasAVX) 385 return CSR_32_AllRegs_AVX_SaveList; 386 if (HasSSE) 387 return CSR_32_AllRegs_SSE_SaveList; 388 return CSR_32_AllRegs_SaveList; 389 } 390 default: 391 break; 392 } 393 394 if (Is64Bit) { 395 bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && 396 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); 397 if (IsSwiftCC) 398 return IsWin64 ? CSR_Win64_SwiftError_SaveList 399 : CSR_64_SwiftError_SaveList; 400 401 if (IsWin64) 402 return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList; 403 if (CallsEHReturn) 404 return CSR_64EHRet_SaveList; 405 return CSR_64_SaveList; 406 } 407 408 return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList; 409 } 410 411 const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( 412 const MachineFunction *MF) const { 413 assert(MF && "Invalid MachineFunction pointer."); 414 if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && 415 MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR()) 416 return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; 417 return nullptr; 418 } 419 420 const uint32_t * 421 X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, 422 CallingConv::ID CC) const { 423 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); 424 bool HasSSE = Subtarget.hasSSE1(); 425 bool HasAVX = Subtarget.hasAVX(); 426 bool HasAVX512 = Subtarget.hasAVX512(); 427 428 switch (CC) { 429 case CallingConv::GHC: 430 case CallingConv::HiPE: 431 return CSR_NoRegs_RegMask; 432 case CallingConv::AnyReg: 433 if (HasAVX) 434 return CSR_64_AllRegs_AVX_RegMask; 435 return CSR_64_AllRegs_RegMask; 436 case CallingConv::PreserveMost: 437 return IsWin64 ? CSR_Win64_RT_MostRegs_RegMask : CSR_64_RT_MostRegs_RegMask; 438 case CallingConv::PreserveAll: 439 if (HasAVX) 440 return CSR_64_RT_AllRegs_AVX_RegMask; 441 return CSR_64_RT_AllRegs_RegMask; 442 case CallingConv::PreserveNone: 443 return CSR_64_NoneRegs_RegMask; 444 case CallingConv::CXX_FAST_TLS: 445 if (Is64Bit) 446 return CSR_64_TLS_Darwin_RegMask; 447 break; 448 case CallingConv::Intel_OCL_BI: { 449 if (HasAVX512 && IsWin64) 450 return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; 451 if (HasAVX512 && Is64Bit) 452 return CSR_64_Intel_OCL_BI_AVX512_RegMask; 453 if (HasAVX && IsWin64) 454 return CSR_Win64_Intel_OCL_BI_AVX_RegMask; 455 if (HasAVX && Is64Bit) 456 return CSR_64_Intel_OCL_BI_AVX_RegMask; 457 if (!HasAVX && !IsWin64 && Is64Bit) 458 return CSR_64_Intel_OCL_BI_RegMask; 459 break; 460 } 461 case CallingConv::X86_RegCall: 462 if (Is64Bit) { 463 if (IsWin64) { 464 return (HasSSE ? CSR_Win64_RegCall_RegMask : 465 CSR_Win64_RegCall_NoSSE_RegMask); 466 } else { 467 return (HasSSE ? CSR_SysV64_RegCall_RegMask : 468 CSR_SysV64_RegCall_NoSSE_RegMask); 469 } 470 } else { 471 return (HasSSE ? CSR_32_RegCall_RegMask : 472 CSR_32_RegCall_NoSSE_RegMask); 473 } 474 case CallingConv::CFGuard_Check: 475 assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86"); 476 return (HasSSE ? 
CSR_Win32_CFGuard_Check_RegMask 477 : CSR_Win32_CFGuard_Check_NoSSE_RegMask); 478 case CallingConv::Cold: 479 if (Is64Bit) 480 return CSR_64_MostRegs_RegMask; 481 break; 482 case CallingConv::Win64: 483 return CSR_Win64_RegMask; 484 case CallingConv::SwiftTail: 485 if (!Is64Bit) 486 return CSR_32_RegMask; 487 return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask; 488 case CallingConv::X86_64_SysV: 489 return CSR_64_RegMask; 490 case CallingConv::X86_INTR: 491 if (Is64Bit) { 492 if (HasAVX512) 493 return CSR_64_AllRegs_AVX512_RegMask; 494 if (HasAVX) 495 return CSR_64_AllRegs_AVX_RegMask; 496 if (HasSSE) 497 return CSR_64_AllRegs_RegMask; 498 return CSR_64_AllRegs_NoSSE_RegMask; 499 } else { 500 if (HasAVX512) 501 return CSR_32_AllRegs_AVX512_RegMask; 502 if (HasAVX) 503 return CSR_32_AllRegs_AVX_RegMask; 504 if (HasSSE) 505 return CSR_32_AllRegs_SSE_RegMask; 506 return CSR_32_AllRegs_RegMask; 507 } 508 default: 509 break; 510 } 511 512 // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check 513 // callsEHReturn(). 514 if (Is64Bit) { 515 const Function &F = MF.getFunction(); 516 bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && 517 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); 518 if (IsSwiftCC) 519 return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask; 520 521 return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask; 522 } 523 524 return CSR_32_RegMask; 525 } 526 527 const uint32_t* 528 X86RegisterInfo::getNoPreservedMask() const { 529 return CSR_NoRegs_RegMask; 530 } 531 532 const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const { 533 return CSR_64_TLS_Darwin_RegMask; 534 } 535 536 BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { 537 BitVector Reserved(getNumRegs()); 538 const X86FrameLowering *TFI = getFrameLowering(MF); 539 540 // Set the floating point control register as reserved. 541 Reserved.set(X86::FPCW); 542 543 // Set the floating point status register as reserved. 544 Reserved.set(X86::FPSW); 545 546 // Set the SIMD floating point control register as reserved. 547 Reserved.set(X86::MXCSR); 548 549 // Set the stack-pointer register and its aliases as reserved. 550 for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP)) 551 Reserved.set(SubReg); 552 553 // Set the Shadow Stack Pointer as reserved. 554 Reserved.set(X86::SSP); 555 556 // Set the instruction pointer register and its aliases as reserved. 557 for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP)) 558 Reserved.set(SubReg); 559 560 // Set the frame-pointer register and its aliases as reserved if needed. 561 if (TFI->hasFP(MF)) { 562 for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP)) 563 Reserved.set(SubReg); 564 } 565 566 // Set the base-pointer register and its aliases as reserved if needed. 567 if (hasBasePointer(MF)) { 568 CallingConv::ID CC = MF.getFunction().getCallingConv(); 569 const uint32_t *RegMask = getCallPreservedMask(MF, CC); 570 if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) 571 report_fatal_error( 572 "Stack realignment in presence of dynamic allocas is not supported with" 573 "this calling convention."); 574 575 Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); 576 for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr)) 577 Reserved.set(SubReg); 578 } 579 580 // Mark the segment registers as reserved. 
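  // These only ever appear as the segment-override operand of memory
  // references; they are never available to the register allocator.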
581 Reserved.set(X86::CS); 582 Reserved.set(X86::SS); 583 Reserved.set(X86::DS); 584 Reserved.set(X86::ES); 585 Reserved.set(X86::FS); 586 Reserved.set(X86::GS); 587 588 // Mark the floating point stack registers as reserved. 589 for (unsigned n = 0; n != 8; ++n) 590 Reserved.set(X86::ST0 + n); 591 592 // Reserve the registers that only exist in 64-bit mode. 593 if (!Is64Bit) { 594 // These 8-bit registers are part of the x86-64 extension even though their 595 // super-registers are old 32-bits. 596 Reserved.set(X86::SIL); 597 Reserved.set(X86::DIL); 598 Reserved.set(X86::BPL); 599 Reserved.set(X86::SPL); 600 Reserved.set(X86::SIH); 601 Reserved.set(X86::DIH); 602 Reserved.set(X86::BPH); 603 Reserved.set(X86::SPH); 604 605 for (unsigned n = 0; n != 8; ++n) { 606 // R8, R9, ... 607 for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI) 608 Reserved.set(*AI); 609 610 // XMM8, XMM9, ... 611 for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI) 612 Reserved.set(*AI); 613 } 614 } 615 if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) { 616 for (unsigned n = 0; n != 16; ++n) { 617 for (MCRegAliasIterator AI(X86::XMM16 + n, this, true); AI.isValid(); 618 ++AI) 619 Reserved.set(*AI); 620 } 621 } 622 623 // Reserve the extended general purpose registers. 624 if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR()) 625 Reserved.set(X86::R16, X86::R31WH + 1); 626 627 if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { 628 for (MCRegAliasIterator AI(X86::R14, this, true); AI.isValid(); ++AI) 629 Reserved.set(*AI); 630 for (MCRegAliasIterator AI(X86::R15, this, true); AI.isValid(); ++AI) 631 Reserved.set(*AI); 632 } 633 634 assert(checkAllSuperRegsMarked(Reserved, 635 {X86::SIL, X86::DIL, X86::BPL, X86::SPL, 636 X86::SIH, X86::DIH, X86::BPH, X86::SPH})); 637 return Reserved; 638 } 639 640 unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const { 641 // All existing Intel CPUs that support AMX support AVX512 and all existing 642 // Intel CPUs that support APX support AMX. AVX512 implies AVX. 643 // 644 // We enumerate the registers in X86GenRegisterInfo.inc in this order: 645 // 646 // Registers before AVX512, 647 // AVX512 registers (X/YMM16-31, ZMM0-31, K registers) 648 // AMX registers (TMM) 649 // APX registers (R16-R31) 650 // 651 // and try to return the minimum number of registers supported by the target. 
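  // The static_assert below pins down that enumeration order; if the generated
  // register numbering in X86GenRegisterInfo.inc ever changes, the early-exit
  // thresholds returned here must be updated as well.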
  static_assert((X86::R15WH + 1 == X86::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
                    (X86::K6_K7 + 1 == X86::TMMCFG) &&
                    (X86::TMM7 + 1 == X86::R16) &&
                    (X86::R31WH + 1 == X86::NUM_TARGET_REGS),
                "Register number may be incorrect");

  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  if (ST.hasEGPR())
    return X86::NUM_TARGET_REGS;
  if (ST.hasAMXTILE())
    return X86::TMM7 + 1;
  if (ST.hasAVX512())
    return X86::K6_K7 + 1;
  if (ST.hasAVX())
    return X86::YMM15 + 1;
  return X86::R15WH + 1;
}

bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // The frame pointer is fixed whenever the function uses one.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately the EFLAGS show up as live-out after branch folding. We add
  // an assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
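  // The mask stores one bit per physical register, packed into 32-bit words,
  // so clearing a register means clearing bit (Reg % 32) in word (Reg / 32).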
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // We have a virtual register to reference the arguments, so no base pointer
  // is needed.
  if (X86FI->getStackPtrSaveMI() != nullptr)
    return false;

  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  if (TargetRegisterInfo::shouldRealignStack(MF))
    return true;

  return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA
// instruction of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'.
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV, which will zero the upper 32 bits
  // of the super register.
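  // e.g. 'lea64_32r (%rsp), %ebx' becomes 'mov %esp, %ebx', and the 32-bit MOV
  // implicitly clears bits 63:32 of %rbx.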
822 if (Opc == X86::LEA64_32r) 823 BasePtr = getX86SubSuperRegister(BasePtr, 32); 824 Register NewDestReg = MI.getOperand(0).getReg(); 825 const X86InstrInfo *TII = 826 MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo(); 827 TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr, 828 MI.getOperand(1).isKill()); 829 MI.eraseFromParent(); 830 return true; 831 } 832 833 static bool isFuncletReturnInstr(MachineInstr &MI) { 834 switch (MI.getOpcode()) { 835 case X86::CATCHRET: 836 case X86::CLEANUPRET: 837 return true; 838 default: 839 return false; 840 } 841 llvm_unreachable("impossible"); 842 } 843 844 void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, 845 unsigned FIOperandNum, 846 Register BaseReg, 847 int FIOffset) const { 848 MachineInstr &MI = *II; 849 unsigned Opc = MI.getOpcode(); 850 if (Opc == TargetOpcode::LOCAL_ESCAPE) { 851 MachineOperand &FI = MI.getOperand(FIOperandNum); 852 FI.ChangeToImmediate(FIOffset); 853 return; 854 } 855 856 MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false); 857 858 // The frame index format for stackmaps and patchpoints is different from the 859 // X86 format. It only has a FI and an offset. 860 if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { 861 assert(BasePtr == FramePtr && "Expected the FP as base register"); 862 int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset; 863 MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); 864 return; 865 } 866 867 if (MI.getOperand(FIOperandNum + 3).isImm()) { 868 // Offset is a 32-bit integer. 869 int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); 870 int Offset = FIOffset + Imm; 871 assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && 872 "Requesting 64-bit offset in 32-bit immediate!"); 873 if (Offset != 0) 874 MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); 875 } else { 876 // Offset is symbolic. This is extremely rare. 877 uint64_t Offset = 878 FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset(); 879 MI.getOperand(FIOperandNum + 3).setOffset(Offset); 880 } 881 } 882 883 bool 884 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, 885 int SPAdj, unsigned FIOperandNum, 886 RegScavenger *RS) const { 887 MachineInstr &MI = *II; 888 MachineBasicBlock &MBB = *MI.getParent(); 889 MachineFunction &MF = *MBB.getParent(); 890 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 891 bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false 892 : isFuncletReturnInstr(*MBBI); 893 const X86FrameLowering *TFI = getFrameLowering(MF); 894 int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); 895 896 // Determine base register and offset. 897 int FIOffset; 898 Register BasePtr; 899 if (MI.isReturn()) { 900 assert((!hasStackRealignment(MF) || 901 MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && 902 "Return instruction can only reference SP relative frame objects"); 903 FIOffset = 904 TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed(); 905 } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) { 906 FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr); 907 } else { 908 FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed(); 909 } 910 911 // LOCAL_ESCAPE uses a single offset, with no register. It only works in the 912 // simple FP case, and doesn't work with stack realignment. On 32-bit, the 913 // offset is from the traditional base pointer location. 
On 64-bit, the 914 // offset is from the SP at the end of the prologue, not the FP location. This 915 // matches the behavior of llvm.frameaddress. 916 unsigned Opc = MI.getOpcode(); 917 if (Opc == TargetOpcode::LOCAL_ESCAPE) { 918 MachineOperand &FI = MI.getOperand(FIOperandNum); 919 FI.ChangeToImmediate(FIOffset); 920 return false; 921 } 922 923 // For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit 924 // register as source operand, semantic is the same and destination is 925 // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. 926 // Don't change BasePtr since it is used later for stack adjustment. 927 Register MachineBasePtr = BasePtr; 928 if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) 929 MachineBasePtr = getX86SubSuperRegister(BasePtr, 64); 930 931 // This must be part of a four operand memory reference. Replace the 932 // FrameIndex with base register. Add an offset to the offset. 933 MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); 934 935 if (BasePtr == StackPtr) 936 FIOffset += SPAdj; 937 938 // The frame index format for stackmaps and patchpoints is different from the 939 // X86 format. It only has a FI and an offset. 940 if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { 941 assert(BasePtr == FramePtr && "Expected the FP as base register"); 942 int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset; 943 MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); 944 return false; 945 } 946 947 if (MI.getOperand(FIOperandNum+3).isImm()) { 948 // Offset is a 32-bit integer. 949 int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); 950 int Offset = FIOffset + Imm; 951 assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && 952 "Requesting 64-bit offset in 32-bit immediate!"); 953 if (Offset != 0 || !tryOptimizeLEAtoMOV(II)) 954 MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); 955 } else { 956 // Offset is symbolic. This is extremely rare. 
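    // The operand carries a symbol (for example a global or constant-pool
    // reference) rather than an immediate, so fold the frame offset into the
    // operand's offset field instead.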
957 uint64_t Offset = FIOffset + 958 (uint64_t)MI.getOperand(FIOperandNum+3).getOffset(); 959 MI.getOperand(FIOperandNum + 3).setOffset(Offset); 960 } 961 return false; 962 } 963 964 unsigned X86RegisterInfo::findDeadCallerSavedReg( 965 MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const { 966 const MachineFunction *MF = MBB.getParent(); 967 if (MF->callsEHReturn()) 968 return 0; 969 970 const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF); 971 972 if (MBBI == MBB.end()) 973 return 0; 974 975 switch (MBBI->getOpcode()) { 976 default: 977 return 0; 978 case TargetOpcode::PATCHABLE_RET: 979 case X86::RET: 980 case X86::RET32: 981 case X86::RET64: 982 case X86::RETI32: 983 case X86::RETI64: 984 case X86::TCRETURNdi: 985 case X86::TCRETURNri: 986 case X86::TCRETURNmi: 987 case X86::TCRETURNdi64: 988 case X86::TCRETURNri64: 989 case X86::TCRETURNmi64: 990 case X86::EH_RETURN: 991 case X86::EH_RETURN64: { 992 SmallSet<uint16_t, 8> Uses; 993 for (MachineOperand &MO : MBBI->operands()) { 994 if (!MO.isReg() || MO.isDef()) 995 continue; 996 Register Reg = MO.getReg(); 997 if (!Reg) 998 continue; 999 for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI) 1000 Uses.insert(*AI); 1001 } 1002 1003 for (auto CS : AvailableRegs) 1004 if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP) 1005 return CS; 1006 } 1007 } 1008 1009 return 0; 1010 } 1011 1012 Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { 1013 const X86FrameLowering *TFI = getFrameLowering(MF); 1014 return TFI->hasFP(MF) ? FramePtr : StackPtr; 1015 } 1016 1017 unsigned 1018 X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { 1019 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); 1020 Register FrameReg = getFrameRegister(MF); 1021 if (Subtarget.isTarget64BitILP32()) 1022 FrameReg = getX86SubSuperRegister(FrameReg, 32); 1023 return FrameReg; 1024 } 1025 1026 unsigned 1027 X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const { 1028 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>(); 1029 Register StackReg = getStackRegister(); 1030 if (Subtarget.isTarget64BitILP32()) 1031 StackReg = getX86SubSuperRegister(StackReg, 32); 1032 return StackReg; 1033 } 1034 1035 static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM, 1036 const MachineRegisterInfo *MRI) { 1037 if (VRM->hasShape(VirtReg)) 1038 return VRM->getShape(VirtReg); 1039 1040 const MachineOperand &Def = *MRI->def_begin(VirtReg); 1041 MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent()); 1042 unsigned OpCode = MI->getOpcode(); 1043 switch (OpCode) { 1044 default: 1045 llvm_unreachable("Unexpected machine instruction on tile register!"); 1046 break; 1047 case X86::COPY: { 1048 Register SrcReg = MI->getOperand(1).getReg(); 1049 ShapeT Shape = getTileShape(SrcReg, VRM, MRI); 1050 VRM->assignVirt2Shape(VirtReg, Shape); 1051 return Shape; 1052 } 1053 // We only collect the tile shape that is defined. 
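  // Each pseudo below defines a tile register whose shape is carried in
  // operands 1 and 2 (the row and column operands); the shape is cached in the
  // VirtRegMap so later queries take the fast path at the top of this function.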
1054 case X86::PTILELOADDV: 1055 case X86::PTILELOADDT1V: 1056 case X86::PTDPBSSDV: 1057 case X86::PTDPBSUDV: 1058 case X86::PTDPBUSDV: 1059 case X86::PTDPBUUDV: 1060 case X86::PTILEZEROV: 1061 case X86::PTDPBF16PSV: 1062 case X86::PTDPFP16PSV: 1063 case X86::PTCMMIMFP16PSV: 1064 case X86::PTCMMRLFP16PSV: 1065 MachineOperand &MO1 = MI->getOperand(1); 1066 MachineOperand &MO2 = MI->getOperand(2); 1067 ShapeT Shape(&MO1, &MO2, MRI); 1068 VRM->assignVirt2Shape(VirtReg, Shape); 1069 return Shape; 1070 } 1071 } 1072 1073 bool X86RegisterInfo::getRegAllocationHints(Register VirtReg, 1074 ArrayRef<MCPhysReg> Order, 1075 SmallVectorImpl<MCPhysReg> &Hints, 1076 const MachineFunction &MF, 1077 const VirtRegMap *VRM, 1078 const LiveRegMatrix *Matrix) const { 1079 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1080 const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); 1081 bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( 1082 VirtReg, Order, Hints, MF, VRM, Matrix); 1083 1084 unsigned ID = RC.getID(); 1085 if (ID != X86::TILERegClassID) 1086 return BaseImplRetVal; 1087 1088 ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI); 1089 auto AddHint = [&](MCPhysReg PhysReg) { 1090 Register VReg = Matrix->getOneVReg(PhysReg); 1091 if (VReg == MCRegister::NoRegister) { // Not allocated yet 1092 Hints.push_back(PhysReg); 1093 return; 1094 } 1095 ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI); 1096 if (PhysShape == VirtShape) 1097 Hints.push_back(PhysReg); 1098 }; 1099 1100 SmallSet<MCPhysReg, 4> CopyHints; 1101 CopyHints.insert(Hints.begin(), Hints.end()); 1102 Hints.clear(); 1103 for (auto Hint : CopyHints) { 1104 if (RC.contains(Hint) && !MRI->isReserved(Hint)) 1105 AddHint(Hint); 1106 } 1107 for (MCPhysReg PhysReg : Order) { 1108 if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) && 1109 !MRI->isReserved(PhysReg)) 1110 AddHint(PhysReg); 1111 } 1112 1113 #define DEBUG_TYPE "tile-hint" 1114 LLVM_DEBUG({ 1115 dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n"; 1116 for (auto Hint : Hints) { 1117 dbgs() << "tmm" << Hint << ","; 1118 } 1119 dbgs() << "\n"; 1120 }); 1121 #undef DEBUG_TYPE 1122 1123 return true; 1124 } 1125