//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//

#include "X86RegisterInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"

static cl::opt<bool>
    EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
                      cl::desc("Enable use of a base pointer for complex stack frames"));
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
    : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
                         X86_MC::getDwarfRegFlavour(TT, false),
                         X86_MC::getDwarfRegFlavour(TT, true),
                         (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
  X86_MC::initLLVMToSEHAndCVRegMapping(this);

  // Cache some information.
  Is64Bit = TT.isArch64Bit();
  IsWin64 = Is64Bit && TT.isOSWindows();

  // Use a callee-saved register as the base pointer. These registers must
  // not conflict with any ABI requirements. For example, in 32-bit PIC mode,
  // EBX must hold the GOT pointer across calls through the PLT, so it cannot
  // serve as the base pointer.
  if (Is64Bit) {
    SlotSize = 8;
    // This matches the simplified 32-bit pointer code in the data layout
    // computation.
    // FIXME: Should use the data layout?
    bool Use64BitReg = !TT.isX32();
    StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
    FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
    BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
  } else {
    SlotSize = 4;
    StackPtr = X86::ESP;
    FramePtr = X86::EBP;
    BasePtr = X86::ESI;
  }
}

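// The register number used in Win64 SEH unwind information is simply the
// register's hardware encoding.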
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
  return getEncodingValue(i);
}

const TargetRegisterClass *
X86RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
                                       unsigned Idx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  // It behaves just like the sub_8bit_hi index.
  if (!Is64Bit && Idx == X86::sub_8bit)
    Idx = X86::sub_8bit_hi;

  // Forward to TableGen's default version.
  return X86GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
}

const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  // The sub_8bit sub-register index is more constrained in 32-bit mode.
  if (!Is64Bit && SubIdx == X86::sub_8bit) {
    A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
    if (!A)
      return nullptr;
  }
  return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}

const TargetRegisterClass *
X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                           const MachineFunction &MF) const {
  // Don't allow super-classes of GR8_NOREX. This class is only used after
  // extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
  // to the full GR8 register class in 64-bit mode, so we cannot allow the
  // register class inflation.
  //
  // The GR8_NOREX class is always used in a way that won't be constrained to a
  // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the
  // full GR8 class.
  if (RC == &X86::GR8_NOREXRegClass)
    return RC;

  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();

  const TargetRegisterClass *Super = RC;
  TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
  do {
    switch (Super->getID()) {
    case X86::FR32RegClassID:
    case X86::FR64RegClassID:
      // If AVX-512 isn't supported we should only inflate to these classes.
      if (!Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128RegClassID:
    case X86::VR256RegClassID:
      // If VLX isn't supported we should only inflate to these classes.
      if (!Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::VR128XRegClassID:
    case X86::VR256XRegClassID:
      // If VLX isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasVLX() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::FR32XRegClassID:
    case X86::FR64XRegClassID:
      // If AVX-512 isn't supported we shouldn't inflate to these classes.
      if (Subtarget.hasAVX512() &&
          getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
      break;
    case X86::GR8RegClassID:
    case X86::GR16RegClassID:
    case X86::GR32RegClassID:
    case X86::GR64RegClassID:
    case X86::RFP32RegClassID:
    case X86::RFP64RegClassID:
    case X86::RFP80RegClassID:
    case X86::VR512_0_15RegClassID:
    case X86::VR512RegClassID:
      // Don't return a super-class that would shrink the spill size.
      // That can happen with the vector and float classes.
      if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
        return Super;
    }
    Super = *I++;
  } while (Super);
  return RC;
}

const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                    unsigned Kind) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64RegClass;
    // If the target is 64-bit but we have been told to use 32-bit addresses,
    // we can still use a 64-bit register as long as we know its high bits
    // are zero. Reflect that in the returned register class.
    if (Is64Bit) {
      // When the target also uses a 64-bit frame pointer and we do have a
      // frame, it is fine to use it for address accesses as well.
      const X86FrameLowering *TFI = getFrameLowering(MF);
      return TFI->hasFP(MF) && TFI->Uses64BitFramePtr
                 ? &X86::LOW32_ADDR_ACCESS_RBPRegClass
                 : &X86::LOW32_ADDR_ACCESSRegClass;
    }
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOSPRegClass;
  case 2: // NOREX GPRs.
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREXRegClass;
    return &X86::GR32_NOREXRegClass;
  case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
    if (Subtarget.isTarget64BitLP64())
      return &X86::GR64_NOREX_NOSPRegClass;
    // NOSP does not contain RIP, so no special case here.
    return &X86::GR32_NOREX_NOSPRegClass;
  case 4: // Available for tailcall (not callee-saved GPRs).
    return getGPRsForTailCall(MF);
  }
}

bool X86RegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                                           unsigned DefSubReg,
                                           const TargetRegisterClass *SrcRC,
                                           unsigned SrcSubReg) const {
  // Prevent rewriting a copy where the destination size is larger than the
  // input size. See PR41619.
  // FIXME: Should this be factored into the base implementation somehow?
  if (DefRC->hasSuperClassEq(&X86::GR64RegClass) && DefSubReg == 0 &&
      SrcRC->hasSuperClassEq(&X86::GR64RegClass) && SrcSubReg == X86::sub_32bit)
    return false;

  return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
                                                  SrcRC, SrcSubReg);
}

const TargetRegisterClass *
X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  if (IsWin64 || (F.getCallingConv() == CallingConv::Win64))
    return &X86::GR64_TCW64RegClass;
  else if (Is64Bit)
    return &X86::GR64_TCRegClass;

  bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
  if (hasHipeCC)
    return &X86::GR32RegClass;
  return &X86::GR32_TCRegClass;
}

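// EFLAGS (the CCR class) cannot be copied directly; copies to or from it are
// routed through a general-purpose register of the native word size.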
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}

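// Rough per-class register pressure limits for a few common classes, used by
// target-independent heuristics; one GPR is subtracted when a frame pointer
// is in use.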
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);

  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
  switch (RC->getID()) {
  default:
    return 0;
  case X86::GR32RegClassID:
    return 4 - FPDiff;
  case X86::GR64RegClassID:
    return 12 - FPDiff;
  case X86::VR128RegClassID:
    return Is64Bit ? 10 : 4;
  case X86::VR64RegClassID:
    return 4;
  }
}

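// Select the list of callee-saved registers for this function based on the
// calling convention, subtarget features (SSE/AVX/AVX-512), and attributes
// such as "no_caller_saved_registers" and "no_callee_saved_registers".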
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(MF && "MachineFunction required");

  const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
  const Function &F = MF->getFunction();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();
  bool CallsEHReturn = MF->callsEHReturn();

  CallingConv::ID CC = F.getCallingConv();

  // If the function has the "no_caller_saved_registers" attribute, switch to
  // the X86_INTR calling convention, since it carries the CSR list we need.
  if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
    CC = CallingConv::X86_INTR;

  // If the attribute is specified, override the CSRs normally specified by
  // the calling convention and use the empty set instead.
  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
    return CSR_NoRegs_SaveList;

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_SaveList;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_SaveList;
    return CSR_64_AllRegs_SaveList;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_SaveList;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_SaveList;
    return CSR_64_RT_AllRegs_SaveList;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
             CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_SaveList;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_SaveList;
    break;
  }
  case CallingConv::HHVM:
    return CSR_64_HHVM_SaveList;
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_SaveList :
                         CSR_Win64_RegCall_NoSSE_SaveList);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_SaveList :
                         CSR_SysV64_RegCall_NoSSE_SaveList);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_SaveList :
                       CSR_32_RegCall_NoSSE_SaveList);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_SaveList
                   : CSR_Win32_CFGuard_Check_NoSSE_SaveList);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_SaveList;
    break;
  case CallingConv::Win64:
    if (!HasSSE)
      return CSR_Win64_NoSSE_SaveList;
    return CSR_Win64_SaveList;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_SaveList;
    return IsWin64 ? CSR_Win64_SwiftTail_SaveList : CSR_64_SwiftTail_SaveList;
  case CallingConv::X86_64_SysV:
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_64_AllRegs_SaveList;
      return CSR_64_AllRegs_NoSSE_SaveList;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_SaveList;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_SaveList;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_SaveList;
      return CSR_32_AllRegs_SaveList;
    }
  default:
    break;
  }

  if (Is64Bit) {
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_SaveList
                     : CSR_64_SwiftError_SaveList;

    if (IsWin64)
      return HasSSE ? CSR_Win64_SaveList : CSR_Win64_NoSSE_SaveList;
    if (CallsEHReturn)
      return CSR_64EHRet_SaveList;
    return CSR_64_SaveList;
  }

  return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
}

const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
    const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");
  if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
    return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
  return nullptr;
}

const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  bool HasSSE = Subtarget.hasSSE1();
  bool HasAVX = Subtarget.hasAVX();
  bool HasAVX512 = Subtarget.hasAVX512();

  switch (CC) {
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return CSR_NoRegs_RegMask;
  case CallingConv::AnyReg:
    if (HasAVX)
      return CSR_64_AllRegs_AVX_RegMask;
    return CSR_64_AllRegs_RegMask;
  case CallingConv::PreserveMost:
    return CSR_64_RT_MostRegs_RegMask;
  case CallingConv::PreserveAll:
    if (HasAVX)
      return CSR_64_RT_AllRegs_AVX_RegMask;
    return CSR_64_RT_AllRegs_RegMask;
  case CallingConv::CXX_FAST_TLS:
    if (Is64Bit)
      return CSR_64_TLS_Darwin_RegMask;
    break;
  case CallingConv::Intel_OCL_BI: {
    if (HasAVX512 && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX512 && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (HasAVX && Is64Bit)
      return CSR_64_Intel_OCL_BI_AVX_RegMask;
    if (!HasAVX && !IsWin64 && Is64Bit)
      return CSR_64_Intel_OCL_BI_RegMask;
    break;
  }
  case CallingConv::HHVM:
    return CSR_64_HHVM_RegMask;
  case CallingConv::X86_RegCall:
    if (Is64Bit) {
      if (IsWin64) {
        return (HasSSE ? CSR_Win64_RegCall_RegMask :
                         CSR_Win64_RegCall_NoSSE_RegMask);
      } else {
        return (HasSSE ? CSR_SysV64_RegCall_RegMask :
                         CSR_SysV64_RegCall_NoSSE_RegMask);
      }
    } else {
      return (HasSSE ? CSR_32_RegCall_RegMask :
                       CSR_32_RegCall_NoSSE_RegMask);
    }
  case CallingConv::CFGuard_Check:
    assert(!Is64Bit && "CFGuard check mechanism only used on 32-bit X86");
    return (HasSSE ? CSR_Win32_CFGuard_Check_RegMask
                   : CSR_Win32_CFGuard_Check_NoSSE_RegMask);
  case CallingConv::Cold:
    if (Is64Bit)
      return CSR_64_MostRegs_RegMask;
    break;
  case CallingConv::Win64:
    return CSR_Win64_RegMask;
  case CallingConv::SwiftTail:
    if (!Is64Bit)
      return CSR_32_RegMask;
    return IsWin64 ? CSR_Win64_SwiftTail_RegMask : CSR_64_SwiftTail_RegMask;
  case CallingConv::X86_64_SysV:
    return CSR_64_RegMask;
  case CallingConv::X86_INTR:
    if (Is64Bit) {
      if (HasAVX512)
        return CSR_64_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_64_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_64_AllRegs_RegMask;
      return CSR_64_AllRegs_NoSSE_RegMask;
    } else {
      if (HasAVX512)
        return CSR_32_AllRegs_AVX512_RegMask;
      if (HasAVX)
        return CSR_32_AllRegs_AVX_RegMask;
      if (HasSSE)
        return CSR_32_AllRegs_SSE_RegMask;
      return CSR_32_AllRegs_RegMask;
    }
  default:
    break;
  }

  // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
  // callsEHReturn().
  if (Is64Bit) {
    const Function &F = MF.getFunction();
    bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() &&
                     F.getAttributes().hasAttrSomewhere(Attribute::SwiftError);
    if (IsSwiftCC)
      return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask;

    return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask;
  }

  return CSR_32_RegMask;
}

const uint32_t*
X86RegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
  return CSR_64_TLS_Darwin_RegMask;
}

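// Mark the registers the allocator must never use: control/status registers,
// the stack and instruction pointers, segment registers, the x87 stack, the
// frame and base pointers when they are in use, and any registers that do not
// exist in the current mode.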
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  const X86FrameLowering *TFI = getFrameLowering(MF);

  // Set the floating point control register as reserved.
  Reserved.set(X86::FPCW);

  // Set the floating point status register as reserved.
  Reserved.set(X86::FPSW);

  // Set the SIMD floating point control register as reserved.
  Reserved.set(X86::MXCSR);

  // Set the stack-pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RSP))
    Reserved.set(SubReg);

  // Set the Shadow Stack Pointer as reserved.
  Reserved.set(X86::SSP);

  // Set the instruction pointer register and its aliases as reserved.
  for (const MCPhysReg &SubReg : subregs_inclusive(X86::RIP))
    Reserved.set(SubReg);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (TFI->hasFP(MF)) {
    for (const MCPhysReg &SubReg : subregs_inclusive(X86::RBP))
      Reserved.set(SubReg);
  }

  // Set the base-pointer register and its aliases as reserved if needed.
  if (hasBasePointer(MF)) {
    CallingConv::ID CC = MF.getFunction().getCallingConv();
    const uint32_t *RegMask = getCallPreservedMask(MF, CC);
    if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
      report_fatal_error(
          "Stack realignment in presence of dynamic allocas is not supported "
          "with this calling convention.");

    Register BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
    for (const MCPhysReg &SubReg : subregs_inclusive(BasePtr))
      Reserved.set(SubReg);
  }

  // Mark the segment registers as reserved.
  Reserved.set(X86::CS);
  Reserved.set(X86::SS);
  Reserved.set(X86::DS);
  Reserved.set(X86::ES);
  Reserved.set(X86::FS);
  Reserved.set(X86::GS);

  // Mark the floating point stack registers as reserved.
  for (unsigned n = 0; n != 8; ++n)
    Reserved.set(X86::ST0 + n);

  // Reserve the registers that only exist in 64-bit mode.
  if (!Is64Bit) {
    // These 8-bit registers are part of the x86-64 extension even though their
    // super-registers are the old 32-bit registers.
    Reserved.set(X86::SIL);
    Reserved.set(X86::DIL);
    Reserved.set(X86::BPL);
    Reserved.set(X86::SPL);
    Reserved.set(X86::SIH);
    Reserved.set(X86::DIH);
    Reserved.set(X86::BPH);
    Reserved.set(X86::SPH);

    for (unsigned n = 0; n != 8; ++n) {
      // R8, R9, ...
      for (MCRegAliasIterator AI(X86::R8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);

      // XMM8, XMM9, ...
      for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }
  if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
    for (unsigned n = 16; n != 32; ++n) {
      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
        Reserved.set(*AI);
    }
  }

  assert(checkAllSuperRegsMarked(Reserved,
                                 {X86::SIL, X86::DIL, X86::BPL, X86::SPL,
                                  X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
  return Reserved;
}

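// Conservatively returns true if Reg (or one of its sub/super-registers) may
// be used to pass function arguments under the function's calling convention.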
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                         MCRegister Reg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
  auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) {
    return TRI.isSuperOrSubRegisterEq(RegA, RegB);
  };

  if (!ST.is64Bit())
    return llvm::any_of(
               SmallVector<MCRegister>{X86::EAX, X86::ECX, X86::EDX},
               [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) ||
           (ST.hasMMX() && X86::VR64RegClass.contains(Reg));

  CallingConv::ID CC = MF.getFunction().getCallingConv();

  if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg))
    return true;

  if (llvm::any_of(
          SmallVector<MCRegister>{X86::RDX, X86::RCX, X86::R8, X86::R9},
          [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (CC != CallingConv::Win64 &&
      llvm::any_of(SmallVector<MCRegister>{X86::RDI, X86::RSI},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  if (ST.hasSSE1() &&
      llvm::any_of(SmallVector<MCRegister>{X86::XMM0, X86::XMM1, X86::XMM2,
                                           X86::XMM3, X86::XMM4, X86::XMM5,
                                           X86::XMM6, X86::XMM7},
                   [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }))
    return true;

  return X86GenRegisterInfo::isArgumentRegister(MF, Reg);
}

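// Returns true for registers whose role is fixed for the whole function:
// the stack pointer always, and the frame pointer when one is in use.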
bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  const TargetRegisterInfo &TRI = *ST.getRegisterInfo();

  // Stack pointer.
  if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg))
    return true;

  // Don't use the frame pointer if it's being used.
  const X86FrameLowering &TFI = *getFrameLowering(MF);
  if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg))
    return true;

  return X86GenRegisterInfo::isFixedRegister(MF, PhysReg);
}

bool X86RegisterInfo::isTileRegisterClass(const TargetRegisterClass *RC) const {
  return RC->getID() == X86::TILERegClassID;
}

void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
  // Check if the EFLAGS register is marked as live-out. This shouldn't happen,
  // because the calling convention defines the EFLAGS register as NOT
  // preserved.
  //
  // Unfortunately, EFLAGS can show up as live-out after branch folding. Add an
  // assert to track this and clear the register afterwards to avoid
  // unnecessary crashes during release builds.
  assert(!(Mask[X86::EFLAGS / 32] & (1U << (X86::EFLAGS % 32))) &&
         "EFLAGS are not live-out from a patchpoint.");

  // Also clean other registers that don't need preserving (IP).
  for (auto Reg : {X86::EFLAGS, X86::RIP, X86::EIP, X86::IP})
    Mask[Reg / 32] &= ~(1U << (Reg % 32));
}

//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

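// The stack pointer cannot be used to address frame objects when the frame
// has variable-sized objects or opaque SP adjustments (e.g. stack-adjusting
// inline asm), since SP's offset from the frame is then not known statically.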
static bool CantUseSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}

bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  if (X86FI->hasPreallocatedCall())
    return true;

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  if (!EnableBasePointer)
    return false;

  // When we need stack realignment, we can't address the stack from the frame
  // pointer. When we have dynamic allocas or stack-adjusting inline asm, we
  // can't address variables from the stack pointer. MS inline asm can
  // reference locals while also adjusting the stack pointer. When we can't
  // use both the SP and the FP, we need a separate base pointer register.
  bool CantUseFP = hasStackRealignment(MF);
  return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
  if (!TargetRegisterInfo::canRealignStack(MF))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();

  // Stack realignment requires a frame pointer. If we already started
  // register allocation with frame pointer elimination, it is too late now.
  if (!MRI->canReserveReg(FramePtr))
    return false;

  // If a base pointer is necessary, check that it isn't too late to reserve
  // it.
  if (CantUseSP(MFI))
    return MRI->canReserveReg(BasePtr);
  return true;
}

// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should really be trying first to entirely eliminate
// this instruction, which is a plain copy.
static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
  MachineInstr &MI = *II;
  unsigned Opc = II->getOpcode();
  // Check if this is a LEA of the form 'lea (%esp), %ebx'
  if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
      MI.getOperand(2).getImm() != 1 ||
      MI.getOperand(3).getReg() != X86::NoRegister ||
      MI.getOperand(4).getImm() != 0 ||
      MI.getOperand(5).getReg() != X86::NoRegister)
    return false;
  Register BasePtr = MI.getOperand(1).getReg();
  // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
  // be replaced with a 32-bit operand MOV which will zero extend the upper
  // 32-bits of the super register.
  if (Opc == X86::LEA64_32r)
    BasePtr = getX86SubSuperRegister(BasePtr, 32);
  Register NewDestReg = MI.getOperand(0).getReg();
  const X86InstrInfo *TII =
      MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
  TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
                   MI.getOperand(1).isKill());
  MI.eraseFromParent();
  return true;
}

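// Returns true if MI terminates a Windows EH funclet (catchret/cleanupret).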
static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}

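// Rewrite the abstract frame-index operand of an instruction into a concrete
// base register (SP, FP, or the base pointer) plus a byte offset, folding the
// offset into the instruction's displacement field.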
bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                     int SPAdj, unsigned FIOperandNum,
                                     RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  bool IsEHFuncletEpilogue = MBBI == MBB.end() ? false
                                               : isFuncletReturnInstr(*MBBI);
  const X86FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();

  // Determine base register and offset.
  int FIOffset;
  Register BasePtr;
  if (MI.isReturn()) {
    assert((!hasStackRealignment(MF) ||
            MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
           "Return instruction can only reference SP relative frame objects");
    FIOffset =
        TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0).getFixed();
  } else if (TFI->Is64Bit && (MBB.isEHFuncletEntry() || IsEHFuncletEpilogue)) {
    FIOffset = TFI->getWin64EHFrameIndexRef(MF, FrameIndex, BasePtr);
  } else {
    FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr).getFixed();
  }

  // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
  // simple FP case, and doesn't work with stack realignment. On 32-bit, the
  // offset is from the traditional base pointer location. On 64-bit, the
  // offset is from the SP at the end of the prologue, not the FP location.
  // This matches the behavior of llvm.frameaddress.
  unsigned Opc = MI.getOpcode();
  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    FI.ChangeToImmediate(FIOffset);
    return false;
  }

  // For LEA64_32r when BasePtr is 32-bits (X32) we can use the full-size
  // 64-bit register as the source operand; the semantics are the same and the
  // destination is still 32 bits. This saves one byte per lea since the 0x67
  // prefix is avoided. Don't change BasePtr since it is used later for stack
  // adjustment.
  Register MachineBasePtr = BasePtr;
  if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
    MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with the base register. Add an offset to the offset.
  MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);

  if (BasePtr == StackPtr)
    FIOffset += SPAdj;

  // The frame index format for stackmaps and patchpoints is different from the
  // X86 format. It only has a FI and an offset.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
    assert(BasePtr == FramePtr && "Expected the FP as base register");
    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
    return false;
  }

  if (MI.getOperand(FIOperandNum+3).isImm()) {
    // Offset is a 32-bit integer.
    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
    int Offset = FIOffset + Imm;
    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
           "Requesting 64-bit offset in 32-bit immediate!");
    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = FIOffset +
                      (uint64_t)MI.getOperand(FIOperandNum+3).getOffset();
    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
  }
  return false;
}

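// Find a register from the tail-call GPR class that is not used by the
// return-like instruction at MBBI and is therefore dead at that point;
// returns 0 if no such register is available.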
unsigned X86RegisterInfo::findDeadCallerSavedReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) const {
  const MachineFunction *MF = MBB.getParent();
  if (MF->callsEHReturn())
    return 0;

  const TargetRegisterClass &AvailableRegs = *getGPRsForTailCall(*MF);

  if (MBBI == MBB.end())
    return 0;

  switch (MBBI->getOpcode()) {
  default:
    return 0;
  case TargetOpcode::PATCHABLE_RET:
  case X86::RET:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI32:
  case X86::RETI64:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
      MachineOperand &MO = MBBI->getOperand(I);
      if (!MO.isReg() || MO.isDef())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, this, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    for (auto CS : AvailableRegs)
      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP && CS != X86::ESP)
        return CS;
  }
  }

  return 0;
}

Register X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const X86FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ? FramePtr : StackPtr;
}

unsigned
X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register FrameReg = getFrameRegister(MF);
  if (Subtarget.isTarget64BitILP32())
    FrameReg = getX86SubSuperRegister(FrameReg, 32);
  return FrameReg;
}

unsigned
X86RegisterInfo::getPtrSizedStackRegister(const MachineFunction &MF) const {
  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
  Register StackReg = getStackRegister();
  if (Subtarget.isTarget64BitILP32())
    StackReg = getX86SubSuperRegister(StackReg, 32);
  return StackReg;
}

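// Determine the AMX tile shape (row/column operands) associated with a
// virtual tile register by walking back to its defining instruction, caching
// the result in the VirtRegMap so repeated queries are cheap.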
static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
                           const MachineRegisterInfo *MRI) {
  if (VRM->hasShape(VirtReg))
    return VRM->getShape(VirtReg);

  const MachineOperand &Def = *MRI->def_begin(VirtReg);
  MachineInstr *MI = const_cast<MachineInstr *>(Def.getParent());
  unsigned OpCode = MI->getOpcode();
  switch (OpCode) {
  default:
    llvm_unreachable("Unexpected machine instruction on tile register!");
    break;
  case X86::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();
    ShapeT Shape = getTileShape(SrcReg, VRM, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
  // We only collect the tile shape that is defined.
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTILEZEROV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
    MachineOperand &MO1 = MI->getOperand(1);
    MachineOperand &MO2 = MI->getOperand(2);
    ShapeT Shape(&MO1, &MO2, MRI);
    VRM->assignVirt2Shape(VirtReg, Shape);
    return Shape;
  }
}

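// Provide allocation hints for AMX tile registers: prefer physical tile
// registers that are either still unassigned or whose currently assigned
// virtual register has the same shape as VirtReg. Other register classes
// defer to the base implementation.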
bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
                                            ArrayRef<MCPhysReg> Order,
                                            SmallVectorImpl<MCPhysReg> &Hints,
                                            const MachineFunction &MF,
                                            const VirtRegMap *VRM,
                                            const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);

  if (RC.getID() != X86::TILERegClassID)
    return BaseImplRetVal;

  ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
  auto AddHint = [&](MCPhysReg PhysReg) {
    Register VReg = Matrix->getOneVReg(PhysReg);
    if (VReg == MCRegister::NoRegister) { // Not allocated yet
      Hints.push_back(PhysReg);
      return;
    }
    ShapeT PhysShape = getTileShape(VReg, const_cast<VirtRegMap *>(VRM), MRI);
    if (PhysShape == VirtShape)
      Hints.push_back(PhysReg);
  };

  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());
  Hints.clear();
  for (auto Hint : CopyHints) {
    if (RC.contains(Hint) && !MRI->isReserved(Hint))
      AddHint(Hint);
  }
  for (MCPhysReg PhysReg : Order) {
    if (!CopyHints.count(PhysReg) && RC.contains(PhysReg) &&
        !MRI->isReserved(PhysReg))
      AddHint(PhysReg);
  }

#define DEBUG_TYPE "tile-hint"
  LLVM_DEBUG({
    dbgs() << "Hints for virtual register " << format_hex(VirtReg, 8) << "\n";
    for (auto Hint : Hints) {
      dbgs() << "tmm" << Hint << ",";
    }
    dbgs() << "\n";
  });
#undef DEBUG_TYPE

  return true;
}