1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains code to lower X86 MachineInstrs to their corresponding 10 // MCInst records. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/X86ATTInstPrinter.h" 15 #include "MCTargetDesc/X86BaseInfo.h" 16 #include "MCTargetDesc/X86EncodingOptimization.h" 17 #include "MCTargetDesc/X86InstComments.h" 18 #include "MCTargetDesc/X86ShuffleDecode.h" 19 #include "MCTargetDesc/X86TargetStreamer.h" 20 #include "X86AsmPrinter.h" 21 #include "X86MachineFunctionInfo.h" 22 #include "X86RegisterInfo.h" 23 #include "X86ShuffleDecodeConstantPool.h" 24 #include "X86Subtarget.h" 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/ADT/SmallString.h" 27 #include "llvm/ADT/StringExtras.h" 28 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 29 #include "llvm/CodeGen/MachineConstantPool.h" 30 #include "llvm/CodeGen/MachineFunction.h" 31 #include "llvm/CodeGen/MachineModuleInfoImpls.h" 32 #include "llvm/CodeGen/MachineOperand.h" 33 #include "llvm/CodeGen/StackMaps.h" 34 #include "llvm/IR/DataLayout.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Mangler.h" 37 #include "llvm/MC/MCAsmInfo.h" 38 #include "llvm/MC/MCCodeEmitter.h" 39 #include "llvm/MC/MCContext.h" 40 #include "llvm/MC/MCExpr.h" 41 #include "llvm/MC/MCFixup.h" 42 #include "llvm/MC/MCInst.h" 43 #include "llvm/MC/MCInstBuilder.h" 44 #include "llvm/MC/MCSection.h" 45 #include "llvm/MC/MCStreamer.h" 46 #include "llvm/MC/MCSymbol.h" 47 #include "llvm/MC/TargetRegistry.h" 48 #include "llvm/Target/TargetLoweringObjectFile.h" 49 #include "llvm/Target/TargetMachine.h" 50 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" 51 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" 52 #include <string> 53 54 using namespace llvm; 55 56 static cl::opt<bool> EnableBranchHint("enable-branch-hint", 57 cl::desc("Enable branch hint."), 58 cl::init(false), cl::Hidden); 59 static cl::opt<unsigned> BranchHintProbabilityThreshold( 60 "branch-hint-probability-threshold", 61 cl::desc("The probability threshold of enabling branch hint."), 62 cl::init(50), cl::Hidden); 63 64 namespace { 65 66 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 67 class X86MCInstLower { 68 MCContext &Ctx; 69 const MachineFunction &MF; 70 const TargetMachine &TM; 71 const MCAsmInfo &MAI; 72 X86AsmPrinter &AsmPrinter; 73 74 public: 75 X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); 76 77 MCOperand LowerMachineOperand(const MachineInstr *MI, 78 const MachineOperand &MO) const; 79 void Lower(const MachineInstr *MI, MCInst &OutMI) const; 80 81 MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; 82 MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; 83 84 private: 85 MachineModuleInfoMachO &getMachOMMI() const; 86 }; 87 88 } // end anonymous namespace 89 90 /// A RAII helper which defines a region of instructions which can't have 91 /// padding added between them for correctness. 
92 struct NoAutoPaddingScope { 93 MCStreamer &OS; 94 const bool OldAllowAutoPadding; 95 NoAutoPaddingScope(MCStreamer &OS) 96 : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) { 97 changeAndComment(false); 98 } 99 ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); } 100 void changeAndComment(bool b) { 101 if (b == OS.getAllowAutoPadding()) 102 return; 103 OS.setAllowAutoPadding(b); 104 if (b) 105 OS.emitRawComment("autopadding"); 106 else 107 OS.emitRawComment("noautopadding"); 108 } 109 }; 110 111 // Emit a minimal sequence of nops spanning NumBytes bytes. 112 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, 113 const X86Subtarget *Subtarget); 114 115 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, 116 const MCSubtargetInfo &STI, 117 MCCodeEmitter *CodeEmitter) { 118 if (InShadow) { 119 SmallString<256> Code; 120 SmallVector<MCFixup, 4> Fixups; 121 CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI); 122 CurrentShadowSize += Code.size(); 123 if (CurrentShadowSize >= RequiredShadowSize) 124 InShadow = false; // The shadow is big enough. Stop counting. 125 } 126 } 127 128 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( 129 MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { 130 if (InShadow && CurrentShadowSize < RequiredShadowSize) { 131 InShadow = false; 132 emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize, 133 &MF->getSubtarget<X86Subtarget>()); 134 } 135 } 136 137 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { 138 OutStreamer->emitInstruction(Inst, getSubtargetInfo()); 139 SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); 140 } 141 142 X86MCInstLower::X86MCInstLower(const MachineFunction &mf, 143 X86AsmPrinter &asmprinter) 144 : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()), 145 AsmPrinter(asmprinter) {} 146 147 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { 148 return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); 149 } 150 151 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol 152 /// operand to an MCSymbol. 153 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { 154 const Triple &TT = TM.getTargetTriple(); 155 if (MO.isGlobal() && TT.isOSBinFormatELF()) 156 return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal()); 157 158 const DataLayout &DL = MF.getDataLayout(); 159 assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && 160 "Isn't a symbol reference"); 161 162 MCSymbol *Sym = nullptr; 163 SmallString<128> Name; 164 StringRef Suffix; 165 166 switch (MO.getTargetFlags()) { 167 case X86II::MO_DLLIMPORT: 168 // Handle dllimport linkage. 169 Name += "__imp_"; 170 break; 171 case X86II::MO_COFFSTUB: 172 Name += ".refptr."; 173 break; 174 case X86II::MO_DARWIN_NONLAZY: 175 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 176 Suffix = "$non_lazy_ptr"; 177 break; 178 } 179 180 if (!Suffix.empty()) 181 Name += DL.getPrivateGlobalPrefix(); 182 183 if (MO.isGlobal()) { 184 const GlobalValue *GV = MO.getGlobal(); 185 AsmPrinter.getNameWithPrefix(Name, GV); 186 } else if (MO.isSymbol()) { 187 Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); 188 } else if (MO.isMBB()) { 189 assert(Suffix.empty()); 190 Sym = MO.getMBB()->getSymbol(); 191 } 192 193 Name += Suffix; 194 if (!Sym) 195 Sym = Ctx.getOrCreateSymbol(Name); 196 197 // If the target flags on the operand changes the name of the symbol, do that 198 // before we return the symbol. 
199 switch (MO.getTargetFlags()) { 200 default: 201 break; 202 case X86II::MO_COFFSTUB: { 203 MachineModuleInfoCOFF &MMICOFF = 204 AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoCOFF>(); 205 MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym); 206 if (!StubSym.getPointer()) { 207 assert(MO.isGlobal() && "Extern symbol not handled yet"); 208 StubSym = MachineModuleInfoImpl::StubValueTy( 209 AsmPrinter.getSymbol(MO.getGlobal()), true); 210 } 211 break; 212 } 213 case X86II::MO_DARWIN_NONLAZY: 214 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { 215 MachineModuleInfoImpl::StubValueTy &StubSym = 216 getMachOMMI().getGVStubEntry(Sym); 217 if (!StubSym.getPointer()) { 218 assert(MO.isGlobal() && "Extern symbol not handled yet"); 219 StubSym = MachineModuleInfoImpl::StubValueTy( 220 AsmPrinter.getSymbol(MO.getGlobal()), 221 !MO.getGlobal()->hasInternalLinkage()); 222 } 223 break; 224 } 225 } 226 227 return Sym; 228 } 229 230 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, 231 MCSymbol *Sym) const { 232 // FIXME: We would like an efficient form for this, so we don't have to do a 233 // lot of extra uniquing. 234 const MCExpr *Expr = nullptr; 235 MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; 236 237 switch (MO.getTargetFlags()) { 238 default: 239 llvm_unreachable("Unknown target flag on GV operand"); 240 case X86II::MO_NO_FLAG: // No flag. 241 // These affect the name of the symbol, not any suffix. 242 case X86II::MO_DARWIN_NONLAZY: 243 case X86II::MO_DLLIMPORT: 244 case X86II::MO_COFFSTUB: 245 break; 246 247 case X86II::MO_TLVP: 248 RefKind = MCSymbolRefExpr::VK_TLVP; 249 break; 250 case X86II::MO_TLVP_PIC_BASE: 251 Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); 252 // Subtract the pic base. 253 Expr = MCBinaryExpr::createSub( 254 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 255 break; 256 case X86II::MO_SECREL: 257 RefKind = MCSymbolRefExpr::VK_SECREL; 258 break; 259 case X86II::MO_TLSGD: 260 RefKind = MCSymbolRefExpr::VK_TLSGD; 261 break; 262 case X86II::MO_TLSLD: 263 RefKind = MCSymbolRefExpr::VK_TLSLD; 264 break; 265 case X86II::MO_TLSLDM: 266 RefKind = MCSymbolRefExpr::VK_TLSLDM; 267 break; 268 case X86II::MO_GOTTPOFF: 269 RefKind = MCSymbolRefExpr::VK_GOTTPOFF; 270 break; 271 case X86II::MO_INDNTPOFF: 272 RefKind = MCSymbolRefExpr::VK_INDNTPOFF; 273 break; 274 case X86II::MO_TPOFF: 275 RefKind = MCSymbolRefExpr::VK_TPOFF; 276 break; 277 case X86II::MO_DTPOFF: 278 RefKind = MCSymbolRefExpr::VK_DTPOFF; 279 break; 280 case X86II::MO_NTPOFF: 281 RefKind = MCSymbolRefExpr::VK_NTPOFF; 282 break; 283 case X86II::MO_GOTNTPOFF: 284 RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; 285 break; 286 case X86II::MO_GOTPCREL: 287 RefKind = MCSymbolRefExpr::VK_GOTPCREL; 288 break; 289 case X86II::MO_GOTPCREL_NORELAX: 290 RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX; 291 break; 292 case X86II::MO_GOT: 293 RefKind = MCSymbolRefExpr::VK_GOT; 294 break; 295 case X86II::MO_GOTOFF: 296 RefKind = MCSymbolRefExpr::VK_GOTOFF; 297 break; 298 case X86II::MO_PLT: 299 RefKind = MCSymbolRefExpr::VK_PLT; 300 break; 301 case X86II::MO_ABS8: 302 RefKind = MCSymbolRefExpr::VK_X86_ABS8; 303 break; 304 case X86II::MO_PIC_BASE_OFFSET: 305 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: 306 Expr = MCSymbolRefExpr::create(Sym, Ctx); 307 // Subtract the pic base. 
308 Expr = MCBinaryExpr::createSub( 309 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); 310 if (MO.isJTI()) { 311 assert(MAI.doesSetDirectiveSuppressReloc()); 312 // If .set directive is supported, use it to reduce the number of 313 // relocations the assembler will generate for differences between 314 // local labels. This is only safe when the symbols are in the same 315 // section so we are restricting it to jumptable references. 316 MCSymbol *Label = Ctx.createTempSymbol(); 317 AsmPrinter.OutStreamer->emitAssignment(Label, Expr); 318 Expr = MCSymbolRefExpr::create(Label, Ctx); 319 } 320 break; 321 } 322 323 if (!Expr) 324 Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); 325 326 if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) 327 Expr = MCBinaryExpr::createAdd( 328 Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); 329 return MCOperand::createExpr(Expr); 330 } 331 332 static unsigned getRetOpcode(const X86Subtarget &Subtarget) { 333 return Subtarget.is64Bit() ? X86::RET64 : X86::RET32; 334 } 335 336 MCOperand X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, 337 const MachineOperand &MO) const { 338 switch (MO.getType()) { 339 default: 340 MI->print(errs()); 341 llvm_unreachable("unknown operand type"); 342 case MachineOperand::MO_Register: 343 // Ignore all implicit register operands. 344 if (MO.isImplicit()) 345 return MCOperand(); 346 return MCOperand::createReg(MO.getReg()); 347 case MachineOperand::MO_Immediate: 348 return MCOperand::createImm(MO.getImm()); 349 case MachineOperand::MO_MachineBasicBlock: 350 case MachineOperand::MO_GlobalAddress: 351 return LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); 352 case MachineOperand::MO_ExternalSymbol: { 353 MCSymbol *Sym = GetSymbolFromOperand(MO); 354 Sym->setExternal(true); 355 return LowerSymbolOperand(MO, Sym); 356 } 357 case MachineOperand::MO_MCSymbol: 358 return LowerSymbolOperand(MO, MO.getMCSymbol()); 359 case MachineOperand::MO_JumpTableIndex: 360 return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); 361 case MachineOperand::MO_ConstantPoolIndex: 362 return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); 363 case MachineOperand::MO_BlockAddress: 364 return LowerSymbolOperand( 365 MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); 366 case MachineOperand::MO_RegisterMask: 367 // Ignore call clobbers. 368 return MCOperand(); 369 } 370 } 371 372 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding 373 // information. 
374 static unsigned convertTailJumpOpcode(unsigned Opcode) { 375 switch (Opcode) { 376 case X86::TAILJMPr: 377 Opcode = X86::JMP32r; 378 break; 379 case X86::TAILJMPm: 380 Opcode = X86::JMP32m; 381 break; 382 case X86::TAILJMPr64: 383 Opcode = X86::JMP64r; 384 break; 385 case X86::TAILJMPm64: 386 Opcode = X86::JMP64m; 387 break; 388 case X86::TAILJMPr64_REX: 389 Opcode = X86::JMP64r_REX; 390 break; 391 case X86::TAILJMPm64_REX: 392 Opcode = X86::JMP64m_REX; 393 break; 394 case X86::TAILJMPd: 395 case X86::TAILJMPd64: 396 Opcode = X86::JMP_1; 397 break; 398 case X86::TAILJMPd_CC: 399 case X86::TAILJMPd64_CC: 400 Opcode = X86::JCC_1; 401 break; 402 } 403 404 return Opcode; 405 } 406 407 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { 408 OutMI.setOpcode(MI->getOpcode()); 409 410 for (const MachineOperand &MO : MI->operands()) 411 if (auto Op = LowerMachineOperand(MI, MO); Op.isValid()) 412 OutMI.addOperand(Op); 413 414 bool In64BitMode = AsmPrinter.getSubtarget().is64Bit(); 415 if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) || 416 X86::optimizeShiftRotateWithImmediateOne(OutMI) || 417 X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) || 418 X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) || 419 X86::optimizeMOV(OutMI, In64BitMode) || 420 X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI)) 421 return; 422 423 // Handle a few special cases to eliminate operand modifiers. 424 switch (OutMI.getOpcode()) { 425 case X86::LEA64_32r: 426 case X86::LEA64r: 427 case X86::LEA16r: 428 case X86::LEA32r: 429 // LEA should have a segment register, but it must be empty. 430 assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && 431 "Unexpected # of LEA operands"); 432 assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && 433 "LEA has segment specified!"); 434 break; 435 case X86::MULX32Hrr: 436 case X86::MULX32Hrm: 437 case X86::MULX64Hrr: 438 case X86::MULX64Hrm: { 439 // Turn into regular MULX by duplicating the destination. 440 unsigned NewOpc; 441 switch (OutMI.getOpcode()) { 442 default: llvm_unreachable("Invalid opcode"); 443 case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break; 444 case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break; 445 case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break; 446 case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break; 447 } 448 OutMI.setOpcode(NewOpc); 449 // Duplicate the destination. 450 unsigned DestReg = OutMI.getOperand(0).getReg(); 451 OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg)); 452 break; 453 } 454 // CALL64r, CALL64pcrel32 - These instructions used to have 455 // register inputs modeled as normal uses instead of implicit uses. As such, 456 // they we used to truncate off all but the first operand (the callee). This 457 // issue seems to have been fixed at some point. This assert verifies that. 458 case X86::CALL64r: 459 case X86::CALL64pcrel32: 460 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 461 break; 462 case X86::EH_RETURN: 463 case X86::EH_RETURN64: { 464 OutMI = MCInst(); 465 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 466 break; 467 } 468 case X86::CLEANUPRET: { 469 // Replace CLEANUPRET with the appropriate RET. 470 OutMI = MCInst(); 471 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); 472 break; 473 } 474 case X86::CATCHRET: { 475 // Replace CATCHRET with the appropriate RET. 476 const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); 477 unsigned ReturnReg = In64BitMode ? 
X86::RAX : X86::EAX; 478 OutMI = MCInst(); 479 OutMI.setOpcode(getRetOpcode(Subtarget)); 480 OutMI.addOperand(MCOperand::createReg(ReturnReg)); 481 break; 482 } 483 // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump 484 // instruction. 485 case X86::TAILJMPr: 486 case X86::TAILJMPr64: 487 case X86::TAILJMPr64_REX: 488 case X86::TAILJMPd: 489 case X86::TAILJMPd64: 490 assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!"); 491 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 492 break; 493 case X86::TAILJMPd_CC: 494 case X86::TAILJMPd64_CC: 495 assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!"); 496 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 497 break; 498 case X86::TAILJMPm: 499 case X86::TAILJMPm64: 500 case X86::TAILJMPm64_REX: 501 assert(OutMI.getNumOperands() == X86::AddrNumOperands && 502 "Unexpected number of operands!"); 503 OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode())); 504 break; 505 case X86::MASKMOVDQU: 506 case X86::VMASKMOVDQU: 507 if (In64BitMode) 508 OutMI.setFlags(X86::IP_HAS_AD_SIZE); 509 break; 510 case X86::BSF16rm: 511 case X86::BSF16rr: 512 case X86::BSF32rm: 513 case X86::BSF32rr: 514 case X86::BSF64rm: 515 case X86::BSF64rr: { 516 // Add an REP prefix to BSF instructions so that new processors can 517 // recognize as TZCNT, which has better performance than BSF. 518 // BSF and TZCNT have different interpretations on ZF bit. So make sure 519 // it won't be used later. 520 const MachineOperand *FlagDef = 521 MI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr); 522 if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead()) 523 OutMI.setFlags(X86::IP_HAS_REPEAT); 524 break; 525 } 526 default: 527 break; 528 } 529 } 530 531 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, 532 const MachineInstr &MI) { 533 NoAutoPaddingScope NoPadScope(*OutStreamer); 534 bool Is64Bits = getSubtarget().is64Bit(); 535 bool Is64BitsLP64 = getSubtarget().isTarget64BitLP64(); 536 MCContext &Ctx = OutStreamer->getContext(); 537 538 MCSymbolRefExpr::VariantKind SRVK; 539 switch (MI.getOpcode()) { 540 case X86::TLS_addr32: 541 case X86::TLS_addr64: 542 case X86::TLS_addrX32: 543 SRVK = MCSymbolRefExpr::VK_TLSGD; 544 break; 545 case X86::TLS_base_addr32: 546 SRVK = MCSymbolRefExpr::VK_TLSLDM; 547 break; 548 case X86::TLS_base_addr64: 549 case X86::TLS_base_addrX32: 550 SRVK = MCSymbolRefExpr::VK_TLSLD; 551 break; 552 case X86::TLS_desc32: 553 case X86::TLS_desc64: 554 SRVK = MCSymbolRefExpr::VK_TLSDESC; 555 break; 556 default: 557 llvm_unreachable("unexpected opcode"); 558 } 559 560 const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create( 561 MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx); 562 563 // Before binutils 2.41, ld has a bogus TLS relaxation error when the GD/LD 564 // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is 565 // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by 566 // only using GOT when GOTPCRELX is enabled. 567 // TODO Delete the workaround when rustc no longer relies on the hack 568 bool UseGot = MMI->getModule()->getRtLibUseGOT() && 569 Ctx.getTargetOptions()->X86RelaxRelocations; 570 571 if (SRVK == MCSymbolRefExpr::VK_TLSDESC) { 572 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create( 573 MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), 574 MCSymbolRefExpr::VK_TLSCALL, Ctx); 575 EmitAndCountInstruction( 576 MCInstBuilder(Is64BitsLP64 ? 
X86::LEA64r : X86::LEA32r) 577 .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX) 578 .addReg(Is64Bits ? X86::RIP : X86::EBX) 579 .addImm(1) 580 .addReg(0) 581 .addExpr(Sym) 582 .addReg(0)); 583 EmitAndCountInstruction( 584 MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m) 585 .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX) 586 .addImm(1) 587 .addReg(0) 588 .addExpr(Expr) 589 .addReg(0)); 590 } else if (Is64Bits) { 591 bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD; 592 if (NeedsPadding && Is64BitsLP64) 593 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 594 EmitAndCountInstruction(MCInstBuilder(X86::LEA64r) 595 .addReg(X86::RDI) 596 .addReg(X86::RIP) 597 .addImm(1) 598 .addReg(0) 599 .addExpr(Sym) 600 .addReg(0)); 601 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr"); 602 if (NeedsPadding) { 603 if (!UseGot) 604 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 605 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); 606 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); 607 } 608 if (UseGot) { 609 const MCExpr *Expr = MCSymbolRefExpr::create( 610 TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx); 611 EmitAndCountInstruction(MCInstBuilder(X86::CALL64m) 612 .addReg(X86::RIP) 613 .addImm(1) 614 .addReg(0) 615 .addExpr(Expr) 616 .addReg(0)); 617 } else { 618 EmitAndCountInstruction( 619 MCInstBuilder(X86::CALL64pcrel32) 620 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 621 MCSymbolRefExpr::VK_PLT, Ctx))); 622 } 623 } else { 624 if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) { 625 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 626 .addReg(X86::EAX) 627 .addReg(0) 628 .addImm(1) 629 .addReg(X86::EBX) 630 .addExpr(Sym) 631 .addReg(0)); 632 } else { 633 EmitAndCountInstruction(MCInstBuilder(X86::LEA32r) 634 .addReg(X86::EAX) 635 .addReg(X86::EBX) 636 .addImm(1) 637 .addReg(0) 638 .addExpr(Sym) 639 .addReg(0)); 640 } 641 642 const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr"); 643 if (UseGot) { 644 const MCExpr *Expr = 645 MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx); 646 EmitAndCountInstruction(MCInstBuilder(X86::CALL32m) 647 .addReg(X86::EBX) 648 .addImm(1) 649 .addReg(0) 650 .addExpr(Expr) 651 .addReg(0)); 652 } else { 653 EmitAndCountInstruction( 654 MCInstBuilder(X86::CALLpcrel32) 655 .addExpr(MCSymbolRefExpr::create(TlsGetAddr, 656 MCSymbolRefExpr::VK_PLT, Ctx))); 657 } 658 } 659 } 660 661 /// Emit the largest nop instruction smaller than or equal to \p NumBytes 662 /// bytes. Return the size of nop emitted. 663 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes, 664 const X86Subtarget *Subtarget) { 665 // Determine the longest nop which can be efficiently decoded for the given 666 // target cpu. 15-bytes is the longest single NOP instruction, but some 667 // platforms can't decode the longest forms efficiently. 668 unsigned MaxNopLength = 1; 669 if (Subtarget->is64Bit()) { 670 // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the 671 // IndexReg/BaseReg below need to be updated. 
672 if (Subtarget->hasFeature(X86::TuningFast7ByteNOP)) 673 MaxNopLength = 7; 674 else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP)) 675 MaxNopLength = 15; 676 else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP)) 677 MaxNopLength = 11; 678 else 679 MaxNopLength = 10; 680 } if (Subtarget->is32Bit()) 681 MaxNopLength = 2; 682 683 // Cap a single nop emission at the profitable value for the target 684 NumBytes = std::min(NumBytes, MaxNopLength); 685 686 unsigned NopSize; 687 unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg; 688 IndexReg = Displacement = SegmentReg = 0; 689 BaseReg = X86::RAX; 690 ScaleVal = 1; 691 switch (NumBytes) { 692 case 0: 693 llvm_unreachable("Zero nops?"); 694 break; 695 case 1: 696 NopSize = 1; 697 Opc = X86::NOOP; 698 break; 699 case 2: 700 NopSize = 2; 701 Opc = X86::XCHG16ar; 702 break; 703 case 3: 704 NopSize = 3; 705 Opc = X86::NOOPL; 706 break; 707 case 4: 708 NopSize = 4; 709 Opc = X86::NOOPL; 710 Displacement = 8; 711 break; 712 case 5: 713 NopSize = 5; 714 Opc = X86::NOOPL; 715 Displacement = 8; 716 IndexReg = X86::RAX; 717 break; 718 case 6: 719 NopSize = 6; 720 Opc = X86::NOOPW; 721 Displacement = 8; 722 IndexReg = X86::RAX; 723 break; 724 case 7: 725 NopSize = 7; 726 Opc = X86::NOOPL; 727 Displacement = 512; 728 break; 729 case 8: 730 NopSize = 8; 731 Opc = X86::NOOPL; 732 Displacement = 512; 733 IndexReg = X86::RAX; 734 break; 735 case 9: 736 NopSize = 9; 737 Opc = X86::NOOPW; 738 Displacement = 512; 739 IndexReg = X86::RAX; 740 break; 741 default: 742 NopSize = 10; 743 Opc = X86::NOOPW; 744 Displacement = 512; 745 IndexReg = X86::RAX; 746 SegmentReg = X86::CS; 747 break; 748 } 749 750 unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); 751 NopSize += NumPrefixes; 752 for (unsigned i = 0; i != NumPrefixes; ++i) 753 OS.emitBytes("\x66"); 754 755 switch (Opc) { 756 default: llvm_unreachable("Unexpected opcode"); 757 case X86::NOOP: 758 OS.emitInstruction(MCInstBuilder(Opc), *Subtarget); 759 break; 760 case X86::XCHG16ar: 761 OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), 762 *Subtarget); 763 break; 764 case X86::NOOPL: 765 case X86::NOOPW: 766 OS.emitInstruction(MCInstBuilder(Opc) 767 .addReg(BaseReg) 768 .addImm(ScaleVal) 769 .addReg(IndexReg) 770 .addImm(Displacement) 771 .addReg(SegmentReg), 772 *Subtarget); 773 break; 774 } 775 assert(NopSize <= NumBytes && "We overemitted?"); 776 return NopSize; 777 } 778 779 /// Emit the optimal amount of multi-byte nops on X86. 
780 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes, 781 const X86Subtarget *Subtarget) { 782 unsigned NopsToEmit = NumBytes; 783 (void)NopsToEmit; 784 while (NumBytes) { 785 NumBytes -= emitNop(OS, NumBytes, Subtarget); 786 assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!"); 787 } 788 } 789 790 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, 791 X86MCInstLower &MCIL) { 792 assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64"); 793 794 NoAutoPaddingScope NoPadScope(*OutStreamer); 795 796 StatepointOpers SOpers(&MI); 797 if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { 798 emitX86Nops(*OutStreamer, PatchBytes, Subtarget); 799 } else { 800 // Lower call target and choose correct opcode 801 const MachineOperand &CallTarget = SOpers.getCallTarget(); 802 MCOperand CallTargetMCOp; 803 unsigned CallOpcode; 804 switch (CallTarget.getType()) { 805 case MachineOperand::MO_GlobalAddress: 806 case MachineOperand::MO_ExternalSymbol: 807 CallTargetMCOp = MCIL.LowerSymbolOperand( 808 CallTarget, MCIL.GetSymbolFromOperand(CallTarget)); 809 CallOpcode = X86::CALL64pcrel32; 810 // Currently, we only support relative addressing with statepoints. 811 // Otherwise, we'll need a scratch register to hold the target 812 // address. You'll fail asserts during load & relocation if this 813 // symbol is to far away. (TODO: support non-relative addressing) 814 break; 815 case MachineOperand::MO_Immediate: 816 CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); 817 CallOpcode = X86::CALL64pcrel32; 818 // Currently, we only support relative addressing with statepoints. 819 // Otherwise, we'll need a scratch register to hold the target 820 // immediate. You'll fail asserts during load & relocation if this 821 // address is to far away. (TODO: support non-relative addressing) 822 break; 823 case MachineOperand::MO_Register: 824 // FIXME: Add retpoline support and remove this. 
825 if (Subtarget->useIndirectThunkCalls()) 826 report_fatal_error("Lowering register statepoints with thunks not " 827 "yet implemented."); 828 CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); 829 CallOpcode = X86::CALL64r; 830 break; 831 default: 832 llvm_unreachable("Unsupported operand type in statepoint call target"); 833 break; 834 } 835 836 // Emit call 837 MCInst CallInst; 838 CallInst.setOpcode(CallOpcode); 839 CallInst.addOperand(CallTargetMCOp); 840 OutStreamer->emitInstruction(CallInst, getSubtargetInfo()); 841 } 842 843 // Record our statepoint node in the same section used by STACKMAP 844 // and PATCHPOINT 845 auto &Ctx = OutStreamer->getContext(); 846 MCSymbol *MILabel = Ctx.createTempSymbol(); 847 OutStreamer->emitLabel(MILabel); 848 SM.recordStatepoint(*MILabel, MI); 849 } 850 851 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI, 852 X86MCInstLower &MCIL) { 853 // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>, 854 // <opcode>, <operands> 855 856 NoAutoPaddingScope NoPadScope(*OutStreamer); 857 858 Register DefRegister = FaultingMI.getOperand(0).getReg(); 859 FaultMaps::FaultKind FK = 860 static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm()); 861 MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol(); 862 unsigned Opcode = FaultingMI.getOperand(3).getImm(); 863 unsigned OperandsBeginIdx = 4; 864 865 auto &Ctx = OutStreamer->getContext(); 866 MCSymbol *FaultingLabel = Ctx.createTempSymbol(); 867 OutStreamer->emitLabel(FaultingLabel); 868 869 assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!"); 870 FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel); 871 872 MCInst MI; 873 MI.setOpcode(Opcode); 874 875 if (DefRegister != X86::NoRegister) 876 MI.addOperand(MCOperand::createReg(DefRegister)); 877 878 for (const MachineOperand &MO : 879 llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx)) 880 if (auto Op = MCIL.LowerMachineOperand(&FaultingMI, MO); Op.isValid()) 881 MI.addOperand(Op); 882 883 OutStreamer->AddComment("on-fault: " + HandlerLabel->getName()); 884 OutStreamer->emitInstruction(MI, getSubtargetInfo()); 885 } 886 887 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI, 888 X86MCInstLower &MCIL) { 889 bool Is64Bits = Subtarget->is64Bit(); 890 MCContext &Ctx = OutStreamer->getContext(); 891 MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__"); 892 const MCSymbolRefExpr *Op = 893 MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx); 894 895 EmitAndCountInstruction( 896 MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) 897 .addExpr(Op)); 898 } 899 900 void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { 901 assert(std::next(MI.getIterator())->isCall() && 902 "KCFI_CHECK not followed by a call instruction"); 903 904 // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop() 905 // returns a 1-byte X86::NOOP, which means the offset is the same in 906 // bytes. This assumes that patchable-function-prefix is the same for all 907 // functions. 908 const MachineFunction &MF = *MI.getMF(); 909 int64_t PrefixNops = 0; 910 (void)MF.getFunction() 911 .getFnAttribute("patchable-function-prefix") 912 .getValueAsString() 913 .getAsInteger(10, PrefixNops); 914 915 // KCFI allows indirect calls to any location that's preceded by a valid 916 // type identifier. 
To avoid encoding the full constant into an instruction, 917 // and thus emitting potential call target gadgets at each indirect call 918 // site, load a negated constant to a register and compare that to the 919 // expected value at the call target. 920 const Register AddrReg = MI.getOperand(0).getReg(); 921 const uint32_t Type = MI.getOperand(1).getImm(); 922 // The check is immediately before the call. If the call target is in R10, 923 // we can clobber R11 for the check instead. 924 unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D; 925 EmitAndCountInstruction( 926 MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type))); 927 EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm) 928 .addReg(X86::NoRegister) 929 .addReg(TempReg) 930 .addReg(AddrReg) 931 .addImm(1) 932 .addReg(X86::NoRegister) 933 .addImm(-(PrefixNops + 4)) 934 .addReg(X86::NoRegister)); 935 936 MCSymbol *Pass = OutContext.createTempSymbol(); 937 EmitAndCountInstruction( 938 MCInstBuilder(X86::JCC_1) 939 .addExpr(MCSymbolRefExpr::create(Pass, OutContext)) 940 .addImm(X86::COND_E)); 941 942 MCSymbol *Trap = OutContext.createTempSymbol(); 943 OutStreamer->emitLabel(Trap); 944 EmitAndCountInstruction(MCInstBuilder(X86::TRAP)); 945 emitKCFITrapEntry(MF, Trap); 946 OutStreamer->emitLabel(Pass); 947 } 948 949 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) { 950 // FIXME: Make this work on non-ELF. 951 if (!TM.getTargetTriple().isOSBinFormatELF()) { 952 report_fatal_error("llvm.asan.check.memaccess only supported on ELF"); 953 return; 954 } 955 956 const auto &Reg = MI.getOperand(0).getReg(); 957 ASanAccessInfo AccessInfo(MI.getOperand(1).getImm()); 958 959 uint64_t ShadowBase; 960 int MappingScale; 961 bool OrShadowOffset; 962 getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64, 963 AccessInfo.CompileKernel, &ShadowBase, 964 &MappingScale, &OrShadowOffset); 965 966 StringRef Name = AccessInfo.IsWrite ? "store" : "load"; 967 StringRef Op = OrShadowOffset ? "or" : "add"; 968 std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" + 969 Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" + 970 TM.getMCRegisterInfo()->getName(Reg.asMCReg())) 971 .str(); 972 if (OrShadowOffset) 973 report_fatal_error( 974 "OrShadowOffset is not supported with optimized callbacks"); 975 976 EmitAndCountInstruction( 977 MCInstBuilder(X86::CALL64pcrel32) 978 .addExpr(MCSymbolRefExpr::create( 979 OutContext.getOrCreateSymbol(SymName), OutContext))); 980 } 981 982 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, 983 X86MCInstLower &MCIL) { 984 // PATCHABLE_OP minsize 985 986 NoAutoPaddingScope NoPadScope(*OutStreamer); 987 988 auto NextMI = std::find_if(std::next(MI.getIterator()), 989 MI.getParent()->end().getInstrIterator(), 990 [](auto &II) { return !II.isMetaInstruction(); }); 991 992 SmallString<256> Code; 993 unsigned MinSize = MI.getOperand(0).getImm(); 994 995 if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) { 996 // Lower the next MachineInstr to find its byte size. 997 // If the next instruction is inline assembly, we skip lowering it for now, 998 // and assume we should always generate NOPs. 
999 MCInst MCI; 1000 MCIL.Lower(&*NextMI, MCI); 1001 1002 SmallVector<MCFixup, 4> Fixups; 1003 CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo()); 1004 } 1005 1006 if (Code.size() < MinSize) { 1007 if (MinSize == 2 && Subtarget->is32Bit() && 1008 Subtarget->isTargetWindowsMSVC() && 1009 (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) { 1010 // For compatibility reasons, when targetting MSVC, it is important to 1011 // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools 1012 // rely specifically on this pattern to be able to patch a function. 1013 // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE. 1014 OutStreamer->emitInstruction( 1015 MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI), 1016 *Subtarget); 1017 } else { 1018 unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget); 1019 assert(NopSize == MinSize && "Could not implement MinSize!"); 1020 (void)NopSize; 1021 } 1022 } 1023 } 1024 1025 // Lower a stackmap of the form: 1026 // <id>, <shadowBytes>, ... 1027 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { 1028 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); 1029 1030 auto &Ctx = OutStreamer->getContext(); 1031 MCSymbol *MILabel = Ctx.createTempSymbol(); 1032 OutStreamer->emitLabel(MILabel); 1033 1034 SM.recordStackMap(*MILabel, MI); 1035 unsigned NumShadowBytes = MI.getOperand(1).getImm(); 1036 SMShadowTracker.reset(NumShadowBytes); 1037 } 1038 1039 // Lower a patchpoint of the form: 1040 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... 1041 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, 1042 X86MCInstLower &MCIL) { 1043 assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64"); 1044 1045 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); 1046 1047 NoAutoPaddingScope NoPadScope(*OutStreamer); 1048 1049 auto &Ctx = OutStreamer->getContext(); 1050 MCSymbol *MILabel = Ctx.createTempSymbol(); 1051 OutStreamer->emitLabel(MILabel); 1052 SM.recordPatchPoint(*MILabel, MI); 1053 1054 PatchPointOpers opers(&MI); 1055 unsigned ScratchIdx = opers.getNextScratchIdx(); 1056 unsigned EncodedBytes = 0; 1057 const MachineOperand &CalleeMO = opers.getCallTarget(); 1058 1059 // Check for null target. If target is non-null (i.e. is non-zero or is 1060 // symbolic) then emit a call. 1061 if (!(CalleeMO.isImm() && !CalleeMO.getImm())) { 1062 MCOperand CalleeMCOp; 1063 switch (CalleeMO.getType()) { 1064 default: 1065 /// FIXME: Add a verifier check for bad callee types. 1066 llvm_unreachable("Unrecognized callee operand type."); 1067 case MachineOperand::MO_Immediate: 1068 if (CalleeMO.getImm()) 1069 CalleeMCOp = MCOperand::createImm(CalleeMO.getImm()); 1070 break; 1071 case MachineOperand::MO_ExternalSymbol: 1072 case MachineOperand::MO_GlobalAddress: 1073 CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO, 1074 MCIL.GetSymbolFromOperand(CalleeMO)); 1075 break; 1076 } 1077 1078 // Emit MOV to materialize the target address and the CALL to target. 1079 // This is encoded with 12-13 bytes, depending on which register is used. 1080 Register ScratchReg = MI.getOperand(ScratchIdx).getReg(); 1081 if (X86II::isX86_64ExtendedReg(ScratchReg)) 1082 EncodedBytes = 13; 1083 else 1084 EncodedBytes = 12; 1085 1086 EmitAndCountInstruction( 1087 MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); 1088 // FIXME: Add retpoline support and remove this. 
1089 if (Subtarget->useIndirectThunkCalls()) 1090 report_fatal_error( 1091 "Lowering patchpoint with thunks not yet implemented."); 1092 EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); 1093 } 1094 1095 // Emit padding. 1096 unsigned NumBytes = opers.getNumPatchBytes(); 1097 assert(NumBytes >= EncodedBytes && 1098 "Patchpoint can't request size less than the length of a call."); 1099 1100 emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget); 1101 } 1102 1103 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, 1104 X86MCInstLower &MCIL) { 1105 assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); 1106 1107 NoAutoPaddingScope NoPadScope(*OutStreamer); 1108 1109 // We want to emit the following pattern, which follows the x86 calling 1110 // convention to prepare for the trampoline call to be patched in. 1111 // 1112 // .p2align 1, ... 1113 // .Lxray_event_sled_N: 1114 // jmp +N // jump across the instrumentation sled 1115 // ... // set up arguments in register 1116 // callq __xray_CustomEvent@plt // force dependency to symbol 1117 // ... 1118 // <jump here> 1119 // 1120 // After patching, it would look something like: 1121 // 1122 // nopw (2-byte nop) 1123 // ... 1124 // callq __xrayCustomEvent // already lowered 1125 // ... 1126 // 1127 // --- 1128 // First we emit the label and the jump. 1129 auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); 1130 OutStreamer->AddComment("# XRay Custom Event Log"); 1131 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1132 OutStreamer->emitLabel(CurSled); 1133 1134 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1135 // an operand (computed as an offset from the jmp instruction). 1136 // FIXME: Find another less hacky way do force the relative jump. 1137 OutStreamer->emitBinaryData("\xeb\x0f"); 1138 1139 // The default C calling convention will place two arguments into %rcx and 1140 // %rdx -- so we only work with those. 1141 const Register DestRegs[] = {X86::RDI, X86::RSI}; 1142 bool UsedMask[] = {false, false}; 1143 // Filled out in loop. 1144 Register SrcRegs[] = {0, 0}; 1145 1146 // Then we put the operands in the %rdi and %rsi registers. We spill the 1147 // values in the register before we clobber them, and mark them as used in 1148 // UsedMask. In case the arguments are already in the correct register, we use 1149 // emit nops appropriately sized to keep the sled the same size in every 1150 // situation. 1151 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1152 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I)); 1153 Op.isValid()) { 1154 assert(Op.isReg() && "Only support arguments in registers"); 1155 SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64); 1156 assert(SrcRegs[I].isValid() && "Invalid operand"); 1157 if (SrcRegs[I] != DestRegs[I]) { 1158 UsedMask[I] = true; 1159 EmitAndCountInstruction( 1160 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); 1161 } else { 1162 emitX86Nops(*OutStreamer, 4, Subtarget); 1163 } 1164 } 1165 1166 // Now that the register values are stashed, mov arguments into place. 1167 // FIXME: This doesn't work if one of the later SrcRegs is equal to an 1168 // earlier DestReg. We will have already overwritten over the register before 1169 // we can copy from it. 
1170 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1171 if (SrcRegs[I] != DestRegs[I]) 1172 EmitAndCountInstruction( 1173 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); 1174 1175 // We emit a hard dependency on the __xray_CustomEvent symbol, which is the 1176 // name of the trampoline to be implemented by the XRay runtime. 1177 auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); 1178 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1179 if (isPositionIndependent()) 1180 TOp.setTargetFlags(X86II::MO_PLT); 1181 1182 // Emit the call instruction. 1183 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1184 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1185 1186 // Restore caller-saved and used registers. 1187 for (unsigned I = sizeof UsedMask; I-- > 0;) 1188 if (UsedMask[I]) 1189 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1190 else 1191 emitX86Nops(*OutStreamer, 1, Subtarget); 1192 1193 OutStreamer->AddComment("xray custom event end."); 1194 1195 // Record the sled version. Version 0 of this sled was spelled differently, so 1196 // we let the runtime handle the different offsets we're using. Version 2 1197 // changed the absolute address to a PC-relative address. 1198 recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2); 1199 } 1200 1201 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, 1202 X86MCInstLower &MCIL) { 1203 assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64"); 1204 1205 NoAutoPaddingScope NoPadScope(*OutStreamer); 1206 1207 // We want to emit the following pattern, which follows the x86 calling 1208 // convention to prepare for the trampoline call to be patched in. 1209 // 1210 // .p2align 1, ... 1211 // .Lxray_event_sled_N: 1212 // jmp +N // jump across the instrumentation sled 1213 // ... // set up arguments in register 1214 // callq __xray_TypedEvent@plt // force dependency to symbol 1215 // ... 1216 // <jump here> 1217 // 1218 // After patching, it would look something like: 1219 // 1220 // nopw (2-byte nop) 1221 // ... 1222 // callq __xrayTypedEvent // already lowered 1223 // ... 1224 // 1225 // --- 1226 // First we emit the label and the jump. 1227 auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); 1228 OutStreamer->AddComment("# XRay Typed Event Log"); 1229 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1230 OutStreamer->emitLabel(CurSled); 1231 1232 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1233 // an operand (computed as an offset from the jmp instruction). 1234 // FIXME: Find another less hacky way do force the relative jump. 1235 OutStreamer->emitBinaryData("\xeb\x14"); 1236 1237 // An x86-64 convention may place three arguments into %rcx, %rdx, and R8, 1238 // so we'll work with those. Or we may be called via SystemV, in which case 1239 // we don't have to do any translation. 1240 const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; 1241 bool UsedMask[] = {false, false, false}; 1242 1243 // Will fill out src regs in the loop. 1244 Register SrcRegs[] = {0, 0, 0}; 1245 1246 // Then we put the operands in the SystemV registers. We spill the values in 1247 // the registers before we clobber them, and mark them as used in UsedMask. 1248 // In case the arguments are already in the correct register, we emit nops 1249 // appropriately sized to keep the sled the same size in every situation. 
1250 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1251 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I)); 1252 Op.isValid()) { 1253 // TODO: Is register only support adequate? 1254 assert(Op.isReg() && "Only supports arguments in registers"); 1255 SrcRegs[I] = getX86SubSuperRegister(Op.getReg(), 64); 1256 assert(SrcRegs[I].isValid() && "Invalid operand"); 1257 if (SrcRegs[I] != DestRegs[I]) { 1258 UsedMask[I] = true; 1259 EmitAndCountInstruction( 1260 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); 1261 } else { 1262 emitX86Nops(*OutStreamer, 4, Subtarget); 1263 } 1264 } 1265 1266 // In the above loop we only stash all of the destination registers or emit 1267 // nops if the arguments are already in the right place. Doing the actually 1268 // moving is postponed until after all the registers are stashed so nothing 1269 // is clobbers. We've already added nops to account for the size of mov and 1270 // push if the register is in the right place, so we only have to worry about 1271 // emitting movs. 1272 // FIXME: This doesn't work if one of the later SrcRegs is equal to an 1273 // earlier DestReg. We will have already overwritten over the register before 1274 // we can copy from it. 1275 for (unsigned I = 0; I < MI.getNumOperands(); ++I) 1276 if (UsedMask[I]) 1277 EmitAndCountInstruction( 1278 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); 1279 1280 // We emit a hard dependency on the __xray_TypedEvent symbol, which is the 1281 // name of the trampoline to be implemented by the XRay runtime. 1282 auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent"); 1283 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); 1284 if (isPositionIndependent()) 1285 TOp.setTargetFlags(X86II::MO_PLT); 1286 1287 // Emit the call instruction. 1288 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32) 1289 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); 1290 1291 // Restore caller-saved and used registers. 1292 for (unsigned I = sizeof UsedMask; I-- > 0;) 1293 if (UsedMask[I]) 1294 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); 1295 else 1296 emitX86Nops(*OutStreamer, 1, Subtarget); 1297 1298 OutStreamer->AddComment("xray typed event end."); 1299 1300 // Record the sled version. 1301 recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2); 1302 } 1303 1304 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, 1305 X86MCInstLower &MCIL) { 1306 1307 NoAutoPaddingScope NoPadScope(*OutStreamer); 1308 1309 const Function &F = MF->getFunction(); 1310 if (F.hasFnAttribute("patchable-function-entry")) { 1311 unsigned Num; 1312 if (F.getFnAttribute("patchable-function-entry") 1313 .getValueAsString() 1314 .getAsInteger(10, Num)) 1315 return; 1316 emitX86Nops(*OutStreamer, Num, Subtarget); 1317 return; 1318 } 1319 // We want to emit the following pattern: 1320 // 1321 // .p2align 1, ... 1322 // .Lxray_sled_N: 1323 // jmp .tmpN 1324 // # 9 bytes worth of noops 1325 // 1326 // We need the 9 bytes because at runtime, we'd be patching over the full 11 1327 // bytes with the following pattern: 1328 // 1329 // mov %r10, <function id, 32-bit> // 6 bytes 1330 // call <relative offset, 32-bits> // 5 bytes 1331 // 1332 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1333 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1334 OutStreamer->emitLabel(CurSled); 1335 1336 // Use a two-byte `jmp`. 
This version of JMP takes an 8-bit relative offset as 1337 // an operand (computed as an offset from the jmp instruction). 1338 // FIXME: Find another less hacky way do force the relative jump. 1339 OutStreamer->emitBytes("\xeb\x09"); 1340 emitX86Nops(*OutStreamer, 9, Subtarget); 1341 recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2); 1342 } 1343 1344 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, 1345 X86MCInstLower &MCIL) { 1346 NoAutoPaddingScope NoPadScope(*OutStreamer); 1347 1348 // Since PATCHABLE_RET takes the opcode of the return statement as an 1349 // argument, we use that to emit the correct form of the RET that we want. 1350 // i.e. when we see this: 1351 // 1352 // PATCHABLE_RET X86::RET ... 1353 // 1354 // We should emit the RET followed by sleds. 1355 // 1356 // .p2align 1, ... 1357 // .Lxray_sled_N: 1358 // ret # or equivalent instruction 1359 // # 10 bytes worth of noops 1360 // 1361 // This just makes sure that the alignment for the next instruction is 2. 1362 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1363 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1364 OutStreamer->emitLabel(CurSled); 1365 unsigned OpCode = MI.getOperand(0).getImm(); 1366 MCInst Ret; 1367 Ret.setOpcode(OpCode); 1368 for (auto &MO : drop_begin(MI.operands())) 1369 if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid()) 1370 Ret.addOperand(Op); 1371 OutStreamer->emitInstruction(Ret, getSubtargetInfo()); 1372 emitX86Nops(*OutStreamer, 10, Subtarget); 1373 recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2); 1374 } 1375 1376 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, 1377 X86MCInstLower &MCIL) { 1378 MCInst TC; 1379 TC.setOpcode(convertTailJumpOpcode(MI.getOperand(0).getImm())); 1380 // Drop the tail jump opcode. 1381 auto TCOperands = drop_begin(MI.operands()); 1382 bool IsConditional = TC.getOpcode() == X86::JCC_1; 1383 MCSymbol *FallthroughLabel; 1384 if (IsConditional) { 1385 // Rewrite: 1386 // je target 1387 // 1388 // To: 1389 // jne .fallthrough 1390 // .p2align 1, ... 1391 // .Lxray_sled_N: 1392 // SLED_CODE 1393 // jmp target 1394 // .fallthrough: 1395 FallthroughLabel = OutContext.createTempSymbol(); 1396 EmitToStreamer( 1397 *OutStreamer, 1398 MCInstBuilder(X86::JCC_1) 1399 .addExpr(MCSymbolRefExpr::create(FallthroughLabel, OutContext)) 1400 .addImm(X86::GetOppositeBranchCondition( 1401 static_cast<X86::CondCode>(MI.getOperand(2).getImm())))); 1402 TC.setOpcode(X86::JMP_1); 1403 // Drop the condition code. 1404 TCOperands = drop_end(TCOperands); 1405 } 1406 1407 NoAutoPaddingScope NoPadScope(*OutStreamer); 1408 1409 // Like PATCHABLE_RET, we have the actual instruction in the operands to this 1410 // instruction so we lower that particular instruction and its operands. 1411 // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how 1412 // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to 1413 // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual 1414 // tail call much like how we have it in PATCHABLE_RET. 1415 auto CurSled = OutContext.createTempSymbol("xray_sled_", true); 1416 OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); 1417 OutStreamer->emitLabel(CurSled); 1418 auto Target = OutContext.createTempSymbol(); 1419 1420 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as 1421 // an operand (computed as an offset from the jmp instruction). 
1422 // FIXME: Find another less hacky way do force the relative jump. 1423 OutStreamer->emitBytes("\xeb\x09"); 1424 emitX86Nops(*OutStreamer, 9, Subtarget); 1425 OutStreamer->emitLabel(Target); 1426 recordSled(CurSled, MI, SledKind::TAIL_CALL, 2); 1427 1428 // Before emitting the instruction, add a comment to indicate that this is 1429 // indeed a tail call. 1430 OutStreamer->AddComment("TAILCALL"); 1431 for (auto &MO : TCOperands) 1432 if (auto Op = MCIL.LowerMachineOperand(&MI, MO); Op.isValid()) 1433 TC.addOperand(Op); 1434 OutStreamer->emitInstruction(TC, getSubtargetInfo()); 1435 1436 if (IsConditional) 1437 OutStreamer->emitLabel(FallthroughLabel); 1438 } 1439 1440 // Returns instruction preceding MBBI in MachineFunction. 1441 // If MBBI is the first instruction of the first basic block, returns null. 1442 static MachineBasicBlock::const_iterator 1443 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) { 1444 const MachineBasicBlock *MBB = MBBI->getParent(); 1445 while (MBBI == MBB->begin()) { 1446 if (MBB == &MBB->getParent()->front()) 1447 return MachineBasicBlock::const_iterator(); 1448 MBB = MBB->getPrevNode(); 1449 MBBI = MBB->end(); 1450 } 1451 --MBBI; 1452 return MBBI; 1453 } 1454 1455 static unsigned getSrcIdx(const MachineInstr* MI, unsigned SrcIdx) { 1456 if (X86II::isKMasked(MI->getDesc().TSFlags)) { 1457 // Skip mask operand. 1458 ++SrcIdx; 1459 if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) { 1460 // Skip passthru operand. 1461 ++SrcIdx; 1462 } 1463 } 1464 return SrcIdx; 1465 } 1466 1467 static void printDstRegisterName(raw_ostream &CS, const MachineInstr *MI, 1468 unsigned SrcOpIdx) { 1469 const MachineOperand &DstOp = MI->getOperand(0); 1470 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()); 1471 1472 // Handle AVX512 MASK/MASXZ write mask comments. 1473 // MASK: zmmX {%kY} 1474 // MASKZ: zmmX {%kY} {z} 1475 if (X86II::isKMasked(MI->getDesc().TSFlags)) { 1476 const MachineOperand &WriteMaskOp = MI->getOperand(SrcOpIdx - 1); 1477 StringRef Mask = X86ATTInstPrinter::getRegisterName(WriteMaskOp.getReg()); 1478 CS << " {%" << Mask << "}"; 1479 if (!X86II::isKMergeMasked(MI->getDesc().TSFlags)) { 1480 CS << " {z}"; 1481 } 1482 } 1483 } 1484 1485 static void printShuffleMask(raw_ostream &CS, StringRef Src1Name, 1486 StringRef Src2Name, ArrayRef<int> Mask) { 1487 // One source operand, fix the mask to print all elements in one span. 1488 SmallVector<int, 8> ShuffleMask(Mask); 1489 if (Src1Name == Src2Name) 1490 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) 1491 if (ShuffleMask[i] >= e) 1492 ShuffleMask[i] -= e; 1493 1494 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) { 1495 if (i != 0) 1496 CS << ","; 1497 if (ShuffleMask[i] == SM_SentinelZero) { 1498 CS << "zero"; 1499 continue; 1500 } 1501 1502 // Otherwise, it must come from src1 or src2. Print the span of elements 1503 // that comes from this src. 1504 bool isSrc1 = ShuffleMask[i] < (int)e; 1505 CS << (isSrc1 ? Src1Name : Src2Name) << '['; 1506 1507 bool IsFirst = true; 1508 while (i != e && ShuffleMask[i] != SM_SentinelZero && 1509 (ShuffleMask[i] < (int)e) == isSrc1) { 1510 if (!IsFirst) 1511 CS << ','; 1512 else 1513 IsFirst = false; 1514 if (ShuffleMask[i] == SM_SentinelUndef) 1515 CS << "u"; 1516 else 1517 CS << ShuffleMask[i] % (int)e; 1518 ++i; 1519 } 1520 CS << ']'; 1521 --i; // For loop increments element #. 
1522 } 1523 } 1524 1525 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx, 1526 unsigned SrcOp2Idx, ArrayRef<int> Mask) { 1527 std::string Comment; 1528 1529 const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx); 1530 const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx); 1531 StringRef Src1Name = SrcOp1.isReg() 1532 ? X86ATTInstPrinter::getRegisterName(SrcOp1.getReg()) 1533 : "mem"; 1534 StringRef Src2Name = SrcOp2.isReg() 1535 ? X86ATTInstPrinter::getRegisterName(SrcOp2.getReg()) 1536 : "mem"; 1537 1538 raw_string_ostream CS(Comment); 1539 printDstRegisterName(CS, MI, SrcOp1Idx); 1540 CS << " = "; 1541 printShuffleMask(CS, Src1Name, Src2Name, Mask); 1542 1543 return Comment; 1544 } 1545 1546 static void printConstant(const APInt &Val, raw_ostream &CS, 1547 bool PrintZero = false) { 1548 if (Val.getBitWidth() <= 64) { 1549 CS << (PrintZero ? 0ULL : Val.getZExtValue()); 1550 } else { 1551 // print multi-word constant as (w0,w1) 1552 CS << "("; 1553 for (int i = 0, N = Val.getNumWords(); i < N; ++i) { 1554 if (i > 0) 1555 CS << ","; 1556 CS << (PrintZero ? 0ULL : Val.getRawData()[i]); 1557 } 1558 CS << ")"; 1559 } 1560 } 1561 1562 static void printConstant(const APFloat &Flt, raw_ostream &CS, 1563 bool PrintZero = false) { 1564 SmallString<32> Str; 1565 // Force scientific notation to distinguish from integers. 1566 if (PrintZero) 1567 APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0); 1568 else 1569 Flt.toString(Str, 0, 0); 1570 CS << Str; 1571 } 1572 1573 static void printConstant(const Constant *COp, unsigned BitWidth, 1574 raw_ostream &CS, bool PrintZero = false) { 1575 if (isa<UndefValue>(COp)) { 1576 CS << "u"; 1577 } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { 1578 printConstant(CI->getValue(), CS, PrintZero); 1579 } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { 1580 printConstant(CF->getValueAPF(), CS, PrintZero); 1581 } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) { 1582 Type *EltTy = CDS->getElementType(); 1583 bool IsInteger = EltTy->isIntegerTy(); 1584 bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); 1585 unsigned EltBits = EltTy->getPrimitiveSizeInBits(); 1586 unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements()); 1587 assert((BitWidth % EltBits) == 0 && "Element size mismatch"); 1588 for (unsigned I = 0; I != E; ++I) { 1589 if (I != 0) 1590 CS << ","; 1591 if (IsInteger) 1592 printConstant(CDS->getElementAsAPInt(I), CS, PrintZero); 1593 else if (IsFP) 1594 printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero); 1595 else 1596 CS << "?"; 1597 } 1598 } else if (auto *CV = dyn_cast<ConstantVector>(COp)) { 1599 unsigned EltBits = CV->getType()->getScalarSizeInBits(); 1600 unsigned E = std::min(BitWidth / EltBits, CV->getNumOperands()); 1601 assert((BitWidth % EltBits) == 0 && "Element size mismatch"); 1602 for (unsigned I = 0; I != E; ++I) { 1603 if (I != 0) 1604 CS << ","; 1605 printConstant(CV->getOperand(I), EltBits, CS, PrintZero); 1606 } 1607 } else { 1608 CS << "?"; 1609 } 1610 } 1611 1612 static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer, 1613 int SclWidth, int VecWidth, 1614 const char *ShuffleComment) { 1615 unsigned SrcIdx = getSrcIdx(MI, 1); 1616 1617 std::string Comment; 1618 raw_string_ostream CS(Comment); 1619 printDstRegisterName(CS, MI, SrcIdx); 1620 CS << " = "; 1621 1622 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) { 1623 CS << "["; 1624 printConstant(C, SclWidth, CS); 1625 for (int I = 1, E = VecWidth / SclWidth; I < E; 
++I) { 1626 CS << ","; 1627 printConstant(C, SclWidth, CS, true); 1628 } 1629 CS << "]"; 1630 OutStreamer.AddComment(CS.str()); 1631 return; // early-out 1632 } 1633 1634 // We didn't find a constant load, fallback to a shuffle mask decode. 1635 CS << ShuffleComment; 1636 OutStreamer.AddComment(CS.str()); 1637 } 1638 1639 static void printBroadcast(const MachineInstr *MI, MCStreamer &OutStreamer, 1640 int Repeats, int BitWidth) { 1641 unsigned SrcIdx = getSrcIdx(MI, 1); 1642 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx)) { 1643 std::string Comment; 1644 raw_string_ostream CS(Comment); 1645 printDstRegisterName(CS, MI, SrcIdx); 1646 CS << " = ["; 1647 for (int l = 0; l != Repeats; ++l) { 1648 if (l != 0) 1649 CS << ","; 1650 printConstant(C, BitWidth, CS); 1651 } 1652 CS << "]"; 1653 OutStreamer.AddComment(CS.str()); 1654 } 1655 } 1656 1657 static bool printExtend(const MachineInstr *MI, MCStreamer &OutStreamer, 1658 int SrcEltBits, int DstEltBits, bool IsSext) { 1659 unsigned SrcIdx = getSrcIdx(MI, 1); 1660 auto *C = X86::getConstantFromPool(*MI, SrcIdx); 1661 if (C && C->getType()->getScalarSizeInBits() == unsigned(SrcEltBits)) { 1662 if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) { 1663 int NumElts = CDS->getNumElements(); 1664 std::string Comment; 1665 raw_string_ostream CS(Comment); 1666 printDstRegisterName(CS, MI, SrcIdx); 1667 CS << " = ["; 1668 for (int i = 0; i != NumElts; ++i) { 1669 if (i != 0) 1670 CS << ","; 1671 if (CDS->getElementType()->isIntegerTy()) { 1672 APInt Elt = CDS->getElementAsAPInt(i); 1673 Elt = IsSext ? Elt.sext(DstEltBits) : Elt.zext(DstEltBits); 1674 printConstant(Elt, CS); 1675 } else 1676 CS << "?"; 1677 } 1678 CS << "]"; 1679 OutStreamer.AddComment(CS.str()); 1680 return true; 1681 } 1682 } 1683 1684 return false; 1685 } 1686 static void printSignExtend(const MachineInstr *MI, MCStreamer &OutStreamer, 1687 int SrcEltBits, int DstEltBits) { 1688 printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, true); 1689 } 1690 static void printZeroExtend(const MachineInstr *MI, MCStreamer &OutStreamer, 1691 int SrcEltBits, int DstEltBits) { 1692 if (printExtend(MI, OutStreamer, SrcEltBits, DstEltBits, false)) 1693 return; 1694 1695 // We didn't find a constant load, fallback to a shuffle mask decode. 1696 std::string Comment; 1697 raw_string_ostream CS(Comment); 1698 printDstRegisterName(CS, MI, getSrcIdx(MI, 1)); 1699 CS << " = "; 1700 1701 SmallVector<int> Mask; 1702 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1703 assert((Width % DstEltBits) == 0 && (DstEltBits % SrcEltBits) == 0 && 1704 "Illegal extension ratio"); 1705 DecodeZeroExtendMask(SrcEltBits, DstEltBits, Width / DstEltBits, false, Mask); 1706 printShuffleMask(CS, "mem", "", Mask); 1707 1708 OutStreamer.AddComment(CS.str()); 1709 } 1710 1711 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { 1712 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); 1713 assert(getSubtarget().isOSWindowsOrUEFI() && 1714 "SEH_ instruction Windows and UEFI only"); 1715 1716 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. 
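  // For example (an illustrative prologue that pushes EBP and reserves 8
  // bytes; the actual values come from the SEH_ pseudo operands), the FPO
  // path below would emit roughly:
  //   .cv_fpo_pushreg ebp
  //   .cv_fpo_stackalloc 8
  //   .cv_fpo_endprologue
  // while the Win64 path further down emits the corresponding .seh_
  // directives instead.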
1717 if (EmitFPOData) { 1718 X86TargetStreamer *XTS = 1719 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); 1720 switch (MI->getOpcode()) { 1721 case X86::SEH_PushReg: 1722 XTS->emitFPOPushReg(MI->getOperand(0).getImm()); 1723 break; 1724 case X86::SEH_StackAlloc: 1725 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm()); 1726 break; 1727 case X86::SEH_StackAlign: 1728 XTS->emitFPOStackAlign(MI->getOperand(0).getImm()); 1729 break; 1730 case X86::SEH_SetFrame: 1731 assert(MI->getOperand(1).getImm() == 0 && 1732 ".cv_fpo_setframe takes no offset"); 1733 XTS->emitFPOSetFrame(MI->getOperand(0).getImm()); 1734 break; 1735 case X86::SEH_EndPrologue: 1736 XTS->emitFPOEndPrologue(); 1737 break; 1738 case X86::SEH_SaveReg: 1739 case X86::SEH_SaveXMM: 1740 case X86::SEH_PushFrame: 1741 llvm_unreachable("SEH_ directive incompatible with FPO"); 1742 break; 1743 default: 1744 llvm_unreachable("expected SEH_ instruction"); 1745 } 1746 return; 1747 } 1748 1749 // Otherwise, use the .seh_ directives for all other Windows platforms. 1750 switch (MI->getOpcode()) { 1751 case X86::SEH_PushReg: 1752 OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm()); 1753 break; 1754 1755 case X86::SEH_SaveReg: 1756 OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(), 1757 MI->getOperand(1).getImm()); 1758 break; 1759 1760 case X86::SEH_SaveXMM: 1761 OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(), 1762 MI->getOperand(1).getImm()); 1763 break; 1764 1765 case X86::SEH_StackAlloc: 1766 OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm()); 1767 break; 1768 1769 case X86::SEH_SetFrame: 1770 OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(), 1771 MI->getOperand(1).getImm()); 1772 break; 1773 1774 case X86::SEH_PushFrame: 1775 OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm()); 1776 break; 1777 1778 case X86::SEH_EndPrologue: 1779 OutStreamer->emitWinCFIEndProlog(); 1780 break; 1781 1782 default: 1783 llvm_unreachable("expected SEH_ instruction"); 1784 } 1785 } 1786 1787 static void addConstantComments(const MachineInstr *MI, 1788 MCStreamer &OutStreamer) { 1789 switch (MI->getOpcode()) { 1790 // Lower PSHUFB and VPERMILP normally but add a comment if we can find 1791 // a constant shuffle mask. We won't be able to do this at the MC layer 1792 // because the mask isn't an immediate. 
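  // For example, a PSHUFB whose mask operand is loaded from the constant pool
  // may get an assembly comment along the lines of
  //   # xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
  // (illustrative only; the register names and indices depend on the decoded
  // mask).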
1793 case X86::PSHUFBrm: 1794 case X86::VPSHUFBrm: 1795 case X86::VPSHUFBYrm: 1796 case X86::VPSHUFBZ128rm: 1797 case X86::VPSHUFBZ128rmk: 1798 case X86::VPSHUFBZ128rmkz: 1799 case X86::VPSHUFBZ256rm: 1800 case X86::VPSHUFBZ256rmk: 1801 case X86::VPSHUFBZ256rmkz: 1802 case X86::VPSHUFBZrm: 1803 case X86::VPSHUFBZrmk: 1804 case X86::VPSHUFBZrmkz: { 1805 unsigned SrcIdx = getSrcIdx(MI, 1); 1806 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { 1807 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1808 SmallVector<int, 64> Mask; 1809 DecodePSHUFBMask(C, Width, Mask); 1810 if (!Mask.empty()) 1811 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); 1812 } 1813 break; 1814 } 1815 1816 case X86::VPERMILPSrm: 1817 case X86::VPERMILPSYrm: 1818 case X86::VPERMILPSZ128rm: 1819 case X86::VPERMILPSZ128rmk: 1820 case X86::VPERMILPSZ128rmkz: 1821 case X86::VPERMILPSZ256rm: 1822 case X86::VPERMILPSZ256rmk: 1823 case X86::VPERMILPSZ256rmkz: 1824 case X86::VPERMILPSZrm: 1825 case X86::VPERMILPSZrmk: 1826 case X86::VPERMILPSZrmkz: { 1827 unsigned SrcIdx = getSrcIdx(MI, 1); 1828 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { 1829 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1830 SmallVector<int, 16> Mask; 1831 DecodeVPERMILPMask(C, 32, Width, Mask); 1832 if (!Mask.empty()) 1833 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); 1834 } 1835 break; 1836 } 1837 case X86::VPERMILPDrm: 1838 case X86::VPERMILPDYrm: 1839 case X86::VPERMILPDZ128rm: 1840 case X86::VPERMILPDZ128rmk: 1841 case X86::VPERMILPDZ128rmkz: 1842 case X86::VPERMILPDZ256rm: 1843 case X86::VPERMILPDZ256rmk: 1844 case X86::VPERMILPDZ256rmkz: 1845 case X86::VPERMILPDZrm: 1846 case X86::VPERMILPDZrmk: 1847 case X86::VPERMILPDZrmkz: { 1848 unsigned SrcIdx = getSrcIdx(MI, 1); 1849 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { 1850 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1851 SmallVector<int, 16> Mask; 1852 DecodeVPERMILPMask(C, 64, Width, Mask); 1853 if (!Mask.empty()) 1854 OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); 1855 } 1856 break; 1857 } 1858 1859 case X86::VPERMIL2PDrm: 1860 case X86::VPERMIL2PSrm: 1861 case X86::VPERMIL2PDYrm: 1862 case X86::VPERMIL2PSYrm: { 1863 assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) && 1864 "Unexpected number of operands!"); 1865 1866 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1); 1867 if (!CtrlOp.isImm()) 1868 break; 1869 1870 unsigned ElSize; 1871 switch (MI->getOpcode()) { 1872 default: llvm_unreachable("Invalid opcode"); 1873 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; 1874 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; 1875 } 1876 1877 if (auto *C = X86::getConstantFromPool(*MI, 3)) { 1878 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1879 SmallVector<int, 16> Mask; 1880 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask); 1881 if (!Mask.empty()) 1882 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask)); 1883 } 1884 break; 1885 } 1886 1887 case X86::VPPERMrrm: { 1888 if (auto *C = X86::getConstantFromPool(*MI, 3)) { 1889 unsigned Width = X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1890 SmallVector<int, 16> Mask; 1891 DecodeVPPERMMask(C, Width, Mask); 1892 if (!Mask.empty()) 1893 OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask)); 1894 } 1895 break; 1896 } 1897 1898 case 
X86::MMX_MOVQ64rm: { 1899 if (auto *C = X86::getConstantFromPool(*MI, 1)) { 1900 std::string Comment; 1901 raw_string_ostream CS(Comment); 1902 const MachineOperand &DstOp = MI->getOperand(0); 1903 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; 1904 if (auto *CF = dyn_cast<ConstantFP>(C)) { 1905 CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false); 1906 OutStreamer.AddComment(CS.str()); 1907 } 1908 } 1909 break; 1910 } 1911 1912 #define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \ 1913 case X86::Prefix##Instr##Suffix##rm##Postfix: 1914 1915 #define CASE_ARITH_RM(Instr) \ 1916 INSTR_CASE(, Instr, , ) /* SSE */ \ 1917 INSTR_CASE(V, Instr, , ) /* AVX-128 */ \ 1918 INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \ 1919 INSTR_CASE(V, Instr, Z128, ) \ 1920 INSTR_CASE(V, Instr, Z128, k) \ 1921 INSTR_CASE(V, Instr, Z128, kz) \ 1922 INSTR_CASE(V, Instr, Z256, ) \ 1923 INSTR_CASE(V, Instr, Z256, k) \ 1924 INSTR_CASE(V, Instr, Z256, kz) \ 1925 INSTR_CASE(V, Instr, Z, ) \ 1926 INSTR_CASE(V, Instr, Z, k) \ 1927 INSTR_CASE(V, Instr, Z, kz) 1928 1929 // TODO: Add additional instructions when useful. 1930 CASE_ARITH_RM(PMADDUBSW) { 1931 unsigned SrcIdx = getSrcIdx(MI, 1); 1932 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { 1933 if (C->getType()->getScalarSizeInBits() == 8) { 1934 std::string Comment; 1935 raw_string_ostream CS(Comment); 1936 unsigned VectorWidth = 1937 X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1938 CS << "["; 1939 printConstant(C, VectorWidth, CS); 1940 CS << "]"; 1941 OutStreamer.AddComment(CS.str()); 1942 } 1943 } 1944 break; 1945 } 1946 1947 CASE_ARITH_RM(PMADDWD) 1948 CASE_ARITH_RM(PMULLW) 1949 CASE_ARITH_RM(PMULHW) 1950 CASE_ARITH_RM(PMULHUW) 1951 CASE_ARITH_RM(PMULHRSW) { 1952 unsigned SrcIdx = getSrcIdx(MI, 1); 1953 if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { 1954 if (C->getType()->getScalarSizeInBits() == 16) { 1955 std::string Comment; 1956 raw_string_ostream CS(Comment); 1957 unsigned VectorWidth = 1958 X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); 1959 CS << "["; 1960 printConstant(C, VectorWidth, CS); 1961 CS << "]"; 1962 OutStreamer.AddComment(CS.str()); 1963 } 1964 } 1965 break; 1966 } 1967 1968 #define MASK_AVX512_CASE(Instr) \ 1969 case Instr: \ 1970 case Instr##k: \ 1971 case Instr##kz: 1972 1973 case X86::MOVSDrm: 1974 case X86::VMOVSDrm: 1975 MASK_AVX512_CASE(X86::VMOVSDZrm) 1976 case X86::MOVSDrm_alt: 1977 case X86::VMOVSDrm_alt: 1978 case X86::VMOVSDZrm_alt: 1979 case X86::MOVQI2PQIrm: 1980 case X86::VMOVQI2PQIrm: 1981 case X86::VMOVQI2PQIZrm: 1982 printZeroUpperMove(MI, OutStreamer, 64, 128, "mem[0],zero"); 1983 break; 1984 1985 MASK_AVX512_CASE(X86::VMOVSHZrm) 1986 case X86::VMOVSHZrm_alt: 1987 printZeroUpperMove(MI, OutStreamer, 16, 128, 1988 "mem[0],zero,zero,zero,zero,zero,zero,zero"); 1989 break; 1990 1991 case X86::MOVSSrm: 1992 case X86::VMOVSSrm: 1993 MASK_AVX512_CASE(X86::VMOVSSZrm) 1994 case X86::MOVSSrm_alt: 1995 case X86::VMOVSSrm_alt: 1996 case X86::VMOVSSZrm_alt: 1997 case X86::MOVDI2PDIrm: 1998 case X86::VMOVDI2PDIrm: 1999 case X86::VMOVDI2PDIZrm: 2000 printZeroUpperMove(MI, OutStreamer, 32, 128, "mem[0],zero,zero,zero"); 2001 break; 2002 2003 #define MOV_CASE(Prefix, Suffix) \ 2004 case X86::Prefix##MOVAPD##Suffix##rm: \ 2005 case X86::Prefix##MOVAPS##Suffix##rm: \ 2006 case X86::Prefix##MOVUPD##Suffix##rm: \ 2007 case X86::Prefix##MOVUPS##Suffix##rm: \ 2008 case X86::Prefix##MOVDQA##Suffix##rm: \ 2009 case X86::Prefix##MOVDQU##Suffix##rm: 2010 2011 #define 
MOV_AVX512_CASE(Suffix, Postfix) \ 2012 case X86::VMOVDQA64##Suffix##rm##Postfix: \ 2013 case X86::VMOVDQA32##Suffix##rm##Postfix: \ 2014 case X86::VMOVDQU64##Suffix##rm##Postfix: \ 2015 case X86::VMOVDQU32##Suffix##rm##Postfix: \ 2016 case X86::VMOVDQU16##Suffix##rm##Postfix: \ 2017 case X86::VMOVDQU8##Suffix##rm##Postfix: \ 2018 case X86::VMOVAPS##Suffix##rm##Postfix: \ 2019 case X86::VMOVAPD##Suffix##rm##Postfix: \ 2020 case X86::VMOVUPS##Suffix##rm##Postfix: \ 2021 case X86::VMOVUPD##Suffix##rm##Postfix: 2022 2023 #define CASE_128_MOV_RM() \ 2024 MOV_CASE(, ) /* SSE */ \ 2025 MOV_CASE(V, ) /* AVX-128 */ \ 2026 MOV_AVX512_CASE(Z128, ) \ 2027 MOV_AVX512_CASE(Z128, k) \ 2028 MOV_AVX512_CASE(Z128, kz) 2029 2030 #define CASE_256_MOV_RM() \ 2031 MOV_CASE(V, Y) /* AVX-256 */ \ 2032 MOV_AVX512_CASE(Z256, ) \ 2033 MOV_AVX512_CASE(Z256, k) \ 2034 MOV_AVX512_CASE(Z256, kz) \ 2035 2036 #define CASE_512_MOV_RM() \ 2037 MOV_AVX512_CASE(Z, ) \ 2038 MOV_AVX512_CASE(Z, k) \ 2039 MOV_AVX512_CASE(Z, kz) \ 2040 2041 // For loads from a constant pool to a vector register, print the constant 2042 // loaded. 2043 CASE_128_MOV_RM() 2044 printBroadcast(MI, OutStreamer, 1, 128); 2045 break; 2046 CASE_256_MOV_RM() 2047 printBroadcast(MI, OutStreamer, 1, 256); 2048 break; 2049 CASE_512_MOV_RM() 2050 printBroadcast(MI, OutStreamer, 1, 512); 2051 break; 2052 case X86::VBROADCASTF128rm: 2053 case X86::VBROADCASTI128rm: 2054 MASK_AVX512_CASE(X86::VBROADCASTF32X4Z256rm) 2055 MASK_AVX512_CASE(X86::VBROADCASTF64X2Z256rm) 2056 MASK_AVX512_CASE(X86::VBROADCASTI32X4Z256rm) 2057 MASK_AVX512_CASE(X86::VBROADCASTI64X2Z256rm) 2058 printBroadcast(MI, OutStreamer, 2, 128); 2059 break; 2060 MASK_AVX512_CASE(X86::VBROADCASTF32X4Zrm) 2061 MASK_AVX512_CASE(X86::VBROADCASTF64X2Zrm) 2062 MASK_AVX512_CASE(X86::VBROADCASTI32X4Zrm) 2063 MASK_AVX512_CASE(X86::VBROADCASTI64X2Zrm) 2064 printBroadcast(MI, OutStreamer, 4, 128); 2065 break; 2066 MASK_AVX512_CASE(X86::VBROADCASTF32X8Zrm) 2067 MASK_AVX512_CASE(X86::VBROADCASTF64X4Zrm) 2068 MASK_AVX512_CASE(X86::VBROADCASTI32X8Zrm) 2069 MASK_AVX512_CASE(X86::VBROADCASTI64X4Zrm) 2070 printBroadcast(MI, OutStreamer, 2, 256); 2071 break; 2072 2073 // For broadcast loads from a constant pool to a vector register, repeatedly 2074 // print the constant loaded. 
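  // For example, a VPBROADCASTQ load of the constant 42 into %xmm0 would be
  // annotated roughly as
  //   # xmm0 = [42,42]
  // i.e. the scalar constant repeated once per destination element
  // (illustrative only).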
2075 case X86::MOVDDUPrm: 2076 case X86::VMOVDDUPrm: 2077 MASK_AVX512_CASE(X86::VMOVDDUPZ128rm) 2078 case X86::VPBROADCASTQrm: 2079 MASK_AVX512_CASE(X86::VPBROADCASTQZ128rm) 2080 printBroadcast(MI, OutStreamer, 2, 64); 2081 break; 2082 case X86::VBROADCASTSDYrm: 2083 MASK_AVX512_CASE(X86::VBROADCASTSDZ256rm) 2084 case X86::VPBROADCASTQYrm: 2085 MASK_AVX512_CASE(X86::VPBROADCASTQZ256rm) 2086 printBroadcast(MI, OutStreamer, 4, 64); 2087 break; 2088 MASK_AVX512_CASE(X86::VBROADCASTSDZrm) 2089 MASK_AVX512_CASE(X86::VPBROADCASTQZrm) 2090 printBroadcast(MI, OutStreamer, 8, 64); 2091 break; 2092 case X86::VBROADCASTSSrm: 2093 MASK_AVX512_CASE(X86::VBROADCASTSSZ128rm) 2094 case X86::VPBROADCASTDrm: 2095 MASK_AVX512_CASE(X86::VPBROADCASTDZ128rm) 2096 printBroadcast(MI, OutStreamer, 4, 32); 2097 break; 2098 case X86::VBROADCASTSSYrm: 2099 MASK_AVX512_CASE(X86::VBROADCASTSSZ256rm) 2100 case X86::VPBROADCASTDYrm: 2101 MASK_AVX512_CASE(X86::VPBROADCASTDZ256rm) 2102 printBroadcast(MI, OutStreamer, 8, 32); 2103 break; 2104 MASK_AVX512_CASE(X86::VBROADCASTSSZrm) 2105 MASK_AVX512_CASE(X86::VPBROADCASTDZrm) 2106 printBroadcast(MI, OutStreamer, 16, 32); 2107 break; 2108 case X86::VPBROADCASTWrm: 2109 MASK_AVX512_CASE(X86::VPBROADCASTWZ128rm) 2110 printBroadcast(MI, OutStreamer, 8, 16); 2111 break; 2112 case X86::VPBROADCASTWYrm: 2113 MASK_AVX512_CASE(X86::VPBROADCASTWZ256rm) 2114 printBroadcast(MI, OutStreamer, 16, 16); 2115 break; 2116 MASK_AVX512_CASE(X86::VPBROADCASTWZrm) 2117 printBroadcast(MI, OutStreamer, 32, 16); 2118 break; 2119 case X86::VPBROADCASTBrm: 2120 MASK_AVX512_CASE(X86::VPBROADCASTBZ128rm) 2121 printBroadcast(MI, OutStreamer, 16, 8); 2122 break; 2123 case X86::VPBROADCASTBYrm: 2124 MASK_AVX512_CASE(X86::VPBROADCASTBZ256rm) 2125 printBroadcast(MI, OutStreamer, 32, 8); 2126 break; 2127 MASK_AVX512_CASE(X86::VPBROADCASTBZrm) 2128 printBroadcast(MI, OutStreamer, 64, 8); 2129 break; 2130 2131 #define MOVX_CASE(Prefix, Ext, Type, Suffix, Postfix) \ 2132 case X86::Prefix##PMOV##Ext##Type##Suffix##rm##Postfix: 2133 2134 #define CASE_MOVX_RM(Ext, Type) \ 2135 MOVX_CASE(, Ext, Type, , ) \ 2136 MOVX_CASE(V, Ext, Type, , ) \ 2137 MOVX_CASE(V, Ext, Type, Y, ) \ 2138 MOVX_CASE(V, Ext, Type, Z128, ) \ 2139 MOVX_CASE(V, Ext, Type, Z128, k ) \ 2140 MOVX_CASE(V, Ext, Type, Z128, kz ) \ 2141 MOVX_CASE(V, Ext, Type, Z256, ) \ 2142 MOVX_CASE(V, Ext, Type, Z256, k ) \ 2143 MOVX_CASE(V, Ext, Type, Z256, kz ) \ 2144 MOVX_CASE(V, Ext, Type, Z, ) \ 2145 MOVX_CASE(V, Ext, Type, Z, k ) \ 2146 MOVX_CASE(V, Ext, Type, Z, kz ) 2147 2148 CASE_MOVX_RM(SX, BD) 2149 printSignExtend(MI, OutStreamer, 8, 32); 2150 break; 2151 CASE_MOVX_RM(SX, BQ) 2152 printSignExtend(MI, OutStreamer, 8, 64); 2153 break; 2154 CASE_MOVX_RM(SX, BW) 2155 printSignExtend(MI, OutStreamer, 8, 16); 2156 break; 2157 CASE_MOVX_RM(SX, DQ) 2158 printSignExtend(MI, OutStreamer, 32, 64); 2159 break; 2160 CASE_MOVX_RM(SX, WD) 2161 printSignExtend(MI, OutStreamer, 16, 32); 2162 break; 2163 CASE_MOVX_RM(SX, WQ) 2164 printSignExtend(MI, OutStreamer, 16, 64); 2165 break; 2166 2167 CASE_MOVX_RM(ZX, BD) 2168 printZeroExtend(MI, OutStreamer, 8, 32); 2169 break; 2170 CASE_MOVX_RM(ZX, BQ) 2171 printZeroExtend(MI, OutStreamer, 8, 64); 2172 break; 2173 CASE_MOVX_RM(ZX, BW) 2174 printZeroExtend(MI, OutStreamer, 8, 16); 2175 break; 2176 CASE_MOVX_RM(ZX, DQ) 2177 printZeroExtend(MI, OutStreamer, 32, 64); 2178 break; 2179 CASE_MOVX_RM(ZX, WD) 2180 printZeroExtend(MI, OutStreamer, 16, 32); 2181 break; 2182 CASE_MOVX_RM(ZX, WQ) 2183 printZeroExtend(MI, OutStreamer, 16, 64); 
    break;
  }
}

void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                     Subtarget->getFeatureBits());

  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI =
      MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  if (MI->getOpcode() == X86::OR64rm) {
    for (auto &Opd : MI->operands()) {
      if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
                                "swift_async_extendedFramePointerFlags") {
        ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
      }
    }
  }

  // Add comments for values loaded from the constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  // Add a comment about EVEX compression.
  if (TM.Options.MCOptions.ShowMCEncoding) {
    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_LEGACY)
      OutStreamer->AddComment("EVEX TO LEGACY Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
    else if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_EVEX)
      OutStreamer->AddComment("EVEX TO EVEX Compression ", false);
  }

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }

  case X86::TAILJMPd64:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    [[fallthrough]];
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
2277 OutStreamer->AddComment("TAILCALL"); 2278 break; 2279 2280 case X86::TLS_addr32: 2281 case X86::TLS_addr64: 2282 case X86::TLS_addrX32: 2283 case X86::TLS_base_addr32: 2284 case X86::TLS_base_addr64: 2285 case X86::TLS_base_addrX32: 2286 case X86::TLS_desc32: 2287 case X86::TLS_desc64: 2288 return LowerTlsAddr(MCInstLowering, *MI); 2289 2290 case X86::MOVPC32r: { 2291 // This is a pseudo op for a two instruction sequence with a label, which 2292 // looks like: 2293 // call "L1$pb" 2294 // "L1$pb": 2295 // popl %esi 2296 2297 // Emit the call. 2298 MCSymbol *PICBase = MF->getPICBaseSymbol(); 2299 // FIXME: We would like an efficient form for this, so we don't have to do a 2300 // lot of extra uniquing. 2301 EmitAndCountInstruction( 2302 MCInstBuilder(X86::CALLpcrel32) 2303 .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); 2304 2305 const X86FrameLowering *FrameLowering = 2306 MF->getSubtarget<X86Subtarget>().getFrameLowering(); 2307 bool hasFP = FrameLowering->hasFP(*MF); 2308 2309 // TODO: This is needed only if we require precise CFA. 2310 bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && 2311 !OutStreamer->getDwarfFrameInfos().back().End; 2312 2313 int stackGrowth = -RI->getSlotSize(); 2314 2315 if (HasActiveDwarfFrame && !hasFP) { 2316 OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth); 2317 MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true); 2318 } 2319 2320 // Emit the label. 2321 OutStreamer->emitLabel(PICBase); 2322 2323 // popl $reg 2324 EmitAndCountInstruction( 2325 MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); 2326 2327 if (HasActiveDwarfFrame && !hasFP) { 2328 OutStreamer->emitCFIAdjustCfaOffset(stackGrowth); 2329 } 2330 return; 2331 } 2332 2333 case X86::ADD32ri: { 2334 // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. 2335 if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) 2336 break; 2337 2338 // Okay, we have something like: 2339 // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) 2340 2341 // For this, we want to print something like: 2342 // MYGLOBAL + (. - PICBASE) 2343 // However, we can't generate a ".", so just emit a new label here and refer 2344 // to it. 2345 MCSymbol *DotSym = OutContext.createTempSymbol(); 2346 OutStreamer->emitLabel(DotSym); 2347 2348 // Now that we have emitted the label, lower the complex operand expression. 
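    // Once lowered, the emitted instruction prints roughly as
    //   addl $MYGLOBAL+(.Ltmp0-L1$pb), %eax
    // (the symbol and label names here are illustrative only).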
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::KCFI_CHECK:
    return LowerKCFI_CHECK(*MI);

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Pseudo instructions that aren't a call are assumed to not emit any
      // code. If they do, in the worst case we generate unnecessary nops
      // after a call.
      if (MBBI->isCall() || !MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
  case X86::UBSAN_UD1:
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  case X86::CALL64pcrel32:
    if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
    break;
  case X86::JCC_1:
    // Two instruction prefixes (2EH for branch not-taken and 3EH for branch
    // taken) are used as branch hints. Here we add the branch-taken prefix to
    // a conditional jump whose probability of being taken exceeds the
    // threshold.
    if (getSubtarget().hasBranchHint() && EnableBranchHint) {
      const MachineBranchProbabilityInfo *MBPI =
          &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
      MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
      BranchProbability EdgeProb =
          MBPI->getEdgeProbability(MI->getParent(), DestBB);
      BranchProbability Threshold(BranchHintProbabilityThreshold, 100);
      if (EdgeProb > Threshold)
        EmitAndCountInstruction(MCInstBuilder(X86::DS_PREFIX));
    }
    break;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns into
  // the stackmap shadow. The only way to achieve this is if the call is at
  // the end of the shadow.
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}