//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void expandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMBB(MachineBasicBlock &MBB);

  /// This function expands pseudos which affect control flow. It is done in
  /// a separate pass to simplify block navigation in the main pass (which
  /// calls expandMBB).
  bool expandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copying
  /// instructions, placed in a separate block guarded by a check of the AL
  /// register (for the SysV ABI).
  void expandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)
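// expandICallBranchFunnel lowers the ICALL_BRANCH_FUNNEL pseudo into a
// balanced tree of address compares and conditional tail calls. For two
// targets the emitted code is roughly (an illustrative sketch; the actual
// symbols and offsets come from the pseudo's operands):
//
//   leaq combined_global+offset(%rip), %r11
//   cmpq %r11, %selector
//   jb   <block tail-calling target 0>
//   jmp  target1                        # TAILJMPd64
//
// Larger target counts recurse on the two halves of the target list.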
void X86ExpandPseudo::expandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand the CALL_RVMARKER pseudo to a call instruction, followed by the
  // special "movq %rax, %rdi" marker.
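  // The emitted sequence is roughly (an illustrative sketch; the actual
  // runtime function comes from operand 0, e.g.
  // objc_retainAutoreleasedReturnValue, and on Windows the marker moves
  // into %rcx instead of %rdi):
  //
  //   callq _foo
  //   movq  %rax, %rdi
  //   callq _objc_retainAutoreleasedReturnValue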
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function
  // returns the first argument, so the value of %rax is unchanged after the
  // ObjC runtime call. On Windows targets, the runtime call follows the
  // regular x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateAdditionalCallInfo())
    MBB.getParent()->moveAdditionalCallInfo(&MI, Marker);

  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
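    // For example (an illustrative sketch): "TCRETURNdi64 @callee, 0" lowers
    // to a bare "jmp callee" (TAILJMPd64); a positive stack adjustment first
    // emits an SP update such as "addq $16, %rsp" before the jump.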
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust =
        MBBI->getOperand(isMem ? X86::AddrNumOperands : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes on indirect jumps out of functions,
        // but not on direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call info.
    if (MBBI->isCandidateForAdditionalCallInfo())
      MBB.getParent()->moveAdditionalCallInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret.
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except for building kernel).
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1.  If we need to
      // pop off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one
  // of these needs a 2-byte displacement relative to the specified address
  // (with a 32-bit spill size). Mask pairs from 1-bit up to 16-bit masks all
  // use the same spill size: they are all stored with MASKPAIR16STORE and
  // loaded with MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
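  // For example (illustrative), a mask-pair reload
  //   %k0_k1 = MASKPAIR16LOAD <addr>
  // expands to two word-sized mask loads:
  //   kmovw <addr>,     %k0
  //   kmovw <addr + 2>, %k1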
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    expandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V:
  case X86::PTILELOADDRSV:
  case X86::PTILELOADDRST1V:
  case X86::PTCVTROWD2PSrreV:
  case X86::PTCVTROWD2PSrriV:
  case X86::PTCVTROWPS2BF16HrreV:
  case X86::PTCVTROWPS2BF16HrriV:
  case X86::PTCVTROWPS2BF16LrreV:
  case X86::PTCVTROWPS2BF16LrriV:
  case X86::PTCVTROWPS2PHHrreV:
  case X86::PTCVTROWPS2PHHrriV:
  case X86::PTCVTROWPS2PHLrreV:
  case X86::PTCVTROWPS2PHLrriV:
  case X86::PTILEMOVROWrreV:
  case X86::PTILEMOVROWrriV: {
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTILELOADDRSV:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
      break;
    case X86::PTILELOADDRST1V:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
      break;
    case X86::PTILELOADDV:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
      break;
    case X86::PTILELOADDT1V:
      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
      break;
    case X86::PTCVTROWD2PSrreV:
      Opc = X86::TCVTROWD2PSrre;
      break;
    case X86::PTCVTROWD2PSrriV:
      Opc = X86::TCVTROWD2PSrri;
      break;
    case X86::PTCVTROWPS2BF16HrreV:
      Opc = X86::TCVTROWPS2BF16Hrre;
      break;
    case X86::PTCVTROWPS2BF16HrriV:
      Opc = X86::TCVTROWPS2BF16Hrri;
      break;
    case X86::PTCVTROWPS2BF16LrreV:
      Opc = X86::TCVTROWPS2BF16Lrre;
      break;
    case X86::PTCVTROWPS2BF16LrriV:
      Opc = X86::TCVTROWPS2BF16Lrri;
      break;
    case X86::PTCVTROWPS2PHHrreV:
      Opc = X86::TCVTROWPS2PHHrre;
      break;
    case X86::PTCVTROWPS2PHHrriV:
      Opc = X86::TCVTROWPS2PHHrri;
      break;
    case X86::PTCVTROWPS2PHLrreV:
      Opc = X86::TCVTROWPS2PHLrre;
      break;
    case X86::PTCVTROWPS2PHLrriV:
      Opc = X86::TCVTROWPS2PHLrri;
      break;
    case X86::PTILEMOVROWrreV:
      Opc = X86::TILEMOVROWrre;
      break;
    case X86::PTILEMOVROWrriV:
      Opc = X86::TILEMOVROWrri;
      break;
    default:
      llvm_unreachable("Unexpected Opcode");
    }
    MI.setDesc(TII->get(Opc));
    return true;
  }
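  // A sketch of the shape-operand stripping above (operand order assumed,
  // illustrative only): the pseudo
  //   PTILELOADDV %tmm0, %row, %col, <mem>
  // drops its %row/%col shape operands and is re-described as the real
  //   TILELOADD %tmm0, <mem>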
  // TILEPAIRLOAD exists only for TILEPair spills; there is no corresponding
  // AMX instruction for it, so split it into two load instructions:
  // "TILEPAIRLOAD TMM0:TMM1, Base, Scale, Index, Offset, Segment" -->
  // "TILELOAD TMM0, Base, Scale, Index, Offset, Segment" +
  // "TILELOAD TMM1, Base, Scale, Index, Offset + TMM_SIZE, Segment"
  case X86::PTILEPAIRLOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    Register TReg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register TReg0 = TRI->getSubReg(TReg, X86::sub_t0);
    Register TReg1 = TRI->getSubReg(TReg, X86::sub_t1);
    unsigned TmmSize = TRI->getRegSizeInBits(X86::TILERegClass) / 8;

    MachineInstrBuilder MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILELOADD))
            .addReg(TReg0, RegState::Define | getDeadRegState(DstIsDead));
    MachineInstrBuilder MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILELOADD))
            .addReg(TReg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + TmmSize);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Make sure the stride register used by the first tile load stays alive.
    MachineOperand &Stride =
        MIBLo.getInstr()->getOperand(1 + X86::AddrIndexReg);
    Stride.setIsKill(false);

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, TmmSize);
    MachineMemOperand *MMOHi =
        MF->getMachineMemOperand(OldMMO, TmmSize, TmmSize);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  // Similar to TILEPAIRLOAD, TILEPAIRSTORE exists only for TILEPair spills
  // and has no corresponding AMX instruction, so split it too:
  // "TILEPAIRSTORE Base, Scale, Index, Offset, Segment, TMM0:TMM1" -->
  // "TILESTORE Base, Scale, Index, Offset, Segment, TMM0" +
  // "TILESTORE Base, Scale, Index, Offset + TMM_SIZE, Segment, TMM1"
  case X86::PTILEPAIRSTORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    Register TReg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register TReg0 = TRI->getSubReg(TReg, X86::sub_t0);
    Register TReg1 = TRI->getSubReg(TReg, X86::sub_t1);
    unsigned TmmSize = TRI->getRegSizeInBits(X86::TILERegClass) / 8;

    MachineInstrBuilder MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILESTORED));
    MachineInstrBuilder MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::TILESTORED));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + TmmSize);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(TReg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(TReg1, getKillRegState(SrcIsKill));

    // Make sure the stride register used by the first tile store stays alive.
    MachineOperand &Stride = MIBLo.getInstr()->getOperand(X86::AddrIndexReg);
    Stride.setIsKill(false);

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, TmmSize);
    MachineMemOperand *MMOHi =
        MF->getMachineMemOperand(OldMMO, TmmSize, TmmSize);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::PT2RPNTLVWZ0V:
  case X86::PT2RPNTLVWZ0T1V:
  case X86::PT2RPNTLVWZ1V:
  case X86::PT2RPNTLVWZ1T1V:
  case X86::PT2RPNTLVWZ0RSV:
  case X86::PT2RPNTLVWZ0RST1V:
  case X86::PT2RPNTLVWZ1RSV:
  case X86::PT2RPNTLVWZ1RST1V: {
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PT2RPNTLVWZ0V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
      break;
    case X86::PT2RPNTLVWZ0T1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
      break;
    case X86::PT2RPNTLVWZ1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
      break;
    case X86::PT2RPNTLVWZ1T1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
      break;
    case X86::PT2RPNTLVWZ0RSV:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
      break;
    case X86::PT2RPNTLVWZ0RST1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
      break;
    case X86::PT2RPNTLVWZ1RSV:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
      break;
    case X86::PT2RPNTLVWZ1RST1V:
      Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
      break;
    default:
      llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTTRANSPOSEDV:
  case X86::PTCONJTFP16V: {
    for (int i = 2; i > 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(Opcode == X86::PTTRANSPOSEDV ? X86::TTRANSPOSED
                                                     : X86::TCONJTFP16));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV:
  case X86::PTTDPBF16PSV:
  case X86::PTTDPFP16PSV:
  case X86::PTTCMMIMFP16PSV:
  case X86::PTTCMMRLFP16PSV:
  case X86::PTCONJTCMMIMFP16PSV:
  case X86::PTMMULTF32PSV:
  case X86::PTTMMULTF32PSV:
  case X86::PTDPBF8PSV:
  case X86::PTDPBHF8PSV:
  case X86::PTDPHBF8PSV:
  case X86::PTDPHF8PSV: {
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV:      Opc = X86::TDPBSSD;      break;
    case X86::PTDPBSUDV:      Opc = X86::TDPBSUD;      break;
    case X86::PTDPBUSDV:      Opc = X86::TDPBUSD;      break;
    case X86::PTDPBUUDV:      Opc = X86::TDPBUUD;      break;
    case X86::PTDPBF16PSV:    Opc = X86::TDPBF16PS;    break;
    case X86::PTDPFP16PSV:    Opc = X86::TDPFP16PS;    break;
    case X86::PTTDPBF16PSV:
      Opc = X86::TTDPBF16PS;
      break;
    case X86::PTTDPFP16PSV:
      Opc = X86::TTDPFP16PS;
      break;
    case X86::PTTCMMIMFP16PSV:
      Opc = X86::TTCMMIMFP16PS;
      break;
    case X86::PTTCMMRLFP16PSV:
      Opc = X86::TTCMMRLFP16PS;
      break;
    case X86::PTCONJTCMMIMFP16PSV:
      Opc = X86::TCONJTCMMIMFP16PS;
      break;
    case X86::PTMMULTF32PSV:
      Opc = X86::TMMULTF32PS;
      break;
    case X86::PTTMMULTF32PSV:
      Opc = X86::TTMMULTF32PS;
      break;
    case X86::PTDPBF8PSV:
      Opc = X86::TDPBF8PS;
      break;
    case X86::PTDPBHF8PSV:
      Opc = X86::TDPBHF8PS;
      break;
    case X86::PTDPHBF8PSV:
      Opc = X86::TDPHBF8PS;
      break;
    case X86::PTDPHF8PSV:
      Opc = X86::TDPHF8PS;
      break;
    default:
      llvm_unreachable("Unexpected Opcode");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
    return true;
  }
#undef GET_EGPR_IF_ENABLED
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col.
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  case X86::ADD32mi_ND:
  case X86::ADD64mi32_ND:
  case X86::SUB32mi_ND:
  case X86::SUB64mi32_ND:
  case X86::AND32mi_ND:
  case X86::AND64mi32_ND:
  case X86::OR32mi_ND:
  case X86::OR64mi32_ND:
  case X86::XOR32mi_ND:
  case X86::XOR64mi32_ND:
  case X86::ADC32mi_ND:
  case X86::ADC64mi32_ND:
  case X86::SBB32mi_ND:
  case X86::SBB64mi32_ND: {
    // It's possible for an EVEX-encoded legacy instruction to reach the
    // 15-byte instruction length limit: 4 bytes of EVEX prefix + 1 byte of
    // opcode + 1 byte of ModRM + 1 byte of SIB + 4 bytes of displacement +
    // 4 bytes of immediate = 15 bytes in total, e.g.
    //
    //   subq $184, %fs:257(%rbx, %rcx), %rax
    //
    // In such a case, no additional (ADSIZE or segment override) prefix can
    // be used. To resolve the issue, we split the "long" instruction into
    // two instructions:
    //
    //   movq %fs:257(%rbx, %rcx), %rax
    //   subq $184, %rax
    //
    // Therefore we consider the OPmi_ND to be a pseudo instruction to some
    // extent.
    const MachineOperand &ImmOp =
        MI.getOperand(MI.getNumExplicitOperands() - 1);
    // If the immediate is an expression, conservatively estimate 4 bytes.
    if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
      return false;
    int MemOpNo = X86::getFirstAddrOperandIdx(MI);
    const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
    Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
    // If the displacement is an expression, conservatively estimate 4 bytes.
    if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
      return false;
    // Only one more byte is available, so at most one of the three can be
    // present: SIB, segment override prefix, ADSIZE prefix.
    Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
    unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
    if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
      ++Count;
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
      ++Count;
    if (Count < 2)
      return false;
    unsigned Opc, LoadOpc;
    switch (Opcode) {
#define MI_TO_RI(OP)                                                           \
  case X86::OP##32mi_ND:                                                       \
    Opc = X86::OP##32ri;                                                       \
    LoadOpc = X86::MOV32rm;                                                    \
    break;                                                                     \
  case X86::OP##64mi32_ND:                                                     \
    Opc = X86::OP##64ri32;                                                     \
    LoadOpc = X86::MOV64rm;                                                    \
    break;

    default:
      llvm_unreachable("Unexpected Opcode");
      MI_TO_RI(ADD);
      MI_TO_RI(SUB);
      MI_TO_RI(AND);
      MI_TO_RI(OR);
      MI_TO_RI(XOR);
      MI_TO_RI(ADC);
      MI_TO_RI(SBB);
#undef MI_TO_RI
    }
    // Insert OPri.
    Register DestReg = MI.getOperand(0).getReg();
    BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
        .addReg(DestReg)
        .add(ImmOp);
    // Change OPmi_ND to MOVrm.
    for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
      MI.removeOperand(MI.getNumOperands() - 1);
    MI.setDesc(TII->get(LoadOpc));
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block for storing varargs guarded
// registers. It adds a check of %al to the entry block to skip
// GuardedRegsBlk if the XMM registers need not be stored.
//
//   EntryBlk[VAStartPseudoInstr]     EntryBlk
//        |                              |      .
//        |                              |      .
//        |                              |  GuardedRegsBlk
//        |                =>            |      .
//        |                              |      .
//        |                           TailBlk
//        |                              |
//        |                              |
//
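// The emitted guard and stores look roughly like this (an illustrative
// sketch; one store per XMM argument register listed on the pseudo, with
// the base register and offsets taken from its operands):
//
//     testb %al, %al
//     je    .LtailBlk             # skip the stores when %al is zero
//     movaps %xmm0, off+0(%base)
//     movaps %xmm1, off+16(%base)
//     ...
//   .LtailBlk: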
void X86ExpandPseudo::expandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add liveins to the created blocks.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::expandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it can only appear in the entry block,
  // so we do not need to scan the other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      expandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = expandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}