1 //===--- X86DomainReassignment.cpp - Selectively switch register classes---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass attempts to find instruction chains (closures) in one domain, 10 // and convert them to equivalent instructions in a different domain, 11 // if profitable. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86.h" 16 #include "X86InstrInfo.h" 17 #include "X86Subtarget.h" 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/TargetRegisterInfo.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/Printable.h" 28 #include <bitset> 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "x86-domain-reassignment" 33 34 STATISTIC(NumClosuresConverted, "Number of closures converted by the pass"); 35 36 static cl::opt<bool> DisableX86DomainReassignment( 37 "disable-x86-domain-reassignment", cl::Hidden, 38 cl::desc("X86: Disable Virtual Register Reassignment."), cl::init(false)); 39 40 namespace { 41 enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain, NumDomains }; 42 43 static bool isMask(const TargetRegisterClass *RC, 44 const TargetRegisterInfo *TRI) { 45 return X86::VK16RegClass.hasSubClassEq(RC); 46 } 47 48 static RegDomain getDomain(const TargetRegisterClass *RC, 49 const TargetRegisterInfo *TRI) { 50 if (TRI->isGeneralPurposeRegisterClass(RC)) 51 return GPRDomain; 52 if (isMask(RC, TRI)) 53 return MaskDomain; 54 return OtherDomain; 55 } 56 57 /// Return a register class equivalent to \p SrcRC, in \p Domain. 58 static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC, 59 RegDomain Domain) { 60 assert(Domain == MaskDomain && "add domain"); 61 if (X86::GR8RegClass.hasSubClassEq(SrcRC)) 62 return &X86::VK8RegClass; 63 if (X86::GR16RegClass.hasSubClassEq(SrcRC)) 64 return &X86::VK16RegClass; 65 if (X86::GR32RegClass.hasSubClassEq(SrcRC)) 66 return &X86::VK32RegClass; 67 if (X86::GR64RegClass.hasSubClassEq(SrcRC)) 68 return &X86::VK64RegClass; 69 llvm_unreachable("add register class"); 70 return nullptr; 71 } 72 73 /// Abstract Instruction Converter class. 74 class InstrConverterBase { 75 protected: 76 unsigned SrcOpcode; 77 78 public: 79 InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} 80 81 virtual ~InstrConverterBase() = default; 82 83 /// \returns true if \p MI is legal to convert. 84 virtual bool isLegal(const MachineInstr *MI, 85 const TargetInstrInfo *TII) const { 86 assert(MI->getOpcode() == SrcOpcode && 87 "Wrong instruction passed to converter"); 88 return true; 89 } 90 91 /// Applies conversion to \p MI. 92 /// 93 /// \returns true if \p MI is no longer need, and can be deleted. 94 virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, 95 MachineRegisterInfo *MRI) const = 0; 96 97 /// \returns the cost increment incurred by converting \p MI. 98 virtual double getExtraCost(const MachineInstr *MI, 99 MachineRegisterInfo *MRI) const = 0; 100 }; 101 102 /// An Instruction Converter which ignores the given instruction. 103 /// For example, PHI instructions can be safely ignored since only the registers 104 /// need to change. 105 class InstrIgnore : public InstrConverterBase { 106 public: 107 InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} 108 109 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, 110 MachineRegisterInfo *MRI) const override { 111 assert(isLegal(MI, TII) && "Cannot convert instruction"); 112 return false; 113 } 114 115 double getExtraCost(const MachineInstr *MI, 116 MachineRegisterInfo *MRI) const override { 117 return 0; 118 } 119 }; 120 121 /// An Instruction Converter which replaces an instruction with another. 122 class InstrReplacer : public InstrConverterBase { 123 public: 124 /// Opcode of the destination instruction. 125 unsigned DstOpcode; 126 127 InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode) 128 : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} 129 130 bool isLegal(const MachineInstr *MI, 131 const TargetInstrInfo *TII) const override { 132 if (!InstrConverterBase::isLegal(MI, TII)) 133 return false; 134 // It's illegal to replace an instruction that implicitly defines a register 135 // with an instruction that doesn't, unless that register dead. 136 for (const auto &MO : MI->implicit_operands()) 137 if (MO.isReg() && MO.isDef() && !MO.isDead() && 138 !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg())) 139 return false; 140 return true; 141 } 142 143 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, 144 MachineRegisterInfo *MRI) const override { 145 assert(isLegal(MI, TII) && "Cannot convert instruction"); 146 MachineInstrBuilder Bld = 147 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(DstOpcode)); 148 // Transfer explicit operands from original instruction. Implicit operands 149 // are handled by BuildMI. 150 for (auto &Op : MI->explicit_operands()) 151 Bld.add(Op); 152 return true; 153 } 154 155 double getExtraCost(const MachineInstr *MI, 156 MachineRegisterInfo *MRI) const override { 157 // Assuming instructions have the same cost. 158 return 0; 159 } 160 }; 161 162 /// An Instruction Converter which replaces an instruction with another, and 163 /// adds a COPY from the new instruction's destination to the old one's. 164 class InstrReplacerDstCOPY : public InstrConverterBase { 165 public: 166 unsigned DstOpcode; 167 168 InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode) 169 : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} 170 171 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, 172 MachineRegisterInfo *MRI) const override { 173 assert(isLegal(MI, TII) && "Cannot convert instruction"); 174 MachineBasicBlock *MBB = MI->getParent(); 175 const DebugLoc &DL = MI->getDebugLoc(); 176 177 Register Reg = MRI->createVirtualRegister( 178 TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), 179 *MBB->getParent())); 180 MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); 181 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) 182 Bld.add(MO); 183 184 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY)) 185 .add(MI->getOperand(0)) 186 .addReg(Reg); 187 188 return true; 189 } 190 191 double getExtraCost(const MachineInstr *MI, 192 MachineRegisterInfo *MRI) const override { 193 // Assuming instructions have the same cost, and that COPY is in the same 194 // domain so it will be eliminated. 195 return 0; 196 } 197 }; 198 199 /// An Instruction Converter for replacing COPY instructions. 200 class InstrCOPYReplacer : public InstrReplacer { 201 public: 202 RegDomain DstDomain; 203 204 InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode) 205 : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {} 206 207 bool isLegal(const MachineInstr *MI, 208 const TargetInstrInfo *TII) const override { 209 if (!InstrConverterBase::isLegal(MI, TII)) 210 return false; 211 212 // Don't allow copies to/flow GR8/GR16 physical registers. 213 // FIXME: Is there some better way to support this? 214 Register DstReg = MI->getOperand(0).getReg(); 215 if (DstReg.isPhysical() && (X86::GR8RegClass.contains(DstReg) || 216 X86::GR16RegClass.contains(DstReg))) 217 return false; 218 Register SrcReg = MI->getOperand(1).getReg(); 219 if (SrcReg.isPhysical() && (X86::GR8RegClass.contains(SrcReg) || 220 X86::GR16RegClass.contains(SrcReg))) 221 return false; 222 223 return true; 224 } 225 226 double getExtraCost(const MachineInstr *MI, 227 MachineRegisterInfo *MRI) const override { 228 assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY"); 229 230 for (const auto &MO : MI->operands()) { 231 // Physical registers will not be converted. Assume that converting the 232 // COPY to the destination domain will eventually result in a actual 233 // instruction. 234 if (MO.getReg().isPhysical()) 235 return 1; 236 237 RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()), 238 MRI->getTargetRegisterInfo()); 239 // Converting a cross domain COPY to a same domain COPY should eliminate 240 // an insturction 241 if (OpDomain == DstDomain) 242 return -1; 243 } 244 return 0; 245 } 246 }; 247 248 /// An Instruction Converter which replaces an instruction with a COPY. 249 class InstrReplaceWithCopy : public InstrConverterBase { 250 public: 251 // Source instruction operand Index, to be used as the COPY source. 252 unsigned SrcOpIdx; 253 254 InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx) 255 : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {} 256 257 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, 258 MachineRegisterInfo *MRI) const override { 259 assert(isLegal(MI, TII) && "Cannot convert instruction"); 260 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 261 TII->get(TargetOpcode::COPY)) 262 .add({MI->getOperand(0), MI->getOperand(SrcOpIdx)}); 263 return true; 264 } 265 266 double getExtraCost(const MachineInstr *MI, 267 MachineRegisterInfo *MRI) const override { 268 return 0; 269 } 270 }; 271 272 // Key type to be used by the Instruction Converters map. 273 // A converter is identified by <destination domain, source opcode> 274 typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; 275 276 typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>> 277 InstrConverterBaseMap; 278 279 /// A closure is a set of virtual register representing all of the edges in 280 /// the closure, as well as all of the instructions connected by those edges. 281 /// 282 /// A closure may encompass virtual registers in the same register bank that 283 /// have different widths. For example, it may contain 32-bit GPRs as well as 284 /// 64-bit GPRs. 285 /// 286 /// A closure that computes an address (i.e. defines a virtual register that is 287 /// used in a memory operand) excludes the instructions that contain memory 288 /// operands using the address. Such an instruction will be included in a 289 /// different closure that manipulates the loaded or stored value. 290 class Closure { 291 private: 292 /// Virtual registers in the closure. 293 DenseSet<Register> Edges; 294 295 /// Instructions in the closure. 296 SmallVector<MachineInstr *, 8> Instrs; 297 298 /// Domains which this closure can legally be reassigned to. 299 std::bitset<NumDomains> LegalDstDomains; 300 301 /// An ID to uniquely identify this closure, even when it gets 302 /// moved around 303 unsigned ID; 304 305 public: 306 Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) { 307 for (RegDomain D : LegalDstDomainList) 308 LegalDstDomains.set(D); 309 } 310 311 /// Mark this closure as illegal for reassignment to all domains. 312 void setAllIllegal() { LegalDstDomains.reset(); } 313 314 /// \returns true if this closure has domains which are legal to reassign to. 315 bool hasLegalDstDomain() const { return LegalDstDomains.any(); } 316 317 /// \returns true if is legal to reassign this closure to domain \p RD. 318 bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; } 319 320 /// Mark this closure as illegal for reassignment to domain \p RD. 321 void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; } 322 323 bool empty() const { return Edges.empty(); } 324 325 bool insertEdge(Register Reg) { return Edges.insert(Reg).second; } 326 327 using const_edge_iterator = DenseSet<Register>::const_iterator; 328 iterator_range<const_edge_iterator> edges() const { 329 return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end()); 330 } 331 332 void addInstruction(MachineInstr *I) { 333 Instrs.push_back(I); 334 } 335 336 ArrayRef<MachineInstr *> instructions() const { 337 return Instrs; 338 } 339 340 LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const { 341 dbgs() << "Registers: "; 342 bool First = true; 343 for (Register Reg : Edges) { 344 if (!First) 345 dbgs() << ", "; 346 First = false; 347 dbgs() << printReg(Reg, MRI->getTargetRegisterInfo(), 0, MRI); 348 } 349 dbgs() << "\n" << "Instructions:"; 350 for (MachineInstr *MI : Instrs) { 351 dbgs() << "\n "; 352 MI->print(dbgs()); 353 } 354 dbgs() << "\n"; 355 } 356 357 unsigned getID() const { 358 return ID; 359 } 360 361 }; 362 363 class X86DomainReassignment : public MachineFunctionPass { 364 const X86Subtarget *STI = nullptr; 365 MachineRegisterInfo *MRI = nullptr; 366 const X86InstrInfo *TII = nullptr; 367 368 /// All edges that are included in some closure 369 DenseMap<Register, unsigned> EnclosedEdges; 370 371 /// All instructions that are included in some closure. 372 DenseMap<MachineInstr *, unsigned> EnclosedInstrs; 373 374 public: 375 static char ID; 376 377 X86DomainReassignment() : MachineFunctionPass(ID) { } 378 379 bool runOnMachineFunction(MachineFunction &MF) override; 380 381 void getAnalysisUsage(AnalysisUsage &AU) const override { 382 AU.setPreservesCFG(); 383 MachineFunctionPass::getAnalysisUsage(AU); 384 } 385 386 StringRef getPassName() const override { 387 return "X86 Domain Reassignment Pass"; 388 } 389 390 private: 391 /// A map of available Instruction Converters. 392 InstrConverterBaseMap Converters; 393 394 /// Initialize Converters map. 395 void initConverters(); 396 397 /// Starting from \Reg, expand the closure as much as possible. 398 void buildClosure(Closure &, Register Reg); 399 400 /// Enqueue \p Reg to be considered for addition to the closure. 401 /// Return false if the closure becomes invalid. 402 bool visitRegister(Closure &, Register Reg, RegDomain &Domain, 403 SmallVectorImpl<unsigned> &Worklist); 404 405 /// Reassign the closure to \p Domain. 406 void reassign(const Closure &C, RegDomain Domain) const; 407 408 /// Add \p MI to the closure. 409 /// Return false if the closure becomes invalid. 410 bool encloseInstr(Closure &C, MachineInstr *MI); 411 412 /// /returns true if it is profitable to reassign the closure to \p Domain. 413 bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const; 414 415 /// Calculate the total cost of reassigning the closure to \p Domain. 416 double calculateCost(const Closure &C, RegDomain Domain) const; 417 }; 418 419 char X86DomainReassignment::ID = 0; 420 421 } // End anonymous namespace. 422 423 bool X86DomainReassignment::visitRegister(Closure &C, Register Reg, 424 RegDomain &Domain, 425 SmallVectorImpl<unsigned> &Worklist) { 426 if (!Reg.isVirtual()) 427 return true; 428 429 auto I = EnclosedEdges.find(Reg); 430 if (I != EnclosedEdges.end()) { 431 if (I->second != C.getID()) { 432 C.setAllIllegal(); 433 return false; 434 } 435 return true; 436 } 437 438 if (!MRI->hasOneDef(Reg)) 439 return true; 440 441 RegDomain RD = getDomain(MRI->getRegClass(Reg), MRI->getTargetRegisterInfo()); 442 // First edge in closure sets the domain. 443 if (Domain == NoDomain) 444 Domain = RD; 445 446 if (Domain != RD) 447 return true; 448 449 Worklist.push_back(Reg); 450 return true; 451 } 452 453 bool X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { 454 auto I = EnclosedInstrs.find(MI); 455 if (I != EnclosedInstrs.end()) { 456 if (I->second != C.getID()) { 457 // Instruction already belongs to another closure, avoid conflicts between 458 // closure and mark this closure as illegal. 459 C.setAllIllegal(); 460 return false; 461 } 462 return true; 463 } 464 465 EnclosedInstrs[MI] = C.getID(); 466 C.addInstruction(MI); 467 468 // Mark closure as illegal for reassignment to domains, if there is no 469 // converter for the instruction or if the converter cannot convert the 470 // instruction. 471 for (int i = 0; i != NumDomains; ++i) { 472 if (C.isLegal((RegDomain)i)) { 473 auto I = Converters.find({i, MI->getOpcode()}); 474 if (I == Converters.end() || !I->second->isLegal(MI, TII)) 475 C.setIllegal((RegDomain)i); 476 } 477 } 478 return C.hasLegalDstDomain(); 479 } 480 481 double X86DomainReassignment::calculateCost(const Closure &C, 482 RegDomain DstDomain) const { 483 assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure"); 484 485 double Cost = 0.0; 486 for (auto *MI : C.instructions()) 487 Cost += Converters.find({DstDomain, MI->getOpcode()}) 488 ->second->getExtraCost(MI, MRI); 489 return Cost; 490 } 491 492 bool X86DomainReassignment::isReassignmentProfitable(const Closure &C, 493 RegDomain Domain) const { 494 return calculateCost(C, Domain) < 0.0; 495 } 496 497 void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { 498 assert(C.isLegal(Domain) && "Cannot convert illegal closure"); 499 500 // Iterate all instructions in the closure, convert each one using the 501 // appropriate converter. 502 SmallVector<MachineInstr *, 8> ToErase; 503 for (auto *MI : C.instructions()) 504 if (Converters.find({Domain, MI->getOpcode()}) 505 ->second->convertInstr(MI, TII, MRI)) 506 ToErase.push_back(MI); 507 508 // Iterate all registers in the closure, replace them with registers in the 509 // destination domain. 510 for (Register Reg : C.edges()) { 511 MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain)); 512 for (auto &MO : MRI->use_operands(Reg)) { 513 if (MO.isReg()) 514 // Remove all subregister references as they are not valid in the 515 // destination domain. 516 MO.setSubReg(0); 517 } 518 } 519 520 for (auto *MI : ToErase) 521 MI->eraseFromParent(); 522 } 523 524 /// \returns true when \p Reg is used as part of an address calculation in \p 525 /// MI. 526 static bool usedAsAddr(const MachineInstr &MI, Register Reg, 527 const TargetInstrInfo *TII) { 528 if (!MI.mayLoadOrStore()) 529 return false; 530 531 const MCInstrDesc &Desc = TII->get(MI.getOpcode()); 532 int MemOpStart = X86II::getMemoryOperandNo(Desc.TSFlags); 533 if (MemOpStart == -1) 534 return false; 535 536 MemOpStart += X86II::getOperandBias(Desc); 537 for (unsigned MemOpIdx = MemOpStart, 538 MemOpEnd = MemOpStart + X86::AddrNumOperands; 539 MemOpIdx < MemOpEnd; ++MemOpIdx) { 540 const MachineOperand &Op = MI.getOperand(MemOpIdx); 541 if (Op.isReg() && Op.getReg() == Reg) 542 return true; 543 } 544 return false; 545 } 546 547 void X86DomainReassignment::buildClosure(Closure &C, Register Reg) { 548 SmallVector<unsigned, 4> Worklist; 549 RegDomain Domain = NoDomain; 550 visitRegister(C, Reg, Domain, Worklist); 551 while (!Worklist.empty()) { 552 unsigned CurReg = Worklist.pop_back_val(); 553 554 // Register already in this closure. 555 if (!C.insertEdge(CurReg)) 556 continue; 557 EnclosedEdges[Reg] = C.getID(); 558 559 MachineInstr *DefMI = MRI->getVRegDef(CurReg); 560 if (!encloseInstr(C, DefMI)) 561 return; 562 563 // Add register used by the defining MI to the worklist. 564 // Do not add registers which are used in address calculation, they will be 565 // added to a different closure. 566 int OpEnd = DefMI->getNumOperands(); 567 const MCInstrDesc &Desc = DefMI->getDesc(); 568 int MemOp = X86II::getMemoryOperandNo(Desc.TSFlags); 569 if (MemOp != -1) 570 MemOp += X86II::getOperandBias(Desc); 571 for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) { 572 if (OpIdx == MemOp) { 573 // skip address calculation. 574 OpIdx += (X86::AddrNumOperands - 1); 575 continue; 576 } 577 auto &Op = DefMI->getOperand(OpIdx); 578 if (!Op.isReg() || !Op.isUse()) 579 continue; 580 if (!visitRegister(C, Op.getReg(), Domain, Worklist)) 581 return; 582 } 583 584 // Expand closure through register uses. 585 for (auto &UseMI : MRI->use_nodbg_instructions(CurReg)) { 586 // We would like to avoid converting closures which calculare addresses, 587 // as this should remain in GPRs. 588 if (usedAsAddr(UseMI, CurReg, TII)) { 589 C.setAllIllegal(); 590 return; 591 } 592 if (!encloseInstr(C, &UseMI)) 593 return; 594 595 for (auto &DefOp : UseMI.defs()) { 596 if (!DefOp.isReg()) 597 continue; 598 599 Register DefReg = DefOp.getReg(); 600 if (!DefReg.isVirtual()) { 601 C.setAllIllegal(); 602 return; 603 } 604 if (!visitRegister(C, DefReg, Domain, Worklist)) 605 return; 606 } 607 } 608 } 609 } 610 611 void X86DomainReassignment::initConverters() { 612 Converters[{MaskDomain, TargetOpcode::PHI}] = 613 std::make_unique<InstrIgnore>(TargetOpcode::PHI); 614 615 Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = 616 std::make_unique<InstrIgnore>(TargetOpcode::IMPLICIT_DEF); 617 618 Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = 619 std::make_unique<InstrReplaceWithCopy>(TargetOpcode::INSERT_SUBREG, 2); 620 621 Converters[{MaskDomain, TargetOpcode::COPY}] = 622 std::make_unique<InstrCOPYReplacer>(TargetOpcode::COPY, MaskDomain, 623 TargetOpcode::COPY); 624 625 auto createReplacerDstCOPY = [&](unsigned From, unsigned To) { 626 Converters[{MaskDomain, From}] = 627 std::make_unique<InstrReplacerDstCOPY>(From, To); 628 }; 629 630 #define GET_EGPR_IF_ENABLED(OPC) STI->hasEGPR() ? OPC##_EVEX : OPC 631 createReplacerDstCOPY(X86::MOVZX32rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); 632 createReplacerDstCOPY(X86::MOVZX64rm16, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); 633 634 createReplacerDstCOPY(X86::MOVZX32rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); 635 createReplacerDstCOPY(X86::MOVZX64rr16, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); 636 637 if (STI->hasDQI()) { 638 createReplacerDstCOPY(X86::MOVZX16rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); 639 createReplacerDstCOPY(X86::MOVZX32rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); 640 createReplacerDstCOPY(X86::MOVZX64rm8, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); 641 642 createReplacerDstCOPY(X86::MOVZX16rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); 643 createReplacerDstCOPY(X86::MOVZX32rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); 644 createReplacerDstCOPY(X86::MOVZX64rr8, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); 645 } 646 647 auto createReplacer = [&](unsigned From, unsigned To) { 648 Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To); 649 }; 650 651 createReplacer(X86::MOV16rm, GET_EGPR_IF_ENABLED(X86::KMOVWkm)); 652 createReplacer(X86::MOV16mr, GET_EGPR_IF_ENABLED(X86::KMOVWmk)); 653 createReplacer(X86::MOV16rr, GET_EGPR_IF_ENABLED(X86::KMOVWkk)); 654 createReplacer(X86::SHR16ri, X86::KSHIFTRWki); 655 createReplacer(X86::SHL16ri, X86::KSHIFTLWki); 656 createReplacer(X86::NOT16r, X86::KNOTWkk); 657 createReplacer(X86::OR16rr, X86::KORWkk); 658 createReplacer(X86::AND16rr, X86::KANDWkk); 659 createReplacer(X86::XOR16rr, X86::KXORWkk); 660 661 bool HasNDD = STI->hasNDD(); 662 if (HasNDD) { 663 createReplacer(X86::SHR16ri_ND, X86::KSHIFTRWki); 664 createReplacer(X86::SHL16ri_ND, X86::KSHIFTLWki); 665 createReplacer(X86::NOT16r_ND, X86::KNOTWkk); 666 createReplacer(X86::OR16rr_ND, X86::KORWkk); 667 createReplacer(X86::AND16rr_ND, X86::KANDWkk); 668 createReplacer(X86::XOR16rr_ND, X86::KXORWkk); 669 } 670 671 if (STI->hasBWI()) { 672 createReplacer(X86::MOV32rm, GET_EGPR_IF_ENABLED(X86::KMOVDkm)); 673 createReplacer(X86::MOV64rm, GET_EGPR_IF_ENABLED(X86::KMOVQkm)); 674 675 createReplacer(X86::MOV32mr, GET_EGPR_IF_ENABLED(X86::KMOVDmk)); 676 createReplacer(X86::MOV64mr, GET_EGPR_IF_ENABLED(X86::KMOVQmk)); 677 678 createReplacer(X86::MOV32rr, GET_EGPR_IF_ENABLED(X86::KMOVDkk)); 679 createReplacer(X86::MOV64rr, GET_EGPR_IF_ENABLED(X86::KMOVQkk)); 680 681 createReplacer(X86::SHR32ri, X86::KSHIFTRDki); 682 createReplacer(X86::SHR64ri, X86::KSHIFTRQki); 683 684 createReplacer(X86::SHL32ri, X86::KSHIFTLDki); 685 createReplacer(X86::SHL64ri, X86::KSHIFTLQki); 686 687 createReplacer(X86::ADD32rr, X86::KADDDkk); 688 createReplacer(X86::ADD64rr, X86::KADDQkk); 689 690 createReplacer(X86::NOT32r, X86::KNOTDkk); 691 createReplacer(X86::NOT64r, X86::KNOTQkk); 692 693 createReplacer(X86::OR32rr, X86::KORDkk); 694 createReplacer(X86::OR64rr, X86::KORQkk); 695 696 createReplacer(X86::AND32rr, X86::KANDDkk); 697 createReplacer(X86::AND64rr, X86::KANDQkk); 698 699 createReplacer(X86::ANDN32rr, X86::KANDNDkk); 700 createReplacer(X86::ANDN64rr, X86::KANDNQkk); 701 702 createReplacer(X86::XOR32rr, X86::KXORDkk); 703 createReplacer(X86::XOR64rr, X86::KXORQkk); 704 705 if (HasNDD) { 706 createReplacer(X86::SHR32ri_ND, X86::KSHIFTRDki); 707 createReplacer(X86::SHL32ri_ND, X86::KSHIFTLDki); 708 createReplacer(X86::ADD32rr_ND, X86::KADDDkk); 709 createReplacer(X86::NOT32r_ND, X86::KNOTDkk); 710 createReplacer(X86::OR32rr_ND, X86::KORDkk); 711 createReplacer(X86::AND32rr_ND, X86::KANDDkk); 712 createReplacer(X86::XOR32rr_ND, X86::KXORDkk); 713 createReplacer(X86::SHR64ri_ND, X86::KSHIFTRQki); 714 createReplacer(X86::SHL64ri_ND, X86::KSHIFTLQki); 715 createReplacer(X86::ADD64rr_ND, X86::KADDQkk); 716 createReplacer(X86::NOT64r_ND, X86::KNOTQkk); 717 createReplacer(X86::OR64rr_ND, X86::KORQkk); 718 createReplacer(X86::AND64rr_ND, X86::KANDQkk); 719 createReplacer(X86::XOR64rr_ND, X86::KXORQkk); 720 } 721 722 // TODO: KTEST is not a replacement for TEST due to flag differences. Need 723 // to prove only Z flag is used. 724 // createReplacer(X86::TEST32rr, X86::KTESTDkk); 725 // createReplacer(X86::TEST64rr, X86::KTESTQkk); 726 } 727 728 if (STI->hasDQI()) { 729 createReplacer(X86::ADD8rr, X86::KADDBkk); 730 createReplacer(X86::ADD16rr, X86::KADDWkk); 731 732 createReplacer(X86::AND8rr, X86::KANDBkk); 733 734 createReplacer(X86::MOV8rm, GET_EGPR_IF_ENABLED(X86::KMOVBkm)); 735 createReplacer(X86::MOV8mr, GET_EGPR_IF_ENABLED(X86::KMOVBmk)); 736 createReplacer(X86::MOV8rr, GET_EGPR_IF_ENABLED(X86::KMOVBkk)); 737 738 createReplacer(X86::NOT8r, X86::KNOTBkk); 739 740 createReplacer(X86::OR8rr, X86::KORBkk); 741 742 createReplacer(X86::SHR8ri, X86::KSHIFTRBki); 743 createReplacer(X86::SHL8ri, X86::KSHIFTLBki); 744 745 // TODO: KTEST is not a replacement for TEST due to flag differences. Need 746 // to prove only Z flag is used. 747 // createReplacer(X86::TEST8rr, X86::KTESTBkk); 748 // createReplacer(X86::TEST16rr, X86::KTESTWkk); 749 750 createReplacer(X86::XOR8rr, X86::KXORBkk); 751 752 if (HasNDD) { 753 createReplacer(X86::ADD8rr_ND, X86::KADDBkk); 754 createReplacer(X86::ADD16rr_ND, X86::KADDWkk); 755 createReplacer(X86::AND8rr_ND, X86::KANDBkk); 756 createReplacer(X86::NOT8r_ND, X86::KNOTBkk); 757 createReplacer(X86::OR8rr_ND, X86::KORBkk); 758 createReplacer(X86::SHR8ri_ND, X86::KSHIFTRBki); 759 createReplacer(X86::SHL8ri_ND, X86::KSHIFTLBki); 760 createReplacer(X86::XOR8rr_ND, X86::KXORBkk); 761 } 762 } 763 #undef GET_EGPR_IF_ENABLED 764 } 765 766 bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { 767 if (skipFunction(MF.getFunction())) 768 return false; 769 if (DisableX86DomainReassignment) 770 return false; 771 772 LLVM_DEBUG( 773 dbgs() << "***** Machine Function before Domain Reassignment *****\n"); 774 LLVM_DEBUG(MF.print(dbgs())); 775 776 STI = &MF.getSubtarget<X86Subtarget>(); 777 // GPR->K is the only transformation currently supported, bail out early if no 778 // AVX512. 779 // TODO: We're also bailing of AVX512BW isn't supported since we use VK32 and 780 // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register 781 // coalescer doesn't clean it up and we generate a spill we will crash. 782 if (!STI->hasAVX512() || !STI->hasBWI()) 783 return false; 784 785 MRI = &MF.getRegInfo(); 786 assert(MRI->isSSA() && "Expected MIR to be in SSA form"); 787 788 TII = STI->getInstrInfo(); 789 initConverters(); 790 bool Changed = false; 791 792 EnclosedEdges.clear(); 793 EnclosedInstrs.clear(); 794 795 std::vector<Closure> Closures; 796 797 // Go over all virtual registers and calculate a closure. 798 unsigned ClosureID = 0; 799 for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { 800 Register Reg = Register::index2VirtReg(Idx); 801 802 // Skip unused VRegs. 803 if (MRI->reg_nodbg_empty(Reg)) 804 continue; 805 806 // GPR only current source domain supported. 807 if (!MRI->getTargetRegisterInfo()->isGeneralPurposeRegisterClass( 808 MRI->getRegClass(Reg))) 809 continue; 810 811 // Register already in closure. 812 if (EnclosedEdges.contains(Reg)) 813 continue; 814 815 // Calculate closure starting with Reg. 816 Closure C(ClosureID++, {MaskDomain}); 817 buildClosure(C, Reg); 818 819 // Collect all closures that can potentially be converted. 820 if (!C.empty() && C.isLegal(MaskDomain)) 821 Closures.push_back(std::move(C)); 822 } 823 824 for (Closure &C : Closures) { 825 LLVM_DEBUG(C.dump(MRI)); 826 if (isReassignmentProfitable(C, MaskDomain)) { 827 reassign(C, MaskDomain); 828 ++NumClosuresConverted; 829 Changed = true; 830 } 831 } 832 833 LLVM_DEBUG( 834 dbgs() << "***** Machine Function after Domain Reassignment *****\n"); 835 LLVM_DEBUG(MF.print(dbgs())); 836 837 return Changed; 838 } 839 840 INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment", 841 "X86 Domain Reassignment Pass", false, false) 842 843 /// Returns an instance of the Domain Reassignment pass. 844 FunctionPass *llvm::createX86DomainReassignmentPass() { 845 return new X86DomainReassignment(); 846 } 847