169a322feSNoah Goldstein //===-- X86FixupInstTunings.cpp - replace instructions -----------===// 269a322feSNoah Goldstein // 369a322feSNoah Goldstein // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 469a322feSNoah Goldstein // See https://llvm.org/LICENSE.txt for license information. 569a322feSNoah Goldstein // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 669a322feSNoah Goldstein // 769a322feSNoah Goldstein //===----------------------------------------------------------------------===// 869a322feSNoah Goldstein // 969a322feSNoah Goldstein // This file does a tuning pass replacing slower machine instructions 1069a322feSNoah Goldstein // with faster ones. We do this here, as opposed to during normal ISel, as 1169a322feSNoah Goldstein // attempting to get the "right" instruction can break patterns. This pass 1269a322feSNoah Goldstein // is not meant search for special cases where an instruction can be transformed 1369a322feSNoah Goldstein // to another, it is only meant to do transformations where the old instruction 1469a322feSNoah Goldstein // is always replacable with the new instructions. For example: 1569a322feSNoah Goldstein // 1669a322feSNoah Goldstein // `vpermq ymm` -> `vshufd ymm` 1769a322feSNoah Goldstein // -- BAD, not always valid (lane cross/non-repeated mask) 1869a322feSNoah Goldstein // 1969a322feSNoah Goldstein // `vpermilps ymm` -> `vshufd ymm` 2069a322feSNoah Goldstein // -- GOOD, always replaceable 2169a322feSNoah Goldstein // 2269a322feSNoah Goldstein //===----------------------------------------------------------------------===// 2369a322feSNoah Goldstein 2469a322feSNoah Goldstein #include "X86.h" 2569a322feSNoah Goldstein #include "X86InstrInfo.h" 2669a322feSNoah Goldstein #include "X86Subtarget.h" 2769a322feSNoah Goldstein #include "llvm/ADT/Statistic.h" 2869a322feSNoah Goldstein #include "llvm/CodeGen/MachineFunctionPass.h" 2969a322feSNoah Goldstein #include "llvm/CodeGen/MachineInstrBuilder.h" 3069a322feSNoah Goldstein 3169a322feSNoah Goldstein using namespace llvm; 3269a322feSNoah Goldstein 3369a322feSNoah Goldstein #define DEBUG_TYPE "x86-fixup-inst-tuning" 3469a322feSNoah Goldstein 3569a322feSNoah Goldstein STATISTIC(NumInstChanges, "Number of instructions changes"); 3669a322feSNoah Goldstein 3769a322feSNoah Goldstein namespace { 3869a322feSNoah Goldstein class X86FixupInstTuningPass : public MachineFunctionPass { 3969a322feSNoah Goldstein public: 4069a322feSNoah Goldstein static char ID; 4169a322feSNoah Goldstein 4269a322feSNoah Goldstein X86FixupInstTuningPass() : MachineFunctionPass(ID) {} 4369a322feSNoah Goldstein 4469a322feSNoah Goldstein StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; } 4569a322feSNoah Goldstein 4669a322feSNoah Goldstein bool runOnMachineFunction(MachineFunction &MF) override; 4769a322feSNoah Goldstein bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, 4869a322feSNoah Goldstein MachineBasicBlock::iterator &I); 4969a322feSNoah Goldstein 5069a322feSNoah Goldstein // This pass runs after regalloc and doesn't support VReg operands. 5169a322feSNoah Goldstein MachineFunctionProperties getRequiredProperties() const override { 5269a322feSNoah Goldstein return MachineFunctionProperties().set( 5369a322feSNoah Goldstein MachineFunctionProperties::Property::NoVRegs); 5469a322feSNoah Goldstein } 5569a322feSNoah Goldstein 5669a322feSNoah Goldstein private: 5769a322feSNoah Goldstein const X86InstrInfo *TII = nullptr; 5869a322feSNoah Goldstein const X86Subtarget *ST = nullptr; 596b29a6f2SNoah Goldstein const MCSchedModel *SM = nullptr; 6069a322feSNoah Goldstein }; 6169a322feSNoah Goldstein } // end anonymous namespace 6269a322feSNoah Goldstein 6369a322feSNoah Goldstein char X86FixupInstTuningPass::ID = 0; 6469a322feSNoah Goldstein 6569a322feSNoah Goldstein INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 6669a322feSNoah Goldstein 6769a322feSNoah Goldstein FunctionPass *llvm::createX86FixupInstTuning() { 6869a322feSNoah Goldstein return new X86FixupInstTuningPass(); 6969a322feSNoah Goldstein } 7069a322feSNoah Goldstein 716b29a6f2SNoah Goldstein template <typename T> 726b29a6f2SNoah Goldstein static std::optional<bool> CmpOptionals(T NewVal, T CurVal) { 736b29a6f2SNoah Goldstein if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal) 746b29a6f2SNoah Goldstein return *NewVal < *CurVal; 756b29a6f2SNoah Goldstein 766b29a6f2SNoah Goldstein return std::nullopt; 776b29a6f2SNoah Goldstein } 786b29a6f2SNoah Goldstein 7969a322feSNoah Goldstein bool X86FixupInstTuningPass::processInstruction( 8069a322feSNoah Goldstein MachineFunction &MF, MachineBasicBlock &MBB, 8169a322feSNoah Goldstein MachineBasicBlock::iterator &I) { 8269a322feSNoah Goldstein MachineInstr &MI = *I; 8369a322feSNoah Goldstein unsigned Opc = MI.getOpcode(); 8469a322feSNoah Goldstein unsigned NumOperands = MI.getDesc().getNumOperands(); 8569a322feSNoah Goldstein 866b29a6f2SNoah Goldstein auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> { 876b29a6f2SNoah Goldstein // We already checked that SchedModel exists in `NewOpcPreferable`. 886b29a6f2SNoah Goldstein return MCSchedModel::getReciprocalThroughput( 896b29a6f2SNoah Goldstein *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 906b29a6f2SNoah Goldstein }; 916b29a6f2SNoah Goldstein 926b29a6f2SNoah Goldstein auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> { 936b29a6f2SNoah Goldstein // We already checked that SchedModel exists in `NewOpcPreferable`. 946b29a6f2SNoah Goldstein return MCSchedModel::computeInstrLatency( 956b29a6f2SNoah Goldstein *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 966b29a6f2SNoah Goldstein }; 976b29a6f2SNoah Goldstein 986b29a6f2SNoah Goldstein auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> { 996b29a6f2SNoah Goldstein if (unsigned Size = TII->get(Opcode).getSize()) 1006b29a6f2SNoah Goldstein return Size; 1016b29a6f2SNoah Goldstein // Zero size means we where unable to compute it. 1026b29a6f2SNoah Goldstein return std::nullopt; 1036b29a6f2SNoah Goldstein }; 1046b29a6f2SNoah Goldstein 1056b29a6f2SNoah Goldstein auto NewOpcPreferable = [&](unsigned NewOpc, 1066b29a6f2SNoah Goldstein bool ReplaceInTie = true) -> bool { 1076b29a6f2SNoah Goldstein std::optional<bool> Res; 1086b29a6f2SNoah Goldstein if (SM->hasInstrSchedModel()) { 1096b29a6f2SNoah Goldstein // Compare tput -> lat -> code size. 1106b29a6f2SNoah Goldstein Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc)); 1116b29a6f2SNoah Goldstein if (Res.has_value()) 1126b29a6f2SNoah Goldstein return *Res; 1136b29a6f2SNoah Goldstein 1146b29a6f2SNoah Goldstein Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc)); 1156b29a6f2SNoah Goldstein if (Res.has_value()) 1166b29a6f2SNoah Goldstein return *Res; 1176b29a6f2SNoah Goldstein } 1186b29a6f2SNoah Goldstein 1196b29a6f2SNoah Goldstein Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc)); 1206b29a6f2SNoah Goldstein if (Res.has_value()) 1216b29a6f2SNoah Goldstein return *Res; 1226b29a6f2SNoah Goldstein 1236b29a6f2SNoah Goldstein // We either have either were unable to get tput/lat/codesize or all values 1246b29a6f2SNoah Goldstein // were equal. Return specified option for a tie. 1256b29a6f2SNoah Goldstein return ReplaceInTie; 1266b29a6f2SNoah Goldstein }; 1276b29a6f2SNoah Goldstein 128*e9f9467dSSimon Pilgrim // `vpermilpd r, i` -> `vshufpd r, r, i` 129*e9f9467dSSimon Pilgrim // `vpermilpd r, i, k` -> `vshufpd r, r, i, k` 130*e9f9467dSSimon Pilgrim // `vshufpd` is always as fast or faster than `vpermilpd` and takes 131*e9f9467dSSimon Pilgrim // 1 less byte of code size for VEX and EVEX encoding. 132*e9f9467dSSimon Pilgrim auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool { 133*e9f9467dSSimon Pilgrim if (!NewOpcPreferable(NewOpc)) 134*e9f9467dSSimon Pilgrim return false; 135*e9f9467dSSimon Pilgrim unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 136*e9f9467dSSimon Pilgrim MI.removeOperand(NumOperands - 1); 137*e9f9467dSSimon Pilgrim MI.addOperand(MI.getOperand(NumOperands - 2)); 138*e9f9467dSSimon Pilgrim MI.setDesc(TII->get(NewOpc)); 139*e9f9467dSSimon Pilgrim MI.addOperand(MachineOperand::CreateImm(MaskImm)); 140*e9f9467dSSimon Pilgrim return true; 141*e9f9467dSSimon Pilgrim }; 142*e9f9467dSSimon Pilgrim 14369a322feSNoah Goldstein // `vpermilps r, i` -> `vshufps r, r, i` 1448ac8c579SNoah Goldstein // `vpermilps r, i, k` -> `vshufps r, r, i, k` 1458ac8c579SNoah Goldstein // `vshufps` is always as fast or faster than `vpermilps` and takes 14623472766SSimon Pilgrim // 1 less byte of code size for VEX and EVEX encoding. 14769a322feSNoah Goldstein auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool { 1486b29a6f2SNoah Goldstein if (!NewOpcPreferable(NewOpc)) 1496b29a6f2SNoah Goldstein return false; 15069a322feSNoah Goldstein unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 15169a322feSNoah Goldstein MI.removeOperand(NumOperands - 1); 1528ac8c579SNoah Goldstein MI.addOperand(MI.getOperand(NumOperands - 2)); 15369a322feSNoah Goldstein MI.setDesc(TII->get(NewOpc)); 15469a322feSNoah Goldstein MI.addOperand(MachineOperand::CreateImm(MaskImm)); 15569a322feSNoah Goldstein return true; 15669a322feSNoah Goldstein }; 15769a322feSNoah Goldstein 15869a322feSNoah Goldstein // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles. 15969a322feSNoah Goldstein // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less 16069a322feSNoah Goldstein // byte of code size. 16169a322feSNoah Goldstein auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool { 16269a322feSNoah Goldstein // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as 16369a322feSNoah Goldstein // `vpshufd` saves a byte of code size. 1642ce1698aSNoah Goldstein if (!ST->hasNoDomainDelayShuffle() || 1656b29a6f2SNoah Goldstein !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 16669a322feSNoah Goldstein return false; 16769a322feSNoah Goldstein MI.setDesc(TII->get(NewOpc)); 16869a322feSNoah Goldstein return true; 16969a322feSNoah Goldstein }; 17069a322feSNoah Goldstein 171c3f01f13SNoah Goldstein // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00` 172c3f01f13SNoah Goldstein // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff` 173c3f01f13SNoah Goldstein // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00` 174c3f01f13SNoah Goldstein // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff` 175c3f01f13SNoah Goldstein // `vunpcklpd r, m` -> `vunpcklqdq r, m, k` 176c3f01f13SNoah Goldstein // `vunpckhpd r, m` -> `vunpckhqdq r, m, k` 177c3f01f13SNoah Goldstein // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k` 178c3f01f13SNoah Goldstein // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k` 179c3f01f13SNoah Goldstein // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd` 180c3f01f13SNoah Goldstein // -> `vunpck{l|h}qdq` 181c3f01f13SNoah Goldstein // 2) If `vshufpd` faster than `vunpck{l|h}pd` 182c3f01f13SNoah Goldstein // -> `vshufpd` 183d6572065SNoah Goldstein // 184d6572065SNoah Goldstein // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay) 185d6572065SNoah Goldstein auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool { 1866b29a6f2SNoah Goldstein if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 1876b29a6f2SNoah Goldstein return false; 1886b29a6f2SNoah Goldstein 1896b29a6f2SNoah Goldstein MI.setDesc(TII->get(NewOpc)); 1906b29a6f2SNoah Goldstein MI.addOperand(MachineOperand::CreateImm(MaskImm)); 1916b29a6f2SNoah Goldstein return true; 1926b29a6f2SNoah Goldstein }; 193fd347ceaSNoah Goldstein 194d6572065SNoah Goldstein auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool { 195c3f01f13SNoah Goldstein // TODO it may be worth it to set ReplaceInTie to `true` as there is no real 196c3f01f13SNoah Goldstein // downside to the integer unpck, but if someone doesn't specify exact 197c3f01f13SNoah Goldstein // target we won't find it faster. 198c3f01f13SNoah Goldstein if (!ST->hasNoDomainDelayShuffle() || 199c3f01f13SNoah Goldstein !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 200c3f01f13SNoah Goldstein return false; 201c3f01f13SNoah Goldstein MI.setDesc(TII->get(NewOpc)); 202c3f01f13SNoah Goldstein return true; 203c3f01f13SNoah Goldstein }; 204c3f01f13SNoah Goldstein 205c3f01f13SNoah Goldstein auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain, 206c3f01f13SNoah Goldstein unsigned NewOpc) -> bool { 207d6572065SNoah Goldstein if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 208c3f01f13SNoah Goldstein return true; 209d6572065SNoah Goldstein return ProcessUNPCK(NewOpc, 0x00); 2106b29a6f2SNoah Goldstein }; 211c3f01f13SNoah Goldstein auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain, 212c3f01f13SNoah Goldstein unsigned NewOpc) -> bool { 213d6572065SNoah Goldstein if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 214c3f01f13SNoah Goldstein return true; 215d6572065SNoah Goldstein return ProcessUNPCK(NewOpc, 0xff); 2166b29a6f2SNoah Goldstein }; 2176b29a6f2SNoah Goldstein 218c3f01f13SNoah Goldstein auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool { 219d6572065SNoah Goldstein return ProcessUNPCKToIntDomain(NewOpcIntDomain); 220d6572065SNoah Goldstein }; 221d6572065SNoah Goldstein 222d6572065SNoah Goldstein auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool { 223d6572065SNoah Goldstein return ProcessUNPCKToIntDomain(NewOpc); 224c3f01f13SNoah Goldstein }; 225c3f01f13SNoah Goldstein 22669a322feSNoah Goldstein switch (Opc) { 227*e9f9467dSSimon Pilgrim case X86::VPERMILPDri: 228*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDrri); 229*e9f9467dSSimon Pilgrim case X86::VPERMILPDYri: 230*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDYrri); 231*e9f9467dSSimon Pilgrim case X86::VPERMILPDZ128ri: 232*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZ128rri); 233*e9f9467dSSimon Pilgrim case X86::VPERMILPDZ256ri: 234*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZ256rri); 235*e9f9467dSSimon Pilgrim case X86::VPERMILPDZri: 236*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZrri); 237*e9f9467dSSimon Pilgrim case X86::VPERMILPDZ128rikz: 238*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz); 239*e9f9467dSSimon Pilgrim case X86::VPERMILPDZ256rikz: 240*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz); 241*e9f9467dSSimon Pilgrim case X86::VPERMILPDZrikz: 242*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZrrikz); 243*e9f9467dSSimon Pilgrim case X86::VPERMILPDZ128rik: 244*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik); 245*e9f9467dSSimon Pilgrim case X86::VPERMILPDZ256rik: 246*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik); 247*e9f9467dSSimon Pilgrim case X86::VPERMILPDZrik: 248*e9f9467dSSimon Pilgrim return ProcessVPERMILPDri(X86::VSHUFPDZrrik); 249*e9f9467dSSimon Pilgrim 25069a322feSNoah Goldstein case X86::VPERMILPSri: 25169a322feSNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSrri); 25269a322feSNoah Goldstein case X86::VPERMILPSYri: 25369a322feSNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSYrri); 25469a322feSNoah Goldstein case X86::VPERMILPSZ128ri: 25569a322feSNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZ128rri); 25669a322feSNoah Goldstein case X86::VPERMILPSZ256ri: 25769a322feSNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZ256rri); 25869a322feSNoah Goldstein case X86::VPERMILPSZri: 25969a322feSNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZrri); 2608ac8c579SNoah Goldstein case X86::VPERMILPSZ128rikz: 2618ac8c579SNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz); 2628ac8c579SNoah Goldstein case X86::VPERMILPSZ256rikz: 2638ac8c579SNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz); 2648ac8c579SNoah Goldstein case X86::VPERMILPSZrikz: 2658ac8c579SNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZrrikz); 2668ac8c579SNoah Goldstein case X86::VPERMILPSZ128rik: 2678ac8c579SNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik); 2688ac8c579SNoah Goldstein case X86::VPERMILPSZ256rik: 2698ac8c579SNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik); 2708ac8c579SNoah Goldstein case X86::VPERMILPSZrik: 2718ac8c579SNoah Goldstein return ProcessVPERMILPSri(X86::VSHUFPSZrrik); 27269a322feSNoah Goldstein case X86::VPERMILPSmi: 27369a322feSNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDmi); 27469a322feSNoah Goldstein case X86::VPERMILPSYmi: 27569a322feSNoah Goldstein // TODO: See if there is a more generic way we can test if the replacement 27669a322feSNoah Goldstein // instruction is supported. 27769a322feSNoah Goldstein return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false; 27869a322feSNoah Goldstein case X86::VPERMILPSZ128mi: 27969a322feSNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi); 28069a322feSNoah Goldstein case X86::VPERMILPSZ256mi: 28169a322feSNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi); 28269a322feSNoah Goldstein case X86::VPERMILPSZmi: 28369a322feSNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZmi); 2848ac8c579SNoah Goldstein case X86::VPERMILPSZ128mikz: 2858ac8c579SNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz); 2868ac8c579SNoah Goldstein case X86::VPERMILPSZ256mikz: 2878ac8c579SNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz); 2888ac8c579SNoah Goldstein case X86::VPERMILPSZmikz: 2898ac8c579SNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZmikz); 2908ac8c579SNoah Goldstein case X86::VPERMILPSZ128mik: 2918ac8c579SNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik); 2928ac8c579SNoah Goldstein case X86::VPERMILPSZ256mik: 2938ac8c579SNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik); 2948ac8c579SNoah Goldstein case X86::VPERMILPSZmik: 2958ac8c579SNoah Goldstein return ProcessVPERMILPSmi(X86::VPSHUFDZmik); 2966b29a6f2SNoah Goldstein 2976b29a6f2SNoah Goldstein case X86::MOVLHPSrr: 2986b29a6f2SNoah Goldstein case X86::UNPCKLPDrr: 299c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri); 3006b29a6f2SNoah Goldstein case X86::VMOVLHPSrr: 3016b29a6f2SNoah Goldstein case X86::VUNPCKLPDrr: 302c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri); 3036b29a6f2SNoah Goldstein case X86::VUNPCKLPDYrr: 304c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri); 3056b29a6f2SNoah Goldstein // VMOVLHPS is always 128 bits. 3066b29a6f2SNoah Goldstein case X86::VMOVLHPSZrr: 3076b29a6f2SNoah Goldstein case X86::VUNPCKLPDZ128rr: 308c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri); 3096b29a6f2SNoah Goldstein case X86::VUNPCKLPDZ256rr: 310c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri); 3116b29a6f2SNoah Goldstein case X86::VUNPCKLPDZrr: 312c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri); 313fd347ceaSNoah Goldstein case X86::VUNPCKLPDZ128rrk: 314c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik); 315fd347ceaSNoah Goldstein case X86::VUNPCKLPDZ256rrk: 316c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik); 317fd347ceaSNoah Goldstein case X86::VUNPCKLPDZrrk: 318c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik); 319fd347ceaSNoah Goldstein case X86::VUNPCKLPDZ128rrkz: 320c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 321fd347ceaSNoah Goldstein case X86::VUNPCKLPDZ256rrkz: 322c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 323fd347ceaSNoah Goldstein case X86::VUNPCKLPDZrrkz: 324c3f01f13SNoah Goldstein return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz); 3256b29a6f2SNoah Goldstein case X86::UNPCKHPDrr: 326c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri); 3276b29a6f2SNoah Goldstein case X86::VUNPCKHPDrr: 328c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri); 3296b29a6f2SNoah Goldstein case X86::VUNPCKHPDYrr: 330c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri); 3316b29a6f2SNoah Goldstein case X86::VUNPCKHPDZ128rr: 332c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri); 3336b29a6f2SNoah Goldstein case X86::VUNPCKHPDZ256rr: 334c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri); 3356b29a6f2SNoah Goldstein case X86::VUNPCKHPDZrr: 336c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri); 337fd347ceaSNoah Goldstein case X86::VUNPCKHPDZ128rrk: 338c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik); 339fd347ceaSNoah Goldstein case X86::VUNPCKHPDZ256rrk: 340c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik); 341fd347ceaSNoah Goldstein case X86::VUNPCKHPDZrrk: 342c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik); 343fd347ceaSNoah Goldstein case X86::VUNPCKHPDZ128rrkz: 344c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 345fd347ceaSNoah Goldstein case X86::VUNPCKHPDZ256rrkz: 346c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 347fd347ceaSNoah Goldstein case X86::VUNPCKHPDZrrkz: 348c3f01f13SNoah Goldstein return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz); 349c3f01f13SNoah Goldstein case X86::UNPCKLPDrm: 350c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm); 351c3f01f13SNoah Goldstein case X86::VUNPCKLPDrm: 352c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm); 353c3f01f13SNoah Goldstein case X86::VUNPCKLPDYrm: 354c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm); 355c3f01f13SNoah Goldstein case X86::VUNPCKLPDZ128rm: 356c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm); 357c3f01f13SNoah Goldstein case X86::VUNPCKLPDZ256rm: 358c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm); 359c3f01f13SNoah Goldstein case X86::VUNPCKLPDZrm: 360c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm); 361c3f01f13SNoah Goldstein case X86::VUNPCKLPDZ128rmk: 362c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk); 363c3f01f13SNoah Goldstein case X86::VUNPCKLPDZ256rmk: 364c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk); 365c3f01f13SNoah Goldstein case X86::VUNPCKLPDZrmk: 366c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk); 367c3f01f13SNoah Goldstein case X86::VUNPCKLPDZ128rmkz: 368c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz); 369c3f01f13SNoah Goldstein case X86::VUNPCKLPDZ256rmkz: 370c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz); 371c3f01f13SNoah Goldstein case X86::VUNPCKLPDZrmkz: 372c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz); 373c3f01f13SNoah Goldstein case X86::UNPCKHPDrm: 374c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm); 375c3f01f13SNoah Goldstein case X86::VUNPCKHPDrm: 376c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm); 377c3f01f13SNoah Goldstein case X86::VUNPCKHPDYrm: 378c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm); 379c3f01f13SNoah Goldstein case X86::VUNPCKHPDZ128rm: 380c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm); 381c3f01f13SNoah Goldstein case X86::VUNPCKHPDZ256rm: 382c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm); 383c3f01f13SNoah Goldstein case X86::VUNPCKHPDZrm: 384c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm); 385c3f01f13SNoah Goldstein case X86::VUNPCKHPDZ128rmk: 386c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk); 387c3f01f13SNoah Goldstein case X86::VUNPCKHPDZ256rmk: 388c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk); 389c3f01f13SNoah Goldstein case X86::VUNPCKHPDZrmk: 390c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk); 391c3f01f13SNoah Goldstein case X86::VUNPCKHPDZ128rmkz: 392c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz); 393c3f01f13SNoah Goldstein case X86::VUNPCKHPDZ256rmkz: 394c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz); 395c3f01f13SNoah Goldstein case X86::VUNPCKHPDZrmkz: 396c3f01f13SNoah Goldstein return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz); 397d6572065SNoah Goldstein 398d6572065SNoah Goldstein case X86::UNPCKLPSrr: 399d6572065SNoah Goldstein return ProcessUNPCKPS(X86::PUNPCKLDQrr); 400d6572065SNoah Goldstein case X86::VUNPCKLPSrr: 401d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQrr); 402d6572065SNoah Goldstein case X86::VUNPCKLPSYrr: 403d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQYrr); 404d6572065SNoah Goldstein case X86::VUNPCKLPSZ128rr: 405d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr); 406d6572065SNoah Goldstein case X86::VUNPCKLPSZ256rr: 407d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr); 408d6572065SNoah Goldstein case X86::VUNPCKLPSZrr: 409d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZrr); 410d6572065SNoah Goldstein case X86::VUNPCKLPSZ128rrk: 411d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk); 412d6572065SNoah Goldstein case X86::VUNPCKLPSZ256rrk: 413d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk); 414d6572065SNoah Goldstein case X86::VUNPCKLPSZrrk: 415d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk); 416d6572065SNoah Goldstein case X86::VUNPCKLPSZ128rrkz: 417d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz); 418d6572065SNoah Goldstein case X86::VUNPCKLPSZ256rrkz: 419d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz); 420d6572065SNoah Goldstein case X86::VUNPCKLPSZrrkz: 421d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz); 422d6572065SNoah Goldstein case X86::UNPCKHPSrr: 423d6572065SNoah Goldstein return ProcessUNPCKPS(X86::PUNPCKHDQrr); 424d6572065SNoah Goldstein case X86::VUNPCKHPSrr: 425d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQrr); 426d6572065SNoah Goldstein case X86::VUNPCKHPSYrr: 427d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQYrr); 428d6572065SNoah Goldstein case X86::VUNPCKHPSZ128rr: 429d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr); 430d6572065SNoah Goldstein case X86::VUNPCKHPSZ256rr: 431d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr); 432d6572065SNoah Goldstein case X86::VUNPCKHPSZrr: 433d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZrr); 434d6572065SNoah Goldstein case X86::VUNPCKHPSZ128rrk: 435d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk); 436d6572065SNoah Goldstein case X86::VUNPCKHPSZ256rrk: 437d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk); 438d6572065SNoah Goldstein case X86::VUNPCKHPSZrrk: 439d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk); 440d6572065SNoah Goldstein case X86::VUNPCKHPSZ128rrkz: 441d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz); 442d6572065SNoah Goldstein case X86::VUNPCKHPSZ256rrkz: 443d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz); 444d6572065SNoah Goldstein case X86::VUNPCKHPSZrrkz: 445d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz); 446d6572065SNoah Goldstein case X86::UNPCKLPSrm: 447d6572065SNoah Goldstein return ProcessUNPCKPS(X86::PUNPCKLDQrm); 448d6572065SNoah Goldstein case X86::VUNPCKLPSrm: 449d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQrm); 450d6572065SNoah Goldstein case X86::VUNPCKLPSYrm: 451d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQYrm); 452d6572065SNoah Goldstein case X86::VUNPCKLPSZ128rm: 453d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm); 454d6572065SNoah Goldstein case X86::VUNPCKLPSZ256rm: 455d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm); 456d6572065SNoah Goldstein case X86::VUNPCKLPSZrm: 457d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZrm); 458d6572065SNoah Goldstein case X86::VUNPCKLPSZ128rmk: 459d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk); 460d6572065SNoah Goldstein case X86::VUNPCKLPSZ256rmk: 461d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk); 462d6572065SNoah Goldstein case X86::VUNPCKLPSZrmk: 463d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk); 464d6572065SNoah Goldstein case X86::VUNPCKLPSZ128rmkz: 465d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz); 466d6572065SNoah Goldstein case X86::VUNPCKLPSZ256rmkz: 467d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz); 468d6572065SNoah Goldstein case X86::VUNPCKLPSZrmkz: 469d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz); 470d6572065SNoah Goldstein case X86::UNPCKHPSrm: 471d6572065SNoah Goldstein return ProcessUNPCKPS(X86::PUNPCKHDQrm); 472d6572065SNoah Goldstein case X86::VUNPCKHPSrm: 473d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQrm); 474d6572065SNoah Goldstein case X86::VUNPCKHPSYrm: 475d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQYrm); 476d6572065SNoah Goldstein case X86::VUNPCKHPSZ128rm: 477d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm); 478d6572065SNoah Goldstein case X86::VUNPCKHPSZ256rm: 479d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm); 480d6572065SNoah Goldstein case X86::VUNPCKHPSZrm: 481d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZrm); 482d6572065SNoah Goldstein case X86::VUNPCKHPSZ128rmk: 483d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk); 484d6572065SNoah Goldstein case X86::VUNPCKHPSZ256rmk: 485d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk); 486d6572065SNoah Goldstein case X86::VUNPCKHPSZrmk: 487d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk); 488d6572065SNoah Goldstein case X86::VUNPCKHPSZ128rmkz: 489d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz); 490d6572065SNoah Goldstein case X86::VUNPCKHPSZ256rmkz: 491d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz); 492d6572065SNoah Goldstein case X86::VUNPCKHPSZrmkz: 493d6572065SNoah Goldstein return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz); 49469a322feSNoah Goldstein default: 49569a322feSNoah Goldstein return false; 49669a322feSNoah Goldstein } 49769a322feSNoah Goldstein } 49869a322feSNoah Goldstein 49969a322feSNoah Goldstein bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) { 50069a322feSNoah Goldstein LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";); 50169a322feSNoah Goldstein bool Changed = false; 50269a322feSNoah Goldstein ST = &MF.getSubtarget<X86Subtarget>(); 50369a322feSNoah Goldstein TII = ST->getInstrInfo(); 5046b29a6f2SNoah Goldstein SM = &ST->getSchedModel(); 5056b29a6f2SNoah Goldstein 50669a322feSNoah Goldstein for (MachineBasicBlock &MBB : MF) { 50769a322feSNoah Goldstein for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 50869a322feSNoah Goldstein if (processInstruction(MF, MBB, I)) { 50969a322feSNoah Goldstein ++NumInstChanges; 51069a322feSNoah Goldstein Changed = true; 51169a322feSNoah Goldstein } 51269a322feSNoah Goldstein } 51369a322feSNoah Goldstein } 51469a322feSNoah Goldstein LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";); 51569a322feSNoah Goldstein return Changed; 51669a322feSNoah Goldstein } 517