xref: /llvm-project/llvm/lib/Target/X86/X86FixupInstTuning.cpp (revision dfe43bd1ca46c59399b7cbbf81b09256232e27f9)
//===-- X86FixupInstTuning.cpp - replace instructions ------------===//
269a322feSNoah Goldstein //
369a322feSNoah Goldstein // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
469a322feSNoah Goldstein // See https://llvm.org/LICENSE.txt for license information.
569a322feSNoah Goldstein // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
669a322feSNoah Goldstein //
769a322feSNoah Goldstein //===----------------------------------------------------------------------===//
869a322feSNoah Goldstein //
// This file does a tuning pass replacing slower machine instructions
// with faster ones. We do this here, as opposed to during normal ISel, as
// attempting to get the "right" instruction can break patterns. This pass
// is not meant to search for special cases where an instruction can be
// transformed to another; it is only meant to do transformations where the
// old instruction is always replaceable with the new instruction. For example:
1569a322feSNoah Goldstein //
//      `vpermq ymm` -> `vpshufd ymm`
//          -- BAD, not always valid (lane cross/non-repeated mask)
//
//      `vpermilps ymm` -> `vpshufd ymm`
//          -- GOOD, always replaceable
2169a322feSNoah Goldstein //
2269a322feSNoah Goldstein //===----------------------------------------------------------------------===//
2369a322feSNoah Goldstein 
2469a322feSNoah Goldstein #include "X86.h"
2569a322feSNoah Goldstein #include "X86InstrInfo.h"
2669a322feSNoah Goldstein #include "X86Subtarget.h"
2769a322feSNoah Goldstein #include "llvm/ADT/Statistic.h"
2869a322feSNoah Goldstein #include "llvm/CodeGen/MachineFunctionPass.h"
2969a322feSNoah Goldstein #include "llvm/CodeGen/MachineInstrBuilder.h"
3069a322feSNoah Goldstein 
3169a322feSNoah Goldstein using namespace llvm;
3269a322feSNoah Goldstein 
3369a322feSNoah Goldstein #define DEBUG_TYPE "x86-fixup-inst-tuning"
3469a322feSNoah Goldstein 
3569a322feSNoah Goldstein STATISTIC(NumInstChanges, "Number of instructions changes");
3669a322feSNoah Goldstein 
3769a322feSNoah Goldstein namespace {
3869a322feSNoah Goldstein class X86FixupInstTuningPass : public MachineFunctionPass {
3969a322feSNoah Goldstein public:
4069a322feSNoah Goldstein   static char ID;
4169a322feSNoah Goldstein 
4269a322feSNoah Goldstein   X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
4369a322feSNoah Goldstein 
4469a322feSNoah Goldstein   StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
4569a322feSNoah Goldstein 
4669a322feSNoah Goldstein   bool runOnMachineFunction(MachineFunction &MF) override;
4769a322feSNoah Goldstein   bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
4869a322feSNoah Goldstein                           MachineBasicBlock::iterator &I);
4969a322feSNoah Goldstein 
5069a322feSNoah Goldstein   // This pass runs after regalloc and doesn't support VReg operands.
5169a322feSNoah Goldstein   MachineFunctionProperties getRequiredProperties() const override {
5269a322feSNoah Goldstein     return MachineFunctionProperties().set(
5369a322feSNoah Goldstein         MachineFunctionProperties::Property::NoVRegs);
5469a322feSNoah Goldstein   }
5569a322feSNoah Goldstein 
5669a322feSNoah Goldstein private:
5769a322feSNoah Goldstein   const X86InstrInfo *TII = nullptr;
5869a322feSNoah Goldstein   const X86Subtarget *ST = nullptr;
596b29a6f2SNoah Goldstein   const MCSchedModel *SM = nullptr;
6069a322feSNoah Goldstein };
6169a322feSNoah Goldstein } // end anonymous namespace
6269a322feSNoah Goldstein 
6369a322feSNoah Goldstein char X86FixupInstTuningPass::ID = 0;
6469a322feSNoah Goldstein 
6569a322feSNoah Goldstein INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
6669a322feSNoah Goldstein 
6769a322feSNoah Goldstein FunctionPass *llvm::createX86FixupInstTuning() {
6869a322feSNoah Goldstein   return new X86FixupInstTuningPass();
6969a322feSNoah Goldstein }
7069a322feSNoah Goldstein 
/// Compare two optional metrics where a smaller value is the better one.
///
/// \returns std::nullopt when no decision can be made, i.e. either value is
/// missing or the two values do not compare unequal; otherwise returns whether
/// \p NewVal is strictly less than \p CurVal.
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  // Without both values there is nothing to compare.
  if (!NewVal.has_value() || !CurVal.has_value())
    return std::nullopt;

  // A tie is reported as "undecided" so the caller can try the next metric.
  if (!(*NewVal != *CurVal))
    return std::nullopt;

  return *NewVal < *CurVal;
}
786b29a6f2SNoah Goldstein 
7969a322feSNoah Goldstein bool X86FixupInstTuningPass::processInstruction(
8069a322feSNoah Goldstein     MachineFunction &MF, MachineBasicBlock &MBB,
8169a322feSNoah Goldstein     MachineBasicBlock::iterator &I) {
8269a322feSNoah Goldstein   MachineInstr &MI = *I;
8369a322feSNoah Goldstein   unsigned Opc = MI.getOpcode();
8469a322feSNoah Goldstein   unsigned NumOperands = MI.getDesc().getNumOperands();
8569a322feSNoah Goldstein 
866b29a6f2SNoah Goldstein   auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
876b29a6f2SNoah Goldstein     // We already checked that SchedModel exists in `NewOpcPreferable`.
886b29a6f2SNoah Goldstein     return MCSchedModel::getReciprocalThroughput(
896b29a6f2SNoah Goldstein         *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
906b29a6f2SNoah Goldstein   };
916b29a6f2SNoah Goldstein 
926b29a6f2SNoah Goldstein   auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
936b29a6f2SNoah Goldstein     // We already checked that SchedModel exists in `NewOpcPreferable`.
946b29a6f2SNoah Goldstein     return MCSchedModel::computeInstrLatency(
956b29a6f2SNoah Goldstein         *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
966b29a6f2SNoah Goldstein   };
976b29a6f2SNoah Goldstein 
986b29a6f2SNoah Goldstein   auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
996b29a6f2SNoah Goldstein     if (unsigned Size = TII->get(Opcode).getSize())
1006b29a6f2SNoah Goldstein       return Size;
1016b29a6f2SNoah Goldstein     // Zero size means we where unable to compute it.
1026b29a6f2SNoah Goldstein     return std::nullopt;
1036b29a6f2SNoah Goldstein   };
1046b29a6f2SNoah Goldstein 
1056b29a6f2SNoah Goldstein   auto NewOpcPreferable = [&](unsigned NewOpc,
1066b29a6f2SNoah Goldstein                               bool ReplaceInTie = true) -> bool {
1076b29a6f2SNoah Goldstein     std::optional<bool> Res;
1086b29a6f2SNoah Goldstein     if (SM->hasInstrSchedModel()) {
1096b29a6f2SNoah Goldstein       // Compare tput -> lat -> code size.
1106b29a6f2SNoah Goldstein       Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
1116b29a6f2SNoah Goldstein       if (Res.has_value())
1126b29a6f2SNoah Goldstein         return *Res;
1136b29a6f2SNoah Goldstein 
1146b29a6f2SNoah Goldstein       Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
1156b29a6f2SNoah Goldstein       if (Res.has_value())
1166b29a6f2SNoah Goldstein         return *Res;
1176b29a6f2SNoah Goldstein     }
1186b29a6f2SNoah Goldstein 
1196b29a6f2SNoah Goldstein     Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
1206b29a6f2SNoah Goldstein     if (Res.has_value())
1216b29a6f2SNoah Goldstein       return *Res;
1226b29a6f2SNoah Goldstein 
1236b29a6f2SNoah Goldstein     // We either have either were unable to get tput/lat/codesize or all values
1246b29a6f2SNoah Goldstein     // were equal. Return specified option for a tie.
1256b29a6f2SNoah Goldstein     return ReplaceInTie;
1266b29a6f2SNoah Goldstein   };
1276b29a6f2SNoah Goldstein 
128*e9f9467dSSimon Pilgrim   // `vpermilpd r, i` -> `vshufpd r, r, i`
129*e9f9467dSSimon Pilgrim   // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
130*e9f9467dSSimon Pilgrim   // `vshufpd` is always as fast or faster than `vpermilpd` and takes
131*e9f9467dSSimon Pilgrim   // 1 less byte of code size for VEX and EVEX encoding.
132*e9f9467dSSimon Pilgrim   auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
133*e9f9467dSSimon Pilgrim     if (!NewOpcPreferable(NewOpc))
134*e9f9467dSSimon Pilgrim       return false;
135*e9f9467dSSimon Pilgrim     unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
136*e9f9467dSSimon Pilgrim     MI.removeOperand(NumOperands - 1);
137*e9f9467dSSimon Pilgrim     MI.addOperand(MI.getOperand(NumOperands - 2));
138*e9f9467dSSimon Pilgrim     MI.setDesc(TII->get(NewOpc));
139*e9f9467dSSimon Pilgrim     MI.addOperand(MachineOperand::CreateImm(MaskImm));
140*e9f9467dSSimon Pilgrim     return true;
141*e9f9467dSSimon Pilgrim   };
142*e9f9467dSSimon Pilgrim 
14369a322feSNoah Goldstein   // `vpermilps r, i` -> `vshufps r, r, i`
1448ac8c579SNoah Goldstein   // `vpermilps r, i, k` -> `vshufps r, r, i, k`
1458ac8c579SNoah Goldstein   // `vshufps` is always as fast or faster than `vpermilps` and takes
14623472766SSimon Pilgrim   // 1 less byte of code size for VEX and EVEX encoding.
14769a322feSNoah Goldstein   auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
1486b29a6f2SNoah Goldstein     if (!NewOpcPreferable(NewOpc))
1496b29a6f2SNoah Goldstein       return false;
15069a322feSNoah Goldstein     unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
15169a322feSNoah Goldstein     MI.removeOperand(NumOperands - 1);
1528ac8c579SNoah Goldstein     MI.addOperand(MI.getOperand(NumOperands - 2));
15369a322feSNoah Goldstein     MI.setDesc(TII->get(NewOpc));
15469a322feSNoah Goldstein     MI.addOperand(MachineOperand::CreateImm(MaskImm));
15569a322feSNoah Goldstein     return true;
15669a322feSNoah Goldstein   };
15769a322feSNoah Goldstein 
15869a322feSNoah Goldstein   // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
15969a322feSNoah Goldstein   // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
16069a322feSNoah Goldstein   // byte of code size.
16169a322feSNoah Goldstein   auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
16269a322feSNoah Goldstein     // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
16369a322feSNoah Goldstein     // `vpshufd` saves a byte of code size.
1642ce1698aSNoah Goldstein     if (!ST->hasNoDomainDelayShuffle() ||
1656b29a6f2SNoah Goldstein         !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
16669a322feSNoah Goldstein       return false;
16769a322feSNoah Goldstein     MI.setDesc(TII->get(NewOpc));
16869a322feSNoah Goldstein     return true;
16969a322feSNoah Goldstein   };
17069a322feSNoah Goldstein 
171c3f01f13SNoah Goldstein   // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
172c3f01f13SNoah Goldstein   // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
173c3f01f13SNoah Goldstein   // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
174c3f01f13SNoah Goldstein   // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
175c3f01f13SNoah Goldstein   // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
176c3f01f13SNoah Goldstein   // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
177c3f01f13SNoah Goldstein   // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
178c3f01f13SNoah Goldstein   // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
179c3f01f13SNoah Goldstein   // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
180c3f01f13SNoah Goldstein   //        -> `vunpck{l|h}qdq`
181c3f01f13SNoah Goldstein   // 2) If `vshufpd` faster than `vunpck{l|h}pd`
182c3f01f13SNoah Goldstein   //        -> `vshufpd`
183d6572065SNoah Goldstein   //
184d6572065SNoah Goldstein   // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
185d6572065SNoah Goldstein   auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
1866b29a6f2SNoah Goldstein     if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
1876b29a6f2SNoah Goldstein       return false;
1886b29a6f2SNoah Goldstein 
1896b29a6f2SNoah Goldstein     MI.setDesc(TII->get(NewOpc));
1906b29a6f2SNoah Goldstein     MI.addOperand(MachineOperand::CreateImm(MaskImm));
1916b29a6f2SNoah Goldstein     return true;
1926b29a6f2SNoah Goldstein   };
193fd347ceaSNoah Goldstein 
194d6572065SNoah Goldstein   auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
195c3f01f13SNoah Goldstein     // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
196c3f01f13SNoah Goldstein     // downside to the integer unpck, but if someone doesn't specify exact
197c3f01f13SNoah Goldstein     // target we won't find it faster.
198c3f01f13SNoah Goldstein     if (!ST->hasNoDomainDelayShuffle() ||
199c3f01f13SNoah Goldstein         !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
200c3f01f13SNoah Goldstein       return false;
201c3f01f13SNoah Goldstein     MI.setDesc(TII->get(NewOpc));
202c3f01f13SNoah Goldstein     return true;
203c3f01f13SNoah Goldstein   };
204c3f01f13SNoah Goldstein 
205c3f01f13SNoah Goldstein   auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
206c3f01f13SNoah Goldstein                                unsigned NewOpc) -> bool {
207d6572065SNoah Goldstein     if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
208c3f01f13SNoah Goldstein       return true;
209d6572065SNoah Goldstein     return ProcessUNPCK(NewOpc, 0x00);
2106b29a6f2SNoah Goldstein   };
211c3f01f13SNoah Goldstein   auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
212c3f01f13SNoah Goldstein                                unsigned NewOpc) -> bool {
213d6572065SNoah Goldstein     if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
214c3f01f13SNoah Goldstein       return true;
215d6572065SNoah Goldstein     return ProcessUNPCK(NewOpc, 0xff);
2166b29a6f2SNoah Goldstein   };
2176b29a6f2SNoah Goldstein 
218c3f01f13SNoah Goldstein   auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
219d6572065SNoah Goldstein     return ProcessUNPCKToIntDomain(NewOpcIntDomain);
220d6572065SNoah Goldstein   };
221d6572065SNoah Goldstein 
222d6572065SNoah Goldstein   auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
223d6572065SNoah Goldstein     return ProcessUNPCKToIntDomain(NewOpc);
224c3f01f13SNoah Goldstein   };
225c3f01f13SNoah Goldstein 
22669a322feSNoah Goldstein   switch (Opc) {
227*e9f9467dSSimon Pilgrim   case X86::VPERMILPDri:
228*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDrri);
229*e9f9467dSSimon Pilgrim   case X86::VPERMILPDYri:
230*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDYrri);
231*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZ128ri:
232*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
233*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZ256ri:
234*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
235*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZri:
236*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZrri);
237*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZ128rikz:
238*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
239*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZ256rikz:
240*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
241*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZrikz:
242*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
243*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZ128rik:
244*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
245*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZ256rik:
246*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
247*e9f9467dSSimon Pilgrim   case X86::VPERMILPDZrik:
248*e9f9467dSSimon Pilgrim     return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
249*e9f9467dSSimon Pilgrim 
25069a322feSNoah Goldstein   case X86::VPERMILPSri:
25169a322feSNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSrri);
25269a322feSNoah Goldstein   case X86::VPERMILPSYri:
25369a322feSNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSYrri);
25469a322feSNoah Goldstein   case X86::VPERMILPSZ128ri:
25569a322feSNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
25669a322feSNoah Goldstein   case X86::VPERMILPSZ256ri:
25769a322feSNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
25869a322feSNoah Goldstein   case X86::VPERMILPSZri:
25969a322feSNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZrri);
2608ac8c579SNoah Goldstein   case X86::VPERMILPSZ128rikz:
2618ac8c579SNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
2628ac8c579SNoah Goldstein   case X86::VPERMILPSZ256rikz:
2638ac8c579SNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
2648ac8c579SNoah Goldstein   case X86::VPERMILPSZrikz:
2658ac8c579SNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
2668ac8c579SNoah Goldstein   case X86::VPERMILPSZ128rik:
2678ac8c579SNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
2688ac8c579SNoah Goldstein   case X86::VPERMILPSZ256rik:
2698ac8c579SNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
2708ac8c579SNoah Goldstein   case X86::VPERMILPSZrik:
2718ac8c579SNoah Goldstein     return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
27269a322feSNoah Goldstein   case X86::VPERMILPSmi:
27369a322feSNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDmi);
27469a322feSNoah Goldstein   case X86::VPERMILPSYmi:
27569a322feSNoah Goldstein     // TODO: See if there is a more generic way we can test if the replacement
27669a322feSNoah Goldstein     // instruction is supported.
27769a322feSNoah Goldstein     return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
27869a322feSNoah Goldstein   case X86::VPERMILPSZ128mi:
27969a322feSNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
28069a322feSNoah Goldstein   case X86::VPERMILPSZ256mi:
28169a322feSNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
28269a322feSNoah Goldstein   case X86::VPERMILPSZmi:
28369a322feSNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
2848ac8c579SNoah Goldstein   case X86::VPERMILPSZ128mikz:
2858ac8c579SNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
2868ac8c579SNoah Goldstein   case X86::VPERMILPSZ256mikz:
2878ac8c579SNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
2888ac8c579SNoah Goldstein   case X86::VPERMILPSZmikz:
2898ac8c579SNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
2908ac8c579SNoah Goldstein   case X86::VPERMILPSZ128mik:
2918ac8c579SNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
2928ac8c579SNoah Goldstein   case X86::VPERMILPSZ256mik:
2938ac8c579SNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
2948ac8c579SNoah Goldstein   case X86::VPERMILPSZmik:
2958ac8c579SNoah Goldstein     return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
2966b29a6f2SNoah Goldstein 
2976b29a6f2SNoah Goldstein   case X86::MOVLHPSrr:
2986b29a6f2SNoah Goldstein   case X86::UNPCKLPDrr:
299c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
3006b29a6f2SNoah Goldstein   case X86::VMOVLHPSrr:
3016b29a6f2SNoah Goldstein   case X86::VUNPCKLPDrr:
302c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
3036b29a6f2SNoah Goldstein   case X86::VUNPCKLPDYrr:
304c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
3056b29a6f2SNoah Goldstein     // VMOVLHPS is always 128 bits.
3066b29a6f2SNoah Goldstein   case X86::VMOVLHPSZrr:
3076b29a6f2SNoah Goldstein   case X86::VUNPCKLPDZ128rr:
308c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
3096b29a6f2SNoah Goldstein   case X86::VUNPCKLPDZ256rr:
310c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
3116b29a6f2SNoah Goldstein   case X86::VUNPCKLPDZrr:
312c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
313fd347ceaSNoah Goldstein   case X86::VUNPCKLPDZ128rrk:
314c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
315fd347ceaSNoah Goldstein   case X86::VUNPCKLPDZ256rrk:
316c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
317fd347ceaSNoah Goldstein   case X86::VUNPCKLPDZrrk:
318c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
319fd347ceaSNoah Goldstein   case X86::VUNPCKLPDZ128rrkz:
320c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
321fd347ceaSNoah Goldstein   case X86::VUNPCKLPDZ256rrkz:
322c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
323fd347ceaSNoah Goldstein   case X86::VUNPCKLPDZrrkz:
324c3f01f13SNoah Goldstein     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
3256b29a6f2SNoah Goldstein   case X86::UNPCKHPDrr:
326c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
3276b29a6f2SNoah Goldstein   case X86::VUNPCKHPDrr:
328c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
3296b29a6f2SNoah Goldstein   case X86::VUNPCKHPDYrr:
330c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
3316b29a6f2SNoah Goldstein   case X86::VUNPCKHPDZ128rr:
332c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
3336b29a6f2SNoah Goldstein   case X86::VUNPCKHPDZ256rr:
334c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
3356b29a6f2SNoah Goldstein   case X86::VUNPCKHPDZrr:
336c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
337fd347ceaSNoah Goldstein   case X86::VUNPCKHPDZ128rrk:
338c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
339fd347ceaSNoah Goldstein   case X86::VUNPCKHPDZ256rrk:
340c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
341fd347ceaSNoah Goldstein   case X86::VUNPCKHPDZrrk:
342c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
343fd347ceaSNoah Goldstein   case X86::VUNPCKHPDZ128rrkz:
344c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
345fd347ceaSNoah Goldstein   case X86::VUNPCKHPDZ256rrkz:
346c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
347fd347ceaSNoah Goldstein   case X86::VUNPCKHPDZrrkz:
348c3f01f13SNoah Goldstein     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
349c3f01f13SNoah Goldstein   case X86::UNPCKLPDrm:
350c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
351c3f01f13SNoah Goldstein   case X86::VUNPCKLPDrm:
352c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
353c3f01f13SNoah Goldstein   case X86::VUNPCKLPDYrm:
354c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
355c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZ128rm:
356c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
357c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZ256rm:
358c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
359c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZrm:
360c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
361c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZ128rmk:
362c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
363c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZ256rmk:
364c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
365c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZrmk:
366c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
367c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZ128rmkz:
368c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
369c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZ256rmkz:
370c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
371c3f01f13SNoah Goldstein   case X86::VUNPCKLPDZrmkz:
372c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
373c3f01f13SNoah Goldstein   case X86::UNPCKHPDrm:
374c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
375c3f01f13SNoah Goldstein   case X86::VUNPCKHPDrm:
376c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
377c3f01f13SNoah Goldstein   case X86::VUNPCKHPDYrm:
378c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
379c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZ128rm:
380c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
381c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZ256rm:
382c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
383c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZrm:
384c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
385c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZ128rmk:
386c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
387c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZ256rmk:
388c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
389c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZrmk:
390c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
391c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZ128rmkz:
392c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
393c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZ256rmkz:
394c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
395c3f01f13SNoah Goldstein   case X86::VUNPCKHPDZrmkz:
396c3f01f13SNoah Goldstein     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
397d6572065SNoah Goldstein 
398d6572065SNoah Goldstein   case X86::UNPCKLPSrr:
399d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::PUNPCKLDQrr);
400d6572065SNoah Goldstein   case X86::VUNPCKLPSrr:
401d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
402d6572065SNoah Goldstein   case X86::VUNPCKLPSYrr:
403d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
404d6572065SNoah Goldstein   case X86::VUNPCKLPSZ128rr:
405d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
406d6572065SNoah Goldstein   case X86::VUNPCKLPSZ256rr:
407d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
408d6572065SNoah Goldstein   case X86::VUNPCKLPSZrr:
409d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
410d6572065SNoah Goldstein   case X86::VUNPCKLPSZ128rrk:
411d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
412d6572065SNoah Goldstein   case X86::VUNPCKLPSZ256rrk:
413d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
414d6572065SNoah Goldstein   case X86::VUNPCKLPSZrrk:
415d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
416d6572065SNoah Goldstein   case X86::VUNPCKLPSZ128rrkz:
417d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
418d6572065SNoah Goldstein   case X86::VUNPCKLPSZ256rrkz:
419d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
420d6572065SNoah Goldstein   case X86::VUNPCKLPSZrrkz:
421d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
422d6572065SNoah Goldstein   case X86::UNPCKHPSrr:
423d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::PUNPCKHDQrr);
424d6572065SNoah Goldstein   case X86::VUNPCKHPSrr:
425d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
426d6572065SNoah Goldstein   case X86::VUNPCKHPSYrr:
427d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
428d6572065SNoah Goldstein   case X86::VUNPCKHPSZ128rr:
429d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
430d6572065SNoah Goldstein   case X86::VUNPCKHPSZ256rr:
431d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
432d6572065SNoah Goldstein   case X86::VUNPCKHPSZrr:
433d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
434d6572065SNoah Goldstein   case X86::VUNPCKHPSZ128rrk:
435d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
436d6572065SNoah Goldstein   case X86::VUNPCKHPSZ256rrk:
437d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
438d6572065SNoah Goldstein   case X86::VUNPCKHPSZrrk:
439d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
440d6572065SNoah Goldstein   case X86::VUNPCKHPSZ128rrkz:
441d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
442d6572065SNoah Goldstein   case X86::VUNPCKHPSZ256rrkz:
443d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
444d6572065SNoah Goldstein   case X86::VUNPCKHPSZrrkz:
445d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
446d6572065SNoah Goldstein   case X86::UNPCKLPSrm:
447d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::PUNPCKLDQrm);
448d6572065SNoah Goldstein   case X86::VUNPCKLPSrm:
449d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
450d6572065SNoah Goldstein   case X86::VUNPCKLPSYrm:
451d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
452d6572065SNoah Goldstein   case X86::VUNPCKLPSZ128rm:
453d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
454d6572065SNoah Goldstein   case X86::VUNPCKLPSZ256rm:
455d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
456d6572065SNoah Goldstein   case X86::VUNPCKLPSZrm:
457d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
458d6572065SNoah Goldstein   case X86::VUNPCKLPSZ128rmk:
459d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
460d6572065SNoah Goldstein   case X86::VUNPCKLPSZ256rmk:
461d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
462d6572065SNoah Goldstein   case X86::VUNPCKLPSZrmk:
463d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
464d6572065SNoah Goldstein   case X86::VUNPCKLPSZ128rmkz:
465d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
466d6572065SNoah Goldstein   case X86::VUNPCKLPSZ256rmkz:
467d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
468d6572065SNoah Goldstein   case X86::VUNPCKLPSZrmkz:
469d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
470d6572065SNoah Goldstein   case X86::UNPCKHPSrm:
471d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::PUNPCKHDQrm);
472d6572065SNoah Goldstein   case X86::VUNPCKHPSrm:
473d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
474d6572065SNoah Goldstein   case X86::VUNPCKHPSYrm:
475d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
476d6572065SNoah Goldstein   case X86::VUNPCKHPSZ128rm:
477d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
478d6572065SNoah Goldstein   case X86::VUNPCKHPSZ256rm:
479d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
480d6572065SNoah Goldstein   case X86::VUNPCKHPSZrm:
481d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
482d6572065SNoah Goldstein   case X86::VUNPCKHPSZ128rmk:
483d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
484d6572065SNoah Goldstein   case X86::VUNPCKHPSZ256rmk:
485d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
486d6572065SNoah Goldstein   case X86::VUNPCKHPSZrmk:
487d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
488d6572065SNoah Goldstein   case X86::VUNPCKHPSZ128rmkz:
489d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
490d6572065SNoah Goldstein   case X86::VUNPCKHPSZ256rmkz:
491d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
492d6572065SNoah Goldstein   case X86::VUNPCKHPSZrmkz:
493d6572065SNoah Goldstein     return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
49469a322feSNoah Goldstein   default:
49569a322feSNoah Goldstein     return false;
49669a322feSNoah Goldstein   }
49769a322feSNoah Goldstein }
49869a322feSNoah Goldstein 
49969a322feSNoah Goldstein bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
50069a322feSNoah Goldstein   LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
50169a322feSNoah Goldstein   bool Changed = false;
50269a322feSNoah Goldstein   ST = &MF.getSubtarget<X86Subtarget>();
50369a322feSNoah Goldstein   TII = ST->getInstrInfo();
5046b29a6f2SNoah Goldstein   SM = &ST->getSchedModel();
5056b29a6f2SNoah Goldstein 
50669a322feSNoah Goldstein   for (MachineBasicBlock &MBB : MF) {
50769a322feSNoah Goldstein     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
50869a322feSNoah Goldstein       if (processInstruction(MF, MBB, I)) {
50969a322feSNoah Goldstein         ++NumInstChanges;
51069a322feSNoah Goldstein         Changed = true;
51169a322feSNoah Goldstein       }
51269a322feSNoah Goldstein     }
51369a322feSNoah Goldstein   }
51469a322feSNoah Goldstein   LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
51569a322feSNoah Goldstein   return Changed;
51669a322feSNoah Goldstein }
517