xref: /llvm-project/llvm/lib/Target/X86/X86FixupInstTuning.cpp (revision dfe43bd1ca46c59399b7cbbf81b09256232e27f9)
//===-- X86FixupInstTuning.cpp - replace instructions ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file does a tuning pass replacing slower machine instructions
10 // with faster ones. We do this here, as opposed to during normal ISel, as
11 // attempting to get the "right" instruction can break patterns. This pass
// is not meant to search for special cases where an instruction can be
// transformed to another, it is only meant to do transformations where the old
// instruction is always replaceable with the new instruction. For example:
15 //
//      `vpermq ymm` -> `vpshufd ymm`
//          -- BAD, not always valid (lane cross/non-repeated mask)
//
//      `vpermilps ymm` -> `vpshufd ymm`
//          -- GOOD, always replaceable
21 //
22 //===----------------------------------------------------------------------===//
23 
24 #include "X86.h"
25 #include "X86InstrInfo.h"
26 #include "X86Subtarget.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "x86-fixup-inst-tuning"
34 
35 STATISTIC(NumInstChanges, "Number of instructions changes");
36 
37 namespace {
38 class X86FixupInstTuningPass : public MachineFunctionPass {
39 public:
40   static char ID;
41 
42   X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
43 
44   StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
45 
46   bool runOnMachineFunction(MachineFunction &MF) override;
47   bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
48                           MachineBasicBlock::iterator &I);
49 
50   // This pass runs after regalloc and doesn't support VReg operands.
51   MachineFunctionProperties getRequiredProperties() const override {
52     return MachineFunctionProperties().set(
53         MachineFunctionProperties::Property::NoVRegs);
54   }
55 
56 private:
57   const X86InstrInfo *TII = nullptr;
58   const X86Subtarget *ST = nullptr;
59   const MCSchedModel *SM = nullptr;
60 };
61 } // end anonymous namespace
62 
63 char X86FixupInstTuningPass::ID = 0;
64 
65 INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
66 
67 FunctionPass *llvm::createX86FixupInstTuning() {
68   return new X86FixupInstTuningPass();
69 }
70 
/// Three-way "is the new value strictly better (smaller)?" comparison on two
/// optional metrics.
///
/// \returns std::nullopt when either metric is missing or when both compare
///          equal (i.e. no decision can be made); otherwise whether
///          \p NewVal is smaller than \p CurVal.
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  // Without both values there is nothing to compare.
  const bool BothKnown = NewVal.has_value() && CurVal.has_value();
  if (!BothKnown)
    return std::nullopt;

  // Values that do not differ give no preference either way.
  if (!(*NewVal != *CurVal))
    return std::nullopt;

  return *NewVal < *CurVal;
}
78 
79 bool X86FixupInstTuningPass::processInstruction(
80     MachineFunction &MF, MachineBasicBlock &MBB,
81     MachineBasicBlock::iterator &I) {
82   MachineInstr &MI = *I;
83   unsigned Opc = MI.getOpcode();
84   unsigned NumOperands = MI.getDesc().getNumOperands();
85 
86   auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
87     // We already checked that SchedModel exists in `NewOpcPreferable`.
88     return MCSchedModel::getReciprocalThroughput(
89         *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
90   };
91 
92   auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
93     // We already checked that SchedModel exists in `NewOpcPreferable`.
94     return MCSchedModel::computeInstrLatency(
95         *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
96   };
97 
98   auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
99     if (unsigned Size = TII->get(Opcode).getSize())
100       return Size;
101     // Zero size means we where unable to compute it.
102     return std::nullopt;
103   };
104 
105   auto NewOpcPreferable = [&](unsigned NewOpc,
106                               bool ReplaceInTie = true) -> bool {
107     std::optional<bool> Res;
108     if (SM->hasInstrSchedModel()) {
109       // Compare tput -> lat -> code size.
110       Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
111       if (Res.has_value())
112         return *Res;
113 
114       Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
115       if (Res.has_value())
116         return *Res;
117     }
118 
119     Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
120     if (Res.has_value())
121       return *Res;
122 
123     // We either have either were unable to get tput/lat/codesize or all values
124     // were equal. Return specified option for a tie.
125     return ReplaceInTie;
126   };
127 
128   // `vpermilpd r, i` -> `vshufpd r, r, i`
129   // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
130   // `vshufpd` is always as fast or faster than `vpermilpd` and takes
131   // 1 less byte of code size for VEX and EVEX encoding.
132   auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
133     if (!NewOpcPreferable(NewOpc))
134       return false;
135     unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
136     MI.removeOperand(NumOperands - 1);
137     MI.addOperand(MI.getOperand(NumOperands - 2));
138     MI.setDesc(TII->get(NewOpc));
139     MI.addOperand(MachineOperand::CreateImm(MaskImm));
140     return true;
141   };
142 
143   // `vpermilps r, i` -> `vshufps r, r, i`
144   // `vpermilps r, i, k` -> `vshufps r, r, i, k`
145   // `vshufps` is always as fast or faster than `vpermilps` and takes
146   // 1 less byte of code size for VEX and EVEX encoding.
147   auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
148     if (!NewOpcPreferable(NewOpc))
149       return false;
150     unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
151     MI.removeOperand(NumOperands - 1);
152     MI.addOperand(MI.getOperand(NumOperands - 2));
153     MI.setDesc(TII->get(NewOpc));
154     MI.addOperand(MachineOperand::CreateImm(MaskImm));
155     return true;
156   };
157 
158   // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
159   // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
160   // byte of code size.
161   auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
162     // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
163     // `vpshufd` saves a byte of code size.
164     if (!ST->hasNoDomainDelayShuffle() ||
165         !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
166       return false;
167     MI.setDesc(TII->get(NewOpc));
168     return true;
169   };
170 
171   // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
172   // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
173   // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
174   // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
175   // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
176   // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
177   // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
178   // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
179   // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
180   //        -> `vunpck{l|h}qdq`
181   // 2) If `vshufpd` faster than `vunpck{l|h}pd`
182   //        -> `vshufpd`
183   //
184   // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
185   auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
186     if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
187       return false;
188 
189     MI.setDesc(TII->get(NewOpc));
190     MI.addOperand(MachineOperand::CreateImm(MaskImm));
191     return true;
192   };
193 
194   auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
195     // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
196     // downside to the integer unpck, but if someone doesn't specify exact
197     // target we won't find it faster.
198     if (!ST->hasNoDomainDelayShuffle() ||
199         !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
200       return false;
201     MI.setDesc(TII->get(NewOpc));
202     return true;
203   };
204 
205   auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
206                                unsigned NewOpc) -> bool {
207     if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
208       return true;
209     return ProcessUNPCK(NewOpc, 0x00);
210   };
211   auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
212                                unsigned NewOpc) -> bool {
213     if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
214       return true;
215     return ProcessUNPCK(NewOpc, 0xff);
216   };
217 
218   auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
219     return ProcessUNPCKToIntDomain(NewOpcIntDomain);
220   };
221 
222   auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
223     return ProcessUNPCKToIntDomain(NewOpc);
224   };
225 
226   switch (Opc) {
227   case X86::VPERMILPDri:
228     return ProcessVPERMILPDri(X86::VSHUFPDrri);
229   case X86::VPERMILPDYri:
230     return ProcessVPERMILPDri(X86::VSHUFPDYrri);
231   case X86::VPERMILPDZ128ri:
232     return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
233   case X86::VPERMILPDZ256ri:
234     return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
235   case X86::VPERMILPDZri:
236     return ProcessVPERMILPDri(X86::VSHUFPDZrri);
237   case X86::VPERMILPDZ128rikz:
238     return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
239   case X86::VPERMILPDZ256rikz:
240     return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
241   case X86::VPERMILPDZrikz:
242     return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
243   case X86::VPERMILPDZ128rik:
244     return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
245   case X86::VPERMILPDZ256rik:
246     return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
247   case X86::VPERMILPDZrik:
248     return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
249 
250   case X86::VPERMILPSri:
251     return ProcessVPERMILPSri(X86::VSHUFPSrri);
252   case X86::VPERMILPSYri:
253     return ProcessVPERMILPSri(X86::VSHUFPSYrri);
254   case X86::VPERMILPSZ128ri:
255     return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
256   case X86::VPERMILPSZ256ri:
257     return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
258   case X86::VPERMILPSZri:
259     return ProcessVPERMILPSri(X86::VSHUFPSZrri);
260   case X86::VPERMILPSZ128rikz:
261     return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
262   case X86::VPERMILPSZ256rikz:
263     return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
264   case X86::VPERMILPSZrikz:
265     return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
266   case X86::VPERMILPSZ128rik:
267     return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
268   case X86::VPERMILPSZ256rik:
269     return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
270   case X86::VPERMILPSZrik:
271     return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
272   case X86::VPERMILPSmi:
273     return ProcessVPERMILPSmi(X86::VPSHUFDmi);
274   case X86::VPERMILPSYmi:
275     // TODO: See if there is a more generic way we can test if the replacement
276     // instruction is supported.
277     return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
278   case X86::VPERMILPSZ128mi:
279     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
280   case X86::VPERMILPSZ256mi:
281     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
282   case X86::VPERMILPSZmi:
283     return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
284   case X86::VPERMILPSZ128mikz:
285     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
286   case X86::VPERMILPSZ256mikz:
287     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
288   case X86::VPERMILPSZmikz:
289     return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
290   case X86::VPERMILPSZ128mik:
291     return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
292   case X86::VPERMILPSZ256mik:
293     return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
294   case X86::VPERMILPSZmik:
295     return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
296 
297   case X86::MOVLHPSrr:
298   case X86::UNPCKLPDrr:
299     return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
300   case X86::VMOVLHPSrr:
301   case X86::VUNPCKLPDrr:
302     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
303   case X86::VUNPCKLPDYrr:
304     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
305     // VMOVLHPS is always 128 bits.
306   case X86::VMOVLHPSZrr:
307   case X86::VUNPCKLPDZ128rr:
308     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
309   case X86::VUNPCKLPDZ256rr:
310     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
311   case X86::VUNPCKLPDZrr:
312     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
313   case X86::VUNPCKLPDZ128rrk:
314     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
315   case X86::VUNPCKLPDZ256rrk:
316     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
317   case X86::VUNPCKLPDZrrk:
318     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
319   case X86::VUNPCKLPDZ128rrkz:
320     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
321   case X86::VUNPCKLPDZ256rrkz:
322     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
323   case X86::VUNPCKLPDZrrkz:
324     return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
325   case X86::UNPCKHPDrr:
326     return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
327   case X86::VUNPCKHPDrr:
328     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
329   case X86::VUNPCKHPDYrr:
330     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
331   case X86::VUNPCKHPDZ128rr:
332     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
333   case X86::VUNPCKHPDZ256rr:
334     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
335   case X86::VUNPCKHPDZrr:
336     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
337   case X86::VUNPCKHPDZ128rrk:
338     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
339   case X86::VUNPCKHPDZ256rrk:
340     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
341   case X86::VUNPCKHPDZrrk:
342     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
343   case X86::VUNPCKHPDZ128rrkz:
344     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
345   case X86::VUNPCKHPDZ256rrkz:
346     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
347   case X86::VUNPCKHPDZrrkz:
348     return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
349   case X86::UNPCKLPDrm:
350     return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
351   case X86::VUNPCKLPDrm:
352     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
353   case X86::VUNPCKLPDYrm:
354     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
355   case X86::VUNPCKLPDZ128rm:
356     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
357   case X86::VUNPCKLPDZ256rm:
358     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
359   case X86::VUNPCKLPDZrm:
360     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
361   case X86::VUNPCKLPDZ128rmk:
362     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
363   case X86::VUNPCKLPDZ256rmk:
364     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
365   case X86::VUNPCKLPDZrmk:
366     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
367   case X86::VUNPCKLPDZ128rmkz:
368     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
369   case X86::VUNPCKLPDZ256rmkz:
370     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
371   case X86::VUNPCKLPDZrmkz:
372     return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
373   case X86::UNPCKHPDrm:
374     return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
375   case X86::VUNPCKHPDrm:
376     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
377   case X86::VUNPCKHPDYrm:
378     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
379   case X86::VUNPCKHPDZ128rm:
380     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
381   case X86::VUNPCKHPDZ256rm:
382     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
383   case X86::VUNPCKHPDZrm:
384     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
385   case X86::VUNPCKHPDZ128rmk:
386     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
387   case X86::VUNPCKHPDZ256rmk:
388     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
389   case X86::VUNPCKHPDZrmk:
390     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
391   case X86::VUNPCKHPDZ128rmkz:
392     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
393   case X86::VUNPCKHPDZ256rmkz:
394     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
395   case X86::VUNPCKHPDZrmkz:
396     return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
397 
398   case X86::UNPCKLPSrr:
399     return ProcessUNPCKPS(X86::PUNPCKLDQrr);
400   case X86::VUNPCKLPSrr:
401     return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
402   case X86::VUNPCKLPSYrr:
403     return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
404   case X86::VUNPCKLPSZ128rr:
405     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
406   case X86::VUNPCKLPSZ256rr:
407     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
408   case X86::VUNPCKLPSZrr:
409     return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
410   case X86::VUNPCKLPSZ128rrk:
411     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
412   case X86::VUNPCKLPSZ256rrk:
413     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
414   case X86::VUNPCKLPSZrrk:
415     return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
416   case X86::VUNPCKLPSZ128rrkz:
417     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
418   case X86::VUNPCKLPSZ256rrkz:
419     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
420   case X86::VUNPCKLPSZrrkz:
421     return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
422   case X86::UNPCKHPSrr:
423     return ProcessUNPCKPS(X86::PUNPCKHDQrr);
424   case X86::VUNPCKHPSrr:
425     return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
426   case X86::VUNPCKHPSYrr:
427     return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
428   case X86::VUNPCKHPSZ128rr:
429     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
430   case X86::VUNPCKHPSZ256rr:
431     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
432   case X86::VUNPCKHPSZrr:
433     return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
434   case X86::VUNPCKHPSZ128rrk:
435     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
436   case X86::VUNPCKHPSZ256rrk:
437     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
438   case X86::VUNPCKHPSZrrk:
439     return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
440   case X86::VUNPCKHPSZ128rrkz:
441     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
442   case X86::VUNPCKHPSZ256rrkz:
443     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
444   case X86::VUNPCKHPSZrrkz:
445     return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
446   case X86::UNPCKLPSrm:
447     return ProcessUNPCKPS(X86::PUNPCKLDQrm);
448   case X86::VUNPCKLPSrm:
449     return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
450   case X86::VUNPCKLPSYrm:
451     return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
452   case X86::VUNPCKLPSZ128rm:
453     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
454   case X86::VUNPCKLPSZ256rm:
455     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
456   case X86::VUNPCKLPSZrm:
457     return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
458   case X86::VUNPCKLPSZ128rmk:
459     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
460   case X86::VUNPCKLPSZ256rmk:
461     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
462   case X86::VUNPCKLPSZrmk:
463     return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
464   case X86::VUNPCKLPSZ128rmkz:
465     return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
466   case X86::VUNPCKLPSZ256rmkz:
467     return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
468   case X86::VUNPCKLPSZrmkz:
469     return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
470   case X86::UNPCKHPSrm:
471     return ProcessUNPCKPS(X86::PUNPCKHDQrm);
472   case X86::VUNPCKHPSrm:
473     return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
474   case X86::VUNPCKHPSYrm:
475     return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
476   case X86::VUNPCKHPSZ128rm:
477     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
478   case X86::VUNPCKHPSZ256rm:
479     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
480   case X86::VUNPCKHPSZrm:
481     return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
482   case X86::VUNPCKHPSZ128rmk:
483     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
484   case X86::VUNPCKHPSZ256rmk:
485     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
486   case X86::VUNPCKHPSZrmk:
487     return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
488   case X86::VUNPCKHPSZ128rmkz:
489     return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
490   case X86::VUNPCKHPSZ256rmkz:
491     return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
492   case X86::VUNPCKHPSZrmkz:
493     return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
494   default:
495     return false;
496   }
497 }
498 
499 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
500   LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
501   bool Changed = false;
502   ST = &MF.getSubtarget<X86Subtarget>();
503   TII = ST->getInstrInfo();
504   SM = &ST->getSchedModel();
505 
506   for (MachineBasicBlock &MBB : MF) {
507     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
508       if (processInstruction(MF, MBB, I)) {
509         ++NumInstChanges;
510         Changed = true;
511       }
512     }
513   }
514   LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
515   return Changed;
516 }
517