1 //===-- X86FixupInstTunings.cpp - replace instructions -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file does a tuning pass replacing slower machine instructions 10 // with faster ones. We do this here, as opposed to during normal ISel, as 11 // attempting to get the "right" instruction can break patterns. This pass 12 // is not meant search for special cases where an instruction can be transformed 13 // to another, it is only meant to do transformations where the old instruction 14 // is always replacable with the new instructions. For example: 15 // 16 // `vpermq ymm` -> `vshufd ymm` 17 // -- BAD, not always valid (lane cross/non-repeated mask) 18 // 19 // `vpermilps ymm` -> `vshufd ymm` 20 // -- GOOD, always replaceable 21 // 22 //===----------------------------------------------------------------------===// 23 24 #include "X86.h" 25 #include "X86InstrInfo.h" 26 #include "X86Subtarget.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "x86-fixup-inst-tuning" 35 36 STATISTIC(NumInstChanges, "Number of instructions changes"); 37 38 namespace { 39 class X86FixupInstTuningPass : public MachineFunctionPass { 40 public: 41 static char ID; 42 43 X86FixupInstTuningPass() : MachineFunctionPass(ID) {} 44 45 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; } 46 47 bool runOnMachineFunction(MachineFunction &MF) override; 48 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, 49 MachineBasicBlock::iterator &I); 50 51 // This pass runs after regalloc and doesn't support VReg operands. 52 MachineFunctionProperties getRequiredProperties() const override { 53 return MachineFunctionProperties().set( 54 MachineFunctionProperties::Property::NoVRegs); 55 } 56 57 private: 58 const X86InstrInfo *TII = nullptr; 59 const X86Subtarget *ST = nullptr; 60 const MCSchedModel *SM = nullptr; 61 }; 62 } // end anonymous namespace 63 64 char X86FixupInstTuningPass::ID = 0; 65 66 INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 67 68 FunctionPass *llvm::createX86FixupInstTuning() { 69 return new X86FixupInstTuningPass(); 70 } 71 72 template <typename T> 73 static std::optional<bool> CmpOptionals(T NewVal, T CurVal) { 74 if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal) 75 return *NewVal < *CurVal; 76 77 return std::nullopt; 78 } 79 80 bool X86FixupInstTuningPass::processInstruction( 81 MachineFunction &MF, MachineBasicBlock &MBB, 82 MachineBasicBlock::iterator &I) { 83 MachineInstr &MI = *I; 84 unsigned Opc = MI.getOpcode(); 85 unsigned NumOperands = MI.getDesc().getNumOperands(); 86 87 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> { 88 // We already checked that SchedModel exists in `NewOpcPreferable`. 89 return MCSchedModel::getReciprocalThroughput( 90 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 91 }; 92 93 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> { 94 // We already checked that SchedModel exists in `NewOpcPreferable`. 95 return MCSchedModel::computeInstrLatency( 96 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 97 }; 98 99 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> { 100 if (unsigned Size = TII->get(Opcode).getSize()) 101 return Size; 102 // Zero size means we where unable to compute it. 103 return std::nullopt; 104 }; 105 106 auto NewOpcPreferable = [&](unsigned NewOpc, 107 bool ReplaceInTie = true) -> bool { 108 std::optional<bool> Res; 109 if (SM->hasInstrSchedModel()) { 110 // Compare tput -> lat -> code size. 111 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc)); 112 if (Res.has_value()) 113 return *Res; 114 115 Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc)); 116 if (Res.has_value()) 117 return *Res; 118 } 119 120 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc)); 121 if (Res.has_value()) 122 return *Res; 123 124 // We either have either were unable to get tput/lat/codesize or all values 125 // were equal. Return specified option for a tie. 126 return ReplaceInTie; 127 }; 128 129 // `vpermilps r, i` -> `vshufps r, r, i` 130 // `vpermilps r, i, k` -> `vshufps r, r, i, k` 131 // `vshufps` is always as fast or faster than `vpermilps` and takes 132 // 1 less byte of code size for VEX and EVEX encoding. 133 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool { 134 if (!NewOpcPreferable(NewOpc)) 135 return false; 136 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 137 MI.removeOperand(NumOperands - 1); 138 MI.addOperand(MI.getOperand(NumOperands - 2)); 139 MI.setDesc(TII->get(NewOpc)); 140 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 141 return true; 142 }; 143 144 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles. 145 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less 146 // byte of code size. 147 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool { 148 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as 149 // `vpshufd` saves a byte of code size. 150 if (!ST->hasNoDomainDelayShuffle() || 151 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 152 return false; 153 MI.setDesc(TII->get(NewOpc)); 154 return true; 155 }; 156 157 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00` 158 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff` 159 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00` 160 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff` 161 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k` 162 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k` 163 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k` 164 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k` 165 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd` 166 // -> `vunpck{l|h}qdq` 167 // 2) If `vshufpd` faster than `vunpck{l|h}pd` 168 // -> `vshufpd` 169 // 170 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay) 171 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool { 172 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 173 return false; 174 175 MI.setDesc(TII->get(NewOpc)); 176 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 177 return true; 178 }; 179 180 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool { 181 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real 182 // downside to the integer unpck, but if someone doesn't specify exact 183 // target we won't find it faster. 184 if (!ST->hasNoDomainDelayShuffle() || 185 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 186 return false; 187 MI.setDesc(TII->get(NewOpc)); 188 return true; 189 }; 190 191 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain, 192 unsigned NewOpc) -> bool { 193 if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 194 return true; 195 return ProcessUNPCK(NewOpc, 0x00); 196 }; 197 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain, 198 unsigned NewOpc) -> bool { 199 if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 200 return true; 201 return ProcessUNPCK(NewOpc, 0xff); 202 }; 203 204 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool { 205 return ProcessUNPCKToIntDomain(NewOpcIntDomain); 206 }; 207 208 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool { 209 return ProcessUNPCKToIntDomain(NewOpc); 210 }; 211 212 switch (Opc) { 213 case X86::VPERMILPSri: 214 return ProcessVPERMILPSri(X86::VSHUFPSrri); 215 case X86::VPERMILPSYri: 216 return ProcessVPERMILPSri(X86::VSHUFPSYrri); 217 case X86::VPERMILPSZ128ri: 218 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri); 219 case X86::VPERMILPSZ256ri: 220 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri); 221 case X86::VPERMILPSZri: 222 return ProcessVPERMILPSri(X86::VSHUFPSZrri); 223 case X86::VPERMILPSZ128rikz: 224 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz); 225 case X86::VPERMILPSZ256rikz: 226 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz); 227 case X86::VPERMILPSZrikz: 228 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz); 229 case X86::VPERMILPSZ128rik: 230 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik); 231 case X86::VPERMILPSZ256rik: 232 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik); 233 case X86::VPERMILPSZrik: 234 return ProcessVPERMILPSri(X86::VSHUFPSZrrik); 235 case X86::VPERMILPSmi: 236 return ProcessVPERMILPSmi(X86::VPSHUFDmi); 237 case X86::VPERMILPSYmi: 238 // TODO: See if there is a more generic way we can test if the replacement 239 // instruction is supported. 240 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false; 241 case X86::VPERMILPSZ128mi: 242 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi); 243 case X86::VPERMILPSZ256mi: 244 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi); 245 case X86::VPERMILPSZmi: 246 return ProcessVPERMILPSmi(X86::VPSHUFDZmi); 247 case X86::VPERMILPSZ128mikz: 248 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz); 249 case X86::VPERMILPSZ256mikz: 250 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz); 251 case X86::VPERMILPSZmikz: 252 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz); 253 case X86::VPERMILPSZ128mik: 254 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik); 255 case X86::VPERMILPSZ256mik: 256 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik); 257 case X86::VPERMILPSZmik: 258 return ProcessVPERMILPSmi(X86::VPSHUFDZmik); 259 260 case X86::MOVLHPSrr: 261 case X86::UNPCKLPDrr: 262 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri); 263 case X86::VMOVLHPSrr: 264 case X86::VUNPCKLPDrr: 265 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri); 266 case X86::VUNPCKLPDYrr: 267 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri); 268 // VMOVLHPS is always 128 bits. 269 case X86::VMOVLHPSZrr: 270 case X86::VUNPCKLPDZ128rr: 271 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri); 272 case X86::VUNPCKLPDZ256rr: 273 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri); 274 case X86::VUNPCKLPDZrr: 275 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri); 276 case X86::VUNPCKLPDZ128rrk: 277 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik); 278 case X86::VUNPCKLPDZ256rrk: 279 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik); 280 case X86::VUNPCKLPDZrrk: 281 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik); 282 case X86::VUNPCKLPDZ128rrkz: 283 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 284 case X86::VUNPCKLPDZ256rrkz: 285 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 286 case X86::VUNPCKLPDZrrkz: 287 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz); 288 case X86::UNPCKHPDrr: 289 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri); 290 case X86::VUNPCKHPDrr: 291 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri); 292 case X86::VUNPCKHPDYrr: 293 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri); 294 case X86::VUNPCKHPDZ128rr: 295 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri); 296 case X86::VUNPCKHPDZ256rr: 297 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri); 298 case X86::VUNPCKHPDZrr: 299 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri); 300 case X86::VUNPCKHPDZ128rrk: 301 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik); 302 case X86::VUNPCKHPDZ256rrk: 303 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik); 304 case X86::VUNPCKHPDZrrk: 305 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik); 306 case X86::VUNPCKHPDZ128rrkz: 307 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 308 case X86::VUNPCKHPDZ256rrkz: 309 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 310 case X86::VUNPCKHPDZrrkz: 311 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz); 312 case X86::UNPCKLPDrm: 313 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm); 314 case X86::VUNPCKLPDrm: 315 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm); 316 case X86::VUNPCKLPDYrm: 317 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm); 318 case X86::VUNPCKLPDZ128rm: 319 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm); 320 case X86::VUNPCKLPDZ256rm: 321 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm); 322 case X86::VUNPCKLPDZrm: 323 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm); 324 case X86::VUNPCKLPDZ128rmk: 325 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk); 326 case X86::VUNPCKLPDZ256rmk: 327 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk); 328 case X86::VUNPCKLPDZrmk: 329 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk); 330 case X86::VUNPCKLPDZ128rmkz: 331 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz); 332 case X86::VUNPCKLPDZ256rmkz: 333 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz); 334 case X86::VUNPCKLPDZrmkz: 335 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz); 336 case X86::UNPCKHPDrm: 337 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm); 338 case X86::VUNPCKHPDrm: 339 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm); 340 case X86::VUNPCKHPDYrm: 341 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm); 342 case X86::VUNPCKHPDZ128rm: 343 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm); 344 case X86::VUNPCKHPDZ256rm: 345 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm); 346 case X86::VUNPCKHPDZrm: 347 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm); 348 case X86::VUNPCKHPDZ128rmk: 349 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk); 350 case X86::VUNPCKHPDZ256rmk: 351 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk); 352 case X86::VUNPCKHPDZrmk: 353 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk); 354 case X86::VUNPCKHPDZ128rmkz: 355 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz); 356 case X86::VUNPCKHPDZ256rmkz: 357 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz); 358 case X86::VUNPCKHPDZrmkz: 359 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz); 360 361 case X86::UNPCKLPSrr: 362 return ProcessUNPCKPS(X86::PUNPCKLDQrr); 363 case X86::VUNPCKLPSrr: 364 return ProcessUNPCKPS(X86::VPUNPCKLDQrr); 365 case X86::VUNPCKLPSYrr: 366 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr); 367 case X86::VUNPCKLPSZ128rr: 368 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr); 369 case X86::VUNPCKLPSZ256rr: 370 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr); 371 case X86::VUNPCKLPSZrr: 372 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr); 373 case X86::VUNPCKLPSZ128rrk: 374 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk); 375 case X86::VUNPCKLPSZ256rrk: 376 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk); 377 case X86::VUNPCKLPSZrrk: 378 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk); 379 case X86::VUNPCKLPSZ128rrkz: 380 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz); 381 case X86::VUNPCKLPSZ256rrkz: 382 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz); 383 case X86::VUNPCKLPSZrrkz: 384 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz); 385 case X86::UNPCKHPSrr: 386 return ProcessUNPCKPS(X86::PUNPCKHDQrr); 387 case X86::VUNPCKHPSrr: 388 return ProcessUNPCKPS(X86::VPUNPCKHDQrr); 389 case X86::VUNPCKHPSYrr: 390 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr); 391 case X86::VUNPCKHPSZ128rr: 392 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr); 393 case X86::VUNPCKHPSZ256rr: 394 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr); 395 case X86::VUNPCKHPSZrr: 396 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr); 397 case X86::VUNPCKHPSZ128rrk: 398 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk); 399 case X86::VUNPCKHPSZ256rrk: 400 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk); 401 case X86::VUNPCKHPSZrrk: 402 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk); 403 case X86::VUNPCKHPSZ128rrkz: 404 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz); 405 case X86::VUNPCKHPSZ256rrkz: 406 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz); 407 case X86::VUNPCKHPSZrrkz: 408 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz); 409 case X86::UNPCKLPSrm: 410 return ProcessUNPCKPS(X86::PUNPCKLDQrm); 411 case X86::VUNPCKLPSrm: 412 return ProcessUNPCKPS(X86::VPUNPCKLDQrm); 413 case X86::VUNPCKLPSYrm: 414 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm); 415 case X86::VUNPCKLPSZ128rm: 416 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm); 417 case X86::VUNPCKLPSZ256rm: 418 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm); 419 case X86::VUNPCKLPSZrm: 420 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm); 421 case X86::VUNPCKLPSZ128rmk: 422 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk); 423 case X86::VUNPCKLPSZ256rmk: 424 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk); 425 case X86::VUNPCKLPSZrmk: 426 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk); 427 case X86::VUNPCKLPSZ128rmkz: 428 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz); 429 case X86::VUNPCKLPSZ256rmkz: 430 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz); 431 case X86::VUNPCKLPSZrmkz: 432 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz); 433 case X86::UNPCKHPSrm: 434 return ProcessUNPCKPS(X86::PUNPCKHDQrm); 435 case X86::VUNPCKHPSrm: 436 return ProcessUNPCKPS(X86::VPUNPCKHDQrm); 437 case X86::VUNPCKHPSYrm: 438 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm); 439 case X86::VUNPCKHPSZ128rm: 440 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm); 441 case X86::VUNPCKHPSZ256rm: 442 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm); 443 case X86::VUNPCKHPSZrm: 444 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm); 445 case X86::VUNPCKHPSZ128rmk: 446 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk); 447 case X86::VUNPCKHPSZ256rmk: 448 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk); 449 case X86::VUNPCKHPSZrmk: 450 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk); 451 case X86::VUNPCKHPSZ128rmkz: 452 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz); 453 case X86::VUNPCKHPSZ256rmkz: 454 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz); 455 case X86::VUNPCKHPSZrmkz: 456 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz); 457 default: 458 return false; 459 } 460 } 461 462 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) { 463 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";); 464 bool Changed = false; 465 ST = &MF.getSubtarget<X86Subtarget>(); 466 TII = ST->getInstrInfo(); 467 SM = &ST->getSchedModel(); 468 469 for (MachineBasicBlock &MBB : MF) { 470 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 471 if (processInstruction(MF, MBB, I)) { 472 ++NumInstChanges; 473 Changed = true; 474 } 475 } 476 } 477 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";); 478 return Changed; 479 } 480