1 //===-- X86FixupInstTunings.cpp - replace instructions -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file does a tuning pass replacing slower machine instructions 10 // with faster ones. We do this here, as opposed to during normal ISel, as 11 // attempting to get the "right" instruction can break patterns. This pass 12 // is not meant search for special cases where an instruction can be transformed 13 // to another, it is only meant to do transformations where the old instruction 14 // is always replacable with the new instructions. For example: 15 // 16 // `vpermq ymm` -> `vshufd ymm` 17 // -- BAD, not always valid (lane cross/non-repeated mask) 18 // 19 // `vpermilps ymm` -> `vshufd ymm` 20 // -- GOOD, always replaceable 21 // 22 //===----------------------------------------------------------------------===// 23 24 #include "X86.h" 25 #include "X86InstrInfo.h" 26 #include "X86Subtarget.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "x86-fixup-inst-tuning" 34 35 STATISTIC(NumInstChanges, "Number of instructions changes"); 36 37 namespace { 38 class X86FixupInstTuningPass : public MachineFunctionPass { 39 public: 40 static char ID; 41 42 X86FixupInstTuningPass() : MachineFunctionPass(ID) {} 43 44 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; } 45 46 bool runOnMachineFunction(MachineFunction &MF) override; 47 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB, 48 MachineBasicBlock::iterator &I); 49 50 // This pass runs after regalloc and doesn't support VReg operands. 51 MachineFunctionProperties getRequiredProperties() const override { 52 return MachineFunctionProperties().set( 53 MachineFunctionProperties::Property::NoVRegs); 54 } 55 56 private: 57 const X86InstrInfo *TII = nullptr; 58 const X86Subtarget *ST = nullptr; 59 const MCSchedModel *SM = nullptr; 60 }; 61 } // end anonymous namespace 62 63 char X86FixupInstTuningPass::ID = 0; 64 65 INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false) 66 67 FunctionPass *llvm::createX86FixupInstTuning() { 68 return new X86FixupInstTuningPass(); 69 } 70 71 template <typename T> 72 static std::optional<bool> CmpOptionals(T NewVal, T CurVal) { 73 if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal) 74 return *NewVal < *CurVal; 75 76 return std::nullopt; 77 } 78 79 bool X86FixupInstTuningPass::processInstruction( 80 MachineFunction &MF, MachineBasicBlock &MBB, 81 MachineBasicBlock::iterator &I) { 82 MachineInstr &MI = *I; 83 unsigned Opc = MI.getOpcode(); 84 unsigned NumOperands = MI.getDesc().getNumOperands(); 85 86 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> { 87 // We already checked that SchedModel exists in `NewOpcPreferable`. 88 return MCSchedModel::getReciprocalThroughput( 89 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 90 }; 91 92 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> { 93 // We already checked that SchedModel exists in `NewOpcPreferable`. 94 return MCSchedModel::computeInstrLatency( 95 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass()))); 96 }; 97 98 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> { 99 if (unsigned Size = TII->get(Opcode).getSize()) 100 return Size; 101 // Zero size means we where unable to compute it. 102 return std::nullopt; 103 }; 104 105 auto NewOpcPreferable = [&](unsigned NewOpc, 106 bool ReplaceInTie = true) -> bool { 107 std::optional<bool> Res; 108 if (SM->hasInstrSchedModel()) { 109 // Compare tput -> lat -> code size. 110 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc)); 111 if (Res.has_value()) 112 return *Res; 113 114 Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc)); 115 if (Res.has_value()) 116 return *Res; 117 } 118 119 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc)); 120 if (Res.has_value()) 121 return *Res; 122 123 // We either have either were unable to get tput/lat/codesize or all values 124 // were equal. Return specified option for a tie. 125 return ReplaceInTie; 126 }; 127 128 // `vpermilpd r, i` -> `vshufpd r, r, i` 129 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k` 130 // `vshufpd` is always as fast or faster than `vpermilpd` and takes 131 // 1 less byte of code size for VEX and EVEX encoding. 132 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool { 133 if (!NewOpcPreferable(NewOpc)) 134 return false; 135 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 136 MI.removeOperand(NumOperands - 1); 137 MI.addOperand(MI.getOperand(NumOperands - 2)); 138 MI.setDesc(TII->get(NewOpc)); 139 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 140 return true; 141 }; 142 143 // `vpermilps r, i` -> `vshufps r, r, i` 144 // `vpermilps r, i, k` -> `vshufps r, r, i, k` 145 // `vshufps` is always as fast or faster than `vpermilps` and takes 146 // 1 less byte of code size for VEX and EVEX encoding. 147 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool { 148 if (!NewOpcPreferable(NewOpc)) 149 return false; 150 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm(); 151 MI.removeOperand(NumOperands - 1); 152 MI.addOperand(MI.getOperand(NumOperands - 2)); 153 MI.setDesc(TII->get(NewOpc)); 154 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 155 return true; 156 }; 157 158 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles. 159 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less 160 // byte of code size. 161 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool { 162 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as 163 // `vpshufd` saves a byte of code size. 164 if (!ST->hasNoDomainDelayShuffle() || 165 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 166 return false; 167 MI.setDesc(TII->get(NewOpc)); 168 return true; 169 }; 170 171 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00` 172 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff` 173 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00` 174 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff` 175 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k` 176 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k` 177 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k` 178 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k` 179 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd` 180 // -> `vunpck{l|h}qdq` 181 // 2) If `vshufpd` faster than `vunpck{l|h}pd` 182 // -> `vshufpd` 183 // 184 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay) 185 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool { 186 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 187 return false; 188 189 MI.setDesc(TII->get(NewOpc)); 190 MI.addOperand(MachineOperand::CreateImm(MaskImm)); 191 return true; 192 }; 193 194 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool { 195 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real 196 // downside to the integer unpck, but if someone doesn't specify exact 197 // target we won't find it faster. 198 if (!ST->hasNoDomainDelayShuffle() || 199 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false)) 200 return false; 201 MI.setDesc(TII->get(NewOpc)); 202 return true; 203 }; 204 205 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain, 206 unsigned NewOpc) -> bool { 207 if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 208 return true; 209 return ProcessUNPCK(NewOpc, 0x00); 210 }; 211 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain, 212 unsigned NewOpc) -> bool { 213 if (ProcessUNPCKToIntDomain(NewOpcIntDomain)) 214 return true; 215 return ProcessUNPCK(NewOpc, 0xff); 216 }; 217 218 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool { 219 return ProcessUNPCKToIntDomain(NewOpcIntDomain); 220 }; 221 222 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool { 223 return ProcessUNPCKToIntDomain(NewOpc); 224 }; 225 226 switch (Opc) { 227 case X86::VPERMILPDri: 228 return ProcessVPERMILPDri(X86::VSHUFPDrri); 229 case X86::VPERMILPDYri: 230 return ProcessVPERMILPDri(X86::VSHUFPDYrri); 231 case X86::VPERMILPDZ128ri: 232 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri); 233 case X86::VPERMILPDZ256ri: 234 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri); 235 case X86::VPERMILPDZri: 236 return ProcessVPERMILPDri(X86::VSHUFPDZrri); 237 case X86::VPERMILPDZ128rikz: 238 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz); 239 case X86::VPERMILPDZ256rikz: 240 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz); 241 case X86::VPERMILPDZrikz: 242 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz); 243 case X86::VPERMILPDZ128rik: 244 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik); 245 case X86::VPERMILPDZ256rik: 246 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik); 247 case X86::VPERMILPDZrik: 248 return ProcessVPERMILPDri(X86::VSHUFPDZrrik); 249 250 case X86::VPERMILPSri: 251 return ProcessVPERMILPSri(X86::VSHUFPSrri); 252 case X86::VPERMILPSYri: 253 return ProcessVPERMILPSri(X86::VSHUFPSYrri); 254 case X86::VPERMILPSZ128ri: 255 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri); 256 case X86::VPERMILPSZ256ri: 257 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri); 258 case X86::VPERMILPSZri: 259 return ProcessVPERMILPSri(X86::VSHUFPSZrri); 260 case X86::VPERMILPSZ128rikz: 261 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz); 262 case X86::VPERMILPSZ256rikz: 263 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz); 264 case X86::VPERMILPSZrikz: 265 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz); 266 case X86::VPERMILPSZ128rik: 267 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik); 268 case X86::VPERMILPSZ256rik: 269 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik); 270 case X86::VPERMILPSZrik: 271 return ProcessVPERMILPSri(X86::VSHUFPSZrrik); 272 case X86::VPERMILPSmi: 273 return ProcessVPERMILPSmi(X86::VPSHUFDmi); 274 case X86::VPERMILPSYmi: 275 // TODO: See if there is a more generic way we can test if the replacement 276 // instruction is supported. 277 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false; 278 case X86::VPERMILPSZ128mi: 279 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi); 280 case X86::VPERMILPSZ256mi: 281 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi); 282 case X86::VPERMILPSZmi: 283 return ProcessVPERMILPSmi(X86::VPSHUFDZmi); 284 case X86::VPERMILPSZ128mikz: 285 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz); 286 case X86::VPERMILPSZ256mikz: 287 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz); 288 case X86::VPERMILPSZmikz: 289 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz); 290 case X86::VPERMILPSZ128mik: 291 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik); 292 case X86::VPERMILPSZ256mik: 293 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik); 294 case X86::VPERMILPSZmik: 295 return ProcessVPERMILPSmi(X86::VPSHUFDZmik); 296 297 case X86::MOVLHPSrr: 298 case X86::UNPCKLPDrr: 299 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri); 300 case X86::VMOVLHPSrr: 301 case X86::VUNPCKLPDrr: 302 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri); 303 case X86::VUNPCKLPDYrr: 304 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri); 305 // VMOVLHPS is always 128 bits. 306 case X86::VMOVLHPSZrr: 307 case X86::VUNPCKLPDZ128rr: 308 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri); 309 case X86::VUNPCKLPDZ256rr: 310 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri); 311 case X86::VUNPCKLPDZrr: 312 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri); 313 case X86::VUNPCKLPDZ128rrk: 314 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik); 315 case X86::VUNPCKLPDZ256rrk: 316 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik); 317 case X86::VUNPCKLPDZrrk: 318 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik); 319 case X86::VUNPCKLPDZ128rrkz: 320 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 321 case X86::VUNPCKLPDZ256rrkz: 322 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 323 case X86::VUNPCKLPDZrrkz: 324 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz); 325 case X86::UNPCKHPDrr: 326 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri); 327 case X86::VUNPCKHPDrr: 328 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri); 329 case X86::VUNPCKHPDYrr: 330 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri); 331 case X86::VUNPCKHPDZ128rr: 332 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri); 333 case X86::VUNPCKHPDZ256rr: 334 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri); 335 case X86::VUNPCKHPDZrr: 336 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri); 337 case X86::VUNPCKHPDZ128rrk: 338 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik); 339 case X86::VUNPCKHPDZ256rrk: 340 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik); 341 case X86::VUNPCKHPDZrrk: 342 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik); 343 case X86::VUNPCKHPDZ128rrkz: 344 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz); 345 case X86::VUNPCKHPDZ256rrkz: 346 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz); 347 case X86::VUNPCKHPDZrrkz: 348 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz); 349 case X86::UNPCKLPDrm: 350 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm); 351 case X86::VUNPCKLPDrm: 352 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm); 353 case X86::VUNPCKLPDYrm: 354 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm); 355 case X86::VUNPCKLPDZ128rm: 356 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm); 357 case X86::VUNPCKLPDZ256rm: 358 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm); 359 case X86::VUNPCKLPDZrm: 360 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm); 361 case X86::VUNPCKLPDZ128rmk: 362 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk); 363 case X86::VUNPCKLPDZ256rmk: 364 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk); 365 case X86::VUNPCKLPDZrmk: 366 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk); 367 case X86::VUNPCKLPDZ128rmkz: 368 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz); 369 case X86::VUNPCKLPDZ256rmkz: 370 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz); 371 case X86::VUNPCKLPDZrmkz: 372 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz); 373 case X86::UNPCKHPDrm: 374 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm); 375 case X86::VUNPCKHPDrm: 376 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm); 377 case X86::VUNPCKHPDYrm: 378 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm); 379 case X86::VUNPCKHPDZ128rm: 380 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm); 381 case X86::VUNPCKHPDZ256rm: 382 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm); 383 case X86::VUNPCKHPDZrm: 384 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm); 385 case X86::VUNPCKHPDZ128rmk: 386 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk); 387 case X86::VUNPCKHPDZ256rmk: 388 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk); 389 case X86::VUNPCKHPDZrmk: 390 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk); 391 case X86::VUNPCKHPDZ128rmkz: 392 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz); 393 case X86::VUNPCKHPDZ256rmkz: 394 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz); 395 case X86::VUNPCKHPDZrmkz: 396 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz); 397 398 case X86::UNPCKLPSrr: 399 return ProcessUNPCKPS(X86::PUNPCKLDQrr); 400 case X86::VUNPCKLPSrr: 401 return ProcessUNPCKPS(X86::VPUNPCKLDQrr); 402 case X86::VUNPCKLPSYrr: 403 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr); 404 case X86::VUNPCKLPSZ128rr: 405 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr); 406 case X86::VUNPCKLPSZ256rr: 407 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr); 408 case X86::VUNPCKLPSZrr: 409 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr); 410 case X86::VUNPCKLPSZ128rrk: 411 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk); 412 case X86::VUNPCKLPSZ256rrk: 413 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk); 414 case X86::VUNPCKLPSZrrk: 415 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk); 416 case X86::VUNPCKLPSZ128rrkz: 417 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz); 418 case X86::VUNPCKLPSZ256rrkz: 419 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz); 420 case X86::VUNPCKLPSZrrkz: 421 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz); 422 case X86::UNPCKHPSrr: 423 return ProcessUNPCKPS(X86::PUNPCKHDQrr); 424 case X86::VUNPCKHPSrr: 425 return ProcessUNPCKPS(X86::VPUNPCKHDQrr); 426 case X86::VUNPCKHPSYrr: 427 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr); 428 case X86::VUNPCKHPSZ128rr: 429 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr); 430 case X86::VUNPCKHPSZ256rr: 431 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr); 432 case X86::VUNPCKHPSZrr: 433 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr); 434 case X86::VUNPCKHPSZ128rrk: 435 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk); 436 case X86::VUNPCKHPSZ256rrk: 437 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk); 438 case X86::VUNPCKHPSZrrk: 439 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk); 440 case X86::VUNPCKHPSZ128rrkz: 441 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz); 442 case X86::VUNPCKHPSZ256rrkz: 443 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz); 444 case X86::VUNPCKHPSZrrkz: 445 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz); 446 case X86::UNPCKLPSrm: 447 return ProcessUNPCKPS(X86::PUNPCKLDQrm); 448 case X86::VUNPCKLPSrm: 449 return ProcessUNPCKPS(X86::VPUNPCKLDQrm); 450 case X86::VUNPCKLPSYrm: 451 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm); 452 case X86::VUNPCKLPSZ128rm: 453 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm); 454 case X86::VUNPCKLPSZ256rm: 455 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm); 456 case X86::VUNPCKLPSZrm: 457 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm); 458 case X86::VUNPCKLPSZ128rmk: 459 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk); 460 case X86::VUNPCKLPSZ256rmk: 461 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk); 462 case X86::VUNPCKLPSZrmk: 463 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk); 464 case X86::VUNPCKLPSZ128rmkz: 465 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz); 466 case X86::VUNPCKLPSZ256rmkz: 467 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz); 468 case X86::VUNPCKLPSZrmkz: 469 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz); 470 case X86::UNPCKHPSrm: 471 return ProcessUNPCKPS(X86::PUNPCKHDQrm); 472 case X86::VUNPCKHPSrm: 473 return ProcessUNPCKPS(X86::VPUNPCKHDQrm); 474 case X86::VUNPCKHPSYrm: 475 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm); 476 case X86::VUNPCKHPSZ128rm: 477 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm); 478 case X86::VUNPCKHPSZ256rm: 479 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm); 480 case X86::VUNPCKHPSZrm: 481 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm); 482 case X86::VUNPCKHPSZ128rmk: 483 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk); 484 case X86::VUNPCKHPSZ256rmk: 485 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk); 486 case X86::VUNPCKHPSZrmk: 487 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk); 488 case X86::VUNPCKHPSZ128rmkz: 489 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz); 490 case X86::VUNPCKHPSZ256rmkz: 491 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz); 492 case X86::VUNPCKHPSZrmkz: 493 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz); 494 default: 495 return false; 496 } 497 } 498 499 bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) { 500 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";); 501 bool Changed = false; 502 ST = &MF.getSubtarget<X86Subtarget>(); 503 TII = ST->getInstrInfo(); 504 SM = &ST->getSchedModel(); 505 506 for (MachineBasicBlock &MBB : MF) { 507 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { 508 if (processInstruction(MF, MBB, I)) { 509 ++NumInstChanges; 510 Changed = true; 511 } 512 } 513 } 514 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";); 515 return Changed; 516 } 517