xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86CmovConversion.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This file implements a pass that converts X86 cmov instructions into
110b57cec5SDimitry Andric /// branches when profitable. This pass is conservative. It transforms if and
120b57cec5SDimitry Andric /// only if it can guarantee a gain with high confidence.
130b57cec5SDimitry Andric ///
140b57cec5SDimitry Andric /// Thus, the optimization applies under the following conditions:
150b57cec5SDimitry Andric ///   1. Consider as candidates only CMOVs in innermost loops (assume that
160b57cec5SDimitry Andric ///      most hotspots are represented by these loops).
170b57cec5SDimitry Andric ///   2. Given a group of CMOV instructions that are using the same EFLAGS def
180b57cec5SDimitry Andric ///      instruction:
190b57cec5SDimitry Andric ///      a. Consider them as candidates only if all have the same condition
200b57cec5SDimitry Andric ///         code or the opposite one to prevent generating more than one
210b57cec5SDimitry Andric ///         conditional jump per EFLAGS def instruction.
220b57cec5SDimitry Andric ///      b. Consider them as candidates only if all are profitable to be
230b57cec5SDimitry Andric ///         converted (assume that one bad conversion may cause a degradation).
240b57cec5SDimitry Andric ///   3. Apply conversion only for loops that are found profitable and only for
250b57cec5SDimitry Andric ///      CMOV candidates that were found profitable.
260b57cec5SDimitry Andric ///      a. A loop is considered profitable only if conversion will reduce its
270b57cec5SDimitry Andric ///         depth cost by some threshold.
280b57cec5SDimitry Andric ///      b. CMOV is considered profitable if the cost of its condition is higher
290b57cec5SDimitry Andric ///         than the average cost of its true-value and false-value by 25% of
300b57cec5SDimitry Andric ///         branch-misprediction-penalty. This assures no degradation even with
310b57cec5SDimitry Andric ///         25% branch misprediction.
320b57cec5SDimitry Andric ///
330b57cec5SDimitry Andric /// Note: This pass is assumed to run on SSA machine code.
340b57cec5SDimitry Andric //
350b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric //  External interfaces:
380b57cec5SDimitry Andric //      FunctionPass *llvm::createX86CmovConverterPass();
390b57cec5SDimitry Andric //      bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF);
400b57cec5SDimitry Andric //
410b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric #include "X86.h"
440b57cec5SDimitry Andric #include "X86InstrInfo.h"
450b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
460b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
470b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
480b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
490b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
500b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
510b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
520b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
530b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
540b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
550b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
560b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
570b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
580b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
590b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
600b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
610b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
620b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
630b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
64480093f4SDimitry Andric #include "llvm/InitializePasses.h"
650b57cec5SDimitry Andric #include "llvm/MC/MCSchedule.h"
660b57cec5SDimitry Andric #include "llvm/Pass.h"
670b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
680b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
690b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
70bdd1243dSDimitry Andric #include "llvm/Target/CGPassBuilderOption.h"
710b57cec5SDimitry Andric #include <algorithm>
720b57cec5SDimitry Andric #include <cassert>
730b57cec5SDimitry Andric #include <iterator>
740b57cec5SDimitry Andric #include <utility>
750b57cec5SDimitry Andric 
using namespace llvm;

#define DEBUG_TYPE "x86-cmov-conversion"

STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups");
STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates");
STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops");
STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups");

// This internal switch can be used to turn off the cmov/branch optimization.
static cl::opt<bool>
    EnableCmovConverter("x86-cmov-converter",
                        cl::desc("Enable the X86 cmov-to-branch optimization."),
                        cl::init(true), cl::Hidden);

// Minimum number of cycles a loop conversion must save for the loop to be
// considered profitable (used by checkForProfitableCmovCandidates).
static cl::opt<unsigned>
    GainCycleThreshold("x86-cmov-converter-threshold",
                       cl::desc("Minimum gain per loop (in cycles) threshold."),
                       cl::init(4), cl::Hidden);

// When set (the default), CMOV groups containing a memory-operand CMOV are
// converted unconditionally, bypassing the profitability analysis.
static cl::opt<bool> ForceMemOperand(
    "x86-cmov-converter-force-mem-operand",
    cl::desc("Convert cmovs to branches whenever they have memory operands."),
    cl::init(true), cl::Hidden);

// When set, every collected CMOV group is converted, bypassing both the
// memory-operand filter and the profitability analysis.
static cl::opt<bool> ForceAll(
    "x86-cmov-converter-force-all",
    cl::desc("Convert all cmovs to branches."),
    cl::init(false), cl::Hidden);
10581ad6265SDimitry Andric 
namespace {

/// Converts X86 cmov instructions into branches when profitable.
class X86CmovConverterPass : public MachineFunctionPass {
public:
  X86CmovConverterPass() : MachineFunctionPass(ID) { }

  StringRef getPassName() const override { return "X86 cmov Conversion"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  // Per-function state, (re)initialized at the start of
  // runOnMachineFunction().
  MachineRegisterInfo *MRI = nullptr;
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineLoopInfo *MLI = nullptr;
  TargetSchedModel TSchedModel;

  /// List of consecutive CMOV instructions.
  using CmovGroup = SmallVector<MachineInstr *, 2>;
  /// Collection of CMOV groups found in the region being processed.
  using CmovGroups = SmallVector<CmovGroup, 2>;

  /// Collect all CMOV-group-candidates in \p CurrLoop and update \p
  /// CmovInstGroups accordingly.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \param IncludeLoads Whether CMOVs with memory operands are candidates.
  /// \returns true iff it found any CMOV-group-candidate.
  bool collectCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                             CmovGroups &CmovInstGroups,
                             bool IncludeLoads = false);

  /// Check if it is profitable to transform each CMOV-group-candidates into
  /// branch. Remove all groups that are not profitable from \p CmovInstGroups.
  ///
  /// \param Blocks List of blocks to process.
  /// \param CmovInstGroups List of consecutive CMOV instructions in CurrLoop.
  /// \returns true iff any CMOV-group-candidate remain.
  bool checkForProfitableCmovCandidates(ArrayRef<MachineBasicBlock *> Blocks,
                                        CmovGroups &CmovInstGroups);

  /// Convert the given list of consecutive CMOV instructions into a branch.
  ///
  /// \param Group Consecutive CMOV instructions to be converted into branch.
  void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const;
};

} // end anonymous namespace

char X86CmovConverterPass::ID = 0;
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const {
1610b57cec5SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
162*0fca6ea1SDimitry Andric   AU.addRequired<MachineLoopInfoWrapperPass>();
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric 
bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) {
  // Respect optnone / opt-bisect skipping and the command-line kill switch.
  if (skipFunction(MF.getFunction()))
    return false;
  if (!EnableCmovConverter)
    return false;

  // If the SelectOptimize pass is enabled, cmovs have already been optimized.
  if (!getCGPassBuilderOption().DisableSelectOptimize)
    return false;

  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << "**********\n");

  bool Changed = false;
  // Cache the per-function analyses and target hooks used by the helpers.
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  MRI = &MF.getRegInfo();
  TII = STI.getInstrInfo();
  TRI = STI.getRegisterInfo();
  TSchedModel.init(&STI);

  // Before we handle the more subtle cases of register-register CMOVs inside
  // of potentially hot loops, we want to quickly remove all CMOVs (ForceAll) or
  // the ones with a memory operand (ForceMemOperand option). The latter CMOV
  // will risk a stall waiting for the load to complete that speculative
  // execution behind a branch is better suited to handle on modern x86 chips.
  if (ForceMemOperand || ForceAll) {
    CmovGroups AllCmovGroups;
    SmallVector<MachineBasicBlock *, 4> Blocks;
    for (auto &MBB : MF)
      Blocks.push_back(&MBB);
    if (collectCmovCandidates(Blocks, AllCmovGroups, /*IncludeLoads*/ true)) {
      for (auto &Group : AllCmovGroups) {
        // Skip any group that doesn't do at least one memory operand cmov
        // (unless ForceAll converts everything regardless).
        if (ForceMemOperand && !ForceAll &&
            llvm::none_of(Group, [&](MachineInstr *I) { return I->mayLoad(); }))
          continue;

        // For CMOV groups which we can rewrite and which contain a memory load,
        // always rewrite them. On x86, a CMOV will dramatically amplify any
        // memory latency by blocking speculative execution.
        Changed = true;
        convertCmovInstsToBranches(Group);
      }
    }
    // Early return as ForceAll converts all CmovGroups.
    if (ForceAll)
      return Changed;
  }

  //===--------------------------------------------------------------------===//
  // Register-operand Conversion Algorithm
  // ---------
  //   For each innermost loop
  //     collectCmovCandidates() {
  //       Find all CMOV-group-candidates.
  //     }
  //
  //     checkForProfitableCmovCandidates() {
  //       * Calculate both loop-depth and optimized-loop-depth.
  //       * Use these depths to check for loop transformation profitability.
  //       * Check for CMOV-group-candidate transformation profitability.
  //     }
  //
  //     For each profitable CMOV-group-candidate
  //       convertCmovInstsToBranches() {
  //           * Create FalseBB, SinkBB, Conditional branch to SinkBB.
  //           * Replace each CMOV instruction with a PHI instruction in SinkBB.
  //       }
  //
  // Note: For more details, see each function description.
  //===--------------------------------------------------------------------===//

  // Build up the loops in pre-order.
  SmallVector<MachineLoop *, 4> Loops(MLI->begin(), MLI->end());
  // Note that we need to check size on each iteration as we accumulate child
  // loops (so a range-based for or cached end iterator would be invalid here).
  for (int i = 0; i < (int)Loops.size(); ++i)
    for (MachineLoop *Child : Loops[i]->getSubLoops())
      Loops.push_back(Child);

  for (MachineLoop *CurrLoop : Loops) {
    // Optimize only innermost loops.
    if (!CurrLoop->getSubLoops().empty())
      continue;

    // List of consecutive CMOV instructions to be processed.
    CmovGroups CmovInstGroups;

    if (!collectCmovCandidates(CurrLoop->getBlocks(), CmovInstGroups))
      continue;

    if (!checkForProfitableCmovCandidates(CurrLoop->getBlocks(),
                                          CmovInstGroups))
      continue;

    Changed = true;
    for (auto &Group : CmovInstGroups)
      convertCmovInstsToBranches(Group);
  }

  return Changed;
}
2680b57cec5SDimitry Andric 
bool X86CmovConverterPass::collectCmovCandidates(
    ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups,
    bool IncludeLoads) {
  //===--------------------------------------------------------------------===//
  // Collect all CMOV-group-candidates and add them into CmovInstGroups.
  //
  // CMOV-group:
  //   CMOV instructions, in same MBB, that use the same EFLAGS def instruction.
  //
  // CMOV-group-candidate:
  //   CMOV-group where all the CMOV instructions are
  //     1. consecutive.
  //     2. have same condition code or opposite one.
  //     3. have only operand registers (X86::CMOVrr).
  //===--------------------------------------------------------------------===//
  // List of possible improvement (TODO's):
  // --------------------------------------
  //   TODO: Add support for X86::CMOVrm instructions.
  //   TODO: Add support for X86::SETcc instructions.
  //   TODO: Add support for CMOV-groups with non consecutive CMOV instructions.
  //===--------------------------------------------------------------------===//

  // Current processed CMOV-Group.
  CmovGroup Group;
  for (auto *MBB : Blocks) {
    Group.clear();
    // Condition code of first CMOV instruction current processed range and its
    // opposite condition code.
    X86::CondCode FirstCC = X86::COND_INVALID, FirstOppCC = X86::COND_INVALID,
                  MemOpCC = X86::COND_INVALID;
    // Indicator of a non CMOVrr instruction in the current processed range.
    bool FoundNonCMOVInst = false;
    // Indicator for current processed CMOV-group if it should be skipped.
    bool SkipGroup = false;

    for (auto &I : *MBB) {
      // Skip debug instructions.
      if (I.isDebugInstr())
        continue;

      X86::CondCode CC = X86::getCondFromCMov(I);
      // Check if we found a X86::CMOVrr instruction. If it is marked as
      // unpredictable, skip it and do not convert it to branch.
      if (CC != X86::COND_INVALID &&
          !I.getFlag(MachineInstr::MIFlag::Unpredictable) &&
          (IncludeLoads || !I.mayLoad())) {
        if (Group.empty()) {
          // We found first CMOV in the range, reset flags.
          FirstCC = CC;
          FirstOppCC = X86::GetOppositeBranchCondition(CC);
          // Clear out the prior group's memory operand CC.
          MemOpCC = X86::COND_INVALID;
          FoundNonCMOVInst = false;
          SkipGroup = false;
        }
        Group.push_back(&I);
        // Check if it is a non-consecutive CMOV instruction or it has different
        // condition code than FirstCC or FirstOppCC.
        if (FoundNonCMOVInst || (CC != FirstCC && CC != FirstOppCC))
          // Mark the SkipGroup indicator to skip current processed CMOV-Group.
          SkipGroup = true;
        if (I.mayLoad()) {
          if (MemOpCC == X86::COND_INVALID)
            // The first memory operand CMOV.
            MemOpCC = CC;
          else if (CC != MemOpCC)
            // Can't handle mixed conditions with memory operands.
            SkipGroup = true;
        }
        // Check if we were relying on zero-extending behavior of the CMOV.
        if (!SkipGroup &&
            llvm::any_of(
                MRI->use_nodbg_instructions(I.defs().begin()->getReg()),
                [&](MachineInstr &UseI) {
                  return UseI.getOpcode() == X86::SUBREG_TO_REG;
                }))
          // FIXME: We should model the cost of using an explicit MOV to handle
          // the zero-extension rather than just refusing to handle this.
          SkipGroup = true;
        continue;
      }
      // If Group is empty, keep looking for first CMOV in the range.
      if (Group.empty())
        continue;

      // We found a non X86::CMOVrr instruction.
      FoundNonCMOVInst = true;
      // Check if this instruction defines EFLAGS, to determine end of processed
      // range, as there would be no more instructions using current EFLAGS def.
      if (I.definesRegister(X86::EFLAGS, /*TRI=*/nullptr)) {
        // Check if current processed CMOV-group should not be skipped and add
        // it as a CMOV-group-candidate.
        if (!SkipGroup)
          CmovInstGroups.push_back(Group);
        else
          ++NumOfSkippedCmovGroups;
        Group.clear();
      }
    }
    // End of basic block is considered end of range, check if current processed
    // CMOV-group should not be skipped and add it as a CMOV-group-candidate.
    if (Group.empty())
      continue;
    if (!SkipGroup)
      CmovInstGroups.push_back(Group);
    else
      ++NumOfSkippedCmovGroups;
  }

  NumOfCmovGroupCandidate += CmovInstGroups.size();
  return !CmovInstGroups.empty();
}
3810b57cec5SDimitry Andric 
/// \returns Depth of CMOV instruction as if it was converted into branch.
/// \param TrueOpDepth depth cost of CMOV true value operand.
/// \param FalseOpDepth depth cost of CMOV false value operand.
static unsigned getDepthOfOptCmov(unsigned TrueOpDepth, unsigned FalseOpDepth) {
  // The depth of the result after branch conversion is
  // TrueOpDepth * TrueOpProbability + FalseOpDepth * FalseOpProbability.
  // As we have no info about branch weight, we assume 75% for one and 25% for
  // the other, and pick the result with the largest resulting depth. Each
  // weighted average is rounded up (divide-by-4 with ceiling).
  unsigned MostlyTrue = (TrueOpDepth * 3 + FalseOpDepth + 3) / 4;
  unsigned MostlyFalse = (FalseOpDepth * 3 + TrueOpDepth + 3) / 4;
  return std::max(MostlyTrue, MostlyFalse);
}
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric bool X86CmovConverterPass::checkForProfitableCmovCandidates(
3960b57cec5SDimitry Andric     ArrayRef<MachineBasicBlock *> Blocks, CmovGroups &CmovInstGroups) {
3970b57cec5SDimitry Andric   struct DepthInfo {
3980b57cec5SDimitry Andric     /// Depth of original loop.
3990b57cec5SDimitry Andric     unsigned Depth;
4000b57cec5SDimitry Andric     /// Depth of optimized loop.
4010b57cec5SDimitry Andric     unsigned OptDepth;
4020b57cec5SDimitry Andric   };
4030b57cec5SDimitry Andric   /// Number of loop iterations to calculate depth for ?!
4040b57cec5SDimitry Andric   static const unsigned LoopIterations = 2;
4050b57cec5SDimitry Andric   DenseMap<MachineInstr *, DepthInfo> DepthMap;
4060b57cec5SDimitry Andric   DepthInfo LoopDepth[LoopIterations] = {{0, 0}, {0, 0}};
4070b57cec5SDimitry Andric   enum { PhyRegType = 0, VirRegType = 1, RegTypeNum = 2 };
4080b57cec5SDimitry Andric   /// For each register type maps the register to its last def instruction.
4090b57cec5SDimitry Andric   DenseMap<unsigned, MachineInstr *> RegDefMaps[RegTypeNum];
4100b57cec5SDimitry Andric   /// Maps register operand to its def instruction, which can be nullptr if it
4110b57cec5SDimitry Andric   /// is unknown (e.g., operand is defined outside the loop).
4120b57cec5SDimitry Andric   DenseMap<MachineOperand *, MachineInstr *> OperandToDefMap;
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric   // Set depth of unknown instruction (i.e., nullptr) to zero.
4150b57cec5SDimitry Andric   DepthMap[nullptr] = {0, 0};
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric   SmallPtrSet<MachineInstr *, 4> CmovInstructions;
4180b57cec5SDimitry Andric   for (auto &Group : CmovInstGroups)
4190b57cec5SDimitry Andric     CmovInstructions.insert(Group.begin(), Group.end());
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
4220b57cec5SDimitry Andric   // Step 1: Calculate instruction depth and loop depth.
4230b57cec5SDimitry Andric   // Optimized-Loop:
4240b57cec5SDimitry Andric   //   loop with CMOV-group-candidates converted into branches.
4250b57cec5SDimitry Andric   //
4260b57cec5SDimitry Andric   // Instruction-Depth:
4270b57cec5SDimitry Andric   //   instruction latency + max operand depth.
4280b57cec5SDimitry Andric   //     * For CMOV instruction in optimized loop the depth is calculated as:
4290b57cec5SDimitry Andric   //       CMOV latency + getDepthOfOptCmov(True-Op-Depth, False-Op-depth)
4300b57cec5SDimitry Andric   // TODO: Find a better way to estimate the latency of the branch instruction
4310b57cec5SDimitry Andric   //       rather than using the CMOV latency.
4320b57cec5SDimitry Andric   //
4330b57cec5SDimitry Andric   // Loop-Depth:
4340b57cec5SDimitry Andric   //   max instruction depth of all instructions in the loop.
4350b57cec5SDimitry Andric   // Note: instruction with max depth represents the critical-path in the loop.
4360b57cec5SDimitry Andric   //
4370b57cec5SDimitry Andric   // Loop-Depth[i]:
4380b57cec5SDimitry Andric   //   Loop-Depth calculated for first `i` iterations.
4390b57cec5SDimitry Andric   //   Note: it is enough to calculate depth for up to two iterations.
4400b57cec5SDimitry Andric   //
4410b57cec5SDimitry Andric   // Depth-Diff[i]:
4420b57cec5SDimitry Andric   //   Number of cycles saved in first 'i` iterations by optimizing the loop.
4430b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
444bdd1243dSDimitry Andric   for (DepthInfo &MaxDepth : LoopDepth) {
4450b57cec5SDimitry Andric     for (auto *MBB : Blocks) {
4460b57cec5SDimitry Andric       // Clear physical registers Def map.
4470b57cec5SDimitry Andric       RegDefMaps[PhyRegType].clear();
4480b57cec5SDimitry Andric       for (MachineInstr &MI : *MBB) {
4490b57cec5SDimitry Andric         // Skip debug instructions.
4500b57cec5SDimitry Andric         if (MI.isDebugInstr())
4510b57cec5SDimitry Andric           continue;
4520b57cec5SDimitry Andric         unsigned MIDepth = 0;
4530b57cec5SDimitry Andric         unsigned MIDepthOpt = 0;
4540b57cec5SDimitry Andric         bool IsCMOV = CmovInstructions.count(&MI);
4550b57cec5SDimitry Andric         for (auto &MO : MI.uses()) {
4560b57cec5SDimitry Andric           // Checks for "isUse()" as "uses()" returns also implicit definitions.
4570b57cec5SDimitry Andric           if (!MO.isReg() || !MO.isUse())
4580b57cec5SDimitry Andric             continue;
4598bcb0991SDimitry Andric           Register Reg = MO.getReg();
460e8d8bef9SDimitry Andric           auto &RDM = RegDefMaps[Reg.isVirtual()];
4610b57cec5SDimitry Andric           if (MachineInstr *DefMI = RDM.lookup(Reg)) {
4620b57cec5SDimitry Andric             OperandToDefMap[&MO] = DefMI;
4630b57cec5SDimitry Andric             DepthInfo Info = DepthMap.lookup(DefMI);
4640b57cec5SDimitry Andric             MIDepth = std::max(MIDepth, Info.Depth);
4650b57cec5SDimitry Andric             if (!IsCMOV)
4660b57cec5SDimitry Andric               MIDepthOpt = std::max(MIDepthOpt, Info.OptDepth);
4670b57cec5SDimitry Andric           }
4680b57cec5SDimitry Andric         }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric         if (IsCMOV)
4710b57cec5SDimitry Andric           MIDepthOpt = getDepthOfOptCmov(
4720b57cec5SDimitry Andric               DepthMap[OperandToDefMap.lookup(&MI.getOperand(1))].OptDepth,
4730b57cec5SDimitry Andric               DepthMap[OperandToDefMap.lookup(&MI.getOperand(2))].OptDepth);
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric         // Iterates over all operands to handle implicit definitions as well.
4760b57cec5SDimitry Andric         for (auto &MO : MI.operands()) {
4770b57cec5SDimitry Andric           if (!MO.isReg() || !MO.isDef())
4780b57cec5SDimitry Andric             continue;
4798bcb0991SDimitry Andric           Register Reg = MO.getReg();
480e8d8bef9SDimitry Andric           RegDefMaps[Reg.isVirtual()][Reg] = &MI;
4810b57cec5SDimitry Andric         }
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric         unsigned Latency = TSchedModel.computeInstrLatency(&MI);
4840b57cec5SDimitry Andric         DepthMap[&MI] = {MIDepth += Latency, MIDepthOpt += Latency};
4850b57cec5SDimitry Andric         MaxDepth.Depth = std::max(MaxDepth.Depth, MIDepth);
4860b57cec5SDimitry Andric         MaxDepth.OptDepth = std::max(MaxDepth.OptDepth, MIDepthOpt);
4870b57cec5SDimitry Andric       }
4880b57cec5SDimitry Andric     }
4890b57cec5SDimitry Andric   }
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric   unsigned Diff[LoopIterations] = {LoopDepth[0].Depth - LoopDepth[0].OptDepth,
4920b57cec5SDimitry Andric                                    LoopDepth[1].Depth - LoopDepth[1].OptDepth};
4930b57cec5SDimitry Andric 
4940b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
4950b57cec5SDimitry Andric   // Step 2: Check if Loop worth to be optimized.
4960b57cec5SDimitry Andric   // Worth-Optimize-Loop:
4970b57cec5SDimitry Andric   //   case 1: Diff[1] == Diff[0]
4980b57cec5SDimitry Andric   //           Critical-path is iteration independent - there is no dependency
4990b57cec5SDimitry Andric   //           of critical-path instructions on critical-path instructions of
5000b57cec5SDimitry Andric   //           previous iteration.
5010b57cec5SDimitry Andric   //           Thus, it is enough to check gain percent of 1st iteration -
5020b57cec5SDimitry Andric   //           To be conservative, the optimized loop need to have a depth of
5030b57cec5SDimitry Andric   //           12.5% cycles less than original loop, per iteration.
5040b57cec5SDimitry Andric   //
5050b57cec5SDimitry Andric   //   case 2: Diff[1] > Diff[0]
5060b57cec5SDimitry Andric   //           Critical-path is iteration dependent - there is dependency of
5070b57cec5SDimitry Andric   //           critical-path instructions on critical-path instructions of
5080b57cec5SDimitry Andric   //           previous iteration.
5090b57cec5SDimitry Andric   //           Thus, check the gain percent of the 2nd iteration (similar to the
5100b57cec5SDimitry Andric   //           previous case), but it is also required to check the gradient of
5110b57cec5SDimitry Andric   //           the gain - the change in Depth-Diff compared to the change in
5120b57cec5SDimitry Andric   //           Loop-Depth between 1st and 2nd iterations.
5130b57cec5SDimitry Andric   //           To be conservative, the gradient need to be at least 50%.
5140b57cec5SDimitry Andric   //
5150b57cec5SDimitry Andric   //   In addition, In order not to optimize loops with very small gain, the
5160b57cec5SDimitry Andric   //   gain (in cycles) after 2nd iteration should not be less than a given
5170b57cec5SDimitry Andric   //   threshold. Thus, the check (Diff[1] >= GainCycleThreshold) must apply.
5180b57cec5SDimitry Andric   //
5190b57cec5SDimitry Andric   // If loop is not worth optimizing, remove all CMOV-group-candidates.
5200b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5210b57cec5SDimitry Andric   if (Diff[1] < GainCycleThreshold)
5220b57cec5SDimitry Andric     return false;
5230b57cec5SDimitry Andric 
5240b57cec5SDimitry Andric   bool WorthOptLoop = false;
5250b57cec5SDimitry Andric   if (Diff[1] == Diff[0])
5260b57cec5SDimitry Andric     WorthOptLoop = Diff[0] * 8 >= LoopDepth[0].Depth;
5270b57cec5SDimitry Andric   else if (Diff[1] > Diff[0])
5280b57cec5SDimitry Andric     WorthOptLoop =
5290b57cec5SDimitry Andric         (Diff[1] - Diff[0]) * 2 >= (LoopDepth[1].Depth - LoopDepth[0].Depth) &&
5300b57cec5SDimitry Andric         (Diff[1] * 8 >= LoopDepth[1].Depth);
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric   if (!WorthOptLoop)
5330b57cec5SDimitry Andric     return false;
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric   ++NumOfLoopCandidate;
5360b57cec5SDimitry Andric 
5370b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5380b57cec5SDimitry Andric   // Step 3: Check for each CMOV-group-candidate if it worth to be optimized.
5390b57cec5SDimitry Andric   // Worth-Optimize-Group:
54081ad6265SDimitry Andric   //   Iff it is worth to optimize all CMOV instructions in the group.
5410b57cec5SDimitry Andric   //
5420b57cec5SDimitry Andric   // Worth-Optimize-CMOV:
5430b57cec5SDimitry Andric   //   Predicted branch is faster than CMOV by the difference between depth of
5440b57cec5SDimitry Andric   //   condition operand and depth of taken (predicted) value operand.
5450b57cec5SDimitry Andric   //   To be conservative, the gain of such CMOV transformation should cover at
5460b57cec5SDimitry Andric   //   at least 25% of branch-misprediction-penalty.
5470b57cec5SDimitry Andric   //===--------------------------------------------------------------------===//
5480b57cec5SDimitry Andric   unsigned MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
5490b57cec5SDimitry Andric   CmovGroups TempGroups;
5500b57cec5SDimitry Andric   std::swap(TempGroups, CmovInstGroups);
5510b57cec5SDimitry Andric   for (auto &Group : TempGroups) {
5520b57cec5SDimitry Andric     bool WorthOpGroup = true;
5530b57cec5SDimitry Andric     for (auto *MI : Group) {
5540b57cec5SDimitry Andric       // Avoid CMOV instruction which value is used as a pointer to load from.
5550b57cec5SDimitry Andric       // This is another conservative check to avoid converting CMOV instruction
5560b57cec5SDimitry Andric       // used with tree-search like algorithm, where the branch is unpredicted.
5570b57cec5SDimitry Andric       auto UIs = MRI->use_instructions(MI->defs().begin()->getReg());
558e8d8bef9SDimitry Andric       if (!UIs.empty() && ++UIs.begin() == UIs.end()) {
5590b57cec5SDimitry Andric         unsigned Op = UIs.begin()->getOpcode();
5600b57cec5SDimitry Andric         if (Op == X86::MOV64rm || Op == X86::MOV32rm) {
5610b57cec5SDimitry Andric           WorthOpGroup = false;
5620b57cec5SDimitry Andric           break;
5630b57cec5SDimitry Andric         }
5640b57cec5SDimitry Andric       }
5650b57cec5SDimitry Andric 
5660b57cec5SDimitry Andric       unsigned CondCost =
5670b57cec5SDimitry Andric           DepthMap[OperandToDefMap.lookup(&MI->getOperand(4))].Depth;
5680b57cec5SDimitry Andric       unsigned ValCost = getDepthOfOptCmov(
5690b57cec5SDimitry Andric           DepthMap[OperandToDefMap.lookup(&MI->getOperand(1))].Depth,
5700b57cec5SDimitry Andric           DepthMap[OperandToDefMap.lookup(&MI->getOperand(2))].Depth);
5710b57cec5SDimitry Andric       if (ValCost > CondCost || (CondCost - ValCost) * 4 < MispredictPenalty) {
5720b57cec5SDimitry Andric         WorthOpGroup = false;
5730b57cec5SDimitry Andric         break;
5740b57cec5SDimitry Andric       }
5750b57cec5SDimitry Andric     }
5760b57cec5SDimitry Andric 
5770b57cec5SDimitry Andric     if (WorthOpGroup)
5780b57cec5SDimitry Andric       CmovInstGroups.push_back(Group);
5790b57cec5SDimitry Andric   }
5800b57cec5SDimitry Andric 
5810b57cec5SDimitry Andric   return !CmovInstGroups.empty();
5820b57cec5SDimitry Andric }
5830b57cec5SDimitry Andric 
5840b57cec5SDimitry Andric static bool checkEFLAGSLive(MachineInstr *MI) {
585*0fca6ea1SDimitry Andric   if (MI->killsRegister(X86::EFLAGS, /*TRI=*/nullptr))
5860b57cec5SDimitry Andric     return false;
5870b57cec5SDimitry Andric 
5880b57cec5SDimitry Andric   // The EFLAGS operand of MI might be missing a kill marker.
5890b57cec5SDimitry Andric   // Figure out whether EFLAGS operand should LIVE after MI instruction.
5900b57cec5SDimitry Andric   MachineBasicBlock *BB = MI->getParent();
5910b57cec5SDimitry Andric   MachineBasicBlock::iterator ItrMI = MI;
5920b57cec5SDimitry Andric 
5930b57cec5SDimitry Andric   // Scan forward through BB for a use/def of EFLAGS.
5940b57cec5SDimitry Andric   for (auto I = std::next(ItrMI), E = BB->end(); I != E; ++I) {
595*0fca6ea1SDimitry Andric     if (I->readsRegister(X86::EFLAGS, /*TRI=*/nullptr))
5960b57cec5SDimitry Andric       return true;
597*0fca6ea1SDimitry Andric     if (I->definesRegister(X86::EFLAGS, /*TRI=*/nullptr))
5980b57cec5SDimitry Andric       return false;
5990b57cec5SDimitry Andric   }
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric   // We hit the end of the block, check whether EFLAGS is live into a successor.
602349cc55cSDimitry Andric   for (MachineBasicBlock *Succ : BB->successors())
603349cc55cSDimitry Andric     if (Succ->isLiveIn(X86::EFLAGS))
6040b57cec5SDimitry Andric       return true;
6050b57cec5SDimitry Andric 
6060b57cec5SDimitry Andric   return false;
6070b57cec5SDimitry Andric }
6080b57cec5SDimitry Andric 
6090b57cec5SDimitry Andric /// Given /p First CMOV instruction and /p Last CMOV instruction representing a
6100b57cec5SDimitry Andric /// group of CMOV instructions, which may contain debug instructions in between,
6110b57cec5SDimitry Andric /// move all debug instructions to after the last CMOV instruction, making the
6120b57cec5SDimitry Andric /// CMOV group consecutive.
6130b57cec5SDimitry Andric static void packCmovGroup(MachineInstr *First, MachineInstr *Last) {
6140b57cec5SDimitry Andric   assert(X86::getCondFromCMov(*Last) != X86::COND_INVALID &&
6150b57cec5SDimitry Andric          "Last instruction in a CMOV group must be a CMOV instruction");
6160b57cec5SDimitry Andric 
6170b57cec5SDimitry Andric   SmallVector<MachineInstr *, 2> DBGInstructions;
6180b57cec5SDimitry Andric   for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) {
6190b57cec5SDimitry Andric     if (I->isDebugInstr())
6200b57cec5SDimitry Andric       DBGInstructions.push_back(&*I);
6210b57cec5SDimitry Andric   }
6220b57cec5SDimitry Andric 
6230b57cec5SDimitry Andric   // Splice the debug instruction after the cmov group.
6240b57cec5SDimitry Andric   MachineBasicBlock *MBB = First->getParent();
6250b57cec5SDimitry Andric   for (auto *MI : DBGInstructions)
6260b57cec5SDimitry Andric     MBB->insertAfter(Last, MI->removeFromParent());
6270b57cec5SDimitry Andric }
6280b57cec5SDimitry Andric 
/// Rewrite the packed CMOV group \p Group into explicit control flow: a
/// conditional branch in the original block jumps around a new FalseMBB into a
/// new SinkMBB, where PHI nodes select each CMOV's result. Memory-operand
/// CMOVs are first unfolded so that their load executes only on the false
/// path.
void X86CmovConverterPass::convertCmovInstsToBranches(
    SmallVectorImpl<MachineInstr *> &Group) const {
  assert(!Group.empty() && "No CMOV instructions to convert");
  ++NumOfOptimizedCmovGroups;

  // If the CMOV group is not packed, e.g., there are debug instructions between
  // first CMOV and last CMOV, then pack the group and make the CMOV instruction
  // consecutive by moving the debug instructions to after the last CMOV.
  packCmovGroup(Group.front(), Group.back());

  // To convert a CMOVcc instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.

  // Before
  // -----
  // MBB:
  //   cond = cmp ...
  //   v1 = CMOVge t1, f1, cond
  //   v2 = CMOVlt t2, f2, cond
  //   v3 = CMOVge v1, f3, cond
  //
  // After
  // -----
  // MBB:
  //   cond = cmp ...
  //   jge %SinkMBB
  //
  // FalseMBB:
  //   jmp %SinkMBB
  //
  // SinkMBB:
  //   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
  //   %v2 = phi[%t2, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
  //                                          ; true-value with false-value
  //   %v3 = phi[%f3, %FalseMBB], [%t1, %MBB] ; Phi instruction cannot use
  //                                          ; previous Phi instruction result

  MachineInstr &MI = *Group.front();
  MachineInstr *LastCMOV = Group.back();
  DebugLoc DL = MI.getDebugLoc();

  X86::CondCode CC = X86::CondCode(X86::getCondFromCMov(MI));
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  // Potentially swap the condition codes so that any memory operand to a CMOV
  // is in the *false* position instead of the *true* position. We can invert
  // any non-memory operand CMOV instructions to cope with this and we ensure
  // memory operand CMOVs are only included with a single condition code.
  if (llvm::any_of(Group, [&](MachineInstr *I) {
        return I->mayLoad() && X86::getCondFromCMov(*I) == CC;
      }))
    std::swap(CC, OppCC);

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction::iterator It = ++MBB->getIterator();
  MachineFunction *F = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();

  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB);
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  if (checkEFLAGSLive(LastCMOV)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Add the false and sink blocks as its successors.
  MBB->addSuccessor(FalseMBB);
  MBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(MBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  // Add the sink block to the false block successors.
  FalseMBB->addSuccessor(SinkMBB);

  MachineInstrBuilder MIB;
  // Iterators delimiting the (now consecutive) CMOV group within MBB.
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  MachineBasicBlock::iterator FalseInsertionPoint = FalseMBB->begin();
  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // First we need to insert an explicit load on the false path for any memory
  // operand. We also need to potentially do register rewriting here, but it is
  // simpler as the memory operands are always on the false path so we can
  // simply take that input, whatever it is.
  DenseMap<unsigned, unsigned> FalseBBRegRewriteTable;
  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd;) {
    // Advance the iterator before inspecting MI: MI may be erased from MBB
    // below (memory-operand case), which would invalidate an iterator to it.
    auto &MI = *MIIt++;
    // Skip any CMOVs in this group which don't load from memory.
    if (!MI.mayLoad()) {
      // Remember the false-side register input.
      Register FalseReg =
          MI.getOperand(X86::getCondFromCMov(MI) == CC ? 1 : 2).getReg();
      // Walk back through any intermediate cmovs referenced.
      while (true) {
        auto FRIt = FalseBBRegRewriteTable.find(FalseReg);
        if (FRIt == FalseBBRegRewriteTable.end())
          break;
        FalseReg = FRIt->second;
      }
      FalseBBRegRewriteTable[MI.getOperand(0).getReg()] = FalseReg;
      continue;
    }

    // The condition must be the *opposite* of the one we've decided to branch
    // on as the branch will go *around* the load and the load should happen
    // when the CMOV condition is false.
    assert(X86::getCondFromCMov(MI) == OppCC &&
           "Can only handle memory-operand cmov instructions with a condition "
           "opposite to the selected branch direction.");

    // The goal is to rewrite the cmov from:
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), (mem)
    //
    // to
    //
    //   MBB:
    //     %A = CMOVcc %B (tied), %C
    //   FalseMBB:
    //     %C = MOV (mem)
    //
    // Which will allow the next loop to rewrite the CMOV in terms of a PHI:
    //
    //   MBB:
    //     JMP!cc SinkMBB
    //   FalseMBB:
    //     %C = MOV (mem)
    //   SinkMBB:
    //     %A = PHI [ %C, FalseMBB ], [ %B, MBB]

    // Get a fresh register to use as the destination of the MOV.
    const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
    Register TmpReg = MRI->createVirtualRegister(RC);

    // Retain debug instr number when unfolded.
    unsigned OldDebugInstrNum = MI.peekDebugInstrNum();
    SmallVector<MachineInstr *, 4> NewMIs;
    bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
                                             /*UnfoldLoad*/ true,
                                             /*UnfoldStore*/ false, NewMIs);
    (void)Unfolded;
    assert(Unfolded && "Should never fail to unfold a loading cmov!");

    // Move the new CMOV to just before the old one and reset any impacted
    // iterator.
    auto *NewCMOV = NewMIs.pop_back_val();
    assert(X86::getCondFromCMov(*NewCMOV) == OppCC &&
           "Last new instruction isn't the expected CMOV!");
    LLVM_DEBUG(dbgs() << "\tRewritten cmov: "; NewCMOV->dump());
    MBB->insert(MachineBasicBlock::iterator(MI), NewCMOV);
    if (&*MIItBegin == &MI)
      MIItBegin = MachineBasicBlock::iterator(NewCMOV);

    if (OldDebugInstrNum)
      NewCMOV->setDebugInstrNum(OldDebugInstrNum);

    // Sink whatever instructions were needed to produce the unfolded operand
    // into the false block.
    for (auto *NewMI : NewMIs) {
      LLVM_DEBUG(dbgs() << "\tRewritten load instr: "; NewMI->dump());
      FalseMBB->insert(FalseInsertionPoint, NewMI);
      // Re-map any operands that are from other cmovs to the inputs for this block.
      for (auto &MOp : NewMI->uses()) {
        if (!MOp.isReg())
          continue;
        auto It = FalseBBRegRewriteTable.find(MOp.getReg());
        if (It == FalseBBRegRewriteTable.end())
          continue;

        MOp.setReg(It->second);
        // This might have been a kill when it referenced the cmov result, but
        // it won't necessarily be once rewritten.
        // FIXME: We could potentially improve this by tracking whether the
        // operand to the cmov was also a kill, and then skipping the PHI node
        // construction below.
        MOp.setIsKill(false);
      }
    }
    MBB->erase(&MI);

    // Map the rewritten CMOV's result to the unfolded load's register so that
    // later group members that reference it pick up the false-path value.
    FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
  }

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later CMOVs may reference the results of earlier CMOVs, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV we are processing is the opposite condition from the jump we
    // generated, then we have to swap the operands for the PHI that is going to
    // be generated.
    if (X86::getCondFromCMov(*MIIt) == OppCC)
      std::swap(Op1Reg, Op2Reg);

    auto Op1Itr = RegRewriteTable.find(Op1Reg);
    if (Op1Itr != RegRewriteTable.end())
      Op1Reg = Op1Itr->second.first;

    auto Op2Itr = RegRewriteTable.find(Op2Reg);
    if (Op2Itr != RegRewriteTable.end())
      Op2Reg = Op2Itr->second.second;

    //  SinkMBB:
    //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, MBB ]
    //  ...
    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(MBB);
    (void)MIB;
    LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
    LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());

    // debug-info: we can just copy the instr-ref number from one instruction
    // to the other, seeing how it's a one-for-one substitution.
    if (unsigned InstrNum = MIIt->peekDebugInstrNum())
      MIB->setDebugInstrNum(InstrNum);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  // Reset the NoPHIs property if a PHI was inserted to prevent a conflict with
  // the MachineVerifier during testing.
  if (MIItBegin != MIItEnd)
    F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);

  // Now remove the CMOV(s).
  MBB->erase(MIItBegin, MIItEnd);

  // Add new basic blocks to MachineLoopInfo.
  if (MachineLoop *L = MLI->getLoopFor(MBB)) {
    L->addBasicBlockToLoop(FalseMBB, *MLI);
    L->addBasicBlockToLoop(SinkMBB, *MLI);
  }
}
8890b57cec5SDimitry Andric 
// Register the pass with the legacy pass manager and declare its dependency
// on the machine-loop analysis (used to find innermost-loop candidates).
INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion",
                    false, false)
8950b57cec5SDimitry Andric 
8960b57cec5SDimitry Andric FunctionPass *llvm::createX86CmovConverterPass() {
8970b57cec5SDimitry Andric   return new X86CmovConverterPass();
8980b57cec5SDimitry Andric }
899