xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric ///
100b57cec5SDimitry Andric /// Provide a pass which mitigates speculative execution attacks which operate
110b57cec5SDimitry Andric /// by speculating incorrectly past some predicate (a type check, bounds check,
120b57cec5SDimitry Andric /// or other condition) to reach a load with invalid inputs and leak the data
130b57cec5SDimitry Andric /// accessed by that load using a side channel out of the speculative domain.
140b57cec5SDimitry Andric ///
150b57cec5SDimitry Andric /// For details on the attacks, see the first variant in both the Project Zero
160b57cec5SDimitry Andric /// writeup and the Spectre paper:
170b57cec5SDimitry Andric /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
180b57cec5SDimitry Andric /// https://spectreattack.com/spectre.pdf
190b57cec5SDimitry Andric ///
200b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
210b57cec5SDimitry Andric 
220b57cec5SDimitry Andric #include "X86.h"
230b57cec5SDimitry Andric #include "X86InstrBuilder.h"
240b57cec5SDimitry Andric #include "X86InstrInfo.h"
250b57cec5SDimitry Andric #include "X86Subtarget.h"
260b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
270b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
280b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
290b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
300b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
310b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
320b57cec5SDimitry Andric #include "llvm/ADT/SparseBitVector.h"
330b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineConstantPool.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
380b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
390b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
400b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
410b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
420b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
430b57cec5SDimitry Andric #include "llvm/CodeGen/MachineSSAUpdater.h"
440b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
450b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
460b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
470b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
480b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
490b57cec5SDimitry Andric #include "llvm/MC/MCSchedule.h"
500b57cec5SDimitry Andric #include "llvm/Pass.h"
510b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
520b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
530b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
545ffd83dbSDimitry Andric #include "llvm/Target/TargetMachine.h"
550b57cec5SDimitry Andric #include <algorithm>
560b57cec5SDimitry Andric #include <cassert>
570b57cec5SDimitry Andric #include <iterator>
58bdd1243dSDimitry Andric #include <optional>
590b57cec5SDimitry Andric #include <utility>
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric using namespace llvm;
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric #define PASS_KEY "x86-slh"
640b57cec5SDimitry Andric #define DEBUG_TYPE PASS_KEY
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
670b57cec5SDimitry Andric STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
680b57cec5SDimitry Andric STATISTIC(NumAddrRegsHardened,
690b57cec5SDimitry Andric           "Number of address mode used registers hardaned");
700b57cec5SDimitry Andric STATISTIC(NumPostLoadRegsHardened,
710b57cec5SDimitry Andric           "Number of post-load register values hardened");
720b57cec5SDimitry Andric STATISTIC(NumCallsOrJumpsHardened,
730b57cec5SDimitry Andric           "Number of calls or jumps requiring extra hardening");
740b57cec5SDimitry Andric STATISTIC(NumInstsInserted, "Number of instructions inserted");
750b57cec5SDimitry Andric STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric static cl::opt<bool> EnableSpeculativeLoadHardening(
780b57cec5SDimitry Andric     "x86-speculative-load-hardening",
790b57cec5SDimitry Andric     cl::desc("Force enable speculative load hardening"), cl::init(false),
800b57cec5SDimitry Andric     cl::Hidden);
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric static cl::opt<bool> HardenEdgesWithLFENCE(
830b57cec5SDimitry Andric     PASS_KEY "-lfence",
840b57cec5SDimitry Andric     cl::desc(
850b57cec5SDimitry Andric         "Use LFENCE along each conditional edge to harden against speculative "
860b57cec5SDimitry Andric         "loads rather than conditional movs and poisoned pointers."),
870b57cec5SDimitry Andric     cl::init(false), cl::Hidden);
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric static cl::opt<bool> EnablePostLoadHardening(
900b57cec5SDimitry Andric     PASS_KEY "-post-load",
910b57cec5SDimitry Andric     cl::desc("Harden the value loaded *after* it is loaded by "
920b57cec5SDimitry Andric              "flushing the loaded bits to 1. This is hard to do "
930b57cec5SDimitry Andric              "in general but can be done easily for GPRs."),
940b57cec5SDimitry Andric     cl::init(true), cl::Hidden);
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric static cl::opt<bool> FenceCallAndRet(
970b57cec5SDimitry Andric     PASS_KEY "-fence-call-and-ret",
980b57cec5SDimitry Andric     cl::desc("Use a full speculation fence to harden both call and ret edges "
990b57cec5SDimitry Andric              "rather than a lighter weight mitigation."),
1000b57cec5SDimitry Andric     cl::init(false), cl::Hidden);
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric static cl::opt<bool> HardenInterprocedurally(
1030b57cec5SDimitry Andric     PASS_KEY "-ip",
1040b57cec5SDimitry Andric     cl::desc("Harden interprocedurally by passing our state in and out of "
1050b57cec5SDimitry Andric              "functions in the high bits of the stack pointer."),
1060b57cec5SDimitry Andric     cl::init(true), cl::Hidden);
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric static cl::opt<bool>
1090b57cec5SDimitry Andric     HardenLoads(PASS_KEY "-loads",
1100b57cec5SDimitry Andric                 cl::desc("Sanitize loads from memory. When disable, no "
1110b57cec5SDimitry Andric                          "significant security is provided."),
1120b57cec5SDimitry Andric                 cl::init(true), cl::Hidden);
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric static cl::opt<bool> HardenIndirectCallsAndJumps(
1150b57cec5SDimitry Andric     PASS_KEY "-indirect",
1160b57cec5SDimitry Andric     cl::desc("Harden indirect calls and jumps against using speculatively "
1170b57cec5SDimitry Andric              "stored attacker controlled addresses. This is designed to "
1180b57cec5SDimitry Andric              "mitigate Spectre v1.2 style attacks."),
1190b57cec5SDimitry Andric     cl::init(true), cl::Hidden);
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric namespace {
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
1240b57cec5SDimitry Andric public:
1250b57cec5SDimitry Andric   X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric   StringRef getPassName() const override {
1280b57cec5SDimitry Andric     return "X86 speculative load hardening";
1290b57cec5SDimitry Andric   }
1300b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
1310b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric   /// Pass identification, replacement for typeid.
1340b57cec5SDimitry Andric   static char ID;
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric private:
1370b57cec5SDimitry Andric   /// The information about a block's conditional terminators needed to trace
1380b57cec5SDimitry Andric   /// our predicate state through the exiting edges.
1390b57cec5SDimitry Andric   struct BlockCondInfo {
1400b57cec5SDimitry Andric     MachineBasicBlock *MBB;
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric     // We mostly have one conditional branch, and in extremely rare cases have
1430b57cec5SDimitry Andric     // two. Three and more are so rare as to be unimportant for compile time.
1440b57cec5SDimitry Andric     SmallVector<MachineInstr *, 2> CondBrs;
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric     MachineInstr *UncondBr;
1470b57cec5SDimitry Andric   };
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   /// Manages the predicate state traced through the program.
1500b57cec5SDimitry Andric   struct PredState {
151480093f4SDimitry Andric     unsigned InitialReg = 0;
152480093f4SDimitry Andric     unsigned PoisonReg = 0;
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric     const TargetRegisterClass *RC;
1550b57cec5SDimitry Andric     MachineSSAUpdater SSA;
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric     PredState(MachineFunction &MF, const TargetRegisterClass *RC)
1580b57cec5SDimitry Andric         : RC(RC), SSA(MF) {}
1590b57cec5SDimitry Andric   };
1600b57cec5SDimitry Andric 
161480093f4SDimitry Andric   const X86Subtarget *Subtarget = nullptr;
162480093f4SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
163480093f4SDimitry Andric   const X86InstrInfo *TII = nullptr;
164480093f4SDimitry Andric   const TargetRegisterInfo *TRI = nullptr;
1650b57cec5SDimitry Andric 
166bdd1243dSDimitry Andric   std::optional<PredState> PS;
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric   void hardenEdgesWithLFENCE(MachineFunction &MF);
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric   SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   SmallVector<MachineInstr *, 16>
1730b57cec5SDimitry Andric   tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   void unfoldCallAndJumpLoads(MachineFunction &MF);
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric   SmallVector<MachineInstr *, 16>
1780b57cec5SDimitry Andric   tracePredStateThroughIndirectBranches(MachineFunction &MF);
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric   void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric   unsigned saveEFLAGS(MachineBasicBlock &MBB,
18381ad6265SDimitry Andric                       MachineBasicBlock::iterator InsertPt,
18481ad6265SDimitry Andric                       const DebugLoc &Loc);
1850b57cec5SDimitry Andric   void restoreEFLAGS(MachineBasicBlock &MBB,
18681ad6265SDimitry Andric                      MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
187e8d8bef9SDimitry Andric                      Register Reg);
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   void mergePredStateIntoSP(MachineBasicBlock &MBB,
19081ad6265SDimitry Andric                             MachineBasicBlock::iterator InsertPt,
19181ad6265SDimitry Andric                             const DebugLoc &Loc, unsigned PredStateReg);
1920b57cec5SDimitry Andric   unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
1930b57cec5SDimitry Andric                                   MachineBasicBlock::iterator InsertPt,
19481ad6265SDimitry Andric                                   const DebugLoc &Loc);
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric   void
1970b57cec5SDimitry Andric   hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
1980b57cec5SDimitry Andric                  MachineOperand &IndexMO,
1990b57cec5SDimitry Andric                  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
2000b57cec5SDimitry Andric   MachineInstr *
2010b57cec5SDimitry Andric   sinkPostLoadHardenedInst(MachineInstr &MI,
2020b57cec5SDimitry Andric                            SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
203e8d8bef9SDimitry Andric   bool canHardenRegister(Register Reg);
204e8d8bef9SDimitry Andric   unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
2050b57cec5SDimitry Andric                                  MachineBasicBlock::iterator InsertPt,
20681ad6265SDimitry Andric                                  const DebugLoc &Loc);
2070b57cec5SDimitry Andric   unsigned hardenPostLoad(MachineInstr &MI);
2080b57cec5SDimitry Andric   void hardenReturnInstr(MachineInstr &MI);
2090b57cec5SDimitry Andric   void tracePredStateThroughCall(MachineInstr &MI);
2100b57cec5SDimitry Andric   void hardenIndirectCallOrJumpInstr(
2110b57cec5SDimitry Andric       MachineInstr &MI,
2120b57cec5SDimitry Andric       SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
2130b57cec5SDimitry Andric };
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric } // end anonymous namespace
2160b57cec5SDimitry Andric 
2170b57cec5SDimitry Andric char X86SpeculativeLoadHardeningPass::ID = 0;
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
2200b57cec5SDimitry Andric     AnalysisUsage &AU) const {
2210b57cec5SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
2220b57cec5SDimitry Andric }
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
2250b57cec5SDimitry Andric                                     MachineBasicBlock &Succ, int SuccCount,
2260b57cec5SDimitry Andric                                     MachineInstr *Br, MachineInstr *&UncondBr,
2270b57cec5SDimitry Andric                                     const X86InstrInfo &TII) {
2280b57cec5SDimitry Andric   assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric   MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   // We have to insert the new block immediately after the current one as we
2350b57cec5SDimitry Andric   // don't know what layout-successor relationships the successor has and we
2360b57cec5SDimitry Andric   // may not be able to (and generally don't want to) try to fix those up.
2370b57cec5SDimitry Andric   MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   // Update the branch instruction if necessary.
2400b57cec5SDimitry Andric   if (Br) {
2410b57cec5SDimitry Andric     assert(Br->getOperand(0).getMBB() == &Succ &&
2420b57cec5SDimitry Andric            "Didn't start with the right target!");
2430b57cec5SDimitry Andric     Br->getOperand(0).setMBB(&NewMBB);
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric     // If this successor was reached through a branch rather than fallthrough,
2460b57cec5SDimitry Andric     // we might have *broken* fallthrough and so need to inject a new
2470b57cec5SDimitry Andric     // unconditional branch.
2480b57cec5SDimitry Andric     if (!UncondBr) {
2490b57cec5SDimitry Andric       MachineBasicBlock &OldLayoutSucc =
2500b57cec5SDimitry Andric           *std::next(MachineFunction::iterator(&NewMBB));
2510b57cec5SDimitry Andric       assert(MBB.isSuccessor(&OldLayoutSucc) &&
2520b57cec5SDimitry Andric              "Without an unconditional branch, the old layout successor should "
2530b57cec5SDimitry Andric              "be an actual successor!");
2540b57cec5SDimitry Andric       auto BrBuilder =
2550b57cec5SDimitry Andric           BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
2560b57cec5SDimitry Andric       // Update the unconditional branch now that we've added one.
2570b57cec5SDimitry Andric       UncondBr = &*BrBuilder;
2580b57cec5SDimitry Andric     }
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric     // Insert unconditional "jump Succ" instruction in the new block if
2610b57cec5SDimitry Andric     // necessary.
2620b57cec5SDimitry Andric     if (!NewMBB.isLayoutSuccessor(&Succ)) {
2630b57cec5SDimitry Andric       SmallVector<MachineOperand, 4> Cond;
2640b57cec5SDimitry Andric       TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
2650b57cec5SDimitry Andric     }
2660b57cec5SDimitry Andric   } else {
2670b57cec5SDimitry Andric     assert(!UncondBr &&
2680b57cec5SDimitry Andric            "Cannot have a branchless successor and an unconditional branch!");
2690b57cec5SDimitry Andric     assert(NewMBB.isLayoutSuccessor(&Succ) &&
2700b57cec5SDimitry Andric            "A non-branch successor must have been a layout successor before "
2710b57cec5SDimitry Andric            "and now is a layout successor of the new block.");
2720b57cec5SDimitry Andric   }
2730b57cec5SDimitry Andric 
2740b57cec5SDimitry Andric   // If this is the only edge to the successor, we can just replace it in the
2750b57cec5SDimitry Andric   // CFG. Otherwise we need to add a new entry in the CFG for the new
2760b57cec5SDimitry Andric   // successor.
2770b57cec5SDimitry Andric   if (SuccCount == 1) {
2780b57cec5SDimitry Andric     MBB.replaceSuccessor(&Succ, &NewMBB);
2790b57cec5SDimitry Andric   } else {
2800b57cec5SDimitry Andric     MBB.splitSuccessor(&Succ, &NewMBB);
2810b57cec5SDimitry Andric   }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   // Hook up the edge from the new basic block to the old successor in the CFG.
2840b57cec5SDimitry Andric   NewMBB.addSuccessor(&Succ);
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric   // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
2870b57cec5SDimitry Andric   for (MachineInstr &MI : Succ) {
2880b57cec5SDimitry Andric     if (!MI.isPHI())
2890b57cec5SDimitry Andric       break;
2900b57cec5SDimitry Andric     for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
2910b57cec5SDimitry Andric          OpIdx += 2) {
2920b57cec5SDimitry Andric       MachineOperand &OpV = MI.getOperand(OpIdx);
2930b57cec5SDimitry Andric       MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
2940b57cec5SDimitry Andric       assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
2950b57cec5SDimitry Andric       if (OpMBB.getMBB() != &MBB)
2960b57cec5SDimitry Andric         continue;
2970b57cec5SDimitry Andric 
2980b57cec5SDimitry Andric       // If this is the last edge to the succesor, just replace MBB in the PHI
2990b57cec5SDimitry Andric       if (SuccCount == 1) {
3000b57cec5SDimitry Andric         OpMBB.setMBB(&NewMBB);
3010b57cec5SDimitry Andric         break;
3020b57cec5SDimitry Andric       }
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric       // Otherwise, append a new pair of operands for the new incoming edge.
3050b57cec5SDimitry Andric       MI.addOperand(MF, OpV);
3060b57cec5SDimitry Andric       MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
3070b57cec5SDimitry Andric       break;
3080b57cec5SDimitry Andric     }
3090b57cec5SDimitry Andric   }
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   // Inherit live-ins from the successor
3120b57cec5SDimitry Andric   for (auto &LI : Succ.liveins())
3130b57cec5SDimitry Andric     NewMBB.addLiveIn(LI);
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Split edge from '" << MBB.getName() << "' to '"
3160b57cec5SDimitry Andric                     << Succ.getName() << "'.\n");
3170b57cec5SDimitry Andric   return NewMBB;
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric 
3200b57cec5SDimitry Andric /// Removing duplicate PHI operands to leave the PHI in a canonical and
3210b57cec5SDimitry Andric /// predictable form.
3220b57cec5SDimitry Andric ///
3230b57cec5SDimitry Andric /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
3240b57cec5SDimitry Andric /// isn't what you might expect. We may have multiple entries in PHI nodes for
3250b57cec5SDimitry Andric /// a single predecessor. This makes CFG-updating extremely complex, so here we
3260b57cec5SDimitry Andric /// simplify all PHI nodes to a model even simpler than the IR's model: exactly
3270b57cec5SDimitry Andric /// one entry per predecessor, regardless of how many edges there are.
3280b57cec5SDimitry Andric static void canonicalizePHIOperands(MachineFunction &MF) {
3290b57cec5SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 4> Preds;
3300b57cec5SDimitry Andric   SmallVector<int, 4> DupIndices;
3310b57cec5SDimitry Andric   for (auto &MBB : MF)
3320b57cec5SDimitry Andric     for (auto &MI : MBB) {
3330b57cec5SDimitry Andric       if (!MI.isPHI())
3340b57cec5SDimitry Andric         break;
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric       // First we scan the operands of the PHI looking for duplicate entries
3370b57cec5SDimitry Andric       // a particular predecessor. We retain the operand index of each duplicate
3380b57cec5SDimitry Andric       // entry found.
3390b57cec5SDimitry Andric       for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
3400b57cec5SDimitry Andric            OpIdx += 2)
3410b57cec5SDimitry Andric         if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
3420b57cec5SDimitry Andric           DupIndices.push_back(OpIdx);
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric       // Now walk the duplicate indices, removing both the block and value. Note
3450b57cec5SDimitry Andric       // that these are stored as a vector making this element-wise removal
3460b57cec5SDimitry Andric       // :w
3470b57cec5SDimitry Andric       // potentially quadratic.
3480b57cec5SDimitry Andric       //
3490b57cec5SDimitry Andric       // FIXME: It is really frustrating that we have to use a quadratic
3500b57cec5SDimitry Andric       // removal algorithm here. There should be a better way, but the use-def
3510b57cec5SDimitry Andric       // updates required make that impossible using the public API.
3520b57cec5SDimitry Andric       //
3530b57cec5SDimitry Andric       // Note that we have to process these backwards so that we don't
3540b57cec5SDimitry Andric       // invalidate other indices with each removal.
3550b57cec5SDimitry Andric       while (!DupIndices.empty()) {
3560b57cec5SDimitry Andric         int OpIdx = DupIndices.pop_back_val();
3570b57cec5SDimitry Andric         // Remove both the block and value operand, again in reverse order to
3580b57cec5SDimitry Andric         // preserve indices.
35981ad6265SDimitry Andric         MI.removeOperand(OpIdx + 1);
36081ad6265SDimitry Andric         MI.removeOperand(OpIdx);
3610b57cec5SDimitry Andric       }
3620b57cec5SDimitry Andric 
3630b57cec5SDimitry Andric       Preds.clear();
3640b57cec5SDimitry Andric     }
3650b57cec5SDimitry Andric }
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric /// Helper to scan a function for loads vulnerable to misspeculation that we
3680b57cec5SDimitry Andric /// want to harden.
3690b57cec5SDimitry Andric ///
3700b57cec5SDimitry Andric /// We use this to avoid making changes to functions where there is nothing we
3710b57cec5SDimitry Andric /// need to do to harden against misspeculation.
3720b57cec5SDimitry Andric static bool hasVulnerableLoad(MachineFunction &MF) {
3730b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
3740b57cec5SDimitry Andric     for (MachineInstr &MI : MBB) {
3750b57cec5SDimitry Andric       // Loads within this basic block after an LFENCE are not at risk of
3760b57cec5SDimitry Andric       // speculatively executing with invalid predicates from prior control
3770b57cec5SDimitry Andric       // flow. So break out of this block but continue scanning the function.
3780b57cec5SDimitry Andric       if (MI.getOpcode() == X86::LFENCE)
3790b57cec5SDimitry Andric         break;
3800b57cec5SDimitry Andric 
3810b57cec5SDimitry Andric       // Looking for loads only.
3820b57cec5SDimitry Andric       if (!MI.mayLoad())
3830b57cec5SDimitry Andric         continue;
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric       // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
3860b57cec5SDimitry Andric       if (MI.getOpcode() == X86::MFENCE)
3870b57cec5SDimitry Andric         continue;
3880b57cec5SDimitry Andric 
3890b57cec5SDimitry Andric       // We found a load.
3900b57cec5SDimitry Andric       return true;
3910b57cec5SDimitry Andric     }
3920b57cec5SDimitry Andric   }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric   // No loads found.
3950b57cec5SDimitry Andric   return false;
3960b57cec5SDimitry Andric }
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
3990b57cec5SDimitry Andric     MachineFunction &MF) {
4000b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
4010b57cec5SDimitry Andric                     << " **********\n");
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric   // Only run if this pass is forced enabled or we detect the relevant function
4040b57cec5SDimitry Andric   // attribute requesting SLH.
4050b57cec5SDimitry Andric   if (!EnableSpeculativeLoadHardening &&
4060b57cec5SDimitry Andric       !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
4070b57cec5SDimitry Andric     return false;
4080b57cec5SDimitry Andric 
4090b57cec5SDimitry Andric   Subtarget = &MF.getSubtarget<X86Subtarget>();
4100b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
4110b57cec5SDimitry Andric   TII = Subtarget->getInstrInfo();
4120b57cec5SDimitry Andric   TRI = Subtarget->getRegisterInfo();
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric   // FIXME: Support for 32-bit.
4150b57cec5SDimitry Andric   PS.emplace(MF, &X86::GR64_NOSPRegClass);
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric   if (MF.begin() == MF.end())
4180b57cec5SDimitry Andric     // Nothing to do for a degenerate empty function...
4190b57cec5SDimitry Andric     return false;
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   // We support an alternative hardening technique based on a debug flag.
4220b57cec5SDimitry Andric   if (HardenEdgesWithLFENCE) {
4230b57cec5SDimitry Andric     hardenEdgesWithLFENCE(MF);
4240b57cec5SDimitry Andric     return true;
4250b57cec5SDimitry Andric   }
4260b57cec5SDimitry Andric 
4270b57cec5SDimitry Andric   // Create a dummy debug loc to use for all the generated code here.
4280b57cec5SDimitry Andric   DebugLoc Loc;
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric   MachineBasicBlock &Entry = *MF.begin();
4310b57cec5SDimitry Andric   auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric   // Do a quick scan to see if we have any checkable loads.
4340b57cec5SDimitry Andric   bool HasVulnerableLoad = hasVulnerableLoad(MF);
4350b57cec5SDimitry Andric 
4360b57cec5SDimitry Andric   // See if we have any conditional branching blocks that we will need to trace
4370b57cec5SDimitry Andric   // predicate state through.
4380b57cec5SDimitry Andric   SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
4390b57cec5SDimitry Andric 
4400b57cec5SDimitry Andric   // If we have no interesting conditions or loads, nothing to do here.
4410b57cec5SDimitry Andric   if (!HasVulnerableLoad && Infos.empty())
4420b57cec5SDimitry Andric     return true;
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric   // The poison value is required to be an all-ones value for many aspects of
4450b57cec5SDimitry Andric   // this mitigation.
4460b57cec5SDimitry Andric   const int PoisonVal = -1;
4470b57cec5SDimitry Andric   PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
4480b57cec5SDimitry Andric   BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
4490b57cec5SDimitry Andric       .addImm(PoisonVal);
4500b57cec5SDimitry Andric   ++NumInstsInserted;
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric   // If we have loads being hardened and we've asked for call and ret edges to
4530b57cec5SDimitry Andric   // get a full fence-based mitigation, inject that fence.
4540b57cec5SDimitry Andric   if (HasVulnerableLoad && FenceCallAndRet) {
4550b57cec5SDimitry Andric     // We need to insert an LFENCE at the start of the function to suspend any
4560b57cec5SDimitry Andric     // incoming misspeculation from the caller. This helps two-fold: the caller
4570b57cec5SDimitry Andric     // may not have been protected as this code has been, and this code gets to
4580b57cec5SDimitry Andric     // not take any specific action to protect across calls.
4590b57cec5SDimitry Andric     // FIXME: We could skip this for functions which unconditionally return
4600b57cec5SDimitry Andric     // a constant.
4610b57cec5SDimitry Andric     BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
4620b57cec5SDimitry Andric     ++NumInstsInserted;
4630b57cec5SDimitry Andric     ++NumLFENCEsInserted;
4640b57cec5SDimitry Andric   }
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric   // If we guarded the entry with an LFENCE and have no conditionals to protect
4670b57cec5SDimitry Andric   // in blocks, then we're done.
4680b57cec5SDimitry Andric   if (FenceCallAndRet && Infos.empty())
4690b57cec5SDimitry Andric     // We may have changed the function's code at this point to insert fences.
4700b57cec5SDimitry Andric     return true;
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric   // For every basic block in the function which can b
4730b57cec5SDimitry Andric   if (HardenInterprocedurally && !FenceCallAndRet) {
4740b57cec5SDimitry Andric     // Set up the predicate state by extracting it from the incoming stack
4750b57cec5SDimitry Andric     // pointer so we pick up any misspeculation in our caller.
4760b57cec5SDimitry Andric     PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
4770b57cec5SDimitry Andric   } else {
4780b57cec5SDimitry Andric     // Otherwise, just build the predicate state itself by zeroing a register
4790b57cec5SDimitry Andric     // as we don't need any initial state.
4800b57cec5SDimitry Andric     PS->InitialReg = MRI->createVirtualRegister(PS->RC);
4818bcb0991SDimitry Andric     Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
4820b57cec5SDimitry Andric     auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
4830b57cec5SDimitry Andric                          PredStateSubReg);
4840b57cec5SDimitry Andric     ++NumInstsInserted;
4850b57cec5SDimitry Andric     MachineOperand *ZeroEFLAGSDefOp =
486*0fca6ea1SDimitry Andric         ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
4870b57cec5SDimitry Andric     assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
4880b57cec5SDimitry Andric            "Must have an implicit def of EFLAGS!");
4890b57cec5SDimitry Andric     ZeroEFLAGSDefOp->setIsDead(true);
4900b57cec5SDimitry Andric     BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
4910b57cec5SDimitry Andric             PS->InitialReg)
4920b57cec5SDimitry Andric         .addImm(0)
4930b57cec5SDimitry Andric         .addReg(PredStateSubReg)
4940b57cec5SDimitry Andric         .addImm(X86::sub_32bit);
4950b57cec5SDimitry Andric   }
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric   // We're going to need to trace predicate state throughout the function's
4980b57cec5SDimitry Andric   // CFG. Prepare for this by setting up our initial state of PHIs with unique
4990b57cec5SDimitry Andric   // predecessor entries and all the initial predicate state.
5000b57cec5SDimitry Andric   canonicalizePHIOperands(MF);
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric   // Track the updated values in an SSA updater to rewrite into SSA form at the
5030b57cec5SDimitry Andric   // end.
5040b57cec5SDimitry Andric   PS->SSA.Initialize(PS->InitialReg);
5050b57cec5SDimitry Andric   PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric   // Trace through the CFG.
5080b57cec5SDimitry Andric   auto CMovs = tracePredStateThroughCFG(MF, Infos);
5090b57cec5SDimitry Andric 
5100b57cec5SDimitry Andric   // We may also enter basic blocks in this function via exception handling
5110b57cec5SDimitry Andric   // control flow. Here, if we are hardening interprocedurally, we need to
5120b57cec5SDimitry Andric   // re-capture the predicate state from the throwing code. In the Itanium ABI,
5130b57cec5SDimitry Andric   // the throw will always look like a call to __cxa_throw and will have the
5140b57cec5SDimitry Andric   // predicate state in the stack pointer, so extract fresh predicate state from
5150b57cec5SDimitry Andric   // the stack pointer and make it available in SSA.
5160b57cec5SDimitry Andric   // FIXME: Handle non-itanium ABI EH models.
5170b57cec5SDimitry Andric   if (HardenInterprocedurally) {
5180b57cec5SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
5190b57cec5SDimitry Andric       assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
5200b57cec5SDimitry Andric       assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
5210b57cec5SDimitry Andric       assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
5220b57cec5SDimitry Andric       if (!MBB.isEHPad())
5230b57cec5SDimitry Andric         continue;
5240b57cec5SDimitry Andric       PS->SSA.AddAvailableValue(
5250b57cec5SDimitry Andric           &MBB,
5260b57cec5SDimitry Andric           extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
5270b57cec5SDimitry Andric     }
5280b57cec5SDimitry Andric   }
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric   if (HardenIndirectCallsAndJumps) {
5310b57cec5SDimitry Andric     // If we are going to harden calls and jumps we need to unfold their memory
5320b57cec5SDimitry Andric     // operands.
5330b57cec5SDimitry Andric     unfoldCallAndJumpLoads(MF);
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric     // Then we trace predicate state through the indirect branches.
5360b57cec5SDimitry Andric     auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
5370b57cec5SDimitry Andric     CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
5380b57cec5SDimitry Andric   }
5390b57cec5SDimitry Andric 
5400b57cec5SDimitry Andric   // Now that we have the predicate state available at the start of each block
5410b57cec5SDimitry Andric   // in the CFG, trace it through each block, hardening vulnerable instructions
5420b57cec5SDimitry Andric   // as we go.
5430b57cec5SDimitry Andric   tracePredStateThroughBlocksAndHarden(MF);
5440b57cec5SDimitry Andric 
5450b57cec5SDimitry Andric   // Now rewrite all the uses of the pred state using the SSA updater to insert
5460b57cec5SDimitry Andric   // PHIs connecting the state between blocks along the CFG edges.
5470b57cec5SDimitry Andric   for (MachineInstr *CMovI : CMovs)
5480b57cec5SDimitry Andric     for (MachineOperand &Op : CMovI->operands()) {
5490b57cec5SDimitry Andric       if (!Op.isReg() || Op.getReg() != PS->InitialReg)
5500b57cec5SDimitry Andric         continue;
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric       PS->SSA.RewriteUse(Op);
5530b57cec5SDimitry Andric     }
5540b57cec5SDimitry Andric 
5550b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
5560b57cec5SDimitry Andric              dbgs() << "\n"; MF.verify(this));
5570b57cec5SDimitry Andric   return true;
5580b57cec5SDimitry Andric }
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric /// Implements the naive hardening approach of putting an LFENCE after every
5610b57cec5SDimitry Andric /// potentially mis-predicted control flow construct.
5620b57cec5SDimitry Andric ///
5630b57cec5SDimitry Andric /// We include this as an alternative mostly for the purpose of comparison. The
5640b57cec5SDimitry Andric /// performance impact of this is expected to be extremely severe and not
5650b57cec5SDimitry Andric /// practical for any real-world users.
5660b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
5670b57cec5SDimitry Andric     MachineFunction &MF) {
5680b57cec5SDimitry Andric   // First, we scan the function looking for blocks that are reached along edges
5690b57cec5SDimitry Andric   // that we might want to harden.
5700b57cec5SDimitry Andric   SmallSetVector<MachineBasicBlock *, 8> Blocks;
5710b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
5720b57cec5SDimitry Andric     // If there are no or only one successor, nothing to do here.
5730b57cec5SDimitry Andric     if (MBB.succ_size() <= 1)
5740b57cec5SDimitry Andric       continue;
5750b57cec5SDimitry Andric 
5760b57cec5SDimitry Andric     // Skip blocks unless their terminators start with a branch. Other
5770b57cec5SDimitry Andric     // terminators don't seem interesting for guarding against misspeculation.
5780b57cec5SDimitry Andric     auto TermIt = MBB.getFirstTerminator();
5790b57cec5SDimitry Andric     if (TermIt == MBB.end() || !TermIt->isBranch())
5800b57cec5SDimitry Andric       continue;
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric     // Add all the non-EH-pad succossors to the blocks we want to harden. We
5830b57cec5SDimitry Andric     // skip EH pads because there isn't really a condition of interest on
5840b57cec5SDimitry Andric     // entering.
5850b57cec5SDimitry Andric     for (MachineBasicBlock *SuccMBB : MBB.successors())
5860b57cec5SDimitry Andric       if (!SuccMBB->isEHPad())
5870b57cec5SDimitry Andric         Blocks.insert(SuccMBB);
5880b57cec5SDimitry Andric   }
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric   for (MachineBasicBlock *MBB : Blocks) {
5910b57cec5SDimitry Andric     auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
5920b57cec5SDimitry Andric     BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
5930b57cec5SDimitry Andric     ++NumInstsInserted;
5940b57cec5SDimitry Andric     ++NumLFENCEsInserted;
5950b57cec5SDimitry Andric   }
5960b57cec5SDimitry Andric }
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
5990b57cec5SDimitry Andric X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
6000b57cec5SDimitry Andric   SmallVector<BlockCondInfo, 16> Infos;
6010b57cec5SDimitry Andric 
6020b57cec5SDimitry Andric   // Walk the function and build up a summary for each block's conditions that
6030b57cec5SDimitry Andric   // we need to trace through.
6040b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
6050b57cec5SDimitry Andric     // If there are no or only one successor, nothing to do here.
6060b57cec5SDimitry Andric     if (MBB.succ_size() <= 1)
6070b57cec5SDimitry Andric       continue;
6080b57cec5SDimitry Andric 
6090b57cec5SDimitry Andric     // We want to reliably handle any conditional branch terminators in the
6100b57cec5SDimitry Andric     // MBB, so we manually analyze the branch. We can handle all of the
6110b57cec5SDimitry Andric     // permutations here, including ones that analyze branch cannot.
6120b57cec5SDimitry Andric     //
6130b57cec5SDimitry Andric     // The approach is to walk backwards across the terminators, resetting at
6140b57cec5SDimitry Andric     // any unconditional non-indirect branch, and track all conditional edges
6150b57cec5SDimitry Andric     // to basic blocks as well as the fallthrough or unconditional successor
6160b57cec5SDimitry Andric     // edge. For each conditional edge, we track the target and the opposite
6170b57cec5SDimitry Andric     // condition code in order to inject a "no-op" cmov into that successor
6180b57cec5SDimitry Andric     // that will harden the predicate. For the fallthrough/unconditional
6190b57cec5SDimitry Andric     // edge, we inject a separate cmov for each conditional branch with
6200b57cec5SDimitry Andric     // matching condition codes. This effectively implements an "and" of the
6210b57cec5SDimitry Andric     // condition flags, even if there isn't a single condition flag that would
6220b57cec5SDimitry Andric     // directly implement that. We don't bother trying to optimize either of
6230b57cec5SDimitry Andric     // these cases because if such an optimization is possible, LLVM should
6240b57cec5SDimitry Andric     // have optimized the conditional *branches* in that way already to reduce
6250b57cec5SDimitry Andric     // instruction count. This late, we simply assume the minimal number of
6260b57cec5SDimitry Andric     // branch instructions is being emitted and use that to guide our cmov
6270b57cec5SDimitry Andric     // insertion.
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric     BlockCondInfo Info = {&MBB, {}, nullptr};
6300b57cec5SDimitry Andric 
6310b57cec5SDimitry Andric     // Now walk backwards through the terminators and build up successors they
6320b57cec5SDimitry Andric     // reach and the conditions.
6330b57cec5SDimitry Andric     for (MachineInstr &MI : llvm::reverse(MBB)) {
6340b57cec5SDimitry Andric       // Once we've handled all the terminators, we're done.
6350b57cec5SDimitry Andric       if (!MI.isTerminator())
6360b57cec5SDimitry Andric         break;
6370b57cec5SDimitry Andric 
6380b57cec5SDimitry Andric       // If we see a non-branch terminator, we can't handle anything so bail.
6390b57cec5SDimitry Andric       if (!MI.isBranch()) {
6400b57cec5SDimitry Andric         Info.CondBrs.clear();
6410b57cec5SDimitry Andric         break;
6420b57cec5SDimitry Andric       }
6430b57cec5SDimitry Andric 
6440b57cec5SDimitry Andric       // If we see an unconditional branch, reset our state, clear any
6450b57cec5SDimitry Andric       // fallthrough, and set this is the "else" successor.
6460b57cec5SDimitry Andric       if (MI.getOpcode() == X86::JMP_1) {
6470b57cec5SDimitry Andric         Info.CondBrs.clear();
6480b57cec5SDimitry Andric         Info.UncondBr = &MI;
6490b57cec5SDimitry Andric         continue;
6500b57cec5SDimitry Andric       }
6510b57cec5SDimitry Andric 
6520b57cec5SDimitry Andric       // If we get an invalid condition, we have an indirect branch or some
6530b57cec5SDimitry Andric       // other unanalyzable "fallthrough" case. We model this as a nullptr for
6540b57cec5SDimitry Andric       // the destination so we can still guard any conditional successors.
6550b57cec5SDimitry Andric       // Consider code sequences like:
6560b57cec5SDimitry Andric       // ```
6570b57cec5SDimitry Andric       //   jCC L1
6580b57cec5SDimitry Andric       //   jmpq *%rax
6590b57cec5SDimitry Andric       // ```
6600b57cec5SDimitry Andric       // We still want to harden the edge to `L1`.
6610b57cec5SDimitry Andric       if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
6620b57cec5SDimitry Andric         Info.CondBrs.clear();
6630b57cec5SDimitry Andric         Info.UncondBr = &MI;
6640b57cec5SDimitry Andric         continue;
6650b57cec5SDimitry Andric       }
6660b57cec5SDimitry Andric 
6670b57cec5SDimitry Andric       // We have a vanilla conditional branch, add it to our list.
6680b57cec5SDimitry Andric       Info.CondBrs.push_back(&MI);
6690b57cec5SDimitry Andric     }
6700b57cec5SDimitry Andric     if (Info.CondBrs.empty()) {
6710b57cec5SDimitry Andric       ++NumBranchesUntraced;
6720b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
6730b57cec5SDimitry Andric                  MBB.dump());
6740b57cec5SDimitry Andric       continue;
6750b57cec5SDimitry Andric     }
6760b57cec5SDimitry Andric 
6770b57cec5SDimitry Andric     Infos.push_back(Info);
6780b57cec5SDimitry Andric   }
6790b57cec5SDimitry Andric 
6800b57cec5SDimitry Andric   return Infos;
6810b57cec5SDimitry Andric }
6820b57cec5SDimitry Andric 
6830b57cec5SDimitry Andric /// Trace the predicate state through the CFG, instrumenting each conditional
6840b57cec5SDimitry Andric /// branch such that misspeculation through an edge will poison the predicate
6850b57cec5SDimitry Andric /// state.
6860b57cec5SDimitry Andric ///
6870b57cec5SDimitry Andric /// Returns the list of inserted CMov instructions so that they can have their
6880b57cec5SDimitry Andric /// uses of the predicate state rewritten into proper SSA form once it is
6890b57cec5SDimitry Andric /// complete.
6900b57cec5SDimitry Andric SmallVector<MachineInstr *, 16>
6910b57cec5SDimitry Andric X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
6920b57cec5SDimitry Andric     MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
6930b57cec5SDimitry Andric   // Collect the inserted cmov instructions so we can rewrite their uses of the
6940b57cec5SDimitry Andric   // predicate state into SSA form.
6950b57cec5SDimitry Andric   SmallVector<MachineInstr *, 16> CMovs;
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric   // Now walk all of the basic blocks looking for ones that end in conditional
6980b57cec5SDimitry Andric   // jumps where we need to update this register along each edge.
6990b57cec5SDimitry Andric   for (const BlockCondInfo &Info : Infos) {
7000b57cec5SDimitry Andric     MachineBasicBlock &MBB = *Info.MBB;
7010b57cec5SDimitry Andric     const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
7020b57cec5SDimitry Andric     MachineInstr *UncondBr = Info.UncondBr;
7030b57cec5SDimitry Andric 
7040b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
7050b57cec5SDimitry Andric                       << "\n");
7060b57cec5SDimitry Andric     ++NumCondBranchesTraced;
7070b57cec5SDimitry Andric 
7080b57cec5SDimitry Andric     // Compute the non-conditional successor as either the target of any
7090b57cec5SDimitry Andric     // unconditional branch or the layout successor.
7100b57cec5SDimitry Andric     MachineBasicBlock *UncondSucc =
7110b57cec5SDimitry Andric         UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
7120b57cec5SDimitry Andric                         ? UncondBr->getOperand(0).getMBB()
7130b57cec5SDimitry Andric                         : nullptr)
7140b57cec5SDimitry Andric                  : &*std::next(MachineFunction::iterator(&MBB));
7150b57cec5SDimitry Andric 
7160b57cec5SDimitry Andric     // Count how many edges there are to any given successor.
7170b57cec5SDimitry Andric     SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
7180b57cec5SDimitry Andric     if (UncondSucc)
7190b57cec5SDimitry Andric       ++SuccCounts[UncondSucc];
7200b57cec5SDimitry Andric     for (auto *CondBr : CondBrs)
7210b57cec5SDimitry Andric       ++SuccCounts[CondBr->getOperand(0).getMBB()];
7220b57cec5SDimitry Andric 
7230b57cec5SDimitry Andric     // A lambda to insert cmov instructions into a block checking all of the
7240b57cec5SDimitry Andric     // condition codes in a sequence.
7250b57cec5SDimitry Andric     auto BuildCheckingBlockForSuccAndConds =
7260b57cec5SDimitry Andric         [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
7270b57cec5SDimitry Andric             MachineInstr *Br, MachineInstr *&UncondBr,
7280b57cec5SDimitry Andric             ArrayRef<X86::CondCode> Conds) {
7290b57cec5SDimitry Andric           // First, we split the edge to insert the checking block into a safe
7300b57cec5SDimitry Andric           // location.
7310b57cec5SDimitry Andric           auto &CheckingMBB =
7320b57cec5SDimitry Andric               (SuccCount == 1 && Succ.pred_size() == 1)
7330b57cec5SDimitry Andric                   ? Succ
7340b57cec5SDimitry Andric                   : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
7350b57cec5SDimitry Andric 
7360b57cec5SDimitry Andric           bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
7370b57cec5SDimitry Andric           if (!LiveEFLAGS)
7380b57cec5SDimitry Andric             CheckingMBB.addLiveIn(X86::EFLAGS);
7390b57cec5SDimitry Andric 
7400b57cec5SDimitry Andric           // Now insert the cmovs to implement the checks.
7410b57cec5SDimitry Andric           auto InsertPt = CheckingMBB.begin();
7420b57cec5SDimitry Andric           assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
7430b57cec5SDimitry Andric                  "Should never have a PHI in the initial checking block as it "
7440b57cec5SDimitry Andric                  "always has a single predecessor!");
7450b57cec5SDimitry Andric 
7460b57cec5SDimitry Andric           // We will wire each cmov to each other, but need to start with the
7470b57cec5SDimitry Andric           // incoming pred state.
7480b57cec5SDimitry Andric           unsigned CurStateReg = PS->InitialReg;
7490b57cec5SDimitry Andric 
7500b57cec5SDimitry Andric           for (X86::CondCode Cond : Conds) {
7510b57cec5SDimitry Andric             int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
7520b57cec5SDimitry Andric             auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
7530b57cec5SDimitry Andric 
7548bcb0991SDimitry Andric             Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
7550b57cec5SDimitry Andric             // Note that we intentionally use an empty debug location so that
7560b57cec5SDimitry Andric             // this picks up the preceding location.
7570b57cec5SDimitry Andric             auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
7580b57cec5SDimitry Andric                                  TII->get(CMovOp), UpdatedStateReg)
7590b57cec5SDimitry Andric                              .addReg(CurStateReg)
7600b57cec5SDimitry Andric                              .addReg(PS->PoisonReg)
7610b57cec5SDimitry Andric                              .addImm(Cond);
7620b57cec5SDimitry Andric             // If this is the last cmov and the EFLAGS weren't originally
7630b57cec5SDimitry Andric             // live-in, mark them as killed.
7640b57cec5SDimitry Andric             if (!LiveEFLAGS && Cond == Conds.back())
765*0fca6ea1SDimitry Andric               CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
766*0fca6ea1SDimitry Andric                   ->setIsKill(true);
7670b57cec5SDimitry Andric 
7680b57cec5SDimitry Andric             ++NumInstsInserted;
7690b57cec5SDimitry Andric             LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump();
7700b57cec5SDimitry Andric                        dbgs() << "\n");
7710b57cec5SDimitry Andric 
7720b57cec5SDimitry Andric             // The first one of the cmovs will be using the top level
7730b57cec5SDimitry Andric             // `PredStateReg` and need to get rewritten into SSA form.
7740b57cec5SDimitry Andric             if (CurStateReg == PS->InitialReg)
7750b57cec5SDimitry Andric               CMovs.push_back(&*CMovI);
7760b57cec5SDimitry Andric 
7770b57cec5SDimitry Andric             // The next cmov should start from this one's def.
7780b57cec5SDimitry Andric             CurStateReg = UpdatedStateReg;
7790b57cec5SDimitry Andric           }
7800b57cec5SDimitry Andric 
7810b57cec5SDimitry Andric           // And put the last one into the available values for SSA form of our
7820b57cec5SDimitry Andric           // predicate state.
7830b57cec5SDimitry Andric           PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
7840b57cec5SDimitry Andric         };
7850b57cec5SDimitry Andric 
7860b57cec5SDimitry Andric     std::vector<X86::CondCode> UncondCodeSeq;
7870b57cec5SDimitry Andric     for (auto *CondBr : CondBrs) {
7880b57cec5SDimitry Andric       MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
7890b57cec5SDimitry Andric       int &SuccCount = SuccCounts[&Succ];
7900b57cec5SDimitry Andric 
7910b57cec5SDimitry Andric       X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
7920b57cec5SDimitry Andric       X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
7930b57cec5SDimitry Andric       UncondCodeSeq.push_back(Cond);
7940b57cec5SDimitry Andric 
7950b57cec5SDimitry Andric       BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
7960b57cec5SDimitry Andric                                         {InvCond});
7970b57cec5SDimitry Andric 
7980b57cec5SDimitry Andric       // Decrement the successor count now that we've split one of the edges.
7990b57cec5SDimitry Andric       // We need to keep the count of edges to the successor accurate in order
8000b57cec5SDimitry Andric       // to know above when to *replace* the successor in the CFG vs. just
8010b57cec5SDimitry Andric       // adding the new successor.
8020b57cec5SDimitry Andric       --SuccCount;
8030b57cec5SDimitry Andric     }
8040b57cec5SDimitry Andric 
8050b57cec5SDimitry Andric     // Since we may have split edges and changed the number of successors,
8060b57cec5SDimitry Andric     // normalize the probabilities. This avoids doing it each time we split an
8070b57cec5SDimitry Andric     // edge.
8080b57cec5SDimitry Andric     MBB.normalizeSuccProbs();
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric     // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
8110b57cec5SDimitry Andric     // need to intersect the other condition codes. We can do this by just
8120b57cec5SDimitry Andric     // doing a cmov for each one.
8130b57cec5SDimitry Andric     if (!UncondSucc)
8140b57cec5SDimitry Andric       // If we have no fallthrough to protect (perhaps it is an indirect jump?)
8150b57cec5SDimitry Andric       // just skip this and continue.
8160b57cec5SDimitry Andric       continue;
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric     assert(SuccCounts[UncondSucc] == 1 &&
8190b57cec5SDimitry Andric            "We should never have more than one edge to the unconditional "
8200b57cec5SDimitry Andric            "successor at this point because every other edge must have been "
8210b57cec5SDimitry Andric            "split above!");
8220b57cec5SDimitry Andric 
8230b57cec5SDimitry Andric     // Sort and unique the codes to minimize them.
8240b57cec5SDimitry Andric     llvm::sort(UncondCodeSeq);
825*0fca6ea1SDimitry Andric     UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
8260b57cec5SDimitry Andric 
8270b57cec5SDimitry Andric     // Build a checking version of the successor.
8280b57cec5SDimitry Andric     BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
8290b57cec5SDimitry Andric                                       UncondBr, UncondBr, UncondCodeSeq);
8300b57cec5SDimitry Andric   }
8310b57cec5SDimitry Andric 
8320b57cec5SDimitry Andric   return CMovs;
8330b57cec5SDimitry Andric }
8340b57cec5SDimitry Andric 
8350b57cec5SDimitry Andric /// Compute the register class for the unfolded load.
8360b57cec5SDimitry Andric ///
8370b57cec5SDimitry Andric /// FIXME: This should probably live in X86InstrInfo, potentially by adding
8380b57cec5SDimitry Andric /// a way to unfold into a newly created vreg rather than requiring a register
8390b57cec5SDimitry Andric /// input.
8400b57cec5SDimitry Andric static const TargetRegisterClass *
8410b57cec5SDimitry Andric getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
8420b57cec5SDimitry Andric                            unsigned Opcode) {
8430b57cec5SDimitry Andric   unsigned Index;
8440b57cec5SDimitry Andric   unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
8450b57cec5SDimitry Andric       Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
8460b57cec5SDimitry Andric   const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
8470b57cec5SDimitry Andric   return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
8480b57cec5SDimitry Andric }
8490b57cec5SDimitry Andric 
8500b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
8510b57cec5SDimitry Andric     MachineFunction &MF) {
8520b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF)
853349cc55cSDimitry Andric     // We use make_early_inc_range here so we can remove instructions if needed
854349cc55cSDimitry Andric     // without disturbing the iteration.
855349cc55cSDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
8560b57cec5SDimitry Andric       // Must either be a call or a branch.
8570b57cec5SDimitry Andric       if (!MI.isCall() && !MI.isBranch())
8580b57cec5SDimitry Andric         continue;
8590b57cec5SDimitry Andric       // We only care about loading variants of these instructions.
8600b57cec5SDimitry Andric       if (!MI.mayLoad())
8610b57cec5SDimitry Andric         continue;
8620b57cec5SDimitry Andric 
8630b57cec5SDimitry Andric       switch (MI.getOpcode()) {
8640b57cec5SDimitry Andric       default: {
8650b57cec5SDimitry Andric         LLVM_DEBUG(
8660b57cec5SDimitry Andric             dbgs() << "ERROR: Found an unexpected loading branch or call "
8670b57cec5SDimitry Andric                       "instruction:\n";
8680b57cec5SDimitry Andric             MI.dump(); dbgs() << "\n");
8690b57cec5SDimitry Andric         report_fatal_error("Unexpected loading branch or call!");
8700b57cec5SDimitry Andric       }
8710b57cec5SDimitry Andric 
8720b57cec5SDimitry Andric       case X86::FARCALL16m:
8730b57cec5SDimitry Andric       case X86::FARCALL32m:
8745ffd83dbSDimitry Andric       case X86::FARCALL64m:
8750b57cec5SDimitry Andric       case X86::FARJMP16m:
8760b57cec5SDimitry Andric       case X86::FARJMP32m:
8775ffd83dbSDimitry Andric       case X86::FARJMP64m:
8780b57cec5SDimitry Andric         // We cannot mitigate far jumps or calls, but we also don't expect them
8790b57cec5SDimitry Andric         // to be vulnerable to Spectre v1.2 style attacks.
8800b57cec5SDimitry Andric         continue;
8810b57cec5SDimitry Andric 
8820b57cec5SDimitry Andric       case X86::CALL16m:
8830b57cec5SDimitry Andric       case X86::CALL16m_NT:
8840b57cec5SDimitry Andric       case X86::CALL32m:
8850b57cec5SDimitry Andric       case X86::CALL32m_NT:
8860b57cec5SDimitry Andric       case X86::CALL64m:
8870b57cec5SDimitry Andric       case X86::CALL64m_NT:
8880b57cec5SDimitry Andric       case X86::JMP16m:
8890b57cec5SDimitry Andric       case X86::JMP16m_NT:
8900b57cec5SDimitry Andric       case X86::JMP32m:
8910b57cec5SDimitry Andric       case X86::JMP32m_NT:
8920b57cec5SDimitry Andric       case X86::JMP64m:
8930b57cec5SDimitry Andric       case X86::JMP64m_NT:
8940b57cec5SDimitry Andric       case X86::TAILJMPm64:
8950b57cec5SDimitry Andric       case X86::TAILJMPm64_REX:
8960b57cec5SDimitry Andric       case X86::TAILJMPm:
8970b57cec5SDimitry Andric       case X86::TCRETURNmi64:
8980b57cec5SDimitry Andric       case X86::TCRETURNmi: {
8990b57cec5SDimitry Andric         // Use the generic unfold logic now that we know we're dealing with
9000b57cec5SDimitry Andric         // expected instructions.
9010b57cec5SDimitry Andric         // FIXME: We don't have test coverage for all of these!
9020b57cec5SDimitry Andric         auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
9030b57cec5SDimitry Andric         if (!UnfoldedRC) {
9040b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
9050b57cec5SDimitry Andric                          << "ERROR: Unable to unfold load from instruction:\n";
9060b57cec5SDimitry Andric                      MI.dump(); dbgs() << "\n");
9070b57cec5SDimitry Andric           report_fatal_error("Unable to unfold load!");
9080b57cec5SDimitry Andric         }
9098bcb0991SDimitry Andric         Register Reg = MRI->createVirtualRegister(UnfoldedRC);
9100b57cec5SDimitry Andric         SmallVector<MachineInstr *, 2> NewMIs;
9110b57cec5SDimitry Andric         // If we were able to compute an unfolded reg class, any failure here
9120b57cec5SDimitry Andric         // is just a programming error so just assert.
9130b57cec5SDimitry Andric         bool Unfolded =
9140b57cec5SDimitry Andric             TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
9150b57cec5SDimitry Andric                                      /*UnfoldStore*/ false, NewMIs);
9160b57cec5SDimitry Andric         (void)Unfolded;
9170b57cec5SDimitry Andric         assert(Unfolded &&
9180b57cec5SDimitry Andric                "Computed unfolded register class but failed to unfold");
9190b57cec5SDimitry Andric         // Now stitch the new instructions into place and erase the old one.
9200b57cec5SDimitry Andric         for (auto *NewMI : NewMIs)
9210b57cec5SDimitry Andric           MBB.insert(MI.getIterator(), NewMI);
9225ffd83dbSDimitry Andric 
9235ffd83dbSDimitry Andric         // Update the call site info.
9245ffd83dbSDimitry Andric         if (MI.isCandidateForCallSiteEntry())
9255ffd83dbSDimitry Andric           MF.eraseCallSiteInfo(&MI);
9265ffd83dbSDimitry Andric 
9270b57cec5SDimitry Andric         MI.eraseFromParent();
9280b57cec5SDimitry Andric         LLVM_DEBUG({
9290b57cec5SDimitry Andric           dbgs() << "Unfolded load successfully into:\n";
9300b57cec5SDimitry Andric           for (auto *NewMI : NewMIs) {
9310b57cec5SDimitry Andric             NewMI->dump();
9320b57cec5SDimitry Andric             dbgs() << "\n";
9330b57cec5SDimitry Andric           }
9340b57cec5SDimitry Andric         });
9350b57cec5SDimitry Andric         continue;
9360b57cec5SDimitry Andric       }
9370b57cec5SDimitry Andric       }
9380b57cec5SDimitry Andric       llvm_unreachable("Escaped switch with default!");
9390b57cec5SDimitry Andric     }
9400b57cec5SDimitry Andric }
9410b57cec5SDimitry Andric 
9420b57cec5SDimitry Andric /// Trace the predicate state through indirect branches, instrumenting them to
9430b57cec5SDimitry Andric /// poison the state if a target is reached that does not match the expected
9440b57cec5SDimitry Andric /// target.
9450b57cec5SDimitry Andric ///
9460b57cec5SDimitry Andric /// This is designed to mitigate Spectre variant 1 attacks where an indirect
9470b57cec5SDimitry Andric /// branch is trained to predict a particular target and then mispredicts that
9480b57cec5SDimitry Andric /// target in a way that can leak data. Despite using an indirect branch, this
9490b57cec5SDimitry Andric /// is really a variant 1 style attack: it does not steer execution to an
9500b57cec5SDimitry Andric /// arbitrary or attacker controlled address, and it does not require any
9510b57cec5SDimitry Andric /// special code executing next to the victim. This attack can also be mitigated
9520b57cec5SDimitry Andric /// through retpolines, but those require either replacing indirect branches
9530b57cec5SDimitry Andric /// with conditional direct branches or lowering them through a device that
9540b57cec5SDimitry Andric /// blocks speculation. This mitigation can replace these retpoline-style
9550b57cec5SDimitry Andric /// mitigations for jump tables and other indirect branches within a function
9560b57cec5SDimitry Andric /// when variant 2 isn't a risk while allowing limited speculation. Indirect
9570b57cec5SDimitry Andric /// calls, however, cannot be mitigated through this technique without changing
9580b57cec5SDimitry Andric /// the ABI in a fundamental way.
9590b57cec5SDimitry Andric SmallVector<MachineInstr *, 16>
9600b57cec5SDimitry Andric X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
9610b57cec5SDimitry Andric     MachineFunction &MF) {
9620b57cec5SDimitry Andric   // We use the SSAUpdater to insert PHI nodes for the target addresses of
9630b57cec5SDimitry Andric   // indirect branches. We don't actually need the full power of the SSA updater
9640b57cec5SDimitry Andric   // in this particular case as we always have immediately available values, but
9650b57cec5SDimitry Andric   // this avoids us having to re-implement the PHI construction logic.
9660b57cec5SDimitry Andric   MachineSSAUpdater TargetAddrSSA(MF);
9670b57cec5SDimitry Andric   TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
9680b57cec5SDimitry Andric 
9690b57cec5SDimitry Andric   // Track which blocks were terminated with an indirect branch.
9700b57cec5SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
9710b57cec5SDimitry Andric 
9720b57cec5SDimitry Andric   // We need to know what blocks end up reached via indirect branches. We
9730b57cec5SDimitry Andric   // expect this to be a subset of those whose address is taken and so track it
9740b57cec5SDimitry Andric   // directly via the CFG.
9750b57cec5SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
9760b57cec5SDimitry Andric 
9770b57cec5SDimitry Andric   // Walk all the blocks which end in an indirect branch and make the
9780b57cec5SDimitry Andric   // target address available.
9790b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
9800b57cec5SDimitry Andric     // Find the last terminator.
9810b57cec5SDimitry Andric     auto MII = MBB.instr_rbegin();
9820b57cec5SDimitry Andric     while (MII != MBB.instr_rend() && MII->isDebugInstr())
9830b57cec5SDimitry Andric       ++MII;
9840b57cec5SDimitry Andric     if (MII == MBB.instr_rend())
9850b57cec5SDimitry Andric       continue;
9860b57cec5SDimitry Andric     MachineInstr &TI = *MII;
9870b57cec5SDimitry Andric     if (!TI.isTerminator() || !TI.isBranch())
9880b57cec5SDimitry Andric       // No terminator or non-branch terminator.
9890b57cec5SDimitry Andric       continue;
9900b57cec5SDimitry Andric 
9910b57cec5SDimitry Andric     unsigned TargetReg;
9920b57cec5SDimitry Andric 
9930b57cec5SDimitry Andric     switch (TI.getOpcode()) {
9940b57cec5SDimitry Andric     default:
9950b57cec5SDimitry Andric       // Direct branch or conditional branch (leading to fallthrough).
9960b57cec5SDimitry Andric       continue;
9970b57cec5SDimitry Andric 
9980b57cec5SDimitry Andric     case X86::FARJMP16m:
9990b57cec5SDimitry Andric     case X86::FARJMP32m:
10005ffd83dbSDimitry Andric     case X86::FARJMP64m:
10010b57cec5SDimitry Andric       // We cannot mitigate far jumps or calls, but we also don't expect them
10020b57cec5SDimitry Andric       // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
10030b57cec5SDimitry Andric       continue;
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric     case X86::JMP16m:
10060b57cec5SDimitry Andric     case X86::JMP16m_NT:
10070b57cec5SDimitry Andric     case X86::JMP32m:
10080b57cec5SDimitry Andric     case X86::JMP32m_NT:
10090b57cec5SDimitry Andric     case X86::JMP64m:
10100b57cec5SDimitry Andric     case X86::JMP64m_NT:
10110b57cec5SDimitry Andric       // Mostly as documentation.
10120b57cec5SDimitry Andric       report_fatal_error("Memory operand jumps should have been unfolded!");
10130b57cec5SDimitry Andric 
10140b57cec5SDimitry Andric     case X86::JMP16r:
10150b57cec5SDimitry Andric       report_fatal_error(
10160b57cec5SDimitry Andric           "Support for 16-bit indirect branches is not implemented.");
10170b57cec5SDimitry Andric     case X86::JMP32r:
10180b57cec5SDimitry Andric       report_fatal_error(
10190b57cec5SDimitry Andric           "Support for 32-bit indirect branches is not implemented.");
10200b57cec5SDimitry Andric 
10210b57cec5SDimitry Andric     case X86::JMP64r:
10220b57cec5SDimitry Andric       TargetReg = TI.getOperand(0).getReg();
10230b57cec5SDimitry Andric     }
10240b57cec5SDimitry Andric 
10250b57cec5SDimitry Andric     // We have definitely found an indirect  branch. Verify that there are no
10260b57cec5SDimitry Andric     // preceding conditional branches as we don't yet support that.
10270b57cec5SDimitry Andric     if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
10280b57cec5SDimitry Andric           return !OtherTI.isDebugInstr() && &OtherTI != &TI;
10290b57cec5SDimitry Andric         })) {
10300b57cec5SDimitry Andric       LLVM_DEBUG({
10310b57cec5SDimitry Andric         dbgs() << "ERROR: Found other terminators in a block with an indirect "
10320b57cec5SDimitry Andric                   "branch! This is not yet supported! Terminator sequence:\n";
10330b57cec5SDimitry Andric         for (MachineInstr &MI : MBB.terminators()) {
10340b57cec5SDimitry Andric           MI.dump();
10350b57cec5SDimitry Andric           dbgs() << '\n';
10360b57cec5SDimitry Andric         }
10370b57cec5SDimitry Andric       });
10380b57cec5SDimitry Andric       report_fatal_error("Unimplemented terminator sequence!");
10390b57cec5SDimitry Andric     }
10400b57cec5SDimitry Andric 
10410b57cec5SDimitry Andric     // Make the target register an available value for this block.
10420b57cec5SDimitry Andric     TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
10430b57cec5SDimitry Andric     IndirectTerminatedMBBs.insert(&MBB);
10440b57cec5SDimitry Andric 
10450b57cec5SDimitry Andric     // Add all the successors to our target candidates.
10460b57cec5SDimitry Andric     for (MachineBasicBlock *Succ : MBB.successors())
10470b57cec5SDimitry Andric       IndirectTargetMBBs.insert(Succ);
10480b57cec5SDimitry Andric   }
10490b57cec5SDimitry Andric 
10500b57cec5SDimitry Andric   // Keep track of the cmov instructions we insert so we can return them.
10510b57cec5SDimitry Andric   SmallVector<MachineInstr *, 16> CMovs;
10520b57cec5SDimitry Andric 
10530b57cec5SDimitry Andric   // If we didn't find any indirect branches with targets, nothing to do here.
10540b57cec5SDimitry Andric   if (IndirectTargetMBBs.empty())
10550b57cec5SDimitry Andric     return CMovs;
10560b57cec5SDimitry Andric 
10570b57cec5SDimitry Andric   // We found indirect branches and targets that need to be instrumented to
10580b57cec5SDimitry Andric   // harden loads within them. Walk the blocks of the function (to get a stable
10590b57cec5SDimitry Andric   // ordering) and instrument each target of an indirect branch.
10600b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
10610b57cec5SDimitry Andric     // Skip the blocks that aren't candidate targets.
10620b57cec5SDimitry Andric     if (!IndirectTargetMBBs.count(&MBB))
10630b57cec5SDimitry Andric       continue;
10640b57cec5SDimitry Andric 
10650b57cec5SDimitry Andric     // We don't expect EH pads to ever be reached via an indirect branch. If
10660b57cec5SDimitry Andric     // this is desired for some reason, we could simply skip them here rather
10670b57cec5SDimitry Andric     // than asserting.
10680b57cec5SDimitry Andric     assert(!MBB.isEHPad() &&
10690b57cec5SDimitry Andric            "Unexpected EH pad as target of an indirect branch!");
10700b57cec5SDimitry Andric 
10710b57cec5SDimitry Andric     // We should never end up threading EFLAGS into a block to harden
10720b57cec5SDimitry Andric     // conditional jumps as there would be an additional successor via the
10730b57cec5SDimitry Andric     // indirect branch. As a consequence, all such edges would be split before
10740b57cec5SDimitry Andric     // reaching here, and the inserted block will handle the EFLAGS-based
10750b57cec5SDimitry Andric     // hardening.
10760b57cec5SDimitry Andric     assert(!MBB.isLiveIn(X86::EFLAGS) &&
10770b57cec5SDimitry Andric            "Cannot check within a block that already has live-in EFLAGS!");
10780b57cec5SDimitry Andric 
10790b57cec5SDimitry Andric     // We can't handle having non-indirect edges into this block unless this is
10800b57cec5SDimitry Andric     // the only successor and we can synthesize the necessary target address.
10810b57cec5SDimitry Andric     for (MachineBasicBlock *Pred : MBB.predecessors()) {
10820b57cec5SDimitry Andric       // If we've already handled this by extracting the target directly,
10830b57cec5SDimitry Andric       // nothing to do.
10840b57cec5SDimitry Andric       if (IndirectTerminatedMBBs.count(Pred))
10850b57cec5SDimitry Andric         continue;
10860b57cec5SDimitry Andric 
10870b57cec5SDimitry Andric       // Otherwise, we have to be the only successor. We generally expect this
10880b57cec5SDimitry Andric       // to be true as conditional branches should have had a critical edge
10890b57cec5SDimitry Andric       // split already. We don't however need to worry about EH pad successors
10900b57cec5SDimitry Andric       // as they'll happily ignore the target and their hardening strategy is
10910b57cec5SDimitry Andric       // resilient to all ways in which they could be reached speculatively.
10920b57cec5SDimitry Andric       if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
10930b57cec5SDimitry Andric             return Succ->isEHPad() || Succ == &MBB;
10940b57cec5SDimitry Andric           })) {
10950b57cec5SDimitry Andric         LLVM_DEBUG({
10960b57cec5SDimitry Andric           dbgs() << "ERROR: Found conditional entry to target of indirect "
10970b57cec5SDimitry Andric                     "branch!\n";
10980b57cec5SDimitry Andric           Pred->dump();
10990b57cec5SDimitry Andric           MBB.dump();
11000b57cec5SDimitry Andric         });
11010b57cec5SDimitry Andric         report_fatal_error("Cannot harden a conditional entry to a target of "
11020b57cec5SDimitry Andric                            "an indirect branch!");
11030b57cec5SDimitry Andric       }
11040b57cec5SDimitry Andric 
11050b57cec5SDimitry Andric       // Now we need to compute the address of this block and install it as a
11060b57cec5SDimitry Andric       // synthetic target in the predecessor. We do this at the bottom of the
11070b57cec5SDimitry Andric       // predecessor.
11080b57cec5SDimitry Andric       auto InsertPt = Pred->getFirstTerminator();
11098bcb0991SDimitry Andric       Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
11100b57cec5SDimitry Andric       if (MF.getTarget().getCodeModel() == CodeModel::Small &&
11110b57cec5SDimitry Andric           !Subtarget->isPositionIndependent()) {
11120b57cec5SDimitry Andric         // Directly materialize it into an immediate.
11130b57cec5SDimitry Andric         auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
11140b57cec5SDimitry Andric                              TII->get(X86::MOV64ri32), TargetReg)
11150b57cec5SDimitry Andric                          .addMBB(&MBB);
11160b57cec5SDimitry Andric         ++NumInstsInserted;
11170b57cec5SDimitry Andric         (void)AddrI;
11180b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Inserting mov: "; AddrI->dump();
11190b57cec5SDimitry Andric                    dbgs() << "\n");
11200b57cec5SDimitry Andric       } else {
11210b57cec5SDimitry Andric         auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
11220b57cec5SDimitry Andric                              TargetReg)
11230b57cec5SDimitry Andric                          .addReg(/*Base*/ X86::RIP)
11240b57cec5SDimitry Andric                          .addImm(/*Scale*/ 1)
11250b57cec5SDimitry Andric                          .addReg(/*Index*/ 0)
11260b57cec5SDimitry Andric                          .addMBB(&MBB)
11270b57cec5SDimitry Andric                          .addReg(/*Segment*/ 0);
11280b57cec5SDimitry Andric         ++NumInstsInserted;
11290b57cec5SDimitry Andric         (void)AddrI;
11300b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump();
11310b57cec5SDimitry Andric                    dbgs() << "\n");
11320b57cec5SDimitry Andric       }
11330b57cec5SDimitry Andric       // And make this available.
11340b57cec5SDimitry Andric       TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
11350b57cec5SDimitry Andric     }
11360b57cec5SDimitry Andric 
11370b57cec5SDimitry Andric     // Materialize the needed SSA value of the target. Note that we need the
11380b57cec5SDimitry Andric     // middle of the block as this block might at the bottom have an indirect
11390b57cec5SDimitry Andric     // branch back to itself. We can do this here because at this point, every
11400b57cec5SDimitry Andric     // predecessor of this block has an available value. This is basically just
11410b57cec5SDimitry Andric     // automating the construction of a PHI node for this target.
114204eeddc0SDimitry Andric     Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
11430b57cec5SDimitry Andric 
11440b57cec5SDimitry Andric     // Insert a comparison of the incoming target register with this block's
11450b57cec5SDimitry Andric     // address. This also requires us to mark the block as having its address
11460b57cec5SDimitry Andric     // taken explicitly.
1147bdd1243dSDimitry Andric     MBB.setMachineBlockAddressTaken();
11480b57cec5SDimitry Andric     auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
11490b57cec5SDimitry Andric     if (MF.getTarget().getCodeModel() == CodeModel::Small &&
11500b57cec5SDimitry Andric         !Subtarget->isPositionIndependent()) {
11510b57cec5SDimitry Andric       // Check directly against a relocated immediate when we can.
11520b57cec5SDimitry Andric       auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
11530b57cec5SDimitry Andric                         .addReg(TargetReg, RegState::Kill)
11540b57cec5SDimitry Andric                         .addMBB(&MBB);
11550b57cec5SDimitry Andric       ++NumInstsInserted;
11560b57cec5SDimitry Andric       (void)CheckI;
11570b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
11580b57cec5SDimitry Andric     } else {
11590b57cec5SDimitry Andric       // Otherwise compute the address into a register first.
11608bcb0991SDimitry Andric       Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
11610b57cec5SDimitry Andric       auto AddrI =
11620b57cec5SDimitry Andric           BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
11630b57cec5SDimitry Andric               .addReg(/*Base*/ X86::RIP)
11640b57cec5SDimitry Andric               .addImm(/*Scale*/ 1)
11650b57cec5SDimitry Andric               .addReg(/*Index*/ 0)
11660b57cec5SDimitry Andric               .addMBB(&MBB)
11670b57cec5SDimitry Andric               .addReg(/*Segment*/ 0);
11680b57cec5SDimitry Andric       ++NumInstsInserted;
11690b57cec5SDimitry Andric       (void)AddrI;
11700b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump(); dbgs() << "\n");
11710b57cec5SDimitry Andric       auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
11720b57cec5SDimitry Andric                         .addReg(TargetReg, RegState::Kill)
11730b57cec5SDimitry Andric                         .addReg(AddrReg, RegState::Kill);
11740b57cec5SDimitry Andric       ++NumInstsInserted;
11750b57cec5SDimitry Andric       (void)CheckI;
11760b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
11770b57cec5SDimitry Andric     }
11780b57cec5SDimitry Andric 
11790b57cec5SDimitry Andric     // Now cmov over the predicate if the comparison wasn't equal.
11800b57cec5SDimitry Andric     int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
11810b57cec5SDimitry Andric     auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
11828bcb0991SDimitry Andric     Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
11830b57cec5SDimitry Andric     auto CMovI =
11840b57cec5SDimitry Andric         BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
11850b57cec5SDimitry Andric             .addReg(PS->InitialReg)
11860b57cec5SDimitry Andric             .addReg(PS->PoisonReg)
11870b57cec5SDimitry Andric             .addImm(X86::COND_NE);
1188*0fca6ea1SDimitry Andric     CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1189*0fca6ea1SDimitry Andric         ->setIsKill(true);
11900b57cec5SDimitry Andric     ++NumInstsInserted;
11910b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
11920b57cec5SDimitry Andric     CMovs.push_back(&*CMovI);
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric     // And put the new value into the available values for SSA form of our
11950b57cec5SDimitry Andric     // predicate state.
11960b57cec5SDimitry Andric     PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
11970b57cec5SDimitry Andric   }
11980b57cec5SDimitry Andric 
11990b57cec5SDimitry Andric   // Return all the newly inserted cmov instructions of the predicate state.
12000b57cec5SDimitry Andric   return CMovs;
12010b57cec5SDimitry Andric }
12020b57cec5SDimitry Andric 
12035ffd83dbSDimitry Andric // Returns true if the MI has EFLAGS as a register def operand and it's live,
12045ffd83dbSDimitry Andric // otherwise it returns false
12055ffd83dbSDimitry Andric static bool isEFLAGSDefLive(const MachineInstr &MI) {
1206*0fca6ea1SDimitry Andric   if (const MachineOperand *DefOp =
1207*0fca6ea1SDimitry Andric           MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
12085ffd83dbSDimitry Andric     return !DefOp->isDead();
12095ffd83dbSDimitry Andric   }
12100b57cec5SDimitry Andric   return false;
12110b57cec5SDimitry Andric }
12120b57cec5SDimitry Andric 
12130b57cec5SDimitry Andric static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
12140b57cec5SDimitry Andric                          const TargetRegisterInfo &TRI) {
12150b57cec5SDimitry Andric   // Check if EFLAGS are alive by seeing if there is a def of them or they
12160b57cec5SDimitry Andric   // live-in, and then seeing if that def is in turn used.
12170b57cec5SDimitry Andric   for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1218*0fca6ea1SDimitry Andric     if (MachineOperand *DefOp =
1219*0fca6ea1SDimitry Andric             MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
12200b57cec5SDimitry Andric       // If the def is dead, then EFLAGS is not live.
12210b57cec5SDimitry Andric       if (DefOp->isDead())
12220b57cec5SDimitry Andric         return false;
12230b57cec5SDimitry Andric 
12240b57cec5SDimitry Andric       // Otherwise we've def'ed it, and it is live.
12250b57cec5SDimitry Andric       return true;
12260b57cec5SDimitry Andric     }
12270b57cec5SDimitry Andric     // While at this instruction, also check if we use and kill EFLAGS
12280b57cec5SDimitry Andric     // which means it isn't live.
12290b57cec5SDimitry Andric     if (MI.killsRegister(X86::EFLAGS, &TRI))
12300b57cec5SDimitry Andric       return false;
12310b57cec5SDimitry Andric   }
12320b57cec5SDimitry Andric 
12330b57cec5SDimitry Andric   // If we didn't find anything conclusive (neither definitely alive or
12340b57cec5SDimitry Andric   // definitely dead) return whether it lives into the block.
12350b57cec5SDimitry Andric   return MBB.isLiveIn(X86::EFLAGS);
12360b57cec5SDimitry Andric }
12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric /// Trace the predicate state through each of the blocks in the function,
12390b57cec5SDimitry Andric /// hardening everything necessary along the way.
12400b57cec5SDimitry Andric ///
12410b57cec5SDimitry Andric /// We call this routine once the initial predicate state has been established
12420b57cec5SDimitry Andric /// for each basic block in the function in the SSA updater. This routine traces
12430b57cec5SDimitry Andric /// it through the instructions within each basic block, and for non-returning
12440b57cec5SDimitry Andric /// blocks informs the SSA updater about the final state that lives out of the
12450b57cec5SDimitry Andric /// block. Along the way, it hardens any vulnerable instruction using the
12460b57cec5SDimitry Andric /// currently valid predicate state. We have to do these two things together
12470b57cec5SDimitry Andric /// because the SSA updater only works across blocks. Within a block, we track
12480b57cec5SDimitry Andric /// the current predicate state directly and update it as it changes.
12490b57cec5SDimitry Andric ///
12500b57cec5SDimitry Andric /// This operates in two passes over each block. First, we analyze the loads in
12510b57cec5SDimitry Andric /// the block to determine which strategy will be used to harden them: hardening
12520b57cec5SDimitry Andric /// the address or hardening the loaded value when loaded into a register
12530b57cec5SDimitry Andric /// amenable to hardening. We have to process these first because the two
12540b57cec5SDimitry Andric /// strategies may interact -- later hardening may change what strategy we wish
12550b57cec5SDimitry Andric /// to use. We also will analyze data dependencies between loads and avoid
12560b57cec5SDimitry Andric /// hardening those loads that are data dependent on a load with a hardened
12570b57cec5SDimitry Andric /// address. We also skip hardening loads already behind an LFENCE as that is
12580b57cec5SDimitry Andric /// sufficient to harden them against misspeculation.
12590b57cec5SDimitry Andric ///
12600b57cec5SDimitry Andric /// Second, we actively trace the predicate state through the block, applying
12610b57cec5SDimitry Andric /// the hardening steps we determined necessary in the first pass as we go.
12620b57cec5SDimitry Andric ///
12630b57cec5SDimitry Andric /// These two passes are applied to each basic block. We operate one block at a
12640b57cec5SDimitry Andric /// time to simplify reasoning about reachability and sequencing.
12650b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
12660b57cec5SDimitry Andric     MachineFunction &MF) {
12670b57cec5SDimitry Andric   SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
12680b57cec5SDimitry Andric   SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
12690b57cec5SDimitry Andric 
12700b57cec5SDimitry Andric   SmallSet<unsigned, 16> HardenedAddrRegs;
12710b57cec5SDimitry Andric 
12720b57cec5SDimitry Andric   SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
12730b57cec5SDimitry Andric 
12740b57cec5SDimitry Andric   // Track the set of load-dependent registers through the basic block. Because
12750b57cec5SDimitry Andric   // the values of these registers have an existing data dependency on a loaded
12760b57cec5SDimitry Andric   // value which we would have checked, we can omit any checks on them.
12770b57cec5SDimitry Andric   SparseBitVector<> LoadDepRegs;
12780b57cec5SDimitry Andric 
12790b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
12800b57cec5SDimitry Andric     // The first pass over the block: collect all the loads which can have their
12810b57cec5SDimitry Andric     // loaded value hardened and all the loads that instead need their address
12820b57cec5SDimitry Andric     // hardened. During this walk we propagate load dependence for address
12830b57cec5SDimitry Andric     // hardened loads and also look for LFENCE to stop hardening wherever
12840b57cec5SDimitry Andric     // possible. When deciding whether or not to harden the loaded value or not,
12850b57cec5SDimitry Andric     // we check to see if any registers used in the address will have been
12860b57cec5SDimitry Andric     // hardened at this point and if so, harden any remaining address registers
12870b57cec5SDimitry Andric     // as that often successfully re-uses hardened addresses and minimizes
12880b57cec5SDimitry Andric     // instructions.
12890b57cec5SDimitry Andric     //
12900b57cec5SDimitry Andric     // FIXME: We should consider an aggressive mode where we continue to keep as
12910b57cec5SDimitry Andric     // many loads value hardened even when some address register hardening would
12920b57cec5SDimitry Andric     // be free (due to reuse).
12930b57cec5SDimitry Andric     //
12940b57cec5SDimitry Andric     // Note that we only need this pass if we are actually hardening loads.
12950b57cec5SDimitry Andric     if (HardenLoads)
12960b57cec5SDimitry Andric       for (MachineInstr &MI : MBB) {
12970b57cec5SDimitry Andric         // We naively assume that all def'ed registers of an instruction have
12980b57cec5SDimitry Andric         // a data dependency on all of their operands.
12990b57cec5SDimitry Andric         // FIXME: Do a more careful analysis of x86 to build a conservative
13000b57cec5SDimitry Andric         // model here.
13010b57cec5SDimitry Andric         if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
13020b57cec5SDimitry Andric               return Op.isReg() && LoadDepRegs.test(Op.getReg());
13030b57cec5SDimitry Andric             }))
13040b57cec5SDimitry Andric           for (MachineOperand &Def : MI.defs())
13050b57cec5SDimitry Andric             if (Def.isReg())
13060b57cec5SDimitry Andric               LoadDepRegs.set(Def.getReg());
13070b57cec5SDimitry Andric 
13080b57cec5SDimitry Andric         // Both Intel and AMD are guiding that they will change the semantics of
13090b57cec5SDimitry Andric         // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
13100b57cec5SDimitry Andric         // no more need to guard things in this block.
13110b57cec5SDimitry Andric         if (MI.getOpcode() == X86::LFENCE)
13120b57cec5SDimitry Andric           break;
13130b57cec5SDimitry Andric 
13140b57cec5SDimitry Andric         // If this instruction cannot load, nothing to do.
13150b57cec5SDimitry Andric         if (!MI.mayLoad())
13160b57cec5SDimitry Andric           continue;
13170b57cec5SDimitry Andric 
13180b57cec5SDimitry Andric         // Some instructions which "load" are trivially safe or unimportant.
13190b57cec5SDimitry Andric         if (MI.getOpcode() == X86::MFENCE)
13200b57cec5SDimitry Andric           continue;
13210b57cec5SDimitry Andric 
13220b57cec5SDimitry Andric         // Extract the memory operand information about this instruction.
13237a6dacacSDimitry Andric         const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
13240b57cec5SDimitry Andric         if (MemRefBeginIdx < 0) {
13250b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
13260b57cec5SDimitry Andric                          << "WARNING: unable to harden loading instruction: ";
13270b57cec5SDimitry Andric                      MI.dump());
13280b57cec5SDimitry Andric           continue;
13290b57cec5SDimitry Andric         }
13300b57cec5SDimitry Andric 
13310b57cec5SDimitry Andric         MachineOperand &BaseMO =
13320b57cec5SDimitry Andric             MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
13330b57cec5SDimitry Andric         MachineOperand &IndexMO =
13340b57cec5SDimitry Andric             MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
13350b57cec5SDimitry Andric 
13360b57cec5SDimitry Andric         // If we have at least one (non-frame-index, non-RIP) register operand,
13370b57cec5SDimitry Andric         // and neither operand is load-dependent, we need to check the load.
13380b57cec5SDimitry Andric         unsigned BaseReg = 0, IndexReg = 0;
13390b57cec5SDimitry Andric         if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
13400b57cec5SDimitry Andric             BaseMO.getReg() != X86::NoRegister)
13410b57cec5SDimitry Andric           BaseReg = BaseMO.getReg();
13420b57cec5SDimitry Andric         if (IndexMO.getReg() != X86::NoRegister)
13430b57cec5SDimitry Andric           IndexReg = IndexMO.getReg();
13440b57cec5SDimitry Andric 
13450b57cec5SDimitry Andric         if (!BaseReg && !IndexReg)
13460b57cec5SDimitry Andric           // No register operands!
13470b57cec5SDimitry Andric           continue;
13480b57cec5SDimitry Andric 
13490b57cec5SDimitry Andric         // If any register operand is dependent, this load is dependent and we
13500b57cec5SDimitry Andric         // needn't check it.
13510b57cec5SDimitry Andric         // FIXME: Is this true in the case where we are hardening loads after
13520b57cec5SDimitry Andric         // they complete? Unclear, need to investigate.
13530b57cec5SDimitry Andric         if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
13540b57cec5SDimitry Andric             (IndexReg && LoadDepRegs.test(IndexReg)))
13550b57cec5SDimitry Andric           continue;
13560b57cec5SDimitry Andric 
13570b57cec5SDimitry Andric         // If post-load hardening is enabled, this load is compatible with
13580b57cec5SDimitry Andric         // post-load hardening, and we aren't already going to harden one of the
13590b57cec5SDimitry Andric         // address registers, queue it up to be hardened post-load. Notably,
13600b57cec5SDimitry Andric         // even once hardened this won't introduce a useful dependency that
13610b57cec5SDimitry Andric         // could prune out subsequent loads.
13625ffd83dbSDimitry Andric         if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
13635ffd83dbSDimitry Andric             !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
13645ffd83dbSDimitry Andric             MI.getOperand(0).isReg() &&
13650b57cec5SDimitry Andric             canHardenRegister(MI.getOperand(0).getReg()) &&
13660b57cec5SDimitry Andric             !HardenedAddrRegs.count(BaseReg) &&
13670b57cec5SDimitry Andric             !HardenedAddrRegs.count(IndexReg)) {
13680b57cec5SDimitry Andric           HardenPostLoad.insert(&MI);
13690b57cec5SDimitry Andric           HardenedAddrRegs.insert(MI.getOperand(0).getReg());
13700b57cec5SDimitry Andric           continue;
13710b57cec5SDimitry Andric         }
13720b57cec5SDimitry Andric 
13730b57cec5SDimitry Andric         // Record this instruction for address hardening and record its register
13740b57cec5SDimitry Andric         // operands as being address-hardened.
13750b57cec5SDimitry Andric         HardenLoadAddr.insert(&MI);
13760b57cec5SDimitry Andric         if (BaseReg)
13770b57cec5SDimitry Andric           HardenedAddrRegs.insert(BaseReg);
13780b57cec5SDimitry Andric         if (IndexReg)
13790b57cec5SDimitry Andric           HardenedAddrRegs.insert(IndexReg);
13800b57cec5SDimitry Andric 
13810b57cec5SDimitry Andric         for (MachineOperand &Def : MI.defs())
13820b57cec5SDimitry Andric           if (Def.isReg())
13830b57cec5SDimitry Andric             LoadDepRegs.set(Def.getReg());
13840b57cec5SDimitry Andric       }
13850b57cec5SDimitry Andric 
13860b57cec5SDimitry Andric     // Now re-walk the instructions in the basic block, and apply whichever
13870b57cec5SDimitry Andric     // hardening strategy we have elected. Note that we do this in a second
13880b57cec5SDimitry Andric     // pass specifically so that we have the complete set of instructions for
13890b57cec5SDimitry Andric     // which we will do post-load hardening and can defer it in certain
13900b57cec5SDimitry Andric     // circumstances.
13910b57cec5SDimitry Andric     for (MachineInstr &MI : MBB) {
13920b57cec5SDimitry Andric       if (HardenLoads) {
13930b57cec5SDimitry Andric         // We cannot both require hardening the def of a load and its address.
13940b57cec5SDimitry Andric         assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
13950b57cec5SDimitry Andric                "Requested to harden both the address and def of a load!");
13960b57cec5SDimitry Andric 
13970b57cec5SDimitry Andric         // Check if this is a load whose address needs to be hardened.
13980b57cec5SDimitry Andric         if (HardenLoadAddr.erase(&MI)) {
13997a6dacacSDimitry Andric           const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
14000b57cec5SDimitry Andric           assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
14010b57cec5SDimitry Andric 
14020b57cec5SDimitry Andric           MachineOperand &BaseMO =
14030b57cec5SDimitry Andric               MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
14040b57cec5SDimitry Andric           MachineOperand &IndexMO =
14050b57cec5SDimitry Andric               MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
14060b57cec5SDimitry Andric           hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
14070b57cec5SDimitry Andric           continue;
14080b57cec5SDimitry Andric         }
14090b57cec5SDimitry Andric 
14100b57cec5SDimitry Andric         // Test if this instruction is one of our post load instructions (and
14110b57cec5SDimitry Andric         // remove it from the set if so).
14120b57cec5SDimitry Andric         if (HardenPostLoad.erase(&MI)) {
14130b57cec5SDimitry Andric           assert(!MI.isCall() && "Must not try to post-load harden a call!");
14140b57cec5SDimitry Andric 
14155ffd83dbSDimitry Andric           // If this is a data-invariant load and there is no EFLAGS
14165ffd83dbSDimitry Andric           // interference, we want to try and sink any hardening as far as
14175ffd83dbSDimitry Andric           // possible.
14185ffd83dbSDimitry Andric           if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
14190b57cec5SDimitry Andric             // Sink the instruction we'll need to harden as far as we can down
14200b57cec5SDimitry Andric             // the graph.
14210b57cec5SDimitry Andric             MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
14220b57cec5SDimitry Andric 
14230b57cec5SDimitry Andric             // If we managed to sink this instruction, update everything so we
14240b57cec5SDimitry Andric             // harden that instruction when we reach it in the instruction
14250b57cec5SDimitry Andric             // sequence.
14260b57cec5SDimitry Andric             if (SunkMI != &MI) {
14270b57cec5SDimitry Andric               // If in sinking there was no instruction needing to be hardened,
14280b57cec5SDimitry Andric               // we're done.
14290b57cec5SDimitry Andric               if (!SunkMI)
14300b57cec5SDimitry Andric                 continue;
14310b57cec5SDimitry Andric 
14320b57cec5SDimitry Andric               // Otherwise, add this to the set of defs we harden.
14330b57cec5SDimitry Andric               HardenPostLoad.insert(SunkMI);
14340b57cec5SDimitry Andric               continue;
14350b57cec5SDimitry Andric             }
14360b57cec5SDimitry Andric           }
14370b57cec5SDimitry Andric 
14380b57cec5SDimitry Andric           unsigned HardenedReg = hardenPostLoad(MI);
14390b57cec5SDimitry Andric 
14400b57cec5SDimitry Andric           // Mark the resulting hardened register as such so we don't re-harden.
14410b57cec5SDimitry Andric           AddrRegToHardenedReg[HardenedReg] = HardenedReg;
14420b57cec5SDimitry Andric 
14430b57cec5SDimitry Andric           continue;
14440b57cec5SDimitry Andric         }
14450b57cec5SDimitry Andric 
14460b57cec5SDimitry Andric         // Check for an indirect call or branch that may need its input hardened
14470b57cec5SDimitry Andric         // even if we couldn't find the specific load used, or were able to
14480b57cec5SDimitry Andric         // avoid hardening it for some reason. Note that here we cannot break
14490b57cec5SDimitry Andric         // out afterward as we may still need to handle any call aspect of this
14500b57cec5SDimitry Andric         // instruction.
14510b57cec5SDimitry Andric         if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
14520b57cec5SDimitry Andric           hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
14530b57cec5SDimitry Andric       }
14540b57cec5SDimitry Andric 
14550b57cec5SDimitry Andric       // After we finish hardening loads we handle interprocedural hardening if
14560b57cec5SDimitry Andric       // enabled and relevant for this instruction.
14570b57cec5SDimitry Andric       if (!HardenInterprocedurally)
14580b57cec5SDimitry Andric         continue;
14590b57cec5SDimitry Andric       if (!MI.isCall() && !MI.isReturn())
14600b57cec5SDimitry Andric         continue;
14610b57cec5SDimitry Andric 
14620b57cec5SDimitry Andric       // If this is a direct return (IE, not a tail call) just directly harden
14630b57cec5SDimitry Andric       // it.
14640b57cec5SDimitry Andric       if (MI.isReturn() && !MI.isCall()) {
14650b57cec5SDimitry Andric         hardenReturnInstr(MI);
14660b57cec5SDimitry Andric         continue;
14670b57cec5SDimitry Andric       }
14680b57cec5SDimitry Andric 
14690b57cec5SDimitry Andric       // Otherwise we have a call. We need to handle transferring the predicate
14700b57cec5SDimitry Andric       // state into a call and recovering it after the call returns (unless this
14710b57cec5SDimitry Andric       // is a tail call).
14720b57cec5SDimitry Andric       assert(MI.isCall() && "Should only reach here for calls!");
14730b57cec5SDimitry Andric       tracePredStateThroughCall(MI);
14740b57cec5SDimitry Andric     }
14750b57cec5SDimitry Andric 
14760b57cec5SDimitry Andric     HardenPostLoad.clear();
14770b57cec5SDimitry Andric     HardenLoadAddr.clear();
14780b57cec5SDimitry Andric     HardenedAddrRegs.clear();
14790b57cec5SDimitry Andric     AddrRegToHardenedReg.clear();
14800b57cec5SDimitry Andric 
14810b57cec5SDimitry Andric     // Currently, we only track data-dependent loads within a basic block.
14820b57cec5SDimitry Andric     // FIXME: We should see if this is necessary or if we could be more
14830b57cec5SDimitry Andric     // aggressive here without opening up attack avenues.
14840b57cec5SDimitry Andric     LoadDepRegs.clear();
14850b57cec5SDimitry Andric   }
14860b57cec5SDimitry Andric }
14870b57cec5SDimitry Andric 
14880b57cec5SDimitry Andric /// Save EFLAGS into the returned GPR. This can in turn be restored with
14890b57cec5SDimitry Andric /// `restoreEFLAGS`.
14900b57cec5SDimitry Andric ///
14910b57cec5SDimitry Andric /// Note that LLVM can only lower very simple patterns of saved and restored
14920b57cec5SDimitry Andric /// EFLAGS registers. The restore should always be within the same basic block
14930b57cec5SDimitry Andric /// as the save so that no PHI nodes are inserted.
14940b57cec5SDimitry Andric unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
14950b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
149681ad6265SDimitry Andric     const DebugLoc &Loc) {
14970b57cec5SDimitry Andric   // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
14980b57cec5SDimitry Andric   // what instruction selection does.
14998bcb0991SDimitry Andric   Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
15000b57cec5SDimitry Andric   // We directly copy the FLAGS register and rely on later lowering to clean
15010b57cec5SDimitry Andric   // this up into the appropriate setCC instructions.
15020b57cec5SDimitry Andric   BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
15030b57cec5SDimitry Andric   ++NumInstsInserted;
15040b57cec5SDimitry Andric   return Reg;
15050b57cec5SDimitry Andric }
15060b57cec5SDimitry Andric 
15070b57cec5SDimitry Andric /// Restore EFLAGS from the provided GPR. This should be produced by
15080b57cec5SDimitry Andric /// `saveEFLAGS`.
15090b57cec5SDimitry Andric ///
15100b57cec5SDimitry Andric /// This must be done within the same basic block as the save in order to
15110b57cec5SDimitry Andric /// reliably lower.
15120b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
151381ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
151481ad6265SDimitry Andric     const DebugLoc &Loc, Register Reg) {
15150b57cec5SDimitry Andric   BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
15160b57cec5SDimitry Andric   ++NumInstsInserted;
15170b57cec5SDimitry Andric }
15180b57cec5SDimitry Andric 
15190b57cec5SDimitry Andric /// Takes the current predicate state (in a register) and merges it into the
15200b57cec5SDimitry Andric /// stack pointer. The state is essentially a single bit, but we merge this in
15210b57cec5SDimitry Andric /// a way that won't form non-canonical pointers and also will be preserved
15220b57cec5SDimitry Andric /// across normal stack adjustments.
15230b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
152481ad6265SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
152581ad6265SDimitry Andric     const DebugLoc &Loc, unsigned PredStateReg) {
15268bcb0991SDimitry Andric   Register TmpReg = MRI->createVirtualRegister(PS->RC);
15270b57cec5SDimitry Andric   // FIXME: This hard codes a shift distance based on the number of bits needed
15280b57cec5SDimitry Andric   // to stay canonical on 64-bit. We should compute this somehow and support
15290b57cec5SDimitry Andric   // 32-bit as part of that.
15300b57cec5SDimitry Andric   auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
15310b57cec5SDimitry Andric                     .addReg(PredStateReg, RegState::Kill)
15320b57cec5SDimitry Andric                     .addImm(47);
15330b57cec5SDimitry Andric   ShiftI->addRegisterDead(X86::EFLAGS, TRI);
15340b57cec5SDimitry Andric   ++NumInstsInserted;
15350b57cec5SDimitry Andric   auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
15360b57cec5SDimitry Andric                  .addReg(X86::RSP)
15370b57cec5SDimitry Andric                  .addReg(TmpReg, RegState::Kill);
15380b57cec5SDimitry Andric   OrI->addRegisterDead(X86::EFLAGS, TRI);
15390b57cec5SDimitry Andric   ++NumInstsInserted;
15400b57cec5SDimitry Andric }
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric /// Extracts the predicate state stored in the high bits of the stack pointer.
15430b57cec5SDimitry Andric unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
15440b57cec5SDimitry Andric     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
154581ad6265SDimitry Andric     const DebugLoc &Loc) {
15468bcb0991SDimitry Andric   Register PredStateReg = MRI->createVirtualRegister(PS->RC);
15478bcb0991SDimitry Andric   Register TmpReg = MRI->createVirtualRegister(PS->RC);
15480b57cec5SDimitry Andric 
15490b57cec5SDimitry Andric   // We know that the stack pointer will have any preserved predicate state in
15500b57cec5SDimitry Andric   // its high bit. We just want to smear this across the other bits. Turns out,
15510b57cec5SDimitry Andric   // this is exactly what an arithmetic right shift does.
15520b57cec5SDimitry Andric   BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
15530b57cec5SDimitry Andric       .addReg(X86::RSP);
15540b57cec5SDimitry Andric   auto ShiftI =
15550b57cec5SDimitry Andric       BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
15560b57cec5SDimitry Andric           .addReg(TmpReg, RegState::Kill)
15570b57cec5SDimitry Andric           .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
15580b57cec5SDimitry Andric   ShiftI->addRegisterDead(X86::EFLAGS, TRI);
15590b57cec5SDimitry Andric   ++NumInstsInserted;
15600b57cec5SDimitry Andric 
15610b57cec5SDimitry Andric   return PredStateReg;
15620b57cec5SDimitry Andric }
15630b57cec5SDimitry Andric 
15640b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
15650b57cec5SDimitry Andric     MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
15660b57cec5SDimitry Andric     SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
15670b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
1568fe6060f1SDimitry Andric   const DebugLoc &Loc = MI.getDebugLoc();
15690b57cec5SDimitry Andric 
15700b57cec5SDimitry Andric   // Check if EFLAGS are alive by seeing if there is a def of them or they
15710b57cec5SDimitry Andric   // live-in, and then seeing if that def is in turn used.
15720b57cec5SDimitry Andric   bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
15730b57cec5SDimitry Andric 
15740b57cec5SDimitry Andric   SmallVector<MachineOperand *, 2> HardenOpRegs;
15750b57cec5SDimitry Andric 
15760b57cec5SDimitry Andric   if (BaseMO.isFI()) {
15770b57cec5SDimitry Andric     // A frame index is never a dynamically controllable load, so only
15780b57cec5SDimitry Andric     // harden it if we're covering fixed address loads as well.
15790b57cec5SDimitry Andric     LLVM_DEBUG(
15800b57cec5SDimitry Andric         dbgs() << "  Skipping hardening base of explicit stack frame load: ";
15810b57cec5SDimitry Andric         MI.dump(); dbgs() << "\n");
15820b57cec5SDimitry Andric   } else if (BaseMO.getReg() == X86::RSP) {
15830b57cec5SDimitry Andric     // Some idempotent atomic operations are lowered directly to a locked
15840b57cec5SDimitry Andric     // OR with 0 to the top of stack(or slightly offset from top) which uses an
15850b57cec5SDimitry Andric     // explicit RSP register as the base.
15860b57cec5SDimitry Andric     assert(IndexMO.getReg() == X86::NoRegister &&
15870b57cec5SDimitry Andric            "Explicit RSP access with dynamic index!");
15880b57cec5SDimitry Andric     LLVM_DEBUG(
15890b57cec5SDimitry Andric         dbgs() << "  Cannot harden base of explicit RSP offset in a load!");
15900b57cec5SDimitry Andric   } else if (BaseMO.getReg() == X86::RIP ||
15910b57cec5SDimitry Andric              BaseMO.getReg() == X86::NoRegister) {
15920b57cec5SDimitry Andric     // For both RIP-relative addressed loads or absolute loads, we cannot
15930b57cec5SDimitry Andric     // meaningfully harden them because the address being loaded has no
15940b57cec5SDimitry Andric     // dynamic component.
15950b57cec5SDimitry Andric     //
15960b57cec5SDimitry Andric     // FIXME: When using a segment base (like TLS does) we end up with the
15970b57cec5SDimitry Andric     // dynamic address being the base plus -1 because we can't mutate the
15980b57cec5SDimitry Andric     // segment register here. This allows the signed 32-bit offset to point at
15990b57cec5SDimitry Andric     // valid segment-relative addresses and load them successfully.
16000b57cec5SDimitry Andric     LLVM_DEBUG(
16010b57cec5SDimitry Andric         dbgs() << "  Cannot harden base of "
16020b57cec5SDimitry Andric                << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
16030b57cec5SDimitry Andric                << " address in a load!");
16040b57cec5SDimitry Andric   } else {
16050b57cec5SDimitry Andric     assert(BaseMO.isReg() &&
16060b57cec5SDimitry Andric            "Only allowed to have a frame index or register base.");
16070b57cec5SDimitry Andric     HardenOpRegs.push_back(&BaseMO);
16080b57cec5SDimitry Andric   }
16090b57cec5SDimitry Andric 
16100b57cec5SDimitry Andric   if (IndexMO.getReg() != X86::NoRegister &&
16110b57cec5SDimitry Andric       (HardenOpRegs.empty() ||
16120b57cec5SDimitry Andric        HardenOpRegs.front()->getReg() != IndexMO.getReg()))
16130b57cec5SDimitry Andric     HardenOpRegs.push_back(&IndexMO);
16140b57cec5SDimitry Andric 
16150b57cec5SDimitry Andric   assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
16160b57cec5SDimitry Andric          "Should have exactly one or two registers to harden!");
16170b57cec5SDimitry Andric   assert((HardenOpRegs.size() == 1 ||
16180b57cec5SDimitry Andric           HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
16190b57cec5SDimitry Andric          "Should not have two of the same registers!");
16200b57cec5SDimitry Andric 
16210b57cec5SDimitry Andric   // Remove any registers that have alreaded been checked.
16220b57cec5SDimitry Andric   llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
16230b57cec5SDimitry Andric     // See if this operand's register has already been checked.
16240b57cec5SDimitry Andric     auto It = AddrRegToHardenedReg.find(Op->getReg());
16250b57cec5SDimitry Andric     if (It == AddrRegToHardenedReg.end())
16260b57cec5SDimitry Andric       // Not checked, so retain this one.
16270b57cec5SDimitry Andric       return false;
16280b57cec5SDimitry Andric 
16290b57cec5SDimitry Andric     // Otherwise, we can directly update this operand and remove it.
16300b57cec5SDimitry Andric     Op->setReg(It->second);
16310b57cec5SDimitry Andric     return true;
16320b57cec5SDimitry Andric   });
16330b57cec5SDimitry Andric   // If there are none left, we're done.
16340b57cec5SDimitry Andric   if (HardenOpRegs.empty())
16350b57cec5SDimitry Andric     return;
16360b57cec5SDimitry Andric 
16370b57cec5SDimitry Andric   // Compute the current predicate state.
163804eeddc0SDimitry Andric   Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
16390b57cec5SDimitry Andric 
16400b57cec5SDimitry Andric   auto InsertPt = MI.getIterator();
16410b57cec5SDimitry Andric 
16420b57cec5SDimitry Andric   // If EFLAGS are live and we don't have access to instructions that avoid
16430b57cec5SDimitry Andric   // clobbering EFLAGS we need to save and restore them. This in turn makes
16440b57cec5SDimitry Andric   // the EFLAGS no longer live.
16450b57cec5SDimitry Andric   unsigned FlagsReg = 0;
16460b57cec5SDimitry Andric   if (EFLAGSLive && !Subtarget->hasBMI2()) {
16470b57cec5SDimitry Andric     EFLAGSLive = false;
16480b57cec5SDimitry Andric     FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
16490b57cec5SDimitry Andric   }
16500b57cec5SDimitry Andric 
16510b57cec5SDimitry Andric   for (MachineOperand *Op : HardenOpRegs) {
16528bcb0991SDimitry Andric     Register OpReg = Op->getReg();
16530b57cec5SDimitry Andric     auto *OpRC = MRI->getRegClass(OpReg);
16548bcb0991SDimitry Andric     Register TmpReg = MRI->createVirtualRegister(OpRC);
16550b57cec5SDimitry Andric 
16560b57cec5SDimitry Andric     // If this is a vector register, we'll need somewhat custom logic to handle
16570b57cec5SDimitry Andric     // hardening it.
16580b57cec5SDimitry Andric     if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
16590b57cec5SDimitry Andric                                  OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
16600b57cec5SDimitry Andric       assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
16610b57cec5SDimitry Andric       bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
16620b57cec5SDimitry Andric 
16630b57cec5SDimitry Andric       // Move our state into a vector register.
16640b57cec5SDimitry Andric       // FIXME: We could skip this at the cost of longer encodings with AVX-512
16650b57cec5SDimitry Andric       // but that doesn't seem likely worth it.
16668bcb0991SDimitry Andric       Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
16670b57cec5SDimitry Andric       auto MovI =
16680b57cec5SDimitry Andric           BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
16690b57cec5SDimitry Andric               .addReg(StateReg);
16700b57cec5SDimitry Andric       (void)MovI;
16710b57cec5SDimitry Andric       ++NumInstsInserted;
16720b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting mov: "; MovI->dump(); dbgs() << "\n");
16730b57cec5SDimitry Andric 
16740b57cec5SDimitry Andric       // Broadcast it across the vector register.
16758bcb0991SDimitry Andric       Register VBStateReg = MRI->createVirtualRegister(OpRC);
16760b57cec5SDimitry Andric       auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
16770b57cec5SDimitry Andric                                 TII->get(Is128Bit ? X86::VPBROADCASTQrr
16780b57cec5SDimitry Andric                                                   : X86::VPBROADCASTQYrr),
16790b57cec5SDimitry Andric                                 VBStateReg)
16800b57cec5SDimitry Andric                             .addReg(VStateReg);
16810b57cec5SDimitry Andric       (void)BroadcastI;
16820b57cec5SDimitry Andric       ++NumInstsInserted;
16830b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting broadcast: "; BroadcastI->dump();
16840b57cec5SDimitry Andric                  dbgs() << "\n");
16850b57cec5SDimitry Andric 
16860b57cec5SDimitry Andric       // Merge our potential poison state into the value with a vector or.
16870b57cec5SDimitry Andric       auto OrI =
16880b57cec5SDimitry Andric           BuildMI(MBB, InsertPt, Loc,
16890b57cec5SDimitry Andric                   TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
16900b57cec5SDimitry Andric               .addReg(VBStateReg)
16910b57cec5SDimitry Andric               .addReg(OpReg);
16920b57cec5SDimitry Andric       (void)OrI;
16930b57cec5SDimitry Andric       ++NumInstsInserted;
16940b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
16950b57cec5SDimitry Andric     } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
16960b57cec5SDimitry Andric                OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
16970b57cec5SDimitry Andric                OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
16980b57cec5SDimitry Andric       assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
16990b57cec5SDimitry Andric       bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
17000b57cec5SDimitry Andric       bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
17010b57cec5SDimitry Andric       if (Is128Bit || Is256Bit)
17020b57cec5SDimitry Andric         assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
17030b57cec5SDimitry Andric 
17040b57cec5SDimitry Andric       // Broadcast our state into a vector register.
17058bcb0991SDimitry Andric       Register VStateReg = MRI->createVirtualRegister(OpRC);
17065ffd83dbSDimitry Andric       unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
17075ffd83dbSDimitry Andric                                       : Is256Bit ? X86::VPBROADCASTQrZ256rr
17085ffd83dbSDimitry Andric                                                  : X86::VPBROADCASTQrZrr;
17090b57cec5SDimitry Andric       auto BroadcastI =
17100b57cec5SDimitry Andric           BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
17110b57cec5SDimitry Andric               .addReg(StateReg);
17120b57cec5SDimitry Andric       (void)BroadcastI;
17130b57cec5SDimitry Andric       ++NumInstsInserted;
17140b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting broadcast: "; BroadcastI->dump();
17150b57cec5SDimitry Andric                  dbgs() << "\n");
17160b57cec5SDimitry Andric 
17170b57cec5SDimitry Andric       // Merge our potential poison state into the value with a vector or.
17180b57cec5SDimitry Andric       unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
17190b57cec5SDimitry Andric                                : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
17200b57cec5SDimitry Andric       auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
17210b57cec5SDimitry Andric                      .addReg(VStateReg)
17220b57cec5SDimitry Andric                      .addReg(OpReg);
17230b57cec5SDimitry Andric       (void)OrI;
17240b57cec5SDimitry Andric       ++NumInstsInserted;
17250b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
17260b57cec5SDimitry Andric     } else {
17270b57cec5SDimitry Andric       // FIXME: Need to support GR32 here for 32-bit code.
17280b57cec5SDimitry Andric       assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
17290b57cec5SDimitry Andric              "Not a supported register class for address hardening!");
17300b57cec5SDimitry Andric 
17310b57cec5SDimitry Andric       if (!EFLAGSLive) {
17320b57cec5SDimitry Andric         // Merge our potential poison state into the value with an or.
17330b57cec5SDimitry Andric         auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
17340b57cec5SDimitry Andric                        .addReg(StateReg)
17350b57cec5SDimitry Andric                        .addReg(OpReg);
17360b57cec5SDimitry Andric         OrI->addRegisterDead(X86::EFLAGS, TRI);
17370b57cec5SDimitry Andric         ++NumInstsInserted;
17380b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
17390b57cec5SDimitry Andric       } else {
17400b57cec5SDimitry Andric         // We need to avoid touching EFLAGS so shift out all but the least
17410b57cec5SDimitry Andric         // significant bit using the instruction that doesn't update flags.
17420b57cec5SDimitry Andric         auto ShiftI =
17430b57cec5SDimitry Andric             BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
17440b57cec5SDimitry Andric                 .addReg(OpReg)
17450b57cec5SDimitry Andric                 .addReg(StateReg);
17460b57cec5SDimitry Andric         (void)ShiftI;
17470b57cec5SDimitry Andric         ++NumInstsInserted;
17480b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  Inserting shrx: "; ShiftI->dump();
17490b57cec5SDimitry Andric                    dbgs() << "\n");
17500b57cec5SDimitry Andric       }
17510b57cec5SDimitry Andric     }
17520b57cec5SDimitry Andric 
17530b57cec5SDimitry Andric     // Record this register as checked and update the operand.
17540b57cec5SDimitry Andric     assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
17550b57cec5SDimitry Andric            "Should not have checked this register yet!");
17560b57cec5SDimitry Andric     AddrRegToHardenedReg[Op->getReg()] = TmpReg;
17570b57cec5SDimitry Andric     Op->setReg(TmpReg);
17580b57cec5SDimitry Andric     ++NumAddrRegsHardened;
17590b57cec5SDimitry Andric   }
17600b57cec5SDimitry Andric 
17610b57cec5SDimitry Andric   // And restore the flags if needed.
17620b57cec5SDimitry Andric   if (FlagsReg)
17630b57cec5SDimitry Andric     restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
17640b57cec5SDimitry Andric }
17650b57cec5SDimitry Andric 
17660b57cec5SDimitry Andric MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
17670b57cec5SDimitry Andric     MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
17685ffd83dbSDimitry Andric   assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
17690b57cec5SDimitry Andric          "Cannot get here with a non-invariant load!");
17705ffd83dbSDimitry Andric   assert(!isEFLAGSDefLive(InitialMI) &&
17715ffd83dbSDimitry Andric          "Cannot get here with a data invariant load "
17725ffd83dbSDimitry Andric          "that interferes with EFLAGS!");
17730b57cec5SDimitry Andric 
17740b57cec5SDimitry Andric   // See if we can sink hardening the loaded value.
17750b57cec5SDimitry Andric   auto SinkCheckToSingleUse =
1776bdd1243dSDimitry Andric       [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
17778bcb0991SDimitry Andric     Register DefReg = MI.getOperand(0).getReg();
17780b57cec5SDimitry Andric 
17790b57cec5SDimitry Andric     // We need to find a single use which we can sink the check. We can
17800b57cec5SDimitry Andric     // primarily do this because many uses may already end up checked on their
17810b57cec5SDimitry Andric     // own.
17820b57cec5SDimitry Andric     MachineInstr *SingleUseMI = nullptr;
17830b57cec5SDimitry Andric     for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
17845ffd83dbSDimitry Andric       // If we're already going to harden this use, it is data invariant, it
17855ffd83dbSDimitry Andric       // does not interfere with EFLAGS, and within our block.
17860b57cec5SDimitry Andric       if (HardenedInstrs.count(&UseMI)) {
17875ffd83dbSDimitry Andric         if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
17880b57cec5SDimitry Andric           // If we've already decided to harden a non-load, we must have sunk
17890b57cec5SDimitry Andric           // some other post-load hardened instruction to it and it must itself
17900b57cec5SDimitry Andric           // be data-invariant.
17915ffd83dbSDimitry Andric           assert(X86InstrInfo::isDataInvariant(UseMI) &&
17920b57cec5SDimitry Andric                  "Data variant instruction being hardened!");
17930b57cec5SDimitry Andric           continue;
17940b57cec5SDimitry Andric         }
17950b57cec5SDimitry Andric 
17960b57cec5SDimitry Andric         // Otherwise, this is a load and the load component can't be data
17970b57cec5SDimitry Andric         // invariant so check how this register is being used.
17987a6dacacSDimitry Andric         const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
17990b57cec5SDimitry Andric         assert(MemRefBeginIdx >= 0 &&
18000b57cec5SDimitry Andric                "Should always have mem references here!");
18010b57cec5SDimitry Andric 
18020b57cec5SDimitry Andric         MachineOperand &BaseMO =
18030b57cec5SDimitry Andric             UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
18040b57cec5SDimitry Andric         MachineOperand &IndexMO =
18050b57cec5SDimitry Andric             UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
18060b57cec5SDimitry Andric         if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
18070b57cec5SDimitry Andric             (IndexMO.isReg() && IndexMO.getReg() == DefReg))
18080b57cec5SDimitry Andric           // The load uses the register as part of its address making it not
18090b57cec5SDimitry Andric           // invariant.
18100b57cec5SDimitry Andric           return {};
18110b57cec5SDimitry Andric 
18120b57cec5SDimitry Andric         continue;
18130b57cec5SDimitry Andric       }
18140b57cec5SDimitry Andric 
18150b57cec5SDimitry Andric       if (SingleUseMI)
18160b57cec5SDimitry Andric         // We already have a single use, this would make two. Bail.
18170b57cec5SDimitry Andric         return {};
18180b57cec5SDimitry Andric 
18190b57cec5SDimitry Andric       // If this single use isn't data invariant, isn't in this block, or has
18200b57cec5SDimitry Andric       // interfering EFLAGS, we can't sink the hardening to it.
18215ffd83dbSDimitry Andric       if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
18225ffd83dbSDimitry Andric           isEFLAGSDefLive(UseMI))
18230b57cec5SDimitry Andric         return {};
18240b57cec5SDimitry Andric 
18250b57cec5SDimitry Andric       // If this instruction defines multiple registers bail as we won't harden
18260b57cec5SDimitry Andric       // all of them.
18270b57cec5SDimitry Andric       if (UseMI.getDesc().getNumDefs() > 1)
18280b57cec5SDimitry Andric         return {};
18290b57cec5SDimitry Andric 
18300b57cec5SDimitry Andric       // If this register isn't a virtual register we can't walk uses of sanely,
18310b57cec5SDimitry Andric       // just bail. Also check that its register class is one of the ones we
18320b57cec5SDimitry Andric       // can harden.
18338bcb0991SDimitry Andric       Register UseDefReg = UseMI.getOperand(0).getReg();
18347a6dacacSDimitry Andric       if (!canHardenRegister(UseDefReg))
18350b57cec5SDimitry Andric         return {};
18360b57cec5SDimitry Andric 
18370b57cec5SDimitry Andric       SingleUseMI = &UseMI;
18380b57cec5SDimitry Andric     }
18390b57cec5SDimitry Andric 
18400b57cec5SDimitry Andric     // If SingleUseMI is still null, there is no use that needs its own
18410b57cec5SDimitry Andric     // checking. Otherwise, it is the single use that needs checking.
18420b57cec5SDimitry Andric     return {SingleUseMI};
18430b57cec5SDimitry Andric   };
18440b57cec5SDimitry Andric 
18450b57cec5SDimitry Andric   MachineInstr *MI = &InitialMI;
1846bdd1243dSDimitry Andric   while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
18470b57cec5SDimitry Andric     // Update which MI we're checking now.
18480b57cec5SDimitry Andric     MI = *SingleUse;
18490b57cec5SDimitry Andric     if (!MI)
18500b57cec5SDimitry Andric       break;
18510b57cec5SDimitry Andric   }
18520b57cec5SDimitry Andric 
18530b57cec5SDimitry Andric   return MI;
18540b57cec5SDimitry Andric }
18550b57cec5SDimitry Andric 
1856e8d8bef9SDimitry Andric bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
18577a6dacacSDimitry Andric   // We only support hardening virtual registers.
18587a6dacacSDimitry Andric   if (!Reg.isVirtual())
18597a6dacacSDimitry Andric     return false;
18607a6dacacSDimitry Andric 
18610b57cec5SDimitry Andric   auto *RC = MRI->getRegClass(Reg);
18620b57cec5SDimitry Andric   int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
18630b57cec5SDimitry Andric   if (RegBytes > 8)
18640b57cec5SDimitry Andric     // We don't support post-load hardening of vectors.
18650b57cec5SDimitry Andric     return false;
18660b57cec5SDimitry Andric 
18678bcb0991SDimitry Andric   unsigned RegIdx = Log2_32(RegBytes);
18688bcb0991SDimitry Andric   assert(RegIdx < 4 && "Unsupported register size");
18698bcb0991SDimitry Andric 
18700b57cec5SDimitry Andric   // If this register class is explicitly constrained to a class that doesn't
18710b57cec5SDimitry Andric   // require REX prefix, we may not be able to satisfy that constraint when
18720b57cec5SDimitry Andric   // emitting the hardening instructions, so bail out here.
18730b57cec5SDimitry Andric   // FIXME: This seems like a pretty lame hack. The way this comes up is when we
18740b57cec5SDimitry Andric   // end up both with a NOREX and REX-only register as operands to the hardening
18750b57cec5SDimitry Andric   // instructions. It would be better to fix that code to handle this situation
18760b57cec5SDimitry Andric   // rather than hack around it in this way.
18770b57cec5SDimitry Andric   const TargetRegisterClass *NOREXRegClasses[] = {
18780b57cec5SDimitry Andric       &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
18790b57cec5SDimitry Andric       &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
18808bcb0991SDimitry Andric   if (RC == NOREXRegClasses[RegIdx])
18810b57cec5SDimitry Andric     return false;
18820b57cec5SDimitry Andric 
18830b57cec5SDimitry Andric   const TargetRegisterClass *GPRRegClasses[] = {
18840b57cec5SDimitry Andric       &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
18850b57cec5SDimitry Andric       &X86::GR64RegClass};
18868bcb0991SDimitry Andric   return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
18870b57cec5SDimitry Andric }
18880b57cec5SDimitry Andric 
18890b57cec5SDimitry Andric /// Harden a value in a register.
18900b57cec5SDimitry Andric ///
18910b57cec5SDimitry Andric /// This is the low-level logic to fully harden a value sitting in a register
18920b57cec5SDimitry Andric /// against leaking during speculative execution.
18930b57cec5SDimitry Andric ///
18940b57cec5SDimitry Andric /// Unlike hardening an address that is used by a load, this routine is required
18950b57cec5SDimitry Andric /// to hide *all* incoming bits in the register.
18960b57cec5SDimitry Andric ///
18970b57cec5SDimitry Andric /// `Reg` must be a virtual register. Currently, it is required to be a GPR no
18980b57cec5SDimitry Andric /// larger than the predicate state register. FIXME: We should support vector
18990b57cec5SDimitry Andric /// registers here by broadcasting the predicate state.
19000b57cec5SDimitry Andric ///
19010b57cec5SDimitry Andric /// The new, hardened virtual register is returned. It will have the same
19020b57cec5SDimitry Andric /// register class as `Reg`.
19030b57cec5SDimitry Andric unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1904e8d8bef9SDimitry Andric     Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
190581ad6265SDimitry Andric     const DebugLoc &Loc) {
19060b57cec5SDimitry Andric   assert(canHardenRegister(Reg) && "Cannot harden this register!");
19070b57cec5SDimitry Andric 
19080b57cec5SDimitry Andric   auto *RC = MRI->getRegClass(Reg);
19090b57cec5SDimitry Andric   int Bytes = TRI->getRegSizeInBits(*RC) / 8;
191004eeddc0SDimitry Andric   Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1911fe6060f1SDimitry Andric   assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1912fe6060f1SDimitry Andric          "Unknown register size");
19130b57cec5SDimitry Andric 
19140b57cec5SDimitry Andric   // FIXME: Need to teach this about 32-bit mode.
19150b57cec5SDimitry Andric   if (Bytes != 8) {
19160b57cec5SDimitry Andric     unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
19170b57cec5SDimitry Andric     unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
19188bcb0991SDimitry Andric     Register NarrowStateReg = MRI->createVirtualRegister(RC);
19190b57cec5SDimitry Andric     BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
19200b57cec5SDimitry Andric         .addReg(StateReg, 0, SubRegImm);
19210b57cec5SDimitry Andric     StateReg = NarrowStateReg;
19220b57cec5SDimitry Andric   }
19230b57cec5SDimitry Andric 
19240b57cec5SDimitry Andric   unsigned FlagsReg = 0;
19250b57cec5SDimitry Andric   if (isEFLAGSLive(MBB, InsertPt, *TRI))
19260b57cec5SDimitry Andric     FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
19270b57cec5SDimitry Andric 
19288bcb0991SDimitry Andric   Register NewReg = MRI->createVirtualRegister(RC);
19290b57cec5SDimitry Andric   unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
19300b57cec5SDimitry Andric   unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
19310b57cec5SDimitry Andric   auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
19320b57cec5SDimitry Andric                  .addReg(StateReg)
19330b57cec5SDimitry Andric                  .addReg(Reg);
19340b57cec5SDimitry Andric   OrI->addRegisterDead(X86::EFLAGS, TRI);
19350b57cec5SDimitry Andric   ++NumInstsInserted;
19360b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
19370b57cec5SDimitry Andric 
19380b57cec5SDimitry Andric   if (FlagsReg)
19390b57cec5SDimitry Andric     restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
19400b57cec5SDimitry Andric 
19410b57cec5SDimitry Andric   return NewReg;
19420b57cec5SDimitry Andric }
19430b57cec5SDimitry Andric 
19440b57cec5SDimitry Andric /// Harden a load by hardening the loaded value in the defined register.
19450b57cec5SDimitry Andric ///
19460b57cec5SDimitry Andric /// We can harden a non-leaking load into a register without touching the
19470b57cec5SDimitry Andric /// address by just hiding all of the loaded bits during misspeculation. We use
19480b57cec5SDimitry Andric /// an `or` instruction to do this because we set up our poison value as all
19490b57cec5SDimitry Andric /// ones. And the goal is just for the loaded bits to not be exposed to
19500b57cec5SDimitry Andric /// execution and coercing them to one is sufficient.
19510b57cec5SDimitry Andric ///
19520b57cec5SDimitry Andric /// Returns the newly hardened register.
19530b57cec5SDimitry Andric unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
19540b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
1955fe6060f1SDimitry Andric   const DebugLoc &Loc = MI.getDebugLoc();
19560b57cec5SDimitry Andric 
19570b57cec5SDimitry Andric   auto &DefOp = MI.getOperand(0);
19588bcb0991SDimitry Andric   Register OldDefReg = DefOp.getReg();
19590b57cec5SDimitry Andric   auto *DefRC = MRI->getRegClass(OldDefReg);
19600b57cec5SDimitry Andric 
19610b57cec5SDimitry Andric   // Because we want to completely replace the uses of this def'ed value with
19620b57cec5SDimitry Andric   // the hardened value, create a dedicated new register that will only be used
19630b57cec5SDimitry Andric   // to communicate the unhardened value to the hardening.
19648bcb0991SDimitry Andric   Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
19650b57cec5SDimitry Andric   DefOp.setReg(UnhardenedReg);
19660b57cec5SDimitry Andric 
19670b57cec5SDimitry Andric   // Now harden this register's value, getting a hardened reg that is safe to
19680b57cec5SDimitry Andric   // use. Note that we insert the instructions to compute this *after* the
19690b57cec5SDimitry Andric   // defining instruction, not before it.
19700b57cec5SDimitry Andric   unsigned HardenedReg = hardenValueInRegister(
19710b57cec5SDimitry Andric       UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
19720b57cec5SDimitry Andric 
19730b57cec5SDimitry Andric   // Finally, replace the old register (which now only has the uses of the
19740b57cec5SDimitry Andric   // original def) with the hardened register.
19750b57cec5SDimitry Andric   MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
19760b57cec5SDimitry Andric 
19770b57cec5SDimitry Andric   ++NumPostLoadRegsHardened;
19780b57cec5SDimitry Andric   return HardenedReg;
19790b57cec5SDimitry Andric }
19800b57cec5SDimitry Andric 
19810b57cec5SDimitry Andric /// Harden a return instruction.
19820b57cec5SDimitry Andric ///
19830b57cec5SDimitry Andric /// Returns implicitly perform a load which we need to harden. Without hardening
19840b57cec5SDimitry Andric /// this load, an attacker my speculatively write over the return address to
19850b57cec5SDimitry Andric /// steer speculation of the return to an attacker controlled address. This is
19860b57cec5SDimitry Andric /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
19870b57cec5SDimitry Andric /// this paper:
19880b57cec5SDimitry Andric /// https://people.csail.mit.edu/vlk/spectre11.pdf
19890b57cec5SDimitry Andric ///
19900b57cec5SDimitry Andric /// We can harden this by introducing an LFENCE that will delay any load of the
19910b57cec5SDimitry Andric /// return address until prior instructions have retired (and thus are not being
19920b57cec5SDimitry Andric /// speculated), or we can harden the address used by the implicit load: the
19930b57cec5SDimitry Andric /// stack pointer.
19940b57cec5SDimitry Andric ///
19950b57cec5SDimitry Andric /// If we are not using an LFENCE, hardening the stack pointer has an additional
19960b57cec5SDimitry Andric /// benefit: it allows us to pass the predicate state accumulated in this
19970b57cec5SDimitry Andric /// function back to the caller. In the absence of a BCBS attack on the return,
19980b57cec5SDimitry Andric /// the caller will typically be resumed and speculatively executed due to the
19990b57cec5SDimitry Andric /// Return Stack Buffer (RSB) prediction which is very accurate and has a high
20000b57cec5SDimitry Andric /// priority. It is possible that some code from the caller will be executed
20010b57cec5SDimitry Andric /// speculatively even during a BCBS-attacked return until the steering takes
20020b57cec5SDimitry Andric /// effect. Whenever this happens, the caller can recover the (poisoned)
20030b57cec5SDimitry Andric /// predicate state from the stack pointer and continue to harden loads.
20040b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
20050b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
2006fe6060f1SDimitry Andric   const DebugLoc &Loc = MI.getDebugLoc();
20070b57cec5SDimitry Andric   auto InsertPt = MI.getIterator();
20080b57cec5SDimitry Andric 
20090b57cec5SDimitry Andric   if (FenceCallAndRet)
20100b57cec5SDimitry Andric     // No need to fence here as we'll fence at the return site itself. That
20110b57cec5SDimitry Andric     // handles more cases than we can handle here.
20120b57cec5SDimitry Andric     return;
20130b57cec5SDimitry Andric 
20140b57cec5SDimitry Andric   // Take our predicate state, shift it to the high 17 bits (so that we keep
20150b57cec5SDimitry Andric   // pointers canonical) and merge it into RSP. This will allow the caller to
20160b57cec5SDimitry Andric   // extract it when we return (speculatively).
20170b57cec5SDimitry Andric   mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
20180b57cec5SDimitry Andric }
20190b57cec5SDimitry Andric 
20200b57cec5SDimitry Andric /// Trace the predicate state through a call.
20210b57cec5SDimitry Andric ///
20220b57cec5SDimitry Andric /// There are several layers of this needed to handle the full complexity of
20230b57cec5SDimitry Andric /// calls.
20240b57cec5SDimitry Andric ///
20250b57cec5SDimitry Andric /// First, we need to send the predicate state into the called function. We do
20260b57cec5SDimitry Andric /// this by merging it into the high bits of the stack pointer.
20270b57cec5SDimitry Andric ///
20280b57cec5SDimitry Andric /// For tail calls, this is all we need to do.
20290b57cec5SDimitry Andric ///
20300b57cec5SDimitry Andric /// For calls where we might return and resume the control flow, we need to
20310b57cec5SDimitry Andric /// extract the predicate state from the high bits of the stack pointer after
20320b57cec5SDimitry Andric /// control returns from the called function.
20330b57cec5SDimitry Andric ///
20340b57cec5SDimitry Andric /// We also need to verify that we intended to return to this location in the
20350b57cec5SDimitry Andric /// code. An attacker might arrange for the processor to mispredict the return
20360b57cec5SDimitry Andric /// to this valid but incorrect return address in the program rather than the
20370b57cec5SDimitry Andric /// correct one. See the paper on this attack, called "ret2spec" by the
20380b57cec5SDimitry Andric /// researchers, here:
20390b57cec5SDimitry Andric /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
20400b57cec5SDimitry Andric ///
20410b57cec5SDimitry Andric /// The way we verify that we returned to the correct location is by preserving
20420b57cec5SDimitry Andric /// the expected return address across the call. One technique involves taking
20430b57cec5SDimitry Andric /// advantage of the red-zone to load the return address from `8(%rsp)` where it
20440b57cec5SDimitry Andric /// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
20450b57cec5SDimitry Andric /// directly save the address into a register that will be preserved across the
20460b57cec5SDimitry Andric /// call. We compare this intended return address against the address
20470b57cec5SDimitry Andric /// immediately following the call (the observed return address). If these
20480b57cec5SDimitry Andric /// mismatch, we have detected misspeculation and can poison our predicate
20490b57cec5SDimitry Andric /// state.
20500b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
20510b57cec5SDimitry Andric     MachineInstr &MI) {
20520b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
20530b57cec5SDimitry Andric   MachineFunction &MF = *MBB.getParent();
20540b57cec5SDimitry Andric   auto InsertPt = MI.getIterator();
2055fe6060f1SDimitry Andric   const DebugLoc &Loc = MI.getDebugLoc();
20560b57cec5SDimitry Andric 
20570b57cec5SDimitry Andric   if (FenceCallAndRet) {
20580b57cec5SDimitry Andric     if (MI.isReturn())
20590b57cec5SDimitry Andric       // Tail call, we don't return to this function.
20600b57cec5SDimitry Andric       // FIXME: We should also handle noreturn calls.
20610b57cec5SDimitry Andric       return;
20620b57cec5SDimitry Andric 
20630b57cec5SDimitry Andric     // We don't need to fence before the call because the function should fence
20640b57cec5SDimitry Andric     // in its entry. However, we do need to fence after the call returns.
20650b57cec5SDimitry Andric     // Fencing before the return doesn't correctly handle cases where the return
20660b57cec5SDimitry Andric     // itself is mispredicted.
20670b57cec5SDimitry Andric     BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
20680b57cec5SDimitry Andric     ++NumInstsInserted;
20690b57cec5SDimitry Andric     ++NumLFENCEsInserted;
20700b57cec5SDimitry Andric     return;
20710b57cec5SDimitry Andric   }
20720b57cec5SDimitry Andric 
20730b57cec5SDimitry Andric   // First, we transfer the predicate state into the called function by merging
20740b57cec5SDimitry Andric   // it into the stack pointer. This will kill the current def of the state.
207504eeddc0SDimitry Andric   Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
20760b57cec5SDimitry Andric   mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
20770b57cec5SDimitry Andric 
20780b57cec5SDimitry Andric   // If this call is also a return, it is a tail call and we don't need anything
20790b57cec5SDimitry Andric   // else to handle it so just return. Also, if there are no further
20800b57cec5SDimitry Andric   // instructions and no successors, this call does not return so we can also
20810b57cec5SDimitry Andric   // bail.
20820b57cec5SDimitry Andric   if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
20830b57cec5SDimitry Andric     return;
20840b57cec5SDimitry Andric 
20850b57cec5SDimitry Andric   // Create a symbol to track the return address and attach it to the call
20860b57cec5SDimitry Andric   // machine instruction. We will lower extra symbols attached to call
20870b57cec5SDimitry Andric   // instructions as label immediately following the call.
20880b57cec5SDimitry Andric   MCSymbol *RetSymbol =
20890b57cec5SDimitry Andric       MF.getContext().createTempSymbol("slh_ret_addr",
20900b57cec5SDimitry Andric                                        /*AlwaysAddSuffix*/ true);
20910b57cec5SDimitry Andric   MI.setPostInstrSymbol(MF, RetSymbol);
20920b57cec5SDimitry Andric 
20930b57cec5SDimitry Andric   const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
20940b57cec5SDimitry Andric   unsigned ExpectedRetAddrReg = 0;
20950b57cec5SDimitry Andric 
20960b57cec5SDimitry Andric   // If we have no red zones or if the function returns twice (possibly without
20970b57cec5SDimitry Andric   // using the `ret` instruction) like setjmp, we need to save the expected
20980b57cec5SDimitry Andric   // return address prior to the call.
20990b57cec5SDimitry Andric   if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
21000b57cec5SDimitry Andric       MF.exposesReturnsTwice()) {
21010b57cec5SDimitry Andric     // If we don't have red zones, we need to compute the expected return
21020b57cec5SDimitry Andric     // address prior to the call and store it in a register that lives across
21030b57cec5SDimitry Andric     // the call.
21040b57cec5SDimitry Andric     //
21050b57cec5SDimitry Andric     // In some ways, this is doubly satisfying as a mitigation because it will
21060b57cec5SDimitry Andric     // also successfully detect stack smashing bugs in some cases (typically,
21070b57cec5SDimitry Andric     // when a callee-saved register is used and the callee doesn't push it onto
21080b57cec5SDimitry Andric     // the stack). But that isn't our primary goal, so we only use it as
21090b57cec5SDimitry Andric     // a fallback.
21100b57cec5SDimitry Andric     //
21110b57cec5SDimitry Andric     // FIXME: It isn't clear that this is reliable in the face of
21120b57cec5SDimitry Andric     // rematerialization in the register allocator. We somehow need to force
21130b57cec5SDimitry Andric     // that to not occur for this particular instruction, and instead to spill
21140b57cec5SDimitry Andric     // or otherwise preserve the value computed *prior* to the call.
21150b57cec5SDimitry Andric     //
21160b57cec5SDimitry Andric     // FIXME: It is even less clear why MachineCSE can't just fold this when we
21170b57cec5SDimitry Andric     // end up having to use identical instructions both before and after the
21180b57cec5SDimitry Andric     // call to feed the comparison.
21190b57cec5SDimitry Andric     ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
21200b57cec5SDimitry Andric     if (MF.getTarget().getCodeModel() == CodeModel::Small &&
21210b57cec5SDimitry Andric         !Subtarget->isPositionIndependent()) {
21220b57cec5SDimitry Andric       BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
21230b57cec5SDimitry Andric           .addSym(RetSymbol);
21240b57cec5SDimitry Andric     } else {
21250b57cec5SDimitry Andric       BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
21260b57cec5SDimitry Andric           .addReg(/*Base*/ X86::RIP)
21270b57cec5SDimitry Andric           .addImm(/*Scale*/ 1)
21280b57cec5SDimitry Andric           .addReg(/*Index*/ 0)
21290b57cec5SDimitry Andric           .addSym(RetSymbol)
21300b57cec5SDimitry Andric           .addReg(/*Segment*/ 0);
21310b57cec5SDimitry Andric     }
21320b57cec5SDimitry Andric   }
21330b57cec5SDimitry Andric 
21340b57cec5SDimitry Andric   // Step past the call to handle when it returns.
21350b57cec5SDimitry Andric   ++InsertPt;
21360b57cec5SDimitry Andric 
21370b57cec5SDimitry Andric   // If we didn't pre-compute the expected return address into a register, then
21380b57cec5SDimitry Andric   // red zones are enabled and the return address is still available on the
21390b57cec5SDimitry Andric   // stack immediately after the call. As the very first instruction, we load it
21400b57cec5SDimitry Andric   // into a register.
21410b57cec5SDimitry Andric   if (!ExpectedRetAddrReg) {
21420b57cec5SDimitry Andric     ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
21430b57cec5SDimitry Andric     BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
21440b57cec5SDimitry Andric         .addReg(/*Base*/ X86::RSP)
21450b57cec5SDimitry Andric         .addImm(/*Scale*/ 1)
21460b57cec5SDimitry Andric         .addReg(/*Index*/ 0)
21470b57cec5SDimitry Andric         .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
21480b57cec5SDimitry Andric                                      // the return address is 8-bytes past it.
21490b57cec5SDimitry Andric         .addReg(/*Segment*/ 0);
21500b57cec5SDimitry Andric   }
21510b57cec5SDimitry Andric 
21520b57cec5SDimitry Andric   // Now we extract the callee's predicate state from the stack pointer.
21530b57cec5SDimitry Andric   unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
21540b57cec5SDimitry Andric 
21550b57cec5SDimitry Andric   // Test the expected return address against our actual address. If we can
21560b57cec5SDimitry Andric   // form this basic block's address as an immediate, this is easy. Otherwise
21570b57cec5SDimitry Andric   // we compute it.
21580b57cec5SDimitry Andric   if (MF.getTarget().getCodeModel() == CodeModel::Small &&
21590b57cec5SDimitry Andric       !Subtarget->isPositionIndependent()) {
21600b57cec5SDimitry Andric     // FIXME: Could we fold this with the load? It would require careful EFLAGS
21610b57cec5SDimitry Andric     // management.
21620b57cec5SDimitry Andric     BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
21630b57cec5SDimitry Andric         .addReg(ExpectedRetAddrReg, RegState::Kill)
21640b57cec5SDimitry Andric         .addSym(RetSymbol);
21650b57cec5SDimitry Andric   } else {
21668bcb0991SDimitry Andric     Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
21670b57cec5SDimitry Andric     BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
21680b57cec5SDimitry Andric         .addReg(/*Base*/ X86::RIP)
21690b57cec5SDimitry Andric         .addImm(/*Scale*/ 1)
21700b57cec5SDimitry Andric         .addReg(/*Index*/ 0)
21710b57cec5SDimitry Andric         .addSym(RetSymbol)
21720b57cec5SDimitry Andric         .addReg(/*Segment*/ 0);
21730b57cec5SDimitry Andric     BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
21740b57cec5SDimitry Andric         .addReg(ExpectedRetAddrReg, RegState::Kill)
21750b57cec5SDimitry Andric         .addReg(ActualRetAddrReg, RegState::Kill);
21760b57cec5SDimitry Andric   }
21770b57cec5SDimitry Andric 
21780b57cec5SDimitry Andric   // Now conditionally update the predicate state we just extracted if we ended
21790b57cec5SDimitry Andric   // up at a different return address than expected.
21800b57cec5SDimitry Andric   int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
21810b57cec5SDimitry Andric   auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
21820b57cec5SDimitry Andric 
21838bcb0991SDimitry Andric   Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
21840b57cec5SDimitry Andric   auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
21850b57cec5SDimitry Andric                    .addReg(NewStateReg, RegState::Kill)
21860b57cec5SDimitry Andric                    .addReg(PS->PoisonReg)
21870b57cec5SDimitry Andric                    .addImm(X86::COND_NE);
2188*0fca6ea1SDimitry Andric   CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
21890b57cec5SDimitry Andric   ++NumInstsInserted;
21900b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
21910b57cec5SDimitry Andric 
21920b57cec5SDimitry Andric   PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
21930b57cec5SDimitry Andric }
21940b57cec5SDimitry Andric 
21950b57cec5SDimitry Andric /// An attacker may speculatively store over a value that is then speculatively
21960b57cec5SDimitry Andric /// loaded and used as the target of an indirect call or jump instruction. This
21970b57cec5SDimitry Andric /// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
21980b57cec5SDimitry Andric /// in this paper:
21990b57cec5SDimitry Andric /// https://people.csail.mit.edu/vlk/spectre11.pdf
22000b57cec5SDimitry Andric ///
22010b57cec5SDimitry Andric /// When this happens, the speculative execution of the call or jump will end up
22020b57cec5SDimitry Andric /// being steered to this attacker controlled address. While most such loads
22030b57cec5SDimitry Andric /// will be adequately hardened already, we want to ensure that they are
22040b57cec5SDimitry Andric /// definitively treated as needing post-load hardening. While address hardening
22050b57cec5SDimitry Andric /// is sufficient to prevent secret data from leaking to the attacker, it may
22060b57cec5SDimitry Andric /// not be sufficient to prevent an attacker from steering speculative
22070b57cec5SDimitry Andric /// execution. We forcibly unfolded all relevant loads above and so will always
22080b57cec5SDimitry Andric /// have an opportunity to post-load harden here, we just need to scan for cases
22090b57cec5SDimitry Andric /// not already flagged and add them.
22100b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
22110b57cec5SDimitry Andric     MachineInstr &MI,
22120b57cec5SDimitry Andric     SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
22130b57cec5SDimitry Andric   switch (MI.getOpcode()) {
22140b57cec5SDimitry Andric   case X86::FARCALL16m:
22150b57cec5SDimitry Andric   case X86::FARCALL32m:
22165ffd83dbSDimitry Andric   case X86::FARCALL64m:
22170b57cec5SDimitry Andric   case X86::FARJMP16m:
22180b57cec5SDimitry Andric   case X86::FARJMP32m:
22195ffd83dbSDimitry Andric   case X86::FARJMP64m:
22200b57cec5SDimitry Andric     // We don't need to harden either far calls or far jumps as they are
22210b57cec5SDimitry Andric     // safe from Spectre.
22220b57cec5SDimitry Andric     return;
22230b57cec5SDimitry Andric 
22240b57cec5SDimitry Andric   default:
22250b57cec5SDimitry Andric     break;
22260b57cec5SDimitry Andric   }
22270b57cec5SDimitry Andric 
22280b57cec5SDimitry Andric   // We should never see a loading instruction at this point, as those should
22290b57cec5SDimitry Andric   // have been unfolded.
22300b57cec5SDimitry Andric   assert(!MI.mayLoad() && "Found a lingering loading instruction!");
22310b57cec5SDimitry Andric 
22320b57cec5SDimitry Andric   // If the first operand isn't a register, this is a branch or call
22330b57cec5SDimitry Andric   // instruction with an immediate operand which doesn't need to be hardened.
22340b57cec5SDimitry Andric   if (!MI.getOperand(0).isReg())
22350b57cec5SDimitry Andric     return;
22360b57cec5SDimitry Andric 
22370b57cec5SDimitry Andric   // For all of these, the target register is the first operand of the
22380b57cec5SDimitry Andric   // instruction.
22390b57cec5SDimitry Andric   auto &TargetOp = MI.getOperand(0);
22408bcb0991SDimitry Andric   Register OldTargetReg = TargetOp.getReg();
22410b57cec5SDimitry Andric 
22420b57cec5SDimitry Andric   // Try to lookup a hardened version of this register. We retain a reference
22430b57cec5SDimitry Andric   // here as we want to update the map to track any newly computed hardened
22440b57cec5SDimitry Andric   // register.
22450b57cec5SDimitry Andric   unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
22460b57cec5SDimitry Andric 
22470b57cec5SDimitry Andric   // If we don't have a hardened register yet, compute one. Otherwise, just use
22480b57cec5SDimitry Andric   // the already hardened register.
22490b57cec5SDimitry Andric   //
22500b57cec5SDimitry Andric   // FIXME: It is a little suspect that we use partially hardened registers that
22510b57cec5SDimitry Andric   // only feed addresses. The complexity of partial hardening with SHRX
22520b57cec5SDimitry Andric   // continues to pile up. Should definitively measure its value and consider
22530b57cec5SDimitry Andric   // eliminating it.
22540b57cec5SDimitry Andric   if (!HardenedTargetReg)
22550b57cec5SDimitry Andric     HardenedTargetReg = hardenValueInRegister(
22560b57cec5SDimitry Andric         OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
22570b57cec5SDimitry Andric 
22580b57cec5SDimitry Andric   // Set the target operand to the hardened register.
22590b57cec5SDimitry Andric   TargetOp.setReg(HardenedTargetReg);
22600b57cec5SDimitry Andric 
22610b57cec5SDimitry Andric   ++NumCallsOrJumpsHardened;
22620b57cec5SDimitry Andric }
22630b57cec5SDimitry Andric 
22640b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
22650b57cec5SDimitry Andric                       "X86 speculative load hardener", false, false)
22660b57cec5SDimitry Andric INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
22670b57cec5SDimitry Andric                     "X86 speculative load hardener", false, false)
22680b57cec5SDimitry Andric 
22690b57cec5SDimitry Andric FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
22700b57cec5SDimitry Andric   return new X86SpeculativeLoadHardeningPass();
22710b57cec5SDimitry Andric }
2272