//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Provide a pass which mitigates speculative execution attacks which operate
/// by speculating incorrectly past some predicate (a type check, bounds check,
/// or other condition) to reach a load with invalid inputs and leak the data
/// accessed by that load using a side channel out of the speculative domain.
///
/// For details on the attacks, see the first variant in both the Project Zero
/// writeup and the Spectre paper:
/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
/// https://spectreattack.com/spectre.pdf
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <optional>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-slh"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
STATISTIC(NumAddrRegsHardened,
          "Number of address mode used registers hardened");
STATISTIC(NumPostLoadRegsHardened,
          "Number of post-load register values hardened");
STATISTIC(NumCallsOrJumpsHardened,
          "Number of calls or jumps requiring extra hardening");
STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");

static cl::opt<bool> EnableSpeculativeLoadHardening(
    "x86-speculative-load-hardening",
    cl::desc("Force enable speculative load hardening"), cl::init(false),
    cl::Hidden);

static cl::opt<bool> HardenEdgesWithLFENCE(
    PASS_KEY "-lfence",
    cl::desc(
        "Use LFENCE along each conditional edge to harden against speculative "
        "loads rather than conditional movs and poisoned pointers."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> EnablePostLoadHardening(
    PASS_KEY "-post-load",
    cl::desc("Harden the value loaded *after* it is loaded by "
             "flushing the loaded bits to 1. This is hard to do "
             "in general but can be done easily for GPRs."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> FenceCallAndRet(
    PASS_KEY "-fence-call-and-ret",
    cl::desc("Use a full speculation fence to harden both call and ret edges "
             "rather than a lighter weight mitigation."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> HardenInterprocedurally(
    PASS_KEY "-ip",
    cl::desc("Harden interprocedurally by passing our state in and out of "
             "functions in the high bits of the stack pointer."),
    cl::init(true), cl::Hidden);

static cl::opt<bool>
    HardenLoads(PASS_KEY "-loads",
                cl::desc("Sanitize loads from memory. When disabled, no "
                         "significant security is provided."),
                cl::init(true), cl::Hidden);

static cl::opt<bool> HardenIndirectCallsAndJumps(
    PASS_KEY "-indirect",
    cl::desc("Harden indirect calls and jumps against using speculatively "
             "stored attacker controlled addresses. This is designed to "
             "mitigate Spectre v1.2 style attacks."),
    cl::init(true), cl::Hidden);

namespace {

class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
public:
  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }

  StringRef getPassName() const override {
    return "X86 speculative load hardening";
  }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  /// The information about a block's conditional terminators needed to trace
  /// our predicate state through the exiting edges.
  struct BlockCondInfo {
    MachineBasicBlock *MBB;

    // We mostly have one conditional branch, and in extremely rare cases have
    // two. Three and more are so rare as to be unimportant for compile time.
    SmallVector<MachineInstr *, 2> CondBrs;

    MachineInstr *UncondBr;
  };

  /// Manages the predicate state traced through the program.
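  ///
  /// The state lives in a 64-bit register that stays zero along correctly
  /// predicted paths and becomes all-ones (the poison value) once
  /// misspeculation is detected; the embedded MachineSSAUpdater is used to
  /// rewrite its uses into SSA form across the CFG.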
  struct PredState {
    unsigned InitialReg = 0;
    unsigned PoisonReg = 0;

    const TargetRegisterClass *RC;
    MachineSSAUpdater SSA;

    PredState(MachineFunction &MF, const TargetRegisterClass *RC)
        : RC(RC), SSA(MF) {}
  };

  const X86Subtarget *Subtarget = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

  std::optional<PredState> PS;

  void hardenEdgesWithLFENCE(MachineFunction &MF);

  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);

  SmallVector<MachineInstr *, 16>
  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);

  void unfoldCallAndJumpLoads(MachineFunction &MF);

  SmallVector<MachineInstr *, 16>
  tracePredStateThroughIndirectBranches(MachineFunction &MF);

  void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);

  unsigned saveEFLAGS(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator InsertPt,
                      const DebugLoc &Loc);
  void restoreEFLAGS(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
                     Register Reg);

  void mergePredStateIntoSP(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertPt,
                            const DebugLoc &Loc, unsigned PredStateReg);
  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator InsertPt,
                                  const DebugLoc &Loc);

  void
  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
                 MachineOperand &IndexMO,
                 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
  MachineInstr *
  sinkPostLoadHardenedInst(MachineInstr &MI,
                           SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
  bool canHardenRegister(Register Reg);
  unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator InsertPt,
                                 const DebugLoc &Loc);
  unsigned hardenPostLoad(MachineInstr &MI);
  void hardenReturnInstr(MachineInstr &MI);
  void tracePredStateThroughCall(MachineInstr &MI);
  void hardenIndirectCallOrJumpInstr(
      MachineInstr &MI,
      SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
};

} // end anonymous namespace

char X86SpeculativeLoadHardeningPass::ID = 0;

void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
}

static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
                                    MachineBasicBlock &Succ, int SuccCount,
                                    MachineInstr *Br, MachineInstr *&UncondBr,
                                    const X86InstrInfo &TII) {
  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");

  MachineFunction &MF = *MBB.getParent();

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // We have to insert the new block immediately after the current one as we
  // don't know what layout-successor relationships the successor has and we
  // may not be able to (and generally don't want to) try to fix those up.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Update the branch instruction if necessary.
  if (Br) {
    assert(Br->getOperand(0).getMBB() == &Succ &&
           "Didn't start with the right target!");
    Br->getOperand(0).setMBB(&NewMBB);

    // If this successor was reached through a branch rather than fallthrough,
    // we might have *broken* fallthrough and so need to inject a new
    // unconditional branch.
    if (!UncondBr) {
      MachineBasicBlock &OldLayoutSucc =
          *std::next(MachineFunction::iterator(&NewMBB));
      assert(MBB.isSuccessor(&OldLayoutSucc) &&
             "Without an unconditional branch, the old layout successor should "
             "be an actual successor!");
      auto BrBuilder =
          BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
      // Update the unconditional branch now that we've added one.
      UncondBr = &*BrBuilder;
    }

    // Insert unconditional "jump Succ" instruction in the new block if
    // necessary.
    if (!NewMBB.isLayoutSuccessor(&Succ)) {
      SmallVector<MachineOperand, 4> Cond;
      TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
    }
  } else {
    assert(!UncondBr &&
           "Cannot have a branchless successor and an unconditional branch!");
    assert(NewMBB.isLayoutSuccessor(&Succ) &&
           "A non-branch successor must have been a layout successor before "
           "and now is a layout successor of the new block.");
  }

  // If this is the only edge to the successor, we can just replace it in the
  // CFG. Otherwise we need to add a new entry in the CFG for the new
  // successor.
  if (SuccCount == 1) {
    MBB.replaceSuccessor(&Succ, &NewMBB);
  } else {
    MBB.splitSuccessor(&Succ, &NewMBB);
  }

  // Hook up the edge from the new basic block to the old successor in the CFG.
  NewMBB.addSuccessor(&Succ);

  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
  for (MachineInstr &MI : Succ) {
    if (!MI.isPHI())
      break;
    for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
         OpIdx += 2) {
      MachineOperand &OpV = MI.getOperand(OpIdx);
      MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
      assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
      if (OpMBB.getMBB() != &MBB)
        continue;

      // If this is the last edge to the successor, just replace MBB in the
      // PHI.
      if (SuccCount == 1) {
        OpMBB.setMBB(&NewMBB);
        break;
      }

      // Otherwise, append a new pair of operands for the new incoming edge.
      MI.addOperand(MF, OpV);
      MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
      break;
    }
  }

  // Inherit live-ins from the successor.
  for (auto &LI : Succ.liveins())
    NewMBB.addLiveIn(LI);

  LLVM_DEBUG(dbgs() << "  Split edge from '" << MBB.getName() << "' to '"
                    << Succ.getName() << "'.\n");
  return NewMBB;
}

/// Remove duplicate PHI operands to leave the PHI in a canonical and
/// predictable form.
///
/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
/// isn't what you might expect. We may have multiple entries in PHI nodes for
/// a single predecessor. This makes CFG-updating extremely complex, so here we
/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
/// one entry per predecessor, regardless of how many edges there are.
static void canonicalizePHIOperands(MachineFunction &MF) {
  SmallPtrSet<MachineBasicBlock *, 4> Preds;
  SmallVector<int, 4> DupIndices;
  for (auto &MBB : MF)
    for (auto &MI : MBB) {
      if (!MI.isPHI())
        break;

      // First we scan the operands of the PHI looking for duplicate entries
      // for a particular predecessor. We retain the operand index of each
      // duplicate entry found.
      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2)
        if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
          DupIndices.push_back(OpIdx);

      // Now walk the duplicate indices, removing both the block and value.
      // Note that these are stored as a vector making this element-wise
      // removal potentially quadratic.
      //
      // FIXME: It is really frustrating that we have to use a quadratic
      // removal algorithm here. There should be a better way, but the use-def
      // updates required make that impossible using the public API.
      //
      // Note that we have to process these backwards so that we don't
      // invalidate other indices with each removal.
      while (!DupIndices.empty()) {
        int OpIdx = DupIndices.pop_back_val();
        // Remove both the block and value operand, again in reverse order to
        // preserve indices.
        MI.removeOperand(OpIdx + 1);
        MI.removeOperand(OpIdx);
      }

      Preds.clear();
    }
}

/// Helper to scan a function for loads vulnerable to misspeculation that we
/// want to harden.
///
/// We use this to avoid making changes to functions where there is nothing we
/// need to do to harden against misspeculation.
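///
/// The scan treats an LFENCE as ending the vulnerable region of a block, and
/// it skips MFENCE, which is modeled as a load but cannot misspeculate.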
static bool hasVulnerableLoad(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // Loads within this basic block after an LFENCE are not at risk of
      // speculatively executing with invalid predicates from prior control
      // flow. So break out of this block but continue scanning the function.
      if (MI.getOpcode() == X86::LFENCE)
        break;

      // Looking for loads only.
      if (!MI.mayLoad())
        continue;

      // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
      if (MI.getOpcode() == X86::MFENCE)
        continue;

      // We found a load.
      return true;
    }
  }

  // No loads found.
  return false;
}

bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
    MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  // Only run if this pass is force-enabled or we detect the relevant function
  // attribute requesting SLH.
  if (!EnableSpeculativeLoadHardening &&
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
    return false;

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();

  // FIXME: Support for 32-bit.
  PS.emplace(MF, &X86::GR64_NOSPRegClass);

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // We support an alternative hardening technique based on a debug flag.
  if (HardenEdgesWithLFENCE) {
    hardenEdgesWithLFENCE(MF);
    return true;
  }

  // Create a dummy debug loc to use for all the generated code here.
  DebugLoc Loc;

  MachineBasicBlock &Entry = *MF.begin();
  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());

  // Do a quick scan to see if we have any checkable loads.
  bool HasVulnerableLoad = hasVulnerableLoad(MF);

  // See if we have any conditional branching blocks that we will need to trace
  // predicate state through.
  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);

  // If we have no interesting conditions or loads, nothing to do here.
  if (!HasVulnerableLoad && Infos.empty())
    return true;

  // The poison value is required to be an all-ones value for many aspects of
  // this mitigation.
  const int PoisonVal = -1;
  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
      .addImm(PoisonVal);
  ++NumInstsInserted;

  // If we have loads being hardened and we've asked for call and ret edges to
  // get a full fence-based mitigation, inject that fence.
  if (HasVulnerableLoad && FenceCallAndRet) {
    // We need to insert an LFENCE at the start of the function to suspend any
    // incoming misspeculation from the caller. This helps two-fold: the caller
    // may not have been protected as this code has been, and this code gets to
    // not take any specific action to protect across calls.
    // FIXME: We could skip this for functions which unconditionally return
    // a constant.
    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
  }

  // If we guarded the entry with an LFENCE and have no conditionals to protect
  // in blocks, then we're done.
  if (FenceCallAndRet && Infos.empty())
    // We may have changed the function's code at this point to insert fences.
    return true;

  if (HardenInterprocedurally && !FenceCallAndRet) {
    // Set up the predicate state by extracting it from the incoming stack
    // pointer so we pick up any misspeculation in our caller.
    PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
  } else {
    // Otherwise, just build the predicate state itself by zeroing a register
    // as we don't need any initial state.
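    // Zeroing a 32-bit register and widening it with SUBREG_TO_REG gives us a
    // zero 64-bit state without a 64-bit zeroing instruction; the MOV32r0
    // pseudo clobbers EFLAGS, so its implicit def is marked dead below.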
    PS->InitialReg = MRI->createVirtualRegister(PS->RC);
    Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
    auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
                         PredStateSubReg);
    ++NumInstsInserted;
    MachineOperand *ZeroEFLAGSDefOp =
        ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
    assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
           "Must have an implicit def of EFLAGS!");
    ZeroEFLAGSDefOp->setIsDead(true);
    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
            PS->InitialReg)
        .addImm(0)
        .addReg(PredStateSubReg)
        .addImm(X86::sub_32bit);
  }

  // We're going to need to trace predicate state throughout the function's
  // CFG. Prepare for this by setting up our initial state of PHIs with unique
  // predecessor entries and all the initial predicate state.
  canonicalizePHIOperands(MF);

  // Track the updated values in an SSA updater to rewrite into SSA form at the
  // end.
  PS->SSA.Initialize(PS->InitialReg);
  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);

  // Trace through the CFG.
  auto CMovs = tracePredStateThroughCFG(MF, Infos);

  // We may also enter basic blocks in this function via exception handling
  // control flow. Here, if we are hardening interprocedurally, we need to
  // re-capture the predicate state from the throwing code. In the Itanium ABI,
  // the throw will always look like a call to __cxa_throw and will have the
  // predicate state in the stack pointer, so extract fresh predicate state
  // from the stack pointer and make it available in SSA.
  // FIXME: Handle non-Itanium ABI EH models.
  if (HardenInterprocedurally) {
    for (MachineBasicBlock &MBB : MF) {
      assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
      assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
      assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
      if (!MBB.isEHPad())
        continue;
      PS->SSA.AddAvailableValue(
          &MBB,
          extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
    }
  }

  if (HardenIndirectCallsAndJumps) {
    // If we are going to harden calls and jumps we need to unfold their memory
    // operands.
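    // Unfolding turns a memory-operand call or jump into an explicit load
    // feeding a register-operand call or jump, so the loaded target can be
    // hardened like any other loaded value before it is used.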
    unfoldCallAndJumpLoads(MF);

    // Then we trace predicate state through the indirect branches.
    auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
    CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
  }

  // Now that we have the predicate state available at the start of each block
  // in the CFG, trace it through each block, hardening vulnerable instructions
  // as we go.
  tracePredStateThroughBlocksAndHarden(MF);

  // Now rewrite all the uses of the pred state using the SSA updater to insert
  // PHIs connecting the state between blocks along the CFG edges.
  for (MachineInstr *CMovI : CMovs)
    for (MachineOperand &Op : CMovI->operands()) {
      if (!Op.isReg() || Op.getReg() != PS->InitialReg)
        continue;

      PS->SSA.RewriteUse(Op);
    }

  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
             dbgs() << "\n"; MF.verify(this));
  return true;
}

/// Implements the naive hardening approach of putting an LFENCE after every
/// potentially mis-predicted control flow construct.
///
/// We include this as an alternative mostly for the purpose of comparison. The
/// performance impact of this is expected to be extremely severe and not
/// practical for any real-world users.
void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
    MachineFunction &MF) {
  // First, we scan the function looking for blocks that are reached along
  // edges that we might want to harden.
  SmallSetVector<MachineBasicBlock *, 8> Blocks;
  for (MachineBasicBlock &MBB : MF) {
    // If there are no or only one successor, nothing to do here.
    if (MBB.succ_size() <= 1)
      continue;

    // Skip blocks unless their terminators start with a branch. Other
    // terminators don't seem interesting for guarding against misspeculation.
    auto TermIt = MBB.getFirstTerminator();
    if (TermIt == MBB.end() || !TermIt->isBranch())
      continue;

    // Add all the non-EH-pad successors to the blocks we want to harden. We
    // skip EH pads because there isn't really a condition of interest on
    // entering.
    for (MachineBasicBlock *SuccMBB : MBB.successors())
      if (!SuccMBB->isEHPad())
        Blocks.insert(SuccMBB);
  }

  for (MachineBasicBlock *MBB : Blocks) {
    auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
    BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
  }
}

SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
  SmallVector<BlockCondInfo, 16> Infos;

  // Walk the function and build up a summary for each block's conditions that
  // we need to trace through.
  for (MachineBasicBlock &MBB : MF) {
    // If there are no or only one successor, nothing to do here.
    if (MBB.succ_size() <= 1)
      continue;

    // We want to reliably handle any conditional branch terminators in the
    // MBB, so we manually analyze the branch. We can handle all of the
    // permutations here, including ones that analyzeBranch cannot.
    //
    // The approach is to walk backwards across the terminators, resetting at
    // any unconditional non-indirect branch, and track all conditional edges
    // to basic blocks as well as the fallthrough or unconditional successor
    // edge. For each conditional edge, we track the target and the opposite
    // condition code in order to inject a "no-op" cmov into that successor
    // that will harden the predicate. For the fallthrough/unconditional
    // edge, we inject a separate cmov for each conditional branch with
    // matching condition codes. This effectively implements an "and" of the
    // condition flags, even if there isn't a single condition flag that would
    // directly implement that. We don't bother trying to optimize either of
    // these cases because if such an optimization is possible, LLVM should
    // have optimized the conditional *branches* in that way already to reduce
    // instruction count. This late, we simply assume the minimal number of
    // branch instructions is being emitted and use that to guide our cmov
    // insertion.

    BlockCondInfo Info = {&MBB, {}, nullptr};

    // Now walk backwards through the terminators and build up successors they
    // reach and the conditions.
    for (MachineInstr &MI : llvm::reverse(MBB)) {
      // Once we've handled all the terminators, we're done.
      if (!MI.isTerminator())
        break;

      // If we see a non-branch terminator, we can't handle anything so bail.
      if (!MI.isBranch()) {
        Info.CondBrs.clear();
        break;
      }

      // If we see an unconditional branch, reset our state, clear any
      // fallthrough, and set this as the "else" successor.
      if (MI.getOpcode() == X86::JMP_1) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }

      // If we get an invalid condition, we have an indirect branch or some
      // other unanalyzable "fallthrough" case. We model this as a nullptr for
      // the destination so we can still guard any conditional successors.
      // Consider code sequences like:
      // ```
      //   jCC L1
      //   jmpq *%rax
      // ```
      // We still want to harden the edge to `L1`.
      if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }

      // We have a vanilla conditional branch, add it to our list.
      Info.CondBrs.push_back(&MI);
    }
    if (Info.CondBrs.empty()) {
      ++NumBranchesUntraced;
      LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
                 MBB.dump());
      continue;
    }

    Infos.push_back(Info);
  }

  return Infos;
}

/// Trace the predicate state through the CFG, instrumenting each conditional
/// branch such that misspeculation through an edge will poison the predicate
/// state.
///
/// Returns the list of inserted CMov instructions so that they can have their
/// uses of the predicate state rewritten into proper SSA form once it is
/// complete.
SmallVector<MachineInstr *, 16>
X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
    MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
  // Collect the inserted cmov instructions so we can rewrite their uses of the
  // predicate state into SSA form.
  SmallVector<MachineInstr *, 16> CMovs;

  // Now walk all of the basic blocks looking for ones that end in conditional
  // jumps where we need to update this register along each edge.
  for (const BlockCondInfo &Info : Infos) {
    MachineBasicBlock &MBB = *Info.MBB;
    const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
    MachineInstr *UncondBr = Info.UncondBr;

    LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
                      << "\n");
    ++NumCondBranchesTraced;

    // Compute the non-conditional successor as either the target of any
    // unconditional branch or the layout successor.
    MachineBasicBlock *UncondSucc =
        UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
                        ? UncondBr->getOperand(0).getMBB()
                        : nullptr)
                 : &*std::next(MachineFunction::iterator(&MBB));

    // Count how many edges there are to any given successor.
    SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
    if (UncondSucc)
      ++SuccCounts[UncondSucc];
    for (auto *CondBr : CondBrs)
      ++SuccCounts[CondBr->getOperand(0).getMBB()];

    // A lambda to insert cmov instructions into a block checking all of the
    // condition codes in a sequence.
    auto BuildCheckingBlockForSuccAndConds =
        [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
            MachineInstr *Br, MachineInstr *&UncondBr,
            ArrayRef<X86::CondCode> Conds) {
          // First, we split the edge to insert the checking block into a safe
          // location.
          auto &CheckingMBB =
              (SuccCount == 1 && Succ.pred_size() == 1)
                  ? Succ
                  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);

          bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
          if (!LiveEFLAGS)
            CheckingMBB.addLiveIn(X86::EFLAGS);

          // Now insert the cmovs to implement the checks.
          auto InsertPt = CheckingMBB.begin();
          assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
                 "Should never have a PHI in the initial checking block as it "
                 "always has a single predecessor!");

          // We will wire each cmov to each other, but need to start with the
          // incoming pred state.
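          // Each cmov overwrites the running state with the poison value when
          // its condition code holds, i.e. when the flags contradict the edge
          // we arrived on and so indicate misspeculation; chaining the cmovs
          // checks every condition guarding this successor.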
          unsigned CurStateReg = PS->InitialReg;

          for (X86::CondCode Cond : Conds) {
            int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
            auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);

            Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
            // Note that we intentionally use an empty debug location so that
            // this picks up the preceding location.
            auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
                                 TII->get(CMovOp), UpdatedStateReg)
                             .addReg(CurStateReg)
                             .addReg(PS->PoisonReg)
                             .addImm(Cond);
            // If this is the last cmov and the EFLAGS weren't originally
            // live-in, mark them as killed.
            if (!LiveEFLAGS && Cond == Conds.back())
              CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
                  ->setIsKill(true);

            ++NumInstsInserted;
            LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump();
                       dbgs() << "\n");

            // The first one of the cmovs will be using the top level
            // `PredStateReg` and need to get rewritten into SSA form.
            if (CurStateReg == PS->InitialReg)
              CMovs.push_back(&*CMovI);

            // The next cmov should start from this one's def.
            CurStateReg = UpdatedStateReg;
          }

          // And put the last one into the available values for SSA form of our
          // predicate state.
          PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
        };

    std::vector<X86::CondCode> UncondCodeSeq;
    for (auto *CondBr : CondBrs) {
      MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
      int &SuccCount = SuccCounts[&Succ];

      X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
      X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
      UncondCodeSeq.push_back(Cond);

      BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
                                        {InvCond});

      // Decrement the successor count now that we've split one of the edges.
      // We need to keep the count of edges to the successor accurate in order
      // to know above when to *replace* the successor in the CFG vs. just
      // adding the new successor.
      --SuccCount;
    }

    // Since we may have split edges and changed the number of successors,
    // normalize the probabilities. This avoids doing it each time we split an
    // edge.
    MBB.normalizeSuccProbs();

    // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
    // need to intersect the other condition codes. We can do this by just
    // doing a cmov for each one.
    if (!UncondSucc)
      // If we have no fallthrough to protect (perhaps it is an indirect jump?)
      // just skip this and continue.
      continue;

    assert(SuccCounts[UncondSucc] == 1 &&
           "We should never have more than one edge to the unconditional "
           "successor at this point because every other edge must have been "
           "split above!");

    // Sort and unique the codes to minimize them.
    llvm::sort(UncondCodeSeq);
    UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());

    // Build a checking version of the successor.
    BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
                                      UncondBr, UncondBr, UncondCodeSeq);
  }

  return CMovs;
}

/// Compute the register class for the unfolded load.
///
/// FIXME: This should probably live in X86InstrInfo, potentially by adding
/// a way to unfold into a newly created vreg rather than requiring a register
/// input.
static const TargetRegisterClass *
getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
                           unsigned Opcode) {
  unsigned Index;
  unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
      Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
}

void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
    MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF)
    // We use make_early_inc_range here so we can remove instructions if needed
    // without disturbing the iteration.
    for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
      // Must either be a call or a branch.
      if (!MI.isCall() && !MI.isBranch())
        continue;
      // We only care about loading variants of these instructions.
      if (!MI.mayLoad())
        continue;

      switch (MI.getOpcode()) {
      default: {
        LLVM_DEBUG(
            dbgs() << "ERROR: Found an unexpected loading branch or call "
                      "instruction:\n";
            MI.dump(); dbgs() << "\n");
        report_fatal_error("Unexpected loading branch or call!");
      }

      case X86::FARCALL16m:
      case X86::FARCALL32m:
      case X86::FARCALL64m:
      case X86::FARJMP16m:
      case X86::FARJMP32m:
      case X86::FARJMP64m:
        // We cannot mitigate far jumps or calls, but we also don't expect them
        // to be vulnerable to Spectre v1.2 style attacks.
        continue;

      case X86::CALL16m:
      case X86::CALL16m_NT:
      case X86::CALL32m:
      case X86::CALL32m_NT:
      case X86::CALL64m:
      case X86::CALL64m_NT:
      case X86::JMP16m:
      case X86::JMP16m_NT:
      case X86::JMP32m:
      case X86::JMP32m_NT:
      case X86::JMP64m:
      case X86::JMP64m_NT:
      case X86::TAILJMPm64:
      case X86::TAILJMPm64_REX:
      case X86::TAILJMPm:
      case X86::TCRETURNmi64:
      case X86::TCRETURNmi: {
        // Use the generic unfold logic now that we know we're dealing with
        // expected instructions.
        // FIXME: We don't have test coverage for all of these!
        auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
        if (!UnfoldedRC) {
          LLVM_DEBUG(dbgs()
                         << "ERROR: Unable to unfold load from instruction:\n";
                     MI.dump(); dbgs() << "\n");
          report_fatal_error("Unable to unfold load!");
        }
        Register Reg = MRI->createVirtualRegister(UnfoldedRC);
        SmallVector<MachineInstr *, 2> NewMIs;
        // If we were able to compute an unfolded reg class, any failure here
        // is just a programming error so just assert.
        bool Unfolded =
            TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
                                     /*UnfoldStore*/ false, NewMIs);
        (void)Unfolded;
        assert(Unfolded &&
               "Computed unfolded register class but failed to unfold");
        // Now stitch the new instructions into place and erase the old one.
        for (auto *NewMI : NewMIs)
          MBB.insert(MI.getIterator(), NewMI);

        // Update the call site info.
        if (MI.isCandidateForCallSiteEntry())
          MF.eraseCallSiteInfo(&MI);

        MI.eraseFromParent();
        LLVM_DEBUG({
          dbgs() << "Unfolded load successfully into:\n";
          for (auto *NewMI : NewMIs) {
            NewMI->dump();
            dbgs() << "\n";
          }
        });
        continue;
      }
      }
      llvm_unreachable("Escaped switch with default!");
    }
}

/// Trace the predicate state through indirect branches, instrumenting them to
/// poison the state if a target is reached that does not match the expected
/// target.
///
/// This is designed to mitigate Spectre variant 1 attacks where an indirect
/// branch is trained to predict a particular target and then mispredicts that
/// target in a way that can leak data. Despite using an indirect branch, this
/// is really a variant 1 style attack: it does not steer execution to an
/// arbitrary or attacker controlled address, and it does not require any
/// special code executing next to the victim. This attack can also be
/// mitigated through retpolines, but those require either replacing indirect
/// branches with conditional direct branches or lowering them through a device
/// that blocks speculation. This mitigation can replace these retpoline-style
/// mitigations for jump tables and other indirect branches within a function
/// when variant 2 isn't a risk while allowing limited speculation. Indirect
/// calls, however, cannot be mitigated through this technique without changing
/// the ABI in a fundamental way.
SmallVector<MachineInstr *, 16>
X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
    MachineFunction &MF) {
  // We use the SSAUpdater to insert PHI nodes for the target addresses of
  // indirect branches. We don't actually need the full power of the SSA
  // updater in this particular case as we always have immediately available
  // values, but this avoids us having to re-implement the PHI construction
  // logic.
  MachineSSAUpdater TargetAddrSSA(MF);
  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));

  // Track which blocks were terminated with an indirect branch.
9700b57cec5SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs; 9710b57cec5SDimitry Andric 9720b57cec5SDimitry Andric // We need to know what blocks end up reached via indirect branches. We 9730b57cec5SDimitry Andric // expect this to be a subset of those whose address is taken and so track it 9740b57cec5SDimitry Andric // directly via the CFG. 9750b57cec5SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs; 9760b57cec5SDimitry Andric 9770b57cec5SDimitry Andric // Walk all the blocks which end in an indirect branch and make the 9780b57cec5SDimitry Andric // target address available. 9790b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 9800b57cec5SDimitry Andric // Find the last terminator. 9810b57cec5SDimitry Andric auto MII = MBB.instr_rbegin(); 9820b57cec5SDimitry Andric while (MII != MBB.instr_rend() && MII->isDebugInstr()) 9830b57cec5SDimitry Andric ++MII; 9840b57cec5SDimitry Andric if (MII == MBB.instr_rend()) 9850b57cec5SDimitry Andric continue; 9860b57cec5SDimitry Andric MachineInstr &TI = *MII; 9870b57cec5SDimitry Andric if (!TI.isTerminator() || !TI.isBranch()) 9880b57cec5SDimitry Andric // No terminator or non-branch terminator. 9890b57cec5SDimitry Andric continue; 9900b57cec5SDimitry Andric 9910b57cec5SDimitry Andric unsigned TargetReg; 9920b57cec5SDimitry Andric 9930b57cec5SDimitry Andric switch (TI.getOpcode()) { 9940b57cec5SDimitry Andric default: 9950b57cec5SDimitry Andric // Direct branch or conditional branch (leading to fallthrough). 9960b57cec5SDimitry Andric continue; 9970b57cec5SDimitry Andric 9980b57cec5SDimitry Andric case X86::FARJMP16m: 9990b57cec5SDimitry Andric case X86::FARJMP32m: 10005ffd83dbSDimitry Andric case X86::FARJMP64m: 10010b57cec5SDimitry Andric // We cannot mitigate far jumps or calls, but we also don't expect them 10020b57cec5SDimitry Andric // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks. 10030b57cec5SDimitry Andric continue; 10040b57cec5SDimitry Andric 10050b57cec5SDimitry Andric case X86::JMP16m: 10060b57cec5SDimitry Andric case X86::JMP16m_NT: 10070b57cec5SDimitry Andric case X86::JMP32m: 10080b57cec5SDimitry Andric case X86::JMP32m_NT: 10090b57cec5SDimitry Andric case X86::JMP64m: 10100b57cec5SDimitry Andric case X86::JMP64m_NT: 10110b57cec5SDimitry Andric // Mostly as documentation. 10120b57cec5SDimitry Andric report_fatal_error("Memory operand jumps should have been unfolded!"); 10130b57cec5SDimitry Andric 10140b57cec5SDimitry Andric case X86::JMP16r: 10150b57cec5SDimitry Andric report_fatal_error( 10160b57cec5SDimitry Andric "Support for 16-bit indirect branches is not implemented."); 10170b57cec5SDimitry Andric case X86::JMP32r: 10180b57cec5SDimitry Andric report_fatal_error( 10190b57cec5SDimitry Andric "Support for 32-bit indirect branches is not implemented."); 10200b57cec5SDimitry Andric 10210b57cec5SDimitry Andric case X86::JMP64r: 10220b57cec5SDimitry Andric TargetReg = TI.getOperand(0).getReg(); 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric // We have definitely found an indirect branch. Verify that there are no 10260b57cec5SDimitry Andric // preceding conditional branches as we don't yet support that. 
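// For example, a block whose terminators are a conditional jump followed by
// `jmpq *%rax` would be rejected by the check below; only a lone indirect
// branch terminator is currently handled.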
10270b57cec5SDimitry Andric if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) { 10280b57cec5SDimitry Andric return !OtherTI.isDebugInstr() && &OtherTI != &TI; 10290b57cec5SDimitry Andric })) { 10300b57cec5SDimitry Andric LLVM_DEBUG({ 10310b57cec5SDimitry Andric dbgs() << "ERROR: Found other terminators in a block with an indirect " 10320b57cec5SDimitry Andric "branch! This is not yet supported! Terminator sequence:\n"; 10330b57cec5SDimitry Andric for (MachineInstr &MI : MBB.terminators()) { 10340b57cec5SDimitry Andric MI.dump(); 10350b57cec5SDimitry Andric dbgs() << '\n'; 10360b57cec5SDimitry Andric } 10370b57cec5SDimitry Andric }); 10380b57cec5SDimitry Andric report_fatal_error("Unimplemented terminator sequence!"); 10390b57cec5SDimitry Andric } 10400b57cec5SDimitry Andric 10410b57cec5SDimitry Andric // Make the target register an available value for this block. 10420b57cec5SDimitry Andric TargetAddrSSA.AddAvailableValue(&MBB, TargetReg); 10430b57cec5SDimitry Andric IndirectTerminatedMBBs.insert(&MBB); 10440b57cec5SDimitry Andric 10450b57cec5SDimitry Andric // Add all the successors to our target candidates. 10460b57cec5SDimitry Andric for (MachineBasicBlock *Succ : MBB.successors()) 10470b57cec5SDimitry Andric IndirectTargetMBBs.insert(Succ); 10480b57cec5SDimitry Andric } 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric // Keep track of the cmov instructions we insert so we can return them. 10510b57cec5SDimitry Andric SmallVector<MachineInstr *, 16> CMovs; 10520b57cec5SDimitry Andric 10530b57cec5SDimitry Andric // If we didn't find any indirect branches with targets, nothing to do here. 10540b57cec5SDimitry Andric if (IndirectTargetMBBs.empty()) 10550b57cec5SDimitry Andric return CMovs; 10560b57cec5SDimitry Andric 10570b57cec5SDimitry Andric // We found indirect branches and targets that need to be instrumented to 10580b57cec5SDimitry Andric // harden loads within them. Walk the blocks of the function (to get a stable 10590b57cec5SDimitry Andric // ordering) and instrument each target of an indirect branch. 10600b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 10610b57cec5SDimitry Andric // Skip the blocks that aren't candidate targets. 10620b57cec5SDimitry Andric if (!IndirectTargetMBBs.count(&MBB)) 10630b57cec5SDimitry Andric continue; 10640b57cec5SDimitry Andric 10650b57cec5SDimitry Andric // We don't expect EH pads to ever be reached via an indirect branch. If 10660b57cec5SDimitry Andric // this is desired for some reason, we could simply skip them here rather 10670b57cec5SDimitry Andric // than asserting. 10680b57cec5SDimitry Andric assert(!MBB.isEHPad() && 10690b57cec5SDimitry Andric "Unexpected EH pad as target of an indirect branch!"); 10700b57cec5SDimitry Andric 10710b57cec5SDimitry Andric // We should never end up threading EFLAGS into a block to harden 10720b57cec5SDimitry Andric // conditional jumps as there would be an additional successor via the 10730b57cec5SDimitry Andric // indirect branch. As a consequence, all such edges would be split before 10740b57cec5SDimitry Andric // reaching here, and the inserted block will handle the EFLAGS-based 10750b57cec5SDimitry Andric // hardening. 
10760b57cec5SDimitry Andric assert(!MBB.isLiveIn(X86::EFLAGS) && 10770b57cec5SDimitry Andric "Cannot check within a block that already has live-in EFLAGS!"); 10780b57cec5SDimitry Andric 10790b57cec5SDimitry Andric // We can't handle having non-indirect edges into this block unless this is 10800b57cec5SDimitry Andric // the only successor and we can synthesize the necessary target address. 10810b57cec5SDimitry Andric for (MachineBasicBlock *Pred : MBB.predecessors()) { 10820b57cec5SDimitry Andric // If we've already handled this by extracting the target directly, 10830b57cec5SDimitry Andric // nothing to do. 10840b57cec5SDimitry Andric if (IndirectTerminatedMBBs.count(Pred)) 10850b57cec5SDimitry Andric continue; 10860b57cec5SDimitry Andric 10870b57cec5SDimitry Andric // Otherwise, we have to be the only successor. We generally expect this 10880b57cec5SDimitry Andric // to be true as conditional branches should have had a critical edge 10890b57cec5SDimitry Andric // split already. We don't however need to worry about EH pad successors 10900b57cec5SDimitry Andric // as they'll happily ignore the target and their hardening strategy is 10910b57cec5SDimitry Andric // resilient to all ways in which they could be reached speculatively. 10920b57cec5SDimitry Andric if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) { 10930b57cec5SDimitry Andric return Succ->isEHPad() || Succ == &MBB; 10940b57cec5SDimitry Andric })) { 10950b57cec5SDimitry Andric LLVM_DEBUG({ 10960b57cec5SDimitry Andric dbgs() << "ERROR: Found conditional entry to target of indirect " 10970b57cec5SDimitry Andric "branch!\n"; 10980b57cec5SDimitry Andric Pred->dump(); 10990b57cec5SDimitry Andric MBB.dump(); 11000b57cec5SDimitry Andric }); 11010b57cec5SDimitry Andric report_fatal_error("Cannot harden a conditional entry to a target of " 11020b57cec5SDimitry Andric "an indirect branch!"); 11030b57cec5SDimitry Andric } 11040b57cec5SDimitry Andric 11050b57cec5SDimitry Andric // Now we need to compute the address of this block and install it as a 11060b57cec5SDimitry Andric // synthetic target in the predecessor. We do this at the bottom of the 11070b57cec5SDimitry Andric // predecessor. 11080b57cec5SDimitry Andric auto InsertPt = Pred->getFirstTerminator(); 11098bcb0991SDimitry Andric Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass); 11100b57cec5SDimitry Andric if (MF.getTarget().getCodeModel() == CodeModel::Small && 11110b57cec5SDimitry Andric !Subtarget->isPositionIndependent()) { 11120b57cec5SDimitry Andric // Directly materialize it into an immediate. 
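// In the small, non-position-independent code model every block address is
// known to fit in a sign-extended 32-bit immediate, so a single
// `movq $<block address>, %reg` (MOV64ri32) suffices; the else branch below
// handles the general case with a RIP-relative LEA.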
11130b57cec5SDimitry Andric auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), 11140b57cec5SDimitry Andric TII->get(X86::MOV64ri32), TargetReg) 11150b57cec5SDimitry Andric .addMBB(&MBB); 11160b57cec5SDimitry Andric ++NumInstsInserted; 11170b57cec5SDimitry Andric (void)AddrI; 11180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump(); 11190b57cec5SDimitry Andric dbgs() << "\n"); 11200b57cec5SDimitry Andric } else { 11210b57cec5SDimitry Andric auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r), 11220b57cec5SDimitry Andric TargetReg) 11230b57cec5SDimitry Andric .addReg(/*Base*/ X86::RIP) 11240b57cec5SDimitry Andric .addImm(/*Scale*/ 1) 11250b57cec5SDimitry Andric .addReg(/*Index*/ 0) 11260b57cec5SDimitry Andric .addMBB(&MBB) 11270b57cec5SDimitry Andric .addReg(/*Segment*/ 0); 11280b57cec5SDimitry Andric ++NumInstsInserted; 11290b57cec5SDimitry Andric (void)AddrI; 11300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); 11310b57cec5SDimitry Andric dbgs() << "\n"); 11320b57cec5SDimitry Andric } 11330b57cec5SDimitry Andric // And make this available. 11340b57cec5SDimitry Andric TargetAddrSSA.AddAvailableValue(Pred, TargetReg); 11350b57cec5SDimitry Andric } 11360b57cec5SDimitry Andric 11370b57cec5SDimitry Andric // Materialize the needed SSA value of the target. Note that we need the 11380b57cec5SDimitry Andric // middle of the block as this block might at the bottom have an indirect 11390b57cec5SDimitry Andric // branch back to itself. We can do this here because at this point, every 11400b57cec5SDimitry Andric // predecessor of this block has an available value. This is basically just 11410b57cec5SDimitry Andric // automating the construction of a PHI node for this target. 114204eeddc0SDimitry Andric Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB); 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric // Insert a comparison of the incoming target register with this block's 11450b57cec5SDimitry Andric // address. This also requires us to mark the block as having its address 11460b57cec5SDimitry Andric // taken explicitly. 1147bdd1243dSDimitry Andric MBB.setMachineBlockAddressTaken(); 11480b57cec5SDimitry Andric auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); 11490b57cec5SDimitry Andric if (MF.getTarget().getCodeModel() == CodeModel::Small && 11500b57cec5SDimitry Andric !Subtarget->isPositionIndependent()) { 11510b57cec5SDimitry Andric // Check directly against a relocated immediate when we can. 11520b57cec5SDimitry Andric auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32)) 11530b57cec5SDimitry Andric .addReg(TargetReg, RegState::Kill) 11540b57cec5SDimitry Andric .addMBB(&MBB); 11550b57cec5SDimitry Andric ++NumInstsInserted; 11560b57cec5SDimitry Andric (void)CheckI; 11570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n"); 11580b57cec5SDimitry Andric } else { 11590b57cec5SDimitry Andric // Otherwise compute the address into a register first. 
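// Schematically, the check materialized at each indirect-branch target looks
// like (register names are placeholders):
//   leaq  <this block>(%rip), %addr   # recompute this block's address
//   cmpq  %addr, %target              # compare against the incoming target
//   cmovneq %poison, %state           # poison the predicate state on mismatch
// with the cmov built further below once the comparison is in place.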
11608bcb0991SDimitry Andric Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass); 11610b57cec5SDimitry Andric auto AddrI = 11620b57cec5SDimitry Andric BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg) 11630b57cec5SDimitry Andric .addReg(/*Base*/ X86::RIP) 11640b57cec5SDimitry Andric .addImm(/*Scale*/ 1) 11650b57cec5SDimitry Andric .addReg(/*Index*/ 0) 11660b57cec5SDimitry Andric .addMBB(&MBB) 11670b57cec5SDimitry Andric .addReg(/*Segment*/ 0); 11680b57cec5SDimitry Andric ++NumInstsInserted; 11690b57cec5SDimitry Andric (void)AddrI; 11700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n"); 11710b57cec5SDimitry Andric auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr)) 11720b57cec5SDimitry Andric .addReg(TargetReg, RegState::Kill) 11730b57cec5SDimitry Andric .addReg(AddrReg, RegState::Kill); 11740b57cec5SDimitry Andric ++NumInstsInserted; 11750b57cec5SDimitry Andric (void)CheckI; 11760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n"); 11770b57cec5SDimitry Andric } 11780b57cec5SDimitry Andric 11790b57cec5SDimitry Andric // Now cmov over the predicate if the comparison wasn't equal. 11800b57cec5SDimitry Andric int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; 11810b57cec5SDimitry Andric auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); 11828bcb0991SDimitry Andric Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC); 11830b57cec5SDimitry Andric auto CMovI = 11840b57cec5SDimitry Andric BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) 11850b57cec5SDimitry Andric .addReg(PS->InitialReg) 11860b57cec5SDimitry Andric .addReg(PS->PoisonReg) 11870b57cec5SDimitry Andric .addImm(X86::COND_NE); 1188*0fca6ea1SDimitry Andric CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr) 1189*0fca6ea1SDimitry Andric ->setIsKill(true); 11900b57cec5SDimitry Andric ++NumInstsInserted; 11910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); 11920b57cec5SDimitry Andric CMovs.push_back(&*CMovI); 11930b57cec5SDimitry Andric 11940b57cec5SDimitry Andric // And put the new value into the available values for SSA form of our 11950b57cec5SDimitry Andric // predicate state. 11960b57cec5SDimitry Andric PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg); 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric 11990b57cec5SDimitry Andric // Return all the newly inserted cmov instructions of the predicate state. 
12000b57cec5SDimitry Andric return CMovs; 12010b57cec5SDimitry Andric } 12020b57cec5SDimitry Andric 12035ffd83dbSDimitry Andric // Returns true if the MI has EFLAGS as a register def operand and it is live; 12045ffd83dbSDimitry Andric // otherwise it returns false. 12055ffd83dbSDimitry Andric static bool isEFLAGSDefLive(const MachineInstr &MI) { 1206*0fca6ea1SDimitry Andric if (const MachineOperand *DefOp = 1207*0fca6ea1SDimitry Andric MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) { 12085ffd83dbSDimitry Andric return !DefOp->isDead(); 12095ffd83dbSDimitry Andric } 12100b57cec5SDimitry Andric return false; 12110b57cec5SDimitry Andric } 12120b57cec5SDimitry Andric 12130b57cec5SDimitry Andric static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 12140b57cec5SDimitry Andric const TargetRegisterInfo &TRI) { 12150b57cec5SDimitry Andric // Check if EFLAGS are alive by seeing if there is a def of them or they 12160b57cec5SDimitry Andric // live-in, and then seeing if that def is in turn used. 12170b57cec5SDimitry Andric for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) { 1218*0fca6ea1SDimitry Andric if (MachineOperand *DefOp = 1219*0fca6ea1SDimitry Andric MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) { 12200b57cec5SDimitry Andric // If the def is dead, then EFLAGS is not live. 12210b57cec5SDimitry Andric if (DefOp->isDead()) 12220b57cec5SDimitry Andric return false; 12230b57cec5SDimitry Andric 12240b57cec5SDimitry Andric // Otherwise we've def'ed it, and it is live. 12250b57cec5SDimitry Andric return true; 12260b57cec5SDimitry Andric } 12270b57cec5SDimitry Andric // While at this instruction, also check if we use and kill EFLAGS 12280b57cec5SDimitry Andric // which means it isn't live. 12290b57cec5SDimitry Andric if (MI.killsRegister(X86::EFLAGS, &TRI)) 12300b57cec5SDimitry Andric return false; 12310b57cec5SDimitry Andric } 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric // If we didn't find anything conclusive (neither definitely alive nor 12340b57cec5SDimitry Andric // definitely dead), return whether it lives into the block. 12350b57cec5SDimitry Andric return MBB.isLiveIn(X86::EFLAGS); 12360b57cec5SDimitry Andric } 12370b57cec5SDimitry Andric 12380b57cec5SDimitry Andric /// Trace the predicate state through each of the blocks in the function, 12390b57cec5SDimitry Andric /// hardening everything necessary along the way. 12400b57cec5SDimitry Andric /// 12410b57cec5SDimitry Andric /// We call this routine once the initial predicate state has been established 12420b57cec5SDimitry Andric /// for each basic block in the function in the SSA updater. This routine traces 12430b57cec5SDimitry Andric /// it through the instructions within each basic block, and for non-returning 12440b57cec5SDimitry Andric /// blocks informs the SSA updater about the final state that lives out of the 12450b57cec5SDimitry Andric /// block. Along the way, it hardens any vulnerable instruction using the 12460b57cec5SDimitry Andric /// currently valid predicate state. We have to do these two things together 12470b57cec5SDimitry Andric /// because the SSA updater only works across blocks. Within a block, we track 12480b57cec5SDimitry Andric /// the current predicate state directly and update it as it changes. 12490b57cec5SDimitry Andric /// 12500b57cec5SDimitry Andric /// This operates in two passes over each block.
First, we analyze the loads in 12510b57cec5SDimitry Andric /// the block to determine which strategy will be used to harden them: hardening 12520b57cec5SDimitry Andric /// the address or hardening the loaded value when loaded into a register 12530b57cec5SDimitry Andric /// amenable to hardening. We have to process these first because the two 12540b57cec5SDimitry Andric /// strategies may interact -- later hardening may change what strategy we wish 12550b57cec5SDimitry Andric /// to use. We will also analyze data dependencies between loads and avoid 12560b57cec5SDimitry Andric /// hardening those loads that are data dependent on a load with a hardened 12570b57cec5SDimitry Andric /// address. We also skip hardening loads already behind an LFENCE as that is 12580b57cec5SDimitry Andric /// sufficient to harden them against misspeculation. 12590b57cec5SDimitry Andric /// 12600b57cec5SDimitry Andric /// Second, we actively trace the predicate state through the block, applying 12610b57cec5SDimitry Andric /// the hardening steps we determined necessary in the first pass as we go. 12620b57cec5SDimitry Andric /// 12630b57cec5SDimitry Andric /// These two passes are applied to each basic block. We operate one block at a 12640b57cec5SDimitry Andric /// time to simplify reasoning about reachability and sequencing. 12650b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( 12660b57cec5SDimitry Andric MachineFunction &MF) { 12670b57cec5SDimitry Andric SmallPtrSet<MachineInstr *, 16> HardenPostLoad; 12680b57cec5SDimitry Andric SmallPtrSet<MachineInstr *, 16> HardenLoadAddr; 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric SmallSet<unsigned, 16> HardenedAddrRegs; 12710b57cec5SDimitry Andric 12720b57cec5SDimitry Andric SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg; 12730b57cec5SDimitry Andric 12740b57cec5SDimitry Andric // Track the set of load-dependent registers through the basic block. Because 12750b57cec5SDimitry Andric // the values of these registers have an existing data dependency on a loaded 12760b57cec5SDimitry Andric // value which we would have checked, we can omit any checks on them. 12770b57cec5SDimitry Andric SparseBitVector<> LoadDepRegs; 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 12800b57cec5SDimitry Andric // The first pass over the block: collect all the loads which can have their 12810b57cec5SDimitry Andric // loaded value hardened and all the loads that instead need their address 12820b57cec5SDimitry Andric // hardened. During this walk we propagate load dependence for address 12830b57cec5SDimitry Andric // hardened loads and also look for LFENCE to stop hardening wherever 12840b57cec5SDimitry Andric // possible. When deciding whether or not to harden the loaded value, 12850b57cec5SDimitry Andric // we check to see if any registers used in the address will have been 12860b57cec5SDimitry Andric // hardened at this point and, if so, harden any remaining address registers 12870b57cec5SDimitry Andric // as that often successfully re-uses hardened addresses and minimizes 12880b57cec5SDimitry Andric // instructions. 12890b57cec5SDimitry Andric // 12900b57cec5SDimitry Andric // FIXME: We should consider an aggressive mode where we continue to keep as 12910b57cec5SDimitry Andric // many loads value-hardened even when some address register hardening would 12920b57cec5SDimitry Andric // be free (due to reuse).
12930b57cec5SDimitry Andric // 12940b57cec5SDimitry Andric // Note that we only need this pass if we are actually hardening loads. 12950b57cec5SDimitry Andric if (HardenLoads) 12960b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 12970b57cec5SDimitry Andric // We naively assume that all def'ed registers of an instruction have 12980b57cec5SDimitry Andric // a data dependency on all of their operands. 12990b57cec5SDimitry Andric // FIXME: Do a more careful analysis of x86 to build a conservative 13000b57cec5SDimitry Andric // model here. 13010b57cec5SDimitry Andric if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) { 13020b57cec5SDimitry Andric return Op.isReg() && LoadDepRegs.test(Op.getReg()); 13030b57cec5SDimitry Andric })) 13040b57cec5SDimitry Andric for (MachineOperand &Def : MI.defs()) 13050b57cec5SDimitry Andric if (Def.isReg()) 13060b57cec5SDimitry Andric LoadDepRegs.set(Def.getReg()); 13070b57cec5SDimitry Andric 13080b57cec5SDimitry Andric // Both Intel and AMD are guiding that they will change the semantics of 13090b57cec5SDimitry Andric // LFENCE to be a speculation barrier, so if we see an LFENCE, there is 13100b57cec5SDimitry Andric // no more need to guard things in this block. 13110b57cec5SDimitry Andric if (MI.getOpcode() == X86::LFENCE) 13120b57cec5SDimitry Andric break; 13130b57cec5SDimitry Andric 13140b57cec5SDimitry Andric // If this instruction cannot load, nothing to do. 13150b57cec5SDimitry Andric if (!MI.mayLoad()) 13160b57cec5SDimitry Andric continue; 13170b57cec5SDimitry Andric 13180b57cec5SDimitry Andric // Some instructions which "load" are trivially safe or unimportant. 13190b57cec5SDimitry Andric if (MI.getOpcode() == X86::MFENCE) 13200b57cec5SDimitry Andric continue; 13210b57cec5SDimitry Andric 13220b57cec5SDimitry Andric // Extract the memory operand information about this instruction. 13237a6dacacSDimitry Andric const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI); 13240b57cec5SDimitry Andric if (MemRefBeginIdx < 0) { 13250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 13260b57cec5SDimitry Andric << "WARNING: unable to harden loading instruction: "; 13270b57cec5SDimitry Andric MI.dump()); 13280b57cec5SDimitry Andric continue; 13290b57cec5SDimitry Andric } 13300b57cec5SDimitry Andric 13310b57cec5SDimitry Andric MachineOperand &BaseMO = 13320b57cec5SDimitry Andric MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); 13330b57cec5SDimitry Andric MachineOperand &IndexMO = 13340b57cec5SDimitry Andric MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); 13350b57cec5SDimitry Andric 13360b57cec5SDimitry Andric // If we have at least one (non-frame-index, non-RIP) register operand, 13370b57cec5SDimitry Andric // and neither operand is load-dependent, we need to check the load. 13380b57cec5SDimitry Andric unsigned BaseReg = 0, IndexReg = 0; 13390b57cec5SDimitry Andric if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP && 13400b57cec5SDimitry Andric BaseMO.getReg() != X86::NoRegister) 13410b57cec5SDimitry Andric BaseReg = BaseMO.getReg(); 13420b57cec5SDimitry Andric if (IndexMO.getReg() != X86::NoRegister) 13430b57cec5SDimitry Andric IndexReg = IndexMO.getReg(); 13440b57cec5SDimitry Andric 13450b57cec5SDimitry Andric if (!BaseReg && !IndexReg) 13460b57cec5SDimitry Andric // No register operands! 13470b57cec5SDimitry Andric continue; 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric // If any register operand is dependent, this load is dependent and we 13500b57cec5SDimitry Andric // needn't check it. 
13510b57cec5SDimitry Andric // FIXME: Is this true in the case where we are hardening loads after 13520b57cec5SDimitry Andric // they complete? Unclear, need to investigate. 13530b57cec5SDimitry Andric if ((BaseReg && LoadDepRegs.test(BaseReg)) || 13540b57cec5SDimitry Andric (IndexReg && LoadDepRegs.test(IndexReg))) 13550b57cec5SDimitry Andric continue; 13560b57cec5SDimitry Andric 13570b57cec5SDimitry Andric // If post-load hardening is enabled, this load is compatible with 13580b57cec5SDimitry Andric // post-load hardening, and we aren't already going to harden one of the 13590b57cec5SDimitry Andric // address registers, queue it up to be hardened post-load. Notably, 13600b57cec5SDimitry Andric // even once hardened this won't introduce a useful dependency that 13610b57cec5SDimitry Andric // could prune out subsequent loads. 13625ffd83dbSDimitry Andric if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) && 13635ffd83dbSDimitry Andric !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 && 13645ffd83dbSDimitry Andric MI.getOperand(0).isReg() && 13650b57cec5SDimitry Andric canHardenRegister(MI.getOperand(0).getReg()) && 13660b57cec5SDimitry Andric !HardenedAddrRegs.count(BaseReg) && 13670b57cec5SDimitry Andric !HardenedAddrRegs.count(IndexReg)) { 13680b57cec5SDimitry Andric HardenPostLoad.insert(&MI); 13690b57cec5SDimitry Andric HardenedAddrRegs.insert(MI.getOperand(0).getReg()); 13700b57cec5SDimitry Andric continue; 13710b57cec5SDimitry Andric } 13720b57cec5SDimitry Andric 13730b57cec5SDimitry Andric // Record this instruction for address hardening and record its register 13740b57cec5SDimitry Andric // operands as being address-hardened. 13750b57cec5SDimitry Andric HardenLoadAddr.insert(&MI); 13760b57cec5SDimitry Andric if (BaseReg) 13770b57cec5SDimitry Andric HardenedAddrRegs.insert(BaseReg); 13780b57cec5SDimitry Andric if (IndexReg) 13790b57cec5SDimitry Andric HardenedAddrRegs.insert(IndexReg); 13800b57cec5SDimitry Andric 13810b57cec5SDimitry Andric for (MachineOperand &Def : MI.defs()) 13820b57cec5SDimitry Andric if (Def.isReg()) 13830b57cec5SDimitry Andric LoadDepRegs.set(Def.getReg()); 13840b57cec5SDimitry Andric } 13850b57cec5SDimitry Andric 13860b57cec5SDimitry Andric // Now re-walk the instructions in the basic block, and apply whichever 13870b57cec5SDimitry Andric // hardening strategy we have elected. Note that we do this in a second 13880b57cec5SDimitry Andric // pass specifically so that we have the complete set of instructions for 13890b57cec5SDimitry Andric // which we will do post-load hardening and can defer it in certain 13900b57cec5SDimitry Andric // circumstances. 13910b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 13920b57cec5SDimitry Andric if (HardenLoads) { 13930b57cec5SDimitry Andric // We cannot both require hardening the def of a load and its address. 13940b57cec5SDimitry Andric assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) && 13950b57cec5SDimitry Andric "Requested to harden both the address and def of a load!"); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric // Check if this is a load whose address needs to be hardened. 
13980b57cec5SDimitry Andric if (HardenLoadAddr.erase(&MI)) { 13997a6dacacSDimitry Andric const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI); 14000b57cec5SDimitry Andric assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!"); 14010b57cec5SDimitry Andric 14020b57cec5SDimitry Andric MachineOperand &BaseMO = 14030b57cec5SDimitry Andric MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); 14040b57cec5SDimitry Andric MachineOperand &IndexMO = 14050b57cec5SDimitry Andric MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); 14060b57cec5SDimitry Andric hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg); 14070b57cec5SDimitry Andric continue; 14080b57cec5SDimitry Andric } 14090b57cec5SDimitry Andric 14100b57cec5SDimitry Andric // Test if this instruction is one of our post load instructions (and 14110b57cec5SDimitry Andric // remove it from the set if so). 14120b57cec5SDimitry Andric if (HardenPostLoad.erase(&MI)) { 14130b57cec5SDimitry Andric assert(!MI.isCall() && "Must not try to post-load harden a call!"); 14140b57cec5SDimitry Andric 14155ffd83dbSDimitry Andric // If this is a data-invariant load and there is no EFLAGS 14165ffd83dbSDimitry Andric // interference, we want to try and sink any hardening as far as 14175ffd83dbSDimitry Andric // possible. 14185ffd83dbSDimitry Andric if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) { 14190b57cec5SDimitry Andric // Sink the instruction we'll need to harden as far as we can down 14200b57cec5SDimitry Andric // the graph. 14210b57cec5SDimitry Andric MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad); 14220b57cec5SDimitry Andric 14230b57cec5SDimitry Andric // If we managed to sink this instruction, update everything so we 14240b57cec5SDimitry Andric // harden that instruction when we reach it in the instruction 14250b57cec5SDimitry Andric // sequence. 14260b57cec5SDimitry Andric if (SunkMI != &MI) { 14270b57cec5SDimitry Andric // If in sinking there was no instruction needing to be hardened, 14280b57cec5SDimitry Andric // we're done. 14290b57cec5SDimitry Andric if (!SunkMI) 14300b57cec5SDimitry Andric continue; 14310b57cec5SDimitry Andric 14320b57cec5SDimitry Andric // Otherwise, add this to the set of defs we harden. 14330b57cec5SDimitry Andric HardenPostLoad.insert(SunkMI); 14340b57cec5SDimitry Andric continue; 14350b57cec5SDimitry Andric } 14360b57cec5SDimitry Andric } 14370b57cec5SDimitry Andric 14380b57cec5SDimitry Andric unsigned HardenedReg = hardenPostLoad(MI); 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric // Mark the resulting hardened register as such so we don't re-harden. 14410b57cec5SDimitry Andric AddrRegToHardenedReg[HardenedReg] = HardenedReg; 14420b57cec5SDimitry Andric 14430b57cec5SDimitry Andric continue; 14440b57cec5SDimitry Andric } 14450b57cec5SDimitry Andric 14460b57cec5SDimitry Andric // Check for an indirect call or branch that may need its input hardened 14470b57cec5SDimitry Andric // even if we couldn't find the specific load used, or were able to 14480b57cec5SDimitry Andric // avoid hardening it for some reason. Note that here we cannot break 14490b57cec5SDimitry Andric // out afterward as we may still need to handle any call aspect of this 14500b57cec5SDimitry Andric // instruction. 
14510b57cec5SDimitry Andric if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps) 14520b57cec5SDimitry Andric hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg); 14530b57cec5SDimitry Andric } 14540b57cec5SDimitry Andric 14550b57cec5SDimitry Andric // After we finish hardening loads we handle interprocedural hardening if 14560b57cec5SDimitry Andric // enabled and relevant for this instruction. 14570b57cec5SDimitry Andric if (!HardenInterprocedurally) 14580b57cec5SDimitry Andric continue; 14590b57cec5SDimitry Andric if (!MI.isCall() && !MI.isReturn()) 14600b57cec5SDimitry Andric continue; 14610b57cec5SDimitry Andric 14620b57cec5SDimitry Andric // If this is a direct return (IE, not a tail call) just directly harden 14630b57cec5SDimitry Andric // it. 14640b57cec5SDimitry Andric if (MI.isReturn() && !MI.isCall()) { 14650b57cec5SDimitry Andric hardenReturnInstr(MI); 14660b57cec5SDimitry Andric continue; 14670b57cec5SDimitry Andric } 14680b57cec5SDimitry Andric 14690b57cec5SDimitry Andric // Otherwise we have a call. We need to handle transferring the predicate 14700b57cec5SDimitry Andric // state into a call and recovering it after the call returns (unless this 14710b57cec5SDimitry Andric // is a tail call). 14720b57cec5SDimitry Andric assert(MI.isCall() && "Should only reach here for calls!"); 14730b57cec5SDimitry Andric tracePredStateThroughCall(MI); 14740b57cec5SDimitry Andric } 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric HardenPostLoad.clear(); 14770b57cec5SDimitry Andric HardenLoadAddr.clear(); 14780b57cec5SDimitry Andric HardenedAddrRegs.clear(); 14790b57cec5SDimitry Andric AddrRegToHardenedReg.clear(); 14800b57cec5SDimitry Andric 14810b57cec5SDimitry Andric // Currently, we only track data-dependent loads within a basic block. 14820b57cec5SDimitry Andric // FIXME: We should see if this is necessary or if we could be more 14830b57cec5SDimitry Andric // aggressive here without opening up attack avenues. 14840b57cec5SDimitry Andric LoadDepRegs.clear(); 14850b57cec5SDimitry Andric } 14860b57cec5SDimitry Andric } 14870b57cec5SDimitry Andric 14880b57cec5SDimitry Andric /// Save EFLAGS into the returned GPR. This can in turn be restored with 14890b57cec5SDimitry Andric /// `restoreEFLAGS`. 14900b57cec5SDimitry Andric /// 14910b57cec5SDimitry Andric /// Note that LLVM can only lower very simple patterns of saved and restored 14920b57cec5SDimitry Andric /// EFLAGS registers. The restore should always be within the same basic block 14930b57cec5SDimitry Andric /// as the save so that no PHI nodes are inserted. 14940b57cec5SDimitry Andric unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS( 14950b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, 149681ad6265SDimitry Andric const DebugLoc &Loc) { 14970b57cec5SDimitry Andric // FIXME: Hard coding this to a 32-bit register class seems weird, but matches 14980b57cec5SDimitry Andric // what instruction selection does. 14998bcb0991SDimitry Andric Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass); 15000b57cec5SDimitry Andric // We directly copy the FLAGS register and rely on later lowering to clean 15010b57cec5SDimitry Andric // this up into the appropriate setCC instructions. 
15020b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS); 15030b57cec5SDimitry Andric ++NumInstsInserted; 15040b57cec5SDimitry Andric return Reg; 15050b57cec5SDimitry Andric } 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric /// Restore EFLAGS from the provided GPR. This should be produced by 15080b57cec5SDimitry Andric /// `saveEFLAGS`. 15090b57cec5SDimitry Andric /// 15100b57cec5SDimitry Andric /// This must be done within the same basic block as the save in order to 15110b57cec5SDimitry Andric /// reliably lower. 15120b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::restoreEFLAGS( 151381ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, 151481ad6265SDimitry Andric const DebugLoc &Loc, Register Reg) { 15150b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg); 15160b57cec5SDimitry Andric ++NumInstsInserted; 15170b57cec5SDimitry Andric } 15180b57cec5SDimitry Andric 15190b57cec5SDimitry Andric /// Takes the current predicate state (in a register) and merges it into the 15200b57cec5SDimitry Andric /// stack pointer. The state is essentially a single bit, but we merge this in 15210b57cec5SDimitry Andric /// a way that won't form non-canonical pointers and also will be preserved 15220b57cec5SDimitry Andric /// across normal stack adjustments. 15230b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP( 152481ad6265SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, 152581ad6265SDimitry Andric const DebugLoc &Loc, unsigned PredStateReg) { 15268bcb0991SDimitry Andric Register TmpReg = MRI->createVirtualRegister(PS->RC); 15270b57cec5SDimitry Andric // FIXME: This hard codes a shift distance based on the number of bits needed 15280b57cec5SDimitry Andric // to stay canonical on 64-bit. We should compute this somehow and support 15290b57cec5SDimitry Andric // 32-bit as part of that. 15300b57cec5SDimitry Andric auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg) 15310b57cec5SDimitry Andric .addReg(PredStateReg, RegState::Kill) 15320b57cec5SDimitry Andric .addImm(47); 15330b57cec5SDimitry Andric ShiftI->addRegisterDead(X86::EFLAGS, TRI); 15340b57cec5SDimitry Andric ++NumInstsInserted; 15350b57cec5SDimitry Andric auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP) 15360b57cec5SDimitry Andric .addReg(X86::RSP) 15370b57cec5SDimitry Andric .addReg(TmpReg, RegState::Kill); 15380b57cec5SDimitry Andric OrI->addRegisterDead(X86::EFLAGS, TRI); 15390b57cec5SDimitry Andric ++NumInstsInserted; 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric /// Extracts the predicate state stored in the high bits of the stack pointer. 15430b57cec5SDimitry Andric unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP( 15440b57cec5SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, 154581ad6265SDimitry Andric const DebugLoc &Loc) { 15468bcb0991SDimitry Andric Register PredStateReg = MRI->createVirtualRegister(PS->RC); 15478bcb0991SDimitry Andric Register TmpReg = MRI->createVirtualRegister(PS->RC); 15480b57cec5SDimitry Andric 15490b57cec5SDimitry Andric // We know that the stack pointer will have any preserved predicate state in 15500b57cec5SDimitry Andric // its high bit. We just want to smear this across the other bits. Turns out, 15510b57cec5SDimitry Andric // this is exactly what an arithmetic right shift does. 
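// Worked example: the predicate state is all-zeros on a correctly predicted
// path and all-ones (the poison value) while misspeculating, so after
// mergePredStateIntoSP the high bit of RSP is set exactly when we are
// misspeculating. Copying RSP and arithmetically shifting right by 63 thus
// yields 0 in the good case and 0xFFFF'FFFF'FFFF'FFFF in the bad case,
// recreating the all-zeros / all-ones encoding used throughout the pass.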
15520b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg) 15530b57cec5SDimitry Andric .addReg(X86::RSP); 15540b57cec5SDimitry Andric auto ShiftI = 15550b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg) 15560b57cec5SDimitry Andric .addReg(TmpReg, RegState::Kill) 15570b57cec5SDimitry Andric .addImm(TRI->getRegSizeInBits(*PS->RC) - 1); 15580b57cec5SDimitry Andric ShiftI->addRegisterDead(X86::EFLAGS, TRI); 15590b57cec5SDimitry Andric ++NumInstsInserted; 15600b57cec5SDimitry Andric 15610b57cec5SDimitry Andric return PredStateReg; 15620b57cec5SDimitry Andric } 15630b57cec5SDimitry Andric 15640b57cec5SDimitry Andric void X86SpeculativeLoadHardeningPass::hardenLoadAddr( 15650b57cec5SDimitry Andric MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO, 15660b57cec5SDimitry Andric SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) { 15670b57cec5SDimitry Andric MachineBasicBlock &MBB = *MI.getParent(); 1568fe6060f1SDimitry Andric const DebugLoc &Loc = MI.getDebugLoc(); 15690b57cec5SDimitry Andric 15700b57cec5SDimitry Andric // Check if EFLAGS are alive by seeing if there is a def of them or they 15710b57cec5SDimitry Andric // live-in, and then seeing if that def is in turn used. 15720b57cec5SDimitry Andric bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI); 15730b57cec5SDimitry Andric 15740b57cec5SDimitry Andric SmallVector<MachineOperand *, 2> HardenOpRegs; 15750b57cec5SDimitry Andric 15760b57cec5SDimitry Andric if (BaseMO.isFI()) { 15770b57cec5SDimitry Andric // A frame index is never a dynamically controllable load, so only 15780b57cec5SDimitry Andric // harden it if we're covering fixed address loads as well. 15790b57cec5SDimitry Andric LLVM_DEBUG( 15800b57cec5SDimitry Andric dbgs() << " Skipping hardening base of explicit stack frame load: "; 15810b57cec5SDimitry Andric MI.dump(); dbgs() << "\n"); 15820b57cec5SDimitry Andric } else if (BaseMO.getReg() == X86::RSP) { 15830b57cec5SDimitry Andric // Some idempotent atomic operations are lowered directly to a locked 15840b57cec5SDimitry Andric // OR with 0 to the top of stack (or slightly offset from top), which uses an 15850b57cec5SDimitry Andric // explicit RSP register as the base. 15860b57cec5SDimitry Andric assert(IndexMO.getReg() == X86::NoRegister && 15870b57cec5SDimitry Andric "Explicit RSP access with dynamic index!"); 15880b57cec5SDimitry Andric LLVM_DEBUG( 15890b57cec5SDimitry Andric dbgs() << " Cannot harden base of explicit RSP offset in a load!"); 15900b57cec5SDimitry Andric } else if (BaseMO.getReg() == X86::RIP || 15910b57cec5SDimitry Andric BaseMO.getReg() == X86::NoRegister) { 15920b57cec5SDimitry Andric // For both RIP-relative addressed loads and absolute loads, we cannot 15930b57cec5SDimitry Andric // meaningfully harden them because the address being loaded has no 15940b57cec5SDimitry Andric // dynamic component. 15950b57cec5SDimitry Andric // 15960b57cec5SDimitry Andric // FIXME: When using a segment base (like TLS does) we end up with the 15970b57cec5SDimitry Andric // dynamic address being the base plus -1 because we can't mutate the 15980b57cec5SDimitry Andric // segment register here. This allows the signed 32-bit offset to point at 15990b57cec5SDimitry Andric // valid segment-relative addresses and load them successfully. 16000b57cec5SDimitry Andric LLVM_DEBUG( 16010b57cec5SDimitry Andric dbgs() << " Cannot harden base of " 16020b57cec5SDimitry Andric << (BaseMO.getReg() == X86::RIP ?
"RIP-relative" : "no-base") 16030b57cec5SDimitry Andric << " address in a load!"); 16040b57cec5SDimitry Andric } else { 16050b57cec5SDimitry Andric assert(BaseMO.isReg() && 16060b57cec5SDimitry Andric "Only allowed to have a frame index or register base."); 16070b57cec5SDimitry Andric HardenOpRegs.push_back(&BaseMO); 16080b57cec5SDimitry Andric } 16090b57cec5SDimitry Andric 16100b57cec5SDimitry Andric if (IndexMO.getReg() != X86::NoRegister && 16110b57cec5SDimitry Andric (HardenOpRegs.empty() || 16120b57cec5SDimitry Andric HardenOpRegs.front()->getReg() != IndexMO.getReg())) 16130b57cec5SDimitry Andric HardenOpRegs.push_back(&IndexMO); 16140b57cec5SDimitry Andric 16150b57cec5SDimitry Andric assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) && 16160b57cec5SDimitry Andric "Should have exactly one or two registers to harden!"); 16170b57cec5SDimitry Andric assert((HardenOpRegs.size() == 1 || 16180b57cec5SDimitry Andric HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) && 16190b57cec5SDimitry Andric "Should not have two of the same registers!"); 16200b57cec5SDimitry Andric 16210b57cec5SDimitry Andric // Remove any registers that have already been checked. 16220b57cec5SDimitry Andric llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) { 16230b57cec5SDimitry Andric // See if this operand's register has already been checked. 16240b57cec5SDimitry Andric auto It = AddrRegToHardenedReg.find(Op->getReg()); 16250b57cec5SDimitry Andric if (It == AddrRegToHardenedReg.end()) 16260b57cec5SDimitry Andric // Not checked, so retain this one. 16270b57cec5SDimitry Andric return false; 16280b57cec5SDimitry Andric 16290b57cec5SDimitry Andric // Otherwise, we can directly update this operand and remove it. 16300b57cec5SDimitry Andric Op->setReg(It->second); 16310b57cec5SDimitry Andric return true; 16320b57cec5SDimitry Andric }); 16330b57cec5SDimitry Andric // If there are none left, we're done. 16340b57cec5SDimitry Andric if (HardenOpRegs.empty()) 16350b57cec5SDimitry Andric return; 16360b57cec5SDimitry Andric 16370b57cec5SDimitry Andric // Compute the current predicate state. 163804eeddc0SDimitry Andric Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB); 16390b57cec5SDimitry Andric 16400b57cec5SDimitry Andric auto InsertPt = MI.getIterator(); 16410b57cec5SDimitry Andric 16420b57cec5SDimitry Andric // If EFLAGS are live and we don't have access to instructions that avoid 16430b57cec5SDimitry Andric // clobbering EFLAGS, we need to save and restore them. This in turn makes 16440b57cec5SDimitry Andric // the EFLAGS no longer live. 16450b57cec5SDimitry Andric unsigned FlagsReg = 0; 16460b57cec5SDimitry Andric if (EFLAGSLive && !Subtarget->hasBMI2()) { 16470b57cec5SDimitry Andric EFLAGSLive = false; 16480b57cec5SDimitry Andric FlagsReg = saveEFLAGS(MBB, InsertPt, Loc); 16490b57cec5SDimitry Andric } 16500b57cec5SDimitry Andric 16510b57cec5SDimitry Andric for (MachineOperand *Op : HardenOpRegs) { 16528bcb0991SDimitry Andric Register OpReg = Op->getReg(); 16530b57cec5SDimitry Andric auto *OpRC = MRI->getRegClass(OpReg); 16548bcb0991SDimitry Andric Register TmpReg = MRI->createVirtualRegister(OpRC); 16550b57cec5SDimitry Andric 16560b57cec5SDimitry Andric // If this is a vector register, we'll need somewhat custom logic to handle 16570b57cec5SDimitry Andric // hardening it.
16580b57cec5SDimitry Andric if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) || 16590b57cec5SDimitry Andric OpRC->hasSuperClassEq(&X86::VR256RegClass))) { 16600b57cec5SDimitry Andric assert(Subtarget->hasAVX2() && "AVX2-specific register classes!"); 16610b57cec5SDimitry Andric bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass); 16620b57cec5SDimitry Andric 16630b57cec5SDimitry Andric // Move our state into a vector register. 16640b57cec5SDimitry Andric // FIXME: We could skip this at the cost of longer encodings with AVX-512 16650b57cec5SDimitry Andric // but that doesn't seem likely worth it. 16668bcb0991SDimitry Andric Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass); 16670b57cec5SDimitry Andric auto MovI = 16680b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg) 16690b57cec5SDimitry Andric .addReg(StateReg); 16700b57cec5SDimitry Andric (void)MovI; 16710b57cec5SDimitry Andric ++NumInstsInserted; 16720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n"); 16730b57cec5SDimitry Andric 16740b57cec5SDimitry Andric // Broadcast it across the vector register. 16758bcb0991SDimitry Andric Register VBStateReg = MRI->createVirtualRegister(OpRC); 16760b57cec5SDimitry Andric auto BroadcastI = BuildMI(MBB, InsertPt, Loc, 16770b57cec5SDimitry Andric TII->get(Is128Bit ? X86::VPBROADCASTQrr 16780b57cec5SDimitry Andric : X86::VPBROADCASTQYrr), 16790b57cec5SDimitry Andric VBStateReg) 16800b57cec5SDimitry Andric .addReg(VStateReg); 16810b57cec5SDimitry Andric (void)BroadcastI; 16820b57cec5SDimitry Andric ++NumInstsInserted; 16830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump(); 16840b57cec5SDimitry Andric dbgs() << "\n"); 16850b57cec5SDimitry Andric 16860b57cec5SDimitry Andric // Merge our potential poison state into the value with a vector or. 16870b57cec5SDimitry Andric auto OrI = 16880b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, 16890b57cec5SDimitry Andric TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg) 16900b57cec5SDimitry Andric .addReg(VBStateReg) 16910b57cec5SDimitry Andric .addReg(OpReg); 16920b57cec5SDimitry Andric (void)OrI; 16930b57cec5SDimitry Andric ++NumInstsInserted; 16940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n"); 16950b57cec5SDimitry Andric } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) || 16960b57cec5SDimitry Andric OpRC->hasSuperClassEq(&X86::VR256XRegClass) || 16970b57cec5SDimitry Andric OpRC->hasSuperClassEq(&X86::VR512RegClass)) { 16980b57cec5SDimitry Andric assert(Subtarget->hasAVX512() && "AVX512-specific register classes!"); 16990b57cec5SDimitry Andric bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass); 17000b57cec5SDimitry Andric bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass); 17010b57cec5SDimitry Andric if (Is128Bit || Is256Bit) 17020b57cec5SDimitry Andric assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!"); 17030b57cec5SDimitry Andric 17040b57cec5SDimitry Andric // Broadcast our state into a vector register. 17058bcb0991SDimitry Andric Register VStateReg = MRI->createVirtualRegister(OpRC); 17065ffd83dbSDimitry Andric unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr 17075ffd83dbSDimitry Andric : Is256Bit ? 
X86::VPBROADCASTQrZ256rr 17085ffd83dbSDimitry Andric : X86::VPBROADCASTQrZrr; 17090b57cec5SDimitry Andric auto BroadcastI = 17100b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg) 17110b57cec5SDimitry Andric .addReg(StateReg); 17120b57cec5SDimitry Andric (void)BroadcastI; 17130b57cec5SDimitry Andric ++NumInstsInserted; 17140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump(); 17150b57cec5SDimitry Andric dbgs() << "\n"); 17160b57cec5SDimitry Andric 17170b57cec5SDimitry Andric // Merge our potential poison state into the value with a vector or. 17180b57cec5SDimitry Andric unsigned OrOp = Is128Bit ? X86::VPORQZ128rr 17190b57cec5SDimitry Andric : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr; 17200b57cec5SDimitry Andric auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg) 17210b57cec5SDimitry Andric .addReg(VStateReg) 17220b57cec5SDimitry Andric .addReg(OpReg); 17230b57cec5SDimitry Andric (void)OrI; 17240b57cec5SDimitry Andric ++NumInstsInserted; 17250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n"); 17260b57cec5SDimitry Andric } else { 17270b57cec5SDimitry Andric // FIXME: Need to support GR32 here for 32-bit code. 17280b57cec5SDimitry Andric assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) && 17290b57cec5SDimitry Andric "Not a supported register class for address hardening!"); 17300b57cec5SDimitry Andric 17310b57cec5SDimitry Andric if (!EFLAGSLive) { 17320b57cec5SDimitry Andric // Merge our potential poison state into the value with an or. 17330b57cec5SDimitry Andric auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg) 17340b57cec5SDimitry Andric .addReg(StateReg) 17350b57cec5SDimitry Andric .addReg(OpReg); 17360b57cec5SDimitry Andric OrI->addRegisterDead(X86::EFLAGS, TRI); 17370b57cec5SDimitry Andric ++NumInstsInserted; 17380b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n"); 17390b57cec5SDimitry Andric } else { 17400b57cec5SDimitry Andric // We need to avoid touching EFLAGS so shift out all but the least 17410b57cec5SDimitry Andric // significant bit using the instruction that doesn't update flags. 17420b57cec5SDimitry Andric auto ShiftI = 17430b57cec5SDimitry Andric BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg) 17440b57cec5SDimitry Andric .addReg(OpReg) 17450b57cec5SDimitry Andric .addReg(StateReg); 17460b57cec5SDimitry Andric (void)ShiftI; 17470b57cec5SDimitry Andric ++NumInstsInserted; 17480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump(); 17490b57cec5SDimitry Andric dbgs() << "\n"); 17500b57cec5SDimitry Andric } 17510b57cec5SDimitry Andric } 17520b57cec5SDimitry Andric 17530b57cec5SDimitry Andric // Record this register as checked and update the operand. 17540b57cec5SDimitry Andric assert(!AddrRegToHardenedReg.count(Op->getReg()) && 17550b57cec5SDimitry Andric "Should not have checked this register yet!"); 17560b57cec5SDimitry Andric AddrRegToHardenedReg[Op->getReg()] = TmpReg; 17570b57cec5SDimitry Andric Op->setReg(TmpReg); 17580b57cec5SDimitry Andric ++NumAddrRegsHardened; 17590b57cec5SDimitry Andric } 17600b57cec5SDimitry Andric 17610b57cec5SDimitry Andric // And restore the flags if needed. 
17620b57cec5SDimitry Andric if (FlagsReg) 17630b57cec5SDimitry Andric restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg); 17640b57cec5SDimitry Andric } 17650b57cec5SDimitry Andric 17660b57cec5SDimitry Andric MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst( 17670b57cec5SDimitry Andric MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) { 17685ffd83dbSDimitry Andric assert(X86InstrInfo::isDataInvariantLoad(InitialMI) && 17690b57cec5SDimitry Andric "Cannot get here with a non-invariant load!"); 17705ffd83dbSDimitry Andric assert(!isEFLAGSDefLive(InitialMI) && 17715ffd83dbSDimitry Andric "Cannot get here with a data invariant load " 17725ffd83dbSDimitry Andric "that interferes with EFLAGS!"); 17730b57cec5SDimitry Andric 17740b57cec5SDimitry Andric // See if we can sink hardening the loaded value. 17750b57cec5SDimitry Andric auto SinkCheckToSingleUse = 1776bdd1243dSDimitry Andric [&](MachineInstr &MI) -> std::optional<MachineInstr *> { 17778bcb0991SDimitry Andric Register DefReg = MI.getOperand(0).getReg(); 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric // We need to find a single use which we can sink the check. We can 17800b57cec5SDimitry Andric // primarily do this because many uses may already end up checked on their 17810b57cec5SDimitry Andric // own. 17820b57cec5SDimitry Andric MachineInstr *SingleUseMI = nullptr; 17830b57cec5SDimitry Andric for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) { 17845ffd83dbSDimitry Andric // If we're already going to harden this use, it is data invariant, it 17855ffd83dbSDimitry Andric // does not interfere with EFLAGS, and within our block. 17860b57cec5SDimitry Andric if (HardenedInstrs.count(&UseMI)) { 17875ffd83dbSDimitry Andric if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) { 17880b57cec5SDimitry Andric // If we've already decided to harden a non-load, we must have sunk 17890b57cec5SDimitry Andric // some other post-load hardened instruction to it and it must itself 17900b57cec5SDimitry Andric // be data-invariant. 17915ffd83dbSDimitry Andric assert(X86InstrInfo::isDataInvariant(UseMI) && 17920b57cec5SDimitry Andric "Data variant instruction being hardened!"); 17930b57cec5SDimitry Andric continue; 17940b57cec5SDimitry Andric } 17950b57cec5SDimitry Andric 17960b57cec5SDimitry Andric // Otherwise, this is a load and the load component can't be data 17970b57cec5SDimitry Andric // invariant so check how this register is being used. 17987a6dacacSDimitry Andric const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI); 17990b57cec5SDimitry Andric assert(MemRefBeginIdx >= 0 && 18000b57cec5SDimitry Andric "Should always have mem references here!"); 18010b57cec5SDimitry Andric 18020b57cec5SDimitry Andric MachineOperand &BaseMO = 18030b57cec5SDimitry Andric UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); 18040b57cec5SDimitry Andric MachineOperand &IndexMO = 18050b57cec5SDimitry Andric UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); 18060b57cec5SDimitry Andric if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) || 18070b57cec5SDimitry Andric (IndexMO.isReg() && IndexMO.getReg() == DefReg)) 18080b57cec5SDimitry Andric // The load uses the register as part of its address making it not 18090b57cec5SDimitry Andric // invariant. 
18100b57cec5SDimitry Andric return {}; 18110b57cec5SDimitry Andric 18120b57cec5SDimitry Andric continue; 18130b57cec5SDimitry Andric } 18140b57cec5SDimitry Andric 18150b57cec5SDimitry Andric if (SingleUseMI) 18160b57cec5SDimitry Andric // We already have a single use, this would make two. Bail. 18170b57cec5SDimitry Andric return {}; 18180b57cec5SDimitry Andric 18190b57cec5SDimitry Andric // If this single use isn't data invariant, isn't in this block, or has 18200b57cec5SDimitry Andric // interfering EFLAGS, we can't sink the hardening to it. 18215ffd83dbSDimitry Andric if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() || 18225ffd83dbSDimitry Andric isEFLAGSDefLive(UseMI)) 18230b57cec5SDimitry Andric return {}; 18240b57cec5SDimitry Andric 18250b57cec5SDimitry Andric // If this instruction defines multiple registers bail as we won't harden 18260b57cec5SDimitry Andric // all of them. 18270b57cec5SDimitry Andric if (UseMI.getDesc().getNumDefs() > 1) 18280b57cec5SDimitry Andric return {}; 18290b57cec5SDimitry Andric 18300b57cec5SDimitry Andric // If this register isn't a virtual register we can't walk uses of sanely, 18310b57cec5SDimitry Andric // just bail. Also check that its register class is one of the ones we 18320b57cec5SDimitry Andric // can harden. 18338bcb0991SDimitry Andric Register UseDefReg = UseMI.getOperand(0).getReg(); 18347a6dacacSDimitry Andric if (!canHardenRegister(UseDefReg)) 18350b57cec5SDimitry Andric return {}; 18360b57cec5SDimitry Andric 18370b57cec5SDimitry Andric SingleUseMI = &UseMI; 18380b57cec5SDimitry Andric } 18390b57cec5SDimitry Andric 18400b57cec5SDimitry Andric // If SingleUseMI is still null, there is no use that needs its own 18410b57cec5SDimitry Andric // checking. Otherwise, it is the single use that needs checking. 18420b57cec5SDimitry Andric return {SingleUseMI}; 18430b57cec5SDimitry Andric }; 18440b57cec5SDimitry Andric 18450b57cec5SDimitry Andric MachineInstr *MI = &InitialMI; 1846bdd1243dSDimitry Andric while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) { 18470b57cec5SDimitry Andric // Update which MI we're checking now. 18480b57cec5SDimitry Andric MI = *SingleUse; 18490b57cec5SDimitry Andric if (!MI) 18500b57cec5SDimitry Andric break; 18510b57cec5SDimitry Andric } 18520b57cec5SDimitry Andric 18530b57cec5SDimitry Andric return MI; 18540b57cec5SDimitry Andric } 18550b57cec5SDimitry Andric 1856e8d8bef9SDimitry Andric bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) { 18577a6dacacSDimitry Andric // We only support hardening virtual registers. 18587a6dacacSDimitry Andric if (!Reg.isVirtual()) 18597a6dacacSDimitry Andric return false; 18607a6dacacSDimitry Andric 18610b57cec5SDimitry Andric auto *RC = MRI->getRegClass(Reg); 18620b57cec5SDimitry Andric int RegBytes = TRI->getRegSizeInBits(*RC) / 8; 18630b57cec5SDimitry Andric if (RegBytes > 8) 18640b57cec5SDimitry Andric // We don't support post-load hardening of vectors. 18650b57cec5SDimitry Andric return false; 18660b57cec5SDimitry Andric 18678bcb0991SDimitry Andric unsigned RegIdx = Log2_32(RegBytes); 18688bcb0991SDimitry Andric assert(RegIdx < 4 && "Unsupported register size"); 18698bcb0991SDimitry Andric 18700b57cec5SDimitry Andric // If this register class is explicitly constrained to a class that doesn't 18710b57cec5SDimitry Andric // require REX prefix, we may not be able to satisfy that constraint when 18720b57cec5SDimitry Andric // emitting the hardening instructions, so bail out here. 
  // FIXME: This seems like a pretty lame hack. The way this comes up is when
  // we end up with both a NOREX and a REX-only register as operands to the
  // hardening instructions. It would be better to fix that code to handle this
  // situation rather than hack around it in this way.
  const TargetRegisterClass *NOREXRegClasses[] = {
      &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
      &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
  if (RC == NOREXRegClasses[RegIdx])
    return false;

  const TargetRegisterClass *GPRRegClasses[] = {
      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
      &X86::GR64RegClass};
  return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
}

/// Harden a value in a register.
///
/// This is the low-level logic to fully harden a value sitting in a register
/// against leaking during speculative execution.
///
/// Unlike hardening an address that is used by a load, this routine is
/// required to hide *all* incoming bits in the register.
///
/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
/// larger than the predicate state register. FIXME: We should support vector
/// registers here by broadcasting the predicate state.
///
/// The new, hardened virtual register is returned. It will have the same
/// register class as `Reg`.
unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
    Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
    const DebugLoc &Loc) {
  assert(canHardenRegister(Reg) && "Cannot harden this register!");

  auto *RC = MRI->getRegClass(Reg);
  int Bytes = TRI->getRegSizeInBits(*RC) / 8;
  Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
  assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
         "Unknown register size");

  // FIXME: Need to teach this about 32-bit mode.
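  //
  // Illustrative sketch (not part of the emitted code; the register names are
  // invented for the example): with the 64-bit predicate state in %rcx (zero
  // on the correct path, all ones when misspeculating), hardening a 32-bit
  // value held in %eax boils down to a single OR of the narrowed state:
  //
  //   orl %ecx, %eax   # leaves %eax unchanged normally, all ones if poisoned
  //
  // The code below first narrows the state register to the width of `Reg`
  // with a subregister COPY and then emits the matching OR.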
  if (Bytes != 8) {
    unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
    unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
    Register NarrowStateReg = MRI->createVirtualRegister(RC);
    BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
        .addReg(StateReg, 0, SubRegImm);
    StateReg = NarrowStateReg;
  }

  unsigned FlagsReg = 0;
  if (isEFLAGSLive(MBB, InsertPt, *TRI))
    FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);

  Register NewReg = MRI->createVirtualRegister(RC);
  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
  unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
                 .addReg(StateReg)
                 .addReg(Reg);
  OrI->addRegisterDead(X86::EFLAGS, TRI);
  ++NumInstsInserted;
  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");

  if (FlagsReg)
    restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);

  return NewReg;
}

/// Harden a load by hardening the loaded value in the defined register.
///
/// We can harden a non-leaking load into a register without touching the
/// address by just hiding all of the loaded bits during misspeculation. We use
/// an `or` instruction to do this because we set up our poison value as all
/// ones. The goal is just for the loaded bits not to be exposed to speculative
/// execution, and coercing them all to one is sufficient.
///
/// Returns the newly hardened register.
unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &Loc = MI.getDebugLoc();

  auto &DefOp = MI.getOperand(0);
  Register OldDefReg = DefOp.getReg();
  auto *DefRC = MRI->getRegClass(OldDefReg);

  // Because we want to completely replace the uses of this def'ed value with
  // the hardened value, create a dedicated new register that will only be used
  // to communicate the unhardened value to the hardening.
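  //
  // Schematically (the virtual register names are purely illustrative): if the
  // load was `%old = MOV64rm ...`, it is rewritten to
  // `%unhardened = MOV64rm ...`, an `%hardened = OR64rr %unhardened, %state`
  // is emitted immediately after it, and every remaining use of `%old` is then
  // replaced with `%hardened`.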
  Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
  DefOp.setReg(UnhardenedReg);

  // Now harden this register's value, getting a hardened reg that is safe to
  // use. Note that we insert the instructions to compute this *after* the
  // defining instruction, not before it.
  unsigned HardenedReg = hardenValueInRegister(
      UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);

  // Finally, replace the old register (which now only has the uses of the
  // original def) with the hardened register.
  MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);

  ++NumPostLoadRegsHardened;
  return HardenedReg;
}

/// Harden a return instruction.
///
/// Returns implicitly perform a load which we need to harden. Without
/// hardening this load, an attacker may speculatively write over the return
/// address to steer speculation of the return to an attacker-controlled
/// address. This is called Spectre v1.1 or Bounds Check Bypass Store (BCBS)
/// and is described in this paper:
/// https://people.csail.mit.edu/vlk/spectre11.pdf
///
/// We can harden this by introducing an LFENCE that will delay any load of the
/// return address until prior instructions have retired (and thus are not
/// being speculated), or we can harden the address used by the implicit load:
/// the stack pointer.
///
/// If we are not using an LFENCE, hardening the stack pointer has an
/// additional benefit: it allows us to pass the predicate state accumulated in
/// this function back to the caller. In the absence of a BCBS attack on the
/// return, the caller will typically be resumed and speculatively executed due
/// to the Return Stack Buffer (RSB) prediction, which is very accurate and has
/// a high priority. It is possible that some code from the caller will be
/// executed speculatively even during a BCBS-attacked return until the
/// steering takes effect. Whenever this happens, the caller can recover the
/// (poisoned) predicate state from the stack pointer and continue to harden
/// loads.
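///
/// A rough sketch of the sequence this produces at a return (the exact
/// register is chosen by the register allocator; the name here is illustrative
/// only):
///
///   shlq $47, %rcx    # move the predicate state into the high 17 bits
///   orq  %rcx, %rsp   # poisons %rsp only if we are misspeculating
///   retq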
void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const DebugLoc &Loc = MI.getDebugLoc();
  auto InsertPt = MI.getIterator();

  if (FenceCallAndRet)
    // No need to fence here as we'll fence at the return site itself. That
    // handles more cases than we can handle here.
    return;

  // Take our predicate state, shift it to the high 17 bits (so that we keep
  // pointers canonical) and merge it into RSP. This will allow the caller to
  // extract it when we return (speculatively).
  mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
}

/// Trace the predicate state through a call.
///
/// There are several layers of this needed to handle the full complexity of
/// calls.
///
/// First, we need to send the predicate state into the called function. We do
/// this by merging it into the high bits of the stack pointer.
///
/// For tail calls, this is all we need to do.
///
/// For calls where we might return and resume the control flow, we need to
/// extract the predicate state from the high bits of the stack pointer after
/// control returns from the called function.
///
/// We also need to verify that we intended to return to this location in the
/// code. An attacker might arrange for the processor to mispredict the return
/// to this valid but incorrect return address in the program rather than the
/// correct one. See the paper on this attack, called "ret2spec" by the
/// researchers, here:
/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
///
/// The way we verify that we returned to the correct location is by preserving
/// the expected return address across the call. One technique involves taking
/// advantage of the red-zone to load the return address from `-8(%rsp)` where
/// it was left by the RET instruction when it popped `%rsp`. Alternatively, we
/// can directly save the address into a register that will be preserved across
/// the call. We compare this intended return address against the address
/// immediately following the call (the observed return address). If these
/// mismatch, we have detected misspeculation and can poison our predicate
/// state.
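///
/// A minimal sketch of the check when the expected return address has to be
/// carried in a register across the call (the label and register names are
/// illustrative only; the predicate state has already been merged into %rsp
/// before the call, just as for returns):
///
///   leaq .Lslh_ret_addr0(%rip), %r8    # expected return address
///   callq other_function
/// .Lslh_ret_addr0:
///   movq %rsp, %rax
///   sarq $63, %rax                     # re-extract the predicate state
///   leaq .Lslh_ret_addr0(%rip), %rcx   # address we actually returned to
///   cmpq %rcx, %r8
///   cmovneq %r9, %rax                  # poison the state on a mismatch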
void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  auto InsertPt = MI.getIterator();
  const DebugLoc &Loc = MI.getDebugLoc();

  if (FenceCallAndRet) {
    if (MI.isReturn())
      // Tail call; we don't return to this function.
      // FIXME: We should also handle noreturn calls.
      return;

    // We don't need to fence before the call because the function should fence
    // in its entry. However, we do need to fence after the call returns.
    // Fencing before the return doesn't correctly handle cases where the
    // return itself is mispredicted.
    BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
    return;
  }

  // First, we transfer the predicate state into the called function by merging
  // it into the stack pointer. This will kill the current def of the state.
  Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
  mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);

  // If this call is also a return, it is a tail call and we don't need
  // anything else to handle it, so just return. Also, if there are no further
  // instructions and no successors, this call does not return so we can also
  // bail.
  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
    return;

  // Create a symbol to track the return address and attach it to the call
  // machine instruction. We will lower extra symbols attached to call
  // instructions as a label immediately following the call.
  MCSymbol *RetSymbol =
      MF.getContext().createTempSymbol("slh_ret_addr",
                                       /*AlwaysAddSuffix*/ true);
  MI.setPostInstrSymbol(MF, RetSymbol);

  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
  unsigned ExpectedRetAddrReg = 0;

  // If we have no red zones or if the function returns twice (possibly without
  // using the `ret` instruction) like setjmp, we need to save the expected
  // return address prior to the call.
  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
      MF.exposesReturnsTwice()) {
    // If we don't have red zones, we need to compute the expected return
    // address prior to the call and store it in a register that lives across
    // the call.
    //
    // In some ways, this is doubly satisfying as a mitigation because it will
    // also successfully detect stack smashing bugs in some cases (typically,
    // when a callee-saved register is used and the callee doesn't push it onto
    // the stack). But that isn't our primary goal, so we only use it as
    // a fallback.
    //
    // FIXME: It isn't clear that this is reliable in the face of
    // rematerialization in the register allocator. We somehow need to force
    // that to not occur for this particular instruction, and instead to spill
    // or otherwise preserve the value computed *prior* to the call.
    //
    // FIXME: It is even less clear why MachineCSE can't just fold this when we
    // end up having to use identical instructions both before and after the
    // call to feed the comparison.
    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
    if (MF.getTarget().getCodeModel() == CodeModel::Small &&
        !Subtarget->isPositionIndependent()) {
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
          .addSym(RetSymbol);
    } else {
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
          .addReg(/*Base*/ X86::RIP)
          .addImm(/*Scale*/ 1)
          .addReg(/*Index*/ 0)
          .addSym(RetSymbol)
          .addReg(/*Segment*/ 0);
    }
  }

  // Step past the call to handle when it returns.
  ++InsertPt;

  // If we didn't pre-compute the expected return address into a register, then
  // red zones are enabled and the return address is still available on the
  // stack immediately after the call. As the very first instruction, we load
  // it into a register.
  if (!ExpectedRetAddrReg) {
    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
        .addReg(/*Base*/ X86::RSP)
        .addImm(/*Scale*/ 1)
        .addReg(/*Index*/ 0)
        .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
                                     // the return address sits at -8(%rsp).
        .addReg(/*Segment*/ 0);
  }

  // Now we extract the callee's predicate state from the stack pointer.
  unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);

  // Test the expected return address against our actual address. If we can
  // form this basic block's address as an immediate, this is easy. Otherwise
  // we compute it.
  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
      !Subtarget->isPositionIndependent()) {
    // FIXME: Could we fold this with the load? It would require careful EFLAGS
    // management.
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
        .addReg(ExpectedRetAddrReg, RegState::Kill)
        .addSym(RetSymbol);
  } else {
    Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
        .addReg(/*Base*/ X86::RIP)
        .addImm(/*Scale*/ 1)
        .addReg(/*Index*/ 0)
        .addSym(RetSymbol)
        .addReg(/*Segment*/ 0);
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
        .addReg(ExpectedRetAddrReg, RegState::Kill)
        .addReg(ActualRetAddrReg, RegState::Kill);
  }

  // Now conditionally update the predicate state we just extracted if we ended
  // up at a different return address than expected.
  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);

  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
                   .addReg(NewStateReg, RegState::Kill)
                   .addReg(PS->PoisonReg)
                   .addImm(X86::COND_NE);
  CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
  ++NumInstsInserted;
  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");

  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
}

/// An attacker may speculatively store over a value that is then speculatively
/// loaded and used as the target of an indirect call or jump instruction. This
/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
/// in this paper:
/// https://people.csail.mit.edu/vlk/spectre11.pdf
///
/// When this happens, the speculative execution of the call or jump will end
/// up being steered to this attacker-controlled address. While most such loads
/// will be adequately hardened already, we want to ensure that they are
/// definitively treated as needing post-load hardening. While address
/// hardening is sufficient to prevent secret data from leaking to the
/// attacker, it may not be sufficient to prevent an attacker from steering
/// speculative execution. We forcibly unfolded all relevant loads above and so
/// will always have an opportunity to post-load harden here; we just need to
/// scan for cases not already flagged and add them.
void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
    MachineInstr &MI,
    SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
  switch (MI.getOpcode()) {
  case X86::FARCALL16m:
  case X86::FARCALL32m:
  case X86::FARCALL64m:
  case X86::FARJMP16m:
  case X86::FARJMP32m:
  case X86::FARJMP64m:
    // We don't need to harden either far calls or far jumps as they are
    // safe from Spectre.
    return;

  default:
    break;
  }

  // We should never see a loading instruction at this point, as those should
  // have been unfolded.
  assert(!MI.mayLoad() && "Found a lingering loading instruction!");

  // If the first operand isn't a register, this is a branch or call
  // instruction with an immediate operand which doesn't need to be hardened.
  if (!MI.getOperand(0).isReg())
    return;

  // For all of these, the target register is the first operand of the
  // instruction.
  auto &TargetOp = MI.getOperand(0);
  Register OldTargetReg = TargetOp.getReg();

  // Try to look up a hardened version of this register. We retain a reference
  // here as we want to update the map to track any newly computed hardened
  // register.
  unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];

  // If we don't have a hardened register yet, compute one. Otherwise, just use
  // the already hardened register.
  //
  // FIXME: It is a little suspect that we use partially hardened registers that
  // only feed addresses. The complexity of partial hardening with SHRX
  // continues to pile up. Should definitively measure its value and consider
  // eliminating it.
  if (!HardenedTargetReg)
    HardenedTargetReg = hardenValueInRegister(
        OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());

  // Set the target operand to the hardened register.
  TargetOp.setReg(HardenedTargetReg);

  ++NumCallsOrJumpsHardened;
}

INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
                      "X86 speculative load hardener", false, false)
INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
                    "X86 speculative load hardener", false, false)

FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
  return new X86SpeculativeLoadHardeningPass();
}