//===- AtomicExpandPass.cpp - Expand atomic instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
"llvm/Transforms/Utils/LowerAtomic.h" 550b57cec5SDimitry Andric #include <cassert> 560b57cec5SDimitry Andric #include <cstdint> 570b57cec5SDimitry Andric #include <iterator> 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric using namespace llvm; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric #define DEBUG_TYPE "atomic-expand" 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric namespace { 640b57cec5SDimitry Andric 65*0fca6ea1SDimitry Andric class AtomicExpandImpl { 660b57cec5SDimitry Andric const TargetLowering *TLI = nullptr; 67bdd1243dSDimitry Andric const DataLayout *DL = nullptr; 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric private: 700b57cec5SDimitry Andric bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); 710b57cec5SDimitry Andric IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); 720b57cec5SDimitry Andric LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); 730b57cec5SDimitry Andric bool tryExpandAtomicLoad(LoadInst *LI); 740b57cec5SDimitry Andric bool expandAtomicLoadToLL(LoadInst *LI); 750b57cec5SDimitry Andric bool expandAtomicLoadToCmpXchg(LoadInst *LI); 760b57cec5SDimitry Andric StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); 7781ad6265SDimitry Andric bool tryExpandAtomicStore(StoreInst *SI); 7881ad6265SDimitry Andric void expandAtomicStore(StoreInst *SI); 790b57cec5SDimitry Andric bool tryExpandAtomicRMW(AtomicRMWInst *AI); 80fe6060f1SDimitry Andric AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); 810b57cec5SDimitry Andric Value * 82bdd1243dSDimitry Andric insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr, 83fe6060f1SDimitry Andric Align AddrAlign, AtomicOrdering MemOpOrder, 84bdd1243dSDimitry Andric function_ref<Value *(IRBuilderBase &, Value *)> PerformOp); 85bdd1243dSDimitry Andric void expandAtomicOpToLLSC( 86bdd1243dSDimitry Andric Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, 87bdd1243dSDimitry Andric AtomicOrdering MemOpOrder, 88bdd1243dSDimitry Andric function_ref<Value *(IRBuilderBase &, Value *)> PerformOp); 890b57cec5SDimitry Andric void expandPartwordAtomicRMW( 9081ad6265SDimitry Andric AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); 910b57cec5SDimitry Andric AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); 925ffd83dbSDimitry Andric bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); 930b57cec5SDimitry Andric void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); 940b57cec5SDimitry Andric void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); 97bdd1243dSDimitry Andric static Value *insertRMWCmpXchgLoop( 98bdd1243dSDimitry Andric IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign, 99bdd1243dSDimitry Andric AtomicOrdering MemOpOrder, SyncScope::ID SSID, 100bdd1243dSDimitry Andric function_ref<Value *(IRBuilderBase &, Value *)> PerformOp, 1010b57cec5SDimitry Andric CreateCmpXchgInstFun CreateCmpXchg); 1020b57cec5SDimitry Andric bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); 1050b57cec5SDimitry Andric bool isIdempotentRMW(AtomicRMWInst *RMWI); 1060b57cec5SDimitry Andric bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); 1070b57cec5SDimitry Andric 1085ffd83dbSDimitry Andric bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, 
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);

public:
  bool run(Function &F, const TargetMachine *TM);
};

class AtomicExpandLegacy : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpandLegacy() : FunctionPass(ID) {
    initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;
};

// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // Enable StrictFP builder mode when appropriate.
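  // Note: the callback inserter used below re-applies the source instruction's
  // MMRA metadata (if any) to every instruction this builder creates; see
  // addMMRAMD.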
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), DL,
                  IRBuilderCallbackInserter(
                      [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};

} // end anonymous namespace

char AtomicExpandLegacy::ID = 0;

char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;

INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
                      "Expand Atomic instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
                    "Expand Atomic instructions", false, false)

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
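// For example, on a target whose getMaxAtomicSizeInBitsSupported() is 64, a
// 16-byte cmpxchg is not passed through and is instead turned into one of the
// __atomic_compare_exchange* libcalls (see expandAtomicCASToLibcall).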
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}

bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
  const auto *Subtarget = TM->getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getDataLayout();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      IRBuilder Builder(I);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }

    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      // We try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}

bool AtomicExpandLegacy::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;
  auto *TM = &TPC->getTM<TargetMachine>();
  AtomicExpandImpl AE;
  return AE.run(F, TM);
}

FunctionPass *llvm::createAtomicExpandLegacyPass() {
  return new AtomicExpandLegacy();
}

PreservedAnalyses AtomicExpandPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  AtomicExpandImpl AE;

  bool Changed = AE.run(F, TM);
  if (!Changed)
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
                                             AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}

bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}

bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
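///
/// Illustrative example (not tied to any particular target): a store such as
///   store atomic float %f, ptr %p seq_cst, align 4
/// is rewritten to
///   %f.int = bitcast float %f to i32
///   store atomic i32 %f.int, ptr %p seq_cst, align 4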
StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}

static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP and vector types.
  assert(!OrigTy->isPointerTy());
  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
  switch (Kind) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
"system" 632349cc55cSDimitry Andric : SSNs[AI->getSyncScopeID()]; 633349cc55cSDimitry Andric OptimizationRemarkEmitter ORE(AI->getFunction()); 634349cc55cSDimitry Andric ORE.emit([&]() { 635349cc55cSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Passed", AI) 636349cc55cSDimitry Andric << "A compare and swap loop was generated for an atomic " 637349cc55cSDimitry Andric << AI->getOperationName(AI->getOperation()) << " operation at " 638349cc55cSDimitry Andric << MemScope << " memory scope"; 639349cc55cSDimitry Andric }); 6400b57cec5SDimitry Andric expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric return true; 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: { 645*0fca6ea1SDimitry Andric unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 646*0fca6ea1SDimitry Andric unsigned ValueSize = getAtomicOpSize(AI); 647*0fca6ea1SDimitry Andric if (ValueSize < MinCASSize) { 648*0fca6ea1SDimitry Andric AtomicRMWInst::BinOp Op = AI->getOperation(); 649*0fca6ea1SDimitry Andric // Widen And/Or/Xor and give the target another chance at expanding it. 650*0fca6ea1SDimitry Andric if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 651*0fca6ea1SDimitry Andric Op == AtomicRMWInst::And) { 652*0fca6ea1SDimitry Andric tryExpandAtomicRMW(widenPartwordAtomicRMW(AI)); 653*0fca6ea1SDimitry Andric return true; 654*0fca6ea1SDimitry Andric } 655*0fca6ea1SDimitry Andric } 6560b57cec5SDimitry Andric expandAtomicRMWToMaskedIntrinsic(AI); 6570b57cec5SDimitry Andric return true; 6580b57cec5SDimitry Andric } 65981ad6265SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: { 66081ad6265SDimitry Andric TLI->emitBitTestAtomicRMWIntrinsic(AI); 66181ad6265SDimitry Andric return true; 66281ad6265SDimitry Andric } 663bdd1243dSDimitry Andric case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: { 664bdd1243dSDimitry Andric TLI->emitCmpArithAtomicRMWIntrinsic(AI); 665bdd1243dSDimitry Andric return true; 666bdd1243dSDimitry Andric } 66781ad6265SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 66881ad6265SDimitry Andric return lowerAtomicRMWInst(AI); 669bdd1243dSDimitry Andric case TargetLoweringBase::AtomicExpansionKind::Expand: 670bdd1243dSDimitry Andric TLI->emitExpandAtomicRMW(AI); 671bdd1243dSDimitry Andric return true; 6720b57cec5SDimitry Andric default: 6730b57cec5SDimitry Andric llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric } 6760b57cec5SDimitry Andric 6770b57cec5SDimitry Andric namespace { 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric struct PartwordMaskValues { 6805ffd83dbSDimitry Andric // These three fields are guaranteed to be set by createMaskInstrs. 6815ffd83dbSDimitry Andric Type *WordType = nullptr; 6825ffd83dbSDimitry Andric Type *ValueType = nullptr; 683bdd1243dSDimitry Andric Type *IntValueType = nullptr; 6845ffd83dbSDimitry Andric Value *AlignedAddr = nullptr; 685fe6060f1SDimitry Andric Align AlignedAddrAlignment; 6865ffd83dbSDimitry Andric // The remaining fields can be null. 
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
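///
/// Worked example (assuming a little-endian target and a 4-byte word): for an
/// i8 value whose address satisfies (Addr & 3) == 2, this produces
///   AlignedAddr = Addr & ~3
///   ShiftAmt    = 16
///   Mask        = 0x00FF0000
///   Inv_Mask    = 0xFF00FFFF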
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the LSB are known 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
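    // e.g. with a 4-byte word and a 1-byte value, a byte offset of k becomes a
    // shift of (3 - k) * 8 bits; the XOR below mirrors the offset in the word.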
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}

static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
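  // (That masked-merge trick computes Loaded ^ ((Loaded ^ New) & Mask), which
  // takes the bits of New under Mask and of Loaded elsewhere using Mask only.)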
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap: {
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
      Op == AtomicRMWInst::And) {
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
      Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

/// Copy metadata that's safe to preserve when widening atomics.
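/// Currently this copies debug locations, TBAA (including tbaa.struct), alias
/// scopes, noalias, access groups, MMRAs, and the amdgpu.no.*.memory hints;
/// all other metadata kinds are intentionally not copied.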
942*0fca6ea1SDimitry Andric static void copyMetadataForAtomic(Instruction &Dest, 943*0fca6ea1SDimitry Andric const Instruction &Source) { 944*0fca6ea1SDimitry Andric SmallVector<std::pair<unsigned, MDNode *>, 8> MD; 945*0fca6ea1SDimitry Andric Source.getAllMetadata(MD); 946*0fca6ea1SDimitry Andric LLVMContext &Ctx = Dest.getContext(); 947*0fca6ea1SDimitry Andric MDBuilder MDB(Ctx); 948*0fca6ea1SDimitry Andric 949*0fca6ea1SDimitry Andric for (auto [ID, N] : MD) { 950*0fca6ea1SDimitry Andric switch (ID) { 951*0fca6ea1SDimitry Andric case LLVMContext::MD_dbg: 952*0fca6ea1SDimitry Andric case LLVMContext::MD_tbaa: 953*0fca6ea1SDimitry Andric case LLVMContext::MD_tbaa_struct: 954*0fca6ea1SDimitry Andric case LLVMContext::MD_alias_scope: 955*0fca6ea1SDimitry Andric case LLVMContext::MD_noalias: 956*0fca6ea1SDimitry Andric case LLVMContext::MD_access_group: 957*0fca6ea1SDimitry Andric case LLVMContext::MD_mmra: 958*0fca6ea1SDimitry Andric Dest.setMetadata(ID, N); 959*0fca6ea1SDimitry Andric break; 960*0fca6ea1SDimitry Andric default: 961*0fca6ea1SDimitry Andric if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory")) 962*0fca6ea1SDimitry Andric Dest.setMetadata(ID, N); 963*0fca6ea1SDimitry Andric else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory")) 964*0fca6ea1SDimitry Andric Dest.setMetadata(ID, N); 965*0fca6ea1SDimitry Andric 966*0fca6ea1SDimitry Andric break; 967*0fca6ea1SDimitry Andric } 968*0fca6ea1SDimitry Andric } 969*0fca6ea1SDimitry Andric } 970*0fca6ea1SDimitry Andric 9710b57cec5SDimitry Andric // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width. 972*0fca6ea1SDimitry Andric AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) { 973bdd1243dSDimitry Andric ReplacementIRBuilder Builder(AI, *DL); 9740b57cec5SDimitry Andric AtomicRMWInst::BinOp Op = AI->getOperation(); 9750b57cec5SDimitry Andric 9760b57cec5SDimitry Andric assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor || 9770b57cec5SDimitry Andric Op == AtomicRMWInst::And) && 9780b57cec5SDimitry Andric "Unable to widen operation"); 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric PartwordMaskValues PMV = 9810b57cec5SDimitry Andric createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), 982fe6060f1SDimitry Andric AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 9830b57cec5SDimitry Andric 9840b57cec5SDimitry Andric Value *ValOperand_Shifted = 9850b57cec5SDimitry Andric Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), 9860b57cec5SDimitry Andric PMV.ShiftAmt, "ValOperand_Shifted"); 9870b57cec5SDimitry Andric 9880b57cec5SDimitry Andric Value *NewOperand; 9890b57cec5SDimitry Andric 9900b57cec5SDimitry Andric if (Op == AtomicRMWInst::And) 9910b57cec5SDimitry Andric NewOperand = 992*0fca6ea1SDimitry Andric Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand"); 9930b57cec5SDimitry Andric else 9940b57cec5SDimitry Andric NewOperand = ValOperand_Shifted; 9950b57cec5SDimitry Andric 9965f757f3fSDimitry Andric AtomicRMWInst *NewAI = Builder.CreateAtomicRMW( 9975f757f3fSDimitry Andric Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment, 9985f757f3fSDimitry Andric AI->getOrdering(), AI->getSyncScopeID()); 999*0fca6ea1SDimitry Andric 1000*0fca6ea1SDimitry Andric copyMetadataForAtomic(*NewAI, *AI); 10010b57cec5SDimitry Andric 10025ffd83dbSDimitry Andric Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV); 10030b57cec5SDimitry Andric AI->replaceAllUsesWith(FinalOldResult); 10040b57cec5SDimitry Andric 
AI->eraseFromParent(); 10050b57cec5SDimitry Andric return NewAI; 10060b57cec5SDimitry Andric } 10070b57cec5SDimitry Andric 1008*0fca6ea1SDimitry Andric bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { 10090b57cec5SDimitry Andric // The basic idea here is that we're expanding a cmpxchg of a 10100b57cec5SDimitry Andric // smaller memory size up to a word-sized cmpxchg. To do this, we 10110b57cec5SDimitry Andric // need to add a retry-loop for strong cmpxchg, so that 10120b57cec5SDimitry Andric // modifications to other parts of the word don't cause a spurious 10130b57cec5SDimitry Andric // failure. 10140b57cec5SDimitry Andric 10150b57cec5SDimitry Andric // This generates code like the following: 10160b57cec5SDimitry Andric // [[Setup mask values PMV.*]] 10170b57cec5SDimitry Andric // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt 10180b57cec5SDimitry Andric // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt 10190b57cec5SDimitry Andric // %InitLoaded = load i32* %addr 10200b57cec5SDimitry Andric // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask 10210b57cec5SDimitry Andric // br partword.cmpxchg.loop 10220b57cec5SDimitry Andric // partword.cmpxchg.loop: 10230b57cec5SDimitry Andric // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ], 10240b57cec5SDimitry Andric // [ %OldVal_MaskOut, %partword.cmpxchg.failure ] 10250b57cec5SDimitry Andric // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted 10260b57cec5SDimitry Andric // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted 10270b57cec5SDimitry Andric // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp, 10280b57cec5SDimitry Andric // i32 %FullWord_NewVal success_ordering failure_ordering 10290b57cec5SDimitry Andric // %OldVal = extractvalue { i32, i1 } %NewCI, 0 10300b57cec5SDimitry Andric // %Success = extractvalue { i32, i1 } %NewCI, 1 10310b57cec5SDimitry Andric // br i1 %Success, label %partword.cmpxchg.end, 10320b57cec5SDimitry Andric // label %partword.cmpxchg.failure 10330b57cec5SDimitry Andric // partword.cmpxchg.failure: 10340b57cec5SDimitry Andric // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask 10350b57cec5SDimitry Andric // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut 10360b57cec5SDimitry Andric // br i1 %ShouldContinue, label %partword.cmpxchg.loop, 10370b57cec5SDimitry Andric // label %partword.cmpxchg.end 10380b57cec5SDimitry Andric // partword.cmpxchg.end: 10390b57cec5SDimitry Andric // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt 10400b57cec5SDimitry Andric // %FinalOldVal = trunc i32 %tmp1 to i8 10410b57cec5SDimitry Andric // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0 10420b57cec5SDimitry Andric // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric Value *Addr = CI->getPointerOperand(); 10450b57cec5SDimitry Andric Value *Cmp = CI->getCompareOperand(); 10460b57cec5SDimitry Andric Value *NewVal = CI->getNewValOperand(); 10470b57cec5SDimitry Andric 10480b57cec5SDimitry Andric BasicBlock *BB = CI->getParent(); 10490b57cec5SDimitry Andric Function *F = BB->getParent(); 1050bdd1243dSDimitry Andric ReplacementIRBuilder Builder(CI, *DL); 10510b57cec5SDimitry Andric LLVMContext &Ctx = Builder.getContext(); 10520b57cec5SDimitry Andric 10530b57cec5SDimitry Andric BasicBlock *EndBB = 10540b57cec5SDimitry Andric BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end"); 10550b57cec5SDimitry Andric auto FailureBB = 10560b57cec5SDimitry Andric BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, 
EndBB); 10570b57cec5SDimitry Andric auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB); 10580b57cec5SDimitry Andric 10590b57cec5SDimitry Andric // The split call above "helpfully" added a branch at the end of BB 10600b57cec5SDimitry Andric // (to the wrong place). 10610b57cec5SDimitry Andric std::prev(BB->end())->eraseFromParent(); 10620b57cec5SDimitry Andric Builder.SetInsertPoint(BB); 10630b57cec5SDimitry Andric 1064fe6060f1SDimitry Andric PartwordMaskValues PMV = 1065fe6060f1SDimitry Andric createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, 1066fe6060f1SDimitry Andric CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 10670b57cec5SDimitry Andric 10680b57cec5SDimitry Andric // Shift the incoming values over, into the right location in the word. 10690b57cec5SDimitry Andric Value *NewVal_Shifted = 10700b57cec5SDimitry Andric Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt); 10710b57cec5SDimitry Andric Value *Cmp_Shifted = 10720b57cec5SDimitry Andric Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt); 10730b57cec5SDimitry Andric 10740b57cec5SDimitry Andric // Load the entire current word, and mask into place the expected and new 10750b57cec5SDimitry Andric // values 10760b57cec5SDimitry Andric LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr); 10770b57cec5SDimitry Andric InitLoaded->setVolatile(CI->isVolatile()); 10780b57cec5SDimitry Andric Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask); 10790b57cec5SDimitry Andric Builder.CreateBr(LoopBB); 10800b57cec5SDimitry Andric 10810b57cec5SDimitry Andric // partword.cmpxchg.loop: 10820b57cec5SDimitry Andric Builder.SetInsertPoint(LoopBB); 10830b57cec5SDimitry Andric PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2); 10840b57cec5SDimitry Andric Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB); 10850b57cec5SDimitry Andric 10860b57cec5SDimitry Andric // Mask/Or the expected and new values into place in the loaded word. 10870b57cec5SDimitry Andric Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted); 10880b57cec5SDimitry Andric Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); 10890b57cec5SDimitry Andric AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( 1090fe6060f1SDimitry Andric PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment, 1091fe6060f1SDimitry Andric CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID()); 10920b57cec5SDimitry Andric NewCI->setVolatile(CI->isVolatile()); 10930b57cec5SDimitry Andric // When we're building a strong cmpxchg, we need a loop, so you 10940b57cec5SDimitry Andric // might think we could use a weak cmpxchg inside. But, using strong 10950b57cec5SDimitry Andric // allows the below comparison for ShouldContinue, and we're 10960b57cec5SDimitry Andric // expecting the underlying cmpxchg to be a machine instruction, 10970b57cec5SDimitry Andric // which is strong anyways. 
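  // If the original cmpxchg was weak, a single attempt suffices: the weak
  // flag is propagated below and the failure edge branches straight to the
  // end block rather than into the retry loop.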
10980b57cec5SDimitry Andric NewCI->setWeak(CI->isWeak()); 10990b57cec5SDimitry Andric 11000b57cec5SDimitry Andric Value *OldVal = Builder.CreateExtractValue(NewCI, 0); 11010b57cec5SDimitry Andric Value *Success = Builder.CreateExtractValue(NewCI, 1); 11020b57cec5SDimitry Andric 11030b57cec5SDimitry Andric if (CI->isWeak()) 11040b57cec5SDimitry Andric Builder.CreateBr(EndBB); 11050b57cec5SDimitry Andric else 11060b57cec5SDimitry Andric Builder.CreateCondBr(Success, EndBB, FailureBB); 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andric // partword.cmpxchg.failure: 11090b57cec5SDimitry Andric Builder.SetInsertPoint(FailureBB); 11100b57cec5SDimitry Andric // Upon failure, check whether the masked-out part of the loaded value 11110b57cec5SDimitry Andric // changed. If it did, retry with the updated word; if it did not, the 11120b57cec5SDimitry Andric // masked-in part must have mismatched and the cmpxchg has genuinely failed. 11130b57cec5SDimitry Andric Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask); 11140b57cec5SDimitry Andric Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut); 11150b57cec5SDimitry Andric Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB); 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andric // Add the second value to the phi from above 11180b57cec5SDimitry Andric Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB); 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric // partword.cmpxchg.end: 11210b57cec5SDimitry Andric Builder.SetInsertPoint(CI); 11220b57cec5SDimitry Andric 11235ffd83dbSDimitry Andric Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); 1124bdd1243dSDimitry Andric Value *Res = PoisonValue::get(CI->getType()); 11250b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); 11260b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, Success, 1); 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric CI->replaceAllUsesWith(Res); 11290b57cec5SDimitry Andric CI->eraseFromParent(); 11305ffd83dbSDimitry Andric return true; 11310b57cec5SDimitry Andric } 11320b57cec5SDimitry Andric 1133*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicOpToLLSC( 1134fe6060f1SDimitry Andric Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign, 1135fe6060f1SDimitry Andric AtomicOrdering MemOpOrder, 1136bdd1243dSDimitry Andric function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { 1137bdd1243dSDimitry Andric ReplacementIRBuilder Builder(I, *DL); 1138fe6060f1SDimitry Andric Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign, 1139fe6060f1SDimitry Andric MemOpOrder, PerformOp); 11400b57cec5SDimitry Andric 11410b57cec5SDimitry Andric I->replaceAllUsesWith(Loaded); 11420b57cec5SDimitry Andric I->eraseFromParent(); 11430b57cec5SDimitry Andric } 11440b57cec5SDimitry Andric 1145*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { 1146bdd1243dSDimitry Andric ReplacementIRBuilder Builder(AI, *DL); 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric PartwordMaskValues PMV = 11490b57cec5SDimitry Andric createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), 1150fe6060f1SDimitry Andric AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 11510b57cec5SDimitry Andric 11520b57cec5SDimitry Andric // The value operand must be sign-extended for signed min/max so that the 11530b57cec5SDimitry Andric // target's signed comparison instructions can be used. Otherwise, just 11540b57cec5SDimitry Andric // zero-ext.
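  // As a hedged illustration: on a target that implements
  // emitMaskedAtomicRMWIntrinsic (RISC-V, for example), the call below is
  // expected to produce a single target-specific intrinsic taking the aligned
  // word address, the shifted value operand, the lane mask, and the shift
  // amount, so no explicit IR loop is built here.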
11550b57cec5SDimitry Andric Instruction::CastOps CastOp = Instruction::ZExt; 11560b57cec5SDimitry Andric AtomicRMWInst::BinOp RMWOp = AI->getOperation(); 11570b57cec5SDimitry Andric if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min) 11580b57cec5SDimitry Andric CastOp = Instruction::SExt; 11590b57cec5SDimitry Andric 11600b57cec5SDimitry Andric Value *ValOperand_Shifted = Builder.CreateShl( 11610b57cec5SDimitry Andric Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType), 11620b57cec5SDimitry Andric PMV.ShiftAmt, "ValOperand_Shifted"); 11630b57cec5SDimitry Andric Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic( 11640b57cec5SDimitry Andric Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt, 11650b57cec5SDimitry Andric AI->getOrdering()); 11665ffd83dbSDimitry Andric Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV); 11670b57cec5SDimitry Andric AI->replaceAllUsesWith(FinalOldResult); 11680b57cec5SDimitry Andric AI->eraseFromParent(); 11690b57cec5SDimitry Andric } 11700b57cec5SDimitry Andric 1171*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic( 1172*0fca6ea1SDimitry Andric AtomicCmpXchgInst *CI) { 1173bdd1243dSDimitry Andric ReplacementIRBuilder Builder(CI, *DL); 11740b57cec5SDimitry Andric 11750b57cec5SDimitry Andric PartwordMaskValues PMV = createMaskInstrs( 11760b57cec5SDimitry Andric Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), 1177fe6060f1SDimitry Andric CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 11780b57cec5SDimitry Andric 11790b57cec5SDimitry Andric Value *CmpVal_Shifted = Builder.CreateShl( 11800b57cec5SDimitry Andric Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, 11810b57cec5SDimitry Andric "CmpVal_Shifted"); 11820b57cec5SDimitry Andric Value *NewVal_Shifted = Builder.CreateShl( 11830b57cec5SDimitry Andric Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt, 11840b57cec5SDimitry Andric "NewVal_Shifted"); 11850b57cec5SDimitry Andric Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic( 11860b57cec5SDimitry Andric Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask, 1187fe6060f1SDimitry Andric CI->getMergedOrdering()); 11885ffd83dbSDimitry Andric Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV); 1189bdd1243dSDimitry Andric Value *Res = PoisonValue::get(CI->getType()); 11900b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); 11910b57cec5SDimitry Andric Value *Success = Builder.CreateICmpEQ( 11920b57cec5SDimitry Andric CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success"); 11930b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, Success, 1); 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric CI->replaceAllUsesWith(Res); 11960b57cec5SDimitry Andric CI->eraseFromParent(); 11970b57cec5SDimitry Andric } 11980b57cec5SDimitry Andric 1199*0fca6ea1SDimitry Andric Value *AtomicExpandImpl::insertRMWLLSCLoop( 1200bdd1243dSDimitry Andric IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, 12010b57cec5SDimitry Andric AtomicOrdering MemOpOrder, 1202bdd1243dSDimitry Andric function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) { 12030b57cec5SDimitry Andric LLVMContext &Ctx = Builder.getContext(); 12040b57cec5SDimitry Andric BasicBlock *BB = Builder.GetInsertBlock(); 12050b57cec5SDimitry Andric Function *F = BB->getParent(); 12060b57cec5SDimitry Andric 1207fe6060f1SDimitry Andric assert(AddrAlign >= 1208*0fca6ea1SDimitry 
Andric F->getDataLayout().getTypeStoreSize(ResultTy) && 1209fe6060f1SDimitry Andric "Expected at least natural alignment at this point."); 1210fe6060f1SDimitry Andric 12110b57cec5SDimitry Andric // Given: atomicrmw some_op iN* %addr, iN %incr ordering 12120b57cec5SDimitry Andric // 12130b57cec5SDimitry Andric // The standard expansion we produce is: 12140b57cec5SDimitry Andric // [...] 12150b57cec5SDimitry Andric // atomicrmw.start: 12160b57cec5SDimitry Andric // %loaded = @load.linked(%addr) 12170b57cec5SDimitry Andric // %new = some_op iN %loaded, %incr 12180b57cec5SDimitry Andric // %stored = @store_conditional(%new, %addr) 12190b57cec5SDimitry Andric // %try_again = icmp ne i32 %stored, 0 12200b57cec5SDimitry Andric // br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end 12210b57cec5SDimitry Andric // atomicrmw.end: 12220b57cec5SDimitry Andric // [...] 12230b57cec5SDimitry Andric BasicBlock *ExitBB = 12240b57cec5SDimitry Andric BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); 12250b57cec5SDimitry Andric BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); 12260b57cec5SDimitry Andric 12270b57cec5SDimitry Andric // The split call above "helpfully" added a branch at the end of BB (to the 12280b57cec5SDimitry Andric // wrong place). 12290b57cec5SDimitry Andric std::prev(BB->end())->eraseFromParent(); 12300b57cec5SDimitry Andric Builder.SetInsertPoint(BB); 12310b57cec5SDimitry Andric Builder.CreateBr(LoopBB); 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric // Start the main loop block now that we've taken care of the preliminaries. 12340b57cec5SDimitry Andric Builder.SetInsertPoint(LoopBB); 1235fe6060f1SDimitry Andric Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder); 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric Value *NewVal = PerformOp(Builder, Loaded); 12380b57cec5SDimitry Andric 12390b57cec5SDimitry Andric Value *StoreSuccess = 12400b57cec5SDimitry Andric TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); 12410b57cec5SDimitry Andric Value *TryAgain = Builder.CreateICmpNE( 12420b57cec5SDimitry Andric StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); 12430b57cec5SDimitry Andric Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); 12440b57cec5SDimitry Andric 12450b57cec5SDimitry Andric Builder.SetInsertPoint(ExitBB, ExitBB->begin()); 12460b57cec5SDimitry Andric return Loaded; 12470b57cec5SDimitry Andric } 12480b57cec5SDimitry Andric 12490b57cec5SDimitry Andric /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of 12500b57cec5SDimitry Andric /// the equivalent bitwidth. We used to not support pointer cmpxchg in the 12510b57cec5SDimitry Andric /// IR. As a migration step, we convert back to what used to be the standard 12520b57cec5SDimitry Andric /// way to represent a pointer cmpxchg so that we can update backends one by 12530b57cec5SDimitry Andric /// one.
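/// For illustration (assuming 64-bit pointers), a
///   cmpxchg ptr %addr, ptr %old, ptr %new seq_cst seq_cst
/// becomes ptrtoint casts of %old and %new, an i64-typed cmpxchg, and an
/// inttoptr of the extracted old value, with the {ptr, i1} result then
/// reassembled around it.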
125481ad6265SDimitry Andric AtomicCmpXchgInst * 1255*0fca6ea1SDimitry Andric AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { 12560b57cec5SDimitry Andric auto *M = CI->getModule(); 12570b57cec5SDimitry Andric Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), 12580b57cec5SDimitry Andric M->getDataLayout()); 12590b57cec5SDimitry Andric 1260bdd1243dSDimitry Andric ReplacementIRBuilder Builder(CI, *DL); 12610b57cec5SDimitry Andric 12620b57cec5SDimitry Andric Value *Addr = CI->getPointerOperand(); 12630b57cec5SDimitry Andric 12640b57cec5SDimitry Andric Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); 12650b57cec5SDimitry Andric Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy); 12660b57cec5SDimitry Andric 1267fe6060f1SDimitry Andric auto *NewCI = Builder.CreateAtomicCmpXchg( 12685f757f3fSDimitry Andric Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(), 1269fe6060f1SDimitry Andric CI->getFailureOrdering(), CI->getSyncScopeID()); 12700b57cec5SDimitry Andric NewCI->setVolatile(CI->isVolatile()); 12710b57cec5SDimitry Andric NewCI->setWeak(CI->isWeak()); 12720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); 12730b57cec5SDimitry Andric 12740b57cec5SDimitry Andric Value *OldVal = Builder.CreateExtractValue(NewCI, 0); 12750b57cec5SDimitry Andric Value *Succ = Builder.CreateExtractValue(NewCI, 1); 12760b57cec5SDimitry Andric 12770b57cec5SDimitry Andric OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType()); 12780b57cec5SDimitry Andric 1279bdd1243dSDimitry Andric Value *Res = PoisonValue::get(CI->getType()); 12800b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, OldVal, 0); 12810b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, Succ, 1); 12820b57cec5SDimitry Andric 12830b57cec5SDimitry Andric CI->replaceAllUsesWith(Res); 12840b57cec5SDimitry Andric CI->eraseFromParent(); 12850b57cec5SDimitry Andric return NewCI; 12860b57cec5SDimitry Andric } 12870b57cec5SDimitry Andric 1288*0fca6ea1SDimitry Andric bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { 12890b57cec5SDimitry Andric AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); 12900b57cec5SDimitry Andric AtomicOrdering FailureOrder = CI->getFailureOrdering(); 12910b57cec5SDimitry Andric Value *Addr = CI->getPointerOperand(); 12920b57cec5SDimitry Andric BasicBlock *BB = CI->getParent(); 12930b57cec5SDimitry Andric Function *F = BB->getParent(); 12940b57cec5SDimitry Andric LLVMContext &Ctx = F->getContext(); 12950b57cec5SDimitry Andric // If shouldInsertFencesForAtomic() returns true, then the target does not 12960b57cec5SDimitry Andric // want to deal with memory orders, and emitLeading/TrailingFence should take 12970b57cec5SDimitry Andric // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we 12980b57cec5SDimitry Andric // should preserve the ordering. 12990b57cec5SDimitry Andric bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); 1300fe6060f1SDimitry Andric AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic 1301fe6060f1SDimitry Andric ? 
AtomicOrdering::Monotonic 1302fe6060f1SDimitry Andric : CI->getMergedOrdering(); 13030b57cec5SDimitry Andric 13040b57cec5SDimitry Andric // In implementations which use a barrier to achieve release semantics, we can 13050b57cec5SDimitry Andric // delay emitting this barrier until we know a store is actually going to be 13060b57cec5SDimitry Andric // attempted. The cost of this delay is that we need 2 copies of the block 13070b57cec5SDimitry Andric // emitting the load-linked, affecting code size. 13080b57cec5SDimitry Andric // 13090b57cec5SDimitry Andric // Ideally, this logic would be unconditional except for the minsize check 13100b57cec5SDimitry Andric // since in other cases the extra blocks naturally collapse down to the 13110b57cec5SDimitry Andric // minimal loop. Unfortunately, this puts too much stress on later 13120b57cec5SDimitry Andric // optimisations so we avoid emitting the extra logic in those cases too. 13130b57cec5SDimitry Andric bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && 13140b57cec5SDimitry Andric SuccessOrder != AtomicOrdering::Monotonic && 13150b57cec5SDimitry Andric SuccessOrder != AtomicOrdering::Acquire && 13160b57cec5SDimitry Andric !F->hasMinSize(); 13170b57cec5SDimitry Andric 13180b57cec5SDimitry Andric // There's no overhead for sinking the release barrier in a weak cmpxchg, so 13190b57cec5SDimitry Andric // do it even on minsize. 13200b57cec5SDimitry Andric bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); 13210b57cec5SDimitry Andric 13220b57cec5SDimitry Andric // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord 13230b57cec5SDimitry Andric // 13240b57cec5SDimitry Andric // The full expansion we produce is: 13250b57cec5SDimitry Andric // [...] 13265ffd83dbSDimitry Andric // %aligned.addr = ... 13270b57cec5SDimitry Andric // cmpxchg.start: 13285ffd83dbSDimitry Andric // %unreleasedload = @load.linked(%aligned.addr) 13295ffd83dbSDimitry Andric // %unreleasedload.extract = extract value from %unreleasedload 13305ffd83dbSDimitry Andric // %should_store = icmp eq %unreleasedload.extract, %desired 13315ffd83dbSDimitry Andric // br i1 %should_store, label %cmpxchg.releasingstore, 13320b57cec5SDimitry Andric // label %cmpxchg.nostore 13330b57cec5SDimitry Andric // cmpxchg.releasingstore: 13340b57cec5SDimitry Andric // fence? 13350b57cec5SDimitry Andric // br label cmpxchg.trystore 13360b57cec5SDimitry Andric // cmpxchg.trystore: 13375ffd83dbSDimitry Andric // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], 13380b57cec5SDimitry Andric // [%releasedload, %cmpxchg.releasedload] 13395ffd83dbSDimitry Andric // %updated.new = insert %new into %loaded.trystore 13405ffd83dbSDimitry Andric // %stored = @store_conditional(%updated.new, %aligned.addr) 13410b57cec5SDimitry Andric // %success = icmp eq i32 %stored, 0 13420b57cec5SDimitry Andric // br i1 %success, label %cmpxchg.success, 13430b57cec5SDimitry Andric // label %cmpxchg.releasedload/%cmpxchg.failure 13440b57cec5SDimitry Andric // cmpxchg.releasedload: 13455ffd83dbSDimitry Andric // %releasedload = @load.linked(%aligned.addr) 13465ffd83dbSDimitry Andric // %releasedload.extract = extract value from %releasedload 13475ffd83dbSDimitry Andric // %should_store = icmp eq %releasedload.extract, %desired 13480b57cec5SDimitry Andric // br i1 %should_store, label %cmpxchg.trystore, 13490b57cec5SDimitry Andric // label %cmpxchg.failure 13500b57cec5SDimitry Andric // cmpxchg.success: 13510b57cec5SDimitry Andric // fence? 
13520b57cec5SDimitry Andric // br label %cmpxchg.end 13530b57cec5SDimitry Andric // cmpxchg.nostore: 13540b57cec5SDimitry Andric // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], 13550b57cec5SDimitry Andric // [%releasedload, 13560b57cec5SDimitry Andric // %cmpxchg.releasedload/%cmpxchg.trystore] 13570b57cec5SDimitry Andric // @load_linked_fail_balance()? 13580b57cec5SDimitry Andric // br label %cmpxchg.failure 13590b57cec5SDimitry Andric // cmpxchg.failure: 13600b57cec5SDimitry Andric // fence? 13610b57cec5SDimitry Andric // br label %cmpxchg.end 13620b57cec5SDimitry Andric // cmpxchg.end: 13635ffd83dbSDimitry Andric // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], 13640b57cec5SDimitry Andric // [%loaded.trystore, %cmpxchg.trystore] 13650b57cec5SDimitry Andric // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] 13665ffd83dbSDimitry Andric // %loaded = extract value from %loaded.exit 13670b57cec5SDimitry Andric // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 13680b57cec5SDimitry Andric // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 13690b57cec5SDimitry Andric // [...] 13700b57cec5SDimitry Andric BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); 13710b57cec5SDimitry Andric auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); 13720b57cec5SDimitry Andric auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); 13730b57cec5SDimitry Andric auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); 13740b57cec5SDimitry Andric auto ReleasedLoadBB = 13750b57cec5SDimitry Andric BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); 13760b57cec5SDimitry Andric auto TryStoreBB = 13770b57cec5SDimitry Andric BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); 13780b57cec5SDimitry Andric auto ReleasingStoreBB = 13790b57cec5SDimitry Andric BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); 13800b57cec5SDimitry Andric auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB); 13810b57cec5SDimitry Andric 1382bdd1243dSDimitry Andric ReplacementIRBuilder Builder(CI, *DL); 13830b57cec5SDimitry Andric 13840b57cec5SDimitry Andric // The split call above "helpfully" added a branch at the end of BB (to the 13850b57cec5SDimitry Andric // wrong place), but we might want a fence too. It's easiest to just remove 13860b57cec5SDimitry Andric // the branch entirely. 13870b57cec5SDimitry Andric std::prev(BB->end())->eraseFromParent(); 13880b57cec5SDimitry Andric Builder.SetInsertPoint(BB); 13890b57cec5SDimitry Andric if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) 13900b57cec5SDimitry Andric TLI->emitLeadingFence(Builder, CI, SuccessOrder); 13915ffd83dbSDimitry Andric 13925ffd83dbSDimitry Andric PartwordMaskValues PMV = 13935ffd83dbSDimitry Andric createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr, 1394fe6060f1SDimitry Andric CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8); 13950b57cec5SDimitry Andric Builder.CreateBr(StartBB); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric // Start the main loop block now that we've taken care of the preliminaries. 
13980b57cec5SDimitry Andric Builder.SetInsertPoint(StartBB); 13995ffd83dbSDimitry Andric Value *UnreleasedLoad = 1400fe6060f1SDimitry Andric TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); 14015ffd83dbSDimitry Andric Value *UnreleasedLoadExtract = 14025ffd83dbSDimitry Andric extractMaskedValue(Builder, UnreleasedLoad, PMV); 14030b57cec5SDimitry Andric Value *ShouldStore = Builder.CreateICmpEQ( 14045ffd83dbSDimitry Andric UnreleasedLoadExtract, CI->getCompareOperand(), "should_store"); 14050b57cec5SDimitry Andric 14060b57cec5SDimitry Andric // If the cmpxchg doesn't actually need any ordering when it fails, we can 14070b57cec5SDimitry Andric // jump straight past that fence instruction (if it exists). 14080b57cec5SDimitry Andric Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); 14090b57cec5SDimitry Andric 14100b57cec5SDimitry Andric Builder.SetInsertPoint(ReleasingStoreBB); 14110b57cec5SDimitry Andric if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) 14120b57cec5SDimitry Andric TLI->emitLeadingFence(Builder, CI, SuccessOrder); 14130b57cec5SDimitry Andric Builder.CreateBr(TryStoreBB); 14140b57cec5SDimitry Andric 14150b57cec5SDimitry Andric Builder.SetInsertPoint(TryStoreBB); 14165ffd83dbSDimitry Andric PHINode *LoadedTryStore = 14175ffd83dbSDimitry Andric Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore"); 14185ffd83dbSDimitry Andric LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); 14195ffd83dbSDimitry Andric Value *NewValueInsert = 14205ffd83dbSDimitry Andric insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); 142181ad6265SDimitry Andric Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert, 142281ad6265SDimitry Andric PMV.AlignedAddr, MemOpOrder); 14230b57cec5SDimitry Andric StoreSuccess = Builder.CreateICmpEQ( 14240b57cec5SDimitry Andric StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); 14250b57cec5SDimitry Andric BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; 14260b57cec5SDimitry Andric Builder.CreateCondBr(StoreSuccess, SuccessBB, 14270b57cec5SDimitry Andric CI->isWeak() ? FailureBB : RetryBB); 14280b57cec5SDimitry Andric 14290b57cec5SDimitry Andric Builder.SetInsertPoint(ReleasedLoadBB); 14300b57cec5SDimitry Andric Value *SecondLoad; 14310b57cec5SDimitry Andric if (HasReleasedLoadBB) { 1432fe6060f1SDimitry Andric SecondLoad = 1433fe6060f1SDimitry Andric TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder); 14345ffd83dbSDimitry Andric Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV); 14355ffd83dbSDimitry Andric ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract, 14365ffd83dbSDimitry Andric CI->getCompareOperand(), "should_store"); 14370b57cec5SDimitry Andric 14380b57cec5SDimitry Andric // If the cmpxchg doesn't actually need any ordering when it fails, we can 14390b57cec5SDimitry Andric // jump straight past that fence instruction (if it exists). 14400b57cec5SDimitry Andric Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); 14415ffd83dbSDimitry Andric // Update PHI node in TryStoreBB. 14425ffd83dbSDimitry Andric LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); 14430b57cec5SDimitry Andric } else 14440b57cec5SDimitry Andric Builder.CreateUnreachable(); 14450b57cec5SDimitry Andric 14460b57cec5SDimitry Andric // Make sure later instructions don't get reordered with a fence if 14470b57cec5SDimitry Andric // necessary. 
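  // When fences are being inserted, the LL/SC sequence above was emitted with
  // monotonic ordering, so (assuming a conventional fence-based lowering) the
  // trailing fence below is what strengthens a successful exchange up to the
  // requested success ordering.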
14480b57cec5SDimitry Andric Builder.SetInsertPoint(SuccessBB); 1449bdd1243dSDimitry Andric if (ShouldInsertFencesForAtomic || 1450bdd1243dSDimitry Andric TLI->shouldInsertTrailingFenceForAtomicStore(CI)) 14510b57cec5SDimitry Andric TLI->emitTrailingFence(Builder, CI, SuccessOrder); 14520b57cec5SDimitry Andric Builder.CreateBr(ExitBB); 14530b57cec5SDimitry Andric 14540b57cec5SDimitry Andric Builder.SetInsertPoint(NoStoreBB); 14555ffd83dbSDimitry Andric PHINode *LoadedNoStore = 14565ffd83dbSDimitry Andric Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); 14575ffd83dbSDimitry Andric LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); 14585ffd83dbSDimitry Andric if (HasReleasedLoadBB) 14595ffd83dbSDimitry Andric LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); 14605ffd83dbSDimitry Andric 14610b57cec5SDimitry Andric // In the failing case, where we don't execute the store-conditional, the 14620b57cec5SDimitry Andric // target might want to balance out the load-linked with a dedicated 14630b57cec5SDimitry Andric // instruction (e.g., on ARM, clearing the exclusive monitor). 14640b57cec5SDimitry Andric TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); 14650b57cec5SDimitry Andric Builder.CreateBr(FailureBB); 14660b57cec5SDimitry Andric 14670b57cec5SDimitry Andric Builder.SetInsertPoint(FailureBB); 14685ffd83dbSDimitry Andric PHINode *LoadedFailure = 14695ffd83dbSDimitry Andric Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); 14705ffd83dbSDimitry Andric LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); 14715ffd83dbSDimitry Andric if (CI->isWeak()) 14725ffd83dbSDimitry Andric LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); 14730b57cec5SDimitry Andric if (ShouldInsertFencesForAtomic) 14740b57cec5SDimitry Andric TLI->emitTrailingFence(Builder, CI, FailureOrder); 14750b57cec5SDimitry Andric Builder.CreateBr(ExitBB); 14760b57cec5SDimitry Andric 14770b57cec5SDimitry Andric // Finally, we have control-flow based knowledge of whether the cmpxchg 14780b57cec5SDimitry Andric // succeeded or not. We expose this to later passes by converting any 14790b57cec5SDimitry Andric // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate 14800b57cec5SDimitry Andric // PHI. 14810b57cec5SDimitry Andric Builder.SetInsertPoint(ExitBB, ExitBB->begin()); 14825ffd83dbSDimitry Andric PHINode *LoadedExit = 14835ffd83dbSDimitry Andric Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); 14845ffd83dbSDimitry Andric LoadedExit->addIncoming(LoadedTryStore, SuccessBB); 14855ffd83dbSDimitry Andric LoadedExit->addIncoming(LoadedFailure, FailureBB); 14865ffd83dbSDimitry Andric PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); 14870b57cec5SDimitry Andric Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); 14880b57cec5SDimitry Andric Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); 14890b57cec5SDimitry Andric 14905ffd83dbSDimitry Andric // This is the "exit value" from the cmpxchg expansion. It may be of 14915ffd83dbSDimitry Andric // a type wider than the one in the cmpxchg instruction. 
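  // For a partword cmpxchg this PHI still carries the full memory word;
  // extractMaskedValue below shifts and truncates it back to the operand type
  // of the original instruction.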
14925ffd83dbSDimitry Andric Value *LoadedFull = LoadedExit; 14930b57cec5SDimitry Andric 14945ffd83dbSDimitry Andric Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); 14955ffd83dbSDimitry Andric Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV); 14960b57cec5SDimitry Andric 14970b57cec5SDimitry Andric // Look for any users of the cmpxchg that are just comparing the loaded value 14980b57cec5SDimitry Andric // against the desired one, and replace them with the CFG-derived version. 14990b57cec5SDimitry Andric SmallVector<ExtractValueInst *, 2> PrunedInsts; 1500fcaf7f86SDimitry Andric for (auto *User : CI->users()) { 15010b57cec5SDimitry Andric ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); 15020b57cec5SDimitry Andric if (!EV) 15030b57cec5SDimitry Andric continue; 15040b57cec5SDimitry Andric 15050b57cec5SDimitry Andric assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && 15060b57cec5SDimitry Andric "weird extraction from { iN, i1 }"); 15070b57cec5SDimitry Andric 15080b57cec5SDimitry Andric if (EV->getIndices()[0] == 0) 15090b57cec5SDimitry Andric EV->replaceAllUsesWith(Loaded); 15100b57cec5SDimitry Andric else 15110b57cec5SDimitry Andric EV->replaceAllUsesWith(Success); 15120b57cec5SDimitry Andric 15130b57cec5SDimitry Andric PrunedInsts.push_back(EV); 15140b57cec5SDimitry Andric } 15150b57cec5SDimitry Andric 15160b57cec5SDimitry Andric // We can remove the instructions now we're no longer iterating through them. 1517fcaf7f86SDimitry Andric for (auto *EV : PrunedInsts) 15180b57cec5SDimitry Andric EV->eraseFromParent(); 15190b57cec5SDimitry Andric 15200b57cec5SDimitry Andric if (!CI->use_empty()) { 15210b57cec5SDimitry Andric // Some use of the full struct return that we don't understand has happened, 15220b57cec5SDimitry Andric // so we've got to reconstruct it properly. 15230b57cec5SDimitry Andric Value *Res; 1524bdd1243dSDimitry Andric Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0); 15250b57cec5SDimitry Andric Res = Builder.CreateInsertValue(Res, Success, 1); 15260b57cec5SDimitry Andric 15270b57cec5SDimitry Andric CI->replaceAllUsesWith(Res); 15280b57cec5SDimitry Andric } 15290b57cec5SDimitry Andric 15300b57cec5SDimitry Andric CI->eraseFromParent(); 15310b57cec5SDimitry Andric return true; 15320b57cec5SDimitry Andric } 15330b57cec5SDimitry Andric 1534*0fca6ea1SDimitry Andric bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) { 15350b57cec5SDimitry Andric auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); 15360b57cec5SDimitry Andric if (!C) 15370b57cec5SDimitry Andric return false; 15380b57cec5SDimitry Andric 15390b57cec5SDimitry Andric AtomicRMWInst::BinOp Op = RMWI->getOperation(); 15400b57cec5SDimitry Andric switch (Op) { 15410b57cec5SDimitry Andric case AtomicRMWInst::Add: 15420b57cec5SDimitry Andric case AtomicRMWInst::Sub: 15430b57cec5SDimitry Andric case AtomicRMWInst::Or: 15440b57cec5SDimitry Andric case AtomicRMWInst::Xor: 15450b57cec5SDimitry Andric return C->isZero(); 15460b57cec5SDimitry Andric case AtomicRMWInst::And: 15470b57cec5SDimitry Andric return C->isMinusOne(); 15480b57cec5SDimitry Andric // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... 
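  // (For instance, umin with UINT_MAX, umax with 0, smin with INT_MAX and
  // smax with INT_MIN never change the stored value, so they could be treated
  // as idempotent here too.)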
15490b57cec5SDimitry Andric default: 15500b57cec5SDimitry Andric return false; 15510b57cec5SDimitry Andric } 15520b57cec5SDimitry Andric } 15530b57cec5SDimitry Andric 1554*0fca6ea1SDimitry Andric bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { 15550b57cec5SDimitry Andric if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { 15560b57cec5SDimitry Andric tryExpandAtomicLoad(ResultingLoad); 15570b57cec5SDimitry Andric return true; 15580b57cec5SDimitry Andric } 15590b57cec5SDimitry Andric return false; 15600b57cec5SDimitry Andric } 15610b57cec5SDimitry Andric 1562*0fca6ea1SDimitry Andric Value *AtomicExpandImpl::insertRMWCmpXchgLoop( 1563bdd1243dSDimitry Andric IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, 1564fe6060f1SDimitry Andric AtomicOrdering MemOpOrder, SyncScope::ID SSID, 1565bdd1243dSDimitry Andric function_ref<Value *(IRBuilderBase &, Value *)> PerformOp, 15660b57cec5SDimitry Andric CreateCmpXchgInstFun CreateCmpXchg) { 15670b57cec5SDimitry Andric LLVMContext &Ctx = Builder.getContext(); 15680b57cec5SDimitry Andric BasicBlock *BB = Builder.GetInsertBlock(); 15690b57cec5SDimitry Andric Function *F = BB->getParent(); 15700b57cec5SDimitry Andric 15710b57cec5SDimitry Andric // Given: atomicrmw some_op iN* %addr, iN %incr ordering 15720b57cec5SDimitry Andric // 15730b57cec5SDimitry Andric // The standard expansion we produce is: 15740b57cec5SDimitry Andric // [...] 15750b57cec5SDimitry Andric // %init_loaded = load atomic iN* %addr 15760b57cec5SDimitry Andric // br label %loop 15770b57cec5SDimitry Andric // loop: 15780b57cec5SDimitry Andric // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] 15790b57cec5SDimitry Andric // %new = some_op iN %loaded, %incr 15800b57cec5SDimitry Andric // %pair = cmpxchg iN* %addr, iN %loaded, iN %new 15810b57cec5SDimitry Andric // %new_loaded = extractvalue { iN, i1 } %pair, 0 15820b57cec5SDimitry Andric // %success = extractvalue { iN, i1 } %pair, 1 15830b57cec5SDimitry Andric // br i1 %success, label %atomicrmw.end, label %loop 15840b57cec5SDimitry Andric // atomicrmw.end: 15850b57cec5SDimitry Andric // [...] 15860b57cec5SDimitry Andric BasicBlock *ExitBB = 15870b57cec5SDimitry Andric BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); 15880b57cec5SDimitry Andric BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); 15890b57cec5SDimitry Andric 15900b57cec5SDimitry Andric // The split call above "helpfully" added a branch at the end of BB (to the 15910b57cec5SDimitry Andric // wrong place), but we want a load. It's easiest to just remove 15920b57cec5SDimitry Andric // the branch entirely. 15930b57cec5SDimitry Andric std::prev(BB->end())->eraseFromParent(); 15940b57cec5SDimitry Andric Builder.SetInsertPoint(BB); 1595fe6060f1SDimitry Andric LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign); 15960b57cec5SDimitry Andric Builder.CreateBr(LoopBB); 15970b57cec5SDimitry Andric 15980b57cec5SDimitry Andric // Start the main loop block now that we've taken care of the preliminaries. 
15990b57cec5SDimitry Andric Builder.SetInsertPoint(LoopBB); 16000b57cec5SDimitry Andric PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded"); 16010b57cec5SDimitry Andric Loaded->addIncoming(InitLoaded, BB); 16020b57cec5SDimitry Andric 16030b57cec5SDimitry Andric Value *NewVal = PerformOp(Builder, Loaded); 16040b57cec5SDimitry Andric 16050b57cec5SDimitry Andric Value *NewLoaded = nullptr; 16060b57cec5SDimitry Andric Value *Success = nullptr; 16070b57cec5SDimitry Andric 1608fe6060f1SDimitry Andric CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign, 16090b57cec5SDimitry Andric MemOpOrder == AtomicOrdering::Unordered 16100b57cec5SDimitry Andric ? AtomicOrdering::Monotonic 16110b57cec5SDimitry Andric : MemOpOrder, 1612fe6060f1SDimitry Andric SSID, Success, NewLoaded); 16130b57cec5SDimitry Andric assert(Success && NewLoaded); 16140b57cec5SDimitry Andric 16150b57cec5SDimitry Andric Loaded->addIncoming(NewLoaded, LoopBB); 16160b57cec5SDimitry Andric 16170b57cec5SDimitry Andric Builder.CreateCondBr(Success, ExitBB, LoopBB); 16180b57cec5SDimitry Andric 16190b57cec5SDimitry Andric Builder.SetInsertPoint(ExitBB, ExitBB->begin()); 16200b57cec5SDimitry Andric return NewLoaded; 16210b57cec5SDimitry Andric } 16220b57cec5SDimitry Andric 1623*0fca6ea1SDimitry Andric bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { 16240b57cec5SDimitry Andric unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; 16250b57cec5SDimitry Andric unsigned ValueSize = getAtomicOpSize(CI); 16260b57cec5SDimitry Andric 16270b57cec5SDimitry Andric switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) { 16280b57cec5SDimitry Andric default: 16290b57cec5SDimitry Andric llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg"); 16300b57cec5SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::None: 16310b57cec5SDimitry Andric if (ValueSize < MinCASSize) 16325ffd83dbSDimitry Andric return expandPartwordCmpXchg(CI); 16330b57cec5SDimitry Andric return false; 16340b57cec5SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::LLSC: { 16350b57cec5SDimitry Andric return expandAtomicCmpXchg(CI); 16360b57cec5SDimitry Andric } 16370b57cec5SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: 16380b57cec5SDimitry Andric expandAtomicCmpXchgToMaskedIntrinsic(CI); 16390b57cec5SDimitry Andric return true; 164081ad6265SDimitry Andric case TargetLoweringBase::AtomicExpansionKind::NotAtomic: 164181ad6265SDimitry Andric return lowerAtomicCmpXchgInst(CI); 16420b57cec5SDimitry Andric } 16430b57cec5SDimitry Andric } 16440b57cec5SDimitry Andric 16450b57cec5SDimitry Andric // Note: This function is exposed externally by AtomicExpandUtils.h 16460b57cec5SDimitry Andric bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, 16470b57cec5SDimitry Andric CreateCmpXchgInstFun CreateCmpXchg) { 1648*0fca6ea1SDimitry Andric ReplacementIRBuilder Builder(AI, AI->getDataLayout()); 164906c3fb27SDimitry Andric Builder.setIsFPConstrained( 165006c3fb27SDimitry Andric AI->getFunction()->hasFnAttribute(Attribute::StrictFP)); 165106c3fb27SDimitry Andric 165206c3fb27SDimitry Andric // FIXME: If FP exceptions are observable, we should force them off for the 165306c3fb27SDimitry Andric // loop for the FP atomics. 
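  // The CreateCmpXchg callback is expected to emit the compare-exchange for
  // one iteration of the loop and report back the loaded value and success
  // flag; the lambda in expandAtomicRMWToLibcall below is one example, where
  // the cmpxchg it creates is immediately expanded to a libcall.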
1654*0fca6ea1SDimitry Andric Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop( 1655fe6060f1SDimitry Andric Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), 1656fe6060f1SDimitry Andric AI->getOrdering(), AI->getSyncScopeID(), 1657bdd1243dSDimitry Andric [&](IRBuilderBase &Builder, Value *Loaded) { 165881ad6265SDimitry Andric return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, 16590b57cec5SDimitry Andric AI->getValOperand()); 16600b57cec5SDimitry Andric }, 16610b57cec5SDimitry Andric CreateCmpXchg); 16620b57cec5SDimitry Andric 16630b57cec5SDimitry Andric AI->replaceAllUsesWith(Loaded); 16640b57cec5SDimitry Andric AI->eraseFromParent(); 16650b57cec5SDimitry Andric return true; 16660b57cec5SDimitry Andric } 16670b57cec5SDimitry Andric 16680b57cec5SDimitry Andric // In order to use one of the sized library calls such as 16690b57cec5SDimitry Andric // __atomic_fetch_add_4, the alignment must be sufficient, the size 16700b57cec5SDimitry Andric // must be one of the potentially-specialized sizes, and the value 16710b57cec5SDimitry Andric // type must actually exist in C on the target (otherwise, the 16720b57cec5SDimitry Andric // function wouldn't actually be defined.) 16735ffd83dbSDimitry Andric static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, 16740b57cec5SDimitry Andric const DataLayout &DL) { 16750b57cec5SDimitry Andric // TODO: "LargestSize" is an approximation for "largest type that 16760b57cec5SDimitry Andric // you can express in C". It seems to be the case that int128 is 16770b57cec5SDimitry Andric // supported on all 64-bit platforms, otherwise only up to 64-bit 16780b57cec5SDimitry Andric // integers are supported. If we get this wrong, then we'll try to 16790b57cec5SDimitry Andric // call a sized libcall that doesn't actually exist. There should 16800b57cec5SDimitry Andric // really be some more reliable way in LLVM of determining integer 16810b57cec5SDimitry Andric // sizes which are valid in the target's C ABI... 16820b57cec5SDimitry Andric unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 
16 : 8; 16835ffd83dbSDimitry Andric return Alignment >= Size && 16840b57cec5SDimitry Andric (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) && 16850b57cec5SDimitry Andric Size <= LargestSize; 16860b57cec5SDimitry Andric } 16870b57cec5SDimitry Andric 1688*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) { 16890b57cec5SDimitry Andric static const RTLIB::Libcall Libcalls[6] = { 16900b57cec5SDimitry Andric RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2, 16910b57cec5SDimitry Andric RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16}; 16920b57cec5SDimitry Andric unsigned Size = getAtomicOpSize(I); 16930b57cec5SDimitry Andric 16940b57cec5SDimitry Andric bool expanded = expandAtomicOpToLibcall( 16955ffd83dbSDimitry Andric I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr, 16960b57cec5SDimitry Andric I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 1697e8d8bef9SDimitry Andric if (!expanded) 1698e8d8bef9SDimitry Andric report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load"); 16990b57cec5SDimitry Andric } 17000b57cec5SDimitry Andric 1701*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) { 17020b57cec5SDimitry Andric static const RTLIB::Libcall Libcalls[6] = { 17030b57cec5SDimitry Andric RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2, 17040b57cec5SDimitry Andric RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16}; 17050b57cec5SDimitry Andric unsigned Size = getAtomicOpSize(I); 17060b57cec5SDimitry Andric 17070b57cec5SDimitry Andric bool expanded = expandAtomicOpToLibcall( 17085ffd83dbSDimitry Andric I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(), 17095ffd83dbSDimitry Andric nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 1710e8d8bef9SDimitry Andric if (!expanded) 1711e8d8bef9SDimitry Andric report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store"); 17120b57cec5SDimitry Andric } 17130b57cec5SDimitry Andric 1714*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) { 17150b57cec5SDimitry Andric static const RTLIB::Libcall Libcalls[6] = { 17160b57cec5SDimitry Andric RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1, 17170b57cec5SDimitry Andric RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4, 17180b57cec5SDimitry Andric RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16}; 17190b57cec5SDimitry Andric unsigned Size = getAtomicOpSize(I); 17200b57cec5SDimitry Andric 17210b57cec5SDimitry Andric bool expanded = expandAtomicOpToLibcall( 17225ffd83dbSDimitry Andric I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(), 17230b57cec5SDimitry Andric I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(), 17240b57cec5SDimitry Andric Libcalls); 1725e8d8bef9SDimitry Andric if (!expanded) 1726e8d8bef9SDimitry Andric report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS"); 17270b57cec5SDimitry Andric } 17280b57cec5SDimitry Andric 17290b57cec5SDimitry Andric static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) { 17300b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsXchg[6] = { 17310b57cec5SDimitry Andric RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1, 17320b57cec5SDimitry Andric RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4, 17330b57cec5SDimitry Andric RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16}; 
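  // Each of these tables is indexed the way expandAtomicOpToLibcall consumes
  // it: slot 0 is the generic, size_t-based libcall (or UNKNOWN_LIBCALL when
  // none exists) and slots 1-5 are the 1/2/4/8/16-byte specializations.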
17340b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsAdd[6] = { 17350b57cec5SDimitry Andric RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1, 17360b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4, 17370b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16}; 17380b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsSub[6] = { 17390b57cec5SDimitry Andric RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1, 17400b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4, 17410b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16}; 17420b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsAnd[6] = { 17430b57cec5SDimitry Andric RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1, 17440b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4, 17450b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16}; 17460b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsOr[6] = { 17470b57cec5SDimitry Andric RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1, 17480b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4, 17490b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16}; 17500b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsXor[6] = { 17510b57cec5SDimitry Andric RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1, 17520b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4, 17530b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16}; 17540b57cec5SDimitry Andric static const RTLIB::Libcall LibcallsNand[6] = { 17550b57cec5SDimitry Andric RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1, 17560b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4, 17570b57cec5SDimitry Andric RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16}; 17580b57cec5SDimitry Andric 17590b57cec5SDimitry Andric switch (Op) { 17600b57cec5SDimitry Andric case AtomicRMWInst::BAD_BINOP: 17610b57cec5SDimitry Andric llvm_unreachable("Should not have BAD_BINOP."); 17620b57cec5SDimitry Andric case AtomicRMWInst::Xchg: 1763bdd1243dSDimitry Andric return ArrayRef(LibcallsXchg); 17640b57cec5SDimitry Andric case AtomicRMWInst::Add: 1765bdd1243dSDimitry Andric return ArrayRef(LibcallsAdd); 17660b57cec5SDimitry Andric case AtomicRMWInst::Sub: 1767bdd1243dSDimitry Andric return ArrayRef(LibcallsSub); 17680b57cec5SDimitry Andric case AtomicRMWInst::And: 1769bdd1243dSDimitry Andric return ArrayRef(LibcallsAnd); 17700b57cec5SDimitry Andric case AtomicRMWInst::Or: 1771bdd1243dSDimitry Andric return ArrayRef(LibcallsOr); 17720b57cec5SDimitry Andric case AtomicRMWInst::Xor: 1773bdd1243dSDimitry Andric return ArrayRef(LibcallsXor); 17740b57cec5SDimitry Andric case AtomicRMWInst::Nand: 1775bdd1243dSDimitry Andric return ArrayRef(LibcallsNand); 17760b57cec5SDimitry Andric case AtomicRMWInst::Max: 17770b57cec5SDimitry Andric case AtomicRMWInst::Min: 17780b57cec5SDimitry Andric case AtomicRMWInst::UMax: 17790b57cec5SDimitry Andric case AtomicRMWInst::UMin: 1780753f127fSDimitry Andric case AtomicRMWInst::FMax: 1781753f127fSDimitry Andric case AtomicRMWInst::FMin: 17820b57cec5SDimitry Andric case AtomicRMWInst::FAdd: 17830b57cec5SDimitry Andric case AtomicRMWInst::FSub: 1784bdd1243dSDimitry Andric case AtomicRMWInst::UIncWrap: 1785bdd1243dSDimitry Andric case AtomicRMWInst::UDecWrap: 17860b57cec5SDimitry Andric // No atomic libcalls are available for max/min/umax/umin. 
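  // (The same holds for the FP and wrapping operations listed above.)
  // Returning an empty array makes expandAtomicRMWToLibcall fall back to a
  // cmpxchg loop whose compare-exchange is itself expanded to a libcall.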
17870b57cec5SDimitry Andric return {}; 17880b57cec5SDimitry Andric } 17890b57cec5SDimitry Andric llvm_unreachable("Unexpected AtomicRMW operation."); 17900b57cec5SDimitry Andric } 17910b57cec5SDimitry Andric 1792*0fca6ea1SDimitry Andric void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) { 17930b57cec5SDimitry Andric ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric unsigned Size = getAtomicOpSize(I); 17960b57cec5SDimitry Andric 17970b57cec5SDimitry Andric bool Success = false; 17980b57cec5SDimitry Andric if (!Libcalls.empty()) 17990b57cec5SDimitry Andric Success = expandAtomicOpToLibcall( 18005ffd83dbSDimitry Andric I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(), 18015ffd83dbSDimitry Andric nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); 18020b57cec5SDimitry Andric 18030b57cec5SDimitry Andric // The expansion failed: either there were no libcalls at all for 18040b57cec5SDimitry Andric // the operation (min/max), or there were only size-specialized 18050b57cec5SDimitry Andric // libcalls (add/sub/etc) and we needed a generic. So, expand to a 18060b57cec5SDimitry Andric // CAS libcall, via a CAS loop, instead. 18070b57cec5SDimitry Andric if (!Success) { 1808fe6060f1SDimitry Andric expandAtomicRMWToCmpXchg( 1809bdd1243dSDimitry Andric I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, 1810fe6060f1SDimitry Andric Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, 1811fe6060f1SDimitry Andric SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { 18120b57cec5SDimitry Andric // Create the CAS instruction normally... 18130b57cec5SDimitry Andric AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( 1814fe6060f1SDimitry Andric Addr, Loaded, NewVal, Alignment, MemOpOrder, 1815fe6060f1SDimitry Andric AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID); 18160b57cec5SDimitry Andric Success = Builder.CreateExtractValue(Pair, 1, "success"); 18170b57cec5SDimitry Andric NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); 18180b57cec5SDimitry Andric 18190b57cec5SDimitry Andric // ...and then expand the CAS into a libcall. 18200b57cec5SDimitry Andric expandAtomicCASToLibcall(Pair); 18210b57cec5SDimitry Andric }); 18220b57cec5SDimitry Andric } 18230b57cec5SDimitry Andric } 18240b57cec5SDimitry Andric 18250b57cec5SDimitry Andric // A helper routine for the above expandAtomic*ToLibcall functions. 18260b57cec5SDimitry Andric // 18270b57cec5SDimitry Andric // 'Libcalls' contains an array of enum values for the particular 18280b57cec5SDimitry Andric // ATOMIC libcalls to be emitted. All of the other arguments besides 18290b57cec5SDimitry Andric // 'I' are extracted from the Instruction subclass by the 18300b57cec5SDimitry Andric // caller. Depending on the particular call, some will be null. 
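// As a rough sketch of the output (assuming an i8 cmpxchg that is aligned
// enough for the sized path), the emitted call looks roughly like
//   %ok = call i1 @__atomic_compare_exchange_1(ptr %obj, ptr %expected.addr,
//                                              i8 %desired, i32 %succ_order,
//                                              i32 %fail_order)
// with the expected value spilled to a temporary alloca; operations that are
// under-aligned or of an unusual size instead use the generic __atomic_*
// calls documented below, which take an explicit size_t size argument.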
1831*0fca6ea1SDimitry Andric bool AtomicExpandImpl::expandAtomicOpToLibcall(
18325ffd83dbSDimitry Andric     Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
18330b57cec5SDimitry Andric     Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
18340b57cec5SDimitry Andric     AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
18350b57cec5SDimitry Andric   assert(Libcalls.size() == 6);
18360b57cec5SDimitry Andric 
18370b57cec5SDimitry Andric   LLVMContext &Ctx = I->getContext();
18380b57cec5SDimitry Andric   Module *M = I->getModule();
18390b57cec5SDimitry Andric   const DataLayout &DL = M->getDataLayout();
18400b57cec5SDimitry Andric   IRBuilder<> Builder(I);
18410b57cec5SDimitry Andric   IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
18420b57cec5SDimitry Andric 
18435ffd83dbSDimitry Andric   bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
18440b57cec5SDimitry Andric   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
18450b57cec5SDimitry Andric 
18465ffd83dbSDimitry Andric   const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
18470b57cec5SDimitry Andric 
18480b57cec5SDimitry Andric   // TODO: the "order" argument type is "int", not int32. So
18490b57cec5SDimitry Andric   // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
18500b57cec5SDimitry Andric   ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
18510b57cec5SDimitry Andric   assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
18520b57cec5SDimitry Andric   Constant *OrderingVal =
18530b57cec5SDimitry Andric       ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
18540b57cec5SDimitry Andric   Constant *Ordering2Val = nullptr;
18550b57cec5SDimitry Andric   if (CASExpected) {
18560b57cec5SDimitry Andric     assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
18570b57cec5SDimitry Andric     Ordering2Val =
18580b57cec5SDimitry Andric         ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
18590b57cec5SDimitry Andric   }
18600b57cec5SDimitry Andric   bool HasResult = I->getType() != Type::getVoidTy(Ctx);
18610b57cec5SDimitry Andric 
18620b57cec5SDimitry Andric   RTLIB::Libcall RTLibType;
18630b57cec5SDimitry Andric   if (UseSizedLibcall) {
18640b57cec5SDimitry Andric     switch (Size) {
186581ad6265SDimitry Andric     case 1:
186681ad6265SDimitry Andric       RTLibType = Libcalls[1];
186781ad6265SDimitry Andric       break;
186881ad6265SDimitry Andric     case 2:
186981ad6265SDimitry Andric       RTLibType = Libcalls[2];
187081ad6265SDimitry Andric       break;
187181ad6265SDimitry Andric     case 4:
187281ad6265SDimitry Andric       RTLibType = Libcalls[3];
187381ad6265SDimitry Andric       break;
187481ad6265SDimitry Andric     case 8:
187581ad6265SDimitry Andric       RTLibType = Libcalls[4];
187681ad6265SDimitry Andric       break;
187781ad6265SDimitry Andric     case 16:
187881ad6265SDimitry Andric       RTLibType = Libcalls[5];
187981ad6265SDimitry Andric       break;
18800b57cec5SDimitry Andric     }
18810b57cec5SDimitry Andric   } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
18820b57cec5SDimitry Andric     RTLibType = Libcalls[0];
18830b57cec5SDimitry Andric   } else {
18840b57cec5SDimitry Andric     // Can't use sized function, and there's no generic for this
18850b57cec5SDimitry Andric     // operation, so give up.
18860b57cec5SDimitry Andric     return false;
18870b57cec5SDimitry Andric   }
18880b57cec5SDimitry Andric 
1889e8d8bef9SDimitry Andric   if (!TLI->getLibcallName(RTLibType)) {
1890e8d8bef9SDimitry Andric     // This target does not implement the requested atomic libcall so give up.
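    // (When reached from expandAtomicRMWToLibcall above, returning false here
    // simply sends the caller down its cmpxchg-loop fallback path.)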
1891e8d8bef9SDimitry Andric     return false;
1892e8d8bef9SDimitry Andric   }
1893e8d8bef9SDimitry Andric 
18940b57cec5SDimitry Andric   // Build up the function call. There are two kinds. First, the sized
18950b57cec5SDimitry Andric   // variants. These calls are going to be one of the following (with
18960b57cec5SDimitry Andric   // N=1,2,4,8,16):
18970b57cec5SDimitry Andric   //   iN __atomic_load_N(iN *ptr, int ordering)
18980b57cec5SDimitry Andric   //   void __atomic_store_N(iN *ptr, iN val, int ordering)
18990b57cec5SDimitry Andric   //   iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
19000b57cec5SDimitry Andric   //   bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
19010b57cec5SDimitry Andric   //                                    int success_order, int failure_order)
19020b57cec5SDimitry Andric   //
19030b57cec5SDimitry Andric   // Note that these functions can be used for non-integer atomic
19040b57cec5SDimitry Andric   // operations; the values just need to be bitcast to integers on the
19050b57cec5SDimitry Andric   // way in and out.
19060b57cec5SDimitry Andric   //
19070b57cec5SDimitry Andric   // And, then, the generic variants. They look like the following:
19080b57cec5SDimitry Andric   //   void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
19090b57cec5SDimitry Andric   //   void __atomic_store(size_t size, void *ptr, void *val, int ordering)
19100b57cec5SDimitry Andric   //   void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
19110b57cec5SDimitry Andric   //                          int ordering)
19120b57cec5SDimitry Andric   //   bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
19130b57cec5SDimitry Andric   //                                  void *desired, int success_order,
19140b57cec5SDimitry Andric   //                                  int failure_order)
19150b57cec5SDimitry Andric   //
19160b57cec5SDimitry Andric   // The different signatures are built up depending on the
19170b57cec5SDimitry Andric   // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
19180b57cec5SDimitry Andric   // variables.
19190b57cec5SDimitry Andric 
19200b57cec5SDimitry Andric   AllocaInst *AllocaCASExpected = nullptr;
19210b57cec5SDimitry Andric   AllocaInst *AllocaValue = nullptr;
19220b57cec5SDimitry Andric   AllocaInst *AllocaResult = nullptr;
19230b57cec5SDimitry Andric 
19240b57cec5SDimitry Andric   Type *ResultTy;
19250b57cec5SDimitry Andric   SmallVector<Value *, 6> Args;
19260b57cec5SDimitry Andric   AttributeList Attr;
19270b57cec5SDimitry Andric 
19280b57cec5SDimitry Andric   // 'size' argument.
19290b57cec5SDimitry Andric   if (!UseSizedLibcall) {
19300b57cec5SDimitry Andric     // Note, getIntPtrType is assumed equivalent to size_t.
19310b57cec5SDimitry Andric     Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
19320b57cec5SDimitry Andric   }
19330b57cec5SDimitry Andric 
19340b57cec5SDimitry Andric   // 'ptr' argument.
19350b57cec5SDimitry Andric   // note: This assumes all address spaces share a common libfunc
19360b57cec5SDimitry Andric   // implementation and that addresses are convertible. For systems without
19370b57cec5SDimitry Andric   // that property, we'd need to extend this mechanism to support AS-specific
19380b57cec5SDimitry Andric   // families of atomic intrinsics.
19395f757f3fSDimitry Andric   Value *PtrVal = PointerOperand;
19405f757f3fSDimitry Andric   PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
19410b57cec5SDimitry Andric   Args.push_back(PtrVal);
19420b57cec5SDimitry Andric 
19430b57cec5SDimitry Andric   // 'expected' argument, if present.
19440b57cec5SDimitry Andric   if (CASExpected) {
19450b57cec5SDimitry Andric     AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
19465ffd83dbSDimitry Andric     AllocaCASExpected->setAlignment(AllocaAlignment);
19475f757f3fSDimitry Andric     Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
19480b57cec5SDimitry Andric     Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
19495f757f3fSDimitry Andric     Args.push_back(AllocaCASExpected);
19500b57cec5SDimitry Andric   }
19510b57cec5SDimitry Andric 
19520b57cec5SDimitry Andric   // 'val' argument ('desired' for cas), if present.
19530b57cec5SDimitry Andric   if (ValueOperand) {
19540b57cec5SDimitry Andric     if (UseSizedLibcall) {
19550b57cec5SDimitry Andric       Value *IntValue =
19560b57cec5SDimitry Andric           Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
19570b57cec5SDimitry Andric       Args.push_back(IntValue);
19580b57cec5SDimitry Andric     } else {
19590b57cec5SDimitry Andric       AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
19605ffd83dbSDimitry Andric       AllocaValue->setAlignment(AllocaAlignment);
19615f757f3fSDimitry Andric       Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
19620b57cec5SDimitry Andric       Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
19635f757f3fSDimitry Andric       Args.push_back(AllocaValue);
19640b57cec5SDimitry Andric     }
19650b57cec5SDimitry Andric   }
19660b57cec5SDimitry Andric 
19670b57cec5SDimitry Andric   // 'ret' argument.
19680b57cec5SDimitry Andric   if (!CASExpected && HasResult && !UseSizedLibcall) {
19690b57cec5SDimitry Andric     AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
19705ffd83dbSDimitry Andric     AllocaResult->setAlignment(AllocaAlignment);
19715f757f3fSDimitry Andric     Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
19725f757f3fSDimitry Andric     Args.push_back(AllocaResult);
19730b57cec5SDimitry Andric   }
19740b57cec5SDimitry Andric 
19750b57cec5SDimitry Andric   // 'ordering' ('success_order' for cas) argument.
19760b57cec5SDimitry Andric   Args.push_back(OrderingVal);
19770b57cec5SDimitry Andric 
19780b57cec5SDimitry Andric   // 'failure_order' argument, if present.
19790b57cec5SDimitry Andric   if (Ordering2Val)
19800b57cec5SDimitry Andric     Args.push_back(Ordering2Val);
19810b57cec5SDimitry Andric 
19820b57cec5SDimitry Andric   // Now, the return type.
19830b57cec5SDimitry Andric   if (CASExpected) {
19840b57cec5SDimitry Andric     ResultTy = Type::getInt1Ty(Ctx);
1985349cc55cSDimitry Andric     Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
19860b57cec5SDimitry Andric   } else if (HasResult && UseSizedLibcall)
19870b57cec5SDimitry Andric     ResultTy = SizedIntTy;
19880b57cec5SDimitry Andric   else
19890b57cec5SDimitry Andric     ResultTy = Type::getVoidTy(Ctx);
19900b57cec5SDimitry Andric 
19910b57cec5SDimitry Andric   // Done with setting up arguments and return types, create the call:
19920b57cec5SDimitry Andric   SmallVector<Type *, 6> ArgTys;
19930b57cec5SDimitry Andric   for (Value *Arg : Args)
19940b57cec5SDimitry Andric     ArgTys.push_back(Arg->getType());
19950b57cec5SDimitry Andric   FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
19960b57cec5SDimitry Andric   FunctionCallee LibcallFn =
19970b57cec5SDimitry Andric       M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
19980b57cec5SDimitry Andric   CallInst *Call = Builder.CreateCall(LibcallFn, Args);
19990b57cec5SDimitry Andric   Call->setAttributes(Attr);
20000b57cec5SDimitry Andric   Value *Result = Call;
20010b57cec5SDimitry Andric 
20020b57cec5SDimitry Andric   // And then, extract the results...
20030b57cec5SDimitry Andric   if (ValueOperand && !UseSizedLibcall)
20045f757f3fSDimitry Andric     Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
20050b57cec5SDimitry Andric 
20060b57cec5SDimitry Andric   if (CASExpected) {
20070b57cec5SDimitry Andric     // The final result from the CAS is {load of 'expected' alloca, bool result
20080b57cec5SDimitry Andric     // from call}
20090b57cec5SDimitry Andric     Type *FinalResultTy = I->getType();
2010bdd1243dSDimitry Andric     Value *V = PoisonValue::get(FinalResultTy);
20110b57cec5SDimitry Andric     Value *ExpectedOut = Builder.CreateAlignedLoad(
20120b57cec5SDimitry Andric         CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
20135f757f3fSDimitry Andric     Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
20140b57cec5SDimitry Andric     V = Builder.CreateInsertValue(V, ExpectedOut, 0);
20150b57cec5SDimitry Andric     V = Builder.CreateInsertValue(V, Result, 1);
20160b57cec5SDimitry Andric     I->replaceAllUsesWith(V);
20170b57cec5SDimitry Andric   } else if (HasResult) {
20180b57cec5SDimitry Andric     Value *V;
20190b57cec5SDimitry Andric     if (UseSizedLibcall)
20200b57cec5SDimitry Andric       V = Builder.CreateBitOrPointerCast(Result, I->getType());
20210b57cec5SDimitry Andric     else {
20220b57cec5SDimitry Andric       V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20230b57cec5SDimitry Andric                                     AllocaAlignment);
20245f757f3fSDimitry Andric       Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
20250b57cec5SDimitry Andric     }
20260b57cec5SDimitry Andric     I->replaceAllUsesWith(V);
20270b57cec5SDimitry Andric   }
20280b57cec5SDimitry Andric   I->eraseFromParent();
20290b57cec5SDimitry Andric   return true;
20300b57cec5SDimitry Andric }
2031