//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

class AtomicExpandImpl {
  const TargetLowering *TLI = nullptr;
  const DataLayout *DL = nullptr;

private:
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStore(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);

  bool processAtomicInstr(Instruction *I);

public:
  bool run(Function &F, const TargetMachine *TM);
};

class AtomicExpandLegacy : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpandLegacy() : FunctionPass(ID) {
    initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;
};

// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // Enable StrictFP builder mode when appropriate.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
                  IRBuilderCallbackInserter(
                      [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};

} // end anonymous namespace

char AtomicExpandLegacy::ID = 0;

char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;

INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
                      "Expand Atomic instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
                    "Expand Atomic instructions", false, false)

// Helper functions to retrieve the size of atomic instructions.
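// (Note: the size returned below is the DataLayout store size in bytes,
// e.g. 4 for i32 or float and 16 for i128; this is what the libcall and
// legal-size checks later in the pass compare against.)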
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

/// Copy metadata that's safe to preserve when widening atomics.
static void copyMetadataForAtomic(Instruction &Dest,
                                  const Instruction &Source) {
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  Source.getAllMetadata(MD);
  LLVMContext &Ctx = Dest.getContext();
  MDBuilder MDB(Ctx);

  for (auto [ID, N] : MD) {
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_noalias_addrspace:
    case LLVMContext::MD_access_group:
    case LLVMContext::MD_mmra:
      Dest.setMetadata(ID, N);
      break;
    default:
      if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
        Dest.setMetadata(ID, N);
      else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
        Dest.setMetadata(ID, N);

      // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
      // uses.
      break;
    }
  }
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}

bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
  auto *LI = dyn_cast<LoadInst>(I);
  auto *SI = dyn_cast<StoreInst>(I);
  auto *RMWI = dyn_cast<AtomicRMWInst>(I);
  auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);

  bool MadeChange = false;

  // If the Size/Alignment is not supported, replace with a libcall.
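  // Illustrative sketch only (the exact call is built by
  // expandAtomicOpToLibcall from the RTLIB tables): on a target whose
  // MaxAtomicSizeInBitsSupported is 64, an i128 atomic load would be
  // replaced by something like
  //   __atomic_load(16, %addr, %ret_tmp, /*__ATOMIC_SEQ_CST=*/5)
  // instead of being handed to instruction selection.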
  if (LI) {
    if (!LI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, LI)) {
      expandAtomicLoadToLibcall(LI);
      return true;
    }

    if (TLI->shouldCastAtomicLoadInIR(LI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    }
  } else if (SI) {
    if (!SI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, SI)) {
      expandAtomicStoreToLibcall(SI);
      return true;
    }

    if (TLI->shouldCastAtomicStoreInIR(SI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    }
  } else if (RMWI) {
    if (!atomicSizeSupported(TLI, RMWI)) {
      expandAtomicRMWToLibcall(RMWI);
      return true;
    }

    if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    }
  } else if (CASI) {
    if (!atomicSizeSupported(TLI, CASI)) {
      expandAtomicCASToLibcall(CASI);
      return true;
    }

    // TODO: when we're ready to make the change at the IR level, we can
    // extend convertCmpXchgToInteger for floating point too.
    if (CASI->getCompareOperand()->getType()->isPointerTy()) {
      // TODO: add a TLI hook to control this so that each target can
      // convert to lowering the original type one at a time.
      I = CASI = convertCmpXchgToIntegerType(CASI);
      MadeChange = true;
    }
  } else
    return false;

  if (TLI->shouldInsertFencesForAtomic(I)) {
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (LI && isAcquireOrStronger(LI->getOrdering())) {
      FenceOrdering = LI->getOrdering();
      LI->setOrdering(AtomicOrdering::Monotonic);
    } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
      FenceOrdering = SI->getOrdering();
      SI->setOrdering(AtomicOrdering::Monotonic);
    } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                        isAcquireOrStronger(RMWI->getOrdering()))) {
      FenceOrdering = RMWI->getOrdering();
      RMWI->setOrdering(AtomicOrdering::Monotonic);
    } else if (CASI &&
               TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                   TargetLoweringBase::AtomicExpansionKind::None &&
               (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getFailureOrdering()))) {
      // If a compare and swap is lowered to LL/SC, we can do smarter fence
      // insertion, with a stronger one on the success path than on the
      // failure path. As a result, fence insertion is directly done by
      // expandAtomicCmpXchg in that case.
      FenceOrdering = CASI->getMergedOrdering();
      CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
      CASI->setFailureOrdering(AtomicOrdering::Monotonic);
    }

    if (FenceOrdering != AtomicOrdering::Monotonic) {
      MadeChange |= bracketInstWithFences(I, FenceOrdering);
    }
  } else if (I->hasAtomicStore() &&
             TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (SI)
      FenceOrdering = SI->getOrdering();
    else if (RMWI)
      FenceOrdering = RMWI->getOrdering();
    else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                         TargetLoweringBase::AtomicExpansionKind::LLSC)
      // LLSC is handled in expandAtomicCmpXchg().
      FenceOrdering = CASI->getSuccessOrdering();

    IRBuilder Builder(I);
    if (auto TrailingFence =
            TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
      TrailingFence->moveAfter(I);
      MadeChange = true;
    }
  }

  if (LI)
    MadeChange |= tryExpandAtomicLoad(LI);
  else if (SI)
    MadeChange |= tryExpandAtomicStore(SI);
  else if (RMWI) {
    // There are two different ways of expanding RMW instructions:
    // - into a load if it is idempotent
    // - into a Cmpxchg/LL-SC loop otherwise
    // we try them in that order.

    if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
      MadeChange = true;

    } else {
      MadeChange |= tryExpandAtomicRMW(RMWI);
    }
  } else if (CASI)
    MadeChange |= tryExpandAtomicCmpXchg(CASI);

  return MadeChange;
}

bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
  const auto *Subtarget = TM->getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getDataLayout();

  bool MadeChange = false;

  for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
    BasicBlock *BB = &*BBI;

    BasicBlock::reverse_iterator Next;

    for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
         I = Next) {
      Instruction &Inst = *I;
      Next = std::next(I);

      if (processAtomicInstr(&Inst)) {
        MadeChange = true;

        // New blocks may have been inserted.
        BBE = F.end();
      }
    }
  }

  return MadeChange;
}

bool AtomicExpandLegacy::runOnFunction(Function &F) {

  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;
  auto *TM = &TPC->getTM<TargetMachine>();
  AtomicExpandImpl AE;
  return AE.run(F, TM);
}

FunctionPass *llvm::createAtomicExpandLegacyPass() {
  return new AtomicExpandLegacy();
}

PreservedAnalyses AtomicExpandPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  AtomicExpandImpl AE;

  bool Changed = AE.run(F, TM);
  if (!Changed)
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
                                             AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
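///
/// A sketch of the rewrite (illustrative IR only; the integer type comes
/// from getCorrespondingIntegerType):
///   %v = load atomic float, ptr %p acquire, align 4
/// becomes
///   %v.int = load atomic i32, ptr %p acquire, align 4
///   %v = bitcast i32 %v.int to float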
LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  assert(RMWI->getOperation() == AtomicRMWInst::Xchg);

  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  copyMetadataForAtomic(*NewRMWI, *RMWI);
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}

bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}

bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
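  // (Illustrative example, not emitted verbatim: "store atomic double %v,
  // ptr %p seq_cst, align 8" becomes "atomicrmw xchg ptr %p, double %v
  // seq_cst, align 8" whose result is unused, and that xchg is what the call
  // below expands.)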
  tryExpandAtomicRMW(AI);
}

static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded,
                                 Instruction *MetadataSrc) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP and vector types.
  assert(!OrigTy->isPointerTy());
  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  if (MetadataSrc)
    copyMetadataForAtomic(*Pair, *MetadataSrc);

  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
  switch (Kind) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AI->getOperationName(AI->getOperation()) << " operation at "
               << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      AtomicRMWInst::BinOp Op = AI->getOperation();
      // Widen And/Or/Xor and give the target another chance at expanding it.
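      // (Widening these ops is sound because bits outside the partword are
      // unaffected: or/xor pad the extra bits of the operand with zeros, and
      // widenPartwordAtomicRMW pads an 'and' operand with ones via Inv_Mask,
      // so the surrounding bytes of the word are preserved.)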
      if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) {
        tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
        return true;
      }
    }
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the LSBs are known to be 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}

static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
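  // (For reference, that masked-merge form would be a single
  //   Final = Loaded ^ ((Loaded ^ New) & Mask)
  // instead of the and/and/or sequences below.)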
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
  case AtomicRMWInst::USubCond:
  case AtomicRMWInst::USubSat: {
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
      Op == AtomicRMWInst::And) {
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
      Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(
        Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
        MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}

bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value has
  // been modified. If it hasn't, the failure came from the masked-in part,
  // so abort the cmpxchg; otherwise retry the loop.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}

void AtomicExpandImpl::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *
AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
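    // (This covers, e.g., a caller that stores or returns the whole
    // { iN, i1 } aggregate instead of splitting it with extractvalue.)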
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
  default:
    return false;
  }
}

bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded, MetadataSrc);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    return expandAtomicCmpXchg(CI);
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  case TargetLoweringBase::AtomicExpansionKind::Expand: {
    TLI->emitExpandAtomicCmpXchg(CI);
    return true;
  }
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  // FIXME: If FP exceptions are observable, we should force them off for the
  // loop for the FP atomics.
  Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg, /*MetadataSrc=*/AI);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
  case AtomicRMWInst::USubCond:
  case AtomicRMWInst::USubSat:
    // No atomic libcalls are available for these.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
                  Instruction *MetadataSrc) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          if (MetadataSrc)
            copyMetadataForAtomic(*Pair, *MetadataSrc);

          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
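//
// As a rough illustration of the sized path: a naturally aligned
// `atomicrmw add ptr %p, i32 1 seq_cst` ends up as something like
//   %old = call i32 @__atomic_fetch_add_4(ptr %p, i32 1, i32 5)
// where the trailing i32 5 is the C ABI encoding of seq_cst produced by
// toCABI().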
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use a sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall, so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.
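  //
  // For instance, an i128 cmpxchg for which no sized call is usable would be
  // lowered to something like:
  //   %ok = call zeroext i1 @__atomic_compare_exchange(i64 16, ptr %obj,
  //                           ptr %expected.alloca, ptr %desired.alloca,
  //                           i32 5, i32 2)
  // with 'expected' and 'desired' spilled to allocas and the old value read
  // back out of %expected.alloca afterwards (orderings shown for a
  // seq_cst/acquire pair).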

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // Note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertible. For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
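  // For the CAS form this means reloading the (possibly updated) expected
  // value from its alloca and pairing it with the call's i1 result to rebuild
  // the { iN, i1 } the original cmpxchg produced; otherwise the result is
  // either the sized call's return value cast back to the original type, or a
  // load from the 'ret' alloca.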
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}
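
// Illustrative sketch of the generic path (assuming a 64-bit target where no
// sized __atomic_exchange_16 is usable): `atomicrmw xchg ptr %p, i128 %v
// monotonic` is rewritten by expandAtomicOpToLibcall into roughly
//   %val.addr = alloca i128        ; in the entry block
//   %ret.addr = alloca i128        ; in the entry block
//   store i128 %v, ptr %val.addr
//   call void @__atomic_exchange(i64 16, ptr %p, ptr %val.addr,
//                                ptr %ret.addr, i32 0)
//   %old = load i128, ptr %ret.addr
// (lifetime markers omitted), after which %old replaces all uses of the
// original atomicrmw.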