//===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the code for emitting atomic operations.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGCall.h"
#include "CodeGenModule.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"

using namespace clang;
using namespace CodeGen;

// The ABI values for various atomic memory orderings.
enum AtomicOrderingKind {
  AO_ABI_memory_order_relaxed = 0,
  AO_ABI_memory_order_consume = 1,
  AO_ABI_memory_order_acquire = 2,
  AO_ABI_memory_order_release = 3,
  AO_ABI_memory_order_acq_rel = 4,
  AO_ABI_memory_order_seq_cst = 5
};

namespace {
  class AtomicInfo {
    CodeGenFunction &CGF;
    QualType AtomicTy;
    QualType ValueTy;
    uint64_t AtomicSizeInBits;
    uint64_t ValueSizeInBits;
    CharUnits AtomicAlign;
    CharUnits ValueAlign;
    CharUnits LValueAlign;
    TypeEvaluationKind EvaluationKind;
    bool UseLibcall;
  public:
    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
      assert(lvalue.isSimple());

      AtomicTy = lvalue.getType();
      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
      EvaluationKind = CGF.getEvaluationKind(ValueTy);

      ASTContext &C = CGF.getContext();

      uint64_t valueAlignInBits;
      llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy);

      uint64_t atomicAlignInBits;
      llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy);

      assert(ValueSizeInBits <= AtomicSizeInBits);
      assert(valueAlignInBits <= atomicAlignInBits);

      AtomicAlign = C.toCharUnitsFromBits(atomicAlignInBits);
      ValueAlign = C.toCharUnitsFromBits(valueAlignInBits);
      if (lvalue.getAlignment().isZero())
        lvalue.setAlignment(AtomicAlign);

      UseLibcall =
        (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
         AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
    }

    QualType getAtomicType() const { return AtomicTy; }
    QualType getValueType() const { return ValueTy; }
    CharUnits getAtomicAlignment() const { return AtomicAlign; }
    CharUnits getValueAlignment() const { return ValueAlign; }
    uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
    TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
    bool shouldUseLibcall() const { return UseLibcall; }

    /// Is the atomic size larger than the underlying value type?
    ///
    /// Note that the absence of padding does not mean that atomic
    /// objects are completely interchangeable with non-atomic
    /// objects: we might have promoted the alignment of a type
    /// without making it bigger.
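    ///
    /// For example, on a typical target an _Atomic type with a 3-byte
    /// payload is usually rounded up to 4 bytes, so ValueSizeInBits (24)
    /// differs from AtomicSizeInBits (32). (Illustrative only; the exact
    /// promotion is target-dependent.)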
    bool hasPadding() const {
      return (ValueSizeInBits != AtomicSizeInBits);
    }

    bool emitMemSetZeroIfNecessary(LValue dest) const;

    llvm::Value *getAtomicSizeValue() const {
      CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
      return CGF.CGM.getSize(size);
    }

    /// Cast the given pointer to an integer pointer suitable for
    /// atomic operations.
    llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const;

    /// Turn an atomic-layout object into an r-value.
    RValue convertTempToRValue(llvm::Value *addr,
                               AggValueSlot resultSlot,
                               SourceLocation loc) const;

    /// Copy an atomic r-value into atomic-layout memory.
    void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;

    /// Project an l-value down to the value field.
    LValue projectValue(LValue lvalue) const {
      llvm::Value *addr = lvalue.getAddress();
      if (hasPadding())
        addr = CGF.Builder.CreateStructGEP(addr, 0);

      return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
                              CGF.getContext(), lvalue.getTBAAInfo());
    }

    /// Materialize an atomic r-value in atomic-layout memory.
    llvm::Value *materializeRValue(RValue rvalue) const;

  private:
    bool requiresMemSetZero(llvm::Type *type) const;
  };
}

static RValue emitAtomicLibcall(CodeGenFunction &CGF,
                                StringRef fnName,
                                QualType resultType,
                                CallArgList &args) {
  const CGFunctionInfo &fnInfo =
    CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args,
            FunctionType::ExtInfo(), RequiredArgs::All);
  llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
  llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName);
  return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args);
}

/// Does a store of the given IR type modify the full expected width?
static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type,
                           uint64_t expectedSize) {
  return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize);
}

/// Does the atomic type require memsetting to zero before initialization?
///
/// The IR type is provided as a way of making certain queries faster.
bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
  // If the atomic type has size padding, we definitely need a memset.
  if (hasPadding()) return true;

  // Otherwise, do some simple heuristics to try to avoid it:
  switch (getEvaluationKind()) {
  // For scalars and complexes, check whether the store size of the
  // type uses the full size.
  case TEK_Scalar:
    return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits);
  case TEK_Complex:
    return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
                           AtomicSizeInBits / 2);

  // Padding in structs has an undefined bit pattern. User beware.
  case TEK_Aggregate:
    return false;
  }
  llvm_unreachable("bad evaluation kind");
}

bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
  llvm::Value *addr = dest.getAddress();
  if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
    return false;

  CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
                           AtomicSizeInBits / 8,
                           dest.getAlignment().getQuantity());
  return true;
}

static void
EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
             llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
             uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) {
  llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
  llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
    llvm_unreachable("Already handled!");

  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__atomic_compare_exchange:
  case AtomicExpr::AO__atomic_compare_exchange_n: {
    // Note that cmpxchg only supports specifying one ordering and
    // doesn't support weak cmpxchg, at least at the moment.
    llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
    LoadVal1->setAlignment(Align);
    llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2);
    LoadVal2->setAlignment(Align);
    llvm::AtomicCmpXchgInst *CXI =
        CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order);
    CXI->setVolatile(E->isVolatile());
    llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1);
    StoreVal1->setAlignment(Align);
    llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1);
    CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
    return;
  }

  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__atomic_load_n:
  case AtomicExpr::AO__atomic_load: {
    llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
    Load->setAtomic(Order);
    Load->setAlignment(Size);
    Load->setVolatile(E->isVolatile());
    llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Load, Dest);
    StoreDest->setAlignment(Align);
    return;
  }

  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__atomic_store:
  case AtomicExpr::AO__atomic_store_n: {
    assert(!Dest && "Store does not return a value");
    llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
    LoadVal1->setAlignment(Align);
    llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
    Store->setAtomic(Order);
    Store->setAlignment(Size);
    Store->setVolatile(E->isVolatile());
    return;
  }

  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__atomic_exchange:
    Op = llvm::AtomicRMWInst::Xchg;
    break;

  case AtomicExpr::AO__atomic_add_fetch:
    PostOp = llvm::Instruction::Add;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_add:
    Op = llvm::AtomicRMWInst::Add;
    break;

  case AtomicExpr::AO__atomic_sub_fetch:
    PostOp = llvm::Instruction::Sub;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_sub:
  case AtomicExpr::AO__atomic_fetch_sub:
    Op = llvm::AtomicRMWInst::Sub;
    break;

  case AtomicExpr::AO__atomic_and_fetch:
    PostOp = llvm::Instruction::And;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_and:
    Op = llvm::AtomicRMWInst::And;
    break;

  case AtomicExpr::AO__atomic_or_fetch:
    PostOp = llvm::Instruction::Or;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_or:
    Op = llvm::AtomicRMWInst::Or;
    break;

  case AtomicExpr::AO__atomic_xor_fetch:
    PostOp = llvm::Instruction::Xor;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_xor:
    Op = llvm::AtomicRMWInst::Xor;
    break;

  case AtomicExpr::AO__atomic_nand_fetch:
    PostOp = llvm::Instruction::And;
    // Fall through.
  case AtomicExpr::AO__atomic_fetch_nand:
    Op = llvm::AtomicRMWInst::Nand;
    break;
  }

  llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
  LoadVal1->setAlignment(Align);
  llvm::AtomicRMWInst *RMWI =
      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order);
  RMWI->setVolatile(E->isVolatile());

  // For __atomic_*_fetch operations, perform the operation again to
  // determine the value which was written.
  llvm::Value *Result = RMWI;
  if (PostOp)
    Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1);
  if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
    Result = CGF.Builder.CreateNot(Result);
  llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Result, Dest);
  StoreDest->setAlignment(Align);
}

// This function emits any expression (scalar, complex, or aggregate)
// into a temporary alloca.
static llvm::Value *
EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
  llvm::Value *DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp");
  CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(),
                       /*Init*/ true);
  return DeclPtr;
}

static void
AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                  bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
                  SourceLocation Loc) {
  if (UseOptimizedLibcall) {
    // Load value and pass it to the function directly.
    unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
    Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy, Loc);
    Args.add(RValue::get(Val), ValTy);
  } else {
    // Non-optimized functions always take a reference.
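    // (For example, the generic __atomic_store(size, mem, val, order) form
    // documented below passes 'val' through a void*, whereas the
    // size-specialized __atomic_store_N form takes it by value.)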
    Args.add(RValue::get(CGF.EmitCastToVoidPtr(Val)),
             CGF.getContext().VoidPtrTy);
  }
}

RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
  QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
  QualType MemTy = AtomicTy;
  if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
    MemTy = AT->getValueType();
  CharUnits sizeChars = getContext().getTypeSizeInChars(AtomicTy);
  uint64_t Size = sizeChars.getQuantity();
  CharUnits alignChars = getContext().getTypeAlignInChars(AtomicTy);
  unsigned Align = alignChars.getQuantity();
  unsigned MaxInlineWidthInBits =
    getTarget().getMaxAtomicInlineWidth();
  bool UseLibcall = (Size != Align ||
                     getContext().toBits(sizeChars) > MaxInlineWidthInBits);

  llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0;
  Ptr = EmitScalarExpr(E->getPtr());

  if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
    assert(!Dest && "Init does not return a value");
    LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext());
    EmitAtomicInit(E->getVal1(), lvalue);
    return RValue::get(0);
  }

  Order = EmitScalarExpr(E->getOrder());

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
    llvm_unreachable("Already handled!");

  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__atomic_load_n:
    break;

  case AtomicExpr::AO__atomic_load:
    Dest = EmitScalarExpr(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_store:
    Val1 = EmitScalarExpr(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_exchange:
    Val1 = EmitScalarExpr(E->getVal1());
    Dest = EmitScalarExpr(E->getVal2());
    break;

  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__atomic_compare_exchange_n:
  case AtomicExpr::AO__atomic_compare_exchange:
    Val1 = EmitScalarExpr(E->getVal1());
    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
      Val2 = EmitScalarExpr(E->getVal2());
    else
      Val2 = EmitValToTemp(*this, E->getVal2());
    OrderFail = EmitScalarExpr(E->getOrderFail());
    // Evaluate and discard the 'weak' argument.
    if (E->getNumSubExprs() == 6)
      EmitScalarExpr(E->getWeak());
    break;

  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__c11_atomic_fetch_sub:
    if (MemTy->isPointerType()) {
      // For pointer arithmetic, we're required to do a bit of math:
      // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
      // ... but only for the C11 builtins. The GNU builtins expect the
      // user to multiply by sizeof(T).
      QualType Val1Ty = E->getVal1()->getType();
      llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
      CharUnits PointeeIncAmt =
        getContext().getTypeSizeInChars(MemTy->getPointeeType());
      Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
      Val1 = CreateMemTemp(Val1Ty, ".atomictmp");
      EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty));
      break;
    }
    // Fall through.
  case AtomicExpr::AO__atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_sub:
  case AtomicExpr::AO__atomic_add_fetch:
  case AtomicExpr::AO__atomic_sub_fetch:
  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__atomic_store_n:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_nand:
  case AtomicExpr::AO__atomic_and_fetch:
  case AtomicExpr::AO__atomic_or_fetch:
  case AtomicExpr::AO__atomic_xor_fetch:
  case AtomicExpr::AO__atomic_nand_fetch:
    Val1 = EmitValToTemp(*this, E->getVal1());
    break;
  }

  if (!E->getType()->isVoidType() && !Dest)
    Dest = CreateMemTemp(E->getType(), ".atomicdst");

  // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
  if (UseLibcall) {
    bool UseOptimizedLibcall = false;
    switch (E->getOp()) {
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_add:
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_and:
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_or:
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_sub:
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__atomic_fetch_xor:
      // For these, only library calls for certain sizes exist.
      UseOptimizedLibcall = true;
      break;
    default:
      // Only use optimized library calls for sizes for which they exist.
      if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
        UseOptimizedLibcall = true;
      break;
    }

    CallArgList Args;
    if (!UseOptimizedLibcall) {
      // For non-optimized library calls, the size is the first parameter.
      Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
               getContext().getSizeType());
    }
    // The atomic address is the first or second parameter.
    Args.add(RValue::get(EmitCastToVoidPtr(Ptr)), getContext().VoidPtrTy);

    std::string LibCallName;
    QualType RetTy;
    bool HaveRetTy = false;
    switch (E->getOp()) {
    // There is only one libcall for compare and exchange, because there is no
    // optimisation benefit possible from a libcall version of a weak compare
    // and exchange.
    // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
    //                                void *desired, int success, int failure)
    // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
    //                                  int success, int failure)
    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
    case AtomicExpr::AO__atomic_compare_exchange:
    case AtomicExpr::AO__atomic_compare_exchange_n:
      LibCallName = "__atomic_compare_exchange";
      RetTy = getContext().BoolTy;
      HaveRetTy = true;
      Args.add(RValue::get(EmitCastToVoidPtr(Val1)), getContext().VoidPtrTy);
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy,
                        E->getExprLoc());
      Args.add(RValue::get(Order), getContext().IntTy);
      Order = OrderFail;
      break;
    // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
    //                        int order)
    // T __atomic_exchange_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_exchange:
    case AtomicExpr::AO__atomic_exchange_n:
    case AtomicExpr::AO__atomic_exchange:
      LibCallName = "__atomic_exchange";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    // void __atomic_store(size_t size, void *mem, void *val, int order)
    // void __atomic_store_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_store:
    case AtomicExpr::AO__atomic_store:
    case AtomicExpr::AO__atomic_store_n:
      LibCallName = "__atomic_store";
      RetTy = getContext().VoidTy;
      HaveRetTy = true;
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    // void __atomic_load(size_t size, void *mem, void *return, int order)
    // T __atomic_load_N(T *mem, int order)
    case AtomicExpr::AO__c11_atomic_load:
    case AtomicExpr::AO__atomic_load:
    case AtomicExpr::AO__atomic_load_n:
      LibCallName = "__atomic_load";
      break;
    // T __atomic_fetch_add_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_add:
      LibCallName = "__atomic_fetch_add";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    // T __atomic_fetch_and_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_and:
      LibCallName = "__atomic_fetch_and";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    // T __atomic_fetch_or_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_or:
      LibCallName = "__atomic_fetch_or";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    // T __atomic_fetch_sub_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_sub:
      LibCallName = "__atomic_fetch_sub";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    // T __atomic_fetch_xor_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__atomic_fetch_xor:
      LibCallName = "__atomic_fetch_xor";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
                        E->getExprLoc());
      break;
    default: return EmitUnsupportedRValue(E, "atomic library call");
    }

    // Optimized functions have the size in their name.
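    // For example, a 4-byte __atomic_fetch_add becomes __atomic_fetch_add_4.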
    if (UseOptimizedLibcall)
      LibCallName += "_" + llvm::utostr(Size);
    // By default, assume we return a value of the atomic type.
    if (!HaveRetTy) {
      if (UseOptimizedLibcall) {
        // Value is returned directly.
        RetTy = MemTy;
      } else {
        // Value is returned through parameter before the order.
        RetTy = getContext().VoidTy;
        Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
                 getContext().VoidPtrTy);
      }
    }
    // The order is always the last parameter.
    Args.add(RValue::get(Order),
             getContext().IntTy);

    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeFreeFunctionCall(RetTy, Args,
            FunctionType::ExtInfo(), RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
    if (!RetTy->isVoidType())
      return Res;
    if (E->getType()->isVoidType())
      return RValue::get(0);
    return convertTempToRValue(Dest, E->getType(), E->getExprLoc());
  }

  bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
                 E->getOp() == AtomicExpr::AO__atomic_store ||
                 E->getOp() == AtomicExpr::AO__atomic_store_n;
  bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
                E->getOp() == AtomicExpr::AO__atomic_load ||
                E->getOp() == AtomicExpr::AO__atomic_load_n;

  llvm::Type *IPtrTy =
      llvm::IntegerType::get(getLLVMContext(), Size * 8)->getPointerTo();
  llvm::Value *OrigDest = Dest;
  Ptr = Builder.CreateBitCast(Ptr, IPtrTy);
  if (Val1) Val1 = Builder.CreateBitCast(Val1, IPtrTy);
  if (Val2) Val2 = Builder.CreateBitCast(Val2, IPtrTy);
  if (Dest && !E->isCmpXChg()) Dest = Builder.CreateBitCast(Dest, IPtrTy);

  if (isa<llvm::ConstantInt>(Order)) {
    int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
    switch (ord) {
    case AO_ABI_memory_order_relaxed:
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::Monotonic);
      break;
    case AO_ABI_memory_order_consume:
    case AO_ABI_memory_order_acquire:
      if (IsStore)
        break; // Avoid crashing on code with undefined behavior
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::Acquire);
      break;
    case AO_ABI_memory_order_release:
      if (IsLoad)
        break; // Avoid crashing on code with undefined behavior
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::Release);
      break;
    case AO_ABI_memory_order_acq_rel:
      if (IsLoad || IsStore)
        break; // Avoid crashing on code with undefined behavior
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::AcquireRelease);
      break;
    case AO_ABI_memory_order_seq_cst:
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::SequentiallyConsistent);
      break;
    default: // invalid order
      // We should not ever get here normally, but it's hard to
      // enforce that in general.
      break;
    }
    if (E->getType()->isVoidType())
      return RValue::get(0);
    return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc());
  }

  // Long case, when Order isn't obviously constant.
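  // For example, if the memory_order argument is a runtime value rather
  // than a constant, we cannot pick a single LLVM ordering here, so we
  // emit one block per permitted ordering and switch on the value below.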

  // Create all the relevant BB's
  llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0,
                   *AcqRelBB = 0, *SeqCstBB = 0;
  MonotonicBB = createBasicBlock("monotonic", CurFn);
  if (!IsStore)
    AcquireBB = createBasicBlock("acquire", CurFn);
  if (!IsLoad)
    ReleaseBB = createBasicBlock("release", CurFn);
  if (!IsLoad && !IsStore)
    AcqRelBB = createBasicBlock("acqrel", CurFn);
  SeqCstBB = createBasicBlock("seqcst", CurFn);
  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

  // Create the switch for the split
  // MonotonicBB is arbitrarily chosen as the default case; in practice, this
  // doesn't matter unless someone is crazy enough to use something that
  // doesn't fold to a constant for the ordering.
  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);

  // Emit all the different atomics
  Builder.SetInsertPoint(MonotonicBB);
  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
               llvm::Monotonic);
  Builder.CreateBr(ContBB);
  if (!IsStore) {
    Builder.SetInsertPoint(AcquireBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                 llvm::Acquire);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);
  }
  if (!IsLoad) {
    Builder.SetInsertPoint(ReleaseBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                 llvm::Release);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);
  }
  if (!IsLoad && !IsStore) {
    Builder.SetInsertPoint(AcqRelBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                 llvm::AcquireRelease);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);
  }
  Builder.SetInsertPoint(SeqCstBB);
  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
               llvm::SequentiallyConsistent);
  Builder.CreateBr(ContBB);
  SI->addCase(Builder.getInt32(5), SeqCstBB);

  // Cleanup and return
  Builder.SetInsertPoint(ContBB);
  if (E->getType()->isVoidType())
    return RValue::get(0);
  return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc());
}

llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
  unsigned addrspace =
    cast<llvm::PointerType>(addr->getType())->getAddressSpace();
  llvm::IntegerType *ty =
    llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits);
  return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
}

RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                       AggValueSlot resultSlot,
                                       SourceLocation loc) const {
  if (EvaluationKind == TEK_Aggregate)
    return resultSlot.asRValue();

  // Drill into the padding structure if we have one.
  if (hasPadding())
    addr = CGF.Builder.CreateStructGEP(addr, 0);

  // Otherwise, just convert the temporary to an r-value using the
  // normal conversion routine.
  return CGF.convertTempToRValue(addr, getValueType(), loc);
}

/// Emit a load from an l-value of atomic type. Note that the r-value
/// we produce is an r-value of the atomic *value* type.
RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
                                       AggValueSlot resultSlot) {
  AtomicInfo atomics(*this, src);

  // Check whether we should use a library call.
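  // (The AtomicInfo constructor sets this when the atomic's size exceeds
  // its alignment or the target's maximum inline atomic width.)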
  if (atomics.shouldUseLibcall()) {
    llvm::Value *tempAddr;
    if (!resultSlot.isIgnored()) {
      assert(atomics.getEvaluationKind() == TEK_Aggregate);
      tempAddr = resultSlot.getAddr();
    } else {
      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
    }

    // void __atomic_load(size_t size, void *mem, void *return, int order);
    CallArgList args;
    args.add(RValue::get(atomics.getAtomicSizeValue()),
             getContext().getSizeType());
    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
             getContext().VoidPtrTy);
    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
             getContext().VoidPtrTy);
    args.add(RValue::get(llvm::ConstantInt::get(IntTy,
                                                AO_ABI_memory_order_seq_cst)),
             getContext().IntTy);
    emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);

    // Produce the r-value.
    return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
  }

  // Okay, we're doing this natively.
  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
  llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
  load->setAtomic(llvm::SequentiallyConsistent);

  // Other decoration.
  load->setAlignment(src.getAlignment().getQuantity());
  if (src.isVolatileQualified())
    load->setVolatile(true);
  if (src.getTBAAInfo())
    CGM.DecorateInstruction(load, src.getTBAAInfo());

  // Okay, turn that back into the original value type.
  QualType valueType = atomics.getValueType();
  llvm::Value *result = load;

  // If we're ignoring an aggregate return, don't do anything.
  if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
    return RValue::getAggregate(0, false);

  // The easiest way to do this is to go through memory, but we
  // try not to in some easy cases.
  if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) {
    llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
    if (isa<llvm::IntegerType>(resultTy)) {
      assert(result->getType() == resultTy);
      result = EmitFromMemory(result, valueType);
    } else if (isa<llvm::PointerType>(resultTy)) {
      result = Builder.CreateIntToPtr(result, resultTy);
    } else {
      result = Builder.CreateBitCast(result, resultTy);
    }
    return RValue::get(result);
  }

  // Create a temporary. This needs to be big enough to hold the
  // atomic integer.
  llvm::Value *temp;
  bool tempIsVolatile = false;
  CharUnits tempAlignment;
  if (atomics.getEvaluationKind() == TEK_Aggregate) {
    assert(!resultSlot.isIgnored());
    temp = resultSlot.getAddr();
    tempAlignment = atomics.getValueAlignment();
    tempIsVolatile = resultSlot.isVolatile();
  } else {
    temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
    tempAlignment = atomics.getAtomicAlignment();
  }

  // Slam the integer into the temporary.
  llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp);
  Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity())
    ->setVolatile(tempIsVolatile);

  return atomics.convertTempToRValue(temp, resultSlot, loc);
}

/// Copy an r-value into memory as part of storing to an atomic type.
/// This needs to create a bit-pattern suitable for atomic operations.
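///
/// For a scalar or complex r-value this zeroes any atomic padding first
/// (via emitMemSetZeroIfNecessary) and then stores the value into the
/// projected value field; aggregate r-values are copied as a whole.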
void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
  // If we have an r-value, the rvalue should be of the atomic type,
  // which means that the caller is responsible for having zeroed
  // any padding. Just do an aggregate copy of that type.
  if (rvalue.isAggregate()) {
    CGF.EmitAggregateCopy(dest.getAddress(),
                          rvalue.getAggregateAddr(),
                          getAtomicType(),
                          (rvalue.isVolatileQualified()
                           || dest.isVolatileQualified()),
                          dest.getAlignment());
    return;
  }

  // Okay, otherwise we're copying stuff.

  // Zero out the buffer if necessary.
  emitMemSetZeroIfNecessary(dest);

  // Drill past the padding if present.
  dest = projectValue(dest);

  // Okay, store the rvalue in.
  if (rvalue.isScalar()) {
    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
  } else {
    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
  }
}

/// Materialize an r-value into memory for the purposes of storing it
/// to an atomic type.
llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const {
  // Aggregate r-values are already in memory, and EmitAtomicStore
  // requires them to be values of the atomic type.
  if (rvalue.isAggregate())
    return rvalue.getAggregateAddr();

  // Otherwise, make a temporary and materialize into it.
  llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(),
                                     getAtomicAlignment());
  emitCopyIntoMemory(rvalue, tempLV);
  return temp;
}

/// Emit a store to an l-value of atomic type.
///
/// Note that the r-value is expected to be an r-value *of the atomic
/// type*; this means that for aggregate r-values, it should include
/// storage for any padding that was necessary.
void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, bool isInit) {
  // If this is an aggregate r-value, it should agree in type except
  // maybe for address-space qualification.
  assert(!rvalue.isAggregate() ||
         rvalue.getAggregateAddr()->getType()->getPointerElementType()
           == dest.getAddress()->getType()->getPointerElementType());

  AtomicInfo atomics(*this, dest);

  // If this is an initialization, just put the value there normally.
  if (isInit) {
    atomics.emitCopyIntoMemory(rvalue, dest);
    return;
  }

  // Check whether we should use a library call.
  if (atomics.shouldUseLibcall()) {
    // Produce a source address.
    llvm::Value *srcAddr = atomics.materializeRValue(rvalue);

    // void __atomic_store(size_t size, void *mem, void *val, int order)
    CallArgList args;
    args.add(RValue::get(atomics.getAtomicSizeValue()),
             getContext().getSizeType());
    args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
             getContext().VoidPtrTy);
    args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
             getContext().VoidPtrTy);
    args.add(RValue::get(llvm::ConstantInt::get(IntTy,
                                                AO_ABI_memory_order_seq_cst)),
             getContext().IntTy);
    emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
    return;
  }

  // Okay, we're doing this natively.
  llvm::Value *intValue;

  // If we've got a scalar value of the right size, try to avoid going
  // through memory.
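  // (For example, a pointer is converted with ptrtoint and a float is
  // bitcast to an integer of the value's width.)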
  if (rvalue.isScalar() && !atomics.hasPadding()) {
    llvm::Value *value = rvalue.getScalarVal();
    if (isa<llvm::IntegerType>(value->getType())) {
      intValue = value;
    } else {
      llvm::IntegerType *inputIntTy =
        llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
      if (isa<llvm::PointerType>(value->getType())) {
        intValue = Builder.CreatePtrToInt(value, inputIntTy);
      } else {
        intValue = Builder.CreateBitCast(value, inputIntTy);
      }
    }

  // Otherwise, we need to go through memory.
  } else {
    // Put the r-value in memory.
    llvm::Value *addr = atomics.materializeRValue(rvalue);

    // Cast the temporary to the atomic int type and pull a value out.
    addr = atomics.emitCastToAtomicIntPointer(addr);
    intValue = Builder.CreateAlignedLoad(addr,
                                 atomics.getAtomicAlignment().getQuantity());
  }

  // Do the atomic store.
  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
  llvm::StoreInst *store = Builder.CreateStore(intValue, addr);

  // Initializations don't need to be atomic.
  if (!isInit) store->setAtomic(llvm::SequentiallyConsistent);

  // Other decoration.
  store->setAlignment(dest.getAlignment().getQuantity());
  if (dest.isVolatileQualified())
    store->setVolatile(true);
  if (dest.getTBAAInfo())
    CGM.DecorateInstruction(store, dest.getTBAAInfo());
}

void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
  AtomicInfo atomics(*this, dest);

  switch (atomics.getEvaluationKind()) {
  case TEK_Scalar: {
    llvm::Value *value = EmitScalarExpr(init);
    atomics.emitCopyIntoMemory(RValue::get(value), dest);
    return;
  }

  case TEK_Complex: {
    ComplexPairTy value = EmitComplexExpr(init);
    atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
    return;
  }

  case TEK_Aggregate: {
    // Fix up the destination if the initializer isn't an expression
    // of atomic type.
    bool Zeroed = false;
    if (!init->getType()->isAtomicType()) {
      Zeroed = atomics.emitMemSetZeroIfNecessary(dest);
      dest = atomics.projectValue(dest);
    }

    // Evaluate the expression directly into the destination.
    AggValueSlot slot = AggValueSlot::forLValue(dest,
                                        AggValueSlot::IsNotDestructed,
                                        AggValueSlot::DoesNotNeedGCBarriers,
                                        AggValueSlot::IsNotAliased,
                                        Zeroed ? AggValueSlot::IsZeroed :
                                                 AggValueSlot::IsNotZeroed);

    EmitAggExpr(init, slot);
    return;
  }
  }
  llvm_unreachable("bad evaluation kind");
}