1 //===- LowerTypeTests.cpp - type metadata lowering pass -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass lowers type metadata and calls to the llvm.type.test intrinsic. 10 // It also ensures that globals are properly laid out for the 11 // llvm.icall.branch.funnel intrinsic. 12 // See http://llvm.org/docs/TypeMetadata.html for more information. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/IPO/LowerTypeTests.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/EquivalenceClasses.h" 21 #include "llvm/ADT/PointerUnion.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/TinyPtrVector.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/TypeMetadataUtils.h" 29 #include "llvm/Analysis/ValueTracking.h" 30 #include "llvm/IR/Attributes.h" 31 #include "llvm/IR/BasicBlock.h" 32 #include "llvm/IR/Constant.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DataLayout.h" 35 #include "llvm/IR/DerivedTypes.h" 36 #include "llvm/IR/Function.h" 37 #include "llvm/IR/GlobalAlias.h" 38 #include "llvm/IR/GlobalObject.h" 39 #include "llvm/IR/GlobalValue.h" 40 #include "llvm/IR/GlobalVariable.h" 41 #include "llvm/IR/IRBuilder.h" 42 #include "llvm/IR/InlineAsm.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/IR/Instructions.h" 45 #include "llvm/IR/IntrinsicInst.h" 46 #include "llvm/IR/Intrinsics.h" 47 #include "llvm/IR/LLVMContext.h" 48 #include "llvm/IR/Metadata.h" 49 #include "llvm/IR/Module.h" 50 #include "llvm/IR/ModuleSummaryIndex.h" 51 #include "llvm/IR/ModuleSummaryIndexYAML.h" 52 #include "llvm/IR/Operator.h" 53 #include "llvm/IR/PassManager.h" 54 #include "llvm/IR/ReplaceConstant.h" 55 #include "llvm/IR/Type.h" 56 #include "llvm/IR/Use.h" 57 #include "llvm/IR/User.h" 58 #include "llvm/IR/Value.h" 59 #include "llvm/Support/Allocator.h" 60 #include "llvm/Support/Casting.h" 61 #include "llvm/Support/CommandLine.h" 62 #include "llvm/Support/Debug.h" 63 #include "llvm/Support/Error.h" 64 #include "llvm/Support/ErrorHandling.h" 65 #include "llvm/Support/FileSystem.h" 66 #include "llvm/Support/MathExtras.h" 67 #include "llvm/Support/MemoryBuffer.h" 68 #include "llvm/Support/TrailingObjects.h" 69 #include "llvm/Support/YAMLTraits.h" 70 #include "llvm/Support/raw_ostream.h" 71 #include "llvm/TargetParser/Triple.h" 72 #include "llvm/Transforms/IPO.h" 73 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 74 #include "llvm/Transforms/Utils/ModuleUtils.h" 75 #include <algorithm> 76 #include <cassert> 77 #include <cstdint> 78 #include <memory> 79 #include <set> 80 #include <string> 81 #include <system_error> 82 #include <utility> 83 #include <vector> 84 85 using namespace llvm; 86 using namespace lowertypetests; 87 88 #define DEBUG_TYPE "lowertypetests" 89 90 STATISTIC(ByteArraySizeBits, "Byte array size in bits"); 91 STATISTIC(ByteArraySizeBytes, "Byte array size in bytes"); 92 STATISTIC(NumByteArraysCreated, "Number of byte arrays created"); 93 STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered"); 94 
STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers"); 95 96 static cl::opt<bool> AvoidReuse( 97 "lowertypetests-avoid-reuse", 98 cl::desc("Try to avoid reuse of byte array addresses using aliases"), 99 cl::Hidden, cl::init(true)); 100 101 static cl::opt<PassSummaryAction> ClSummaryAction( 102 "lowertypetests-summary-action", 103 cl::desc("What to do with the summary when running this pass"), 104 cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"), 105 clEnumValN(PassSummaryAction::Import, "import", 106 "Import typeid resolutions from summary and globals"), 107 clEnumValN(PassSummaryAction::Export, "export", 108 "Export typeid resolutions to summary and globals")), 109 cl::Hidden); 110 111 static cl::opt<std::string> ClReadSummary( 112 "lowertypetests-read-summary", 113 cl::desc("Read summary from given YAML file before running pass"), 114 cl::Hidden); 115 116 static cl::opt<std::string> ClWriteSummary( 117 "lowertypetests-write-summary", 118 cl::desc("Write summary to given YAML file after running pass"), 119 cl::Hidden); 120 121 static cl::opt<DropTestKind> 122 ClDropTypeTests("lowertypetests-drop-type-tests", 123 cl::desc("Simply drop type test sequences"), 124 cl::values(clEnumValN(DropTestKind::None, "none", 125 "Do not drop any type tests"), 126 clEnumValN(DropTestKind::Assume, "assume", 127 "Drop type test assume sequences"), 128 clEnumValN(DropTestKind::All, "all", 129 "Drop all type test sequences")), 130 cl::Hidden, cl::init(DropTestKind::None)); 131 132 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { 133 if (Offset < ByteOffset) 134 return false; 135 136 if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0) 137 return false; 138 139 uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2; 140 if (BitOffset >= BitSize) 141 return false; 142 143 return Bits.count(BitOffset); 144 } 145 146 void BitSetInfo::print(raw_ostream &OS) const { 147 OS << "offset " << ByteOffset << " size " << BitSize << " align " 148 << (1 << AlignLog2); 149 150 if (isAllOnes()) { 151 OS << " all-ones\n"; 152 return; 153 } 154 155 OS << " { "; 156 for (uint64_t B : Bits) 157 OS << B << ' '; 158 OS << "}\n"; 159 } 160 161 BitSetInfo BitSetBuilder::build() { 162 if (Min > Max) 163 Min = 0; 164 165 // Normalize each offset against the minimum observed offset, and compute 166 // the bitwise OR of each of the offsets. The number of trailing zeros 167 // in the mask gives us the log2 of the alignment of all offsets, which 168 // allows us to compress the bitset by only storing one bit per aligned 169 // address. 170 uint64_t Mask = 0; 171 for (uint64_t &Offset : Offsets) { 172 Offset -= Min; 173 Mask |= Offset; 174 } 175 176 BitSetInfo BSI; 177 BSI.ByteOffset = Min; 178 179 BSI.AlignLog2 = 0; 180 if (Mask != 0) 181 BSI.AlignLog2 = llvm::countr_zero(Mask); 182 183 // Build the compressed bitset while normalizing the offsets against the 184 // computed alignment. 185 BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1; 186 for (uint64_t Offset : Offsets) { 187 Offset >>= BSI.AlignLog2; 188 BSI.Bits.insert(Offset); 189 } 190 191 return BSI; 192 } 193 194 void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) { 195 // Create a new fragment to hold the layout for F. 
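  // Illustrative example (hypothetical object indices): if F is {1, 2} and
  // index 2 already lives in fragment 3 = {2, 5}, the loop below first appends
  // 1, then splices in {2, 5}, yielding the new fragment {1, 2, 5}; fragment 3
  // is left empty and the fragment map is then updated so that 1, 2 and 5 all
  // point at the new fragment.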
196 Fragments.emplace_back(); 197 std::vector<uint64_t> &Fragment = Fragments.back(); 198 uint64_t FragmentIndex = Fragments.size() - 1; 199 200 for (auto ObjIndex : F) { 201 uint64_t OldFragmentIndex = FragmentMap[ObjIndex]; 202 if (OldFragmentIndex == 0) { 203 // We haven't seen this object index before, so just add it to the current 204 // fragment. 205 Fragment.push_back(ObjIndex); 206 } else { 207 // This index belongs to an existing fragment. Copy the elements of the 208 // old fragment into this one and clear the old fragment. We don't update 209 // the fragment map just yet, this ensures that any further references to 210 // indices from the old fragment in this fragment do not insert any more 211 // indices. 212 std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex]; 213 llvm::append_range(Fragment, OldFragment); 214 OldFragment.clear(); 215 } 216 } 217 218 // Update the fragment map to point our object indices to this fragment. 219 for (uint64_t ObjIndex : Fragment) 220 FragmentMap[ObjIndex] = FragmentIndex; 221 } 222 223 void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits, 224 uint64_t BitSize, uint64_t &AllocByteOffset, 225 uint8_t &AllocMask) { 226 // Find the smallest current allocation. 227 unsigned Bit = 0; 228 for (unsigned I = 1; I != BitsPerByte; ++I) 229 if (BitAllocs[I] < BitAllocs[Bit]) 230 Bit = I; 231 232 AllocByteOffset = BitAllocs[Bit]; 233 234 // Add our size to it. 235 unsigned ReqSize = AllocByteOffset + BitSize; 236 BitAllocs[Bit] = ReqSize; 237 if (Bytes.size() < ReqSize) 238 Bytes.resize(ReqSize); 239 240 // Set our bits. 241 AllocMask = 1 << Bit; 242 for (uint64_t B : Bits) 243 Bytes[AllocByteOffset + B] |= AllocMask; 244 } 245 246 bool lowertypetests::isJumpTableCanonical(Function *F) { 247 if (F->isDeclarationForLinker()) 248 return false; 249 auto *CI = mdconst::extract_or_null<ConstantInt>( 250 F->getParent()->getModuleFlag("CFI Canonical Jump Tables")); 251 if (!CI || !CI->isZero()) 252 return true; 253 return F->hasFnAttribute("cfi-canonical-jump-table"); 254 } 255 256 namespace { 257 258 struct ByteArrayInfo { 259 std::set<uint64_t> Bits; 260 uint64_t BitSize; 261 GlobalVariable *ByteArray; 262 GlobalVariable *MaskGlobal; 263 uint8_t *MaskPtr = nullptr; 264 }; 265 266 /// A POD-like structure that we use to store a global reference together with 267 /// its metadata types. In this pass we frequently need to query the set of 268 /// metadata types referenced by a global, which at the IR level is an expensive 269 /// operation involving a map lookup; this data structure helps to reduce the 270 /// number of times we need to do this lookup. 271 class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> { 272 friend TrailingObjects; 273 274 GlobalObject *GO; 275 size_t NTypes; 276 277 // For functions: true if the jump table is canonical. This essentially means 278 // whether the canonical address (i.e. the symbol table entry) of the function 279 // is provided by the local jump table. This is normally the same as whether 280 // the function is defined locally, but if canonical jump tables are disabled 281 // by the user then the jump table never provides a canonical definition. 282 bool IsJumpTableCanonical; 283 284 // For functions: true if this function is either defined or used in a thinlto 285 // module and its jumptable entry needs to be exported to thinlto backends. 
286 bool IsExported; 287 288 size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; } 289 290 public: 291 static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, 292 bool IsJumpTableCanonical, bool IsExported, 293 ArrayRef<MDNode *> Types) { 294 auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate( 295 totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember))); 296 GTM->GO = GO; 297 GTM->NTypes = Types.size(); 298 GTM->IsJumpTableCanonical = IsJumpTableCanonical; 299 GTM->IsExported = IsExported; 300 std::uninitialized_copy(Types.begin(), Types.end(), 301 GTM->getTrailingObjects<MDNode *>()); 302 return GTM; 303 } 304 305 GlobalObject *getGlobal() const { 306 return GO; 307 } 308 309 bool isJumpTableCanonical() const { 310 return IsJumpTableCanonical; 311 } 312 313 bool isExported() const { 314 return IsExported; 315 } 316 317 ArrayRef<MDNode *> types() const { 318 return ArrayRef(getTrailingObjects<MDNode *>(), NTypes); 319 } 320 }; 321 322 struct ICallBranchFunnel final 323 : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> { 324 static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI, 325 ArrayRef<GlobalTypeMember *> Targets, 326 unsigned UniqueId) { 327 auto *Call = static_cast<ICallBranchFunnel *>( 328 Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()), 329 alignof(ICallBranchFunnel))); 330 Call->CI = CI; 331 Call->UniqueId = UniqueId; 332 Call->NTargets = Targets.size(); 333 std::uninitialized_copy(Targets.begin(), Targets.end(), 334 Call->getTrailingObjects<GlobalTypeMember *>()); 335 return Call; 336 } 337 338 CallInst *CI; 339 ArrayRef<GlobalTypeMember *> targets() const { 340 return ArrayRef(getTrailingObjects<GlobalTypeMember *>(), NTargets); 341 } 342 343 unsigned UniqueId; 344 345 private: 346 size_t NTargets; 347 }; 348 349 struct ScopedSaveAliaseesAndUsed { 350 Module &M; 351 SmallVector<GlobalValue *, 4> Used, CompilerUsed; 352 std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases; 353 std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs; 354 355 ScopedSaveAliaseesAndUsed(Module &M) : M(M) { 356 // The users of this class want to replace all function references except 357 // for aliases and llvm.used/llvm.compiler.used with references to a jump 358 // table. We avoid replacing aliases in order to avoid introducing a double 359 // indirection (or an alias pointing to a declaration in ThinLTO mode), and 360 // we avoid replacing llvm.used/llvm.compiler.used because these global 361 // variables describe properties of the global, not the jump table (besides, 362 // offseted references to the jump table in llvm.used are invalid). 363 // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly 364 // indirect) users", so what we do is save the list of globals referenced by 365 // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW 366 // replace the aliasees and then set them back to their original values at 367 // the end. 368 if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false)) 369 GV->eraseFromParent(); 370 if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true)) 371 GV->eraseFromParent(); 372 373 for (auto &GA : M.aliases()) { 374 // FIXME: This should look past all aliases not just interposable ones, 375 // see discussion on D65118. 
376 if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts())) 377 FunctionAliases.push_back({&GA, F}); 378 } 379 380 for (auto &GI : M.ifuncs()) 381 if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts())) 382 ResolverIFuncs.push_back({&GI, F}); 383 } 384 385 ~ScopedSaveAliaseesAndUsed() { 386 appendToUsed(M, Used); 387 appendToCompilerUsed(M, CompilerUsed); 388 389 for (auto P : FunctionAliases) 390 P.first->setAliasee(P.second); 391 392 for (auto P : ResolverIFuncs) { 393 // This does not preserve pointer casts that may have been stripped by the 394 // constructor, but the resolver's type is different from that of the 395 // ifunc anyway. 396 P.first->setResolver(P.second); 397 } 398 } 399 }; 400 401 class LowerTypeTestsModule { 402 Module &M; 403 404 ModuleSummaryIndex *ExportSummary; 405 const ModuleSummaryIndex *ImportSummary; 406 // Set when the client has invoked this to simply drop all type test assume 407 // sequences. 408 DropTestKind DropTypeTests; 409 410 Triple::ArchType Arch; 411 Triple::OSType OS; 412 Triple::ObjectFormatType ObjectFormat; 413 414 // Determines which kind of Thumb jump table we generate. If arch is 415 // either 'arm' or 'thumb' we need to find this out, because 416 // selectJumpTableArmEncoding may decide to use Thumb in either case. 417 bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false; 418 419 // Cache variable used by hasBranchTargetEnforcement(). 420 int HasBranchTargetEnforcement = -1; 421 422 // The jump table type we ended up deciding on. (Usually the same as 423 // Arch, except that 'arm' and 'thumb' are often interchangeable.) 424 Triple::ArchType JumpTableArch = Triple::UnknownArch; 425 426 IntegerType *Int1Ty = Type::getInt1Ty(M.getContext()); 427 IntegerType *Int8Ty = Type::getInt8Ty(M.getContext()); 428 PointerType *Int8PtrTy = PointerType::getUnqual(M.getContext()); 429 ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0); 430 IntegerType *Int32Ty = Type::getInt32Ty(M.getContext()); 431 PointerType *Int32PtrTy = PointerType::getUnqual(M.getContext()); 432 IntegerType *Int64Ty = Type::getInt64Ty(M.getContext()); 433 IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0); 434 435 // Indirect function call index assignment counter for WebAssembly 436 uint64_t IndirectIndex = 1; 437 438 // Mapping from type identifiers to the call sites that test them, as well as 439 // whether the type identifier needs to be exported to ThinLTO backends as 440 // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId). 441 struct TypeIdUserInfo { 442 std::vector<CallInst *> CallSites; 443 bool IsExported = false; 444 }; 445 DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers; 446 447 /// This structure describes how to lower type tests for a particular type 448 /// identifier. It is either built directly from the global analysis (during 449 /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type 450 /// identifier summaries and external symbol references (in ThinLTO backends). 451 struct TypeIdLowering { 452 TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat; 453 454 /// All except Unsat: the start address within the combined global. 455 Constant *OffsetedGlobal; 456 457 /// ByteArray, Inline, AllOnes: log2 of the required global alignment 458 /// relative to the start address. 
459 Constant *AlignLog2; 460 461 /// ByteArray, Inline, AllOnes: one less than the size of the memory region 462 /// covering members of this type identifier as a multiple of 2^AlignLog2. 463 Constant *SizeM1; 464 465 /// ByteArray: the byte array to test the address against. 466 Constant *TheByteArray; 467 468 /// ByteArray: the bit mask to apply to bytes loaded from the byte array. 469 Constant *BitMask; 470 471 /// Inline: the bit mask to test the address against. 472 Constant *InlineBits; 473 }; 474 475 std::vector<ByteArrayInfo> ByteArrayInfos; 476 477 Function *WeakInitializerFn = nullptr; 478 479 GlobalVariable *GlobalAnnotation; 480 DenseSet<Value *> FunctionAnnotations; 481 482 bool shouldExportConstantsAsAbsoluteSymbols(); 483 uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); 484 TypeIdLowering importTypeId(StringRef TypeId); 485 void importTypeTest(CallInst *CI); 486 void importFunction(Function *F, bool isJumpTableCanonical, 487 std::vector<GlobalAlias *> &AliasesToErase); 488 489 BitSetInfo 490 buildBitSet(Metadata *TypeId, 491 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout); 492 ByteArrayInfo *createByteArray(BitSetInfo &BSI); 493 void allocateByteArrays(); 494 Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL, 495 Value *BitOffset); 496 void lowerTypeTestCalls( 497 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, 498 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout); 499 Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI, 500 const TypeIdLowering &TIL); 501 502 void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds, 503 ArrayRef<GlobalTypeMember *> Globals); 504 Triple::ArchType 505 selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions); 506 bool hasBranchTargetEnforcement(); 507 unsigned getJumpTableEntrySize(); 508 Type *getJumpTableEntryType(); 509 void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS, 510 Triple::ArchType JumpTableArch, 511 SmallVectorImpl<Value *> &AsmArgs, Function *Dest); 512 void verifyTypeMDNode(GlobalObject *GO, MDNode *Type); 513 void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds, 514 ArrayRef<GlobalTypeMember *> Functions); 515 void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds, 516 ArrayRef<GlobalTypeMember *> Functions); 517 void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds, 518 ArrayRef<GlobalTypeMember *> Functions); 519 void 520 buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds, 521 ArrayRef<GlobalTypeMember *> Globals, 522 ArrayRef<ICallBranchFunnel *> ICallBranchFunnels); 523 524 void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT, 525 bool IsJumpTableCanonical); 526 void moveInitializerToModuleConstructor(GlobalVariable *GV); 527 void findGlobalVariableUsersOf(Constant *C, 528 SmallSetVector<GlobalVariable *, 8> &Out); 529 530 void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions); 531 532 /// replaceCfiUses - Go through the uses list for this definition 533 /// and make each use point to "V" instead of "this" when the use is outside 534 /// the block. 'This's use list is expected to have at least one element. 535 /// Unlike replaceAllUsesWith this function skips blockaddr and direct call 536 /// uses. 537 void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical); 538 539 /// replaceDirectCalls - Go through the uses list for this definition and 540 /// replace each use, which is a direct function call. 
541 void replaceDirectCalls(Value *Old, Value *New); 542 543 bool isFunctionAnnotation(Value *V) const { 544 return FunctionAnnotations.contains(V); 545 } 546 547 public: 548 LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM, 549 ModuleSummaryIndex *ExportSummary, 550 const ModuleSummaryIndex *ImportSummary, 551 DropTestKind DropTypeTests); 552 553 bool lower(); 554 555 // Lower the module using the action and summary passed as command line 556 // arguments. For testing purposes only. 557 static bool runForTesting(Module &M, ModuleAnalysisManager &AM); 558 }; 559 } // end anonymous namespace 560 561 /// Build a bit set for TypeId using the object layouts in 562 /// GlobalLayout. 563 BitSetInfo LowerTypeTestsModule::buildBitSet( 564 Metadata *TypeId, 565 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) { 566 BitSetBuilder BSB; 567 568 // Compute the byte offset of each address associated with this type 569 // identifier. 570 for (const auto &GlobalAndOffset : GlobalLayout) { 571 for (MDNode *Type : GlobalAndOffset.first->types()) { 572 if (Type->getOperand(1) != TypeId) 573 continue; 574 uint64_t Offset = 575 cast<ConstantInt>( 576 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 577 ->getZExtValue(); 578 BSB.addOffset(GlobalAndOffset.second + Offset); 579 } 580 } 581 582 return BSB.build(); 583 } 584 585 /// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in 586 /// Bits. This pattern matches to the bt instruction on x86. 587 static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, 588 Value *BitOffset) { 589 auto BitsType = cast<IntegerType>(Bits->getType()); 590 unsigned BitWidth = BitsType->getBitWidth(); 591 592 BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType); 593 Value *BitIndex = 594 B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1)); 595 Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex); 596 Value *MaskedBits = B.CreateAnd(Bits, BitMask); 597 return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); 598 } 599 600 ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) { 601 // Create globals to stand in for byte arrays and masks. These never actually 602 // get initialized, we RAUW and erase them later in allocateByteArrays() once 603 // we know the offset and mask to use. 
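  // Note that Int8Ty here is only a stand-in type: once allocateByteArrays()
  // has laid out the combined array, every use of ByteArrayGlobal is
  // redirected to a private "bits" alias of a GEP into that array, and every
  // use of MaskGlobal to an inttoptr'd constant mask, after which both
  // placeholders are erased.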
604 auto ByteArrayGlobal = new GlobalVariable( 605 M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); 606 auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true, 607 GlobalValue::PrivateLinkage, nullptr); 608 609 ByteArrayInfos.emplace_back(); 610 ByteArrayInfo *BAI = &ByteArrayInfos.back(); 611 612 BAI->Bits = BSI.Bits; 613 BAI->BitSize = BSI.BitSize; 614 BAI->ByteArray = ByteArrayGlobal; 615 BAI->MaskGlobal = MaskGlobal; 616 return BAI; 617 } 618 619 void LowerTypeTestsModule::allocateByteArrays() { 620 llvm::stable_sort(ByteArrayInfos, 621 [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { 622 return BAI1.BitSize > BAI2.BitSize; 623 }); 624 625 std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); 626 627 ByteArrayBuilder BAB; 628 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { 629 ByteArrayInfo *BAI = &ByteArrayInfos[I]; 630 631 uint8_t Mask; 632 BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); 633 634 BAI->MaskGlobal->replaceAllUsesWith( 635 ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy)); 636 BAI->MaskGlobal->eraseFromParent(); 637 if (BAI->MaskPtr) 638 *BAI->MaskPtr = Mask; 639 } 640 641 Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes); 642 auto ByteArray = 643 new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true, 644 GlobalValue::PrivateLinkage, ByteArrayConst); 645 646 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { 647 ByteArrayInfo *BAI = &ByteArrayInfos[I]; 648 649 Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), 650 ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; 651 Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( 652 ByteArrayConst->getType(), ByteArray, Idxs); 653 654 // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures 655 // that the pc-relative displacement is folded into the lea instead of the 656 // test instruction getting another displacement. 657 GlobalAlias *Alias = GlobalAlias::create( 658 Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); 659 BAI->ByteArray->replaceAllUsesWith(Alias); 660 BAI->ByteArray->eraseFromParent(); 661 } 662 663 ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + 664 BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + 665 BAB.BitAllocs[6] + BAB.BitAllocs[7]; 666 ByteArraySizeBytes = BAB.Bytes.size(); 667 } 668 669 /// Build a test that bit BitOffset is set in the type identifier that was 670 /// lowered to TIL, which must be either an Inline or a ByteArray. 671 Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, 672 const TypeIdLowering &TIL, 673 Value *BitOffset) { 674 if (TIL.TheKind == TypeTestResolution::Inline) { 675 // If the bit set is sufficiently small, we can avoid a load by bit testing 676 // a constant. 677 return createMaskedBitTest(B, TIL.InlineBits, BitOffset); 678 } else { 679 Constant *ByteArray = TIL.TheByteArray; 680 if (AvoidReuse && !ImportSummary) { 681 // Each use of the byte array uses a different alias. This makes the 682 // backend less likely to reuse previously computed byte array addresses, 683 // improving the security of the CFI mechanism based on this pass. 684 // This won't work when importing because TheByteArray is external. 
685 ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage, 686 "bits_use", ByteArray, &M); 687 } 688 689 Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset); 690 Value *Byte = B.CreateLoad(Int8Ty, ByteAddr); 691 692 Value *ByteAndMask = 693 B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty)); 694 return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); 695 } 696 } 697 698 static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, 699 Value *V, uint64_t COffset) { 700 if (auto GV = dyn_cast<GlobalObject>(V)) { 701 SmallVector<MDNode *, 2> Types; 702 GV->getMetadata(LLVMContext::MD_type, Types); 703 for (MDNode *Type : Types) { 704 if (Type->getOperand(1) != TypeId) 705 continue; 706 uint64_t Offset = 707 cast<ConstantInt>( 708 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 709 ->getZExtValue(); 710 if (COffset == Offset) 711 return true; 712 } 713 return false; 714 } 715 716 if (auto GEP = dyn_cast<GEPOperator>(V)) { 717 APInt APOffset(DL.getIndexSizeInBits(0), 0); 718 bool Result = GEP->accumulateConstantOffset(DL, APOffset); 719 if (!Result) 720 return false; 721 COffset += APOffset.getZExtValue(); 722 return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset); 723 } 724 725 if (auto Op = dyn_cast<Operator>(V)) { 726 if (Op->getOpcode() == Instruction::BitCast) 727 return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset); 728 729 if (Op->getOpcode() == Instruction::Select) 730 return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) && 731 isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset); 732 } 733 734 return false; 735 } 736 737 /// Lower a llvm.type.test call to its implementation. Returns the value to 738 /// replace the call with. 739 Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, 740 const TypeIdLowering &TIL) { 741 // Delay lowering if the resolution is currently unknown. 742 if (TIL.TheKind == TypeTestResolution::Unknown) 743 return nullptr; 744 if (TIL.TheKind == TypeTestResolution::Unsat) 745 return ConstantInt::getFalse(M.getContext()); 746 747 Value *Ptr = CI->getArgOperand(0); 748 const DataLayout &DL = M.getDataLayout(); 749 if (isKnownTypeIdMember(TypeId, DL, Ptr, 0)) 750 return ConstantInt::getTrue(M.getContext()); 751 752 BasicBlock *InitialBB = CI->getParent(); 753 754 IRBuilder<> B(CI); 755 756 Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); 757 758 Constant *OffsetedGlobalAsInt = 759 ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy); 760 if (TIL.TheKind == TypeTestResolution::Single) 761 return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); 762 763 Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); 764 765 // We need to check that the offset both falls within our range and is 766 // suitably aligned. We can check both properties at the same time by 767 // performing a right rotate by log2(alignment) followed by an integer 768 // comparison against the bitset size. The rotate will move the lower 769 // order bits that need to be zero into the higher order bits of the 770 // result, causing the comparison to fail if they are nonzero. The rotate 771 // also conveniently gives us a bit offset to use during the load from 772 // the bitset. 
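  //
  // Worked example with made-up numbers: suppose AlignLog2 is 3 and SizeM1 is
  // 7 (eight possible member slots spaced 8 bytes apart). For an in-range,
  // aligned PtrOffset of 0x28 the rotate gives 0x28 >> 3 == 5, which is <= 7,
  // so the test proceeds using bit offset 5. For a misaligned PtrOffset of
  // 0x29, the low set bit is rotated into the high bits of the word, producing
  // a value far larger than SizeM1, so the range check below fails.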
773 Value *OffsetSHR = 774 B.CreateLShr(PtrOffset, B.CreateZExt(TIL.AlignLog2, IntPtrTy)); 775 Value *OffsetSHL = B.CreateShl( 776 PtrOffset, B.CreateZExt( 777 ConstantExpr::getSub( 778 ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)), 779 TIL.AlignLog2), 780 IntPtrTy)); 781 Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); 782 783 Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1); 784 785 // If the bit set is all ones, testing against it is unnecessary. 786 if (TIL.TheKind == TypeTestResolution::AllOnes) 787 return OffsetInRange; 788 789 // See if the intrinsic is used in the following common pattern: 790 // br(llvm.type.test(...), thenbb, elsebb) 791 // where nothing happens between the type test and the br. 792 // If so, create slightly simpler IR. 793 if (CI->hasOneUse()) 794 if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin())) 795 if (CI->getNextNode() == Br) { 796 BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator()); 797 BasicBlock *Else = Br->getSuccessor(1); 798 BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange); 799 NewBr->setMetadata(LLVMContext::MD_prof, 800 Br->getMetadata(LLVMContext::MD_prof)); 801 ReplaceInstWithInst(InitialBB->getTerminator(), NewBr); 802 803 // Update phis in Else resulting from InitialBB being split 804 for (auto &Phi : Else->phis()) 805 Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB); 806 807 IRBuilder<> ThenB(CI); 808 return createBitSetTest(ThenB, TIL, BitOffset); 809 } 810 811 IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); 812 813 // Now that we know that the offset is in range and aligned, load the 814 // appropriate bit from the bitset. 815 Value *Bit = createBitSetTest(ThenB, TIL, BitOffset); 816 817 // The value we want is 0 if we came directly from the initial block 818 // (having failed the range or alignment checks), or the loaded bit if 819 // we came from the block in which we loaded it. 820 B.SetInsertPoint(CI); 821 PHINode *P = B.CreatePHI(Int1Ty, 2); 822 P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB); 823 P->addIncoming(Bit, ThenB.GetInsertBlock()); 824 return P; 825 } 826 827 /// Given a disjoint set of type identifiers and globals, lay out the globals, 828 /// build the bit sets and lower the llvm.type.test calls. 829 void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( 830 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) { 831 // Build a new global with the combined contents of the referenced globals. 832 // This global is a struct whose even-indexed elements contain the original 833 // contents of the referenced globals and whose odd-indexed elements contain 834 // any padding required to align the next element to the next power of 2 plus 835 // any additional padding required to meet its alignment requirements. 
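  //
  // Illustration with hypothetical globals g1, g2, g3: the combined global is
  // roughly { g1, [N1 x i8] pad, g2, [N2 x i8] pad, g3 }, with each padding
  // array sized so that the member before it occupies roughly the next
  // power-of-2 number of bytes (capped at 32, see below).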
836 std::vector<Constant *> GlobalInits; 837 const DataLayout &DL = M.getDataLayout(); 838 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; 839 Align MaxAlign; 840 uint64_t CurOffset = 0; 841 uint64_t DesiredPadding = 0; 842 for (GlobalTypeMember *G : Globals) { 843 auto *GV = cast<GlobalVariable>(G->getGlobal()); 844 Align Alignment = 845 DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType()); 846 MaxAlign = std::max(MaxAlign, Alignment); 847 uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment); 848 GlobalLayout[G] = GVOffset; 849 if (GVOffset != 0) { 850 uint64_t Padding = GVOffset - CurOffset; 851 GlobalInits.push_back( 852 ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); 853 } 854 855 GlobalInits.push_back(GV->getInitializer()); 856 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); 857 CurOffset = GVOffset + InitSize; 858 859 // Compute the amount of padding that we'd like for the next element. 860 DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize; 861 862 // Experiments of different caps with Chromium on both x64 and ARM64 863 // have shown that the 32-byte cap generates the smallest binary on 864 // both platforms while different caps yield similar performance. 865 // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html) 866 if (DesiredPadding > 32) 867 DesiredPadding = alignTo(InitSize, 32) - InitSize; 868 } 869 870 Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); 871 auto *CombinedGlobal = 872 new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, 873 GlobalValue::PrivateLinkage, NewInit); 874 CombinedGlobal->setAlignment(MaxAlign); 875 876 StructType *NewTy = cast<StructType>(NewInit->getType()); 877 lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); 878 879 // Build aliases pointing to offsets into the combined global for each 880 // global from which we built the combined global, and replace references 881 // to the original globals with references to the aliases. 882 for (unsigned I = 0; I != Globals.size(); ++I) { 883 GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal()); 884 885 // Multiply by 2 to account for padding elements. 886 Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), 887 ConstantInt::get(Int32Ty, I * 2)}; 888 Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr( 889 NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); 890 assert(GV->getType()->getAddressSpace() == 0); 891 GlobalAlias *GAlias = 892 GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(), 893 "", CombinedGlobalElemPtr, &M); 894 GAlias->setVisibility(GV->getVisibility()); 895 GAlias->takeName(GV); 896 GV->replaceAllUsesWith(GAlias); 897 GV->eraseFromParent(); 898 } 899 } 900 901 bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() { 902 return (Arch == Triple::x86 || Arch == Triple::x86_64) && 903 ObjectFormat == Triple::ELF; 904 } 905 906 /// Export the given type identifier so that ThinLTO backends may import it. 907 /// Type identifiers are exported by adding coarse-grained information about how 908 /// to test the type identifier to the summary, and creating symbols in the 909 /// object file (aliases and absolute symbols) containing fine-grained 910 /// information about the type identifier. 911 /// 912 /// Returns a pointer to the location in which to store the bitmask, if 913 /// applicable. 
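///
/// For example, for a type identifier "_ZTS1A" (an illustrative name) this
/// creates hidden aliases such as __typeid__ZTS1A_global_addr and, depending
/// on the resolution kind and on shouldExportConstantsAsAbsoluteSymbols(),
/// __typeid__ZTS1A_align, __typeid__ZTS1A_size_m1, __typeid__ZTS1A_byte_array,
/// __typeid__ZTS1A_bit_mask and __typeid__ZTS1A_inline_bits; when absolute
/// symbols are not used, the constant values are stored in the summary
/// instead.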
914 uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId, 915 const TypeIdLowering &TIL) { 916 TypeTestResolution &TTRes = 917 ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes; 918 TTRes.TheKind = TIL.TheKind; 919 920 auto ExportGlobal = [&](StringRef Name, Constant *C) { 921 GlobalAlias *GA = 922 GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage, 923 "__typeid_" + TypeId + "_" + Name, C, &M); 924 GA->setVisibility(GlobalValue::HiddenVisibility); 925 }; 926 927 auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) { 928 if (shouldExportConstantsAsAbsoluteSymbols()) 929 ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Int8PtrTy)); 930 else 931 Storage = cast<ConstantInt>(C)->getZExtValue(); 932 }; 933 934 if (TIL.TheKind != TypeTestResolution::Unsat) 935 ExportGlobal("global_addr", TIL.OffsetedGlobal); 936 937 if (TIL.TheKind == TypeTestResolution::ByteArray || 938 TIL.TheKind == TypeTestResolution::Inline || 939 TIL.TheKind == TypeTestResolution::AllOnes) { 940 ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2); 941 ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1); 942 943 uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1; 944 if (TIL.TheKind == TypeTestResolution::Inline) 945 TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6; 946 else 947 TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32; 948 } 949 950 if (TIL.TheKind == TypeTestResolution::ByteArray) { 951 ExportGlobal("byte_array", TIL.TheByteArray); 952 if (shouldExportConstantsAsAbsoluteSymbols()) 953 ExportGlobal("bit_mask", TIL.BitMask); 954 else 955 return &TTRes.BitMask; 956 } 957 958 if (TIL.TheKind == TypeTestResolution::Inline) 959 ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits); 960 961 return nullptr; 962 } 963 964 LowerTypeTestsModule::TypeIdLowering 965 LowerTypeTestsModule::importTypeId(StringRef TypeId) { 966 const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId); 967 if (!TidSummary) 968 return {}; // Unsat: no globals match this type id. 969 const TypeTestResolution &TTRes = TidSummary->TTRes; 970 971 TypeIdLowering TIL; 972 TIL.TheKind = TTRes.TheKind; 973 974 auto ImportGlobal = [&](StringRef Name) { 975 // Give the global a type of length 0 so that it is not assumed not to alias 976 // with any other global. 977 Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), 978 Int8Arr0Ty); 979 if (auto *GV = dyn_cast<GlobalVariable>(C)) 980 GV->setVisibility(GlobalValue::HiddenVisibility); 981 return C; 982 }; 983 984 auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth, 985 Type *Ty) { 986 if (!shouldExportConstantsAsAbsoluteSymbols()) { 987 Constant *C = 988 ConstantInt::get(isa<IntegerType>(Ty) ? Ty : Int64Ty, Const); 989 if (!isa<IntegerType>(Ty)) 990 C = ConstantExpr::getIntToPtr(C, Ty); 991 return C; 992 } 993 994 Constant *C = ImportGlobal(Name); 995 auto *GV = cast<GlobalVariable>(C->stripPointerCasts()); 996 if (isa<IntegerType>(Ty)) 997 C = ConstantExpr::getPtrToInt(C, Ty); 998 if (GV->getMetadata(LLVMContext::MD_absolute_symbol)) 999 return C; 1000 1001 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) { 1002 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min)); 1003 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max)); 1004 GV->setMetadata(LLVMContext::MD_absolute_symbol, 1005 MDNode::get(M.getContext(), {MinC, MaxC})); 1006 }; 1007 if (AbsWidth == IntPtrTy->getBitWidth()) 1008 SetAbsRange(~0ull, ~0ull); // Full set. 
1009 else 1010 SetAbsRange(0, 1ull << AbsWidth); 1011 return C; 1012 }; 1013 1014 if (TIL.TheKind != TypeTestResolution::Unsat) 1015 TIL.OffsetedGlobal = ImportGlobal("global_addr"); 1016 1017 if (TIL.TheKind == TypeTestResolution::ByteArray || 1018 TIL.TheKind == TypeTestResolution::Inline || 1019 TIL.TheKind == TypeTestResolution::AllOnes) { 1020 TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty); 1021 TIL.SizeM1 = 1022 ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy); 1023 } 1024 1025 if (TIL.TheKind == TypeTestResolution::ByteArray) { 1026 TIL.TheByteArray = ImportGlobal("byte_array"); 1027 TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy); 1028 } 1029 1030 if (TIL.TheKind == TypeTestResolution::Inline) 1031 TIL.InlineBits = ImportConstant( 1032 "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth, 1033 TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty); 1034 1035 return TIL; 1036 } 1037 1038 void LowerTypeTestsModule::importTypeTest(CallInst *CI) { 1039 auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1)); 1040 if (!TypeIdMDVal) 1041 report_fatal_error("Second argument of llvm.type.test must be metadata"); 1042 1043 auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata()); 1044 // If this is a local unpromoted type, which doesn't have a metadata string, 1045 // treat as Unknown and delay lowering, so that we can still utilize it for 1046 // later optimizations. 1047 if (!TypeIdStr) 1048 return; 1049 1050 TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); 1051 Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); 1052 if (Lowered) { 1053 CI->replaceAllUsesWith(Lowered); 1054 CI->eraseFromParent(); 1055 } 1056 } 1057 1058 // ThinLTO backend: the function F has a jump table entry; update this module 1059 // accordingly. isJumpTableCanonical describes the type of the jump table entry. 1060 void LowerTypeTestsModule::importFunction( 1061 Function *F, bool isJumpTableCanonical, 1062 std::vector<GlobalAlias *> &AliasesToErase) { 1063 assert(F->getType()->getAddressSpace() == 0); 1064 1065 GlobalValue::VisibilityTypes Visibility = F->getVisibility(); 1066 std::string Name = std::string(F->getName()); 1067 1068 if (F->isDeclarationForLinker() && isJumpTableCanonical) { 1069 // Non-dso_local functions may be overriden at run time, 1070 // don't short curcuit them 1071 if (F->isDSOLocal()) { 1072 Function *RealF = Function::Create(F->getFunctionType(), 1073 GlobalValue::ExternalLinkage, 1074 F->getAddressSpace(), 1075 Name + ".cfi", &M); 1076 RealF->setVisibility(GlobalVariable::HiddenVisibility); 1077 replaceDirectCalls(F, RealF); 1078 } 1079 return; 1080 } 1081 1082 Function *FDecl; 1083 if (!isJumpTableCanonical) { 1084 // Either a declaration of an external function or a reference to a locally 1085 // defined jump table. 1086 FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, 1087 F->getAddressSpace(), Name + ".cfi_jt", &M); 1088 FDecl->setVisibility(GlobalValue::HiddenVisibility); 1089 } else { 1090 F->setName(Name + ".cfi"); 1091 F->setLinkage(GlobalValue::ExternalLinkage); 1092 FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, 1093 F->getAddressSpace(), Name, &M); 1094 FDecl->setVisibility(Visibility); 1095 Visibility = GlobalValue::HiddenVisibility; 1096 1097 // Delete aliases pointing to this function, they'll be re-created in the 1098 // merged output. 
Don't do it yet though because ScopedSaveAliaseesAndUsed 1099 // will want to reset the aliasees first. 1100 for (auto &U : F->uses()) { 1101 if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) { 1102 Function *AliasDecl = Function::Create( 1103 F->getFunctionType(), GlobalValue::ExternalLinkage, 1104 F->getAddressSpace(), "", &M); 1105 AliasDecl->takeName(A); 1106 A->replaceAllUsesWith(AliasDecl); 1107 AliasesToErase.push_back(A); 1108 } 1109 } 1110 } 1111 1112 if (F->hasExternalWeakLinkage()) 1113 replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical); 1114 else 1115 replaceCfiUses(F, FDecl, isJumpTableCanonical); 1116 1117 // Set visibility late because it's used in replaceCfiUses() to determine 1118 // whether uses need to be replaced. 1119 F->setVisibility(Visibility); 1120 } 1121 1122 void LowerTypeTestsModule::lowerTypeTestCalls( 1123 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, 1124 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) { 1125 // For each type identifier in this disjoint set... 1126 for (Metadata *TypeId : TypeIds) { 1127 // Build the bitset. 1128 BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout); 1129 LLVM_DEBUG({ 1130 if (auto MDS = dyn_cast<MDString>(TypeId)) 1131 dbgs() << MDS->getString() << ": "; 1132 else 1133 dbgs() << "<unnamed>: "; 1134 BSI.print(dbgs()); 1135 }); 1136 1137 ByteArrayInfo *BAI = nullptr; 1138 TypeIdLowering TIL; 1139 TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr( 1140 Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)), 1141 TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2); 1142 TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1); 1143 if (BSI.isAllOnes()) { 1144 TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single 1145 : TypeTestResolution::AllOnes; 1146 } else if (BSI.BitSize <= 64) { 1147 TIL.TheKind = TypeTestResolution::Inline; 1148 uint64_t InlineBits = 0; 1149 for (auto Bit : BSI.Bits) 1150 InlineBits |= uint64_t(1) << Bit; 1151 if (InlineBits == 0) 1152 TIL.TheKind = TypeTestResolution::Unsat; 1153 else 1154 TIL.InlineBits = ConstantInt::get( 1155 (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits); 1156 } else { 1157 TIL.TheKind = TypeTestResolution::ByteArray; 1158 ++NumByteArraysCreated; 1159 BAI = createByteArray(BSI); 1160 TIL.TheByteArray = BAI->ByteArray; 1161 TIL.BitMask = BAI->MaskGlobal; 1162 } 1163 1164 TypeIdUserInfo &TIUI = TypeIdUsers[TypeId]; 1165 1166 if (TIUI.IsExported) { 1167 uint8_t *MaskPtr = exportTypeId(cast<MDString>(TypeId)->getString(), TIL); 1168 if (BAI) 1169 BAI->MaskPtr = MaskPtr; 1170 } 1171 1172 // Lower each call to llvm.type.test for this type identifier. 
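    // (lowerTypeTestCall returns null when the resolution is still Unknown;
    // such calls are deliberately left in place so they can be lowered once
    // the resolution is known.)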
1173 for (CallInst *CI : TIUI.CallSites) { 1174 ++NumTypeTestCallsLowered; 1175 Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); 1176 if (Lowered) { 1177 CI->replaceAllUsesWith(Lowered); 1178 CI->eraseFromParent(); 1179 } 1180 } 1181 } 1182 } 1183 1184 void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { 1185 if (Type->getNumOperands() != 2) 1186 report_fatal_error("All operands of type metadata must have 2 elements"); 1187 1188 if (GO->isThreadLocal()) 1189 report_fatal_error("Bit set element may not be thread-local"); 1190 if (isa<GlobalVariable>(GO) && GO->hasSection()) 1191 report_fatal_error( 1192 "A member of a type identifier may not have an explicit section"); 1193 1194 // FIXME: We previously checked that global var member of a type identifier 1195 // must be a definition, but the IR linker may leave type metadata on 1196 // declarations. We should restore this check after fixing PR31759. 1197 1198 auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0)); 1199 if (!OffsetConstMD) 1200 report_fatal_error("Type offset must be a constant"); 1201 auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); 1202 if (!OffsetInt) 1203 report_fatal_error("Type offset must be an integer constant"); 1204 } 1205 1206 static const unsigned kX86JumpTableEntrySize = 8; 1207 static const unsigned kX86IBTJumpTableEntrySize = 16; 1208 static const unsigned kARMJumpTableEntrySize = 4; 1209 static const unsigned kARMBTIJumpTableEntrySize = 8; 1210 static const unsigned kARMv6MJumpTableEntrySize = 16; 1211 static const unsigned kRISCVJumpTableEntrySize = 8; 1212 static const unsigned kLOONGARCH64JumpTableEntrySize = 8; 1213 1214 bool LowerTypeTestsModule::hasBranchTargetEnforcement() { 1215 if (HasBranchTargetEnforcement == -1) { 1216 // First time this query has been called. Find out the answer by checking 1217 // the module flags. 1218 if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( 1219 M.getModuleFlag("branch-target-enforcement"))) 1220 HasBranchTargetEnforcement = (BTE->getZExtValue() != 0); 1221 else 1222 HasBranchTargetEnforcement = 0; 1223 } 1224 return HasBranchTargetEnforcement; 1225 } 1226 1227 unsigned LowerTypeTestsModule::getJumpTableEntrySize() { 1228 switch (JumpTableArch) { 1229 case Triple::x86: 1230 case Triple::x86_64: 1231 if (const auto *MD = mdconst::extract_or_null<ConstantInt>( 1232 M.getModuleFlag("cf-protection-branch"))) 1233 if (MD->getZExtValue()) 1234 return kX86IBTJumpTableEntrySize; 1235 return kX86JumpTableEntrySize; 1236 case Triple::arm: 1237 return kARMJumpTableEntrySize; 1238 case Triple::thumb: 1239 if (CanUseThumbBWJumpTable) { 1240 if (hasBranchTargetEnforcement()) 1241 return kARMBTIJumpTableEntrySize; 1242 return kARMJumpTableEntrySize; 1243 } else { 1244 return kARMv6MJumpTableEntrySize; 1245 } 1246 case Triple::aarch64: 1247 if (hasBranchTargetEnforcement()) 1248 return kARMBTIJumpTableEntrySize; 1249 return kARMJumpTableEntrySize; 1250 case Triple::riscv32: 1251 case Triple::riscv64: 1252 return kRISCVJumpTableEntrySize; 1253 case Triple::loongarch64: 1254 return kLOONGARCH64JumpTableEntrySize; 1255 default: 1256 report_fatal_error("Unsupported architecture for jump tables"); 1257 } 1258 } 1259 1260 // Create a jump table entry for the target. This consists of an instruction 1261 // sequence containing a relative branch to Dest. Appends inline asm text, 1262 // constraints and arguments to AsmOS, ConstraintOS and AsmArgs. 
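//
// For example, on x86-64 without IBT the first entry (ArgIndex == 0) expands
// to roughly:
//
//   jmp ${0:c}@plt   ; 5 bytes
//   int3             ; 1 byte
//   int3             ; 1 byte
//   int3             ; 1 byte
//
// padding the entry out to kX86JumpTableEntrySize (8) bytes so that entries
// remain a power-of-2 distance apart.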
1263 void LowerTypeTestsModule::createJumpTableEntry( 1264 raw_ostream &AsmOS, raw_ostream &ConstraintOS, 1265 Triple::ArchType JumpTableArch, SmallVectorImpl<Value *> &AsmArgs, 1266 Function *Dest) { 1267 unsigned ArgIndex = AsmArgs.size(); 1268 1269 if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) { 1270 bool Endbr = false; 1271 if (const auto *MD = mdconst::extract_or_null<ConstantInt>( 1272 Dest->getParent()->getModuleFlag("cf-protection-branch"))) 1273 Endbr = !MD->isZero(); 1274 if (Endbr) 1275 AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n"); 1276 AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n"; 1277 if (Endbr) 1278 AsmOS << ".balign 16, 0xcc\n"; 1279 else 1280 AsmOS << "int3\nint3\nint3\n"; 1281 } else if (JumpTableArch == Triple::arm) { 1282 AsmOS << "b $" << ArgIndex << "\n"; 1283 } else if (JumpTableArch == Triple::aarch64) { 1284 if (hasBranchTargetEnforcement()) 1285 AsmOS << "bti c\n"; 1286 AsmOS << "b $" << ArgIndex << "\n"; 1287 } else if (JumpTableArch == Triple::thumb) { 1288 if (!CanUseThumbBWJumpTable) { 1289 // In Armv6-M, this sequence will generate a branch without corrupting 1290 // any registers. We use two stack words; in the second, we construct the 1291 // address we'll pop into pc, and the first is used to save and restore 1292 // r0 which we use as a temporary register. 1293 // 1294 // To support position-independent use cases, the offset of the target 1295 // function is stored as a relative offset (which will expand into an 1296 // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other 1297 // object file types), and added to pc after we load it. (The alternative 1298 // B.W is automatically pc-relative.) 1299 // 1300 // There are five 16-bit Thumb instructions here, so the .balign 4 adds a 1301 // sixth halfword of padding, and then the offset consumes a further 4 1302 // bytes, for a total of 16, which is very convenient since entries in 1303 // this jump table need to have power-of-two size. 1304 AsmOS << "push {r0,r1}\n" 1305 << "ldr r0, 1f\n" 1306 << "0: add r0, r0, pc\n" 1307 << "str r0, [sp, #4]\n" 1308 << "pop {r0,pc}\n" 1309 << ".balign 4\n" 1310 << "1: .word $" << ArgIndex << " - (0b + 4)\n"; 1311 } else { 1312 if (hasBranchTargetEnforcement()) 1313 AsmOS << "bti\n"; 1314 AsmOS << "b.w $" << ArgIndex << "\n"; 1315 } 1316 } else if (JumpTableArch == Triple::riscv32 || 1317 JumpTableArch == Triple::riscv64) { 1318 AsmOS << "tail $" << ArgIndex << "@plt\n"; 1319 } else if (JumpTableArch == Triple::loongarch64) { 1320 AsmOS << "pcalau12i $$t0, %pc_hi20($" << ArgIndex << ")\n" 1321 << "jirl $$r0, $$t0, %pc_lo12($" << ArgIndex << ")\n"; 1322 } else { 1323 report_fatal_error("Unsupported architecture for jump tables"); 1324 } 1325 1326 ConstraintOS << (ArgIndex > 0 ? ",s" : "s"); 1327 AsmArgs.push_back(Dest); 1328 } 1329 1330 Type *LowerTypeTestsModule::getJumpTableEntryType() { 1331 return ArrayType::get(Int8Ty, getJumpTableEntrySize()); 1332 } 1333 1334 /// Given a disjoint set of type identifiers and functions, build the bit sets 1335 /// and lower the llvm.type.test calls, architecture dependently. 
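///
/// Most targets get an inline-asm jump table (buildBitSetsFromFunctionsNative);
/// WebAssembly has no such jump table and instead numbers the functions using
/// the IndirectIndex counter (buildBitSetsFromFunctionsWASM).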
1336 void LowerTypeTestsModule::buildBitSetsFromFunctions( 1337 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { 1338 if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || 1339 Arch == Triple::thumb || Arch == Triple::aarch64 || 1340 Arch == Triple::riscv32 || Arch == Triple::riscv64 || 1341 Arch == Triple::loongarch64) 1342 buildBitSetsFromFunctionsNative(TypeIds, Functions); 1343 else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) 1344 buildBitSetsFromFunctionsWASM(TypeIds, Functions); 1345 else 1346 report_fatal_error("Unsupported architecture for jump tables"); 1347 } 1348 1349 void LowerTypeTestsModule::moveInitializerToModuleConstructor( 1350 GlobalVariable *GV) { 1351 if (WeakInitializerFn == nullptr) { 1352 WeakInitializerFn = Function::Create( 1353 FunctionType::get(Type::getVoidTy(M.getContext()), 1354 /* IsVarArg */ false), 1355 GlobalValue::InternalLinkage, 1356 M.getDataLayout().getProgramAddressSpace(), 1357 "__cfi_global_var_init", &M); 1358 BasicBlock *BB = 1359 BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn); 1360 ReturnInst::Create(M.getContext(), BB); 1361 WeakInitializerFn->setSection( 1362 ObjectFormat == Triple::MachO 1363 ? "__TEXT,__StaticInit,regular,pure_instructions" 1364 : ".text.startup"); 1365 // This code is equivalent to relocation application, and should run at the 1366 // earliest possible time (i.e. with the highest priority). 1367 appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0); 1368 } 1369 1370 IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator()); 1371 GV->setConstant(false); 1372 IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlign()); 1373 GV->setInitializer(Constant::getNullValue(GV->getValueType())); 1374 } 1375 1376 void LowerTypeTestsModule::findGlobalVariableUsersOf( 1377 Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) { 1378 for (auto *U : C->users()){ 1379 if (auto *GV = dyn_cast<GlobalVariable>(U)) 1380 Out.insert(GV); 1381 else if (auto *C2 = dyn_cast<Constant>(U)) 1382 findGlobalVariableUsersOf(C2, Out); 1383 } 1384 } 1385 1386 // Replace all uses of F with (F ? JT : 0). 1387 void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( 1388 Function *F, Constant *JT, bool IsJumpTableCanonical) { 1389 // The target expression can not appear in a constant initializer on most 1390 // (all?) targets. Switch to a runtime initializer. 1391 SmallSetVector<GlobalVariable *, 8> GlobalVarUsers; 1392 findGlobalVariableUsersOf(F, GlobalVarUsers); 1393 for (auto *GV : GlobalVarUsers) { 1394 if (GV == GlobalAnnotation) 1395 continue; 1396 moveInitializerToModuleConstructor(GV); 1397 } 1398 1399 // Can not RAUW F with an expression that uses F. Replace with a temporary 1400 // placeholder first. 1401 Function *PlaceholderFn = 1402 Function::Create(cast<FunctionType>(F->getValueType()), 1403 GlobalValue::ExternalWeakLinkage, 1404 F->getAddressSpace(), "", &M); 1405 replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical); 1406 1407 convertUsersOfConstantsToInstructions(PlaceholderFn); 1408 // Don't use range based loop, because use list will be modified. 
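  // Each remaining use of the placeholder is rewritten in place to
  //   select (icmp ne F, null), JT, null
  // i.e. the jump table address when the weak function turns out to be
  // defined at link time, and null otherwise. For PHI operands the select is
  // materialized just before the terminator of the corresponding predecessor
  // block so the PHI's incoming value can simply be swapped.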
1409 while (!PlaceholderFn->use_empty()) { 1410 Use &U = *PlaceholderFn->use_begin(); 1411 auto *InsertPt = dyn_cast<Instruction>(U.getUser()); 1412 assert(InsertPt && "Non-instruction users should have been eliminated"); 1413 auto *PN = dyn_cast<PHINode>(InsertPt); 1414 if (PN) 1415 InsertPt = PN->getIncomingBlock(U)->getTerminator(); 1416 IRBuilder Builder(InsertPt); 1417 Value *ICmp = Builder.CreateICmp(CmpInst::ICMP_NE, F, 1418 Constant::getNullValue(F->getType())); 1419 Value *Select = Builder.CreateSelect(ICmp, JT, 1420 Constant::getNullValue(F->getType())); 1421 // For phi nodes, we need to update the incoming value for all operands 1422 // with the same predecessor. 1423 if (PN) 1424 PN->setIncomingValueForBlock(InsertPt->getParent(), Select); 1425 else 1426 U.set(Select); 1427 } 1428 PlaceholderFn->eraseFromParent(); 1429 } 1430 1431 static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) { 1432 Attribute TFAttr = F->getFnAttribute("target-features"); 1433 if (TFAttr.isValid()) { 1434 SmallVector<StringRef, 6> Features; 1435 TFAttr.getValueAsString().split(Features, ','); 1436 for (StringRef Feature : Features) { 1437 if (Feature == "-thumb-mode") 1438 return false; 1439 else if (Feature == "+thumb-mode") 1440 return true; 1441 } 1442 } 1443 1444 return ModuleArch == Triple::thumb; 1445 } 1446 1447 // Each jump table must be either ARM or Thumb as a whole for the bit-test math 1448 // to work. Pick one that matches the majority of members to minimize interop 1449 // veneers inserted by the linker. 1450 Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding( 1451 ArrayRef<GlobalTypeMember *> Functions) { 1452 if (Arch != Triple::arm && Arch != Triple::thumb) 1453 return Arch; 1454 1455 if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) { 1456 // In architectures that provide Arm and Thumb-1 but not Thumb-2, 1457 // we should always prefer the Arm jump table format, because the 1458 // Thumb-1 one is larger and slower. 1459 return Triple::arm; 1460 } 1461 1462 // Otherwise, go with majority vote. 1463 unsigned ArmCount = 0, ThumbCount = 0; 1464 for (const auto GTM : Functions) { 1465 if (!GTM->isJumpTableCanonical()) { 1466 // PLT stubs are always ARM. 1467 // FIXME: This is the wrong heuristic for non-canonical jump tables. 1468 ++ArmCount; 1469 continue; 1470 } 1471 1472 Function *F = cast<Function>(GTM->getGlobal()); 1473 ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount); 1474 } 1475 1476 return ArmCount > ThumbCount ? Triple::arm : Triple::thumb; 1477 } 1478 1479 void LowerTypeTestsModule::createJumpTable( 1480 Function *F, ArrayRef<GlobalTypeMember *> Functions) { 1481 std::string AsmStr, ConstraintStr; 1482 raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr); 1483 SmallVector<Value *, 16> AsmArgs; 1484 AsmArgs.reserve(Functions.size() * 2); 1485 1486 // Check if all entries have the NoUnwind attribute. 1487 // If all entries have it, we can safely mark the 1488 // cfi.jumptable as NoUnwind, otherwise, direct calls 1489 // to the jump table will not handle exceptions properly 1490 bool areAllEntriesNounwind = true; 1491 for (GlobalTypeMember *GTM : Functions) { 1492 if (!llvm::cast<llvm::Function>(GTM->getGlobal()) 1493 ->hasFnAttribute(llvm::Attribute::NoUnwind)) { 1494 areAllEntriesNounwind = false; 1495 } 1496 createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs, 1497 cast<Function>(GTM->getGlobal())); 1498 } 1499 1500 // Align the whole table by entry size. 
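  // Every entry is getJumpTableEntrySize() bytes, a power of two on all
  // supported targets, so aligning the table start to the entry size keeps
  // every entry address a multiple of the entry size. That even power-of-2
  // spacing is what lets lowerTypeTestCalls reuse the same bitset arithmetic
  // it uses for global variables (see the comment in
  // buildBitSetsFromFunctionsNative).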
1501 F->setAlignment(Align(getJumpTableEntrySize())); 1502 // Skip prologue. 1503 // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3. 1504 // Luckily, this function does not get any prologue even without the 1505 // attribute. 1506 if (OS != Triple::Win32) 1507 F->addFnAttr(Attribute::Naked); 1508 if (JumpTableArch == Triple::arm) 1509 F->addFnAttr("target-features", "-thumb-mode"); 1510 if (JumpTableArch == Triple::thumb) { 1511 if (hasBranchTargetEnforcement()) { 1512 // If we're generating a Thumb jump table with BTI, add a target-features 1513 // setting to ensure BTI can be assembled. 1514 F->addFnAttr("target-features", "+thumb-mode,+pacbti"); 1515 } else { 1516 F->addFnAttr("target-features", "+thumb-mode"); 1517 if (CanUseThumbBWJumpTable) { 1518 // Thumb jump table assembly needs Thumb2. The following attribute is 1519 // added by Clang for -march=armv7. 1520 F->addFnAttr("target-cpu", "cortex-a8"); 1521 } 1522 } 1523 } 1524 // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI 1525 // for the function to avoid double BTI. This is a no-op without 1526 // -mbranch-protection=. 1527 if (JumpTableArch == Triple::aarch64 || JumpTableArch == Triple::thumb) { 1528 if (F->hasFnAttribute("branch-target-enforcement")) 1529 F->removeFnAttr("branch-target-enforcement"); 1530 if (F->hasFnAttribute("sign-return-address")) 1531 F->removeFnAttr("sign-return-address"); 1532 } 1533 if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) { 1534 // Make sure the jump table assembly is not modified by the assembler or 1535 // the linker. 1536 F->addFnAttr("target-features", "-c,-relax"); 1537 } 1538 // When -fcf-protection= is used, the inline asm adds an ENDBR. Suppress ENDBR 1539 // for the function to avoid double ENDBR. This is a no-op without 1540 // -fcf-protection=. 1541 if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) 1542 F->addFnAttr(Attribute::NoCfCheck); 1543 1544 // Make sure we don't emit .eh_frame for this function if it isn't needed. 1545 if (areAllEntriesNounwind) 1546 F->addFnAttr(Attribute::NoUnwind); 1547 1548 // Make sure we do not inline any calls to the cfi.jumptable. 1549 F->addFnAttr(Attribute::NoInline); 1550 1551 BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F); 1552 IRBuilder<> IRB(BB); 1553 1554 SmallVector<Type *, 16> ArgTypes; 1555 ArgTypes.reserve(AsmArgs.size()); 1556 for (const auto &Arg : AsmArgs) 1557 ArgTypes.push_back(Arg->getType()); 1558 InlineAsm *JumpTableAsm = 1559 InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false), 1560 AsmOS.str(), ConstraintOS.str(), 1561 /*hasSideEffects=*/true); 1562 1563 IRB.CreateCall(JumpTableAsm, AsmArgs); 1564 IRB.CreateUnreachable(); 1565 } 1566 1567 /// Given a disjoint set of type identifiers and functions, build a jump table 1568 /// for the functions, build the bit sets and lower the llvm.type.test calls. 1569 void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( 1570 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { 1571 // Unlike the global bitset builder, the function bitset builder cannot 1572 // re-arrange functions in a particular order and base its calculations on the 1573 // layout of the functions' entry points, as we have no idea how large a 1574 // particular function will end up being (the size could even depend on what 1575 // this pass does!) 
Instead, we build a jump table, which is a block of code 1576 // consisting of one branch instruction for each of the functions in the bit 1577 // set that branches to the target function, and redirect any taken function 1578 // addresses to the corresponding jump table entry. In the object file's 1579 // symbol table, the symbols for the target functions also refer to the jump 1580 // table entries, so that addresses taken outside the module will pass any 1581 // verification done inside the module. 1582 // 1583 // In more concrete terms, suppose we have three functions f, g, h which are 1584 // of the same type, and a function foo that returns their addresses: 1585 // 1586 // f: 1587 // mov 0, %eax 1588 // ret 1589 // 1590 // g: 1591 // mov 1, %eax 1592 // ret 1593 // 1594 // h: 1595 // mov 2, %eax 1596 // ret 1597 // 1598 // foo: 1599 // mov f, %eax 1600 // mov g, %edx 1601 // mov h, %ecx 1602 // ret 1603 // 1604 // We output the jump table as module-level inline asm string. The end result 1605 // will (conceptually) look like this: 1606 // 1607 // f = .cfi.jumptable 1608 // g = .cfi.jumptable + 4 1609 // h = .cfi.jumptable + 8 1610 // .cfi.jumptable: 1611 // jmp f.cfi ; 5 bytes 1612 // int3 ; 1 byte 1613 // int3 ; 1 byte 1614 // int3 ; 1 byte 1615 // jmp g.cfi ; 5 bytes 1616 // int3 ; 1 byte 1617 // int3 ; 1 byte 1618 // int3 ; 1 byte 1619 // jmp h.cfi ; 5 bytes 1620 // int3 ; 1 byte 1621 // int3 ; 1 byte 1622 // int3 ; 1 byte 1623 // 1624 // f.cfi: 1625 // mov 0, %eax 1626 // ret 1627 // 1628 // g.cfi: 1629 // mov 1, %eax 1630 // ret 1631 // 1632 // h.cfi: 1633 // mov 2, %eax 1634 // ret 1635 // 1636 // foo: 1637 // mov f, %eax 1638 // mov g, %edx 1639 // mov h, %ecx 1640 // ret 1641 // 1642 // Because the addresses of f, g, h are evenly spaced at a power of 2, in the 1643 // normal case the check can be carried out using the same kind of simple 1644 // arithmetic that we normally use for globals. 1645 1646 // FIXME: find a better way to represent the jumptable in the IR. 1647 assert(!Functions.empty()); 1648 1649 // Decide on the jump table encoding, so that we know how big the 1650 // entries will be. 1651 JumpTableArch = selectJumpTableArmEncoding(Functions); 1652 1653 // Build a simple layout based on the regular layout of jump tables. 1654 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; 1655 unsigned EntrySize = getJumpTableEntrySize(); 1656 for (unsigned I = 0; I != Functions.size(); ++I) 1657 GlobalLayout[Functions[I]] = I * EntrySize; 1658 1659 Function *JumpTableFn = 1660 Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()), 1661 /* IsVarArg */ false), 1662 GlobalValue::PrivateLinkage, 1663 M.getDataLayout().getProgramAddressSpace(), 1664 ".cfi.jumptable", &M); 1665 ArrayType *JumpTableType = 1666 ArrayType::get(getJumpTableEntryType(), Functions.size()); 1667 auto JumpTable = ConstantExpr::getPointerCast( 1668 JumpTableFn, PointerType::getUnqual(M.getContext())); 1669 1670 lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); 1671 1672 { 1673 ScopedSaveAliaseesAndUsed S(M); 1674 1675 // Build aliases pointing to offsets into the jump table, and replace 1676 // references to the original functions with references to the aliases. 
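  // Concretely, for the loop below (illustrative names): a canonical
  // definition @f has its body renamed to @f.cfi, and an alias named @f is
  // created that points at the corresponding jump table slot; a non-canonical
  // member keeps its original symbol, and a separate @f.cfi_jt alias to the
  // slot is created instead (exported with hidden visibility, or internal and
  // added to llvm.used).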
1677 for (unsigned I = 0; I != Functions.size(); ++I) { 1678 Function *F = cast<Function>(Functions[I]->getGlobal()); 1679 bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); 1680 1681 Constant *CombinedGlobalElemPtr = ConstantExpr::getInBoundsGetElementPtr( 1682 JumpTableType, JumpTable, 1683 ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), 1684 ConstantInt::get(IntPtrTy, I)}); 1685 1686 const bool IsExported = Functions[I]->isExported(); 1687 if (!IsJumpTableCanonical) { 1688 GlobalValue::LinkageTypes LT = IsExported 1689 ? GlobalValue::ExternalLinkage 1690 : GlobalValue::InternalLinkage; 1691 GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT, 1692 F->getName() + ".cfi_jt", 1693 CombinedGlobalElemPtr, &M); 1694 if (IsExported) 1695 JtAlias->setVisibility(GlobalValue::HiddenVisibility); 1696 else 1697 appendToUsed(M, {JtAlias}); 1698 } 1699 1700 if (IsExported) { 1701 if (IsJumpTableCanonical) 1702 ExportSummary->cfiFunctionDefs().insert(std::string(F->getName())); 1703 else 1704 ExportSummary->cfiFunctionDecls().insert(std::string(F->getName())); 1705 } 1706 1707 if (!IsJumpTableCanonical) { 1708 if (F->hasExternalWeakLinkage()) 1709 replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, 1710 IsJumpTableCanonical); 1711 else 1712 replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical); 1713 } else { 1714 assert(F->getType()->getAddressSpace() == 0); 1715 1716 GlobalAlias *FAlias = 1717 GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "", 1718 CombinedGlobalElemPtr, &M); 1719 FAlias->setVisibility(F->getVisibility()); 1720 FAlias->takeName(F); 1721 if (FAlias->hasName()) 1722 F->setName(FAlias->getName() + ".cfi"); 1723 replaceCfiUses(F, FAlias, IsJumpTableCanonical); 1724 if (!F->hasLocalLinkage()) 1725 F->setVisibility(GlobalVariable::HiddenVisibility); 1726 } 1727 } 1728 } 1729 1730 createJumpTable(JumpTableFn, Functions); 1731 } 1732 1733 /// Assign a dummy layout using an incrementing counter, tag each function 1734 /// with its index represented as metadata, and lower each type test to an 1735 /// integer range comparison. During generation of the indirect function call 1736 /// table in the backend, it will assign the given indexes. 1737 /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet 1738 /// been finalized. 1739 void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM( 1740 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { 1741 assert(!Functions.empty()); 1742 1743 // Build consecutive monotonic integer ranges for each call target set 1744 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; 1745 1746 for (GlobalTypeMember *GTM : Functions) { 1747 Function *F = cast<Function>(GTM->getGlobal()); 1748 1749 // Skip functions that are not address taken, to avoid bloating the table 1750 if (!F->hasAddressTaken()) 1751 continue; 1752 1753 // Store metadata with the index for each function 1754 MDNode *MD = MDNode::get(F->getContext(), 1755 ArrayRef<Metadata *>(ConstantAsMetadata::get( 1756 ConstantInt::get(Int64Ty, IndirectIndex)))); 1757 F->setMetadata("wasm.index", MD); 1758 1759 // Assign the counter value 1760 GlobalLayout[GTM] = IndirectIndex++; 1761 } 1762 1763 // The indirect function table index space starts at zero, so pass a NULL 1764 // pointer as the subtracted "jump table" offset. 
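  // Sketch of the intent: with a null "combined global" base, the subtracted
  // offset in the lowered check is zero, so each type test effectively becomes
  // a range check on the raw table index assigned above. The actual lowering
  // is shared with the global-variable path in lowerTypeTestCalls.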
1765 lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy), 1766 GlobalLayout); 1767 } 1768 1769 void LowerTypeTestsModule::buildBitSetsFromDisjointSet( 1770 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals, 1771 ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) { 1772 DenseMap<Metadata *, uint64_t> TypeIdIndices; 1773 for (unsigned I = 0; I != TypeIds.size(); ++I) 1774 TypeIdIndices[TypeIds[I]] = I; 1775 1776 // For each type identifier, build a set of indices that refer to members of 1777 // the type identifier. 1778 std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size()); 1779 unsigned GlobalIndex = 0; 1780 DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices; 1781 for (GlobalTypeMember *GTM : Globals) { 1782 for (MDNode *Type : GTM->types()) { 1783 // Type = { offset, type identifier } 1784 auto I = TypeIdIndices.find(Type->getOperand(1)); 1785 if (I != TypeIdIndices.end()) 1786 TypeMembers[I->second].insert(GlobalIndex); 1787 } 1788 GlobalIndices[GTM] = GlobalIndex; 1789 GlobalIndex++; 1790 } 1791 1792 for (ICallBranchFunnel *JT : ICallBranchFunnels) { 1793 TypeMembers.emplace_back(); 1794 std::set<uint64_t> &TMSet = TypeMembers.back(); 1795 for (GlobalTypeMember *T : JT->targets()) 1796 TMSet.insert(GlobalIndices[T]); 1797 } 1798 1799 // Order the sets of indices by size. The GlobalLayoutBuilder works best 1800 // when given small index sets first. 1801 llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1, 1802 const std::set<uint64_t> &O2) { 1803 return O1.size() < O2.size(); 1804 }); 1805 1806 // Create a GlobalLayoutBuilder and provide it with index sets as layout 1807 // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as 1808 // close together as possible. 1809 GlobalLayoutBuilder GLB(Globals.size()); 1810 for (auto &&MemSet : TypeMembers) 1811 GLB.addFragment(MemSet); 1812 1813 // Build a vector of globals with the computed layout. 1814 bool IsGlobalSet = 1815 Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal()); 1816 std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size()); 1817 auto OGTMI = OrderedGTMs.begin(); 1818 for (auto &&F : GLB.Fragments) { 1819 for (auto &&Offset : F) { 1820 if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal())) 1821 report_fatal_error("Type identifier may not contain both global " 1822 "variables and functions"); 1823 *OGTMI++ = Globals[Offset]; 1824 } 1825 } 1826 1827 // Build the bitsets from this disjoint set. 1828 if (IsGlobalSet) 1829 buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs); 1830 else 1831 buildBitSetsFromFunctions(TypeIds, OrderedGTMs); 1832 } 1833 1834 /// Lower all type tests in this module. 1835 LowerTypeTestsModule::LowerTypeTestsModule( 1836 Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary, 1837 const ModuleSummaryIndex *ImportSummary, DropTestKind DropTypeTests) 1838 : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary), 1839 DropTypeTests(ClDropTypeTests > DropTypeTests ? 
ClDropTypeTests 1840 : DropTypeTests) { 1841 assert(!(ExportSummary && ImportSummary)); 1842 Triple TargetTriple(M.getTargetTriple()); 1843 Arch = TargetTriple.getArch(); 1844 if (Arch == Triple::arm) 1845 CanUseArmJumpTable = true; 1846 if (Arch == Triple::arm || Arch == Triple::thumb) { 1847 auto &FAM = 1848 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 1849 for (Function &F : M) { 1850 auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 1851 if (TTI.hasArmWideBranch(false)) 1852 CanUseArmJumpTable = true; 1853 if (TTI.hasArmWideBranch(true)) 1854 CanUseThumbBWJumpTable = true; 1855 } 1856 } 1857 OS = TargetTriple.getOS(); 1858 ObjectFormat = TargetTriple.getObjectFormat(); 1859 1860 // Function annotation describes or applies to function itself, and 1861 // shouldn't be associated with jump table thunk generated for CFI. 1862 GlobalAnnotation = M.getGlobalVariable("llvm.global.annotations"); 1863 if (GlobalAnnotation && GlobalAnnotation->hasInitializer()) { 1864 const ConstantArray *CA = 1865 cast<ConstantArray>(GlobalAnnotation->getInitializer()); 1866 for (Value *Op : CA->operands()) 1867 FunctionAnnotations.insert(Op); 1868 } 1869 } 1870 1871 bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) { 1872 ModuleSummaryIndex Summary(/*HaveGVs=*/false); 1873 1874 // Handle the command-line summary arguments. This code is for testing 1875 // purposes only, so we handle errors directly. 1876 if (!ClReadSummary.empty()) { 1877 ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary + 1878 ": "); 1879 auto ReadSummaryFile = ExitOnErr(errorOrToExpected( 1880 MemoryBuffer::getFile(ClReadSummary, /*IsText=*/true))); 1881 1882 yaml::Input In(ReadSummaryFile->getBuffer()); 1883 In >> Summary; 1884 ExitOnErr(errorCodeToError(In.error())); 1885 } 1886 1887 bool Changed = 1888 LowerTypeTestsModule( 1889 M, AM, 1890 ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr, 1891 ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr, 1892 /*DropTypeTests=*/DropTestKind::None) 1893 .lower(); 1894 1895 if (!ClWriteSummary.empty()) { 1896 ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary + 1897 ": "); 1898 std::error_code EC; 1899 raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF); 1900 ExitOnErr(errorCodeToError(EC)); 1901 1902 yaml::Output Out(OS); 1903 Out << Summary; 1904 } 1905 1906 return Changed; 1907 } 1908 1909 static bool isDirectCall(Use& U) { 1910 auto *Usr = dyn_cast<CallInst>(U.getUser()); 1911 if (Usr) { 1912 auto *CB = dyn_cast<CallBase>(Usr); 1913 if (CB && CB->isCallee(&U)) 1914 return true; 1915 } 1916 return false; 1917 } 1918 1919 void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, 1920 bool IsJumpTableCanonical) { 1921 SmallSetVector<Constant *, 4> Constants; 1922 for (Use &U : llvm::make_early_inc_range(Old->uses())) { 1923 // Skip block addresses and no_cfi values, which refer to the function 1924 // body instead of the jump table. 1925 if (isa<BlockAddress, NoCFIValue>(U.getUser())) 1926 continue; 1927 1928 // Skip direct calls to externally defined or non-dso_local functions. 1929 if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical)) 1930 continue; 1931 1932 // Skip function annotation. 1933 if (isFunctionAnnotation(U.getUser())) 1934 continue; 1935 1936 // Must handle Constants specially, we cannot call replaceUsesOfWith on a 1937 // constant because they are uniqued. 
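  // Non-global constant users (e.g. constant GEPs or casts) are uniqued, so a
  // use inside them cannot be rewritten in place; they are collected here and
  // updated afterwards via handleOperandChange, which re-uniques them.
  // GlobalValues are not uniqued, so their uses can be set directly.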
1938 if (auto *C = dyn_cast<Constant>(U.getUser())) { 1939 if (!isa<GlobalValue>(C)) { 1940 // Save unique users to avoid processing operand replacement 1941 // more than once. 1942 Constants.insert(C); 1943 continue; 1944 } 1945 } 1946 1947 U.set(New); 1948 } 1949 1950 // Process operand replacement of saved constants. 1951 for (auto *C : Constants) 1952 C->handleOperandChange(Old, New); 1953 } 1954 1955 void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) { 1956 Old->replaceUsesWithIf(New, isDirectCall); 1957 } 1958 1959 static void dropTypeTests(Module &M, Function &TypeTestFunc, 1960 bool ShouldDropAll) { 1961 for (Use &U : llvm::make_early_inc_range(TypeTestFunc.uses())) { 1962 auto *CI = cast<CallInst>(U.getUser()); 1963 // Find and erase llvm.assume intrinsics for this llvm.type.test call. 1964 for (Use &CIU : llvm::make_early_inc_range(CI->uses())) 1965 if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser())) 1966 Assume->eraseFromParent(); 1967 // If the assume was merged with another assume, we might have a use on a 1968 // phi (which will feed the assume). Simply replace the use on the phi 1969 // with "true" and leave the merged assume. 1970 // 1971 // If ShouldDropAll is set, then we need to update any remaining uses, 1972 // regardless of the instruction type. 1973 if (!CI->use_empty()) { 1974 assert(ShouldDropAll || all_of(CI->users(), [](User *U) -> bool { 1975 return isa<PHINode>(U); 1976 })); 1977 CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext())); 1978 } 1979 CI->eraseFromParent(); 1980 } 1981 } 1982 1983 bool LowerTypeTestsModule::lower() { 1984 Function *TypeTestFunc = 1985 Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); 1986 1987 if (DropTypeTests != DropTestKind::None) { 1988 bool ShouldDropAll = DropTypeTests == DropTestKind::All; 1989 if (TypeTestFunc) 1990 dropTypeTests(M, *TypeTestFunc, ShouldDropAll); 1991 // Normally we'd have already removed all @llvm.public.type.test calls, 1992 // except in the case where we originally were performing ThinLTO but 1993 // decided not to in the backend. 1994 Function *PublicTypeTestFunc = 1995 Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); 1996 if (PublicTypeTestFunc) 1997 dropTypeTests(M, *PublicTypeTestFunc, ShouldDropAll); 1998 if (TypeTestFunc || PublicTypeTestFunc) { 1999 // We have deleted the type intrinsics, so we no longer have enough 2000 // information to reason about the liveness of virtual function pointers 2001 // in GlobalDCE. 2002 for (GlobalVariable &GV : M.globals()) 2003 GV.eraseMetadata(LLVMContext::MD_vcall_visibility); 2004 return true; 2005 } 2006 return false; 2007 } 2008 2009 // If only some of the modules were split, we cannot correctly perform 2010 // this transformation. We already checked for the presence of type tests 2011 // with partially split modules during the thin link, and would have emitted 2012 // an error if any were found, so here we can simply return.
2013 if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) || 2014 (ImportSummary && ImportSummary->partiallySplitLTOUnits())) 2015 return false; 2016 2017 Function *ICallBranchFunnelFunc = 2018 Intrinsic::getDeclarationIfExists(&M, Intrinsic::icall_branch_funnel); 2019 if ((!TypeTestFunc || TypeTestFunc->use_empty()) && 2020 (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) && 2021 !ExportSummary && !ImportSummary) 2022 return false; 2023 2024 if (ImportSummary) { 2025 if (TypeTestFunc) 2026 for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) 2027 importTypeTest(cast<CallInst>(U.getUser())); 2028 2029 if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty()) 2030 report_fatal_error( 2031 "unexpected call to llvm.icall.branch.funnel during import phase"); 2032 2033 SmallVector<Function *, 8> Defs; 2034 SmallVector<Function *, 8> Decls; 2035 for (auto &F : M) { 2036 // CFI functions are either external, or promoted. A local function may 2037 // have the same name, but it's not the one we are looking for. 2038 if (F.hasLocalLinkage()) 2039 continue; 2040 if (ImportSummary->cfiFunctionDefs().count(F.getName())) 2041 Defs.push_back(&F); 2042 else if (ImportSummary->cfiFunctionDecls().count(F.getName())) 2043 Decls.push_back(&F); 2044 } 2045 2046 std::vector<GlobalAlias *> AliasesToErase; 2047 { 2048 ScopedSaveAliaseesAndUsed S(M); 2049 for (auto *F : Defs) 2050 importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase); 2051 for (auto *F : Decls) 2052 importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase); 2053 } 2054 for (GlobalAlias *GA : AliasesToErase) 2055 GA->eraseFromParent(); 2056 2057 return true; 2058 } 2059 2060 // Equivalence class set containing type identifiers and the globals that 2061 // reference them. This is used to partition the set of type identifiers in 2062 // the module into disjoint sets. 2063 using GlobalClassesTy = EquivalenceClasses< 2064 PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>; 2065 GlobalClassesTy GlobalClasses; 2066 2067 // Verify the type metadata and build a few data structures to let us 2068 // efficiently enumerate the type identifiers associated with a global: 2069 // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector 2070 // of associated type metadata) and a mapping from type identifiers to their 2071 // list of GlobalTypeMembers and last observed index in the list of globals. 2072 // The indices will be used later to deterministically order the list of type 2073 // identifiers. 2074 BumpPtrAllocator Alloc; 2075 struct TIInfo { 2076 unsigned UniqueId; 2077 std::vector<GlobalTypeMember *> RefGlobals; 2078 }; 2079 DenseMap<Metadata *, TIInfo> TypeIdInfo; 2080 unsigned CurUniqueId = 0; 2081 SmallVector<MDNode *, 2> Types; 2082 2083 // Cross-DSO CFI emits jumptable entries for exported functions as well as 2084 // address taken functions in case they are address taken in other modules. 2085 const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr; 2086 2087 struct ExportedFunctionInfo { 2088 CfiFunctionLinkage Linkage; 2089 MDNode *FuncMD; // {name, linkage, type[, type...]} 2090 }; 2091 MapVector<StringRef, ExportedFunctionInfo> ExportedFunctions; 2092 if (ExportSummary) { 2093 NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); 2094 if (CfiFunctionsMD) { 2095 // A set of all functions that are address taken by a live global object. 
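  // Built from the summary: every reference from a live summary counts as an
  // address being taken, and the aliasees of referenced aliases are added as
  // well, so that taking the address of an alias also marks its target.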
2096 DenseSet<GlobalValue::GUID> AddressTaken; 2097 for (auto &I : *ExportSummary) 2098 for (auto &GVS : I.second.SummaryList) 2099 if (GVS->isLive()) 2100 for (const auto &Ref : GVS->refs()) { 2101 AddressTaken.insert(Ref.getGUID()); 2102 for (auto &RefGVS : Ref.getSummaryList()) 2103 if (auto Alias = dyn_cast<AliasSummary>(RefGVS.get())) 2104 AddressTaken.insert(Alias->getAliaseeGUID()); 2105 } 2106 for (auto *FuncMD : CfiFunctionsMD->operands()) { 2107 assert(FuncMD->getNumOperands() >= 2); 2108 StringRef FunctionName = 2109 cast<MDString>(FuncMD->getOperand(0))->getString(); 2110 CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>( 2111 cast<ConstantAsMetadata>(FuncMD->getOperand(1)) 2112 ->getValue() 2113 ->getUniqueInteger() 2114 .getZExtValue()); 2115 const GlobalValue::GUID GUID = GlobalValue::getGUID( 2116 GlobalValue::dropLLVMManglingEscape(FunctionName)); 2117 // Do not emit jumptable entries for functions that are not-live and 2118 // have no live references (and are not exported with cross-DSO CFI.) 2119 if (!ExportSummary->isGUIDLive(GUID)) 2120 continue; 2121 if (!AddressTaken.count(GUID)) { 2122 if (!CrossDsoCfi || Linkage != CFL_Definition) 2123 continue; 2124 2125 bool Exported = false; 2126 if (auto VI = ExportSummary->getValueInfo(GUID)) 2127 for (const auto &GVS : VI.getSummaryList()) 2128 if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage())) 2129 Exported = true; 2130 2131 if (!Exported) 2132 continue; 2133 } 2134 auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}}); 2135 if (!P.second && P.first->second.Linkage != CFL_Definition) 2136 P.first->second = {Linkage, FuncMD}; 2137 } 2138 2139 for (const auto &P : ExportedFunctions) { 2140 StringRef FunctionName = P.first; 2141 CfiFunctionLinkage Linkage = P.second.Linkage; 2142 MDNode *FuncMD = P.second.FuncMD; 2143 Function *F = M.getFunction(FunctionName); 2144 if (F && F->hasLocalLinkage()) { 2145 // Locally defined function that happens to have the same name as a 2146 // function defined in a ThinLTO module. Rename it to move it out of 2147 // the way of the external reference that we're about to create. 2148 // Note that setName will find a unique name for the function, so even 2149 // if there is an existing function with the suffix there won't be a 2150 // name collision. 2151 F->setName(F->getName() + ".1"); 2152 F = nullptr; 2153 } 2154 2155 if (!F) 2156 F = Function::Create( 2157 FunctionType::get(Type::getVoidTy(M.getContext()), false), 2158 GlobalVariable::ExternalLinkage, 2159 M.getDataLayout().getProgramAddressSpace(), FunctionName, &M); 2160 2161 // If the function is available_externally, remove its definition so 2162 // that it is handled the same way as a declaration. Later we will try 2163 // to create an alias using this function's linkage, which will fail if 2164 // the linkage is available_externally. This will also result in us 2165 // following the code path below to replace the type metadata. 2166 if (F->hasAvailableExternallyLinkage()) { 2167 F->setLinkage(GlobalValue::ExternalLinkage); 2168 F->deleteBody(); 2169 F->setComdat(nullptr); 2170 F->clearMetadata(); 2171 } 2172 2173 // Update the linkage for extern_weak declarations when a definition 2174 // exists. 2175 if (Linkage == CFL_Definition && F->hasExternalWeakLinkage()) 2176 F->setLinkage(GlobalValue::ExternalLinkage); 2177 2178 // If the function in the full LTO module is a declaration, replace its 2179 // type metadata with the type metadata we found in cfi.functions. 
That 2180 // metadata is presumed to be more accurate than the metadata attached 2181 // to the declaration. 2182 if (F->isDeclaration()) { 2183 if (Linkage == CFL_WeakDeclaration) 2184 F->setLinkage(GlobalValue::ExternalWeakLinkage); 2185 2186 F->eraseMetadata(LLVMContext::MD_type); 2187 for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I) 2188 F->addMetadata(LLVMContext::MD_type, 2189 *cast<MDNode>(FuncMD->getOperand(I).get())); 2190 } 2191 } 2192 } 2193 } 2194 2195 DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers; 2196 for (GlobalObject &GO : M.global_objects()) { 2197 if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker()) 2198 continue; 2199 2200 Types.clear(); 2201 GO.getMetadata(LLVMContext::MD_type, Types); 2202 2203 bool IsJumpTableCanonical = false; 2204 bool IsExported = false; 2205 if (Function *F = dyn_cast<Function>(&GO)) { 2206 IsJumpTableCanonical = isJumpTableCanonical(F); 2207 if (ExportedFunctions.count(F->getName())) { 2208 IsJumpTableCanonical |= 2209 ExportedFunctions[F->getName()].Linkage == CFL_Definition; 2210 IsExported = true; 2211 // TODO: The logic here checks only that the function is address taken, 2212 // not that the address takers are live. This can be updated to check 2213 // their liveness and emit fewer jumptable entries once monolithic LTO 2214 // builds also emit summaries. 2215 } else if (!F->hasAddressTaken()) { 2216 if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage()) 2217 continue; 2218 } 2219 } 2220 2221 auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical, 2222 IsExported, Types); 2223 GlobalTypeMembers[&GO] = GTM; 2224 for (MDNode *Type : Types) { 2225 verifyTypeMDNode(&GO, Type); 2226 auto &Info = TypeIdInfo[Type->getOperand(1)]; 2227 Info.UniqueId = ++CurUniqueId; 2228 Info.RefGlobals.push_back(GTM); 2229 } 2230 } 2231 2232 auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & { 2233 // Add the call site to the list of call sites for this type identifier. We 2234 // also use TypeIdUsers to keep track of whether we have seen this type 2235 // identifier before. If we have, we don't need to re-add the referenced 2236 // globals to the equivalence class. 2237 auto Ins = TypeIdUsers.insert({TypeId, {}}); 2238 if (Ins.second) { 2239 // Add the type identifier to the equivalence class. 2240 GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId); 2241 GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI); 2242 2243 // Add the referenced globals to the type identifier's equivalence class. 2244 for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals) 2245 CurSet = GlobalClasses.unionSets( 2246 CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM))); 2247 } 2248 2249 return Ins.first->second; 2250 }; 2251 2252 if (TypeTestFunc) { 2253 for (const Use &U : TypeTestFunc->uses()) { 2254 auto CI = cast<CallInst>(U.getUser()); 2255 // If this type test is only used by llvm.assume instructions, it 2256 // was used for whole program devirtualization, and is being kept 2257 // for use by other optimization passes. We do not need or want to 2258 // lower it here. We also don't want to rewrite any associated globals 2259 // unnecessarily. These will be removed by a subsequent LTT invocation 2260 // with the DropTypeTests flag set. 
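  // The pattern being detected is (schematically):
  //   %t = call i1 @llvm.type.test(ptr %p, metadata !"typeid")
  //   call void @llvm.assume(i1 %t)
  // i.e. every user of the type test call is an llvm.assume.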
2261 bool OnlyAssumeUses = !CI->use_empty(); 2262 for (const Use &CIU : CI->uses()) { 2263 if (isa<AssumeInst>(CIU.getUser())) 2264 continue; 2265 OnlyAssumeUses = false; 2266 break; 2267 } 2268 if (OnlyAssumeUses) 2269 continue; 2270 2271 auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1)); 2272 if (!TypeIdMDVal) 2273 report_fatal_error("Second argument of llvm.type.test must be metadata"); 2274 auto TypeId = TypeIdMDVal->getMetadata(); 2275 AddTypeIdUse(TypeId).CallSites.push_back(CI); 2276 } 2277 } 2278 2279 if (ICallBranchFunnelFunc) { 2280 for (const Use &U : ICallBranchFunnelFunc->uses()) { 2281 if (Arch != Triple::x86_64) 2282 report_fatal_error( 2283 "llvm.icall.branch.funnel not supported on this target"); 2284 2285 auto CI = cast<CallInst>(U.getUser()); 2286 2287 std::vector<GlobalTypeMember *> Targets; 2288 if (CI->arg_size() % 2 != 1) 2289 report_fatal_error("number of arguments should be odd"); 2290 2291 GlobalClassesTy::member_iterator CurSet; 2292 for (unsigned I = 1; I != CI->arg_size(); I += 2) { 2293 int64_t Offset; 2294 auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( 2295 CI->getOperand(I), Offset, M.getDataLayout())); 2296 if (!Base) 2297 report_fatal_error( 2298 "Expected branch funnel operand to be global value"); 2299 2300 GlobalTypeMember *GTM = GlobalTypeMembers[Base]; 2301 Targets.push_back(GTM); 2302 GlobalClassesTy::member_iterator NewSet = 2303 GlobalClasses.findLeader(GlobalClasses.insert(GTM)); 2304 if (I == 1) 2305 CurSet = NewSet; 2306 else 2307 CurSet = GlobalClasses.unionSets(CurSet, NewSet); 2308 } 2309 2310 GlobalClasses.unionSets( 2311 CurSet, GlobalClasses.findLeader( 2312 GlobalClasses.insert(ICallBranchFunnel::create( 2313 Alloc, CI, Targets, ++CurUniqueId)))); 2314 } 2315 } 2316 2317 if (ExportSummary) { 2318 DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID; 2319 for (auto &P : TypeIdInfo) { 2320 if (auto *TypeId = dyn_cast<MDString>(P.first)) 2321 MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back( 2322 TypeId); 2323 } 2324 2325 for (auto &P : *ExportSummary) { 2326 for (auto &S : P.second.SummaryList) { 2327 if (!ExportSummary->isGlobalValueLive(S.get())) 2328 continue; 2329 if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject())) 2330 for (GlobalValue::GUID G : FS->type_tests()) 2331 for (Metadata *MD : MetadataByGUID[G]) 2332 AddTypeIdUse(MD).IsExported = true; 2333 } 2334 } 2335 } 2336 2337 if (GlobalClasses.empty()) 2338 return false; 2339 2340 // Build a list of disjoint sets ordered by their maximum global index for 2341 // determinism. 2342 std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets; 2343 for (GlobalClassesTy::iterator I = GlobalClasses.begin(), 2344 E = GlobalClasses.end(); 2345 I != E; ++I) { 2346 if (!I->isLeader()) 2347 continue; 2348 ++NumTypeIdDisjointSets; 2349 2350 unsigned MaxUniqueId = 0; 2351 for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); 2352 MI != GlobalClasses.member_end(); ++MI) { 2353 if (auto *MD = dyn_cast_if_present<Metadata *>(*MI)) 2354 MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId); 2355 else if (auto *BF = dyn_cast_if_present<ICallBranchFunnel *>(*MI)) 2356 MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId); 2357 } 2358 Sets.emplace_back(I, MaxUniqueId); 2359 } 2360 llvm::sort(Sets, llvm::less_second()); 2361 2362 // For each disjoint set we found... 2363 for (const auto &S : Sets) { 2364 // Build the list of type identifiers in this disjoint set. 
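  // Each member of the equivalence class is one of the three PointerUnion
  // alternatives (type identifier metadata, GlobalTypeMember, or
  // ICallBranchFunnel), so the isa<> dispatch below is exhaustive.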
2365 std::vector<Metadata *> TypeIds; 2366 std::vector<GlobalTypeMember *> Globals; 2367 std::vector<ICallBranchFunnel *> ICallBranchFunnels; 2368 for (GlobalClassesTy::member_iterator MI = 2369 GlobalClasses.member_begin(S.first); 2370 MI != GlobalClasses.member_end(); ++MI) { 2371 if (isa<Metadata *>(*MI)) 2372 TypeIds.push_back(cast<Metadata *>(*MI)); 2373 else if (isa<GlobalTypeMember *>(*MI)) 2374 Globals.push_back(cast<GlobalTypeMember *>(*MI)); 2375 else 2376 ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(*MI)); 2377 } 2378 2379 // Order type identifiers by unique ID for determinism. This ordering is 2380 // stable as there is a one-to-one mapping between metadata and unique IDs. 2381 llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) { 2382 return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId; 2383 }); 2384 2385 // Same for the branch funnels. 2386 llvm::sort(ICallBranchFunnels, 2387 [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) { 2388 return F1->UniqueId < F2->UniqueId; 2389 }); 2390 2391 // Build bitsets for this disjoint set. 2392 buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels); 2393 } 2394 2395 allocateByteArrays(); 2396 2397 // Parse alias data to replace stand-in function declarations for aliases 2398 // with an alias to the intended target. 2399 if (ExportSummary) { 2400 if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) { 2401 for (auto *AliasMD : AliasesMD->operands()) { 2402 assert(AliasMD->getNumOperands() >= 4); 2403 StringRef AliasName = 2404 cast<MDString>(AliasMD->getOperand(0))->getString(); 2405 StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString(); 2406 2407 if (!ExportedFunctions.count(Aliasee) || 2408 ExportedFunctions[Aliasee].Linkage != CFL_Definition || 2409 !M.getNamedAlias(Aliasee)) 2410 continue; 2411 2412 GlobalValue::VisibilityTypes Visibility = 2413 static_cast<GlobalValue::VisibilityTypes>( 2414 cast<ConstantAsMetadata>(AliasMD->getOperand(2)) 2415 ->getValue() 2416 ->getUniqueInteger() 2417 .getZExtValue()); 2418 bool Weak = 2419 static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3)) 2420 ->getValue() 2421 ->getUniqueInteger() 2422 .getZExtValue()); 2423 2424 auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee)); 2425 Alias->setVisibility(Visibility); 2426 if (Weak) 2427 Alias->setLinkage(GlobalValue::WeakAnyLinkage); 2428 2429 if (auto *F = M.getFunction(AliasName)) { 2430 Alias->takeName(F); 2431 F->replaceAllUsesWith(Alias); 2432 F->eraseFromParent(); 2433 } else { 2434 Alias->setName(AliasName); 2435 } 2436 } 2437 } 2438 } 2439 2440 // Emit .symver directives for exported functions, if they exist. 
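  // Each "symvers" operand is a pair {symbol name, versioned alias}; for an
  // exported symbol this appends module-level inline asm of the form
  //   .symver <symbol>, <alias>
  // (e.g. ".symver foo, foo@VERS_1", where the version string comes straight
  // from the metadata).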
2441 if (ExportSummary) { 2442 if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) { 2443 for (auto *Symver : SymversMD->operands()) { 2444 assert(Symver->getNumOperands() >= 2); 2445 StringRef SymbolName = 2446 cast<MDString>(Symver->getOperand(0))->getString(); 2447 StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString(); 2448 2449 if (!ExportedFunctions.count(SymbolName)) 2450 continue; 2451 2452 M.appendModuleInlineAsm( 2453 (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str()); 2454 } 2455 } 2456 } 2457 2458 return true; 2459 } 2460 2461 PreservedAnalyses LowerTypeTestsPass::run(Module &M, 2462 ModuleAnalysisManager &AM) { 2463 bool Changed; 2464 if (UseCommandLine) 2465 Changed = LowerTypeTestsModule::runForTesting(M, AM); 2466 else 2467 Changed = 2468 LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests) 2469 .lower(); 2470 if (!Changed) 2471 return PreservedAnalyses::all(); 2472 return PreservedAnalyses::none(); 2473 } 2474