1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/Analysis/VectorUtils.h" 15 #include "llvm/IR/DerivedTypes.h" 16 #include "llvm/IR/Function.h" 17 #include "llvm/IR/IRBuilder.h" 18 #include "llvm/IR/MDBuilder.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Support/raw_ostream.h" 21 #include "llvm/Support/xxhash.h" 22 using namespace llvm; 23 24 #define DEBUG_TYPE "moduleutils" 25 26 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, 27 int Priority, Constant *Data) { 28 IRBuilder<> IRB(M.getContext()); 29 FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); 30 31 // Get the current set of static global constructors and add the new ctor 32 // to the list. 33 SmallVector<Constant *, 16> CurrentCtors; 34 StructType *EltTy; 35 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { 36 EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType()); 37 if (Constant *Init = GVCtor->getInitializer()) { 38 unsigned n = Init->getNumOperands(); 39 CurrentCtors.reserve(n + 1); 40 for (unsigned i = 0; i != n; ++i) 41 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); 42 } 43 GVCtor->eraseFromParent(); 44 } else { 45 EltTy = StructType::get( 46 IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), 47 IRB.getInt8PtrTy()); 48 } 49 50 // Build a 3 field global_ctor entry. We don't take a comdat key. 51 Constant *CSVals[3]; 52 CSVals[0] = IRB.getInt32(Priority); 53 CSVals[1] = F; 54 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) 55 : Constant::getNullValue(IRB.getInt8PtrTy()); 56 Constant *RuntimeCtorInit = 57 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); 58 59 CurrentCtors.push_back(RuntimeCtorInit); 60 61 // Create a new initializer. 62 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 63 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 64 65 // Create the new global variable and replace all uses of 66 // the old global variable with the new one. 67 (void)new GlobalVariable(M, NewInit->getType(), false, 68 GlobalValue::AppendingLinkage, NewInit, ArrayName); 69 } 70 71 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 72 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 73 } 74 75 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 76 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 77 } 78 79 static void collectUsedGlobals(GlobalVariable *GV, 80 SmallSetVector<Constant *, 16> &Init) { 81 if (!GV || !GV->hasInitializer()) 82 return; 83 84 auto *CA = cast<ConstantArray>(GV->getInitializer()); 85 for (Use &Op : CA->operands()) 86 Init.insert(cast<Constant>(Op)); 87 } 88 89 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 90 GlobalVariable *GV = M.getGlobalVariable(Name); 91 92 SmallSetVector<Constant *, 16> Init; 93 collectUsedGlobals(GV, Init); 94 if (GV) 95 GV->eraseFromParent(); 96 97 Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext()); 98 for (auto *V : Values) 99 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); 100 101 if (Init.empty()) 102 return; 103 104 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); 105 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 106 ConstantArray::get(ATy, Init.getArrayRef()), 107 Name); 108 GV->setSection("llvm.metadata"); 109 } 110 111 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 112 appendToUsedList(M, "llvm.used", Values); 113 } 114 115 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 116 appendToUsedList(M, "llvm.compiler.used", Values); 117 } 118 119 static void removeFromUsedList(Module &M, StringRef Name, 120 function_ref<bool(Constant *)> ShouldRemove) { 121 GlobalVariable *GV = M.getNamedGlobal(Name); 122 if (!GV) 123 return; 124 125 SmallSetVector<Constant *, 16> Init; 126 collectUsedGlobals(GV, Init); 127 128 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); 129 130 SmallVector<Constant *, 16> NewInit; 131 for (Constant *MaybeRemoved : Init) { 132 if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) 133 NewInit.push_back(MaybeRemoved); 134 } 135 136 if (!NewInit.empty()) { 137 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); 138 GlobalVariable *NewGV = 139 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 140 ConstantArray::get(ATy, NewInit), "", GV, 141 GV->getThreadLocalMode(), GV->getAddressSpace()); 142 NewGV->setSection(GV->getSection()); 143 NewGV->takeName(GV); 144 } 145 146 GV->eraseFromParent(); 147 } 148 149 void llvm::removeFromUsedLists(Module &M, 150 function_ref<bool(Constant *)> ShouldRemove) { 151 removeFromUsedList(M, "llvm.used", ShouldRemove); 152 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); 153 } 154 155 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { 156 if (!M.getModuleFlag("kcfi")) 157 return; 158 // Matches CodeGenModule::CreateKCFITypeId in Clang. 159 LLVMContext &Ctx = M.getContext(); 160 MDBuilder MDB(Ctx); 161 F.setMetadata( 162 LLVMContext::MD_kcfi_type, 163 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( 164 Type::getInt32Ty(Ctx), 165 static_cast<uint32_t>(xxHash64(MangledType)))))); 166 // If the module was compiled with -fpatchable-function-entry, ensure 167 // we use the same patchable-function-prefix. 168 if (auto *MD = mdconst::extract_or_null<ConstantInt>( 169 M.getModuleFlag("kcfi-offset"))) { 170 if (unsigned Offset = MD->getZExtValue()) 171 F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); 172 } 173 } 174 175 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 176 ArrayRef<Type *> InitArgTypes, 177 bool Weak) { 178 assert(!InitName.empty() && "Expected init function name"); 179 auto *VoidTy = Type::getVoidTy(M.getContext()); 180 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); 181 auto FnCallee = M.getOrInsertFunction(InitName, FnTy); 182 auto *Fn = cast<Function>(FnCallee.getCallee()); 183 if (Weak && Fn->isDeclaration()) 184 Fn->setLinkage(Function::ExternalWeakLinkage); 185 return FnCallee; 186 } 187 188 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { 189 Function *Ctor = Function::createWithDefaultAttr( 190 FunctionType::get(Type::getVoidTy(M.getContext()), false), 191 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), 192 CtorName, &M); 193 Ctor->addFnAttr(Attribute::NoUnwind); 194 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) 195 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 196 ReturnInst::Create(M.getContext(), CtorBB); 197 // Ensure Ctor cannot be discarded, even if in a comdat. 198 appendToUsed(M, {Ctor}); 199 return Ctor; 200 } 201 202 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( 203 Module &M, StringRef CtorName, StringRef InitName, 204 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 205 StringRef VersionCheckName, bool Weak) { 206 assert(!InitName.empty() && "Expected init function name"); 207 assert(InitArgs.size() == InitArgTypes.size() && 208 "Sanitizer's init function expects different number of arguments"); 209 FunctionCallee InitFunction = 210 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); 211 Function *Ctor = createSanitizerCtor(M, CtorName); 212 IRBuilder<> IRB(M.getContext()); 213 214 BasicBlock *RetBB = &Ctor->getEntryBlock(); 215 if (Weak) { 216 RetBB->setName("ret"); 217 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); 218 auto *CallInitBB = 219 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); 220 auto *InitFn = cast<Function>(InitFunction.getCallee()); 221 auto *InitFnPtr = 222 PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); 223 IRB.SetInsertPoint(EntryBB); 224 Value *InitNotNull = 225 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); 226 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); 227 IRB.SetInsertPoint(CallInitBB); 228 } else { 229 IRB.SetInsertPoint(RetBB->getTerminator()); 230 } 231 232 IRB.CreateCall(InitFunction, InitArgs); 233 if (!VersionCheckName.empty()) { 234 FunctionCallee VersionCheckFunction = M.getOrInsertFunction( 235 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 236 AttributeList()); 237 IRB.CreateCall(VersionCheckFunction, {}); 238 } 239 240 if (Weak) 241 IRB.CreateBr(RetBB); 242 243 return std::make_pair(Ctor, InitFunction); 244 } 245 246 std::pair<Function *, FunctionCallee> 247 llvm::getOrCreateSanitizerCtorAndInitFunctions( 248 Module &M, StringRef CtorName, StringRef InitName, 249 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 250 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, 251 StringRef VersionCheckName, bool Weak) { 252 assert(!CtorName.empty() && "Expected ctor function name"); 253 254 if (Function *Ctor = M.getFunction(CtorName)) 255 // FIXME: Sink this logic into the module, similar to the handling of 256 // globals. This will make moving to a concurrent model much easier. 257 if (Ctor->arg_empty() || 258 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 259 return {Ctor, 260 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; 261 262 Function *Ctor; 263 FunctionCallee InitFunction; 264 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 265 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); 266 FunctionsCreatedCallback(Ctor, InitFunction); 267 return std::make_pair(Ctor, InitFunction); 268 } 269 270 void llvm::filterDeadComdatFunctions( 271 SmallVectorImpl<Function *> &DeadComdatFunctions) { 272 SmallPtrSet<Function *, 32> MaybeDeadFunctions; 273 SmallPtrSet<Comdat *, 32> MaybeDeadComdats; 274 for (Function *F : DeadComdatFunctions) { 275 MaybeDeadFunctions.insert(F); 276 if (Comdat *C = F->getComdat()) 277 MaybeDeadComdats.insert(C); 278 } 279 280 // Find comdats for which all users are dead now. 281 SmallPtrSet<Comdat *, 32> DeadComdats; 282 for (Comdat *C : MaybeDeadComdats) { 283 auto IsUserDead = [&](GlobalObject *GO) { 284 auto *F = dyn_cast<Function>(GO); 285 return F && MaybeDeadFunctions.contains(F); 286 }; 287 if (all_of(C->getUsers(), IsUserDead)) 288 DeadComdats.insert(C); 289 } 290 291 // Only keep functions which have no comdat or a dead comdat. 292 erase_if(DeadComdatFunctions, [&](Function *F) { 293 Comdat *C = F->getComdat(); 294 return C && !DeadComdats.contains(C); 295 }); 296 } 297 298 std::string llvm::getUniqueModuleId(Module *M) { 299 MD5 Md5; 300 bool ExportsSymbols = false; 301 auto AddGlobal = [&](GlobalValue &GV) { 302 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 303 !GV.hasExternalLinkage() || GV.hasComdat()) 304 return; 305 ExportsSymbols = true; 306 Md5.update(GV.getName()); 307 Md5.update(ArrayRef<uint8_t>{0}); 308 }; 309 310 for (auto &F : *M) 311 AddGlobal(F); 312 for (auto &GV : M->globals()) 313 AddGlobal(GV); 314 for (auto &GA : M->aliases()) 315 AddGlobal(GA); 316 for (auto &IF : M->ifuncs()) 317 AddGlobal(IF); 318 319 if (!ExportsSymbols) 320 return ""; 321 322 MD5::MD5Result R; 323 Md5.final(R); 324 325 SmallString<32> Str; 326 MD5::stringifyResult(R, Str); 327 return ("." + Str).str(); 328 } 329 330 void VFABI::setVectorVariantNames(CallInst *CI, 331 ArrayRef<std::string> VariantMappings) { 332 if (VariantMappings.empty()) 333 return; 334 335 SmallString<256> Buffer; 336 llvm::raw_svector_ostream Out(Buffer); 337 for (const std::string &VariantMapping : VariantMappings) 338 Out << VariantMapping << ","; 339 // Get rid of the trailing ','. 340 assert(!Buffer.str().empty() && "Must have at least one char."); 341 Buffer.pop_back(); 342 343 Module *M = CI->getModule(); 344 #ifndef NDEBUG 345 for (const std::string &VariantMapping : VariantMappings) { 346 LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); 347 std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M); 348 assert(VI && "Cannot add an invalid VFABI name."); 349 assert(M->getNamedValue(VI->VectorName) && 350 "Cannot add variant to attribute: " 351 "vector function declaration is missing."); 352 } 353 #endif 354 CI->addFnAttr( 355 Attribute::get(M->getContext(), MappingsAttrName, Buffer.str())); 356 } 357 358 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, 359 StringRef SectionName, Align Alignment) { 360 // Embed the memory buffer into the module. 361 Constant *ModuleConstant = ConstantDataArray::get( 362 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); 363 GlobalVariable *GV = new GlobalVariable( 364 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, 365 ModuleConstant, "llvm.embedded.object"); 366 GV->setSection(SectionName); 367 GV->setAlignment(Alignment); 368 369 LLVMContext &Ctx = M.getContext(); 370 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); 371 Metadata *MDVals[] = {ConstantAsMetadata::get(GV), 372 MDString::get(Ctx, SectionName)}; 373 374 MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 375 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); 376 377 appendToCompilerUsed(M, GV); 378 } 379 380 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( 381 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { 382 SmallVector<GlobalIFunc *, 32> AllIFuncs; 383 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; 384 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs 385 for (GlobalIFunc &GI : M.ifuncs()) 386 AllIFuncs.push_back(&GI); 387 IFuncsToLower = AllIFuncs; 388 } 389 390 bool UnhandledUsers = false; 391 LLVMContext &Ctx = M.getContext(); 392 const DataLayout &DL = M.getDataLayout(); 393 394 PointerType *TableEntryTy = 395 Ctx.supportsTypedPointers() 396 ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace()) 397 : PointerType::get(Ctx, DL.getProgramAddressSpace()); 398 399 ArrayType *FuncPtrTableTy = 400 ArrayType::get(TableEntryTy, IFuncsToLower.size()); 401 402 Align PtrAlign = DL.getABITypeAlign(TableEntryTy); 403 404 // Create a global table of function pointers we'll initialize in a global 405 // constructor. 406 auto *FuncPtrTable = new GlobalVariable( 407 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, 408 PoisonValue::get(FuncPtrTableTy), "", nullptr, 409 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); 410 FuncPtrTable->setAlignment(PtrAlign); 411 412 // Create a function to initialize the function pointer table. 413 Function *NewCtor = Function::Create( 414 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, 415 DL.getProgramAddressSpace(), "", &M); 416 417 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); 418 IRBuilder<> InitBuilder(BB); 419 420 size_t TableIndex = 0; 421 for (GlobalIFunc *GI : IFuncsToLower) { 422 Function *ResolvedFunction = GI->getResolverFunction(); 423 424 // We don't know what to pass to a resolver function taking arguments 425 // 426 // FIXME: Is this even valid? clang and gcc don't complain but this 427 // probably should be invalid IR. We could just pass through undef. 428 if (!std::empty(ResolvedFunction->getFunctionType()->params())) { 429 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " 430 << ResolvedFunction->getName() << " with parameters\n"); 431 UnhandledUsers = true; 432 continue; 433 } 434 435 // Initialize the function pointer table. 436 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); 437 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); 438 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( 439 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); 440 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); 441 442 // Update all users to load a pointer from the global table. 443 for (User *User : make_early_inc_range(GI->users())) { 444 Instruction *UserInst = dyn_cast<Instruction>(User); 445 if (!UserInst) { 446 // TODO: Should handle constantexpr casts in user instructions. Probably 447 // can't do much about constant initializers. 448 UnhandledUsers = true; 449 continue; 450 } 451 452 IRBuilder<> UseBuilder(UserInst); 453 LoadInst *ResolvedTarget = 454 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); 455 Value *ResolvedCast = 456 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); 457 UserInst->replaceUsesOfWith(GI, ResolvedCast); 458 } 459 460 // If we handled all users, erase the ifunc. 461 if (GI->use_empty()) 462 GI->eraseFromParent(); 463 } 464 465 InitBuilder.CreateRetVoid(); 466 467 PointerType *ConstantDataTy = Ctx.supportsTypedPointers() 468 ? PointerType::get(Type::getInt8Ty(Ctx), 0) 469 : PointerType::get(Ctx, 0); 470 471 // TODO: Is this the right priority? Probably should be before any other 472 // constructors? 473 const int Priority = 10; 474 appendToGlobalCtors(M, NewCtor, Priority, 475 ConstantPointerNull::get(ConstantDataTy)); 476 return UnhandledUsers; 477 } 478