1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This family of functions perform manipulations on Modules. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Transforms/Utils/ModuleUtils.h" 14 #include "llvm/Analysis/VectorUtils.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/IR/DerivedTypes.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/IRBuilder.h" 19 #include "llvm/IR/MDBuilder.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Support/MD5.h" 22 #include "llvm/Support/raw_ostream.h" 23 #include "llvm/Support/xxhash.h" 24 25 using namespace llvm; 26 27 #define DEBUG_TYPE "moduleutils" 28 29 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, 30 int Priority, Constant *Data) { 31 IRBuilder<> IRB(M.getContext()); 32 33 // Get the current set of static global constructors and add the new ctor 34 // to the list. 35 SmallVector<Constant *, 16> CurrentCtors; 36 StructType *EltTy; 37 if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { 38 EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType()); 39 if (Constant *Init = GVCtor->getInitializer()) { 40 unsigned n = Init->getNumOperands(); 41 CurrentCtors.reserve(n + 1); 42 for (unsigned i = 0; i != n; ++i) 43 CurrentCtors.push_back(cast<Constant>(Init->getOperand(i))); 44 } 45 GVCtor->eraseFromParent(); 46 } else { 47 EltTy = StructType::get( 48 IRB.getInt32Ty(), 49 PointerType::get(M.getContext(), F->getAddressSpace()), IRB.getPtrTy()); 50 } 51 52 // Build a 3 field global_ctor entry. We don't take a comdat key. 53 Constant *CSVals[3]; 54 CSVals[0] = IRB.getInt32(Priority); 55 CSVals[1] = F; 56 CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy()) 57 : Constant::getNullValue(IRB.getPtrTy()); 58 Constant *RuntimeCtorInit = 59 ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); 60 61 CurrentCtors.push_back(RuntimeCtorInit); 62 63 // Create a new initializer. 64 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 65 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 66 67 // Create the new global variable and replace all uses of 68 // the old global variable with the new one. 69 (void)new GlobalVariable(M, NewInit->getType(), false, 70 GlobalValue::AppendingLinkage, NewInit, ArrayName); 71 } 72 73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { 74 appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); 75 } 76 77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { 78 appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); 79 } 80 81 static void transformGlobalArray(StringRef ArrayName, Module &M, 82 const GlobalCtorTransformFn &Fn) { 83 GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName); 84 if (!GVCtor) 85 return; 86 87 IRBuilder<> IRB(M.getContext()); 88 SmallVector<Constant *, 16> CurrentCtors; 89 bool Changed = false; 90 StructType *EltTy = 91 cast<StructType>(GVCtor->getValueType()->getArrayElementType()); 92 if (Constant *Init = GVCtor->getInitializer()) { 93 CurrentCtors.reserve(Init->getNumOperands()); 94 for (Value *OP : Init->operands()) { 95 Constant *C = cast<Constant>(OP); 96 Constant *NewC = Fn(C); 97 Changed |= (!NewC || NewC != C); 98 if (NewC) 99 CurrentCtors.push_back(NewC); 100 } 101 } 102 if (!Changed) 103 return; 104 105 GVCtor->eraseFromParent(); 106 107 // Create a new initializer. 108 ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); 109 Constant *NewInit = ConstantArray::get(AT, CurrentCtors); 110 111 // Create the new global variable and replace all uses of 112 // the old global variable with the new one. 113 (void)new GlobalVariable(M, NewInit->getType(), false, 114 GlobalValue::AppendingLinkage, NewInit, ArrayName); 115 } 116 117 void llvm::transformGlobalCtors(Module &M, const GlobalCtorTransformFn &Fn) { 118 transformGlobalArray("llvm.global_ctors", M, Fn); 119 } 120 121 void llvm::transformGlobalDtors(Module &M, const GlobalCtorTransformFn &Fn) { 122 transformGlobalArray("llvm.global_dtors", M, Fn); 123 } 124 125 static void collectUsedGlobals(GlobalVariable *GV, 126 SmallSetVector<Constant *, 16> &Init) { 127 if (!GV || !GV->hasInitializer()) 128 return; 129 130 auto *CA = cast<ConstantArray>(GV->getInitializer()); 131 for (Use &Op : CA->operands()) 132 Init.insert(cast<Constant>(Op)); 133 } 134 135 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) { 136 GlobalVariable *GV = M.getGlobalVariable(Name); 137 138 SmallSetVector<Constant *, 16> Init; 139 collectUsedGlobals(GV, Init); 140 if (GV) 141 GV->eraseFromParent(); 142 143 Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext()); 144 for (auto *V : Values) 145 Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); 146 147 if (Init.empty()) 148 return; 149 150 ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); 151 GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 152 ConstantArray::get(ATy, Init.getArrayRef()), 153 Name); 154 GV->setSection("llvm.metadata"); 155 } 156 157 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) { 158 appendToUsedList(M, "llvm.used", Values); 159 } 160 161 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) { 162 appendToUsedList(M, "llvm.compiler.used", Values); 163 } 164 165 static void removeFromUsedList(Module &M, StringRef Name, 166 function_ref<bool(Constant *)> ShouldRemove) { 167 GlobalVariable *GV = M.getNamedGlobal(Name); 168 if (!GV) 169 return; 170 171 SmallSetVector<Constant *, 16> Init; 172 collectUsedGlobals(GV, Init); 173 174 Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType(); 175 176 SmallVector<Constant *, 16> NewInit; 177 for (Constant *MaybeRemoved : Init) { 178 if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) 179 NewInit.push_back(MaybeRemoved); 180 } 181 182 if (!NewInit.empty()) { 183 ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); 184 GlobalVariable *NewGV = 185 new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, 186 ConstantArray::get(ATy, NewInit), "", GV, 187 GV->getThreadLocalMode(), GV->getAddressSpace()); 188 NewGV->setSection(GV->getSection()); 189 NewGV->takeName(GV); 190 } 191 192 GV->eraseFromParent(); 193 } 194 195 void llvm::removeFromUsedLists(Module &M, 196 function_ref<bool(Constant *)> ShouldRemove) { 197 removeFromUsedList(M, "llvm.used", ShouldRemove); 198 removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); 199 } 200 201 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) { 202 if (!M.getModuleFlag("kcfi")) 203 return; 204 // Matches CodeGenModule::CreateKCFITypeId in Clang. 205 LLVMContext &Ctx = M.getContext(); 206 MDBuilder MDB(Ctx); 207 std::string Type = MangledType.str(); 208 if (M.getModuleFlag("cfi-normalize-integers")) 209 Type += ".normalized"; 210 F.setMetadata(LLVMContext::MD_kcfi_type, 211 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get( 212 Type::getInt32Ty(Ctx), 213 static_cast<uint32_t>(xxHash64(Type)))))); 214 // If the module was compiled with -fpatchable-function-entry, ensure 215 // we use the same patchable-function-prefix. 216 if (auto *MD = mdconst::extract_or_null<ConstantInt>( 217 M.getModuleFlag("kcfi-offset"))) { 218 if (unsigned Offset = MD->getZExtValue()) 219 F.addFnAttr("patchable-function-prefix", std::to_string(Offset)); 220 } 221 } 222 223 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName, 224 ArrayRef<Type *> InitArgTypes, 225 bool Weak) { 226 assert(!InitName.empty() && "Expected init function name"); 227 auto *VoidTy = Type::getVoidTy(M.getContext()); 228 auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false); 229 auto FnCallee = M.getOrInsertFunction(InitName, FnTy); 230 auto *Fn = cast<Function>(FnCallee.getCallee()); 231 if (Weak && Fn->isDeclaration()) 232 Fn->setLinkage(Function::ExternalWeakLinkage); 233 return FnCallee; 234 } 235 236 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) { 237 Function *Ctor = Function::createWithDefaultAttr( 238 FunctionType::get(Type::getVoidTy(M.getContext()), false), 239 GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(), 240 CtorName, &M); 241 Ctor->addFnAttr(Attribute::NoUnwind); 242 setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void) 243 BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); 244 ReturnInst::Create(M.getContext(), CtorBB); 245 // Ensure Ctor cannot be discarded, even if in a comdat. 246 appendToUsed(M, {Ctor}); 247 return Ctor; 248 } 249 250 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions( 251 Module &M, StringRef CtorName, StringRef InitName, 252 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 253 StringRef VersionCheckName, bool Weak) { 254 assert(!InitName.empty() && "Expected init function name"); 255 assert(InitArgs.size() == InitArgTypes.size() && 256 "Sanitizer's init function expects different number of arguments"); 257 FunctionCallee InitFunction = 258 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); 259 Function *Ctor = createSanitizerCtor(M, CtorName); 260 IRBuilder<> IRB(M.getContext()); 261 262 BasicBlock *RetBB = &Ctor->getEntryBlock(); 263 if (Weak) { 264 RetBB->setName("ret"); 265 auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); 266 auto *CallInitBB = 267 BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); 268 auto *InitFn = cast<Function>(InitFunction.getCallee()); 269 auto *InitFnPtr = 270 PointerType::get(M.getContext(), InitFn->getAddressSpace()); 271 IRB.SetInsertPoint(EntryBB); 272 Value *InitNotNull = 273 IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); 274 IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); 275 IRB.SetInsertPoint(CallInitBB); 276 } else { 277 IRB.SetInsertPoint(RetBB->getTerminator()); 278 } 279 280 IRB.CreateCall(InitFunction, InitArgs); 281 if (!VersionCheckName.empty()) { 282 FunctionCallee VersionCheckFunction = M.getOrInsertFunction( 283 VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), 284 AttributeList()); 285 IRB.CreateCall(VersionCheckFunction, {}); 286 } 287 288 if (Weak) 289 IRB.CreateBr(RetBB); 290 291 return std::make_pair(Ctor, InitFunction); 292 } 293 294 std::pair<Function *, FunctionCallee> 295 llvm::getOrCreateSanitizerCtorAndInitFunctions( 296 Module &M, StringRef CtorName, StringRef InitName, 297 ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, 298 function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback, 299 StringRef VersionCheckName, bool Weak) { 300 assert(!CtorName.empty() && "Expected ctor function name"); 301 302 if (Function *Ctor = M.getFunction(CtorName)) 303 // FIXME: Sink this logic into the module, similar to the handling of 304 // globals. This will make moving to a concurrent model much easier. 305 if (Ctor->arg_empty() || 306 Ctor->getReturnType() == Type::getVoidTy(M.getContext())) 307 return {Ctor, 308 declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; 309 310 Function *Ctor; 311 FunctionCallee InitFunction; 312 std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( 313 M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); 314 FunctionsCreatedCallback(Ctor, InitFunction); 315 return std::make_pair(Ctor, InitFunction); 316 } 317 318 void llvm::filterDeadComdatFunctions( 319 SmallVectorImpl<Function *> &DeadComdatFunctions) { 320 SmallPtrSet<Function *, 32> MaybeDeadFunctions; 321 SmallPtrSet<Comdat *, 32> MaybeDeadComdats; 322 for (Function *F : DeadComdatFunctions) { 323 MaybeDeadFunctions.insert(F); 324 if (Comdat *C = F->getComdat()) 325 MaybeDeadComdats.insert(C); 326 } 327 328 // Find comdats for which all users are dead now. 329 SmallPtrSet<Comdat *, 32> DeadComdats; 330 for (Comdat *C : MaybeDeadComdats) { 331 auto IsUserDead = [&](GlobalObject *GO) { 332 auto *F = dyn_cast<Function>(GO); 333 return F && MaybeDeadFunctions.contains(F); 334 }; 335 if (all_of(C->getUsers(), IsUserDead)) 336 DeadComdats.insert(C); 337 } 338 339 // Only keep functions which have no comdat or a dead comdat. 340 erase_if(DeadComdatFunctions, [&](Function *F) { 341 Comdat *C = F->getComdat(); 342 return C && !DeadComdats.contains(C); 343 }); 344 } 345 346 std::string llvm::getUniqueModuleId(Module *M) { 347 MD5 Md5; 348 bool ExportsSymbols = false; 349 auto AddGlobal = [&](GlobalValue &GV) { 350 if (GV.isDeclaration() || GV.getName().starts_with("llvm.") || 351 !GV.hasExternalLinkage() || GV.hasComdat()) 352 return; 353 ExportsSymbols = true; 354 Md5.update(GV.getName()); 355 Md5.update(ArrayRef<uint8_t>{0}); 356 }; 357 358 for (auto &F : *M) 359 AddGlobal(F); 360 for (auto &GV : M->globals()) 361 AddGlobal(GV); 362 for (auto &GA : M->aliases()) 363 AddGlobal(GA); 364 for (auto &IF : M->ifuncs()) 365 AddGlobal(IF); 366 367 if (!ExportsSymbols) 368 return ""; 369 370 MD5::MD5Result R; 371 Md5.final(R); 372 373 SmallString<32> Str; 374 MD5::stringifyResult(R, Str); 375 return ("." + Str).str(); 376 } 377 378 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, 379 StringRef SectionName, Align Alignment) { 380 // Embed the memory buffer into the module. 381 Constant *ModuleConstant = ConstantDataArray::get( 382 M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); 383 GlobalVariable *GV = new GlobalVariable( 384 M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, 385 ModuleConstant, "llvm.embedded.object"); 386 GV->setSection(SectionName); 387 GV->setAlignment(Alignment); 388 389 LLVMContext &Ctx = M.getContext(); 390 NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); 391 Metadata *MDVals[] = {ConstantAsMetadata::get(GV), 392 MDString::get(Ctx, SectionName)}; 393 394 MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); 395 GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); 396 397 appendToCompilerUsed(M, GV); 398 } 399 400 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( 401 Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) { 402 SmallVector<GlobalIFunc *, 32> AllIFuncs; 403 ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower; 404 if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs 405 for (GlobalIFunc &GI : M.ifuncs()) 406 AllIFuncs.push_back(&GI); 407 IFuncsToLower = AllIFuncs; 408 } 409 410 bool UnhandledUsers = false; 411 LLVMContext &Ctx = M.getContext(); 412 const DataLayout &DL = M.getDataLayout(); 413 414 PointerType *TableEntryTy = 415 PointerType::get(Ctx, DL.getProgramAddressSpace()); 416 417 ArrayType *FuncPtrTableTy = 418 ArrayType::get(TableEntryTy, IFuncsToLower.size()); 419 420 Align PtrAlign = DL.getABITypeAlign(TableEntryTy); 421 422 // Create a global table of function pointers we'll initialize in a global 423 // constructor. 424 auto *FuncPtrTable = new GlobalVariable( 425 M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, 426 PoisonValue::get(FuncPtrTableTy), "", nullptr, 427 GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); 428 FuncPtrTable->setAlignment(PtrAlign); 429 430 // Create a function to initialize the function pointer table. 431 Function *NewCtor = Function::Create( 432 FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, 433 DL.getProgramAddressSpace(), "", &M); 434 435 BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); 436 IRBuilder<> InitBuilder(BB); 437 438 size_t TableIndex = 0; 439 for (GlobalIFunc *GI : IFuncsToLower) { 440 Function *ResolvedFunction = GI->getResolverFunction(); 441 442 // We don't know what to pass to a resolver function taking arguments 443 // 444 // FIXME: Is this even valid? clang and gcc don't complain but this 445 // probably should be invalid IR. We could just pass through undef. 446 if (!std::empty(ResolvedFunction->getFunctionType()->params())) { 447 LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " 448 << ResolvedFunction->getName() << " with parameters\n"); 449 UnhandledUsers = true; 450 continue; 451 } 452 453 // Initialize the function pointer table. 454 CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); 455 Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); 456 Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32( 457 FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); 458 InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); 459 460 // Update all users to load a pointer from the global table. 461 for (User *User : make_early_inc_range(GI->users())) { 462 Instruction *UserInst = dyn_cast<Instruction>(User); 463 if (!UserInst) { 464 // TODO: Should handle constantexpr casts in user instructions. Probably 465 // can't do much about constant initializers. 466 UnhandledUsers = true; 467 continue; 468 } 469 470 IRBuilder<> UseBuilder(UserInst); 471 LoadInst *ResolvedTarget = 472 UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); 473 Value *ResolvedCast = 474 UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); 475 UserInst->replaceUsesOfWith(GI, ResolvedCast); 476 } 477 478 // If we handled all users, erase the ifunc. 479 if (GI->use_empty()) 480 GI->eraseFromParent(); 481 } 482 483 InitBuilder.CreateRetVoid(); 484 485 PointerType *ConstantDataTy = PointerType::get(Ctx, 0); 486 487 // TODO: Is this the right priority? Probably should be before any other 488 // constructors? 489 const int Priority = 10; 490 appendToGlobalCtors(M, NewCtor, Priority, 491 ConstantPointerNull::get(ConstantDataTy)); 492 return UnhandledUsers; 493 } 494