xref: /llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp (revision b4307437e51d3a400de21de624a1610aee23346b)
1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions performs manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/IR/DerivedTypes.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/MDBuilder.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Support/xxhash.h"
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "moduleutils"
25 
26 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
27                                 int Priority, Constant *Data) {
28   IRBuilder<> IRB(M.getContext());
29   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
30 
31   // Get the current set of static global constructors and add the new ctor
32   // to the list.
33   SmallVector<Constant *, 16> CurrentCtors;
34   StructType *EltTy;
35   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
36     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
37     if (Constant *Init = GVCtor->getInitializer()) {
38       unsigned n = Init->getNumOperands();
39       CurrentCtors.reserve(n + 1);
40       for (unsigned i = 0; i != n; ++i)
41         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
42     }
43     GVCtor->eraseFromParent();
44   } else {
45     EltTy = StructType::get(
46         IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
47         IRB.getInt8PtrTy());
48   }
49 
50   // Build a 3 field global_ctor entry.  We don't take a comdat key.
51   Constant *CSVals[3];
52   CSVals[0] = IRB.getInt32(Priority);
53   CSVals[1] = F;
54   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
55                    : Constant::getNullValue(IRB.getInt8PtrTy());
56   Constant *RuntimeCtorInit =
57       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
58 
59   CurrentCtors.push_back(RuntimeCtorInit);
60 
61   // Create a new initializer.
62   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
63   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
64 
65   // Create the new global variable and replace all uses of
66   // the old global variable with the new one.
67   (void)new GlobalVariable(M, NewInit->getType(), false,
68                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
69 }
70 
71 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
72   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
73 }
74 
75 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
76   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
77 }
78 
79 static void collectUsedGlobals(GlobalVariable *GV,
80                                SmallSetVector<Constant *, 16> &Init) {
81   if (!GV || !GV->hasInitializer())
82     return;
83 
84   auto *CA = cast<ConstantArray>(GV->getInitializer());
85   for (Use &Op : CA->operands())
86     Init.insert(cast<Constant>(Op));
87 }
88 
89 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
90   GlobalVariable *GV = M.getGlobalVariable(Name);
91 
92   SmallSetVector<Constant *, 16> Init;
93   collectUsedGlobals(GV, Init);
94   if (GV)
95     GV->eraseFromParent();
96 
97   Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
98   for (auto *V : Values)
99     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
100 
101   if (Init.empty())
102     return;
103 
104   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
105   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
106                                 ConstantArray::get(ATy, Init.getArrayRef()),
107                                 Name);
108   GV->setSection("llvm.metadata");
109 }
110 
111 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
112   appendToUsedList(M, "llvm.used", Values);
113 }
114 
115 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
116   appendToUsedList(M, "llvm.compiler.used", Values);
117 }
118 
119 static void removeFromUsedList(Module &M, StringRef Name,
120                                function_ref<bool(Constant *)> ShouldRemove) {
121   GlobalVariable *GV = M.getNamedGlobal(Name);
122   if (!GV)
123     return;
124 
125   SmallSetVector<Constant *, 16> Init;
126   collectUsedGlobals(GV, Init);
127 
128   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
129 
130   SmallVector<Constant *, 16> NewInit;
131   for (Constant *MaybeRemoved : Init) {
132     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
133       NewInit.push_back(MaybeRemoved);
134   }
135 
136   if (!NewInit.empty()) {
137     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
138     GlobalVariable *NewGV =
139         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
140                            ConstantArray::get(ATy, NewInit), "", GV,
141                            GV->getThreadLocalMode(), GV->getAddressSpace());
142     NewGV->setSection(GV->getSection());
143     NewGV->takeName(GV);
144   }
145 
146   GV->eraseFromParent();
147 }
148 
149 void llvm::removeFromUsedLists(Module &M,
150                                function_ref<bool(Constant *)> ShouldRemove) {
151   removeFromUsedList(M, "llvm.used", ShouldRemove);
152   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
153 }
154 
155 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
156   if (!M.getModuleFlag("kcfi"))
157     return;
158   // Matches CodeGenModule::CreateKCFITypeId in Clang.
159   LLVMContext &Ctx = M.getContext();
160   MDBuilder MDB(Ctx);
161   F.setMetadata(
162       LLVMContext::MD_kcfi_type,
163       MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
164                            Type::getInt32Ty(Ctx),
165                            static_cast<uint32_t>(xxHash64(MangledType))))));
166   // If the module was compiled with -fpatchable-function-entry, ensure
167   // we use the same patchable-function-prefix.
168   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
169           M.getModuleFlag("kcfi-offset"))) {
170     if (unsigned Offset = MD->getZExtValue())
171       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
172   }
173 }
174 
175 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
176                                                   ArrayRef<Type *> InitArgTypes,
177                                                   bool Weak) {
178   assert(!InitName.empty() && "Expected init function name");
179   auto *VoidTy = Type::getVoidTy(M.getContext());
180   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
181   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
182   auto *Fn = cast<Function>(FnCallee.getCallee());
183   if (Weak && Fn->isDeclaration())
184     Fn->setLinkage(Function::ExternalWeakLinkage);
185   return FnCallee;
186 }
187 
188 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
189   Function *Ctor = Function::createWithDefaultAttr(
190       FunctionType::get(Type::getVoidTy(M.getContext()), false),
191       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
192       CtorName, &M);
193   Ctor->addFnAttr(Attribute::NoUnwind);
194   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
195   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
196   ReturnInst::Create(M.getContext(), CtorBB);
197   // Ensure Ctor cannot be discarded, even if in a comdat.
198   appendToUsed(M, {Ctor});
199   return Ctor;
200 }
201 
202 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
203     Module &M, StringRef CtorName, StringRef InitName,
204     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
205     StringRef VersionCheckName, bool Weak) {
206   assert(!InitName.empty() && "Expected init function name");
207   assert(InitArgs.size() == InitArgTypes.size() &&
208          "Sanitizer's init function expects different number of arguments");
209   FunctionCallee InitFunction =
210       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
211   Function *Ctor = createSanitizerCtor(M, CtorName);
212   IRBuilder<> IRB(M.getContext());
213 
214   BasicBlock *RetBB = &Ctor->getEntryBlock();
215   if (Weak) {
216     RetBB->setName("ret");
217     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
218     auto *CallInitBB =
219         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
220     auto *InitFn = cast<Function>(InitFunction.getCallee());
221     auto *InitFnPtr =
222         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
223     IRB.SetInsertPoint(EntryBB);
224     Value *InitNotNull =
225         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
226     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
227     IRB.SetInsertPoint(CallInitBB);
228   } else {
229     IRB.SetInsertPoint(RetBB->getTerminator());
230   }
231 
232   IRB.CreateCall(InitFunction, InitArgs);
233   if (!VersionCheckName.empty()) {
234     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
235         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
236         AttributeList());
237     IRB.CreateCall(VersionCheckFunction, {});
238   }
239 
240   if (Weak)
241     IRB.CreateBr(RetBB);
242 
243   return std::make_pair(Ctor, InitFunction);
244 }
245 
246 std::pair<Function *, FunctionCallee>
247 llvm::getOrCreateSanitizerCtorAndInitFunctions(
248     Module &M, StringRef CtorName, StringRef InitName,
249     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
250     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
251     StringRef VersionCheckName, bool Weak) {
252   assert(!CtorName.empty() && "Expected ctor function name");
253 
254   if (Function *Ctor = M.getFunction(CtorName))
255     // FIXME: Sink this logic into the module, similar to the handling of
256     // globals. This will make moving to a concurrent model much easier.
257     if (Ctor->arg_empty() ||
258         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
259       return {Ctor,
260               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
261 
262   Function *Ctor;
263   FunctionCallee InitFunction;
264   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
265       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
266   FunctionsCreatedCallback(Ctor, InitFunction);
267   return std::make_pair(Ctor, InitFunction);
268 }
269 
270 void llvm::filterDeadComdatFunctions(
271     SmallVectorImpl<Function *> &DeadComdatFunctions) {
272   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
273   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
274   for (Function *F : DeadComdatFunctions) {
275     MaybeDeadFunctions.insert(F);
276     if (Comdat *C = F->getComdat())
277       MaybeDeadComdats.insert(C);
278   }
279 
280   // Find comdats for which all users are dead now.
281   SmallPtrSet<Comdat *, 32> DeadComdats;
282   for (Comdat *C : MaybeDeadComdats) {
283     auto IsUserDead = [&](GlobalObject *GO) {
284       auto *F = dyn_cast<Function>(GO);
285       return F && MaybeDeadFunctions.contains(F);
286     };
287     if (all_of(C->getUsers(), IsUserDead))
288       DeadComdats.insert(C);
289   }
290 
291   // Only keep functions which have no comdat or a dead comdat.
292   erase_if(DeadComdatFunctions, [&](Function *F) {
293     Comdat *C = F->getComdat();
294     return C && !DeadComdats.contains(C);
295   });
296 }
297 
298 std::string llvm::getUniqueModuleId(Module *M) {
299   MD5 Md5;
300   bool ExportsSymbols = false;
301   auto AddGlobal = [&](GlobalValue &GV) {
302     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
303         !GV.hasExternalLinkage() || GV.hasComdat())
304       return;
305     ExportsSymbols = true;
306     Md5.update(GV.getName());
307     Md5.update(ArrayRef<uint8_t>{0});
308   };
309 
310   for (auto &F : *M)
311     AddGlobal(F);
312   for (auto &GV : M->globals())
313     AddGlobal(GV);
314   for (auto &GA : M->aliases())
315     AddGlobal(GA);
316   for (auto &IF : M->ifuncs())
317     AddGlobal(IF);
318 
319   if (!ExportsSymbols)
320     return "";
321 
322   MD5::MD5Result R;
323   Md5.final(R);
324 
325   SmallString<32> Str;
326   MD5::stringifyResult(R, Str);
327   return ("." + Str).str();
328 }
329 
330 void VFABI::setVectorVariantNames(CallInst *CI,
331                                   ArrayRef<std::string> VariantMappings) {
332   if (VariantMappings.empty())
333     return;
334 
335   SmallString<256> Buffer;
336   llvm::raw_svector_ostream Out(Buffer);
337   for (const std::string &VariantMapping : VariantMappings)
338     Out << VariantMapping << ",";
339   // Get rid of the trailing ','.
340   assert(!Buffer.str().empty() && "Must have at least one char.");
341   Buffer.pop_back();
342 
343   Module *M = CI->getModule();
344 #ifndef NDEBUG
345   for (const std::string &VariantMapping : VariantMappings) {
346     LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
347     std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
348     assert(VI && "Cannot add an invalid VFABI name.");
349     assert(M->getNamedValue(VI->VectorName) &&
350            "Cannot add variant to attribute: "
351            "vector function declaration is missing.");
352   }
353 #endif
354   CI->addFnAttr(
355       Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
356 }
357 
358 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
359                                StringRef SectionName, Align Alignment) {
360   // Embed the memory buffer into the module.
361   Constant *ModuleConstant = ConstantDataArray::get(
362       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
363   GlobalVariable *GV = new GlobalVariable(
364       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
365       ModuleConstant, "llvm.embedded.object");
366   GV->setSection(SectionName);
367   GV->setAlignment(Alignment);
368 
369   LLVMContext &Ctx = M.getContext();
370   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
371   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
372                         MDString::get(Ctx, SectionName)};
373 
374   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
375   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
376 
377   appendToCompilerUsed(M, GV);
378 }
379 
380 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
381     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
382   SmallVector<GlobalIFunc *, 32> AllIFuncs;
383   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
384   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
385     for (GlobalIFunc &GI : M.ifuncs())
386       AllIFuncs.push_back(&GI);
387     IFuncsToLower = AllIFuncs;
388   }
389 
390   bool UnhandledUsers = false;
391   LLVMContext &Ctx = M.getContext();
392   const DataLayout &DL = M.getDataLayout();
393 
394   PointerType *TableEntryTy =
395       Ctx.supportsTypedPointers()
396           ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace())
397           : PointerType::get(Ctx, DL.getProgramAddressSpace());
398 
399   ArrayType *FuncPtrTableTy =
400       ArrayType::get(TableEntryTy, IFuncsToLower.size());
401 
402   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
403 
404   // Create a global table of function pointers we'll initialize in a global
405   // constructor.
406   auto *FuncPtrTable = new GlobalVariable(
407       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
408       PoisonValue::get(FuncPtrTableTy), "", nullptr,
409       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
410   FuncPtrTable->setAlignment(PtrAlign);
411 
412   // Create a function to initialize the function pointer table.
413   Function *NewCtor = Function::Create(
414       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
415       DL.getProgramAddressSpace(), "", &M);
416 
417   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
418   IRBuilder<> InitBuilder(BB);
419 
420   size_t TableIndex = 0;
421   for (GlobalIFunc *GI : IFuncsToLower) {
422     Function *ResolvedFunction = GI->getResolverFunction();
423 
424     // We don't know what to pass to a resolver function taking arguments
425     //
426     // FIXME: Is this even valid? clang and gcc don't complain but this
427     // probably should be invalid IR. We could just pass through undef.
428     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
429       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
430                         << ResolvedFunction->getName() << " with parameters\n");
431       UnhandledUsers = true;
432       continue;
433     }
434 
435     // Initialize the function pointer table.
436     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
437     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
438     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
439         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
440     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
441 
442     // Update all users to load a pointer from the global table.
443     for (User *User : make_early_inc_range(GI->users())) {
444       Instruction *UserInst = dyn_cast<Instruction>(User);
445       if (!UserInst) {
446         // TODO: Should handle constantexpr casts in user instructions. Probably
447         // can't do much about constant initializers.
448         UnhandledUsers = true;
449         continue;
450       }
451 
452       IRBuilder<> UseBuilder(UserInst);
453       LoadInst *ResolvedTarget =
454           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
455       Value *ResolvedCast =
456           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
457       UserInst->replaceUsesOfWith(GI, ResolvedCast);
458     }
459 
460     // If we handled all users, erase the ifunc.
461     if (GI->use_empty())
462       GI->eraseFromParent();
463   }
464 
465   InitBuilder.CreateRetVoid();
466 
467   PointerType *ConstantDataTy = Ctx.supportsTypedPointers()
468                                     ? PointerType::get(Type::getInt8Ty(Ctx), 0)
469                                     : PointerType::get(Ctx, 0);
470 
471   // TODO: Is this the right priority? Probably should be before any other
472   // constructors?
473   const int Priority = 10;
474   appendToGlobalCtors(M, NewCtor, Priority,
475                       ConstantPointerNull::get(ConstantDataTy));
476   return UnhandledUsers;
477 }
478