xref: /llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp (revision 048cf8857e081fb80d5ac8b24a79f999d632141b)
1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions perform manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/MD5.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Support/xxhash.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "moduleutils"
28 
29 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
30                                 int Priority, Constant *Data) {
31   IRBuilder<> IRB(M.getContext());
32   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
33 
34   // Get the current set of static global constructors and add the new ctor
35   // to the list.
36   SmallVector<Constant *, 16> CurrentCtors;
37   StructType *EltTy;
38   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
39     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
40     if (Constant *Init = GVCtor->getInitializer()) {
41       unsigned n = Init->getNumOperands();
42       CurrentCtors.reserve(n + 1);
43       for (unsigned i = 0; i != n; ++i)
44         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
45     }
46     GVCtor->eraseFromParent();
47   } else {
48     EltTy = StructType::get(IRB.getInt32Ty(),
49                             PointerType::get(FnTy, F->getAddressSpace()),
50                             IRB.getPtrTy());
51   }
52 
53   // Build a 3 field global_ctor entry.  We don't take a comdat key.
54   Constant *CSVals[3];
55   CSVals[0] = IRB.getInt32(Priority);
56   CSVals[1] = F;
57   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
58                    : Constant::getNullValue(IRB.getPtrTy());
59   Constant *RuntimeCtorInit =
60       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
61 
62   CurrentCtors.push_back(RuntimeCtorInit);
63 
64   // Create a new initializer.
65   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
66   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
67 
68   // Create the new global variable and replace all uses of
69   // the old global variable with the new one.
70   (void)new GlobalVariable(M, NewInit->getType(), false,
71                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
72 }
73 
74 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
75   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
76 }
77 
78 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
79   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
80 }
81 
82 static void transformGlobalArray(StringRef ArrayName, Module &M,
83                                  const GlobalCtorTransformFn &Fn) {
84   GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName);
85   if (!GVCtor)
86     return;
87 
88   IRBuilder<> IRB(M.getContext());
89   SmallVector<Constant *, 16> CurrentCtors;
90   bool Changed = false;
91   StructType *EltTy =
92       cast<StructType>(GVCtor->getValueType()->getArrayElementType());
93   if (Constant *Init = GVCtor->getInitializer()) {
94     CurrentCtors.reserve(Init->getNumOperands());
95     for (Value *OP : Init->operands()) {
96       Constant *C = cast<Constant>(OP);
97       Constant *NewC = Fn(C);
98       Changed |= (!NewC || NewC != C);
99       if (NewC)
100         CurrentCtors.push_back(NewC);
101     }
102   }
103   if (!Changed)
104     return;
105 
106   GVCtor->eraseFromParent();
107 
108   // Create a new initializer.
109   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
110   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
111 
112   // Create the new global variable and replace all uses of
113   // the old global variable with the new one.
114   (void)new GlobalVariable(M, NewInit->getType(), false,
115                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
116 }
117 
118 void llvm::transformGlobalCtors(Module &M, const GlobalCtorTransformFn &Fn) {
119   transformGlobalArray("llvm.global_ctors", M, Fn);
120 }
121 
122 void llvm::transformGlobalDtors(Module &M, const GlobalCtorTransformFn &Fn) {
123   transformGlobalArray("llvm.global_dtors", M, Fn);
124 }
125 
126 static void collectUsedGlobals(GlobalVariable *GV,
127                                SmallSetVector<Constant *, 16> &Init) {
128   if (!GV || !GV->hasInitializer())
129     return;
130 
131   auto *CA = cast<ConstantArray>(GV->getInitializer());
132   for (Use &Op : CA->operands())
133     Init.insert(cast<Constant>(Op));
134 }
135 
136 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
137   GlobalVariable *GV = M.getGlobalVariable(Name);
138 
139   SmallSetVector<Constant *, 16> Init;
140   collectUsedGlobals(GV, Init);
141   if (GV)
142     GV->eraseFromParent();
143 
144   Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
145   for (auto *V : Values)
146     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
147 
148   if (Init.empty())
149     return;
150 
151   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
152   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
153                                 ConstantArray::get(ATy, Init.getArrayRef()),
154                                 Name);
155   GV->setSection("llvm.metadata");
156 }
157 
158 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
159   appendToUsedList(M, "llvm.used", Values);
160 }
161 
162 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
163   appendToUsedList(M, "llvm.compiler.used", Values);
164 }
165 
166 static void removeFromUsedList(Module &M, StringRef Name,
167                                function_ref<bool(Constant *)> ShouldRemove) {
168   GlobalVariable *GV = M.getNamedGlobal(Name);
169   if (!GV)
170     return;
171 
172   SmallSetVector<Constant *, 16> Init;
173   collectUsedGlobals(GV, Init);
174 
175   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
176 
177   SmallVector<Constant *, 16> NewInit;
178   for (Constant *MaybeRemoved : Init) {
179     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
180       NewInit.push_back(MaybeRemoved);
181   }
182 
183   if (!NewInit.empty()) {
184     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
185     GlobalVariable *NewGV =
186         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
187                            ConstantArray::get(ATy, NewInit), "", GV,
188                            GV->getThreadLocalMode(), GV->getAddressSpace());
189     NewGV->setSection(GV->getSection());
190     NewGV->takeName(GV);
191   }
192 
193   GV->eraseFromParent();
194 }
195 
196 void llvm::removeFromUsedLists(Module &M,
197                                function_ref<bool(Constant *)> ShouldRemove) {
198   removeFromUsedList(M, "llvm.used", ShouldRemove);
199   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
200 }
201 
202 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
203   if (!M.getModuleFlag("kcfi"))
204     return;
205   // Matches CodeGenModule::CreateKCFITypeId in Clang.
206   LLVMContext &Ctx = M.getContext();
207   MDBuilder MDB(Ctx);
208   F.setMetadata(
209       LLVMContext::MD_kcfi_type,
210       MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
211                            Type::getInt32Ty(Ctx),
212                            static_cast<uint32_t>(xxHash64(MangledType))))));
213   // If the module was compiled with -fpatchable-function-entry, ensure
214   // we use the same patchable-function-prefix.
215   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
216           M.getModuleFlag("kcfi-offset"))) {
217     if (unsigned Offset = MD->getZExtValue())
218       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
219   }
220 }
221 
222 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
223                                                   ArrayRef<Type *> InitArgTypes,
224                                                   bool Weak) {
225   assert(!InitName.empty() && "Expected init function name");
226   auto *VoidTy = Type::getVoidTy(M.getContext());
227   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
228   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
229   auto *Fn = cast<Function>(FnCallee.getCallee());
230   if (Weak && Fn->isDeclaration())
231     Fn->setLinkage(Function::ExternalWeakLinkage);
232   return FnCallee;
233 }
234 
235 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
236   Function *Ctor = Function::createWithDefaultAttr(
237       FunctionType::get(Type::getVoidTy(M.getContext()), false),
238       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
239       CtorName, &M);
240   Ctor->addFnAttr(Attribute::NoUnwind);
241   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
242   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
243   ReturnInst::Create(M.getContext(), CtorBB);
244   // Ensure Ctor cannot be discarded, even if in a comdat.
245   appendToUsed(M, {Ctor});
246   return Ctor;
247 }
248 
249 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
250     Module &M, StringRef CtorName, StringRef InitName,
251     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
252     StringRef VersionCheckName, bool Weak) {
253   assert(!InitName.empty() && "Expected init function name");
254   assert(InitArgs.size() == InitArgTypes.size() &&
255          "Sanitizer's init function expects different number of arguments");
256   FunctionCallee InitFunction =
257       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
258   Function *Ctor = createSanitizerCtor(M, CtorName);
259   IRBuilder<> IRB(M.getContext());
260 
261   BasicBlock *RetBB = &Ctor->getEntryBlock();
262   if (Weak) {
263     RetBB->setName("ret");
264     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
265     auto *CallInitBB =
266         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
267     auto *InitFn = cast<Function>(InitFunction.getCallee());
268     auto *InitFnPtr =
269         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
270     IRB.SetInsertPoint(EntryBB);
271     Value *InitNotNull =
272         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
273     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
274     IRB.SetInsertPoint(CallInitBB);
275   } else {
276     IRB.SetInsertPoint(RetBB->getTerminator());
277   }
278 
279   IRB.CreateCall(InitFunction, InitArgs);
280   if (!VersionCheckName.empty()) {
281     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
282         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
283         AttributeList());
284     IRB.CreateCall(VersionCheckFunction, {});
285   }
286 
287   if (Weak)
288     IRB.CreateBr(RetBB);
289 
290   return std::make_pair(Ctor, InitFunction);
291 }
292 
293 std::pair<Function *, FunctionCallee>
294 llvm::getOrCreateSanitizerCtorAndInitFunctions(
295     Module &M, StringRef CtorName, StringRef InitName,
296     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
297     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
298     StringRef VersionCheckName, bool Weak) {
299   assert(!CtorName.empty() && "Expected ctor function name");
300 
301   if (Function *Ctor = M.getFunction(CtorName))
302     // FIXME: Sink this logic into the module, similar to the handling of
303     // globals. This will make moving to a concurrent model much easier.
304     if (Ctor->arg_empty() ||
305         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
306       return {Ctor,
307               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
308 
309   Function *Ctor;
310   FunctionCallee InitFunction;
311   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
312       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
313   FunctionsCreatedCallback(Ctor, InitFunction);
314   return std::make_pair(Ctor, InitFunction);
315 }
316 
317 void llvm::filterDeadComdatFunctions(
318     SmallVectorImpl<Function *> &DeadComdatFunctions) {
319   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
320   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
321   for (Function *F : DeadComdatFunctions) {
322     MaybeDeadFunctions.insert(F);
323     if (Comdat *C = F->getComdat())
324       MaybeDeadComdats.insert(C);
325   }
326 
327   // Find comdats for which all users are dead now.
328   SmallPtrSet<Comdat *, 32> DeadComdats;
329   for (Comdat *C : MaybeDeadComdats) {
330     auto IsUserDead = [&](GlobalObject *GO) {
331       auto *F = dyn_cast<Function>(GO);
332       return F && MaybeDeadFunctions.contains(F);
333     };
334     if (all_of(C->getUsers(), IsUserDead))
335       DeadComdats.insert(C);
336   }
337 
338   // Only keep functions which have no comdat or a dead comdat.
339   erase_if(DeadComdatFunctions, [&](Function *F) {
340     Comdat *C = F->getComdat();
341     return C && !DeadComdats.contains(C);
342   });
343 }
344 
345 std::string llvm::getUniqueModuleId(Module *M) {
346   MD5 Md5;
347   bool ExportsSymbols = false;
348   auto AddGlobal = [&](GlobalValue &GV) {
349     if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
350         !GV.hasExternalLinkage() || GV.hasComdat())
351       return;
352     ExportsSymbols = true;
353     Md5.update(GV.getName());
354     Md5.update(ArrayRef<uint8_t>{0});
355   };
356 
357   for (auto &F : *M)
358     AddGlobal(F);
359   for (auto &GV : M->globals())
360     AddGlobal(GV);
361   for (auto &GA : M->aliases())
362     AddGlobal(GA);
363   for (auto &IF : M->ifuncs())
364     AddGlobal(IF);
365 
366   if (!ExportsSymbols)
367     return "";
368 
369   MD5::MD5Result R;
370   Md5.final(R);
371 
372   SmallString<32> Str;
373   MD5::stringifyResult(R, Str);
374   return ("." + Str).str();
375 }
376 
377 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
378                                StringRef SectionName, Align Alignment) {
379   // Embed the memory buffer into the module.
380   Constant *ModuleConstant = ConstantDataArray::get(
381       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
382   GlobalVariable *GV = new GlobalVariable(
383       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
384       ModuleConstant, "llvm.embedded.object");
385   GV->setSection(SectionName);
386   GV->setAlignment(Alignment);
387 
388   LLVMContext &Ctx = M.getContext();
389   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
390   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
391                         MDString::get(Ctx, SectionName)};
392 
393   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
394   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
395 
396   appendToCompilerUsed(M, GV);
397 }
398 
399 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
400     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
401   SmallVector<GlobalIFunc *, 32> AllIFuncs;
402   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
403   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
404     for (GlobalIFunc &GI : M.ifuncs())
405       AllIFuncs.push_back(&GI);
406     IFuncsToLower = AllIFuncs;
407   }
408 
409   bool UnhandledUsers = false;
410   LLVMContext &Ctx = M.getContext();
411   const DataLayout &DL = M.getDataLayout();
412 
413   PointerType *TableEntryTy =
414       PointerType::get(Ctx, DL.getProgramAddressSpace());
415 
416   ArrayType *FuncPtrTableTy =
417       ArrayType::get(TableEntryTy, IFuncsToLower.size());
418 
419   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
420 
421   // Create a global table of function pointers we'll initialize in a global
422   // constructor.
423   auto *FuncPtrTable = new GlobalVariable(
424       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
425       PoisonValue::get(FuncPtrTableTy), "", nullptr,
426       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
427   FuncPtrTable->setAlignment(PtrAlign);
428 
429   // Create a function to initialize the function pointer table.
430   Function *NewCtor = Function::Create(
431       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
432       DL.getProgramAddressSpace(), "", &M);
433 
434   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
435   IRBuilder<> InitBuilder(BB);
436 
437   size_t TableIndex = 0;
438   for (GlobalIFunc *GI : IFuncsToLower) {
439     Function *ResolvedFunction = GI->getResolverFunction();
440 
441     // We don't know what to pass to a resolver function taking arguments
442     //
443     // FIXME: Is this even valid? clang and gcc don't complain but this
444     // probably should be invalid IR. We could just pass through undef.
445     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
446       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
447                         << ResolvedFunction->getName() << " with parameters\n");
448       UnhandledUsers = true;
449       continue;
450     }
451 
452     // Initialize the function pointer table.
453     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
454     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
455     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
456         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
457     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
458 
459     // Update all users to load a pointer from the global table.
460     for (User *User : make_early_inc_range(GI->users())) {
461       Instruction *UserInst = dyn_cast<Instruction>(User);
462       if (!UserInst) {
463         // TODO: Should handle constantexpr casts in user instructions. Probably
464         // can't do much about constant initializers.
465         UnhandledUsers = true;
466         continue;
467       }
468 
469       IRBuilder<> UseBuilder(UserInst);
470       LoadInst *ResolvedTarget =
471           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
472       Value *ResolvedCast =
473           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
474       UserInst->replaceUsesOfWith(GI, ResolvedCast);
475     }
476 
477     // If we handled all users, erase the ifunc.
478     if (GI->use_empty())
479       GI->eraseFromParent();
480   }
481 
482   InitBuilder.CreateRetVoid();
483 
484   PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
485 
486   // TODO: Is this the right priority? Probably should be before any other
487   // constructors?
488   const int Priority = 10;
489   appendToGlobalCtors(M, NewCtor, Priority,
490                       ConstantPointerNull::get(ConstantDataTy));
491   return UnhandledUsers;
492 }
493