xref: /llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp (revision b95ed30ea2307dbcbe6199374c1e9a9b7a802ad0)
1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions perform manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/MD5.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Support/xxhash.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "moduleutils"
28 
29 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
30                                 int Priority, Constant *Data) {
31   IRBuilder<> IRB(M.getContext());
32 
33   // Get the current set of static global constructors and add the new ctor
34   // to the list.
35   SmallVector<Constant *, 16> CurrentCtors;
36   StructType *EltTy;
37   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
38     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
39     if (Constant *Init = GVCtor->getInitializer()) {
40       unsigned n = Init->getNumOperands();
41       CurrentCtors.reserve(n + 1);
42       for (unsigned i = 0; i != n; ++i)
43         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
44     }
45     GVCtor->eraseFromParent();
46   } else {
47     EltTy = StructType::get(
48         IRB.getInt32Ty(),
49         PointerType::get(M.getContext(), F->getAddressSpace()), IRB.getPtrTy());
50   }
51 
52   // Build a 3 field global_ctor entry.  We don't take a comdat key.
53   Constant *CSVals[3];
54   CSVals[0] = IRB.getInt32(Priority);
55   CSVals[1] = F;
56   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
57                    : Constant::getNullValue(IRB.getPtrTy());
58   Constant *RuntimeCtorInit =
59       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
60 
61   CurrentCtors.push_back(RuntimeCtorInit);
62 
63   // Create a new initializer.
64   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
65   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
66 
67   // Create the new global variable and replace all uses of
68   // the old global variable with the new one.
69   (void)new GlobalVariable(M, NewInit->getType(), false,
70                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
71 }
72 
73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
74   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
75 }
76 
77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
78   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
79 }
80 
81 static void transformGlobalArray(StringRef ArrayName, Module &M,
82                                  const GlobalCtorTransformFn &Fn) {
83   GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName);
84   if (!GVCtor)
85     return;
86 
87   IRBuilder<> IRB(M.getContext());
88   SmallVector<Constant *, 16> CurrentCtors;
89   bool Changed = false;
90   StructType *EltTy =
91       cast<StructType>(GVCtor->getValueType()->getArrayElementType());
92   if (Constant *Init = GVCtor->getInitializer()) {
93     CurrentCtors.reserve(Init->getNumOperands());
94     for (Value *OP : Init->operands()) {
95       Constant *C = cast<Constant>(OP);
96       Constant *NewC = Fn(C);
97       Changed |= (!NewC || NewC != C);
98       if (NewC)
99         CurrentCtors.push_back(NewC);
100     }
101   }
102   if (!Changed)
103     return;
104 
105   GVCtor->eraseFromParent();
106 
107   // Create a new initializer.
108   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
109   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
110 
111   // Create the new global variable and replace all uses of
112   // the old global variable with the new one.
113   (void)new GlobalVariable(M, NewInit->getType(), false,
114                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
115 }
116 
117 void llvm::transformGlobalCtors(Module &M, const GlobalCtorTransformFn &Fn) {
118   transformGlobalArray("llvm.global_ctors", M, Fn);
119 }
120 
121 void llvm::transformGlobalDtors(Module &M, const GlobalCtorTransformFn &Fn) {
122   transformGlobalArray("llvm.global_dtors", M, Fn);
123 }
124 
125 static void collectUsedGlobals(GlobalVariable *GV,
126                                SmallSetVector<Constant *, 16> &Init) {
127   if (!GV || !GV->hasInitializer())
128     return;
129 
130   auto *CA = cast<ConstantArray>(GV->getInitializer());
131   for (Use &Op : CA->operands())
132     Init.insert(cast<Constant>(Op));
133 }
134 
135 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
136   GlobalVariable *GV = M.getGlobalVariable(Name);
137 
138   SmallSetVector<Constant *, 16> Init;
139   collectUsedGlobals(GV, Init);
140   if (GV)
141     GV->eraseFromParent();
142 
143   Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
144   for (auto *V : Values)
145     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
146 
147   if (Init.empty())
148     return;
149 
150   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
151   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
152                                 ConstantArray::get(ATy, Init.getArrayRef()),
153                                 Name);
154   GV->setSection("llvm.metadata");
155 }
156 
157 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
158   appendToUsedList(M, "llvm.used", Values);
159 }
160 
161 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
162   appendToUsedList(M, "llvm.compiler.used", Values);
163 }
164 
165 static void removeFromUsedList(Module &M, StringRef Name,
166                                function_ref<bool(Constant *)> ShouldRemove) {
167   GlobalVariable *GV = M.getNamedGlobal(Name);
168   if (!GV)
169     return;
170 
171   SmallSetVector<Constant *, 16> Init;
172   collectUsedGlobals(GV, Init);
173 
174   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
175 
176   SmallVector<Constant *, 16> NewInit;
177   for (Constant *MaybeRemoved : Init) {
178     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
179       NewInit.push_back(MaybeRemoved);
180   }
181 
182   if (!NewInit.empty()) {
183     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
184     GlobalVariable *NewGV =
185         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
186                            ConstantArray::get(ATy, NewInit), "", GV,
187                            GV->getThreadLocalMode(), GV->getAddressSpace());
188     NewGV->setSection(GV->getSection());
189     NewGV->takeName(GV);
190   }
191 
192   GV->eraseFromParent();
193 }
194 
195 void llvm::removeFromUsedLists(Module &M,
196                                function_ref<bool(Constant *)> ShouldRemove) {
197   removeFromUsedList(M, "llvm.used", ShouldRemove);
198   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
199 }
200 
201 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
202   if (!M.getModuleFlag("kcfi"))
203     return;
204   // Matches CodeGenModule::CreateKCFITypeId in Clang.
205   LLVMContext &Ctx = M.getContext();
206   MDBuilder MDB(Ctx);
207   std::string Type = MangledType.str();
208   if (M.getModuleFlag("cfi-normalize-integers"))
209     Type += ".normalized";
210   F.setMetadata(LLVMContext::MD_kcfi_type,
211                 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
212                                      Type::getInt32Ty(Ctx),
213                                      static_cast<uint32_t>(xxHash64(Type))))));
214   // If the module was compiled with -fpatchable-function-entry, ensure
215   // we use the same patchable-function-prefix.
216   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
217           M.getModuleFlag("kcfi-offset"))) {
218     if (unsigned Offset = MD->getZExtValue())
219       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
220   }
221 }
222 
223 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
224                                                   ArrayRef<Type *> InitArgTypes,
225                                                   bool Weak) {
226   assert(!InitName.empty() && "Expected init function name");
227   auto *VoidTy = Type::getVoidTy(M.getContext());
228   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
229   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
230   auto *Fn = cast<Function>(FnCallee.getCallee());
231   if (Weak && Fn->isDeclaration())
232     Fn->setLinkage(Function::ExternalWeakLinkage);
233   return FnCallee;
234 }
235 
236 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
237   Function *Ctor = Function::createWithDefaultAttr(
238       FunctionType::get(Type::getVoidTy(M.getContext()), false),
239       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
240       CtorName, &M);
241   Ctor->addFnAttr(Attribute::NoUnwind);
242   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
243   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
244   ReturnInst::Create(M.getContext(), CtorBB);
245   // Ensure Ctor cannot be discarded, even if in a comdat.
246   appendToUsed(M, {Ctor});
247   return Ctor;
248 }
249 
250 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
251     Module &M, StringRef CtorName, StringRef InitName,
252     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
253     StringRef VersionCheckName, bool Weak) {
254   assert(!InitName.empty() && "Expected init function name");
255   assert(InitArgs.size() == InitArgTypes.size() &&
256          "Sanitizer's init function expects different number of arguments");
257   FunctionCallee InitFunction =
258       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
259   Function *Ctor = createSanitizerCtor(M, CtorName);
260   IRBuilder<> IRB(M.getContext());
261 
262   BasicBlock *RetBB = &Ctor->getEntryBlock();
263   if (Weak) {
264     RetBB->setName("ret");
265     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
266     auto *CallInitBB =
267         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
268     auto *InitFn = cast<Function>(InitFunction.getCallee());
269     auto *InitFnPtr =
270         PointerType::get(M.getContext(), InitFn->getAddressSpace());
271     IRB.SetInsertPoint(EntryBB);
272     Value *InitNotNull =
273         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
274     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
275     IRB.SetInsertPoint(CallInitBB);
276   } else {
277     IRB.SetInsertPoint(RetBB->getTerminator());
278   }
279 
280   IRB.CreateCall(InitFunction, InitArgs);
281   if (!VersionCheckName.empty()) {
282     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
283         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
284         AttributeList());
285     IRB.CreateCall(VersionCheckFunction, {});
286   }
287 
288   if (Weak)
289     IRB.CreateBr(RetBB);
290 
291   return std::make_pair(Ctor, InitFunction);
292 }
293 
294 std::pair<Function *, FunctionCallee>
295 llvm::getOrCreateSanitizerCtorAndInitFunctions(
296     Module &M, StringRef CtorName, StringRef InitName,
297     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
298     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
299     StringRef VersionCheckName, bool Weak) {
300   assert(!CtorName.empty() && "Expected ctor function name");
301 
302   if (Function *Ctor = M.getFunction(CtorName))
303     // FIXME: Sink this logic into the module, similar to the handling of
304     // globals. This will make moving to a concurrent model much easier.
305     if (Ctor->arg_empty() ||
306         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
307       return {Ctor,
308               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
309 
310   Function *Ctor;
311   FunctionCallee InitFunction;
312   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
313       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
314   FunctionsCreatedCallback(Ctor, InitFunction);
315   return std::make_pair(Ctor, InitFunction);
316 }
317 
318 void llvm::filterDeadComdatFunctions(
319     SmallVectorImpl<Function *> &DeadComdatFunctions) {
320   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
321   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
322   for (Function *F : DeadComdatFunctions) {
323     MaybeDeadFunctions.insert(F);
324     if (Comdat *C = F->getComdat())
325       MaybeDeadComdats.insert(C);
326   }
327 
328   // Find comdats for which all users are dead now.
329   SmallPtrSet<Comdat *, 32> DeadComdats;
330   for (Comdat *C : MaybeDeadComdats) {
331     auto IsUserDead = [&](GlobalObject *GO) {
332       auto *F = dyn_cast<Function>(GO);
333       return F && MaybeDeadFunctions.contains(F);
334     };
335     if (all_of(C->getUsers(), IsUserDead))
336       DeadComdats.insert(C);
337   }
338 
339   // Only keep functions which have no comdat or a dead comdat.
340   erase_if(DeadComdatFunctions, [&](Function *F) {
341     Comdat *C = F->getComdat();
342     return C && !DeadComdats.contains(C);
343   });
344 }
345 
346 std::string llvm::getUniqueModuleId(Module *M) {
347   MD5 Md5;
348   bool ExportsSymbols = false;
349   auto AddGlobal = [&](GlobalValue &GV) {
350     if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
351         !GV.hasExternalLinkage() || GV.hasComdat())
352       return;
353     ExportsSymbols = true;
354     Md5.update(GV.getName());
355     Md5.update(ArrayRef<uint8_t>{0});
356   };
357 
358   for (auto &F : *M)
359     AddGlobal(F);
360   for (auto &GV : M->globals())
361     AddGlobal(GV);
362   for (auto &GA : M->aliases())
363     AddGlobal(GA);
364   for (auto &IF : M->ifuncs())
365     AddGlobal(IF);
366 
367   if (!ExportsSymbols)
368     return "";
369 
370   MD5::MD5Result R;
371   Md5.final(R);
372 
373   SmallString<32> Str;
374   MD5::stringifyResult(R, Str);
375   return ("." + Str).str();
376 }
377 
378 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
379                                StringRef SectionName, Align Alignment) {
380   // Embed the memory buffer into the module.
381   Constant *ModuleConstant = ConstantDataArray::get(
382       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
383   GlobalVariable *GV = new GlobalVariable(
384       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
385       ModuleConstant, "llvm.embedded.object");
386   GV->setSection(SectionName);
387   GV->setAlignment(Alignment);
388 
389   LLVMContext &Ctx = M.getContext();
390   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
391   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
392                         MDString::get(Ctx, SectionName)};
393 
394   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
395   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
396 
397   appendToCompilerUsed(M, GV);
398 }
399 
400 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
401     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
402   SmallVector<GlobalIFunc *, 32> AllIFuncs;
403   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
404   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
405     for (GlobalIFunc &GI : M.ifuncs())
406       AllIFuncs.push_back(&GI);
407     IFuncsToLower = AllIFuncs;
408   }
409 
410   bool UnhandledUsers = false;
411   LLVMContext &Ctx = M.getContext();
412   const DataLayout &DL = M.getDataLayout();
413 
414   PointerType *TableEntryTy =
415       PointerType::get(Ctx, DL.getProgramAddressSpace());
416 
417   ArrayType *FuncPtrTableTy =
418       ArrayType::get(TableEntryTy, IFuncsToLower.size());
419 
420   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
421 
422   // Create a global table of function pointers we'll initialize in a global
423   // constructor.
424   auto *FuncPtrTable = new GlobalVariable(
425       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
426       PoisonValue::get(FuncPtrTableTy), "", nullptr,
427       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
428   FuncPtrTable->setAlignment(PtrAlign);
429 
430   // Create a function to initialize the function pointer table.
431   Function *NewCtor = Function::Create(
432       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
433       DL.getProgramAddressSpace(), "", &M);
434 
435   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
436   IRBuilder<> InitBuilder(BB);
437 
438   size_t TableIndex = 0;
439   for (GlobalIFunc *GI : IFuncsToLower) {
440     Function *ResolvedFunction = GI->getResolverFunction();
441 
442     // We don't know what to pass to a resolver function taking arguments
443     //
444     // FIXME: Is this even valid? clang and gcc don't complain but this
445     // probably should be invalid IR. We could just pass through undef.
446     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
447       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
448                         << ResolvedFunction->getName() << " with parameters\n");
449       UnhandledUsers = true;
450       continue;
451     }
452 
453     // Initialize the function pointer table.
454     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
455     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
456     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
457         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
458     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
459 
460     // Update all users to load a pointer from the global table.
461     for (User *User : make_early_inc_range(GI->users())) {
462       Instruction *UserInst = dyn_cast<Instruction>(User);
463       if (!UserInst) {
464         // TODO: Should handle constantexpr casts in user instructions. Probably
465         // can't do much about constant initializers.
466         UnhandledUsers = true;
467         continue;
468       }
469 
470       IRBuilder<> UseBuilder(UserInst);
471       LoadInst *ResolvedTarget =
472           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
473       Value *ResolvedCast =
474           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
475       UserInst->replaceUsesOfWith(GI, ResolvedCast);
476     }
477 
478     // If we handled all users, erase the ifunc.
479     if (GI->use_empty())
480       GI->eraseFromParent();
481   }
482 
483   InitBuilder.CreateRetVoid();
484 
485   PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
486 
487   // TODO: Is this the right priority? Probably should be before any other
488   // constructors?
489   const int Priority = 10;
490   appendToGlobalCtors(M, NewCtor, Priority,
491                       ConstantPointerNull::get(ConstantDataTy));
492   return UnhandledUsers;
493 }
494