//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR lowering for the llvm.memcpy, llvm.memmove,
// llvm.memset, llvm.load.relative and llvm.objc.* intrinsics, among others
// handled in lowerIntrinsics below.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Transforms/Utils/LowerVectorIntrinsics.h"

using namespace llvm;

/// Threshold to leave statically sized memory intrinsic calls. Calls of known
/// size larger than this will be expanded by the pass; calls of known size at
/// or below it are left for expansion in codegen (a threshold of 0 forces
/// expansion of all calls). Calls of unknown size are always expanded here.
static cl::opt<int64_t> MemIntrinsicExpandSizeThresholdOpt(
    "mem-intrinsic-expand-size",
    cl::desc("Set minimum mem intrinsic size to expand in IR"), cl::init(-1),
    cl::Hidden);

namespace {

struct PreISelIntrinsicLowering {
  const TargetMachine *TM;
  const function_ref<TargetTransformInfo &(Function &)> LookupTTI;
  const function_ref<TargetLibraryInfo &(Function &)> LookupTLI;

  /// If this is true, assume it's preferable to leave memory intrinsic calls
  /// for replacement with a library call later. Whether that happens also
  /// depends on TargetLowering reporting the corresponding libcall as
  /// available.
  const bool UseMemIntrinsicLibFunc;

  explicit PreISelIntrinsicLowering(
      const TargetMachine *TM_,
      function_ref<TargetTransformInfo &(Function &)> LookupTTI_,
      function_ref<TargetLibraryInfo &(Function &)> LookupTLI_,
      bool UseMemIntrinsicLibFunc_ = true)
      : TM(TM_), LookupTTI(LookupTTI_), LookupTLI(LookupTLI_),
        UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {}

  static bool shouldExpandMemIntrinsicWithSize(Value *Size,
                                               const TargetTransformInfo &TTI);
  bool expandMemIntrinsicUses(Function &F) const;
  bool lowerIntrinsics(Module &M) const;
};

} // namespace

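// Invoke Callback on every CallInst that calls Intrin. Callback must return
// true if it erased (or replaced) the call, so that the use is not visited
// again; forEachCall returns true if any callback reported a change.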
template <class T> static bool forEachCall(Function &Intrin, T Callback) {
  // Lowering all intrinsics in a function will delete multiple uses, so we
  // can't use an early-inc-range. In case some remain, we don't want to look
  // at them again. Unfortunately, Value::UseList is private, so we can't use a
  // simple Use**. If LastUse is null, the next use to consider is
  // Intrin.use_begin(), otherwise it's LastUse->getNext().
  Use *LastUse = nullptr;
  bool Changed = false;
  while (!Intrin.use_empty() && (!LastUse || LastUse->getNext())) {
    Use *U = LastUse ? LastUse->getNext() : &*Intrin.use_begin();
    bool Removed = false;
    // An intrinsic cannot have its address taken, so it cannot be an argument
    // operand. It might be used as operand in debug metadata, though.
    if (auto *CI = dyn_cast<CallInst>(U->getUser()))
      Changed |= Removed = Callback(CI);
    if (!Removed)
      LastUse = U;
  }
  return Changed;
}

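// Lower llvm.load.relative(%base, %offset) directly in IR: load an i32 from
// %base + %offset (with 4-byte alignment) and add the loaded offset back to
// %base to form the result pointer.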
static bool lowerLoadRelative(Function &F) {
  if (F.use_empty())
    return false;

  bool Changed = false;
  Type *Int32Ty = Type::getInt32Ty(F.getContext());

  for (Use &U : llvm::make_early_inc_range(F.uses())) {
    auto *CI = dyn_cast<CallInst>(U.getUser());
    if (!CI || CI->getCalledOperand() != &F)
      continue;

    IRBuilder<> B(CI);
    Value *OffsetPtr =
        B.CreatePtrAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtr, Align(4));

    Value *ResultPtr = B.CreatePtrAdd(CI->getArgOperand(0), OffsetI32);

    CI->replaceAllUsesWith(ResultPtr);
    CI->eraseFromParent();
    Changed = true;
  }

  return Changed;
}

// ObjCARC has knowledge about whether an obj-c runtime function needs to be
// always tail-called or never tail-called.
static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
  objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(&F);
  if (objcarc::IsAlwaysTail(Kind))
    return CallInst::TCK_Tail;
  if (objcarc::IsNeverTail(Kind))
    return CallInst::TCK_NoTail;
  return CallInst::TCK_None;
}

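// Rewrite every call to the llvm.objc.* intrinsic F as a call to the
// Objective-C runtime function NewFn, carrying over arguments, operand
// bundles, the tail-call kind and the 'returned' attribute.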
static bool lowerObjCCall(Function &F, const char *NewFn,
                          bool setNonLazyBind = false) {
  assert(IntrinsicInst::mayLowerToFunctionCall(F.getIntrinsicID()) &&
         "pre-ISel intrinsics must be allowed to lower to a function call");
  if (F.use_empty())
    return false;

  // Check whether the module already contains a function with this name;
  // otherwise, a declaration with the intrinsic's function type is inserted.
  Module *M = F.getParent();
  FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());

  if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) {
    Fn->setLinkage(F.getLinkage());
    if (setNonLazyBind && !Fn->isWeakForLinker()) {
      // If we have Native ARC, set nonlazybind attribute for these APIs for
      // performance.
      Fn->addFnAttr(Attribute::NonLazyBind);
    }
  }

  CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);

  for (Use &U : llvm::make_early_inc_range(F.uses())) {
    auto *CB = cast<CallBase>(U.getUser());

    if (CB->getCalledFunction() != &F) {
      objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
      (void)Kind;
      assert((Kind == objcarc::ARCInstKind::RetainRV ||
              Kind == objcarc::ARCInstKind::UnsafeClaimRV) &&
             "use expected to be the argument of operand bundle "
             "\"clang.arc.attachedcall\"");
      U.set(FCache.getCallee());
      continue;
    }

    auto *CI = cast<CallInst>(CB);
    assert(CI->getCalledFunction() && "Cannot lower an indirect call!");

    IRBuilder<> Builder(CI->getParent(), CI->getIterator());
    SmallVector<Value *, 8> Args(CI->args());
    SmallVector<llvm::OperandBundleDef, 1> BundleList;
    CI->getOperandBundlesAsDefs(BundleList);
    CallInst *NewCI = Builder.CreateCall(FCache, Args, BundleList);
    NewCI->setName(CI->getName());

    // Try to set the most appropriate TailCallKind based on both the current
    // attributes and the ones that we could get from ObjCARC's special
    // knowledge of the runtime functions.
    //
    // std::max respects both requirements of notail and tail here:
    // * notail on either the call or from ObjCARC becomes notail
    // * tail on either side is stronger than none, but not notail
    CallInst::TailCallKind TCK = CI->getTailCallKind();
    NewCI->setTailCallKind(std::max(TCK, OverridingTCK));

    // Transfer the 'returned' attribute from the intrinsic to the call site.
    // By applying this only to intrinsic call sites, we avoid applying it to
    // non-ARC explicit calls to things like objc_retain which have not been
    // auto-upgraded to use the intrinsics.
    unsigned Index;
    if (F.getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
        Index)
      NewCI->addParamAttr(Index - AttributeList::FirstArgIndex,
                          Attribute::Returned);

    if (!CI->use_empty())
      CI->replaceAllUsesWith(NewCI);
    CI->eraseFromParent();
  }

  return true;
}

// TODO: Should refine based on estimated number of accesses (e.g. does it
// require splitting based on alignment)
bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize(
    Value *Size, const TargetTransformInfo &TTI) {
  ConstantInt *CI = dyn_cast<ConstantInt>(Size);
  if (!CI)
    return true;
  uint64_t Threshold = MemIntrinsicExpandSizeThresholdOpt.getNumOccurrences()
                           ? MemIntrinsicExpandSizeThresholdOpt
                           : TTI.getMaxMemIntrinsicInlineSizeThreshold();
  uint64_t SizeVal = CI->getZExtValue();

  // Treat a threshold of 0 as a special case to force expansion of all
  // intrinsics, including size 0.
  return SizeVal > Threshold || Threshold == 0;
}

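// Return true if the target reports a library implementation of LC, so that a
// call to it may be emitted instead of expanding in IR. Conservatively answers
// true when no TargetMachine is available.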
static bool canEmitLibcall(const TargetMachine *TM, Function *F,
                           RTLIB::Libcall LC) {
  // TODO: Should this consider the address space of the memcpy?
  if (!TM)
    return true;
  const TargetLowering *TLI = TM->getSubtargetImpl(*F)->getTargetLowering();
  return TLI->getLibcallName(LC) != nullptr;
}

// TODO: Handle atomic memcpy and memcpy.inline
// TODO: Pass ScalarEvolution
bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
  Intrinsic::ID ID = F.getIntrinsicID();
  bool Changed = false;

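  // Iterate with an early-incremented range: expanding an intrinsic erases
  // the call currently being visited.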
  for (User *U : llvm::make_early_inc_range(F.users())) {
    Instruction *Inst = cast<Instruction>(U);

    switch (ID) {
    case Intrinsic::memcpy: {
      auto *Memcpy = cast<MemCpyInst>(Inst);
      Function *ParentFunc = Memcpy->getFunction();
      const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
      if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) {
        if (UseMemIntrinsicLibFunc &&
            canEmitLibcall(TM, ParentFunc, RTLIB::MEMCPY))
          break;

        // TODO: For optsize, emit the loop into a separate function
        expandMemCpyAsLoop(Memcpy, TTI);
        Changed = true;
        Memcpy->eraseFromParent();
      }

      break;
    }
    case Intrinsic::memcpy_inline: {
      // Only expand llvm.memcpy.inline with non-constant length in this
      // codepath, leaving the current SelectionDAG expansion for constant
      // length memcpy intrinsics undisturbed.
      auto *Memcpy = cast<MemCpyInlineInst>(Inst);
      if (isa<ConstantInt>(Memcpy->getLength()))
        break;

      Function *ParentFunc = Memcpy->getFunction();
      const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
      expandMemCpyAsLoop(Memcpy, TTI);
      Changed = true;
      Memcpy->eraseFromParent();
      break;
    }
    case Intrinsic::memmove: {
      auto *Memmove = cast<MemMoveInst>(Inst);
      Function *ParentFunc = Memmove->getFunction();
      const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
      if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) {
        if (UseMemIntrinsicLibFunc &&
            canEmitLibcall(TM, ParentFunc, RTLIB::MEMMOVE))
          break;

        if (expandMemMoveAsLoop(Memmove, TTI)) {
          Changed = true;
          Memmove->eraseFromParent();
        }
      }

      break;
    }
    case Intrinsic::memset: {
      auto *Memset = cast<MemSetInst>(Inst);
      Function *ParentFunc = Memset->getFunction();
      const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
      if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) {
        if (UseMemIntrinsicLibFunc &&
            canEmitLibcall(TM, ParentFunc, RTLIB::MEMSET))
          break;

        expandMemSetAsLoop(Memset);
        Changed = true;
        Memset->eraseFromParent();
      }

      break;
    }
    case Intrinsic::memset_inline: {
      // Only expand llvm.memset.inline with non-constant length in this
      // codepath, leaving the current SelectionDAG expansion for constant
      // length memset intrinsics undisturbed.
      auto *Memset = cast<MemSetInlineInst>(Inst);
      if (isa<ConstantInt>(Memset->getLength()))
        break;

      expandMemSetAsLoop(Memset);
      Changed = true;
      Memset->eraseFromParent();
      break;
    }
    case Intrinsic::experimental_memset_pattern: {
      auto *Memset = cast<MemSetPatternInst>(Inst);
      expandMemSetPatternAsLoop(Memset);
      Changed = true;
      Memset->eraseFromParent();
      break;
    }
    default:
      llvm_unreachable("unhandled intrinsic");
    }
  }

  return Changed;
}

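// Dispatch each intrinsic declaration in the module to its lowering routine.
// Returns true if any IR changed.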
bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
  bool Changed = false;
  for (Function &F : M) {
    switch (F.getIntrinsicID()) {
    default:
      break;
    case Intrinsic::memcpy:
    case Intrinsic::memcpy_inline:
    case Intrinsic::memmove:
    case Intrinsic::memset:
    case Intrinsic::memset_inline:
    case Intrinsic::experimental_memset_pattern:
      Changed |= expandMemIntrinsicUses(F);
      break;
    case Intrinsic::load_relative:
      Changed |= lowerLoadRelative(F);
      break;
    case Intrinsic::is_constant:
    case Intrinsic::objectsize:
      Changed |= forEachCall(F, [&](CallInst *CI) {
        Function *Parent = CI->getParent()->getParent();
        TargetLibraryInfo &TLI = LookupTLI(*Parent);
        // Intrinsics in unreachable code are not lowered.
        bool Changed = lowerConstantIntrinsics(*Parent, TLI, /*DT=*/nullptr);
        return Changed;
      });
      break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS)                    \
  case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
      forEachCall(F, [&](CallInst *CI) {
        Function *Parent = CI->getParent()->getParent();
        const TargetTransformInfo &TTI = LookupTTI(*Parent);
        auto *VPI = cast<VPIntrinsic>(CI);
        VPExpansionDetails ED = expandVectorPredicationIntrinsic(*VPI, TTI);
        // Expansion of VP intrinsics may change the IR but not actually
        // replace the intrinsic, so update Changed for the pass
        // and compute Removed for forEachCall.
        Changed |= ED != VPExpansionDetails::IntrinsicUnchanged;
        bool Removed = ED == VPExpansionDetails::IntrinsicReplaced;
        return Removed;
      });
      break;
    case Intrinsic::objc_autorelease:
      Changed |= lowerObjCCall(F, "objc_autorelease");
      break;
    case Intrinsic::objc_autoreleasePoolPop:
      Changed |= lowerObjCCall(F, "objc_autoreleasePoolPop");
      break;
    case Intrinsic::objc_autoreleasePoolPush:
      Changed |= lowerObjCCall(F, "objc_autoreleasePoolPush");
      break;
    case Intrinsic::objc_autoreleaseReturnValue:
      Changed |= lowerObjCCall(F, "objc_autoreleaseReturnValue");
      break;
    case Intrinsic::objc_copyWeak:
      Changed |= lowerObjCCall(F, "objc_copyWeak");
      break;
    case Intrinsic::objc_destroyWeak:
      Changed |= lowerObjCCall(F, "objc_destroyWeak");
      break;
    case Intrinsic::objc_initWeak:
      Changed |= lowerObjCCall(F, "objc_initWeak");
      break;
    case Intrinsic::objc_loadWeak:
      Changed |= lowerObjCCall(F, "objc_loadWeak");
      break;
    case Intrinsic::objc_loadWeakRetained:
      Changed |= lowerObjCCall(F, "objc_loadWeakRetained");
      break;
    case Intrinsic::objc_moveWeak:
      Changed |= lowerObjCCall(F, "objc_moveWeak");
      break;
    case Intrinsic::objc_release:
      Changed |= lowerObjCCall(F, "objc_release", true);
      break;
    case Intrinsic::objc_retain:
      Changed |= lowerObjCCall(F, "objc_retain", true);
      break;
    case Intrinsic::objc_retainAutorelease:
      Changed |= lowerObjCCall(F, "objc_retainAutorelease");
      break;
    case Intrinsic::objc_retainAutoreleaseReturnValue:
      Changed |= lowerObjCCall(F, "objc_retainAutoreleaseReturnValue");
      break;
    case Intrinsic::objc_retainAutoreleasedReturnValue:
      Changed |= lowerObjCCall(F, "objc_retainAutoreleasedReturnValue");
      break;
    case Intrinsic::objc_retainBlock:
      Changed |= lowerObjCCall(F, "objc_retainBlock");
      break;
    case Intrinsic::objc_storeStrong:
      Changed |= lowerObjCCall(F, "objc_storeStrong");
      break;
    case Intrinsic::objc_storeWeak:
      Changed |= lowerObjCCall(F, "objc_storeWeak");
      break;
    case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
      Changed |= lowerObjCCall(F, "objc_unsafeClaimAutoreleasedReturnValue");
      break;
    case Intrinsic::objc_retainedObject:
      Changed |= lowerObjCCall(F, "objc_retainedObject");
      break;
    case Intrinsic::objc_unretainedObject:
      Changed |= lowerObjCCall(F, "objc_unretainedObject");
      break;
    case Intrinsic::objc_unretainedPointer:
      Changed |= lowerObjCCall(F, "objc_unretainedPointer");
      break;
    case Intrinsic::objc_retain_autorelease:
      Changed |= lowerObjCCall(F, "objc_retain_autorelease");
      break;
    case Intrinsic::objc_sync_enter:
      Changed |= lowerObjCCall(F, "objc_sync_enter");
      break;
    case Intrinsic::objc_sync_exit:
      Changed |= lowerObjCCall(F, "objc_sync_exit");
      break;
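    // Lower scalable-vector llvm.exp/llvm.exp2 calls to a loop of scalar
    // calls when the target would have to expand the corresponding ISD node
    // anyway.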
    case Intrinsic::exp:
    case Intrinsic::exp2:
      Changed |= forEachCall(F, [&](CallInst *CI) {
        Type *Ty = CI->getArgOperand(0)->getType();
        if (!isa<ScalableVectorType>(Ty))
          return false;
        const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();
        unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID());
        if (!TL->isOperationExpand(Op, EVT::getEVT(Ty)))
          return false;
        return lowerUnaryVectorIntrinsicAsLoop(M, CI);
      });
      break;
    }
  }
  return Changed;
}

namespace {

class PreISelIntrinsicLoweringLegacyPass : public ModulePass {
public:
  static char ID;

  PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool runOnModule(Module &M) override {
    auto LookupTTI = [this](Function &F) -> TargetTransformInfo & {
      return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    };
    auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };

    const auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
    PreISelIntrinsicLowering Lowering(TM, LookupTTI, LookupTLI);
    return Lowering.lowerIntrinsics(M);
  }
};

} // end anonymous namespace

char PreISelIntrinsicLoweringLegacyPass::ID;

INITIALIZE_PASS_BEGIN(PreISelIntrinsicLoweringLegacyPass,
                      "pre-isel-intrinsic-lowering",
                      "Pre-ISel Intrinsic Lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PreISelIntrinsicLoweringLegacyPass,
                    "pre-isel-intrinsic-lowering",
                    "Pre-ISel Intrinsic Lowering", false, false)

ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
  return new PreISelIntrinsicLoweringLegacyPass();
}

PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
                                                    ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };
  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
    return FAM.getResult<TargetLibraryAnalysis>(F);
  };

  PreISelIntrinsicLowering Lowering(TM, LookupTTI, LookupTLI);
  if (!Lowering.lowerIntrinsics(M))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}