xref: /llvm-project/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp (revision 3630d9ef65b30af7e4ca78e668649bbc48b5be66)
1 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to partially inline the fast path of well-known library
10 // functions, such as using square-root instructions for cases where sqrt()
11 // does not need to set errno.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
16 #include "llvm/Analysis/DomTreeUpdater.h"
17 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
18 #include "llvm/Analysis/TargetLibraryInfo.h"
19 #include "llvm/Analysis/TargetTransformInfo.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/InitializePasses.h"
23 #include "llvm/Support/DebugCounter.h"
24 #include "llvm/Transforms/Scalar.h"
25 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
26 #include <optional>
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "partially-inline-libcalls"
31 
32 DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform",
33               "Controls transformations in partially-inline-libcalls");
34 
35 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
36                          BasicBlock &CurrBB, Function::iterator &BB,
37                          const TargetTransformInfo *TTI, DomTreeUpdater *DTU,
38                          OptimizationRemarkEmitter *ORE) {
39   // There is no need to change the IR, since backend will emit sqrt
40   // instruction if the call has already been marked read-only.
41   if (Call->onlyReadsMemory())
42     return false;
43 
44   if (!DebugCounter::shouldExecute(PILCounter))
45     return false;
46 
47   // Do the following transformation:
48   //
49   // (before)
50   // dst = sqrt(src)
51   //
52   // (after)
53   // v0 = sqrt_noreadmem(src) # native sqrt instruction.
54   // [if (v0 is a NaN) || if (src < 0)]
55   //   v1 = sqrt(src)         # library call.
56   // dst = phi(v0, v1)
57   //
58 
59   Type *Ty = Call->getType();
60   IRBuilder<> Builder(Call->getNextNode());
61 
62   // Split CurrBB right after the call, create a 'then' block (that branches
63   // back to split-off tail of CurrBB) into which we'll insert a libcall.
64   Instruction *LibCallTerm = SplitBlockAndInsertIfThen(
65       Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false,
66       /*BranchWeights*/ nullptr, DTU);
67 
68   auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator());
69   // We want an 'else' block though, not a 'then' block.
70   cast<BranchInst>(CurrBBTerm)->swapSuccessors();
71 
72   // Create phi that will merge results of either sqrt and replace all uses.
73   BasicBlock *JoinBB = LibCallTerm->getSuccessor(0);
74   JoinBB->setName(CurrBB.getName() + ".split");
75   Builder.SetInsertPoint(JoinBB, JoinBB->begin());
76   PHINode *Phi = Builder.CreatePHI(Ty, 2);
77   Call->replaceAllUsesWith(Phi);
78 
79   // Finally, insert the libcall into 'else' block.
80   BasicBlock *LibCallBB = LibCallTerm->getParent();
81   LibCallBB->setName("call.sqrt");
82   Builder.SetInsertPoint(LibCallTerm);
83   Instruction *LibCall = Call->clone();
84   Builder.Insert(LibCall);
85 
86   // Add memory(none) attribute, so that the backend can use a native sqrt
87   // instruction for this call.
88   Call->setDoesNotAccessMemory();
89 
90   // Insert a FP compare instruction and use it as the CurrBB branch condition.
91   Builder.SetInsertPoint(CurrBBTerm);
92   Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty)
93                     ? Builder.CreateFCmpORD(Call, Call)
94                     : Builder.CreateFCmpOGE(Call->getOperand(0),
95                                             ConstantFP::get(Ty, 0.0));
96   CurrBBTerm->setCondition(FCmp);
97 
98   // Add phi operands.
99   Phi->addIncoming(Call, &CurrBB);
100   Phi->addIncoming(LibCall, LibCallBB);
101 
102   BB = JoinBB->getIterator();
103   return true;
104 }
105 
106 static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
107                                        const TargetTransformInfo *TTI,
108                                        DominatorTree *DT,
109                                        OptimizationRemarkEmitter *ORE) {
110   std::optional<DomTreeUpdater> DTU;
111   if (DT)
112     DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
113 
114   bool Changed = false;
115 
116   Function::iterator CurrBB;
117   for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
118     CurrBB = BB++;
119 
120     for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
121          II != IE; ++II) {
122       CallInst *Call = dyn_cast<CallInst>(&*II);
123       Function *CalledFunc;
124 
125       if (!Call || !(CalledFunc = Call->getCalledFunction()))
126         continue;
127 
128       if (Call->isNoBuiltin() || Call->isStrictFP())
129         continue;
130 
131       if (Call->isMustTailCall())
132         continue;
133 
134       // Skip if function either has local linkage or is not a known library
135       // function.
136       LibFunc LF;
137       if (CalledFunc->hasLocalLinkage() ||
138           !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
139         continue;
140 
141       switch (LF) {
142       case LibFunc_sqrtf:
143       case LibFunc_sqrt:
144         if (TTI->haveFastSqrt(Call->getType()) &&
145             optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI,
146                          DTU ? &*DTU : nullptr, ORE))
147           break;
148         continue;
149       default:
150         continue;
151       }
152 
153       Changed = true;
154       break;
155     }
156   }
157 
158   return Changed;
159 }
160 
161 PreservedAnalyses
162 PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
163   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
164   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
165   auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
166   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
167   if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT, &ORE))
168     return PreservedAnalyses::all();
169   PreservedAnalyses PA;
170   PA.preserve<DominatorTreeAnalysis>();
171   return PA;
172 }
173 
174 namespace {
175 class PartiallyInlineLibCallsLegacyPass : public FunctionPass {
176 public:
177   static char ID;
178 
179   PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) {
180     initializePartiallyInlineLibCallsLegacyPassPass(
181         *PassRegistry::getPassRegistry());
182   }
183 
184   void getAnalysisUsage(AnalysisUsage &AU) const override {
185     AU.addRequired<TargetLibraryInfoWrapperPass>();
186     AU.addRequired<TargetTransformInfoWrapperPass>();
187     AU.addPreserved<DominatorTreeWrapperPass>();
188     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
189     FunctionPass::getAnalysisUsage(AU);
190   }
191 
192   bool runOnFunction(Function &F) override {
193     if (skipFunction(F))
194       return false;
195 
196     TargetLibraryInfo *TLI =
197         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
198     const TargetTransformInfo *TTI =
199         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
200     DominatorTree *DT = nullptr;
201     if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
202       DT = &DTWP->getDomTree();
203     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
204     return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE);
205   }
206 };
207 }
208 
209 char PartiallyInlineLibCallsLegacyPass::ID = 0;
210 INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass,
211                       "partially-inline-libcalls",
212                       "Partially inline calls to library functions", false,
213                       false)
214 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
215 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
216 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
217 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
218 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass,
219                     "partially-inline-libcalls",
220                     "Partially inline calls to library functions", false, false)
221 
222 FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
223   return new PartiallyInlineLibCallsLegacyPass();
224 }
225