1 //===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass tries to partially inline the fast path of well-known library 10 // functions, such as using square-root instructions for cases where sqrt() 11 // does not need to set errno. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" 16 #include "llvm/Analysis/DomTreeUpdater.h" 17 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 18 #include "llvm/Analysis/TargetLibraryInfo.h" 19 #include "llvm/Analysis/TargetTransformInfo.h" 20 #include "llvm/IR/Dominators.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/InitializePasses.h" 23 #include "llvm/Support/DebugCounter.h" 24 #include "llvm/Transforms/Scalar.h" 25 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 26 #include <optional> 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "partially-inline-libcalls" 31 32 DEBUG_COUNTER(PILCounter, "partially-inline-libcalls-transform", 33 "Controls transformations in partially-inline-libcalls"); 34 35 static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, 36 BasicBlock &CurrBB, Function::iterator &BB, 37 const TargetTransformInfo *TTI, DomTreeUpdater *DTU, 38 OptimizationRemarkEmitter *ORE) { 39 // There is no need to change the IR, since backend will emit sqrt 40 // instruction if the call has already been marked read-only. 41 if (Call->onlyReadsMemory()) 42 return false; 43 44 if (!DebugCounter::shouldExecute(PILCounter)) 45 return false; 46 47 // Do the following transformation: 48 // 49 // (before) 50 // dst = sqrt(src) 51 // 52 // (after) 53 // v0 = sqrt_noreadmem(src) # native sqrt instruction. 54 // [if (v0 is a NaN) || if (src < 0)] 55 // v1 = sqrt(src) # library call. 56 // dst = phi(v0, v1) 57 // 58 59 Type *Ty = Call->getType(); 60 IRBuilder<> Builder(Call->getNextNode()); 61 62 // Split CurrBB right after the call, create a 'then' block (that branches 63 // back to split-off tail of CurrBB) into which we'll insert a libcall. 64 Instruction *LibCallTerm = SplitBlockAndInsertIfThen( 65 Builder.getTrue(), Call->getNextNode(), /*Unreachable=*/false, 66 /*BranchWeights*/ nullptr, DTU); 67 68 auto *CurrBBTerm = cast<BranchInst>(CurrBB.getTerminator()); 69 // We want an 'else' block though, not a 'then' block. 70 cast<BranchInst>(CurrBBTerm)->swapSuccessors(); 71 72 // Create phi that will merge results of either sqrt and replace all uses. 73 BasicBlock *JoinBB = LibCallTerm->getSuccessor(0); 74 JoinBB->setName(CurrBB.getName() + ".split"); 75 Builder.SetInsertPoint(JoinBB, JoinBB->begin()); 76 PHINode *Phi = Builder.CreatePHI(Ty, 2); 77 Call->replaceAllUsesWith(Phi); 78 79 // Finally, insert the libcall into 'else' block. 80 BasicBlock *LibCallBB = LibCallTerm->getParent(); 81 LibCallBB->setName("call.sqrt"); 82 Builder.SetInsertPoint(LibCallTerm); 83 Instruction *LibCall = Call->clone(); 84 Builder.Insert(LibCall); 85 86 // Add memory(none) attribute, so that the backend can use a native sqrt 87 // instruction for this call. 88 Call->setDoesNotAccessMemory(); 89 90 // Insert a FP compare instruction and use it as the CurrBB branch condition. 91 Builder.SetInsertPoint(CurrBBTerm); 92 Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty) 93 ? Builder.CreateFCmpORD(Call, Call) 94 : Builder.CreateFCmpOGE(Call->getOperand(0), 95 ConstantFP::get(Ty, 0.0)); 96 CurrBBTerm->setCondition(FCmp); 97 98 // Add phi operands. 99 Phi->addIncoming(Call, &CurrBB); 100 Phi->addIncoming(LibCall, LibCallBB); 101 102 BB = JoinBB->getIterator(); 103 return true; 104 } 105 106 static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI, 107 const TargetTransformInfo *TTI, 108 DominatorTree *DT, 109 OptimizationRemarkEmitter *ORE) { 110 std::optional<DomTreeUpdater> DTU; 111 if (DT) 112 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy); 113 114 bool Changed = false; 115 116 Function::iterator CurrBB; 117 for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) { 118 CurrBB = BB++; 119 120 for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end(); 121 II != IE; ++II) { 122 CallInst *Call = dyn_cast<CallInst>(&*II); 123 Function *CalledFunc; 124 125 if (!Call || !(CalledFunc = Call->getCalledFunction())) 126 continue; 127 128 if (Call->isNoBuiltin() || Call->isStrictFP()) 129 continue; 130 131 if (Call->isMustTailCall()) 132 continue; 133 134 // Skip if function either has local linkage or is not a known library 135 // function. 136 LibFunc LF; 137 if (CalledFunc->hasLocalLinkage() || 138 !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF)) 139 continue; 140 141 switch (LF) { 142 case LibFunc_sqrtf: 143 case LibFunc_sqrt: 144 if (TTI->haveFastSqrt(Call->getType()) && 145 optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI, 146 DTU ? &*DTU : nullptr, ORE)) 147 break; 148 continue; 149 default: 150 continue; 151 } 152 153 Changed = true; 154 break; 155 } 156 } 157 158 return Changed; 159 } 160 161 PreservedAnalyses 162 PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) { 163 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 164 auto &TTI = AM.getResult<TargetIRAnalysis>(F); 165 auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); 166 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 167 if (!runPartiallyInlineLibCalls(F, &TLI, &TTI, DT, &ORE)) 168 return PreservedAnalyses::all(); 169 PreservedAnalyses PA; 170 PA.preserve<DominatorTreeAnalysis>(); 171 return PA; 172 } 173 174 namespace { 175 class PartiallyInlineLibCallsLegacyPass : public FunctionPass { 176 public: 177 static char ID; 178 179 PartiallyInlineLibCallsLegacyPass() : FunctionPass(ID) { 180 initializePartiallyInlineLibCallsLegacyPassPass( 181 *PassRegistry::getPassRegistry()); 182 } 183 184 void getAnalysisUsage(AnalysisUsage &AU) const override { 185 AU.addRequired<TargetLibraryInfoWrapperPass>(); 186 AU.addRequired<TargetTransformInfoWrapperPass>(); 187 AU.addPreserved<DominatorTreeWrapperPass>(); 188 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 189 FunctionPass::getAnalysisUsage(AU); 190 } 191 192 bool runOnFunction(Function &F) override { 193 if (skipFunction(F)) 194 return false; 195 196 TargetLibraryInfo *TLI = 197 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 198 const TargetTransformInfo *TTI = 199 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 200 DominatorTree *DT = nullptr; 201 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 202 DT = &DTWP->getDomTree(); 203 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 204 return runPartiallyInlineLibCalls(F, TLI, TTI, DT, ORE); 205 } 206 }; 207 } 208 209 char PartiallyInlineLibCallsLegacyPass::ID = 0; 210 INITIALIZE_PASS_BEGIN(PartiallyInlineLibCallsLegacyPass, 211 "partially-inline-libcalls", 212 "Partially inline calls to library functions", false, 213 false) 214 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 215 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 216 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 217 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 218 INITIALIZE_PASS_END(PartiallyInlineLibCallsLegacyPass, 219 "partially-inline-libcalls", 220 "Partially inline calls to library functions", false, false) 221 222 FunctionPass *llvm::createPartiallyInlineLibCallsPass() { 223 return new PartiallyInlineLibCallsLegacyPass(); 224 } 225