10b57cec5SDimitry Andric //===- AMDGPULibCalls.cpp -------------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This file does AMD library function optimizations. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "AMDGPU.h" 150b57cec5SDimitry Andric #include "AMDGPULibFunc.h" 16e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 175f757f3fSDimitry Andric #include "llvm/Analysis/AssumptionCache.h" 185f757f3fSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 195f757f3fSDimitry Andric #include "llvm/Analysis/ValueTracking.h" 205f757f3fSDimitry Andric #include "llvm/IR/AttributeMask.h" 215f757f3fSDimitry Andric #include "llvm/IR/Dominators.h" 22fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h" 231fd87a68SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 241fd87a68SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 25*0fca6ea1SDimitry Andric #include "llvm/IR/MDBuilder.h" 265f757f3fSDimitry Andric #include "llvm/IR/PatternMatch.h" 27480093f4SDimitry Andric #include "llvm/InitializePasses.h" 28bdd1243dSDimitry Andric #include <cmath> 29480093f4SDimitry Andric 30480093f4SDimitry Andric #define DEBUG_TYPE "amdgpu-simplifylib" 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric using namespace llvm; 335f757f3fSDimitry Andric using namespace llvm::PatternMatch; 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric static cl::opt<bool> EnablePreLink("amdgpu-prelink", 360b57cec5SDimitry Andric cl::desc("Enable pre-link mode optimizations"), 370b57cec5SDimitry Andric cl::init(false), 380b57cec5SDimitry Andric cl::Hidden); 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric static cl::list<std::string> UseNative("amdgpu-use-native", 410b57cec5SDimitry Andric cl::desc("Comma separated list of functions to replace with native, or all"), 420b57cec5SDimitry Andric cl::CommaSeparated, cl::ValueOptional, 430b57cec5SDimitry Andric cl::Hidden); 440b57cec5SDimitry Andric 458bcb0991SDimitry Andric #define MATH_PI numbers::pi 468bcb0991SDimitry Andric #define MATH_E numbers::e 478bcb0991SDimitry Andric #define MATH_SQRT2 numbers::sqrt2 488bcb0991SDimitry Andric #define MATH_SQRT1_2 numbers::inv_sqrt2 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric namespace llvm { 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric class AMDGPULibCalls { 530b57cec5SDimitry Andric private: 545f757f3fSDimitry Andric const TargetLibraryInfo *TLInfo = nullptr; 555f757f3fSDimitry Andric AssumptionCache *AC = nullptr; 565f757f3fSDimitry Andric DominatorTree *DT = nullptr; 570b57cec5SDimitry Andric 58*0fca6ea1SDimitry Andric using FuncInfo = llvm::AMDGPULibFunc; 590b57cec5SDimitry Andric 605f757f3fSDimitry Andric bool UnsafeFPMath = false; 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric // -fuse-native. 630b57cec5SDimitry Andric bool AllNative = false; 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric bool useNativeFunc(const StringRef F) const; 660b57cec5SDimitry Andric 67349cc55cSDimitry Andric // Return a pointer (pointer expr) to the function if function definition with 680b57cec5SDimitry Andric // "FuncName" exists. It may create a new function prototype in pre-link mode. 690b57cec5SDimitry Andric FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); 700b57cec5SDimitry Andric 71349cc55cSDimitry Andric bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo); 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric bool TDOFold(CallInst *CI, const FuncInfo &FInfo); 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric /* Specialized optimizations */ 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric // pow/powr/pown 785f757f3fSDimitry Andric bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric // rootn 815f757f3fSDimitry Andric bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric // -fuse-native for sincos 840b57cec5SDimitry Andric bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric // evaluate calls if calls' arguments are constants. 875f757f3fSDimitry Andric bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1, 885f757f3fSDimitry Andric Constant *copr0, Constant *copr1); 89349cc55cSDimitry Andric bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); 900b57cec5SDimitry Andric 915f757f3fSDimitry Andric /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value 925f757f3fSDimitry Andric /// of cos, sincos call). 935f757f3fSDimitry Andric std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg, 945f757f3fSDimitry Andric FastMathFlags FMF, 955f757f3fSDimitry Andric IRBuilder<> &B, 965f757f3fSDimitry Andric FunctionCallee Fsincos); 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric // sin/cos 995f757f3fSDimitry Andric bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric // __read_pipe/__write_pipe 102349cc55cSDimitry Andric bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 103349cc55cSDimitry Andric const FuncInfo &FInfo); 1040b57cec5SDimitry Andric 105349cc55cSDimitry Andric // Get a scalar native builtin single argument FP function 1060b57cec5SDimitry Andric FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); 1070b57cec5SDimitry Andric 1085f757f3fSDimitry Andric /// Substitute a call to a known libcall with an intrinsic call. If \p 1095f757f3fSDimitry Andric /// AllowMinSize is true, allow the replacement in a minsize function. 1105f757f3fSDimitry Andric bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 1115f757f3fSDimitry Andric bool AllowMinSizeF32 = false, 1125f757f3fSDimitry Andric bool AllowF64 = false, 1135f757f3fSDimitry Andric bool AllowStrictFP = false); 1145f757f3fSDimitry Andric void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 1155f757f3fSDimitry Andric Intrinsic::ID IntrID); 1165f757f3fSDimitry Andric 1175f757f3fSDimitry Andric bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 1185f757f3fSDimitry Andric Intrinsic::ID IntrID, 1195f757f3fSDimitry Andric bool AllowMinSizeF32 = false, 1205f757f3fSDimitry Andric bool AllowF64 = false, 1215f757f3fSDimitry Andric bool AllowStrictFP = false); 1225f757f3fSDimitry Andric 1230b57cec5SDimitry Andric protected: 1245f757f3fSDimitry Andric bool isUnsafeMath(const FPMathOperator *FPOp) const; 1255f757f3fSDimitry Andric bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const; 1260b57cec5SDimitry Andric 1275f757f3fSDimitry Andric bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const; 1280b57cec5SDimitry Andric 1295f757f3fSDimitry Andric static void replaceCall(Instruction *I, Value *With) { 1305f757f3fSDimitry Andric I->replaceAllUsesWith(With); 1315f757f3fSDimitry Andric I->eraseFromParent(); 1325f757f3fSDimitry Andric } 1335f757f3fSDimitry Andric 1345f757f3fSDimitry Andric static void replaceCall(FPMathOperator *I, Value *With) { 1355f757f3fSDimitry Andric replaceCall(cast<Instruction>(I), With); 1360b57cec5SDimitry Andric } 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric public: 139*0fca6ea1SDimitry Andric AMDGPULibCalls() = default; 1400b57cec5SDimitry Andric 1415f757f3fSDimitry Andric bool fold(CallInst *CI); 1420b57cec5SDimitry Andric 1435f757f3fSDimitry Andric void initFunction(Function &F, FunctionAnalysisManager &FAM); 1440b57cec5SDimitry Andric void initNativeFuncs(); 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric // Replace a normal math function call with that native version 1470b57cec5SDimitry Andric bool useNative(CallInst *CI); 1480b57cec5SDimitry Andric }; 1490b57cec5SDimitry Andric 150*0fca6ea1SDimitry Andric } // end namespace llvm 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric template <typename IRB> 1530b57cec5SDimitry Andric static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, 1540b57cec5SDimitry Andric const Twine &Name = "") { 1550b57cec5SDimitry Andric CallInst *R = B.CreateCall(Callee, Arg, Name); 1560b57cec5SDimitry Andric if (Function *F = dyn_cast<Function>(Callee.getCallee())) 1570b57cec5SDimitry Andric R->setCallingConv(F->getCallingConv()); 1580b57cec5SDimitry Andric return R; 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric template <typename IRB> 1620b57cec5SDimitry Andric static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, 1630b57cec5SDimitry Andric Value *Arg2, const Twine &Name = "") { 1640b57cec5SDimitry Andric CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); 1650b57cec5SDimitry Andric if (Function *F = dyn_cast<Function>(Callee.getCallee())) 1660b57cec5SDimitry Andric R->setCallingConv(F->getCallingConv()); 1670b57cec5SDimitry Andric return R; 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1705f757f3fSDimitry Andric static FunctionType *getPownType(FunctionType *FT) { 1715f757f3fSDimitry Andric Type *PowNExpTy = Type::getInt32Ty(FT->getContext()); 1725f757f3fSDimitry Andric if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType())) 1735f757f3fSDimitry Andric PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount()); 1745f757f3fSDimitry Andric 1755f757f3fSDimitry Andric return FunctionType::get(FT->getReturnType(), 1765f757f3fSDimitry Andric {FT->getParamType(0), PowNExpTy}, false); 1775f757f3fSDimitry Andric } 1785f757f3fSDimitry Andric 1790b57cec5SDimitry Andric // Data structures for table-driven optimizations. 1800b57cec5SDimitry Andric // FuncTbl works for both f32 and f64 functions with 1 input argument 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric struct TableEntry { 1830b57cec5SDimitry Andric double result; 1840b57cec5SDimitry Andric double input; 1850b57cec5SDimitry Andric }; 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric /* a list of {result, input} */ 1880b57cec5SDimitry Andric static const TableEntry tbl_acos[] = { 1890b57cec5SDimitry Andric {MATH_PI / 2.0, 0.0}, 1900b57cec5SDimitry Andric {MATH_PI / 2.0, -0.0}, 1910b57cec5SDimitry Andric {0.0, 1.0}, 1920b57cec5SDimitry Andric {MATH_PI, -1.0} 1930b57cec5SDimitry Andric }; 1940b57cec5SDimitry Andric static const TableEntry tbl_acosh[] = { 1950b57cec5SDimitry Andric {0.0, 1.0} 1960b57cec5SDimitry Andric }; 1970b57cec5SDimitry Andric static const TableEntry tbl_acospi[] = { 1980b57cec5SDimitry Andric {0.5, 0.0}, 1990b57cec5SDimitry Andric {0.5, -0.0}, 2000b57cec5SDimitry Andric {0.0, 1.0}, 2010b57cec5SDimitry Andric {1.0, -1.0} 2020b57cec5SDimitry Andric }; 2030b57cec5SDimitry Andric static const TableEntry tbl_asin[] = { 2040b57cec5SDimitry Andric {0.0, 0.0}, 2050b57cec5SDimitry Andric {-0.0, -0.0}, 2060b57cec5SDimitry Andric {MATH_PI / 2.0, 1.0}, 2070b57cec5SDimitry Andric {-MATH_PI / 2.0, -1.0} 2080b57cec5SDimitry Andric }; 2090b57cec5SDimitry Andric static const TableEntry tbl_asinh[] = { 2100b57cec5SDimitry Andric {0.0, 0.0}, 2110b57cec5SDimitry Andric {-0.0, -0.0} 2120b57cec5SDimitry Andric }; 2130b57cec5SDimitry Andric static const TableEntry tbl_asinpi[] = { 2140b57cec5SDimitry Andric {0.0, 0.0}, 2150b57cec5SDimitry Andric {-0.0, -0.0}, 2160b57cec5SDimitry Andric {0.5, 1.0}, 2170b57cec5SDimitry Andric {-0.5, -1.0} 2180b57cec5SDimitry Andric }; 2190b57cec5SDimitry Andric static const TableEntry tbl_atan[] = { 2200b57cec5SDimitry Andric {0.0, 0.0}, 2210b57cec5SDimitry Andric {-0.0, -0.0}, 2220b57cec5SDimitry Andric {MATH_PI / 4.0, 1.0}, 2230b57cec5SDimitry Andric {-MATH_PI / 4.0, -1.0} 2240b57cec5SDimitry Andric }; 2250b57cec5SDimitry Andric static const TableEntry tbl_atanh[] = { 2260b57cec5SDimitry Andric {0.0, 0.0}, 2270b57cec5SDimitry Andric {-0.0, -0.0} 2280b57cec5SDimitry Andric }; 2290b57cec5SDimitry Andric static const TableEntry tbl_atanpi[] = { 2300b57cec5SDimitry Andric {0.0, 0.0}, 2310b57cec5SDimitry Andric {-0.0, -0.0}, 2320b57cec5SDimitry Andric {0.25, 1.0}, 2330b57cec5SDimitry Andric {-0.25, -1.0} 2340b57cec5SDimitry Andric }; 2350b57cec5SDimitry Andric static const TableEntry tbl_cbrt[] = { 2360b57cec5SDimitry Andric {0.0, 0.0}, 2370b57cec5SDimitry Andric {-0.0, -0.0}, 2380b57cec5SDimitry Andric {1.0, 1.0}, 2390b57cec5SDimitry Andric {-1.0, -1.0}, 2400b57cec5SDimitry Andric }; 2410b57cec5SDimitry Andric static const TableEntry tbl_cos[] = { 2420b57cec5SDimitry Andric {1.0, 0.0}, 2430b57cec5SDimitry Andric {1.0, -0.0} 2440b57cec5SDimitry Andric }; 2450b57cec5SDimitry Andric static const TableEntry tbl_cosh[] = { 2460b57cec5SDimitry Andric {1.0, 0.0}, 2470b57cec5SDimitry Andric {1.0, -0.0} 2480b57cec5SDimitry Andric }; 2490b57cec5SDimitry Andric static const TableEntry tbl_cospi[] = { 2500b57cec5SDimitry Andric {1.0, 0.0}, 2510b57cec5SDimitry Andric {1.0, -0.0} 2520b57cec5SDimitry Andric }; 2530b57cec5SDimitry Andric static const TableEntry tbl_erfc[] = { 2540b57cec5SDimitry Andric {1.0, 0.0}, 2550b57cec5SDimitry Andric {1.0, -0.0} 2560b57cec5SDimitry Andric }; 2570b57cec5SDimitry Andric static const TableEntry tbl_erf[] = { 2580b57cec5SDimitry Andric {0.0, 0.0}, 2590b57cec5SDimitry Andric {-0.0, -0.0} 2600b57cec5SDimitry Andric }; 2610b57cec5SDimitry Andric static const TableEntry tbl_exp[] = { 2620b57cec5SDimitry Andric {1.0, 0.0}, 2630b57cec5SDimitry Andric {1.0, -0.0}, 2640b57cec5SDimitry Andric {MATH_E, 1.0} 2650b57cec5SDimitry Andric }; 2660b57cec5SDimitry Andric static const TableEntry tbl_exp2[] = { 2670b57cec5SDimitry Andric {1.0, 0.0}, 2680b57cec5SDimitry Andric {1.0, -0.0}, 2690b57cec5SDimitry Andric {2.0, 1.0} 2700b57cec5SDimitry Andric }; 2710b57cec5SDimitry Andric static const TableEntry tbl_exp10[] = { 2720b57cec5SDimitry Andric {1.0, 0.0}, 2730b57cec5SDimitry Andric {1.0, -0.0}, 2740b57cec5SDimitry Andric {10.0, 1.0} 2750b57cec5SDimitry Andric }; 2760b57cec5SDimitry Andric static const TableEntry tbl_expm1[] = { 2770b57cec5SDimitry Andric {0.0, 0.0}, 2780b57cec5SDimitry Andric {-0.0, -0.0} 2790b57cec5SDimitry Andric }; 2800b57cec5SDimitry Andric static const TableEntry tbl_log[] = { 2810b57cec5SDimitry Andric {0.0, 1.0}, 2820b57cec5SDimitry Andric {1.0, MATH_E} 2830b57cec5SDimitry Andric }; 2840b57cec5SDimitry Andric static const TableEntry tbl_log2[] = { 2850b57cec5SDimitry Andric {0.0, 1.0}, 2860b57cec5SDimitry Andric {1.0, 2.0} 2870b57cec5SDimitry Andric }; 2880b57cec5SDimitry Andric static const TableEntry tbl_log10[] = { 2890b57cec5SDimitry Andric {0.0, 1.0}, 2900b57cec5SDimitry Andric {1.0, 10.0} 2910b57cec5SDimitry Andric }; 2920b57cec5SDimitry Andric static const TableEntry tbl_rsqrt[] = { 2930b57cec5SDimitry Andric {1.0, 1.0}, 2948bcb0991SDimitry Andric {MATH_SQRT1_2, 2.0} 2950b57cec5SDimitry Andric }; 2960b57cec5SDimitry Andric static const TableEntry tbl_sin[] = { 2970b57cec5SDimitry Andric {0.0, 0.0}, 2980b57cec5SDimitry Andric {-0.0, -0.0} 2990b57cec5SDimitry Andric }; 3000b57cec5SDimitry Andric static const TableEntry tbl_sinh[] = { 3010b57cec5SDimitry Andric {0.0, 0.0}, 3020b57cec5SDimitry Andric {-0.0, -0.0} 3030b57cec5SDimitry Andric }; 3040b57cec5SDimitry Andric static const TableEntry tbl_sinpi[] = { 3050b57cec5SDimitry Andric {0.0, 0.0}, 3060b57cec5SDimitry Andric {-0.0, -0.0} 3070b57cec5SDimitry Andric }; 3080b57cec5SDimitry Andric static const TableEntry tbl_sqrt[] = { 3090b57cec5SDimitry Andric {0.0, 0.0}, 3100b57cec5SDimitry Andric {1.0, 1.0}, 3110b57cec5SDimitry Andric {MATH_SQRT2, 2.0} 3120b57cec5SDimitry Andric }; 3130b57cec5SDimitry Andric static const TableEntry tbl_tan[] = { 3140b57cec5SDimitry Andric {0.0, 0.0}, 3150b57cec5SDimitry Andric {-0.0, -0.0} 3160b57cec5SDimitry Andric }; 3170b57cec5SDimitry Andric static const TableEntry tbl_tanh[] = { 3180b57cec5SDimitry Andric {0.0, 0.0}, 3190b57cec5SDimitry Andric {-0.0, -0.0} 3200b57cec5SDimitry Andric }; 3210b57cec5SDimitry Andric static const TableEntry tbl_tanpi[] = { 3220b57cec5SDimitry Andric {0.0, 0.0}, 3230b57cec5SDimitry Andric {-0.0, -0.0} 3240b57cec5SDimitry Andric }; 3250b57cec5SDimitry Andric static const TableEntry tbl_tgamma[] = { 3260b57cec5SDimitry Andric {1.0, 1.0}, 3270b57cec5SDimitry Andric {1.0, 2.0}, 3280b57cec5SDimitry Andric {2.0, 3.0}, 3290b57cec5SDimitry Andric {6.0, 4.0} 3300b57cec5SDimitry Andric }; 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric static bool HasNative(AMDGPULibFunc::EFuncId id) { 3330b57cec5SDimitry Andric switch(id) { 3340b57cec5SDimitry Andric case AMDGPULibFunc::EI_DIVIDE: 3350b57cec5SDimitry Andric case AMDGPULibFunc::EI_COS: 3360b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP: 3370b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP2: 3380b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP10: 3390b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG: 3400b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG2: 3410b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG10: 3420b57cec5SDimitry Andric case AMDGPULibFunc::EI_POWR: 3430b57cec5SDimitry Andric case AMDGPULibFunc::EI_RECIP: 3440b57cec5SDimitry Andric case AMDGPULibFunc::EI_RSQRT: 3450b57cec5SDimitry Andric case AMDGPULibFunc::EI_SIN: 3460b57cec5SDimitry Andric case AMDGPULibFunc::EI_SINCOS: 3470b57cec5SDimitry Andric case AMDGPULibFunc::EI_SQRT: 3480b57cec5SDimitry Andric case AMDGPULibFunc::EI_TAN: 3490b57cec5SDimitry Andric return true; 3500b57cec5SDimitry Andric default:; 3510b57cec5SDimitry Andric } 3520b57cec5SDimitry Andric return false; 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 355fcaf7f86SDimitry Andric using TableRef = ArrayRef<TableEntry>; 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { 3580b57cec5SDimitry Andric switch(id) { 3590b57cec5SDimitry Andric case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); 3600b57cec5SDimitry Andric case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); 3610b57cec5SDimitry Andric case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); 3620b57cec5SDimitry Andric case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); 3630b57cec5SDimitry Andric case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); 3640b57cec5SDimitry Andric case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); 3650b57cec5SDimitry Andric case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); 3660b57cec5SDimitry Andric case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); 3670b57cec5SDimitry Andric case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); 3680b57cec5SDimitry Andric case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); 3690b57cec5SDimitry Andric case AMDGPULibFunc::EI_NCOS: 3700b57cec5SDimitry Andric case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); 3710b57cec5SDimitry Andric case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); 3720b57cec5SDimitry Andric case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); 3730b57cec5SDimitry Andric case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); 3740b57cec5SDimitry Andric case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); 3750b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); 3760b57cec5SDimitry Andric case AMDGPULibFunc::EI_NEXP2: 3770b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); 3780b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); 3790b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); 3800b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); 3810b57cec5SDimitry Andric case AMDGPULibFunc::EI_NLOG2: 3820b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); 3830b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); 3840b57cec5SDimitry Andric case AMDGPULibFunc::EI_NRSQRT: 3850b57cec5SDimitry Andric case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); 3860b57cec5SDimitry Andric case AMDGPULibFunc::EI_NSIN: 3870b57cec5SDimitry Andric case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); 3880b57cec5SDimitry Andric case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); 3890b57cec5SDimitry Andric case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); 3900b57cec5SDimitry Andric case AMDGPULibFunc::EI_NSQRT: 3910b57cec5SDimitry Andric case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); 3920b57cec5SDimitry Andric case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); 3930b57cec5SDimitry Andric case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); 3940b57cec5SDimitry Andric case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); 3950b57cec5SDimitry Andric case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); 3960b57cec5SDimitry Andric default:; 3970b57cec5SDimitry Andric } 3980b57cec5SDimitry Andric return TableRef(); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric 4010b57cec5SDimitry Andric static inline int getVecSize(const AMDGPULibFunc& FInfo) { 4020b57cec5SDimitry Andric return FInfo.getLeads()[0].VectorSize; 4030b57cec5SDimitry Andric } 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { 4060b57cec5SDimitry Andric return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { 4100b57cec5SDimitry Andric // If we are doing PreLinkOpt, the function is external. So it is safe to 4110b57cec5SDimitry Andric // use getOrInsertFunction() at this stage. 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) 4140b57cec5SDimitry Andric : AMDGPULibFunc::getFunction(M, fInfo); 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName, 418349cc55cSDimitry Andric FuncInfo &FInfo) { 419349cc55cSDimitry Andric return AMDGPULibFunc::parse(FMangledName, FInfo); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4225f757f3fSDimitry Andric bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const { 4235f757f3fSDimitry Andric return UnsafeFPMath || FPOp->isFast(); 4245f757f3fSDimitry Andric } 4255f757f3fSDimitry Andric 4265f757f3fSDimitry Andric bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const { 4275f757f3fSDimitry Andric return UnsafeFPMath || 4285f757f3fSDimitry Andric (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs()); 4295f757f3fSDimitry Andric } 4305f757f3fSDimitry Andric 4315f757f3fSDimitry Andric bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold( 4325f757f3fSDimitry Andric const FPMathOperator *FPOp) const { 4335f757f3fSDimitry Andric // TODO: Refine to approxFunc or contract 4345f757f3fSDimitry Andric return isUnsafeMath(FPOp); 4355f757f3fSDimitry Andric } 4365f757f3fSDimitry Andric 4375f757f3fSDimitry Andric void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) { 4385f757f3fSDimitry Andric UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool(); 4395f757f3fSDimitry Andric AC = &FAM.getResult<AssumptionAnalysis>(F); 4405f757f3fSDimitry Andric TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F); 4415f757f3fSDimitry Andric DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); 4420b57cec5SDimitry Andric } 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { 445e8d8bef9SDimitry Andric return AllNative || llvm::is_contained(UseNative, F); 4460b57cec5SDimitry Andric } 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric void AMDGPULibCalls::initNativeFuncs() { 4490b57cec5SDimitry Andric AllNative = useNativeFunc("all") || 4500b57cec5SDimitry Andric (UseNative.getNumOccurrences() && UseNative.size() == 1 && 4510b57cec5SDimitry Andric UseNative.begin()->empty()); 4520b57cec5SDimitry Andric } 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { 4550b57cec5SDimitry Andric bool native_sin = useNativeFunc("sin"); 4560b57cec5SDimitry Andric bool native_cos = useNativeFunc("cos"); 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric if (native_sin && native_cos) { 4590b57cec5SDimitry Andric Module *M = aCI->getModule(); 4600b57cec5SDimitry Andric Value *opr0 = aCI->getArgOperand(0); 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric AMDGPULibFunc nf; 4630b57cec5SDimitry Andric nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; 4640b57cec5SDimitry Andric nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE); 4670b57cec5SDimitry Andric nf.setId(AMDGPULibFunc::EI_SIN); 4680b57cec5SDimitry Andric FunctionCallee sinExpr = getFunction(M, nf); 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE); 4710b57cec5SDimitry Andric nf.setId(AMDGPULibFunc::EI_COS); 4720b57cec5SDimitry Andric FunctionCallee cosExpr = getFunction(M, nf); 4730b57cec5SDimitry Andric if (sinExpr && cosExpr) { 474*0fca6ea1SDimitry Andric Value *sinval = 475*0fca6ea1SDimitry Andric CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator()); 476*0fca6ea1SDimitry Andric Value *cosval = 477*0fca6ea1SDimitry Andric CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator()); 478*0fca6ea1SDimitry Andric new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator()); 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 4810b57cec5SDimitry Andric << " with native version of sin/cos"); 4820b57cec5SDimitry Andric 4835f757f3fSDimitry Andric replaceCall(aCI, sinval); 4840b57cec5SDimitry Andric return true; 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric return false; 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric bool AMDGPULibCalls::useNative(CallInst *aCI) { 4910b57cec5SDimitry Andric Function *Callee = aCI->getCalledFunction(); 4925f757f3fSDimitry Andric if (!Callee || aCI->isNoBuiltin()) 4935f757f3fSDimitry Andric return false; 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric FuncInfo FInfo; 496349cc55cSDimitry Andric if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() || 4970b57cec5SDimitry Andric FInfo.getPrefix() != AMDGPULibFunc::NOPFX || 4980b57cec5SDimitry Andric getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || 4990b57cec5SDimitry Andric !(AllNative || useNativeFunc(FInfo.getName()))) { 5000b57cec5SDimitry Andric return false; 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) 5040b57cec5SDimitry Andric return sincosUseNative(aCI, FInfo); 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric FInfo.setPrefix(AMDGPULibFunc::NATIVE); 5070b57cec5SDimitry Andric FunctionCallee F = getFunction(aCI->getModule(), FInfo); 5080b57cec5SDimitry Andric if (!F) 5090b57cec5SDimitry Andric return false; 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric aCI->setCalledFunction(F); 5120b57cec5SDimitry Andric DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 5130b57cec5SDimitry Andric << " with native version"); 5140b57cec5SDimitry Andric return true; 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe 5180b57cec5SDimitry Andric // builtin, with appended type size and alignment arguments, where 2 or 4 5190b57cec5SDimitry Andric // indicates the original number of arguments. The library has optimized version 5200b57cec5SDimitry Andric // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same 5210b57cec5SDimitry Andric // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N 5220b57cec5SDimitry Andric // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., 5230b57cec5SDimitry Andric // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. 5240b57cec5SDimitry Andric bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 525349cc55cSDimitry Andric const FuncInfo &FInfo) { 5260b57cec5SDimitry Andric auto *Callee = CI->getCalledFunction(); 5270b57cec5SDimitry Andric if (!Callee->isDeclaration()) 5280b57cec5SDimitry Andric return false; 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); 5310b57cec5SDimitry Andric auto *M = Callee->getParent(); 5325ffd83dbSDimitry Andric std::string Name = std::string(Callee->getName()); 533349cc55cSDimitry Andric auto NumArg = CI->arg_size(); 5340b57cec5SDimitry Andric if (NumArg != 4 && NumArg != 6) 5350b57cec5SDimitry Andric return false; 5365f757f3fSDimitry Andric ConstantInt *PacketSize = 5375f757f3fSDimitry Andric dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2)); 5385f757f3fSDimitry Andric ConstantInt *PacketAlign = 5395f757f3fSDimitry Andric dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1)); 5405f757f3fSDimitry Andric if (!PacketSize || !PacketAlign) 5410b57cec5SDimitry Andric return false; 5425f757f3fSDimitry Andric 5435f757f3fSDimitry Andric unsigned Size = PacketSize->getZExtValue(); 5445f757f3fSDimitry Andric Align Alignment = PacketAlign->getAlignValue(); 5455ffd83dbSDimitry Andric if (Alignment != Size) 5460b57cec5SDimitry Andric return false; 5470b57cec5SDimitry Andric 548349cc55cSDimitry Andric unsigned PtrArgLoc = CI->arg_size() - 3; 5495f757f3fSDimitry Andric Value *PtrArg = CI->getArgOperand(PtrArgLoc); 5505f757f3fSDimitry Andric Type *PtrTy = PtrArg->getType(); 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric SmallVector<llvm::Type *, 6> ArgTys; 5530b57cec5SDimitry Andric for (unsigned I = 0; I != PtrArgLoc; ++I) 5540b57cec5SDimitry Andric ArgTys.push_back(CI->getArgOperand(I)->getType()); 5550b57cec5SDimitry Andric ArgTys.push_back(PtrTy); 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric Name = Name + "_" + std::to_string(Size); 5580b57cec5SDimitry Andric auto *FTy = FunctionType::get(Callee->getReturnType(), 5590b57cec5SDimitry Andric ArrayRef<Type *>(ArgTys), false); 5600b57cec5SDimitry Andric AMDGPULibFunc NewLibFunc(Name, FTy); 5610b57cec5SDimitry Andric FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); 5620b57cec5SDimitry Andric if (!F) 5630b57cec5SDimitry Andric return false; 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric SmallVector<Value *, 6> Args; 5660b57cec5SDimitry Andric for (unsigned I = 0; I != PtrArgLoc; ++I) 5670b57cec5SDimitry Andric Args.push_back(CI->getArgOperand(I)); 5685f757f3fSDimitry Andric Args.push_back(PtrArg); 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric auto *NCI = B.CreateCall(F, Args); 5710b57cec5SDimitry Andric NCI->setAttributes(CI->getAttributes()); 5720b57cec5SDimitry Andric CI->replaceAllUsesWith(NCI); 5730b57cec5SDimitry Andric CI->dropAllReferences(); 5740b57cec5SDimitry Andric CI->eraseFromParent(); 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric return true; 5770b57cec5SDimitry Andric } 5780b57cec5SDimitry Andric 5795f757f3fSDimitry Andric static bool isKnownIntegral(const Value *V, const DataLayout &DL, 5805f757f3fSDimitry Andric FastMathFlags FMF) { 581*0fca6ea1SDimitry Andric if (isa<PoisonValue>(V)) 5825f757f3fSDimitry Andric return true; 583*0fca6ea1SDimitry Andric if (isa<UndefValue>(V)) 584*0fca6ea1SDimitry Andric return false; 5850b57cec5SDimitry Andric 5865f757f3fSDimitry Andric if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) 5875f757f3fSDimitry Andric return CF->getValueAPF().isInteger(); 5885f757f3fSDimitry Andric 589*0fca6ea1SDimitry Andric auto *VFVTy = dyn_cast<FixedVectorType>(V->getType()); 590*0fca6ea1SDimitry Andric const Constant *CV = dyn_cast<Constant>(V); 591*0fca6ea1SDimitry Andric if (VFVTy && CV) { 592*0fca6ea1SDimitry Andric unsigned NumElts = VFVTy->getNumElements(); 593*0fca6ea1SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) { 594*0fca6ea1SDimitry Andric Constant *Elt = CV->getAggregateElement(i); 595*0fca6ea1SDimitry Andric if (!Elt) 596*0fca6ea1SDimitry Andric return false; 597*0fca6ea1SDimitry Andric if (isa<PoisonValue>(Elt)) 5985f757f3fSDimitry Andric continue; 599*0fca6ea1SDimitry Andric 600*0fca6ea1SDimitry Andric const ConstantFP *CFP = dyn_cast<ConstantFP>(Elt); 6015f757f3fSDimitry Andric if (!CFP || !CFP->getValue().isInteger()) 6025f757f3fSDimitry Andric return false; 6035f757f3fSDimitry Andric } 6045f757f3fSDimitry Andric 6055f757f3fSDimitry Andric return true; 6065f757f3fSDimitry Andric } 6075f757f3fSDimitry Andric 6085f757f3fSDimitry Andric const Instruction *I = dyn_cast<Instruction>(V); 6095f757f3fSDimitry Andric if (!I) 61004eeddc0SDimitry Andric return false; 6110b57cec5SDimitry Andric 6125f757f3fSDimitry Andric switch (I->getOpcode()) { 6135f757f3fSDimitry Andric case Instruction::SIToFP: 6145f757f3fSDimitry Andric case Instruction::UIToFP: 6155f757f3fSDimitry Andric // TODO: Could check nofpclass(inf) on incoming argument 6165f757f3fSDimitry Andric if (FMF.noInfs()) 6175f757f3fSDimitry Andric return true; 6180b57cec5SDimitry Andric 6195f757f3fSDimitry Andric // Need to check int size cannot produce infinity, which computeKnownFPClass 6205f757f3fSDimitry Andric // knows how to do already. 621*0fca6ea1SDimitry Andric return isKnownNeverInfinity(I, /*Depth=*/0, SimplifyQuery(DL)); 6225f757f3fSDimitry Andric case Instruction::Call: { 6235f757f3fSDimitry Andric const CallInst *CI = cast<CallInst>(I); 6245f757f3fSDimitry Andric switch (CI->getIntrinsicID()) { 6255f757f3fSDimitry Andric case Intrinsic::trunc: 6265f757f3fSDimitry Andric case Intrinsic::floor: 6275f757f3fSDimitry Andric case Intrinsic::ceil: 6285f757f3fSDimitry Andric case Intrinsic::rint: 6295f757f3fSDimitry Andric case Intrinsic::nearbyint: 6305f757f3fSDimitry Andric case Intrinsic::round: 6315f757f3fSDimitry Andric case Intrinsic::roundeven: 6325f757f3fSDimitry Andric return (FMF.noInfs() && FMF.noNaNs()) || 633*0fca6ea1SDimitry Andric isKnownNeverInfOrNaN(I, /*Depth=*/0, SimplifyQuery(DL)); 6340b57cec5SDimitry Andric default: 6350b57cec5SDimitry Andric break; 6360b57cec5SDimitry Andric } 6370b57cec5SDimitry Andric 6385f757f3fSDimitry Andric break; 6395f757f3fSDimitry Andric } 6405f757f3fSDimitry Andric default: 6415f757f3fSDimitry Andric break; 6425f757f3fSDimitry Andric } 6435f757f3fSDimitry Andric 6445f757f3fSDimitry Andric return false; 6455f757f3fSDimitry Andric } 6465f757f3fSDimitry Andric 6475f757f3fSDimitry Andric // This function returns false if no change; return true otherwise. 6485f757f3fSDimitry Andric bool AMDGPULibCalls::fold(CallInst *CI) { 6495f757f3fSDimitry Andric Function *Callee = CI->getCalledFunction(); 6505f757f3fSDimitry Andric // Ignore indirect calls. 6515f757f3fSDimitry Andric if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin()) 6525f757f3fSDimitry Andric return false; 6535f757f3fSDimitry Andric 6540b57cec5SDimitry Andric FuncInfo FInfo; 655349cc55cSDimitry Andric if (!parseFunctionName(Callee->getName(), FInfo)) 6560b57cec5SDimitry Andric return false; 6570b57cec5SDimitry Andric 6580b57cec5SDimitry Andric // Further check the number of arguments to see if they match. 6595f757f3fSDimitry Andric // TODO: Check calling convention matches too 6605f757f3fSDimitry Andric if (!FInfo.isCompatibleSignature(CI->getFunctionType())) 6610b57cec5SDimitry Andric return false; 6620b57cec5SDimitry Andric 6635f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n'); 6645f757f3fSDimitry Andric 6650b57cec5SDimitry Andric if (TDOFold(CI, FInfo)) 6660b57cec5SDimitry Andric return true; 6670b57cec5SDimitry Andric 6685f757f3fSDimitry Andric IRBuilder<> B(CI); 669*0fca6ea1SDimitry Andric if (CI->isStrictFP()) 670*0fca6ea1SDimitry Andric B.setIsFPConstrained(true); 6715f757f3fSDimitry Andric 6725f757f3fSDimitry Andric if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) { 6730b57cec5SDimitry Andric // Under unsafe-math, evaluate calls if possible. 6740b57cec5SDimitry Andric // According to Brian Sumner, we can do this for all f32 function calls 6750b57cec5SDimitry Andric // using host's double function calls. 6765f757f3fSDimitry Andric if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo)) 6770b57cec5SDimitry Andric return true; 6780b57cec5SDimitry Andric 6795f757f3fSDimitry Andric // Copy fast flags from the original call. 6805f757f3fSDimitry Andric FastMathFlags FMF = FPOp->getFastMathFlags(); 6815f757f3fSDimitry Andric B.setFastMathFlags(FMF); 6825f757f3fSDimitry Andric 6835f757f3fSDimitry Andric // Specialized optimizations for each function call. 6845f757f3fSDimitry Andric // 6855f757f3fSDimitry Andric // TODO: Handle native functions 6860b57cec5SDimitry Andric switch (FInfo.getId()) { 6875f757f3fSDimitry Andric case AMDGPULibFunc::EI_EXP: 6885f757f3fSDimitry Andric if (FMF.none()) 6895f757f3fSDimitry Andric return false; 6905f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp, 6915f757f3fSDimitry Andric FMF.approxFunc()); 6925f757f3fSDimitry Andric case AMDGPULibFunc::EI_EXP2: 6935f757f3fSDimitry Andric if (FMF.none()) 6945f757f3fSDimitry Andric return false; 6955f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2, 6965f757f3fSDimitry Andric FMF.approxFunc()); 6975f757f3fSDimitry Andric case AMDGPULibFunc::EI_LOG: 6985f757f3fSDimitry Andric if (FMF.none()) 6995f757f3fSDimitry Andric return false; 7005f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log, 7015f757f3fSDimitry Andric FMF.approxFunc()); 7025f757f3fSDimitry Andric case AMDGPULibFunc::EI_LOG2: 7035f757f3fSDimitry Andric if (FMF.none()) 7045f757f3fSDimitry Andric return false; 7055f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2, 7065f757f3fSDimitry Andric FMF.approxFunc()); 7075f757f3fSDimitry Andric case AMDGPULibFunc::EI_LOG10: 7085f757f3fSDimitry Andric if (FMF.none()) 7095f757f3fSDimitry Andric return false; 7105f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10, 7115f757f3fSDimitry Andric FMF.approxFunc()); 7125f757f3fSDimitry Andric case AMDGPULibFunc::EI_FMIN: 7135f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum, 7145f757f3fSDimitry Andric true, true); 7155f757f3fSDimitry Andric case AMDGPULibFunc::EI_FMAX: 7165f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum, 7175f757f3fSDimitry Andric true, true); 7185f757f3fSDimitry Andric case AMDGPULibFunc::EI_FMA: 7195f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true, 7205f757f3fSDimitry Andric true); 7215f757f3fSDimitry Andric case AMDGPULibFunc::EI_MAD: 7225f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd, 7235f757f3fSDimitry Andric true, true); 7245f757f3fSDimitry Andric case AMDGPULibFunc::EI_FABS: 7255f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true, 7265f757f3fSDimitry Andric true, true); 7275f757f3fSDimitry Andric case AMDGPULibFunc::EI_COPYSIGN: 7285f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign, 7295f757f3fSDimitry Andric true, true, true); 7305f757f3fSDimitry Andric case AMDGPULibFunc::EI_FLOOR: 7315f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true, 7325f757f3fSDimitry Andric true); 7335f757f3fSDimitry Andric case AMDGPULibFunc::EI_CEIL: 7345f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true, 7355f757f3fSDimitry Andric true); 7365f757f3fSDimitry Andric case AMDGPULibFunc::EI_TRUNC: 7375f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true, 7385f757f3fSDimitry Andric true); 7395f757f3fSDimitry Andric case AMDGPULibFunc::EI_RINT: 7405f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true, 7415f757f3fSDimitry Andric true); 7425f757f3fSDimitry Andric case AMDGPULibFunc::EI_ROUND: 7435f757f3fSDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true, 7445f757f3fSDimitry Andric true); 7455f757f3fSDimitry Andric case AMDGPULibFunc::EI_LDEXP: { 7465f757f3fSDimitry Andric if (!shouldReplaceLibcallWithIntrinsic(CI, true, true)) 7475f757f3fSDimitry Andric return false; 7480b57cec5SDimitry Andric 7495f757f3fSDimitry Andric Value *Arg1 = CI->getArgOperand(1); 7505f757f3fSDimitry Andric if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType()); 7515f757f3fSDimitry Andric VecTy && !isa<VectorType>(Arg1->getType())) { 7525f757f3fSDimitry Andric Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1); 7535f757f3fSDimitry Andric CI->setArgOperand(1, SplatArg1); 7545f757f3fSDimitry Andric } 7550b57cec5SDimitry Andric 7565f757f3fSDimitry Andric CI->setCalledFunction(Intrinsic::getDeclaration( 7575f757f3fSDimitry Andric CI->getModule(), Intrinsic::ldexp, 7585f757f3fSDimitry Andric {CI->getType(), CI->getArgOperand(1)->getType()})); 7595f757f3fSDimitry Andric return true; 7605f757f3fSDimitry Andric } 7615f757f3fSDimitry Andric case AMDGPULibFunc::EI_POW: { 7625f757f3fSDimitry Andric Module *M = Callee->getParent(); 7635f757f3fSDimitry Andric AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo); 7645f757f3fSDimitry Andric FunctionCallee PowrFunc = getFunction(M, PowrInfo); 7655f757f3fSDimitry Andric CallInst *Call = cast<CallInst>(FPOp); 7665f757f3fSDimitry Andric 7675f757f3fSDimitry Andric // pow(x, y) -> powr(x, y) for x >= -0.0 7685f757f3fSDimitry Andric // TODO: Account for flags on current call 7695f757f3fSDimitry Andric if (PowrFunc && 770*0fca6ea1SDimitry Andric cannotBeOrderedLessThanZero( 771*0fca6ea1SDimitry Andric FPOp->getOperand(0), /*Depth=*/0, 772*0fca6ea1SDimitry Andric SimplifyQuery(M->getDataLayout(), TLInfo, DT, AC, Call))) { 7735f757f3fSDimitry Andric Call->setCalledFunction(PowrFunc); 7745f757f3fSDimitry Andric return fold_pow(FPOp, B, PowrInfo) || true; 7755f757f3fSDimitry Andric } 7765f757f3fSDimitry Andric 7775f757f3fSDimitry Andric // pow(x, y) -> pown(x, y) for known integral y 7785f757f3fSDimitry Andric if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(), 7795f757f3fSDimitry Andric FPOp->getFastMathFlags())) { 7805f757f3fSDimitry Andric FunctionType *PownType = getPownType(CI->getFunctionType()); 7815f757f3fSDimitry Andric AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true); 7825f757f3fSDimitry Andric FunctionCallee PownFunc = getFunction(M, PownInfo); 7835f757f3fSDimitry Andric if (PownFunc) { 7845f757f3fSDimitry Andric // TODO: If the incoming integral value is an sitofp/uitofp, it won't 7855f757f3fSDimitry Andric // fold out without a known range. We can probably take the source 7865f757f3fSDimitry Andric // value directly. 7875f757f3fSDimitry Andric Value *CastedArg = 7885f757f3fSDimitry Andric B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1)); 7895f757f3fSDimitry Andric // Have to drop any nofpclass attributes on the original call site. 7905f757f3fSDimitry Andric Call->removeParamAttrs( 7915f757f3fSDimitry Andric 1, AttributeFuncs::typeIncompatible(CastedArg->getType())); 7925f757f3fSDimitry Andric Call->setCalledFunction(PownFunc); 7935f757f3fSDimitry Andric Call->setArgOperand(1, CastedArg); 7945f757f3fSDimitry Andric return fold_pow(FPOp, B, PownInfo) || true; 7955f757f3fSDimitry Andric } 7965f757f3fSDimitry Andric } 7975f757f3fSDimitry Andric 7985f757f3fSDimitry Andric return fold_pow(FPOp, B, FInfo); 7995f757f3fSDimitry Andric } 8000b57cec5SDimitry Andric case AMDGPULibFunc::EI_POWR: 8010b57cec5SDimitry Andric case AMDGPULibFunc::EI_POWN: 8025f757f3fSDimitry Andric return fold_pow(FPOp, B, FInfo); 8030b57cec5SDimitry Andric case AMDGPULibFunc::EI_ROOTN: 8045f757f3fSDimitry Andric return fold_rootn(FPOp, B, FInfo); 8050b57cec5SDimitry Andric case AMDGPULibFunc::EI_SQRT: 8061db9f3b2SDimitry Andric // TODO: Allow with strictfp + constrained intrinsic 8071db9f3b2SDimitry Andric return tryReplaceLibcallWithSimpleIntrinsic( 8081db9f3b2SDimitry Andric B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false); 8090b57cec5SDimitry Andric case AMDGPULibFunc::EI_COS: 8100b57cec5SDimitry Andric case AMDGPULibFunc::EI_SIN: 8115f757f3fSDimitry Andric return fold_sincos(FPOp, B, FInfo); 8125f757f3fSDimitry Andric default: 8130b57cec5SDimitry Andric break; 8145f757f3fSDimitry Andric } 8155f757f3fSDimitry Andric } else { 8165f757f3fSDimitry Andric // Specialized optimizations for each function call 8175f757f3fSDimitry Andric switch (FInfo.getId()) { 8180b57cec5SDimitry Andric case AMDGPULibFunc::EI_READ_PIPE_2: 8190b57cec5SDimitry Andric case AMDGPULibFunc::EI_READ_PIPE_4: 8200b57cec5SDimitry Andric case AMDGPULibFunc::EI_WRITE_PIPE_2: 8210b57cec5SDimitry Andric case AMDGPULibFunc::EI_WRITE_PIPE_4: 8220b57cec5SDimitry Andric return fold_read_write_pipe(CI, B, FInfo); 8230b57cec5SDimitry Andric default: 8240b57cec5SDimitry Andric break; 8250b57cec5SDimitry Andric } 8265f757f3fSDimitry Andric } 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric return false; 8290b57cec5SDimitry Andric } 8300b57cec5SDimitry Andric 8310b57cec5SDimitry Andric bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { 8320b57cec5SDimitry Andric // Table-Driven optimization 8330b57cec5SDimitry Andric const TableRef tr = getOptTable(FInfo.getId()); 834fcaf7f86SDimitry Andric if (tr.empty()) 8350b57cec5SDimitry Andric return false; 8360b57cec5SDimitry Andric 837fcaf7f86SDimitry Andric int const sz = (int)tr.size(); 8380b57cec5SDimitry Andric Value *opr0 = CI->getArgOperand(0); 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric if (getVecSize(FInfo) > 1) { 8410b57cec5SDimitry Andric if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { 8420b57cec5SDimitry Andric SmallVector<double, 0> DVal; 8430b57cec5SDimitry Andric for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { 8440b57cec5SDimitry Andric ConstantFP *eltval = dyn_cast<ConstantFP>( 8450b57cec5SDimitry Andric CV->getElementAsConstant((unsigned)eltNo)); 8460b57cec5SDimitry Andric assert(eltval && "Non-FP arguments in math function!"); 8470b57cec5SDimitry Andric bool found = false; 8480b57cec5SDimitry Andric for (int i=0; i < sz; ++i) { 849fcaf7f86SDimitry Andric if (eltval->isExactlyValue(tr[i].input)) { 850fcaf7f86SDimitry Andric DVal.push_back(tr[i].result); 8510b57cec5SDimitry Andric found = true; 8520b57cec5SDimitry Andric break; 8530b57cec5SDimitry Andric } 8540b57cec5SDimitry Andric } 8550b57cec5SDimitry Andric if (!found) { 8560b57cec5SDimitry Andric // This vector constants not handled yet. 8570b57cec5SDimitry Andric return false; 8580b57cec5SDimitry Andric } 8590b57cec5SDimitry Andric } 8600b57cec5SDimitry Andric LLVMContext &context = CI->getParent()->getParent()->getContext(); 8610b57cec5SDimitry Andric Constant *nval; 8620b57cec5SDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32) { 8630b57cec5SDimitry Andric SmallVector<float, 0> FVal; 864*0fca6ea1SDimitry Andric for (double D : DVal) 865*0fca6ea1SDimitry Andric FVal.push_back((float)D); 8660b57cec5SDimitry Andric ArrayRef<float> tmp(FVal); 8670b57cec5SDimitry Andric nval = ConstantDataVector::get(context, tmp); 8680b57cec5SDimitry Andric } else { // F64 8690b57cec5SDimitry Andric ArrayRef<double> tmp(DVal); 8700b57cec5SDimitry Andric nval = ConstantDataVector::get(context, tmp); 8710b57cec5SDimitry Andric } 8720b57cec5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 8735f757f3fSDimitry Andric replaceCall(CI, nval); 8740b57cec5SDimitry Andric return true; 8750b57cec5SDimitry Andric } 8760b57cec5SDimitry Andric } else { 8770b57cec5SDimitry Andric // Scalar version 8780b57cec5SDimitry Andric if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { 8790b57cec5SDimitry Andric for (int i = 0; i < sz; ++i) { 880fcaf7f86SDimitry Andric if (CF->isExactlyValue(tr[i].input)) { 881fcaf7f86SDimitry Andric Value *nval = ConstantFP::get(CF->getType(), tr[i].result); 8820b57cec5SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 8835f757f3fSDimitry Andric replaceCall(CI, nval); 8840b57cec5SDimitry Andric return true; 8850b57cec5SDimitry Andric } 8860b57cec5SDimitry Andric } 8870b57cec5SDimitry Andric } 8880b57cec5SDimitry Andric } 8890b57cec5SDimitry Andric 8900b57cec5SDimitry Andric return false; 8910b57cec5SDimitry Andric } 8920b57cec5SDimitry Andric 8930b57cec5SDimitry Andric namespace llvm { 8940b57cec5SDimitry Andric static double log2(double V) { 8950b57cec5SDimitry Andric #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L 8960b57cec5SDimitry Andric return ::log2(V); 8970b57cec5SDimitry Andric #else 8988bcb0991SDimitry Andric return log(V) / numbers::ln2; 8990b57cec5SDimitry Andric #endif 9000b57cec5SDimitry Andric } 901*0fca6ea1SDimitry Andric } // namespace llvm 9020b57cec5SDimitry Andric 9035f757f3fSDimitry Andric bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, 9040b57cec5SDimitry Andric const FuncInfo &FInfo) { 9050b57cec5SDimitry Andric assert((FInfo.getId() == AMDGPULibFunc::EI_POW || 9060b57cec5SDimitry Andric FInfo.getId() == AMDGPULibFunc::EI_POWR || 9070b57cec5SDimitry Andric FInfo.getId() == AMDGPULibFunc::EI_POWN) && 9080b57cec5SDimitry Andric "fold_pow: encounter a wrong function call"); 9090b57cec5SDimitry Andric 9105f757f3fSDimitry Andric Module *M = B.GetInsertBlock()->getModule(); 9115f757f3fSDimitry Andric Type *eltType = FPOp->getType()->getScalarType(); 9125f757f3fSDimitry Andric Value *opr0 = FPOp->getOperand(0); 9135f757f3fSDimitry Andric Value *opr1 = FPOp->getOperand(1); 9140b57cec5SDimitry Andric 9155f757f3fSDimitry Andric const APFloat *CF = nullptr; 9165f757f3fSDimitry Andric const APInt *CINT = nullptr; 917*0fca6ea1SDimitry Andric if (!match(opr1, m_APFloatAllowPoison(CF))) 918*0fca6ea1SDimitry Andric match(opr1, m_APIntAllowPoison(CINT)); 9190b57cec5SDimitry Andric 9200b57cec5SDimitry Andric // 0x1111111 means that we don't do anything for this call. 9210b57cec5SDimitry Andric int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); 9220b57cec5SDimitry Andric 9235f757f3fSDimitry Andric if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) { 9240b57cec5SDimitry Andric // pow/powr/pown(x, 0) == 1 9255f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n"); 9260b57cec5SDimitry Andric Constant *cnval = ConstantFP::get(eltType, 1.0); 9270b57cec5SDimitry Andric if (getVecSize(FInfo) > 1) { 9280b57cec5SDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 9290b57cec5SDimitry Andric } 9305f757f3fSDimitry Andric replaceCall(FPOp, cnval); 9310b57cec5SDimitry Andric return true; 9320b57cec5SDimitry Andric } 9330b57cec5SDimitry Andric if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { 9340b57cec5SDimitry Andric // pow/powr/pown(x, 1.0) = x 9355f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); 9365f757f3fSDimitry Andric replaceCall(FPOp, opr0); 9370b57cec5SDimitry Andric return true; 9380b57cec5SDimitry Andric } 9390b57cec5SDimitry Andric if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { 9400b57cec5SDimitry Andric // pow/powr/pown(x, 2.0) = x*x 9415f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * " 9425f757f3fSDimitry Andric << *opr0 << "\n"); 9430b57cec5SDimitry Andric Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); 9445f757f3fSDimitry Andric replaceCall(FPOp, nval); 9450b57cec5SDimitry Andric return true; 9460b57cec5SDimitry Andric } 9470b57cec5SDimitry Andric if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { 9480b57cec5SDimitry Andric // pow/powr/pown(x, -1.0) = 1.0/x 9495f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n"); 9500b57cec5SDimitry Andric Constant *cnval = ConstantFP::get(eltType, 1.0); 9510b57cec5SDimitry Andric if (getVecSize(FInfo) > 1) { 9520b57cec5SDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 9530b57cec5SDimitry Andric } 9540b57cec5SDimitry Andric Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip"); 9555f757f3fSDimitry Andric replaceCall(FPOp, nval); 9560b57cec5SDimitry Andric return true; 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { 9600b57cec5SDimitry Andric // pow[r](x, [-]0.5) = sqrt(x) 9610b57cec5SDimitry Andric bool issqrt = CF->isExactlyValue(0.5); 9620b57cec5SDimitry Andric if (FunctionCallee FPExpr = 9630b57cec5SDimitry Andric getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT 9640b57cec5SDimitry Andric : AMDGPULibFunc::EI_RSQRT, 9650b57cec5SDimitry Andric FInfo))) { 9665f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName() 9675f757f3fSDimitry Andric << '(' << *opr0 << ")\n"); 9680b57cec5SDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" 9690b57cec5SDimitry Andric : "__pow2rsqrt"); 9705f757f3fSDimitry Andric replaceCall(FPOp, nval); 9710b57cec5SDimitry Andric return true; 9720b57cec5SDimitry Andric } 9730b57cec5SDimitry Andric } 9740b57cec5SDimitry Andric 9755f757f3fSDimitry Andric if (!isUnsafeFiniteOnlyMath(FPOp)) 9760b57cec5SDimitry Andric return false; 9770b57cec5SDimitry Andric 9780b57cec5SDimitry Andric // Unsafe Math optimization 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric // Remember that ci_opr1 is set if opr1 is integral 9810b57cec5SDimitry Andric if (CF) { 9820b57cec5SDimitry Andric double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) 9835f757f3fSDimitry Andric ? (double)CF->convertToFloat() 9845f757f3fSDimitry Andric : CF->convertToDouble(); 9850b57cec5SDimitry Andric int ival = (int)dval; 9860b57cec5SDimitry Andric if ((double)ival == dval) { 9870b57cec5SDimitry Andric ci_opr1 = ival; 9880b57cec5SDimitry Andric } else 9890b57cec5SDimitry Andric ci_opr1 = 0x11111111; 9900b57cec5SDimitry Andric } 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric // pow/powr/pown(x, c) = [1/](x*x*..x); where 9930b57cec5SDimitry Andric // trunc(c) == c && the number of x == c && |c| <= 12 9940b57cec5SDimitry Andric unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1; 9950b57cec5SDimitry Andric if (abs_opr1 <= 12) { 9960b57cec5SDimitry Andric Constant *cnval; 9970b57cec5SDimitry Andric Value *nval; 9980b57cec5SDimitry Andric if (abs_opr1 == 0) { 9990b57cec5SDimitry Andric cnval = ConstantFP::get(eltType, 1.0); 10000b57cec5SDimitry Andric if (getVecSize(FInfo) > 1) { 10010b57cec5SDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric nval = cnval; 10040b57cec5SDimitry Andric } else { 10050b57cec5SDimitry Andric Value *valx2 = nullptr; 10060b57cec5SDimitry Andric nval = nullptr; 10070b57cec5SDimitry Andric while (abs_opr1 > 0) { 10080b57cec5SDimitry Andric valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0; 10090b57cec5SDimitry Andric if (abs_opr1 & 1) { 10100b57cec5SDimitry Andric nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2; 10110b57cec5SDimitry Andric } 10120b57cec5SDimitry Andric abs_opr1 >>= 1; 10130b57cec5SDimitry Andric } 10140b57cec5SDimitry Andric } 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric if (ci_opr1 < 0) { 10170b57cec5SDimitry Andric cnval = ConstantFP::get(eltType, 1.0); 10180b57cec5SDimitry Andric if (getVecSize(FInfo) > 1) { 10190b57cec5SDimitry Andric cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 10200b57cec5SDimitry Andric } 10210b57cec5SDimitry Andric nval = B.CreateFDiv(cnval, nval, "__1powprod"); 10220b57cec5SDimitry Andric } 10235f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 10240b57cec5SDimitry Andric << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 10250b57cec5SDimitry Andric << ")\n"); 10265f757f3fSDimitry Andric replaceCall(FPOp, nval); 10270b57cec5SDimitry Andric return true; 10280b57cec5SDimitry Andric } 10290b57cec5SDimitry Andric 10305f757f3fSDimitry Andric // If we should use the generic intrinsic instead of emitting a libcall 10315f757f3fSDimitry Andric const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy(); 10325f757f3fSDimitry Andric 10330b57cec5SDimitry Andric // powr ---> exp2(y * log2(x)) 10340b57cec5SDimitry Andric // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) 10355f757f3fSDimitry Andric FunctionCallee ExpExpr; 10365f757f3fSDimitry Andric if (ShouldUseIntrinsic) 10375f757f3fSDimitry Andric ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()}); 10385f757f3fSDimitry Andric else { 10395f757f3fSDimitry Andric ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); 10400b57cec5SDimitry Andric if (!ExpExpr) 10410b57cec5SDimitry Andric return false; 10425f757f3fSDimitry Andric } 10430b57cec5SDimitry Andric 10440b57cec5SDimitry Andric bool needlog = false; 10450b57cec5SDimitry Andric bool needabs = false; 10460b57cec5SDimitry Andric bool needcopysign = false; 10470b57cec5SDimitry Andric Constant *cnval = nullptr; 10480b57cec5SDimitry Andric if (getVecSize(FInfo) == 1) { 10495f757f3fSDimitry Andric CF = nullptr; 1050*0fca6ea1SDimitry Andric match(opr0, m_APFloatAllowPoison(CF)); 10510b57cec5SDimitry Andric 10520b57cec5SDimitry Andric if (CF) { 10530b57cec5SDimitry Andric double V = (getArgType(FInfo) == AMDGPULibFunc::F32) 10545f757f3fSDimitry Andric ? (double)CF->convertToFloat() 10555f757f3fSDimitry Andric : CF->convertToDouble(); 10560b57cec5SDimitry Andric 10570b57cec5SDimitry Andric V = log2(std::abs(V)); 10580b57cec5SDimitry Andric cnval = ConstantFP::get(eltType, V); 10590b57cec5SDimitry Andric needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) && 10600b57cec5SDimitry Andric CF->isNegative(); 10610b57cec5SDimitry Andric } else { 10620b57cec5SDimitry Andric needlog = true; 1063cb14a3feSDimitry Andric needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; 10640b57cec5SDimitry Andric } 10650b57cec5SDimitry Andric } else { 10660b57cec5SDimitry Andric ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); 10670b57cec5SDimitry Andric 10680b57cec5SDimitry Andric if (!CDV) { 10690b57cec5SDimitry Andric needlog = true; 10700b57cec5SDimitry Andric needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; 10710b57cec5SDimitry Andric } else { 10720b57cec5SDimitry Andric assert ((int)CDV->getNumElements() == getVecSize(FInfo) && 10730b57cec5SDimitry Andric "Wrong vector size detected"); 10740b57cec5SDimitry Andric 10750b57cec5SDimitry Andric SmallVector<double, 0> DVal; 10760b57cec5SDimitry Andric for (int i=0; i < getVecSize(FInfo); ++i) { 10775f757f3fSDimitry Andric double V = CDV->getElementAsAPFloat(i).convertToDouble(); 10780b57cec5SDimitry Andric if (V < 0.0) needcopysign = true; 10790b57cec5SDimitry Andric V = log2(std::abs(V)); 10800b57cec5SDimitry Andric DVal.push_back(V); 10810b57cec5SDimitry Andric } 10820b57cec5SDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32) { 10830b57cec5SDimitry Andric SmallVector<float, 0> FVal; 1084*0fca6ea1SDimitry Andric for (double D : DVal) 1085*0fca6ea1SDimitry Andric FVal.push_back((float)D); 10860b57cec5SDimitry Andric ArrayRef<float> tmp(FVal); 10870b57cec5SDimitry Andric cnval = ConstantDataVector::get(M->getContext(), tmp); 10880b57cec5SDimitry Andric } else { 10890b57cec5SDimitry Andric ArrayRef<double> tmp(DVal); 10900b57cec5SDimitry Andric cnval = ConstantDataVector::get(M->getContext(), tmp); 10910b57cec5SDimitry Andric } 10920b57cec5SDimitry Andric } 10930b57cec5SDimitry Andric } 10940b57cec5SDimitry Andric 10950b57cec5SDimitry Andric if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) { 10960b57cec5SDimitry Andric // We cannot handle corner cases for a general pow() function, give up 10970b57cec5SDimitry Andric // unless y is a constant integral value. Then proceed as if it were pown. 10985f757f3fSDimitry Andric if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags())) 10990b57cec5SDimitry Andric return false; 11000b57cec5SDimitry Andric } 11010b57cec5SDimitry Andric 11020b57cec5SDimitry Andric Value *nval; 11030b57cec5SDimitry Andric if (needabs) { 11045f757f3fSDimitry Andric nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs"); 11050b57cec5SDimitry Andric } else { 11060b57cec5SDimitry Andric nval = cnval ? cnval : opr0; 11070b57cec5SDimitry Andric } 11080b57cec5SDimitry Andric if (needlog) { 11095f757f3fSDimitry Andric FunctionCallee LogExpr; 11105f757f3fSDimitry Andric if (ShouldUseIntrinsic) { 11115f757f3fSDimitry Andric LogExpr = 11125f757f3fSDimitry Andric Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()}); 11135f757f3fSDimitry Andric } else { 11145f757f3fSDimitry Andric LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); 11150b57cec5SDimitry Andric if (!LogExpr) 11160b57cec5SDimitry Andric return false; 11175f757f3fSDimitry Andric } 11185f757f3fSDimitry Andric 11190b57cec5SDimitry Andric nval = CreateCallEx(B,LogExpr, nval, "__log2"); 11200b57cec5SDimitry Andric } 11210b57cec5SDimitry Andric 11220b57cec5SDimitry Andric if (FInfo.getId() == AMDGPULibFunc::EI_POWN) { 11230b57cec5SDimitry Andric // convert int(32) to fp(f32 or f64) 11240b57cec5SDimitry Andric opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F"); 11250b57cec5SDimitry Andric } 11260b57cec5SDimitry Andric nval = B.CreateFMul(opr1, nval, "__ylogx"); 11270b57cec5SDimitry Andric nval = CreateCallEx(B,ExpExpr, nval, "__exp2"); 11280b57cec5SDimitry Andric 11290b57cec5SDimitry Andric if (needcopysign) { 11305f757f3fSDimitry Andric Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits()); 1131*0fca6ea1SDimitry Andric Type *nTy = FPOp->getType()->getWithNewType(nTyS); 11320b57cec5SDimitry Andric unsigned size = nTy->getScalarSizeInBits(); 1133*0fca6ea1SDimitry Andric Value *opr_n = FPOp->getOperand(1); 1134*0fca6ea1SDimitry Andric if (opr_n->getType()->getScalarType()->isIntegerTy()) 11355f757f3fSDimitry Andric opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou"); 11360b57cec5SDimitry Andric else 11370b57cec5SDimitry Andric opr_n = B.CreateFPToSI(opr1, nTy, "__ytou"); 11380b57cec5SDimitry Andric 11390b57cec5SDimitry Andric Value *sign = B.CreateShl(opr_n, size-1, "__yeven"); 11400b57cec5SDimitry Andric sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign"); 11410b57cec5SDimitry Andric nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign); 11420b57cec5SDimitry Andric nval = B.CreateBitCast(nval, opr0->getType()); 11430b57cec5SDimitry Andric } 11440b57cec5SDimitry Andric 11455f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 11460b57cec5SDimitry Andric << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"); 11475f757f3fSDimitry Andric replaceCall(FPOp, nval); 11480b57cec5SDimitry Andric 11490b57cec5SDimitry Andric return true; 11500b57cec5SDimitry Andric } 11510b57cec5SDimitry Andric 11525f757f3fSDimitry Andric bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, 11530b57cec5SDimitry Andric const FuncInfo &FInfo) { 11545f757f3fSDimitry Andric Value *opr0 = FPOp->getOperand(0); 11555f757f3fSDimitry Andric Value *opr1 = FPOp->getOperand(1); 11560b57cec5SDimitry Andric 1157*0fca6ea1SDimitry Andric const APInt *CINT = nullptr; 1158*0fca6ea1SDimitry Andric if (!match(opr1, m_APIntAllowPoison(CINT))) 11590b57cec5SDimitry Andric return false; 1160*0fca6ea1SDimitry Andric 1161*0fca6ea1SDimitry Andric Function *Parent = B.GetInsertBlock()->getParent(); 1162*0fca6ea1SDimitry Andric 11630b57cec5SDimitry Andric int ci_opr1 = (int)CINT->getSExtValue(); 1164*0fca6ea1SDimitry Andric if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) { 1165*0fca6ea1SDimitry Andric // rootn(x, 1) = x 1166*0fca6ea1SDimitry Andric // 1167*0fca6ea1SDimitry Andric // TODO: Insert constrained canonicalize for strictfp case. 1168*0fca6ea1SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n'); 11695f757f3fSDimitry Andric replaceCall(FPOp, opr0); 11700b57cec5SDimitry Andric return true; 11710b57cec5SDimitry Andric } 11725f757f3fSDimitry Andric 11735f757f3fSDimitry Andric Module *M = B.GetInsertBlock()->getModule(); 1174*0fca6ea1SDimitry Andric 1175*0fca6ea1SDimitry Andric CallInst *CI = cast<CallInst>(FPOp); 1176*0fca6ea1SDimitry Andric if (ci_opr1 == 2 && 1177*0fca6ea1SDimitry Andric shouldReplaceLibcallWithIntrinsic(CI, 1178*0fca6ea1SDimitry Andric /*AllowMinSizeF32=*/true, 1179*0fca6ea1SDimitry Andric /*AllowF64=*/true)) { 1180*0fca6ea1SDimitry Andric // rootn(x, 2) = sqrt(x) 1181*0fca6ea1SDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n"); 1182*0fca6ea1SDimitry Andric 1183*0fca6ea1SDimitry Andric CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI); 1184*0fca6ea1SDimitry Andric NewCall->takeName(CI); 1185*0fca6ea1SDimitry Andric 1186*0fca6ea1SDimitry Andric // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some 1187*0fca6ea1SDimitry Andric // metadata. 1188*0fca6ea1SDimitry Andric MDBuilder MDHelper(M->getContext()); 1189*0fca6ea1SDimitry Andric MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f)); 1190*0fca6ea1SDimitry Andric NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD); 1191*0fca6ea1SDimitry Andric 1192*0fca6ea1SDimitry Andric replaceCall(CI, NewCall); 11930b57cec5SDimitry Andric return true; 11940b57cec5SDimitry Andric } 1195*0fca6ea1SDimitry Andric 1196*0fca6ea1SDimitry Andric if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) 11970b57cec5SDimitry Andric if (FunctionCallee FPExpr = 11980b57cec5SDimitry Andric getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { 11995f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0 12005f757f3fSDimitry Andric << ")\n"); 12010b57cec5SDimitry Andric Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); 12025f757f3fSDimitry Andric replaceCall(FPOp, nval); 12030b57cec5SDimitry Andric return true; 12040b57cec5SDimitry Andric } 12050b57cec5SDimitry Andric } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x 12065f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n"); 12070b57cec5SDimitry Andric Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), 12080b57cec5SDimitry Andric opr0, 12090b57cec5SDimitry Andric "__rootn2div"); 12105f757f3fSDimitry Andric replaceCall(FPOp, nval); 12110b57cec5SDimitry Andric return true; 1212*0fca6ea1SDimitry Andric } 1213*0fca6ea1SDimitry Andric 1214*0fca6ea1SDimitry Andric if (ci_opr1 == -2 && 1215*0fca6ea1SDimitry Andric shouldReplaceLibcallWithIntrinsic(CI, 1216*0fca6ea1SDimitry Andric /*AllowMinSizeF32=*/true, 1217*0fca6ea1SDimitry Andric /*AllowF64=*/true)) { 1218*0fca6ea1SDimitry Andric // rootn(x, -2) = rsqrt(x) 1219*0fca6ea1SDimitry Andric 1220*0fca6ea1SDimitry Andric // The original rootn had looser ulp requirements than the resultant sqrt 1221*0fca6ea1SDimitry Andric // and fdiv. 1222*0fca6ea1SDimitry Andric MDBuilder MDHelper(M->getContext()); 1223*0fca6ea1SDimitry Andric MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f)); 1224*0fca6ea1SDimitry Andric 1225*0fca6ea1SDimitry Andric // TODO: Could handle strictfp but need to fix strict sqrt emission 1226*0fca6ea1SDimitry Andric FastMathFlags FMF = FPOp->getFastMathFlags(); 1227*0fca6ea1SDimitry Andric FMF.setAllowContract(true); 1228*0fca6ea1SDimitry Andric 1229*0fca6ea1SDimitry Andric CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI); 1230*0fca6ea1SDimitry Andric Instruction *RSqrt = cast<Instruction>( 1231*0fca6ea1SDimitry Andric B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt)); 1232*0fca6ea1SDimitry Andric Sqrt->setFastMathFlags(FMF); 1233*0fca6ea1SDimitry Andric RSqrt->setFastMathFlags(FMF); 1234*0fca6ea1SDimitry Andric RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD); 1235*0fca6ea1SDimitry Andric 12365f757f3fSDimitry Andric LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0 12370b57cec5SDimitry Andric << ")\n"); 1238*0fca6ea1SDimitry Andric replaceCall(CI, RSqrt); 12390b57cec5SDimitry Andric return true; 12400b57cec5SDimitry Andric } 1241*0fca6ea1SDimitry Andric 12420b57cec5SDimitry Andric return false; 12430b57cec5SDimitry Andric } 12440b57cec5SDimitry Andric 1245349cc55cSDimitry Andric // Get a scalar native builtin single argument FP function 12460b57cec5SDimitry Andric FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, 12470b57cec5SDimitry Andric const FuncInfo &FInfo) { 12480b57cec5SDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) 12490b57cec5SDimitry Andric return nullptr; 12500b57cec5SDimitry Andric FuncInfo nf = FInfo; 12510b57cec5SDimitry Andric nf.setPrefix(AMDGPULibFunc::NATIVE); 12520b57cec5SDimitry Andric return getFunction(M, nf); 12530b57cec5SDimitry Andric } 12540b57cec5SDimitry Andric 12555f757f3fSDimitry Andric // Some library calls are just wrappers around llvm intrinsics, but compiled 12565f757f3fSDimitry Andric // conservatively. Preserve the flags from the original call site by 12575f757f3fSDimitry Andric // substituting them with direct calls with all the flags. 12585f757f3fSDimitry Andric bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 12595f757f3fSDimitry Andric bool AllowMinSizeF32, 12605f757f3fSDimitry Andric bool AllowF64, 12615f757f3fSDimitry Andric bool AllowStrictFP) { 12625f757f3fSDimitry Andric Type *FltTy = CI->getType()->getScalarType(); 12635f757f3fSDimitry Andric const bool IsF32 = FltTy->isFloatTy(); 12645f757f3fSDimitry Andric 12655f757f3fSDimitry Andric // f64 intrinsics aren't implemented for most operations. 12665f757f3fSDimitry Andric if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy())) 12675f757f3fSDimitry Andric return false; 12685f757f3fSDimitry Andric 12695f757f3fSDimitry Andric // We're implicitly inlining by replacing the libcall with the intrinsic, so 12705f757f3fSDimitry Andric // don't do it for noinline call sites. 12715f757f3fSDimitry Andric if (CI->isNoInline()) 12725f757f3fSDimitry Andric return false; 12735f757f3fSDimitry Andric 12745f757f3fSDimitry Andric const Function *ParentF = CI->getFunction(); 12755f757f3fSDimitry Andric // TODO: Handle strictfp 12765f757f3fSDimitry Andric if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP)) 12775f757f3fSDimitry Andric return false; 12785f757f3fSDimitry Andric 12795f757f3fSDimitry Andric if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize()) 12805f757f3fSDimitry Andric return false; 12815f757f3fSDimitry Andric return true; 12825f757f3fSDimitry Andric } 12835f757f3fSDimitry Andric 12845f757f3fSDimitry Andric void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, 12855f757f3fSDimitry Andric CallInst *CI, 12865f757f3fSDimitry Andric Intrinsic::ID IntrID) { 12875f757f3fSDimitry Andric if (CI->arg_size() == 2) { 12885f757f3fSDimitry Andric Value *Arg0 = CI->getArgOperand(0); 12895f757f3fSDimitry Andric Value *Arg1 = CI->getArgOperand(1); 12905f757f3fSDimitry Andric VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType()); 12915f757f3fSDimitry Andric VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType()); 12925f757f3fSDimitry Andric if (Arg0VecTy && !Arg1VecTy) { 12935f757f3fSDimitry Andric Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1); 12945f757f3fSDimitry Andric CI->setArgOperand(1, SplatRHS); 12955f757f3fSDimitry Andric } else if (!Arg0VecTy && Arg1VecTy) { 12965f757f3fSDimitry Andric Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0); 12975f757f3fSDimitry Andric CI->setArgOperand(0, SplatLHS); 12985f757f3fSDimitry Andric } 12995f757f3fSDimitry Andric } 13005f757f3fSDimitry Andric 13015f757f3fSDimitry Andric CI->setCalledFunction( 13025f757f3fSDimitry Andric Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()})); 13035f757f3fSDimitry Andric } 13045f757f3fSDimitry Andric 13055f757f3fSDimitry Andric bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic( 13065f757f3fSDimitry Andric IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32, 13075f757f3fSDimitry Andric bool AllowF64, bool AllowStrictFP) { 13085f757f3fSDimitry Andric if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64, 13095f757f3fSDimitry Andric AllowStrictFP)) 13105f757f3fSDimitry Andric return false; 13115f757f3fSDimitry Andric replaceLibCallWithSimpleIntrinsic(B, CI, IntrID); 13125f757f3fSDimitry Andric return true; 13135f757f3fSDimitry Andric } 13145f757f3fSDimitry Andric 13155f757f3fSDimitry Andric std::tuple<Value *, Value *, Value *> 13165f757f3fSDimitry Andric AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B, 13175f757f3fSDimitry Andric FunctionCallee Fsincos) { 13185f757f3fSDimitry Andric DebugLoc DL = B.getCurrentDebugLocation(); 13195f757f3fSDimitry Andric Function *F = B.GetInsertBlock()->getParent(); 13205f757f3fSDimitry Andric B.SetInsertPointPastAllocas(F); 13215f757f3fSDimitry Andric 13225f757f3fSDimitry Andric AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_"); 13235f757f3fSDimitry Andric 13245f757f3fSDimitry Andric if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { 13255f757f3fSDimitry Andric // If the argument is an instruction, it must dominate all uses so put our 13265f757f3fSDimitry Andric // sincos call there. Otherwise, right after the allocas works well enough 13275f757f3fSDimitry Andric // if it's an argument or constant. 13285f757f3fSDimitry Andric 13295f757f3fSDimitry Andric B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); 13305f757f3fSDimitry Andric 13315f757f3fSDimitry Andric // SetInsertPoint unwelcomely always tries to set the debug loc. 13325f757f3fSDimitry Andric B.SetCurrentDebugLocation(DL); 13335f757f3fSDimitry Andric } 13345f757f3fSDimitry Andric 13355f757f3fSDimitry Andric Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1); 13365f757f3fSDimitry Andric 13375f757f3fSDimitry Andric // The allocaInst allocates the memory in private address space. This need 13385f757f3fSDimitry Andric // to be addrspacecasted to point to the address space of cos pointer type. 13395f757f3fSDimitry Andric // In OpenCL 2.0 this is generic, while in 1.2 that is private. 13405f757f3fSDimitry Andric Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy); 13415f757f3fSDimitry Andric 13425f757f3fSDimitry Andric CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc); 13435f757f3fSDimitry Andric 13445f757f3fSDimitry Andric // TODO: Is it worth trying to preserve the location for the cos calls for the 13455f757f3fSDimitry Andric // load? 13465f757f3fSDimitry Andric 13475f757f3fSDimitry Andric LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc); 13485f757f3fSDimitry Andric return {SinCos, LoadCos, SinCos}; 13495f757f3fSDimitry Andric } 13505f757f3fSDimitry Andric 13510b57cec5SDimitry Andric // fold sin, cos -> sincos. 13525f757f3fSDimitry Andric bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, 13535f757f3fSDimitry Andric const FuncInfo &fInfo) { 13540b57cec5SDimitry Andric assert(fInfo.getId() == AMDGPULibFunc::EI_SIN || 13550b57cec5SDimitry Andric fInfo.getId() == AMDGPULibFunc::EI_COS); 13565f757f3fSDimitry Andric 13575f757f3fSDimitry Andric if ((getArgType(fInfo) != AMDGPULibFunc::F32 && 13585f757f3fSDimitry Andric getArgType(fInfo) != AMDGPULibFunc::F64) || 13595f757f3fSDimitry Andric fInfo.getPrefix() != AMDGPULibFunc::NOPFX) 13605f757f3fSDimitry Andric return false; 13615f757f3fSDimitry Andric 13620b57cec5SDimitry Andric bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; 13630b57cec5SDimitry Andric 13645f757f3fSDimitry Andric Value *CArgVal = FPOp->getOperand(0); 13655f757f3fSDimitry Andric CallInst *CI = cast<CallInst>(FPOp); 13660b57cec5SDimitry Andric 13675f757f3fSDimitry Andric Function *F = B.GetInsertBlock()->getParent(); 13685f757f3fSDimitry Andric Module *M = F->getParent(); 13690b57cec5SDimitry Andric 13705f757f3fSDimitry Andric // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer 13715f757f3fSDimitry Andric // implementation. Prefer the private form if available. 13725f757f3fSDimitry Andric AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo); 13735f757f3fSDimitry Andric SinCosLibFuncPrivate.getLeads()[0].PtrKind = 13745f757f3fSDimitry Andric AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS); 13750b57cec5SDimitry Andric 13765f757f3fSDimitry Andric AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo); 13775f757f3fSDimitry Andric SinCosLibFuncGeneric.getLeads()[0].PtrKind = 13785f757f3fSDimitry Andric AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); 13790b57cec5SDimitry Andric 13805f757f3fSDimitry Andric FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate); 13815f757f3fSDimitry Andric FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric); 13825f757f3fSDimitry Andric FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric; 13835f757f3fSDimitry Andric if (!FSinCos) 13845f757f3fSDimitry Andric return false; 13855f757f3fSDimitry Andric 13865f757f3fSDimitry Andric SmallVector<CallInst *> SinCalls; 13875f757f3fSDimitry Andric SmallVector<CallInst *> CosCalls; 13885f757f3fSDimitry Andric SmallVector<CallInst *> SinCosCalls; 13895f757f3fSDimitry Andric FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN, 13905f757f3fSDimitry Andric fInfo); 13915f757f3fSDimitry Andric const std::string PairName = PartnerInfo.mangle(); 13925f757f3fSDimitry Andric 13935f757f3fSDimitry Andric StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName; 13945f757f3fSDimitry Andric StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName(); 13955f757f3fSDimitry Andric const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle(); 13965f757f3fSDimitry Andric const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle(); 13975f757f3fSDimitry Andric 13985f757f3fSDimitry Andric // Intersect the two sets of flags. 13995f757f3fSDimitry Andric FastMathFlags FMF = FPOp->getFastMathFlags(); 14005f757f3fSDimitry Andric MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath); 14015f757f3fSDimitry Andric 14025f757f3fSDimitry Andric SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()}; 14035f757f3fSDimitry Andric 14040b57cec5SDimitry Andric for (User* U : CArgVal->users()) { 14055f757f3fSDimitry Andric CallInst *XI = dyn_cast<CallInst>(U); 14065f757f3fSDimitry Andric if (!XI || XI->getFunction() != F || XI->isNoBuiltin()) 14070b57cec5SDimitry Andric continue; 14080b57cec5SDimitry Andric 14090b57cec5SDimitry Andric Function *UCallee = XI->getCalledFunction(); 14105f757f3fSDimitry Andric if (!UCallee) 14110b57cec5SDimitry Andric continue; 14120b57cec5SDimitry Andric 14135f757f3fSDimitry Andric bool Handled = true; 14145f757f3fSDimitry Andric 14155f757f3fSDimitry Andric if (UCallee->getName() == SinName) 14165f757f3fSDimitry Andric SinCalls.push_back(XI); 14175f757f3fSDimitry Andric else if (UCallee->getName() == CosName) 14185f757f3fSDimitry Andric CosCalls.push_back(XI); 14195f757f3fSDimitry Andric else if (UCallee->getName() == SinCosPrivateName || 14205f757f3fSDimitry Andric UCallee->getName() == SinCosGenericName) 14215f757f3fSDimitry Andric SinCosCalls.push_back(XI); 14225f757f3fSDimitry Andric else 14235f757f3fSDimitry Andric Handled = false; 14245f757f3fSDimitry Andric 14255f757f3fSDimitry Andric if (Handled) { 14265f757f3fSDimitry Andric MergeDbgLocs.push_back(XI->getDebugLoc()); 14275f757f3fSDimitry Andric auto *OtherOp = cast<FPMathOperator>(XI); 14285f757f3fSDimitry Andric FMF &= OtherOp->getFastMathFlags(); 14295f757f3fSDimitry Andric FPMath = MDNode::getMostGenericFPMath( 14305f757f3fSDimitry Andric FPMath, XI->getMetadata(LLVMContext::MD_fpmath)); 14310b57cec5SDimitry Andric } 14320b57cec5SDimitry Andric } 14330b57cec5SDimitry Andric 14345f757f3fSDimitry Andric if (SinCalls.empty() || CosCalls.empty()) 14350b57cec5SDimitry Andric return false; 14360b57cec5SDimitry Andric 14375f757f3fSDimitry Andric B.setFastMathFlags(FMF); 14385f757f3fSDimitry Andric B.setDefaultFPMathTag(FPMath); 14395f757f3fSDimitry Andric DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs); 14405f757f3fSDimitry Andric B.SetCurrentDebugLocation(DbgLoc); 14410b57cec5SDimitry Andric 14425f757f3fSDimitry Andric auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos); 14430b57cec5SDimitry Andric 14445f757f3fSDimitry Andric auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) { 14455f757f3fSDimitry Andric for (CallInst *C : Calls) 14465f757f3fSDimitry Andric C->replaceAllUsesWith(Res); 14470b57cec5SDimitry Andric 14485f757f3fSDimitry Andric // Leave the other dead instructions to avoid clobbering iterators. 14495f757f3fSDimitry Andric }; 14505f757f3fSDimitry Andric 14515f757f3fSDimitry Andric replaceTrigInsts(SinCalls, Sin); 14525f757f3fSDimitry Andric replaceTrigInsts(CosCalls, Cos); 14535f757f3fSDimitry Andric replaceTrigInsts(SinCosCalls, SinCos); 14545f757f3fSDimitry Andric 14555f757f3fSDimitry Andric // It's safe to delete the original now. 14560b57cec5SDimitry Andric CI->eraseFromParent(); 14570b57cec5SDimitry Andric return true; 14580b57cec5SDimitry Andric } 14590b57cec5SDimitry Andric 14605f757f3fSDimitry Andric bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, 14615f757f3fSDimitry Andric double &Res1, Constant *copr0, 14625f757f3fSDimitry Andric Constant *copr1) { 14630b57cec5SDimitry Andric // By default, opr0/opr1/opr3 holds values of float/double type. 14640b57cec5SDimitry Andric // If they are not float/double, each function has to its 14650b57cec5SDimitry Andric // operand separately. 14665f757f3fSDimitry Andric double opr0 = 0.0, opr1 = 0.0; 14670b57cec5SDimitry Andric ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0); 14680b57cec5SDimitry Andric ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1); 14690b57cec5SDimitry Andric if (fpopr0) { 14700b57cec5SDimitry Andric opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64) 14710b57cec5SDimitry Andric ? fpopr0->getValueAPF().convertToDouble() 14720b57cec5SDimitry Andric : (double)fpopr0->getValueAPF().convertToFloat(); 14730b57cec5SDimitry Andric } 14740b57cec5SDimitry Andric 14750b57cec5SDimitry Andric if (fpopr1) { 14760b57cec5SDimitry Andric opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64) 14770b57cec5SDimitry Andric ? fpopr1->getValueAPF().convertToDouble() 14780b57cec5SDimitry Andric : (double)fpopr1->getValueAPF().convertToFloat(); 14790b57cec5SDimitry Andric } 14800b57cec5SDimitry Andric 14810b57cec5SDimitry Andric switch (FInfo.getId()) { 14820b57cec5SDimitry Andric default : return false; 14830b57cec5SDimitry Andric 14840b57cec5SDimitry Andric case AMDGPULibFunc::EI_ACOS: 14850b57cec5SDimitry Andric Res0 = acos(opr0); 14860b57cec5SDimitry Andric return true; 14870b57cec5SDimitry Andric 14880b57cec5SDimitry Andric case AMDGPULibFunc::EI_ACOSH: 14890b57cec5SDimitry Andric // acosh(x) == log(x + sqrt(x*x - 1)) 14900b57cec5SDimitry Andric Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0)); 14910b57cec5SDimitry Andric return true; 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric case AMDGPULibFunc::EI_ACOSPI: 14940b57cec5SDimitry Andric Res0 = acos(opr0) / MATH_PI; 14950b57cec5SDimitry Andric return true; 14960b57cec5SDimitry Andric 14970b57cec5SDimitry Andric case AMDGPULibFunc::EI_ASIN: 14980b57cec5SDimitry Andric Res0 = asin(opr0); 14990b57cec5SDimitry Andric return true; 15000b57cec5SDimitry Andric 15010b57cec5SDimitry Andric case AMDGPULibFunc::EI_ASINH: 15020b57cec5SDimitry Andric // asinh(x) == log(x + sqrt(x*x + 1)) 15030b57cec5SDimitry Andric Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0)); 15040b57cec5SDimitry Andric return true; 15050b57cec5SDimitry Andric 15060b57cec5SDimitry Andric case AMDGPULibFunc::EI_ASINPI: 15070b57cec5SDimitry Andric Res0 = asin(opr0) / MATH_PI; 15080b57cec5SDimitry Andric return true; 15090b57cec5SDimitry Andric 15100b57cec5SDimitry Andric case AMDGPULibFunc::EI_ATAN: 15110b57cec5SDimitry Andric Res0 = atan(opr0); 15120b57cec5SDimitry Andric return true; 15130b57cec5SDimitry Andric 15140b57cec5SDimitry Andric case AMDGPULibFunc::EI_ATANH: 15150b57cec5SDimitry Andric // atanh(x) == (log(x+1) - log(x-1))/2; 15160b57cec5SDimitry Andric Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0; 15170b57cec5SDimitry Andric return true; 15180b57cec5SDimitry Andric 15190b57cec5SDimitry Andric case AMDGPULibFunc::EI_ATANPI: 15200b57cec5SDimitry Andric Res0 = atan(opr0) / MATH_PI; 15210b57cec5SDimitry Andric return true; 15220b57cec5SDimitry Andric 15230b57cec5SDimitry Andric case AMDGPULibFunc::EI_CBRT: 15240b57cec5SDimitry Andric Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0); 15250b57cec5SDimitry Andric return true; 15260b57cec5SDimitry Andric 15270b57cec5SDimitry Andric case AMDGPULibFunc::EI_COS: 15280b57cec5SDimitry Andric Res0 = cos(opr0); 15290b57cec5SDimitry Andric return true; 15300b57cec5SDimitry Andric 15310b57cec5SDimitry Andric case AMDGPULibFunc::EI_COSH: 15320b57cec5SDimitry Andric Res0 = cosh(opr0); 15330b57cec5SDimitry Andric return true; 15340b57cec5SDimitry Andric 15350b57cec5SDimitry Andric case AMDGPULibFunc::EI_COSPI: 15360b57cec5SDimitry Andric Res0 = cos(MATH_PI * opr0); 15370b57cec5SDimitry Andric return true; 15380b57cec5SDimitry Andric 15390b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP: 15400b57cec5SDimitry Andric Res0 = exp(opr0); 15410b57cec5SDimitry Andric return true; 15420b57cec5SDimitry Andric 15430b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP2: 15440b57cec5SDimitry Andric Res0 = pow(2.0, opr0); 15450b57cec5SDimitry Andric return true; 15460b57cec5SDimitry Andric 15470b57cec5SDimitry Andric case AMDGPULibFunc::EI_EXP10: 15480b57cec5SDimitry Andric Res0 = pow(10.0, opr0); 15490b57cec5SDimitry Andric return true; 15500b57cec5SDimitry Andric 15510b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG: 15520b57cec5SDimitry Andric Res0 = log(opr0); 15530b57cec5SDimitry Andric return true; 15540b57cec5SDimitry Andric 15550b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG2: 15560b57cec5SDimitry Andric Res0 = log(opr0) / log(2.0); 15570b57cec5SDimitry Andric return true; 15580b57cec5SDimitry Andric 15590b57cec5SDimitry Andric case AMDGPULibFunc::EI_LOG10: 15600b57cec5SDimitry Andric Res0 = log(opr0) / log(10.0); 15610b57cec5SDimitry Andric return true; 15620b57cec5SDimitry Andric 15630b57cec5SDimitry Andric case AMDGPULibFunc::EI_RSQRT: 15640b57cec5SDimitry Andric Res0 = 1.0 / sqrt(opr0); 15650b57cec5SDimitry Andric return true; 15660b57cec5SDimitry Andric 15670b57cec5SDimitry Andric case AMDGPULibFunc::EI_SIN: 15680b57cec5SDimitry Andric Res0 = sin(opr0); 15690b57cec5SDimitry Andric return true; 15700b57cec5SDimitry Andric 15710b57cec5SDimitry Andric case AMDGPULibFunc::EI_SINH: 15720b57cec5SDimitry Andric Res0 = sinh(opr0); 15730b57cec5SDimitry Andric return true; 15740b57cec5SDimitry Andric 15750b57cec5SDimitry Andric case AMDGPULibFunc::EI_SINPI: 15760b57cec5SDimitry Andric Res0 = sin(MATH_PI * opr0); 15770b57cec5SDimitry Andric return true; 15780b57cec5SDimitry Andric 15790b57cec5SDimitry Andric case AMDGPULibFunc::EI_TAN: 15800b57cec5SDimitry Andric Res0 = tan(opr0); 15810b57cec5SDimitry Andric return true; 15820b57cec5SDimitry Andric 15830b57cec5SDimitry Andric case AMDGPULibFunc::EI_TANH: 15840b57cec5SDimitry Andric Res0 = tanh(opr0); 15850b57cec5SDimitry Andric return true; 15860b57cec5SDimitry Andric 15870b57cec5SDimitry Andric case AMDGPULibFunc::EI_TANPI: 15880b57cec5SDimitry Andric Res0 = tan(MATH_PI * opr0); 15890b57cec5SDimitry Andric return true; 15900b57cec5SDimitry Andric 15910b57cec5SDimitry Andric // two-arg functions 15920b57cec5SDimitry Andric case AMDGPULibFunc::EI_POW: 15930b57cec5SDimitry Andric case AMDGPULibFunc::EI_POWR: 15940b57cec5SDimitry Andric Res0 = pow(opr0, opr1); 15950b57cec5SDimitry Andric return true; 15960b57cec5SDimitry Andric 15970b57cec5SDimitry Andric case AMDGPULibFunc::EI_POWN: { 15980b57cec5SDimitry Andric if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 15990b57cec5SDimitry Andric double val = (double)iopr1->getSExtValue(); 16000b57cec5SDimitry Andric Res0 = pow(opr0, val); 16010b57cec5SDimitry Andric return true; 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric return false; 16040b57cec5SDimitry Andric } 16050b57cec5SDimitry Andric 16060b57cec5SDimitry Andric case AMDGPULibFunc::EI_ROOTN: { 16070b57cec5SDimitry Andric if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 16080b57cec5SDimitry Andric double val = (double)iopr1->getSExtValue(); 16090b57cec5SDimitry Andric Res0 = pow(opr0, 1.0 / val); 16100b57cec5SDimitry Andric return true; 16110b57cec5SDimitry Andric } 16120b57cec5SDimitry Andric return false; 16130b57cec5SDimitry Andric } 16140b57cec5SDimitry Andric 16150b57cec5SDimitry Andric // with ptr arg 16160b57cec5SDimitry Andric case AMDGPULibFunc::EI_SINCOS: 16170b57cec5SDimitry Andric Res0 = sin(opr0); 16180b57cec5SDimitry Andric Res1 = cos(opr0); 16190b57cec5SDimitry Andric return true; 16200b57cec5SDimitry Andric } 16210b57cec5SDimitry Andric 16220b57cec5SDimitry Andric return false; 16230b57cec5SDimitry Andric } 16240b57cec5SDimitry Andric 1625349cc55cSDimitry Andric bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { 1626349cc55cSDimitry Andric int numArgs = (int)aCI->arg_size(); 16270b57cec5SDimitry Andric if (numArgs > 3) 16280b57cec5SDimitry Andric return false; 16290b57cec5SDimitry Andric 16300b57cec5SDimitry Andric Constant *copr0 = nullptr; 16310b57cec5SDimitry Andric Constant *copr1 = nullptr; 16320b57cec5SDimitry Andric if (numArgs > 0) { 16330b57cec5SDimitry Andric if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) 16340b57cec5SDimitry Andric return false; 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric 16370b57cec5SDimitry Andric if (numArgs > 1) { 16380b57cec5SDimitry Andric if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { 16390b57cec5SDimitry Andric if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) 16400b57cec5SDimitry Andric return false; 16410b57cec5SDimitry Andric } 16420b57cec5SDimitry Andric } 16430b57cec5SDimitry Andric 16440b57cec5SDimitry Andric // At this point, all arguments to aCI are constants. 16450b57cec5SDimitry Andric 16460b57cec5SDimitry Andric // max vector size is 16, and sincos will generate two results. 16470b57cec5SDimitry Andric double DVal0[16], DVal1[16]; 164881ad6265SDimitry Andric int FuncVecSize = getVecSize(FInfo); 16490b57cec5SDimitry Andric bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); 165081ad6265SDimitry Andric if (FuncVecSize == 1) { 16515f757f3fSDimitry Andric if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) { 16520b57cec5SDimitry Andric return false; 16530b57cec5SDimitry Andric } 16540b57cec5SDimitry Andric } else { 16550b57cec5SDimitry Andric ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); 16560b57cec5SDimitry Andric ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); 165781ad6265SDimitry Andric for (int i = 0; i < FuncVecSize; ++i) { 16580b57cec5SDimitry Andric Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; 16590b57cec5SDimitry Andric Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; 16605f757f3fSDimitry Andric if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) { 16610b57cec5SDimitry Andric return false; 16620b57cec5SDimitry Andric } 16630b57cec5SDimitry Andric } 16640b57cec5SDimitry Andric } 16650b57cec5SDimitry Andric 16665f757f3fSDimitry Andric LLVMContext &context = aCI->getContext(); 16670b57cec5SDimitry Andric Constant *nval0, *nval1; 166881ad6265SDimitry Andric if (FuncVecSize == 1) { 16695f757f3fSDimitry Andric nval0 = ConstantFP::get(aCI->getType(), DVal0[0]); 16700b57cec5SDimitry Andric if (hasTwoResults) 16715f757f3fSDimitry Andric nval1 = ConstantFP::get(aCI->getType(), DVal1[0]); 16720b57cec5SDimitry Andric } else { 16730b57cec5SDimitry Andric if (getArgType(FInfo) == AMDGPULibFunc::F32) { 16740b57cec5SDimitry Andric SmallVector <float, 0> FVal0, FVal1; 167581ad6265SDimitry Andric for (int i = 0; i < FuncVecSize; ++i) 16760b57cec5SDimitry Andric FVal0.push_back((float)DVal0[i]); 16770b57cec5SDimitry Andric ArrayRef<float> tmp0(FVal0); 16780b57cec5SDimitry Andric nval0 = ConstantDataVector::get(context, tmp0); 16790b57cec5SDimitry Andric if (hasTwoResults) { 168081ad6265SDimitry Andric for (int i = 0; i < FuncVecSize; ++i) 16810b57cec5SDimitry Andric FVal1.push_back((float)DVal1[i]); 16820b57cec5SDimitry Andric ArrayRef<float> tmp1(FVal1); 16830b57cec5SDimitry Andric nval1 = ConstantDataVector::get(context, tmp1); 16840b57cec5SDimitry Andric } 16850b57cec5SDimitry Andric } else { 16860b57cec5SDimitry Andric ArrayRef<double> tmp0(DVal0); 16870b57cec5SDimitry Andric nval0 = ConstantDataVector::get(context, tmp0); 16880b57cec5SDimitry Andric if (hasTwoResults) { 16890b57cec5SDimitry Andric ArrayRef<double> tmp1(DVal1); 16900b57cec5SDimitry Andric nval1 = ConstantDataVector::get(context, tmp1); 16910b57cec5SDimitry Andric } 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric } 16940b57cec5SDimitry Andric 16950b57cec5SDimitry Andric if (hasTwoResults) { 16960b57cec5SDimitry Andric // sincos 16970b57cec5SDimitry Andric assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS && 16980b57cec5SDimitry Andric "math function with ptr arg not supported yet"); 1699*0fca6ea1SDimitry Andric new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator()); 17000b57cec5SDimitry Andric } 17010b57cec5SDimitry Andric 17025f757f3fSDimitry Andric replaceCall(aCI, nval0); 17030b57cec5SDimitry Andric return true; 17040b57cec5SDimitry Andric } 17050b57cec5SDimitry Andric 1706e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, 1707e8d8bef9SDimitry Andric FunctionAnalysisManager &AM) { 17085f757f3fSDimitry Andric AMDGPULibCalls Simplifier; 1709e8d8bef9SDimitry Andric Simplifier.initNativeFuncs(); 17105f757f3fSDimitry Andric Simplifier.initFunction(F, AM); 1711e8d8bef9SDimitry Andric 1712e8d8bef9SDimitry Andric bool Changed = false; 1713e8d8bef9SDimitry Andric 1714e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "AMDIC: process function "; 1715e8d8bef9SDimitry Andric F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); 1716e8d8bef9SDimitry Andric 1717e8d8bef9SDimitry Andric for (auto &BB : F) { 1718e8d8bef9SDimitry Andric for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1719e8d8bef9SDimitry Andric // Ignore non-calls. 1720e8d8bef9SDimitry Andric CallInst *CI = dyn_cast<CallInst>(I); 1721e8d8bef9SDimitry Andric ++I; 1722e8d8bef9SDimitry Andric 17235f757f3fSDimitry Andric if (CI) { 17245f757f3fSDimitry Andric if (Simplifier.fold(CI)) 1725e8d8bef9SDimitry Andric Changed = true; 1726e8d8bef9SDimitry Andric } 1727e8d8bef9SDimitry Andric } 17285f757f3fSDimitry Andric } 1729e8d8bef9SDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1730e8d8bef9SDimitry Andric } 1731e8d8bef9SDimitry Andric 1732e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, 1733e8d8bef9SDimitry Andric FunctionAnalysisManager &AM) { 1734e8d8bef9SDimitry Andric if (UseNative.empty()) 1735e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 1736e8d8bef9SDimitry Andric 1737e8d8bef9SDimitry Andric AMDGPULibCalls Simplifier; 1738e8d8bef9SDimitry Andric Simplifier.initNativeFuncs(); 17395f757f3fSDimitry Andric Simplifier.initFunction(F, AM); 1740e8d8bef9SDimitry Andric 1741e8d8bef9SDimitry Andric bool Changed = false; 1742e8d8bef9SDimitry Andric for (auto &BB : F) { 1743e8d8bef9SDimitry Andric for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1744e8d8bef9SDimitry Andric // Ignore non-calls. 1745e8d8bef9SDimitry Andric CallInst *CI = dyn_cast<CallInst>(I); 1746e8d8bef9SDimitry Andric ++I; 17475f757f3fSDimitry Andric if (CI && Simplifier.useNative(CI)) 1748e8d8bef9SDimitry Andric Changed = true; 1749e8d8bef9SDimitry Andric } 1750e8d8bef9SDimitry Andric } 1751e8d8bef9SDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1752e8d8bef9SDimitry Andric } 1753