xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- AMDGPULibCalls.cpp -------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This file does AMD library function optimizations.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "AMDGPU.h"
150b57cec5SDimitry Andric #include "AMDGPULibFunc.h"
16e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
175f757f3fSDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
185f757f3fSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
195f757f3fSDimitry Andric #include "llvm/Analysis/ValueTracking.h"
205f757f3fSDimitry Andric #include "llvm/IR/AttributeMask.h"
215f757f3fSDimitry Andric #include "llvm/IR/Dominators.h"
22fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h"
231fd87a68SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
241fd87a68SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
25*0fca6ea1SDimitry Andric #include "llvm/IR/MDBuilder.h"
265f757f3fSDimitry Andric #include "llvm/IR/PatternMatch.h"
27480093f4SDimitry Andric #include "llvm/InitializePasses.h"
28bdd1243dSDimitry Andric #include <cmath>
29480093f4SDimitry Andric 
30480093f4SDimitry Andric #define DEBUG_TYPE "amdgpu-simplifylib"
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric using namespace llvm;
335f757f3fSDimitry Andric using namespace llvm::PatternMatch;
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric static cl::opt<bool> EnablePreLink("amdgpu-prelink",
360b57cec5SDimitry Andric   cl::desc("Enable pre-link mode optimizations"),
370b57cec5SDimitry Andric   cl::init(false),
380b57cec5SDimitry Andric   cl::Hidden);
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric static cl::list<std::string> UseNative("amdgpu-use-native",
410b57cec5SDimitry Andric   cl::desc("Comma separated list of functions to replace with native, or all"),
420b57cec5SDimitry Andric   cl::CommaSeparated, cl::ValueOptional,
430b57cec5SDimitry Andric   cl::Hidden);
440b57cec5SDimitry Andric 
458bcb0991SDimitry Andric #define MATH_PI      numbers::pi
468bcb0991SDimitry Andric #define MATH_E       numbers::e
478bcb0991SDimitry Andric #define MATH_SQRT2   numbers::sqrt2
488bcb0991SDimitry Andric #define MATH_SQRT1_2 numbers::inv_sqrt2
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric namespace llvm {
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric class AMDGPULibCalls {
530b57cec5SDimitry Andric private:
545f757f3fSDimitry Andric   const TargetLibraryInfo *TLInfo = nullptr;
555f757f3fSDimitry Andric   AssumptionCache *AC = nullptr;
565f757f3fSDimitry Andric   DominatorTree *DT = nullptr;
570b57cec5SDimitry Andric 
58*0fca6ea1SDimitry Andric   using FuncInfo = llvm::AMDGPULibFunc;
590b57cec5SDimitry Andric 
605f757f3fSDimitry Andric   bool UnsafeFPMath = false;
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric   // -fuse-native.
630b57cec5SDimitry Andric   bool AllNative = false;
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   bool useNativeFunc(const StringRef F) const;
660b57cec5SDimitry Andric 
67349cc55cSDimitry Andric   // Return a pointer (pointer expr) to the function if function definition with
680b57cec5SDimitry Andric   // "FuncName" exists. It may create a new function prototype in pre-link mode.
690b57cec5SDimitry Andric   FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
700b57cec5SDimitry Andric 
71349cc55cSDimitry Andric   bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric   bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric   /* Specialized optimizations */
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric   // pow/powr/pown
785f757f3fSDimitry Andric   bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric   // rootn
815f757f3fSDimitry Andric   bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric   // -fuse-native for sincos
840b57cec5SDimitry Andric   bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric   // evaluate calls if calls' arguments are constants.
875f757f3fSDimitry Andric   bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
885f757f3fSDimitry Andric                               Constant *copr0, Constant *copr1);
89349cc55cSDimitry Andric   bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
900b57cec5SDimitry Andric 
915f757f3fSDimitry Andric   /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
925f757f3fSDimitry Andric   /// of cos, sincos call).
935f757f3fSDimitry Andric   std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
945f757f3fSDimitry Andric                                                      FastMathFlags FMF,
955f757f3fSDimitry Andric                                                      IRBuilder<> &B,
965f757f3fSDimitry Andric                                                      FunctionCallee Fsincos);
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   // sin/cos
995f757f3fSDimitry Andric   bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric   // __read_pipe/__write_pipe
102349cc55cSDimitry Andric   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
103349cc55cSDimitry Andric                             const FuncInfo &FInfo);
1040b57cec5SDimitry Andric 
105349cc55cSDimitry Andric   // Get a scalar native builtin single argument FP function
1060b57cec5SDimitry Andric   FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
1070b57cec5SDimitry Andric 
1085f757f3fSDimitry Andric   /// Substitute a call to a known libcall with an intrinsic call. If \p
1095f757f3fSDimitry Andric   /// AllowMinSize is true, allow the replacement in a minsize function.
1105f757f3fSDimitry Andric   bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1115f757f3fSDimitry Andric                                          bool AllowMinSizeF32 = false,
1125f757f3fSDimitry Andric                                          bool AllowF64 = false,
1135f757f3fSDimitry Andric                                          bool AllowStrictFP = false);
1145f757f3fSDimitry Andric   void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
1155f757f3fSDimitry Andric                                          Intrinsic::ID IntrID);
1165f757f3fSDimitry Andric 
1175f757f3fSDimitry Andric   bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
1185f757f3fSDimitry Andric                                             Intrinsic::ID IntrID,
1195f757f3fSDimitry Andric                                             bool AllowMinSizeF32 = false,
1205f757f3fSDimitry Andric                                             bool AllowF64 = false,
1215f757f3fSDimitry Andric                                             bool AllowStrictFP = false);
1225f757f3fSDimitry Andric 
1230b57cec5SDimitry Andric protected:
1245f757f3fSDimitry Andric   bool isUnsafeMath(const FPMathOperator *FPOp) const;
1255f757f3fSDimitry Andric   bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
1260b57cec5SDimitry Andric 
1275f757f3fSDimitry Andric   bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const;
1280b57cec5SDimitry Andric 
1295f757f3fSDimitry Andric   static void replaceCall(Instruction *I, Value *With) {
1305f757f3fSDimitry Andric     I->replaceAllUsesWith(With);
1315f757f3fSDimitry Andric     I->eraseFromParent();
1325f757f3fSDimitry Andric   }
1335f757f3fSDimitry Andric 
1345f757f3fSDimitry Andric   static void replaceCall(FPMathOperator *I, Value *With) {
1355f757f3fSDimitry Andric     replaceCall(cast<Instruction>(I), With);
1360b57cec5SDimitry Andric   }
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric public:
139*0fca6ea1SDimitry Andric   AMDGPULibCalls() = default;
1400b57cec5SDimitry Andric 
1415f757f3fSDimitry Andric   bool fold(CallInst *CI);
1420b57cec5SDimitry Andric 
1435f757f3fSDimitry Andric   void initFunction(Function &F, FunctionAnalysisManager &FAM);
1440b57cec5SDimitry Andric   void initNativeFuncs();
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric   // Replace a normal math function call with that native version
1470b57cec5SDimitry Andric   bool useNative(CallInst *CI);
1480b57cec5SDimitry Andric };
1490b57cec5SDimitry Andric 
150*0fca6ea1SDimitry Andric } // end namespace llvm
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric template <typename IRB>
1530b57cec5SDimitry Andric static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
1540b57cec5SDimitry Andric                               const Twine &Name = "") {
1550b57cec5SDimitry Andric   CallInst *R = B.CreateCall(Callee, Arg, Name);
1560b57cec5SDimitry Andric   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
1570b57cec5SDimitry Andric     R->setCallingConv(F->getCallingConv());
1580b57cec5SDimitry Andric   return R;
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric template <typename IRB>
1620b57cec5SDimitry Andric static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
1630b57cec5SDimitry Andric                                Value *Arg2, const Twine &Name = "") {
1640b57cec5SDimitry Andric   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
1650b57cec5SDimitry Andric   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
1660b57cec5SDimitry Andric     R->setCallingConv(F->getCallingConv());
1670b57cec5SDimitry Andric   return R;
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric 
1705f757f3fSDimitry Andric static FunctionType *getPownType(FunctionType *FT) {
1715f757f3fSDimitry Andric   Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
1725f757f3fSDimitry Andric   if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
1735f757f3fSDimitry Andric     PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
1745f757f3fSDimitry Andric 
1755f757f3fSDimitry Andric   return FunctionType::get(FT->getReturnType(),
1765f757f3fSDimitry Andric                            {FT->getParamType(0), PowNExpTy}, false);
1775f757f3fSDimitry Andric }
1785f757f3fSDimitry Andric 
// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument.

// One row of a fold table. Rows are written as {result, input}, so the member
// order must stay as it is.
struct TableEntry {
  double result; // function value for `input`
  double input;  // argument value this row describes
};
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric /* a list of {result, input} */
1880b57cec5SDimitry Andric static const TableEntry tbl_acos[] = {
1890b57cec5SDimitry Andric   {MATH_PI / 2.0, 0.0},
1900b57cec5SDimitry Andric   {MATH_PI / 2.0, -0.0},
1910b57cec5SDimitry Andric   {0.0, 1.0},
1920b57cec5SDimitry Andric   {MATH_PI, -1.0}
1930b57cec5SDimitry Andric };
1940b57cec5SDimitry Andric static const TableEntry tbl_acosh[] = {
1950b57cec5SDimitry Andric   {0.0, 1.0}
1960b57cec5SDimitry Andric };
1970b57cec5SDimitry Andric static const TableEntry tbl_acospi[] = {
1980b57cec5SDimitry Andric   {0.5, 0.0},
1990b57cec5SDimitry Andric   {0.5, -0.0},
2000b57cec5SDimitry Andric   {0.0, 1.0},
2010b57cec5SDimitry Andric   {1.0, -1.0}
2020b57cec5SDimitry Andric };
2030b57cec5SDimitry Andric static const TableEntry tbl_asin[] = {
2040b57cec5SDimitry Andric   {0.0, 0.0},
2050b57cec5SDimitry Andric   {-0.0, -0.0},
2060b57cec5SDimitry Andric   {MATH_PI / 2.0, 1.0},
2070b57cec5SDimitry Andric   {-MATH_PI / 2.0, -1.0}
2080b57cec5SDimitry Andric };
2090b57cec5SDimitry Andric static const TableEntry tbl_asinh[] = {
2100b57cec5SDimitry Andric   {0.0, 0.0},
2110b57cec5SDimitry Andric   {-0.0, -0.0}
2120b57cec5SDimitry Andric };
2130b57cec5SDimitry Andric static const TableEntry tbl_asinpi[] = {
2140b57cec5SDimitry Andric   {0.0, 0.0},
2150b57cec5SDimitry Andric   {-0.0, -0.0},
2160b57cec5SDimitry Andric   {0.5, 1.0},
2170b57cec5SDimitry Andric   {-0.5, -1.0}
2180b57cec5SDimitry Andric };
2190b57cec5SDimitry Andric static const TableEntry tbl_atan[] = {
2200b57cec5SDimitry Andric   {0.0, 0.0},
2210b57cec5SDimitry Andric   {-0.0, -0.0},
2220b57cec5SDimitry Andric   {MATH_PI / 4.0, 1.0},
2230b57cec5SDimitry Andric   {-MATH_PI / 4.0, -1.0}
2240b57cec5SDimitry Andric };
2250b57cec5SDimitry Andric static const TableEntry tbl_atanh[] = {
2260b57cec5SDimitry Andric   {0.0, 0.0},
2270b57cec5SDimitry Andric   {-0.0, -0.0}
2280b57cec5SDimitry Andric };
2290b57cec5SDimitry Andric static const TableEntry tbl_atanpi[] = {
2300b57cec5SDimitry Andric   {0.0, 0.0},
2310b57cec5SDimitry Andric   {-0.0, -0.0},
2320b57cec5SDimitry Andric   {0.25, 1.0},
2330b57cec5SDimitry Andric   {-0.25, -1.0}
2340b57cec5SDimitry Andric };
2350b57cec5SDimitry Andric static const TableEntry tbl_cbrt[] = {
2360b57cec5SDimitry Andric   {0.0, 0.0},
2370b57cec5SDimitry Andric   {-0.0, -0.0},
2380b57cec5SDimitry Andric   {1.0, 1.0},
2390b57cec5SDimitry Andric   {-1.0, -1.0},
2400b57cec5SDimitry Andric };
2410b57cec5SDimitry Andric static const TableEntry tbl_cos[] = {
2420b57cec5SDimitry Andric   {1.0, 0.0},
2430b57cec5SDimitry Andric   {1.0, -0.0}
2440b57cec5SDimitry Andric };
2450b57cec5SDimitry Andric static const TableEntry tbl_cosh[] = {
2460b57cec5SDimitry Andric   {1.0, 0.0},
2470b57cec5SDimitry Andric   {1.0, -0.0}
2480b57cec5SDimitry Andric };
2490b57cec5SDimitry Andric static const TableEntry tbl_cospi[] = {
2500b57cec5SDimitry Andric   {1.0, 0.0},
2510b57cec5SDimitry Andric   {1.0, -0.0}
2520b57cec5SDimitry Andric };
2530b57cec5SDimitry Andric static const TableEntry tbl_erfc[] = {
2540b57cec5SDimitry Andric   {1.0, 0.0},
2550b57cec5SDimitry Andric   {1.0, -0.0}
2560b57cec5SDimitry Andric };
2570b57cec5SDimitry Andric static const TableEntry tbl_erf[] = {
2580b57cec5SDimitry Andric   {0.0, 0.0},
2590b57cec5SDimitry Andric   {-0.0, -0.0}
2600b57cec5SDimitry Andric };
2610b57cec5SDimitry Andric static const TableEntry tbl_exp[] = {
2620b57cec5SDimitry Andric   {1.0, 0.0},
2630b57cec5SDimitry Andric   {1.0, -0.0},
2640b57cec5SDimitry Andric   {MATH_E, 1.0}
2650b57cec5SDimitry Andric };
2660b57cec5SDimitry Andric static const TableEntry tbl_exp2[] = {
2670b57cec5SDimitry Andric   {1.0, 0.0},
2680b57cec5SDimitry Andric   {1.0, -0.0},
2690b57cec5SDimitry Andric   {2.0, 1.0}
2700b57cec5SDimitry Andric };
2710b57cec5SDimitry Andric static const TableEntry tbl_exp10[] = {
2720b57cec5SDimitry Andric   {1.0, 0.0},
2730b57cec5SDimitry Andric   {1.0, -0.0},
2740b57cec5SDimitry Andric   {10.0, 1.0}
2750b57cec5SDimitry Andric };
2760b57cec5SDimitry Andric static const TableEntry tbl_expm1[] = {
2770b57cec5SDimitry Andric   {0.0, 0.0},
2780b57cec5SDimitry Andric   {-0.0, -0.0}
2790b57cec5SDimitry Andric };
2800b57cec5SDimitry Andric static const TableEntry tbl_log[] = {
2810b57cec5SDimitry Andric   {0.0, 1.0},
2820b57cec5SDimitry Andric   {1.0, MATH_E}
2830b57cec5SDimitry Andric };
2840b57cec5SDimitry Andric static const TableEntry tbl_log2[] = {
2850b57cec5SDimitry Andric   {0.0, 1.0},
2860b57cec5SDimitry Andric   {1.0, 2.0}
2870b57cec5SDimitry Andric };
2880b57cec5SDimitry Andric static const TableEntry tbl_log10[] = {
2890b57cec5SDimitry Andric   {0.0, 1.0},
2900b57cec5SDimitry Andric   {1.0, 10.0}
2910b57cec5SDimitry Andric };
2920b57cec5SDimitry Andric static const TableEntry tbl_rsqrt[] = {
2930b57cec5SDimitry Andric   {1.0, 1.0},
2948bcb0991SDimitry Andric   {MATH_SQRT1_2, 2.0}
2950b57cec5SDimitry Andric };
2960b57cec5SDimitry Andric static const TableEntry tbl_sin[] = {
2970b57cec5SDimitry Andric   {0.0, 0.0},
2980b57cec5SDimitry Andric   {-0.0, -0.0}
2990b57cec5SDimitry Andric };
3000b57cec5SDimitry Andric static const TableEntry tbl_sinh[] = {
3010b57cec5SDimitry Andric   {0.0, 0.0},
3020b57cec5SDimitry Andric   {-0.0, -0.0}
3030b57cec5SDimitry Andric };
3040b57cec5SDimitry Andric static const TableEntry tbl_sinpi[] = {
3050b57cec5SDimitry Andric   {0.0, 0.0},
3060b57cec5SDimitry Andric   {-0.0, -0.0}
3070b57cec5SDimitry Andric };
3080b57cec5SDimitry Andric static const TableEntry tbl_sqrt[] = {
3090b57cec5SDimitry Andric   {0.0, 0.0},
3100b57cec5SDimitry Andric   {1.0, 1.0},
3110b57cec5SDimitry Andric   {MATH_SQRT2, 2.0}
3120b57cec5SDimitry Andric };
3130b57cec5SDimitry Andric static const TableEntry tbl_tan[] = {
3140b57cec5SDimitry Andric   {0.0, 0.0},
3150b57cec5SDimitry Andric   {-0.0, -0.0}
3160b57cec5SDimitry Andric };
3170b57cec5SDimitry Andric static const TableEntry tbl_tanh[] = {
3180b57cec5SDimitry Andric   {0.0, 0.0},
3190b57cec5SDimitry Andric   {-0.0, -0.0}
3200b57cec5SDimitry Andric };
3210b57cec5SDimitry Andric static const TableEntry tbl_tanpi[] = {
3220b57cec5SDimitry Andric   {0.0, 0.0},
3230b57cec5SDimitry Andric   {-0.0, -0.0}
3240b57cec5SDimitry Andric };
3250b57cec5SDimitry Andric static const TableEntry tbl_tgamma[] = {
3260b57cec5SDimitry Andric   {1.0, 1.0},
3270b57cec5SDimitry Andric   {1.0, 2.0},
3280b57cec5SDimitry Andric   {2.0, 3.0},
3290b57cec5SDimitry Andric   {6.0, 4.0}
3300b57cec5SDimitry Andric };
3310b57cec5SDimitry Andric 
3320b57cec5SDimitry Andric static bool HasNative(AMDGPULibFunc::EFuncId id) {
3330b57cec5SDimitry Andric   switch(id) {
3340b57cec5SDimitry Andric   case AMDGPULibFunc::EI_DIVIDE:
3350b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COS:
3360b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP:
3370b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP2:
3380b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP10:
3390b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG:
3400b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG2:
3410b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG10:
3420b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POWR:
3430b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RECIP:
3440b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RSQRT:
3450b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SIN:
3460b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINCOS:
3470b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SQRT:
3480b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TAN:
3490b57cec5SDimitry Andric     return true;
3500b57cec5SDimitry Andric   default:;
3510b57cec5SDimitry Andric   }
3520b57cec5SDimitry Andric   return false;
3530b57cec5SDimitry Andric }
3540b57cec5SDimitry Andric 
355fcaf7f86SDimitry Andric using TableRef = ArrayRef<TableEntry>;
3560b57cec5SDimitry Andric 
3570b57cec5SDimitry Andric static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
3580b57cec5SDimitry Andric   switch(id) {
3590b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
3600b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
3610b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
3620b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
3630b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
3640b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
3650b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
3660b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
3670b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
3680b57cec5SDimitry Andric   case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
3690b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NCOS:
3700b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
3710b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
3720b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
3730b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
3740b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
3750b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
3760b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NEXP2:
3770b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
3780b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
3790b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
3800b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
3810b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NLOG2:
3820b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
3830b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
3840b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NRSQRT:
3850b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
3860b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NSIN:
3870b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
3880b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
3890b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
3900b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NSQRT:
3910b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
3920b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
3930b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
3940b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
3950b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
3960b57cec5SDimitry Andric   default:;
3970b57cec5SDimitry Andric   }
3980b57cec5SDimitry Andric   return TableRef();
3990b57cec5SDimitry Andric }
4000b57cec5SDimitry Andric 
4010b57cec5SDimitry Andric static inline int getVecSize(const AMDGPULibFunc& FInfo) {
4020b57cec5SDimitry Andric   return FInfo.getLeads()[0].VectorSize;
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
4060b57cec5SDimitry Andric   return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
4070b57cec5SDimitry Andric }
4080b57cec5SDimitry Andric 
4090b57cec5SDimitry Andric FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
4100b57cec5SDimitry Andric   // If we are doing PreLinkOpt, the function is external. So it is safe to
4110b57cec5SDimitry Andric   // use getOrInsertFunction() at this stage.
4120b57cec5SDimitry Andric 
4130b57cec5SDimitry Andric   return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
4140b57cec5SDimitry Andric                        : AMDGPULibFunc::getFunction(M, fInfo);
4150b57cec5SDimitry Andric }
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
418349cc55cSDimitry Andric                                        FuncInfo &FInfo) {
419349cc55cSDimitry Andric   return AMDGPULibFunc::parse(FMangledName, FInfo);
4200b57cec5SDimitry Andric }
4210b57cec5SDimitry Andric 
4225f757f3fSDimitry Andric bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const {
4235f757f3fSDimitry Andric   return UnsafeFPMath || FPOp->isFast();
4245f757f3fSDimitry Andric }
4255f757f3fSDimitry Andric 
4265f757f3fSDimitry Andric bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const {
4275f757f3fSDimitry Andric   return UnsafeFPMath ||
4285f757f3fSDimitry Andric          (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs());
4295f757f3fSDimitry Andric }
4305f757f3fSDimitry Andric 
4315f757f3fSDimitry Andric bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold(
4325f757f3fSDimitry Andric     const FPMathOperator *FPOp) const {
4335f757f3fSDimitry Andric   // TODO: Refine to approxFunc or contract
4345f757f3fSDimitry Andric   return isUnsafeMath(FPOp);
4355f757f3fSDimitry Andric }
4365f757f3fSDimitry Andric 
4375f757f3fSDimitry Andric void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) {
4385f757f3fSDimitry Andric   UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool();
4395f757f3fSDimitry Andric   AC = &FAM.getResult<AssumptionAnalysis>(F);
4405f757f3fSDimitry Andric   TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
4415f757f3fSDimitry Andric   DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
4420b57cec5SDimitry Andric }
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
445e8d8bef9SDimitry Andric   return AllNative || llvm::is_contained(UseNative, F);
4460b57cec5SDimitry Andric }
4470b57cec5SDimitry Andric 
4480b57cec5SDimitry Andric void AMDGPULibCalls::initNativeFuncs() {
4490b57cec5SDimitry Andric   AllNative = useNativeFunc("all") ||
4500b57cec5SDimitry Andric               (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
4510b57cec5SDimitry Andric                UseNative.begin()->empty());
4520b57cec5SDimitry Andric }
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
4550b57cec5SDimitry Andric   bool native_sin = useNativeFunc("sin");
4560b57cec5SDimitry Andric   bool native_cos = useNativeFunc("cos");
4570b57cec5SDimitry Andric 
4580b57cec5SDimitry Andric   if (native_sin && native_cos) {
4590b57cec5SDimitry Andric     Module *M = aCI->getModule();
4600b57cec5SDimitry Andric     Value *opr0 = aCI->getArgOperand(0);
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric     AMDGPULibFunc nf;
4630b57cec5SDimitry Andric     nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
4640b57cec5SDimitry Andric     nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric     nf.setPrefix(AMDGPULibFunc::NATIVE);
4670b57cec5SDimitry Andric     nf.setId(AMDGPULibFunc::EI_SIN);
4680b57cec5SDimitry Andric     FunctionCallee sinExpr = getFunction(M, nf);
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric     nf.setPrefix(AMDGPULibFunc::NATIVE);
4710b57cec5SDimitry Andric     nf.setId(AMDGPULibFunc::EI_COS);
4720b57cec5SDimitry Andric     FunctionCallee cosExpr = getFunction(M, nf);
4730b57cec5SDimitry Andric     if (sinExpr && cosExpr) {
474*0fca6ea1SDimitry Andric       Value *sinval =
475*0fca6ea1SDimitry Andric           CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
476*0fca6ea1SDimitry Andric       Value *cosval =
477*0fca6ea1SDimitry Andric           CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
478*0fca6ea1SDimitry Andric       new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());
4790b57cec5SDimitry Andric 
4800b57cec5SDimitry Andric       DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
4810b57cec5SDimitry Andric                                           << " with native version of sin/cos");
4820b57cec5SDimitry Andric 
4835f757f3fSDimitry Andric       replaceCall(aCI, sinval);
4840b57cec5SDimitry Andric       return true;
4850b57cec5SDimitry Andric     }
4860b57cec5SDimitry Andric   }
4870b57cec5SDimitry Andric   return false;
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric bool AMDGPULibCalls::useNative(CallInst *aCI) {
4910b57cec5SDimitry Andric   Function *Callee = aCI->getCalledFunction();
4925f757f3fSDimitry Andric   if (!Callee || aCI->isNoBuiltin())
4935f757f3fSDimitry Andric     return false;
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric   FuncInfo FInfo;
496349cc55cSDimitry Andric   if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
4970b57cec5SDimitry Andric       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
4980b57cec5SDimitry Andric       getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
4990b57cec5SDimitry Andric       !(AllNative || useNativeFunc(FInfo.getName()))) {
5000b57cec5SDimitry Andric     return false;
5010b57cec5SDimitry Andric   }
5020b57cec5SDimitry Andric 
5030b57cec5SDimitry Andric   if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
5040b57cec5SDimitry Andric     return sincosUseNative(aCI, FInfo);
5050b57cec5SDimitry Andric 
5060b57cec5SDimitry Andric   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
5070b57cec5SDimitry Andric   FunctionCallee F = getFunction(aCI->getModule(), FInfo);
5080b57cec5SDimitry Andric   if (!F)
5090b57cec5SDimitry Andric     return false;
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric   aCI->setCalledFunction(F);
5120b57cec5SDimitry Andric   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5130b57cec5SDimitry Andric                                       << " with native version");
5140b57cec5SDimitry Andric   return true;
5150b57cec5SDimitry Andric }
5160b57cec5SDimitry Andric 
5170b57cec5SDimitry Andric // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
5180b57cec5SDimitry Andric // builtin, with appended type size and alignment arguments, where 2 or 4
5190b57cec5SDimitry Andric // indicates the original number of arguments. The library has optimized version
5200b57cec5SDimitry Andric // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
5210b57cec5SDimitry Andric // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
5220b57cec5SDimitry Andric // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
5230b57cec5SDimitry Andric // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
5240b57cec5SDimitry Andric bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
525349cc55cSDimitry Andric                                           const FuncInfo &FInfo) {
5260b57cec5SDimitry Andric   auto *Callee = CI->getCalledFunction();
5270b57cec5SDimitry Andric   if (!Callee->isDeclaration())
5280b57cec5SDimitry Andric     return false;
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric   assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
5310b57cec5SDimitry Andric   auto *M = Callee->getParent();
5325ffd83dbSDimitry Andric   std::string Name = std::string(Callee->getName());
533349cc55cSDimitry Andric   auto NumArg = CI->arg_size();
5340b57cec5SDimitry Andric   if (NumArg != 4 && NumArg != 6)
5350b57cec5SDimitry Andric     return false;
5365f757f3fSDimitry Andric   ConstantInt *PacketSize =
5375f757f3fSDimitry Andric       dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
5385f757f3fSDimitry Andric   ConstantInt *PacketAlign =
5395f757f3fSDimitry Andric       dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
5405f757f3fSDimitry Andric   if (!PacketSize || !PacketAlign)
5410b57cec5SDimitry Andric     return false;
5425f757f3fSDimitry Andric 
5435f757f3fSDimitry Andric   unsigned Size = PacketSize->getZExtValue();
5445f757f3fSDimitry Andric   Align Alignment = PacketAlign->getAlignValue();
5455ffd83dbSDimitry Andric   if (Alignment != Size)
5460b57cec5SDimitry Andric     return false;
5470b57cec5SDimitry Andric 
548349cc55cSDimitry Andric   unsigned PtrArgLoc = CI->arg_size() - 3;
5495f757f3fSDimitry Andric   Value *PtrArg = CI->getArgOperand(PtrArgLoc);
5505f757f3fSDimitry Andric   Type *PtrTy = PtrArg->getType();
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric   SmallVector<llvm::Type *, 6> ArgTys;
5530b57cec5SDimitry Andric   for (unsigned I = 0; I != PtrArgLoc; ++I)
5540b57cec5SDimitry Andric     ArgTys.push_back(CI->getArgOperand(I)->getType());
5550b57cec5SDimitry Andric   ArgTys.push_back(PtrTy);
5560b57cec5SDimitry Andric 
5570b57cec5SDimitry Andric   Name = Name + "_" + std::to_string(Size);
5580b57cec5SDimitry Andric   auto *FTy = FunctionType::get(Callee->getReturnType(),
5590b57cec5SDimitry Andric                                 ArrayRef<Type *>(ArgTys), false);
5600b57cec5SDimitry Andric   AMDGPULibFunc NewLibFunc(Name, FTy);
5610b57cec5SDimitry Andric   FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
5620b57cec5SDimitry Andric   if (!F)
5630b57cec5SDimitry Andric     return false;
5640b57cec5SDimitry Andric 
5650b57cec5SDimitry Andric   SmallVector<Value *, 6> Args;
5660b57cec5SDimitry Andric   for (unsigned I = 0; I != PtrArgLoc; ++I)
5670b57cec5SDimitry Andric     Args.push_back(CI->getArgOperand(I));
5685f757f3fSDimitry Andric   Args.push_back(PtrArg);
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric   auto *NCI = B.CreateCall(F, Args);
5710b57cec5SDimitry Andric   NCI->setAttributes(CI->getAttributes());
5720b57cec5SDimitry Andric   CI->replaceAllUsesWith(NCI);
5730b57cec5SDimitry Andric   CI->dropAllReferences();
5740b57cec5SDimitry Andric   CI->eraseFromParent();
5750b57cec5SDimitry Andric 
5760b57cec5SDimitry Andric   return true;
5770b57cec5SDimitry Andric }
5780b57cec5SDimitry Andric 
5795f757f3fSDimitry Andric static bool isKnownIntegral(const Value *V, const DataLayout &DL,
5805f757f3fSDimitry Andric                             FastMathFlags FMF) {
581*0fca6ea1SDimitry Andric   if (isa<PoisonValue>(V))
5825f757f3fSDimitry Andric     return true;
583*0fca6ea1SDimitry Andric   if (isa<UndefValue>(V))
584*0fca6ea1SDimitry Andric     return false;
5850b57cec5SDimitry Andric 
5865f757f3fSDimitry Andric   if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
5875f757f3fSDimitry Andric     return CF->getValueAPF().isInteger();
5885f757f3fSDimitry Andric 
589*0fca6ea1SDimitry Andric   auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
590*0fca6ea1SDimitry Andric   const Constant *CV = dyn_cast<Constant>(V);
591*0fca6ea1SDimitry Andric   if (VFVTy && CV) {
592*0fca6ea1SDimitry Andric     unsigned NumElts = VFVTy->getNumElements();
593*0fca6ea1SDimitry Andric     for (unsigned i = 0; i != NumElts; ++i) {
594*0fca6ea1SDimitry Andric       Constant *Elt = CV->getAggregateElement(i);
595*0fca6ea1SDimitry Andric       if (!Elt)
596*0fca6ea1SDimitry Andric         return false;
597*0fca6ea1SDimitry Andric       if (isa<PoisonValue>(Elt))
5985f757f3fSDimitry Andric         continue;
599*0fca6ea1SDimitry Andric 
600*0fca6ea1SDimitry Andric       const ConstantFP *CFP = dyn_cast<ConstantFP>(Elt);
6015f757f3fSDimitry Andric       if (!CFP || !CFP->getValue().isInteger())
6025f757f3fSDimitry Andric         return false;
6035f757f3fSDimitry Andric     }
6045f757f3fSDimitry Andric 
6055f757f3fSDimitry Andric     return true;
6065f757f3fSDimitry Andric   }
6075f757f3fSDimitry Andric 
6085f757f3fSDimitry Andric   const Instruction *I = dyn_cast<Instruction>(V);
6095f757f3fSDimitry Andric   if (!I)
61004eeddc0SDimitry Andric     return false;
6110b57cec5SDimitry Andric 
6125f757f3fSDimitry Andric   switch (I->getOpcode()) {
6135f757f3fSDimitry Andric   case Instruction::SIToFP:
6145f757f3fSDimitry Andric   case Instruction::UIToFP:
6155f757f3fSDimitry Andric     // TODO: Could check nofpclass(inf) on incoming argument
6165f757f3fSDimitry Andric     if (FMF.noInfs())
6175f757f3fSDimitry Andric       return true;
6180b57cec5SDimitry Andric 
6195f757f3fSDimitry Andric     // Need to check int size cannot produce infinity, which computeKnownFPClass
6205f757f3fSDimitry Andric     // knows how to do already.
621*0fca6ea1SDimitry Andric     return isKnownNeverInfinity(I, /*Depth=*/0, SimplifyQuery(DL));
6225f757f3fSDimitry Andric   case Instruction::Call: {
6235f757f3fSDimitry Andric     const CallInst *CI = cast<CallInst>(I);
6245f757f3fSDimitry Andric     switch (CI->getIntrinsicID()) {
6255f757f3fSDimitry Andric     case Intrinsic::trunc:
6265f757f3fSDimitry Andric     case Intrinsic::floor:
6275f757f3fSDimitry Andric     case Intrinsic::ceil:
6285f757f3fSDimitry Andric     case Intrinsic::rint:
6295f757f3fSDimitry Andric     case Intrinsic::nearbyint:
6305f757f3fSDimitry Andric     case Intrinsic::round:
6315f757f3fSDimitry Andric     case Intrinsic::roundeven:
6325f757f3fSDimitry Andric       return (FMF.noInfs() && FMF.noNaNs()) ||
633*0fca6ea1SDimitry Andric              isKnownNeverInfOrNaN(I, /*Depth=*/0, SimplifyQuery(DL));
6340b57cec5SDimitry Andric     default:
6350b57cec5SDimitry Andric       break;
6360b57cec5SDimitry Andric     }
6370b57cec5SDimitry Andric 
6385f757f3fSDimitry Andric     break;
6395f757f3fSDimitry Andric   }
6405f757f3fSDimitry Andric   default:
6415f757f3fSDimitry Andric     break;
6425f757f3fSDimitry Andric   }
6435f757f3fSDimitry Andric 
6445f757f3fSDimitry Andric   return false;
6455f757f3fSDimitry Andric }
6465f757f3fSDimitry Andric 
6475f757f3fSDimitry Andric // This function returns false if no change; return true otherwise.
6485f757f3fSDimitry Andric bool AMDGPULibCalls::fold(CallInst *CI) {
6495f757f3fSDimitry Andric   Function *Callee = CI->getCalledFunction();
6505f757f3fSDimitry Andric   // Ignore indirect calls.
6515f757f3fSDimitry Andric   if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
6525f757f3fSDimitry Andric     return false;
6535f757f3fSDimitry Andric 
6540b57cec5SDimitry Andric   FuncInfo FInfo;
655349cc55cSDimitry Andric   if (!parseFunctionName(Callee->getName(), FInfo))
6560b57cec5SDimitry Andric     return false;
6570b57cec5SDimitry Andric 
6580b57cec5SDimitry Andric   // Further check the number of arguments to see if they match.
6595f757f3fSDimitry Andric   // TODO: Check calling convention matches too
6605f757f3fSDimitry Andric   if (!FInfo.isCompatibleSignature(CI->getFunctionType()))
6610b57cec5SDimitry Andric     return false;
6620b57cec5SDimitry Andric 
6635f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
6645f757f3fSDimitry Andric 
6650b57cec5SDimitry Andric   if (TDOFold(CI, FInfo))
6660b57cec5SDimitry Andric     return true;
6670b57cec5SDimitry Andric 
6685f757f3fSDimitry Andric   IRBuilder<> B(CI);
669*0fca6ea1SDimitry Andric   if (CI->isStrictFP())
670*0fca6ea1SDimitry Andric     B.setIsFPConstrained(true);
6715f757f3fSDimitry Andric 
6725f757f3fSDimitry Andric   if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
6730b57cec5SDimitry Andric     // Under unsafe-math, evaluate calls if possible.
6740b57cec5SDimitry Andric     // According to Brian Sumner, we can do this for all f32 function calls
6750b57cec5SDimitry Andric     // using host's double function calls.
6765f757f3fSDimitry Andric     if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
6770b57cec5SDimitry Andric       return true;
6780b57cec5SDimitry Andric 
6795f757f3fSDimitry Andric     // Copy fast flags from the original call.
6805f757f3fSDimitry Andric     FastMathFlags FMF = FPOp->getFastMathFlags();
6815f757f3fSDimitry Andric     B.setFastMathFlags(FMF);
6825f757f3fSDimitry Andric 
6835f757f3fSDimitry Andric     // Specialized optimizations for each function call.
6845f757f3fSDimitry Andric     //
6855f757f3fSDimitry Andric     // TODO: Handle native functions
6860b57cec5SDimitry Andric     switch (FInfo.getId()) {
6875f757f3fSDimitry Andric     case AMDGPULibFunc::EI_EXP:
6885f757f3fSDimitry Andric       if (FMF.none())
6895f757f3fSDimitry Andric         return false;
6905f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
6915f757f3fSDimitry Andric                                                   FMF.approxFunc());
6925f757f3fSDimitry Andric     case AMDGPULibFunc::EI_EXP2:
6935f757f3fSDimitry Andric       if (FMF.none())
6945f757f3fSDimitry Andric         return false;
6955f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
6965f757f3fSDimitry Andric                                                   FMF.approxFunc());
6975f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LOG:
6985f757f3fSDimitry Andric       if (FMF.none())
6995f757f3fSDimitry Andric         return false;
7005f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
7015f757f3fSDimitry Andric                                                   FMF.approxFunc());
7025f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LOG2:
7035f757f3fSDimitry Andric       if (FMF.none())
7045f757f3fSDimitry Andric         return false;
7055f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
7065f757f3fSDimitry Andric                                                   FMF.approxFunc());
7075f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LOG10:
7085f757f3fSDimitry Andric       if (FMF.none())
7095f757f3fSDimitry Andric         return false;
7105f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
7115f757f3fSDimitry Andric                                                   FMF.approxFunc());
7125f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FMIN:
7135f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
7145f757f3fSDimitry Andric                                                   true, true);
7155f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FMAX:
7165f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
7175f757f3fSDimitry Andric                                                   true, true);
7185f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FMA:
7195f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
7205f757f3fSDimitry Andric                                                   true);
7215f757f3fSDimitry Andric     case AMDGPULibFunc::EI_MAD:
7225f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
7235f757f3fSDimitry Andric                                                   true, true);
7245f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FABS:
7255f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
7265f757f3fSDimitry Andric                                                   true, true);
7275f757f3fSDimitry Andric     case AMDGPULibFunc::EI_COPYSIGN:
7285f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
7295f757f3fSDimitry Andric                                                   true, true, true);
7305f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FLOOR:
7315f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
7325f757f3fSDimitry Andric                                                   true);
7335f757f3fSDimitry Andric     case AMDGPULibFunc::EI_CEIL:
7345f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
7355f757f3fSDimitry Andric                                                   true);
7365f757f3fSDimitry Andric     case AMDGPULibFunc::EI_TRUNC:
7375f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
7385f757f3fSDimitry Andric                                                   true);
7395f757f3fSDimitry Andric     case AMDGPULibFunc::EI_RINT:
7405f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
7415f757f3fSDimitry Andric                                                   true);
7425f757f3fSDimitry Andric     case AMDGPULibFunc::EI_ROUND:
7435f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
7445f757f3fSDimitry Andric                                                   true);
7455f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LDEXP: {
7465f757f3fSDimitry Andric       if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
7475f757f3fSDimitry Andric         return false;
7480b57cec5SDimitry Andric 
7495f757f3fSDimitry Andric       Value *Arg1 = CI->getArgOperand(1);
7505f757f3fSDimitry Andric       if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
7515f757f3fSDimitry Andric           VecTy && !isa<VectorType>(Arg1->getType())) {
7525f757f3fSDimitry Andric         Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
7535f757f3fSDimitry Andric         CI->setArgOperand(1, SplatArg1);
7545f757f3fSDimitry Andric       }
7550b57cec5SDimitry Andric 
7565f757f3fSDimitry Andric       CI->setCalledFunction(Intrinsic::getDeclaration(
7575f757f3fSDimitry Andric           CI->getModule(), Intrinsic::ldexp,
7585f757f3fSDimitry Andric           {CI->getType(), CI->getArgOperand(1)->getType()}));
7595f757f3fSDimitry Andric       return true;
7605f757f3fSDimitry Andric     }
7615f757f3fSDimitry Andric     case AMDGPULibFunc::EI_POW: {
7625f757f3fSDimitry Andric       Module *M = Callee->getParent();
7635f757f3fSDimitry Andric       AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo);
7645f757f3fSDimitry Andric       FunctionCallee PowrFunc = getFunction(M, PowrInfo);
7655f757f3fSDimitry Andric       CallInst *Call = cast<CallInst>(FPOp);
7665f757f3fSDimitry Andric 
7675f757f3fSDimitry Andric       // pow(x, y) -> powr(x, y) for x >= -0.0
7685f757f3fSDimitry Andric       // TODO: Account for flags on current call
7695f757f3fSDimitry Andric       if (PowrFunc &&
770*0fca6ea1SDimitry Andric           cannotBeOrderedLessThanZero(
771*0fca6ea1SDimitry Andric               FPOp->getOperand(0), /*Depth=*/0,
772*0fca6ea1SDimitry Andric               SimplifyQuery(M->getDataLayout(), TLInfo, DT, AC, Call))) {
7735f757f3fSDimitry Andric         Call->setCalledFunction(PowrFunc);
7745f757f3fSDimitry Andric         return fold_pow(FPOp, B, PowrInfo) || true;
7755f757f3fSDimitry Andric       }
7765f757f3fSDimitry Andric 
7775f757f3fSDimitry Andric       // pow(x, y) -> pown(x, y) for known integral y
7785f757f3fSDimitry Andric       if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(),
7795f757f3fSDimitry Andric                           FPOp->getFastMathFlags())) {
7805f757f3fSDimitry Andric         FunctionType *PownType = getPownType(CI->getFunctionType());
7815f757f3fSDimitry Andric         AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true);
7825f757f3fSDimitry Andric         FunctionCallee PownFunc = getFunction(M, PownInfo);
7835f757f3fSDimitry Andric         if (PownFunc) {
7845f757f3fSDimitry Andric           // TODO: If the incoming integral value is an sitofp/uitofp, it won't
7855f757f3fSDimitry Andric           // fold out without a known range. We can probably take the source
7865f757f3fSDimitry Andric           // value directly.
7875f757f3fSDimitry Andric           Value *CastedArg =
7885f757f3fSDimitry Andric               B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
7895f757f3fSDimitry Andric           // Have to drop any nofpclass attributes on the original call site.
7905f757f3fSDimitry Andric           Call->removeParamAttrs(
7915f757f3fSDimitry Andric               1, AttributeFuncs::typeIncompatible(CastedArg->getType()));
7925f757f3fSDimitry Andric           Call->setCalledFunction(PownFunc);
7935f757f3fSDimitry Andric           Call->setArgOperand(1, CastedArg);
7945f757f3fSDimitry Andric           return fold_pow(FPOp, B, PownInfo) || true;
7955f757f3fSDimitry Andric         }
7965f757f3fSDimitry Andric       }
7975f757f3fSDimitry Andric 
7985f757f3fSDimitry Andric       return fold_pow(FPOp, B, FInfo);
7995f757f3fSDimitry Andric     }
8000b57cec5SDimitry Andric     case AMDGPULibFunc::EI_POWR:
8010b57cec5SDimitry Andric     case AMDGPULibFunc::EI_POWN:
8025f757f3fSDimitry Andric       return fold_pow(FPOp, B, FInfo);
8030b57cec5SDimitry Andric     case AMDGPULibFunc::EI_ROOTN:
8045f757f3fSDimitry Andric       return fold_rootn(FPOp, B, FInfo);
8050b57cec5SDimitry Andric     case AMDGPULibFunc::EI_SQRT:
8061db9f3b2SDimitry Andric       // TODO: Allow with strictfp + constrained intrinsic
8071db9f3b2SDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(
8081db9f3b2SDimitry Andric           B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
8090b57cec5SDimitry Andric     case AMDGPULibFunc::EI_COS:
8100b57cec5SDimitry Andric     case AMDGPULibFunc::EI_SIN:
8115f757f3fSDimitry Andric       return fold_sincos(FPOp, B, FInfo);
8125f757f3fSDimitry Andric     default:
8130b57cec5SDimitry Andric       break;
8145f757f3fSDimitry Andric     }
8155f757f3fSDimitry Andric   } else {
8165f757f3fSDimitry Andric     // Specialized optimizations for each function call
8175f757f3fSDimitry Andric     switch (FInfo.getId()) {
8180b57cec5SDimitry Andric     case AMDGPULibFunc::EI_READ_PIPE_2:
8190b57cec5SDimitry Andric     case AMDGPULibFunc::EI_READ_PIPE_4:
8200b57cec5SDimitry Andric     case AMDGPULibFunc::EI_WRITE_PIPE_2:
8210b57cec5SDimitry Andric     case AMDGPULibFunc::EI_WRITE_PIPE_4:
8220b57cec5SDimitry Andric       return fold_read_write_pipe(CI, B, FInfo);
8230b57cec5SDimitry Andric     default:
8240b57cec5SDimitry Andric       break;
8250b57cec5SDimitry Andric     }
8265f757f3fSDimitry Andric   }
8270b57cec5SDimitry Andric 
8280b57cec5SDimitry Andric   return false;
8290b57cec5SDimitry Andric }
8300b57cec5SDimitry Andric 
8310b57cec5SDimitry Andric bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
8320b57cec5SDimitry Andric   // Table-Driven optimization
8330b57cec5SDimitry Andric   const TableRef tr = getOptTable(FInfo.getId());
834fcaf7f86SDimitry Andric   if (tr.empty())
8350b57cec5SDimitry Andric     return false;
8360b57cec5SDimitry Andric 
837fcaf7f86SDimitry Andric   int const sz = (int)tr.size();
8380b57cec5SDimitry Andric   Value *opr0 = CI->getArgOperand(0);
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric   if (getVecSize(FInfo) > 1) {
8410b57cec5SDimitry Andric     if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
8420b57cec5SDimitry Andric       SmallVector<double, 0> DVal;
8430b57cec5SDimitry Andric       for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
8440b57cec5SDimitry Andric         ConstantFP *eltval = dyn_cast<ConstantFP>(
8450b57cec5SDimitry Andric                                CV->getElementAsConstant((unsigned)eltNo));
8460b57cec5SDimitry Andric         assert(eltval && "Non-FP arguments in math function!");
8470b57cec5SDimitry Andric         bool found = false;
8480b57cec5SDimitry Andric         for (int i=0; i < sz; ++i) {
849fcaf7f86SDimitry Andric           if (eltval->isExactlyValue(tr[i].input)) {
850fcaf7f86SDimitry Andric             DVal.push_back(tr[i].result);
8510b57cec5SDimitry Andric             found = true;
8520b57cec5SDimitry Andric             break;
8530b57cec5SDimitry Andric           }
8540b57cec5SDimitry Andric         }
8550b57cec5SDimitry Andric         if (!found) {
8560b57cec5SDimitry Andric           // This vector constants not handled yet.
8570b57cec5SDimitry Andric           return false;
8580b57cec5SDimitry Andric         }
8590b57cec5SDimitry Andric       }
8600b57cec5SDimitry Andric       LLVMContext &context = CI->getParent()->getParent()->getContext();
8610b57cec5SDimitry Andric       Constant *nval;
8620b57cec5SDimitry Andric       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
8630b57cec5SDimitry Andric         SmallVector<float, 0> FVal;
864*0fca6ea1SDimitry Andric         for (double D : DVal)
865*0fca6ea1SDimitry Andric           FVal.push_back((float)D);
8660b57cec5SDimitry Andric         ArrayRef<float> tmp(FVal);
8670b57cec5SDimitry Andric         nval = ConstantDataVector::get(context, tmp);
8680b57cec5SDimitry Andric       } else { // F64
8690b57cec5SDimitry Andric         ArrayRef<double> tmp(DVal);
8700b57cec5SDimitry Andric         nval = ConstantDataVector::get(context, tmp);
8710b57cec5SDimitry Andric       }
8720b57cec5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8735f757f3fSDimitry Andric       replaceCall(CI, nval);
8740b57cec5SDimitry Andric       return true;
8750b57cec5SDimitry Andric     }
8760b57cec5SDimitry Andric   } else {
8770b57cec5SDimitry Andric     // Scalar version
8780b57cec5SDimitry Andric     if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
8790b57cec5SDimitry Andric       for (int i = 0; i < sz; ++i) {
880fcaf7f86SDimitry Andric         if (CF->isExactlyValue(tr[i].input)) {
881fcaf7f86SDimitry Andric           Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
8820b57cec5SDimitry Andric           LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8835f757f3fSDimitry Andric           replaceCall(CI, nval);
8840b57cec5SDimitry Andric           return true;
8850b57cec5SDimitry Andric         }
8860b57cec5SDimitry Andric       }
8870b57cec5SDimitry Andric     }
8880b57cec5SDimitry Andric   }
8890b57cec5SDimitry Andric 
8900b57cec5SDimitry Andric   return false;
8910b57cec5SDimitry Andric }
8920b57cec5SDimitry Andric 
namespace llvm {
// Base-2 logarithm evaluated on the host.
//
// std::log2 is always available from <cmath> in C++11 and later, so no
// feature-test macros or log(V) / ln(2) fallback are needed. Using it
// unconditionally also keeps the computed value the same on hosts that do
// not define the POSIX/XOPEN macros.
static double log2(double V) {
  return std::log2(V);
}
} // namespace llvm
9020b57cec5SDimitry Andric 
9035f757f3fSDimitry Andric bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
9040b57cec5SDimitry Andric                               const FuncInfo &FInfo) {
9050b57cec5SDimitry Andric   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
9060b57cec5SDimitry Andric           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
9070b57cec5SDimitry Andric           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
9080b57cec5SDimitry Andric          "fold_pow: encounter a wrong function call");
9090b57cec5SDimitry Andric 
9105f757f3fSDimitry Andric   Module *M = B.GetInsertBlock()->getModule();
9115f757f3fSDimitry Andric   Type *eltType = FPOp->getType()->getScalarType();
9125f757f3fSDimitry Andric   Value *opr0 = FPOp->getOperand(0);
9135f757f3fSDimitry Andric   Value *opr1 = FPOp->getOperand(1);
9140b57cec5SDimitry Andric 
9155f757f3fSDimitry Andric   const APFloat *CF = nullptr;
9165f757f3fSDimitry Andric   const APInt *CINT = nullptr;
917*0fca6ea1SDimitry Andric   if (!match(opr1, m_APFloatAllowPoison(CF)))
918*0fca6ea1SDimitry Andric     match(opr1, m_APIntAllowPoison(CINT));
9190b57cec5SDimitry Andric 
9200b57cec5SDimitry Andric   // 0x1111111 means that we don't do anything for this call.
9210b57cec5SDimitry Andric   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
9220b57cec5SDimitry Andric 
9235f757f3fSDimitry Andric   if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
9240b57cec5SDimitry Andric     //  pow/powr/pown(x, 0) == 1
9255f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
9260b57cec5SDimitry Andric     Constant *cnval = ConstantFP::get(eltType, 1.0);
9270b57cec5SDimitry Andric     if (getVecSize(FInfo) > 1) {
9280b57cec5SDimitry Andric       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9290b57cec5SDimitry Andric     }
9305f757f3fSDimitry Andric     replaceCall(FPOp, cnval);
9310b57cec5SDimitry Andric     return true;
9320b57cec5SDimitry Andric   }
9330b57cec5SDimitry Andric   if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
9340b57cec5SDimitry Andric     // pow/powr/pown(x, 1.0) = x
9355f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
9365f757f3fSDimitry Andric     replaceCall(FPOp, opr0);
9370b57cec5SDimitry Andric     return true;
9380b57cec5SDimitry Andric   }
9390b57cec5SDimitry Andric   if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
9400b57cec5SDimitry Andric     // pow/powr/pown(x, 2.0) = x*x
9415f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
9425f757f3fSDimitry Andric                       << *opr0 << "\n");
9430b57cec5SDimitry Andric     Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
9445f757f3fSDimitry Andric     replaceCall(FPOp, nval);
9450b57cec5SDimitry Andric     return true;
9460b57cec5SDimitry Andric   }
9470b57cec5SDimitry Andric   if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
9480b57cec5SDimitry Andric     // pow/powr/pown(x, -1.0) = 1.0/x
9495f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
9500b57cec5SDimitry Andric     Constant *cnval = ConstantFP::get(eltType, 1.0);
9510b57cec5SDimitry Andric     if (getVecSize(FInfo) > 1) {
9520b57cec5SDimitry Andric       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9530b57cec5SDimitry Andric     }
9540b57cec5SDimitry Andric     Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
9555f757f3fSDimitry Andric     replaceCall(FPOp, nval);
9560b57cec5SDimitry Andric     return true;
9570b57cec5SDimitry Andric   }
9580b57cec5SDimitry Andric 
9590b57cec5SDimitry Andric   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
9600b57cec5SDimitry Andric     // pow[r](x, [-]0.5) = sqrt(x)
9610b57cec5SDimitry Andric     bool issqrt = CF->isExactlyValue(0.5);
9620b57cec5SDimitry Andric     if (FunctionCallee FPExpr =
9630b57cec5SDimitry Andric             getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
9640b57cec5SDimitry Andric                                                 : AMDGPULibFunc::EI_RSQRT,
9650b57cec5SDimitry Andric                                          FInfo))) {
9665f757f3fSDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
9675f757f3fSDimitry Andric                         << '(' << *opr0 << ")\n");
9680b57cec5SDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
9690b57cec5SDimitry Andric                                                         : "__pow2rsqrt");
9705f757f3fSDimitry Andric       replaceCall(FPOp, nval);
9710b57cec5SDimitry Andric       return true;
9720b57cec5SDimitry Andric     }
9730b57cec5SDimitry Andric   }
9740b57cec5SDimitry Andric 
9755f757f3fSDimitry Andric   if (!isUnsafeFiniteOnlyMath(FPOp))
9760b57cec5SDimitry Andric     return false;
9770b57cec5SDimitry Andric 
9780b57cec5SDimitry Andric   // Unsafe Math optimization
9790b57cec5SDimitry Andric 
9800b57cec5SDimitry Andric   // Remember that ci_opr1 is set if opr1 is integral
9810b57cec5SDimitry Andric   if (CF) {
9820b57cec5SDimitry Andric     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
9835f757f3fSDimitry Andric                       ? (double)CF->convertToFloat()
9845f757f3fSDimitry Andric                       : CF->convertToDouble();
9850b57cec5SDimitry Andric     int ival = (int)dval;
9860b57cec5SDimitry Andric     if ((double)ival == dval) {
9870b57cec5SDimitry Andric       ci_opr1 = ival;
9880b57cec5SDimitry Andric     } else
9890b57cec5SDimitry Andric       ci_opr1 = 0x11111111;
9900b57cec5SDimitry Andric   }
9910b57cec5SDimitry Andric 
9920b57cec5SDimitry Andric   // pow/powr/pown(x, c) = [1/](x*x*..x); where
9930b57cec5SDimitry Andric   //   trunc(c) == c && the number of x == c && |c| <= 12
9940b57cec5SDimitry Andric   unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
9950b57cec5SDimitry Andric   if (abs_opr1 <= 12) {
9960b57cec5SDimitry Andric     Constant *cnval;
9970b57cec5SDimitry Andric     Value *nval;
9980b57cec5SDimitry Andric     if (abs_opr1 == 0) {
9990b57cec5SDimitry Andric       cnval = ConstantFP::get(eltType, 1.0);
10000b57cec5SDimitry Andric       if (getVecSize(FInfo) > 1) {
10010b57cec5SDimitry Andric         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
10020b57cec5SDimitry Andric       }
10030b57cec5SDimitry Andric       nval = cnval;
10040b57cec5SDimitry Andric     } else {
10050b57cec5SDimitry Andric       Value *valx2 = nullptr;
10060b57cec5SDimitry Andric       nval = nullptr;
10070b57cec5SDimitry Andric       while (abs_opr1 > 0) {
10080b57cec5SDimitry Andric         valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
10090b57cec5SDimitry Andric         if (abs_opr1 & 1) {
10100b57cec5SDimitry Andric           nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
10110b57cec5SDimitry Andric         }
10120b57cec5SDimitry Andric         abs_opr1 >>= 1;
10130b57cec5SDimitry Andric       }
10140b57cec5SDimitry Andric     }
10150b57cec5SDimitry Andric 
10160b57cec5SDimitry Andric     if (ci_opr1 < 0) {
10170b57cec5SDimitry Andric       cnval = ConstantFP::get(eltType, 1.0);
10180b57cec5SDimitry Andric       if (getVecSize(FInfo) > 1) {
10190b57cec5SDimitry Andric         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
10200b57cec5SDimitry Andric       }
10210b57cec5SDimitry Andric       nval = B.CreateFDiv(cnval, nval, "__1powprod");
10220b57cec5SDimitry Andric     }
10235f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
10240b57cec5SDimitry Andric                       << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
10250b57cec5SDimitry Andric                       << ")\n");
10265f757f3fSDimitry Andric     replaceCall(FPOp, nval);
10270b57cec5SDimitry Andric     return true;
10280b57cec5SDimitry Andric   }
10290b57cec5SDimitry Andric 
10305f757f3fSDimitry Andric   // If we should use the generic intrinsic instead of emitting a libcall
10315f757f3fSDimitry Andric   const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
10325f757f3fSDimitry Andric 
10330b57cec5SDimitry Andric   // powr ---> exp2(y * log2(x))
10340b57cec5SDimitry Andric   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
10355f757f3fSDimitry Andric   FunctionCallee ExpExpr;
10365f757f3fSDimitry Andric   if (ShouldUseIntrinsic)
10375f757f3fSDimitry Andric     ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()});
10385f757f3fSDimitry Andric   else {
10395f757f3fSDimitry Andric     ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
10400b57cec5SDimitry Andric     if (!ExpExpr)
10410b57cec5SDimitry Andric       return false;
10425f757f3fSDimitry Andric   }
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric   bool needlog = false;
10450b57cec5SDimitry Andric   bool needabs = false;
10460b57cec5SDimitry Andric   bool needcopysign = false;
10470b57cec5SDimitry Andric   Constant *cnval = nullptr;
10480b57cec5SDimitry Andric   if (getVecSize(FInfo) == 1) {
10495f757f3fSDimitry Andric     CF = nullptr;
1050*0fca6ea1SDimitry Andric     match(opr0, m_APFloatAllowPoison(CF));
10510b57cec5SDimitry Andric 
10520b57cec5SDimitry Andric     if (CF) {
10530b57cec5SDimitry Andric       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10545f757f3fSDimitry Andric                      ? (double)CF->convertToFloat()
10555f757f3fSDimitry Andric                      : CF->convertToDouble();
10560b57cec5SDimitry Andric 
10570b57cec5SDimitry Andric       V = log2(std::abs(V));
10580b57cec5SDimitry Andric       cnval = ConstantFP::get(eltType, V);
10590b57cec5SDimitry Andric       needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
10600b57cec5SDimitry Andric                      CF->isNegative();
10610b57cec5SDimitry Andric     } else {
10620b57cec5SDimitry Andric       needlog = true;
1063cb14a3feSDimitry Andric       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10640b57cec5SDimitry Andric     }
10650b57cec5SDimitry Andric   } else {
10660b57cec5SDimitry Andric     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
10670b57cec5SDimitry Andric 
10680b57cec5SDimitry Andric     if (!CDV) {
10690b57cec5SDimitry Andric       needlog = true;
10700b57cec5SDimitry Andric       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10710b57cec5SDimitry Andric     } else {
10720b57cec5SDimitry Andric       assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
10730b57cec5SDimitry Andric               "Wrong vector size detected");
10740b57cec5SDimitry Andric 
10750b57cec5SDimitry Andric       SmallVector<double, 0> DVal;
10760b57cec5SDimitry Andric       for (int i=0; i < getVecSize(FInfo); ++i) {
10775f757f3fSDimitry Andric         double V = CDV->getElementAsAPFloat(i).convertToDouble();
10780b57cec5SDimitry Andric         if (V < 0.0) needcopysign = true;
10790b57cec5SDimitry Andric         V = log2(std::abs(V));
10800b57cec5SDimitry Andric         DVal.push_back(V);
10810b57cec5SDimitry Andric       }
10820b57cec5SDimitry Andric       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
10830b57cec5SDimitry Andric         SmallVector<float, 0> FVal;
1084*0fca6ea1SDimitry Andric         for (double D : DVal)
1085*0fca6ea1SDimitry Andric           FVal.push_back((float)D);
10860b57cec5SDimitry Andric         ArrayRef<float> tmp(FVal);
10870b57cec5SDimitry Andric         cnval = ConstantDataVector::get(M->getContext(), tmp);
10880b57cec5SDimitry Andric       } else {
10890b57cec5SDimitry Andric         ArrayRef<double> tmp(DVal);
10900b57cec5SDimitry Andric         cnval = ConstantDataVector::get(M->getContext(), tmp);
10910b57cec5SDimitry Andric       }
10920b57cec5SDimitry Andric     }
10930b57cec5SDimitry Andric   }
10940b57cec5SDimitry Andric 
10950b57cec5SDimitry Andric   if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
10960b57cec5SDimitry Andric     // We cannot handle corner cases for a general pow() function, give up
10970b57cec5SDimitry Andric     // unless y is a constant integral value. Then proceed as if it were pown.
10985f757f3fSDimitry Andric     if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags()))
10990b57cec5SDimitry Andric       return false;
11000b57cec5SDimitry Andric   }
11010b57cec5SDimitry Andric 
11020b57cec5SDimitry Andric   Value *nval;
11030b57cec5SDimitry Andric   if (needabs) {
11045f757f3fSDimitry Andric     nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
11050b57cec5SDimitry Andric   } else {
11060b57cec5SDimitry Andric     nval = cnval ? cnval : opr0;
11070b57cec5SDimitry Andric   }
11080b57cec5SDimitry Andric   if (needlog) {
11095f757f3fSDimitry Andric     FunctionCallee LogExpr;
11105f757f3fSDimitry Andric     if (ShouldUseIntrinsic) {
11115f757f3fSDimitry Andric       LogExpr =
11125f757f3fSDimitry Andric           Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()});
11135f757f3fSDimitry Andric     } else {
11145f757f3fSDimitry Andric       LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
11150b57cec5SDimitry Andric       if (!LogExpr)
11160b57cec5SDimitry Andric         return false;
11175f757f3fSDimitry Andric     }
11185f757f3fSDimitry Andric 
11190b57cec5SDimitry Andric     nval = CreateCallEx(B,LogExpr, nval, "__log2");
11200b57cec5SDimitry Andric   }
11210b57cec5SDimitry Andric 
11220b57cec5SDimitry Andric   if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
11230b57cec5SDimitry Andric     // convert int(32) to fp(f32 or f64)
11240b57cec5SDimitry Andric     opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
11250b57cec5SDimitry Andric   }
11260b57cec5SDimitry Andric   nval = B.CreateFMul(opr1, nval, "__ylogx");
11270b57cec5SDimitry Andric   nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
11280b57cec5SDimitry Andric 
11290b57cec5SDimitry Andric   if (needcopysign) {
11305f757f3fSDimitry Andric     Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
1131*0fca6ea1SDimitry Andric     Type *nTy = FPOp->getType()->getWithNewType(nTyS);
11320b57cec5SDimitry Andric     unsigned size = nTy->getScalarSizeInBits();
1133*0fca6ea1SDimitry Andric     Value *opr_n = FPOp->getOperand(1);
1134*0fca6ea1SDimitry Andric     if (opr_n->getType()->getScalarType()->isIntegerTy())
11355f757f3fSDimitry Andric       opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
11360b57cec5SDimitry Andric     else
11370b57cec5SDimitry Andric       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11380b57cec5SDimitry Andric 
11390b57cec5SDimitry Andric     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11400b57cec5SDimitry Andric     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
11410b57cec5SDimitry Andric     nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
11420b57cec5SDimitry Andric     nval = B.CreateBitCast(nval, opr0->getType());
11430b57cec5SDimitry Andric   }
11440b57cec5SDimitry Andric 
11455f757f3fSDimitry Andric   LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
11460b57cec5SDimitry Andric                     << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
11475f757f3fSDimitry Andric   replaceCall(FPOp, nval);
11480b57cec5SDimitry Andric 
11490b57cec5SDimitry Andric   return true;
11500b57cec5SDimitry Andric }
11510b57cec5SDimitry Andric 
11525f757f3fSDimitry Andric bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
11530b57cec5SDimitry Andric                                 const FuncInfo &FInfo) {
11545f757f3fSDimitry Andric   Value *opr0 = FPOp->getOperand(0);
11555f757f3fSDimitry Andric   Value *opr1 = FPOp->getOperand(1);
11560b57cec5SDimitry Andric 
1157*0fca6ea1SDimitry Andric   const APInt *CINT = nullptr;
1158*0fca6ea1SDimitry Andric   if (!match(opr1, m_APIntAllowPoison(CINT)))
11590b57cec5SDimitry Andric     return false;
1160*0fca6ea1SDimitry Andric 
1161*0fca6ea1SDimitry Andric   Function *Parent = B.GetInsertBlock()->getParent();
1162*0fca6ea1SDimitry Andric 
11630b57cec5SDimitry Andric   int ci_opr1 = (int)CINT->getSExtValue();
1164*0fca6ea1SDimitry Andric   if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1165*0fca6ea1SDimitry Andric     // rootn(x, 1) = x
1166*0fca6ea1SDimitry Andric     //
1167*0fca6ea1SDimitry Andric     // TODO: Insert constrained canonicalize for strictfp case.
1168*0fca6ea1SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
11695f757f3fSDimitry Andric     replaceCall(FPOp, opr0);
11700b57cec5SDimitry Andric     return true;
11710b57cec5SDimitry Andric   }
11725f757f3fSDimitry Andric 
11735f757f3fSDimitry Andric   Module *M = B.GetInsertBlock()->getModule();
1174*0fca6ea1SDimitry Andric 
1175*0fca6ea1SDimitry Andric   CallInst *CI = cast<CallInst>(FPOp);
1176*0fca6ea1SDimitry Andric   if (ci_opr1 == 2 &&
1177*0fca6ea1SDimitry Andric       shouldReplaceLibcallWithIntrinsic(CI,
1178*0fca6ea1SDimitry Andric                                         /*AllowMinSizeF32=*/true,
1179*0fca6ea1SDimitry Andric                                         /*AllowF64=*/true)) {
1180*0fca6ea1SDimitry Andric     // rootn(x, 2) = sqrt(x)
1181*0fca6ea1SDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
1182*0fca6ea1SDimitry Andric 
1183*0fca6ea1SDimitry Andric     CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1184*0fca6ea1SDimitry Andric     NewCall->takeName(CI);
1185*0fca6ea1SDimitry Andric 
1186*0fca6ea1SDimitry Andric     // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
1187*0fca6ea1SDimitry Andric     // metadata.
1188*0fca6ea1SDimitry Andric     MDBuilder MDHelper(M->getContext());
1189*0fca6ea1SDimitry Andric     MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1190*0fca6ea1SDimitry Andric     NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);
1191*0fca6ea1SDimitry Andric 
1192*0fca6ea1SDimitry Andric     replaceCall(CI, NewCall);
11930b57cec5SDimitry Andric     return true;
11940b57cec5SDimitry Andric   }
1195*0fca6ea1SDimitry Andric 
1196*0fca6ea1SDimitry Andric   if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
11970b57cec5SDimitry Andric     if (FunctionCallee FPExpr =
11980b57cec5SDimitry Andric             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
11995f757f3fSDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
12005f757f3fSDimitry Andric                         << ")\n");
12010b57cec5SDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
12025f757f3fSDimitry Andric       replaceCall(FPOp, nval);
12030b57cec5SDimitry Andric       return true;
12040b57cec5SDimitry Andric     }
12050b57cec5SDimitry Andric   } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
12065f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
12070b57cec5SDimitry Andric     Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
12080b57cec5SDimitry Andric                                opr0,
12090b57cec5SDimitry Andric                                "__rootn2div");
12105f757f3fSDimitry Andric     replaceCall(FPOp, nval);
12110b57cec5SDimitry Andric     return true;
1212*0fca6ea1SDimitry Andric   }
1213*0fca6ea1SDimitry Andric 
1214*0fca6ea1SDimitry Andric   if (ci_opr1 == -2 &&
1215*0fca6ea1SDimitry Andric       shouldReplaceLibcallWithIntrinsic(CI,
1216*0fca6ea1SDimitry Andric                                         /*AllowMinSizeF32=*/true,
1217*0fca6ea1SDimitry Andric                                         /*AllowF64=*/true)) {
1218*0fca6ea1SDimitry Andric     // rootn(x, -2) = rsqrt(x)
1219*0fca6ea1SDimitry Andric 
1220*0fca6ea1SDimitry Andric     // The original rootn had looser ulp requirements than the resultant sqrt
1221*0fca6ea1SDimitry Andric     // and fdiv.
1222*0fca6ea1SDimitry Andric     MDBuilder MDHelper(M->getContext());
1223*0fca6ea1SDimitry Andric     MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1224*0fca6ea1SDimitry Andric 
1225*0fca6ea1SDimitry Andric     // TODO: Could handle strictfp but need to fix strict sqrt emission
1226*0fca6ea1SDimitry Andric     FastMathFlags FMF = FPOp->getFastMathFlags();
1227*0fca6ea1SDimitry Andric     FMF.setAllowContract(true);
1228*0fca6ea1SDimitry Andric 
1229*0fca6ea1SDimitry Andric     CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1230*0fca6ea1SDimitry Andric     Instruction *RSqrt = cast<Instruction>(
1231*0fca6ea1SDimitry Andric         B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
1232*0fca6ea1SDimitry Andric     Sqrt->setFastMathFlags(FMF);
1233*0fca6ea1SDimitry Andric     RSqrt->setFastMathFlags(FMF);
1234*0fca6ea1SDimitry Andric     RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
1235*0fca6ea1SDimitry Andric 
12365f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
12370b57cec5SDimitry Andric                       << ")\n");
1238*0fca6ea1SDimitry Andric     replaceCall(CI, RSqrt);
12390b57cec5SDimitry Andric     return true;
12400b57cec5SDimitry Andric   }
1241*0fca6ea1SDimitry Andric 
12420b57cec5SDimitry Andric   return false;
12430b57cec5SDimitry Andric }
12440b57cec5SDimitry Andric 
1245349cc55cSDimitry Andric // Get a scalar native builtin single argument FP function
12460b57cec5SDimitry Andric FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
12470b57cec5SDimitry Andric                                                  const FuncInfo &FInfo) {
12480b57cec5SDimitry Andric   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
12490b57cec5SDimitry Andric     return nullptr;
12500b57cec5SDimitry Andric   FuncInfo nf = FInfo;
12510b57cec5SDimitry Andric   nf.setPrefix(AMDGPULibFunc::NATIVE);
12520b57cec5SDimitry Andric   return getFunction(M, nf);
12530b57cec5SDimitry Andric }
12540b57cec5SDimitry Andric 
12555f757f3fSDimitry Andric // Some library calls are just wrappers around llvm intrinsics, but compiled
12565f757f3fSDimitry Andric // conservatively. Preserve the flags from the original call site by
12575f757f3fSDimitry Andric // substituting them with direct calls with all the flags.
12585f757f3fSDimitry Andric bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
12595f757f3fSDimitry Andric                                                        bool AllowMinSizeF32,
12605f757f3fSDimitry Andric                                                        bool AllowF64,
12615f757f3fSDimitry Andric                                                        bool AllowStrictFP) {
12625f757f3fSDimitry Andric   Type *FltTy = CI->getType()->getScalarType();
12635f757f3fSDimitry Andric   const bool IsF32 = FltTy->isFloatTy();
12645f757f3fSDimitry Andric 
12655f757f3fSDimitry Andric   // f64 intrinsics aren't implemented for most operations.
12665f757f3fSDimitry Andric   if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
12675f757f3fSDimitry Andric     return false;
12685f757f3fSDimitry Andric 
12695f757f3fSDimitry Andric   // We're implicitly inlining by replacing the libcall with the intrinsic, so
12705f757f3fSDimitry Andric   // don't do it for noinline call sites.
12715f757f3fSDimitry Andric   if (CI->isNoInline())
12725f757f3fSDimitry Andric     return false;
12735f757f3fSDimitry Andric 
12745f757f3fSDimitry Andric   const Function *ParentF = CI->getFunction();
12755f757f3fSDimitry Andric   // TODO: Handle strictfp
12765f757f3fSDimitry Andric   if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
12775f757f3fSDimitry Andric     return false;
12785f757f3fSDimitry Andric 
12795f757f3fSDimitry Andric   if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
12805f757f3fSDimitry Andric     return false;
12815f757f3fSDimitry Andric   return true;
12825f757f3fSDimitry Andric }
12835f757f3fSDimitry Andric 
12845f757f3fSDimitry Andric void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
12855f757f3fSDimitry Andric                                                        CallInst *CI,
12865f757f3fSDimitry Andric                                                        Intrinsic::ID IntrID) {
12875f757f3fSDimitry Andric   if (CI->arg_size() == 2) {
12885f757f3fSDimitry Andric     Value *Arg0 = CI->getArgOperand(0);
12895f757f3fSDimitry Andric     Value *Arg1 = CI->getArgOperand(1);
12905f757f3fSDimitry Andric     VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
12915f757f3fSDimitry Andric     VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
12925f757f3fSDimitry Andric     if (Arg0VecTy && !Arg1VecTy) {
12935f757f3fSDimitry Andric       Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
12945f757f3fSDimitry Andric       CI->setArgOperand(1, SplatRHS);
12955f757f3fSDimitry Andric     } else if (!Arg0VecTy && Arg1VecTy) {
12965f757f3fSDimitry Andric       Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
12975f757f3fSDimitry Andric       CI->setArgOperand(0, SplatLHS);
12985f757f3fSDimitry Andric     }
12995f757f3fSDimitry Andric   }
13005f757f3fSDimitry Andric 
13015f757f3fSDimitry Andric   CI->setCalledFunction(
13025f757f3fSDimitry Andric       Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()}));
13035f757f3fSDimitry Andric }
13045f757f3fSDimitry Andric 
13055f757f3fSDimitry Andric bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
13065f757f3fSDimitry Andric     IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
13075f757f3fSDimitry Andric     bool AllowF64, bool AllowStrictFP) {
13085f757f3fSDimitry Andric   if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
13095f757f3fSDimitry Andric                                          AllowStrictFP))
13105f757f3fSDimitry Andric     return false;
13115f757f3fSDimitry Andric   replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
13125f757f3fSDimitry Andric   return true;
13135f757f3fSDimitry Andric }
13145f757f3fSDimitry Andric 
13155f757f3fSDimitry Andric std::tuple<Value *, Value *, Value *>
13165f757f3fSDimitry Andric AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
13175f757f3fSDimitry Andric                              FunctionCallee Fsincos) {
13185f757f3fSDimitry Andric   DebugLoc DL = B.getCurrentDebugLocation();
13195f757f3fSDimitry Andric   Function *F = B.GetInsertBlock()->getParent();
13205f757f3fSDimitry Andric   B.SetInsertPointPastAllocas(F);
13215f757f3fSDimitry Andric 
13225f757f3fSDimitry Andric   AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
13235f757f3fSDimitry Andric 
13245f757f3fSDimitry Andric   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
13255f757f3fSDimitry Andric     // If the argument is an instruction, it must dominate all uses so put our
13265f757f3fSDimitry Andric     // sincos call there. Otherwise, right after the allocas works well enough
13275f757f3fSDimitry Andric     // if it's an argument or constant.
13285f757f3fSDimitry Andric 
13295f757f3fSDimitry Andric     B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
13305f757f3fSDimitry Andric 
13315f757f3fSDimitry Andric     // SetInsertPoint unwelcomely always tries to set the debug loc.
13325f757f3fSDimitry Andric     B.SetCurrentDebugLocation(DL);
13335f757f3fSDimitry Andric   }
13345f757f3fSDimitry Andric 
13355f757f3fSDimitry Andric   Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
13365f757f3fSDimitry Andric 
13375f757f3fSDimitry Andric   // The allocaInst allocates the memory in private address space. This need
13385f757f3fSDimitry Andric   // to be addrspacecasted to point to the address space of cos pointer type.
13395f757f3fSDimitry Andric   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
13405f757f3fSDimitry Andric   Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
13415f757f3fSDimitry Andric 
13425f757f3fSDimitry Andric   CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
13435f757f3fSDimitry Andric 
13445f757f3fSDimitry Andric   // TODO: Is it worth trying to preserve the location for the cos calls for the
13455f757f3fSDimitry Andric   // load?
13465f757f3fSDimitry Andric 
13475f757f3fSDimitry Andric   LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
13485f757f3fSDimitry Andric   return {SinCos, LoadCos, SinCos};
13495f757f3fSDimitry Andric }
13505f757f3fSDimitry Andric 
13510b57cec5SDimitry Andric // fold sin, cos -> sincos.
13525f757f3fSDimitry Andric bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
13535f757f3fSDimitry Andric                                  const FuncInfo &fInfo) {
13540b57cec5SDimitry Andric   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
13550b57cec5SDimitry Andric          fInfo.getId() == AMDGPULibFunc::EI_COS);
13565f757f3fSDimitry Andric 
13575f757f3fSDimitry Andric   if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
13585f757f3fSDimitry Andric        getArgType(fInfo) != AMDGPULibFunc::F64) ||
13595f757f3fSDimitry Andric       fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
13605f757f3fSDimitry Andric     return false;
13615f757f3fSDimitry Andric 
13620b57cec5SDimitry Andric   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
13630b57cec5SDimitry Andric 
13645f757f3fSDimitry Andric   Value *CArgVal = FPOp->getOperand(0);
13655f757f3fSDimitry Andric   CallInst *CI = cast<CallInst>(FPOp);
13660b57cec5SDimitry Andric 
13675f757f3fSDimitry Andric   Function *F = B.GetInsertBlock()->getParent();
13685f757f3fSDimitry Andric   Module *M = F->getParent();
13690b57cec5SDimitry Andric 
13705f757f3fSDimitry Andric   // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
13715f757f3fSDimitry Andric   // implementation. Prefer the private form if available.
13725f757f3fSDimitry Andric   AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
13735f757f3fSDimitry Andric   SinCosLibFuncPrivate.getLeads()[0].PtrKind =
13745f757f3fSDimitry Andric       AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS);
13750b57cec5SDimitry Andric 
13765f757f3fSDimitry Andric   AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
13775f757f3fSDimitry Andric   SinCosLibFuncGeneric.getLeads()[0].PtrKind =
13785f757f3fSDimitry Andric       AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
13790b57cec5SDimitry Andric 
13805f757f3fSDimitry Andric   FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
13815f757f3fSDimitry Andric   FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
13825f757f3fSDimitry Andric   FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
13835f757f3fSDimitry Andric   if (!FSinCos)
13845f757f3fSDimitry Andric     return false;
13855f757f3fSDimitry Andric 
13865f757f3fSDimitry Andric   SmallVector<CallInst *> SinCalls;
13875f757f3fSDimitry Andric   SmallVector<CallInst *> CosCalls;
13885f757f3fSDimitry Andric   SmallVector<CallInst *> SinCosCalls;
13895f757f3fSDimitry Andric   FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
13905f757f3fSDimitry Andric                        fInfo);
13915f757f3fSDimitry Andric   const std::string PairName = PartnerInfo.mangle();
13925f757f3fSDimitry Andric 
13935f757f3fSDimitry Andric   StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
13945f757f3fSDimitry Andric   StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
13955f757f3fSDimitry Andric   const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
13965f757f3fSDimitry Andric   const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
13975f757f3fSDimitry Andric 
13985f757f3fSDimitry Andric   // Intersect the two sets of flags.
13995f757f3fSDimitry Andric   FastMathFlags FMF = FPOp->getFastMathFlags();
14005f757f3fSDimitry Andric   MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
14015f757f3fSDimitry Andric 
14025f757f3fSDimitry Andric   SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
14035f757f3fSDimitry Andric 
14040b57cec5SDimitry Andric   for (User* U : CArgVal->users()) {
14055f757f3fSDimitry Andric     CallInst *XI = dyn_cast<CallInst>(U);
14065f757f3fSDimitry Andric     if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
14070b57cec5SDimitry Andric       continue;
14080b57cec5SDimitry Andric 
14090b57cec5SDimitry Andric     Function *UCallee = XI->getCalledFunction();
14105f757f3fSDimitry Andric     if (!UCallee)
14110b57cec5SDimitry Andric       continue;
14120b57cec5SDimitry Andric 
14135f757f3fSDimitry Andric     bool Handled = true;
14145f757f3fSDimitry Andric 
14155f757f3fSDimitry Andric     if (UCallee->getName() == SinName)
14165f757f3fSDimitry Andric       SinCalls.push_back(XI);
14175f757f3fSDimitry Andric     else if (UCallee->getName() == CosName)
14185f757f3fSDimitry Andric       CosCalls.push_back(XI);
14195f757f3fSDimitry Andric     else if (UCallee->getName() == SinCosPrivateName ||
14205f757f3fSDimitry Andric              UCallee->getName() == SinCosGenericName)
14215f757f3fSDimitry Andric       SinCosCalls.push_back(XI);
14225f757f3fSDimitry Andric     else
14235f757f3fSDimitry Andric       Handled = false;
14245f757f3fSDimitry Andric 
14255f757f3fSDimitry Andric     if (Handled) {
14265f757f3fSDimitry Andric       MergeDbgLocs.push_back(XI->getDebugLoc());
14275f757f3fSDimitry Andric       auto *OtherOp = cast<FPMathOperator>(XI);
14285f757f3fSDimitry Andric       FMF &= OtherOp->getFastMathFlags();
14295f757f3fSDimitry Andric       FPMath = MDNode::getMostGenericFPMath(
14305f757f3fSDimitry Andric           FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
14310b57cec5SDimitry Andric     }
14320b57cec5SDimitry Andric   }
14330b57cec5SDimitry Andric 
14345f757f3fSDimitry Andric   if (SinCalls.empty() || CosCalls.empty())
14350b57cec5SDimitry Andric     return false;
14360b57cec5SDimitry Andric 
14375f757f3fSDimitry Andric   B.setFastMathFlags(FMF);
14385f757f3fSDimitry Andric   B.setDefaultFPMathTag(FPMath);
14395f757f3fSDimitry Andric   DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
14405f757f3fSDimitry Andric   B.SetCurrentDebugLocation(DbgLoc);
14410b57cec5SDimitry Andric 
14425f757f3fSDimitry Andric   auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
14430b57cec5SDimitry Andric 
14445f757f3fSDimitry Andric   auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
14455f757f3fSDimitry Andric     for (CallInst *C : Calls)
14465f757f3fSDimitry Andric       C->replaceAllUsesWith(Res);
14470b57cec5SDimitry Andric 
14485f757f3fSDimitry Andric     // Leave the other dead instructions to avoid clobbering iterators.
14495f757f3fSDimitry Andric   };
14505f757f3fSDimitry Andric 
14515f757f3fSDimitry Andric   replaceTrigInsts(SinCalls, Sin);
14525f757f3fSDimitry Andric   replaceTrigInsts(CosCalls, Cos);
14535f757f3fSDimitry Andric   replaceTrigInsts(SinCosCalls, SinCos);
14545f757f3fSDimitry Andric 
14555f757f3fSDimitry Andric   // It's safe to delete the original now.
14560b57cec5SDimitry Andric   CI->eraseFromParent();
14570b57cec5SDimitry Andric   return true;
14580b57cec5SDimitry Andric }
14590b57cec5SDimitry Andric 
14605f757f3fSDimitry Andric bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
14615f757f3fSDimitry Andric                                             double &Res1, Constant *copr0,
14625f757f3fSDimitry Andric                                             Constant *copr1) {
14630b57cec5SDimitry Andric   // By default, opr0/opr1/opr3 holds values of float/double type.
14640b57cec5SDimitry Andric   // If they are not float/double, each function has to its
14650b57cec5SDimitry Andric   // operand separately.
14665f757f3fSDimitry Andric   double opr0 = 0.0, opr1 = 0.0;
14670b57cec5SDimitry Andric   ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
14680b57cec5SDimitry Andric   ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
14690b57cec5SDimitry Andric   if (fpopr0) {
14700b57cec5SDimitry Andric     opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14710b57cec5SDimitry Andric              ? fpopr0->getValueAPF().convertToDouble()
14720b57cec5SDimitry Andric              : (double)fpopr0->getValueAPF().convertToFloat();
14730b57cec5SDimitry Andric   }
14740b57cec5SDimitry Andric 
14750b57cec5SDimitry Andric   if (fpopr1) {
14760b57cec5SDimitry Andric     opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14770b57cec5SDimitry Andric              ? fpopr1->getValueAPF().convertToDouble()
14780b57cec5SDimitry Andric              : (double)fpopr1->getValueAPF().convertToFloat();
14790b57cec5SDimitry Andric   }
14800b57cec5SDimitry Andric 
14810b57cec5SDimitry Andric   switch (FInfo.getId()) {
14820b57cec5SDimitry Andric   default : return false;
14830b57cec5SDimitry Andric 
14840b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOS:
14850b57cec5SDimitry Andric     Res0 = acos(opr0);
14860b57cec5SDimitry Andric     return true;
14870b57cec5SDimitry Andric 
14880b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSH:
14890b57cec5SDimitry Andric     // acosh(x) == log(x + sqrt(x*x - 1))
14900b57cec5SDimitry Andric     Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
14910b57cec5SDimitry Andric     return true;
14920b57cec5SDimitry Andric 
14930b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSPI:
14940b57cec5SDimitry Andric     Res0 = acos(opr0) / MATH_PI;
14950b57cec5SDimitry Andric     return true;
14960b57cec5SDimitry Andric 
14970b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASIN:
14980b57cec5SDimitry Andric     Res0 = asin(opr0);
14990b57cec5SDimitry Andric     return true;
15000b57cec5SDimitry Andric 
15010b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINH:
15020b57cec5SDimitry Andric     // asinh(x) == log(x + sqrt(x*x + 1))
15030b57cec5SDimitry Andric     Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
15040b57cec5SDimitry Andric     return true;
15050b57cec5SDimitry Andric 
15060b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINPI:
15070b57cec5SDimitry Andric     Res0 = asin(opr0) / MATH_PI;
15080b57cec5SDimitry Andric     return true;
15090b57cec5SDimitry Andric 
15100b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATAN:
15110b57cec5SDimitry Andric     Res0 = atan(opr0);
15120b57cec5SDimitry Andric     return true;
15130b57cec5SDimitry Andric 
15140b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANH:
15150b57cec5SDimitry Andric     // atanh(x) == (log(x+1) - log(x-1))/2;
15160b57cec5SDimitry Andric     Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
15170b57cec5SDimitry Andric     return true;
15180b57cec5SDimitry Andric 
15190b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANPI:
15200b57cec5SDimitry Andric     Res0 = atan(opr0) / MATH_PI;
15210b57cec5SDimitry Andric     return true;
15220b57cec5SDimitry Andric 
15230b57cec5SDimitry Andric   case AMDGPULibFunc::EI_CBRT:
15240b57cec5SDimitry Andric     Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
15250b57cec5SDimitry Andric     return true;
15260b57cec5SDimitry Andric 
15270b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COS:
15280b57cec5SDimitry Andric     Res0 = cos(opr0);
15290b57cec5SDimitry Andric     return true;
15300b57cec5SDimitry Andric 
15310b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSH:
15320b57cec5SDimitry Andric     Res0 = cosh(opr0);
15330b57cec5SDimitry Andric     return true;
15340b57cec5SDimitry Andric 
15350b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSPI:
15360b57cec5SDimitry Andric     Res0 = cos(MATH_PI * opr0);
15370b57cec5SDimitry Andric     return true;
15380b57cec5SDimitry Andric 
15390b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP:
15400b57cec5SDimitry Andric     Res0 = exp(opr0);
15410b57cec5SDimitry Andric     return true;
15420b57cec5SDimitry Andric 
15430b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP2:
15440b57cec5SDimitry Andric     Res0 = pow(2.0, opr0);
15450b57cec5SDimitry Andric     return true;
15460b57cec5SDimitry Andric 
15470b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP10:
15480b57cec5SDimitry Andric     Res0 = pow(10.0, opr0);
15490b57cec5SDimitry Andric     return true;
15500b57cec5SDimitry Andric 
15510b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG:
15520b57cec5SDimitry Andric     Res0 = log(opr0);
15530b57cec5SDimitry Andric     return true;
15540b57cec5SDimitry Andric 
15550b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG2:
15560b57cec5SDimitry Andric     Res0 = log(opr0) / log(2.0);
15570b57cec5SDimitry Andric     return true;
15580b57cec5SDimitry Andric 
15590b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG10:
15600b57cec5SDimitry Andric     Res0 = log(opr0) / log(10.0);
15610b57cec5SDimitry Andric     return true;
15620b57cec5SDimitry Andric 
15630b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RSQRT:
15640b57cec5SDimitry Andric     Res0 = 1.0 / sqrt(opr0);
15650b57cec5SDimitry Andric     return true;
15660b57cec5SDimitry Andric 
15670b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SIN:
15680b57cec5SDimitry Andric     Res0 = sin(opr0);
15690b57cec5SDimitry Andric     return true;
15700b57cec5SDimitry Andric 
15710b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINH:
15720b57cec5SDimitry Andric     Res0 = sinh(opr0);
15730b57cec5SDimitry Andric     return true;
15740b57cec5SDimitry Andric 
15750b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINPI:
15760b57cec5SDimitry Andric     Res0 = sin(MATH_PI * opr0);
15770b57cec5SDimitry Andric     return true;
15780b57cec5SDimitry Andric 
15790b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TAN:
15800b57cec5SDimitry Andric     Res0 = tan(opr0);
15810b57cec5SDimitry Andric     return true;
15820b57cec5SDimitry Andric 
15830b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANH:
15840b57cec5SDimitry Andric     Res0 = tanh(opr0);
15850b57cec5SDimitry Andric     return true;
15860b57cec5SDimitry Andric 
15870b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANPI:
15880b57cec5SDimitry Andric     Res0 = tan(MATH_PI * opr0);
15890b57cec5SDimitry Andric     return true;
15900b57cec5SDimitry Andric 
15910b57cec5SDimitry Andric   // two-arg functions
15920b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POW:
15930b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POWR:
15940b57cec5SDimitry Andric     Res0 = pow(opr0, opr1);
15950b57cec5SDimitry Andric     return true;
15960b57cec5SDimitry Andric 
15970b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POWN: {
15980b57cec5SDimitry Andric     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15990b57cec5SDimitry Andric       double val = (double)iopr1->getSExtValue();
16000b57cec5SDimitry Andric       Res0 = pow(opr0, val);
16010b57cec5SDimitry Andric       return true;
16020b57cec5SDimitry Andric     }
16030b57cec5SDimitry Andric     return false;
16040b57cec5SDimitry Andric   }
16050b57cec5SDimitry Andric 
16060b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ROOTN: {
16070b57cec5SDimitry Andric     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
16080b57cec5SDimitry Andric       double val = (double)iopr1->getSExtValue();
16090b57cec5SDimitry Andric       Res0 = pow(opr0, 1.0 / val);
16100b57cec5SDimitry Andric       return true;
16110b57cec5SDimitry Andric     }
16120b57cec5SDimitry Andric     return false;
16130b57cec5SDimitry Andric   }
16140b57cec5SDimitry Andric 
16150b57cec5SDimitry Andric   // with ptr arg
16160b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINCOS:
16170b57cec5SDimitry Andric     Res0 = sin(opr0);
16180b57cec5SDimitry Andric     Res1 = cos(opr0);
16190b57cec5SDimitry Andric     return true;
16200b57cec5SDimitry Andric   }
16210b57cec5SDimitry Andric 
16220b57cec5SDimitry Andric   return false;
16230b57cec5SDimitry Andric }
16240b57cec5SDimitry Andric 
1625349cc55cSDimitry Andric bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1626349cc55cSDimitry Andric   int numArgs = (int)aCI->arg_size();
16270b57cec5SDimitry Andric   if (numArgs > 3)
16280b57cec5SDimitry Andric     return false;
16290b57cec5SDimitry Andric 
16300b57cec5SDimitry Andric   Constant *copr0 = nullptr;
16310b57cec5SDimitry Andric   Constant *copr1 = nullptr;
16320b57cec5SDimitry Andric   if (numArgs > 0) {
16330b57cec5SDimitry Andric     if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
16340b57cec5SDimitry Andric       return false;
16350b57cec5SDimitry Andric   }
16360b57cec5SDimitry Andric 
16370b57cec5SDimitry Andric   if (numArgs > 1) {
16380b57cec5SDimitry Andric     if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
16390b57cec5SDimitry Andric       if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
16400b57cec5SDimitry Andric         return false;
16410b57cec5SDimitry Andric     }
16420b57cec5SDimitry Andric   }
16430b57cec5SDimitry Andric 
16440b57cec5SDimitry Andric   // At this point, all arguments to aCI are constants.
16450b57cec5SDimitry Andric 
16460b57cec5SDimitry Andric   // max vector size is 16, and sincos will generate two results.
16470b57cec5SDimitry Andric   double DVal0[16], DVal1[16];
164881ad6265SDimitry Andric   int FuncVecSize = getVecSize(FInfo);
16490b57cec5SDimitry Andric   bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
165081ad6265SDimitry Andric   if (FuncVecSize == 1) {
16515f757f3fSDimitry Andric     if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
16520b57cec5SDimitry Andric       return false;
16530b57cec5SDimitry Andric     }
16540b57cec5SDimitry Andric   } else {
16550b57cec5SDimitry Andric     ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
16560b57cec5SDimitry Andric     ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
165781ad6265SDimitry Andric     for (int i = 0; i < FuncVecSize; ++i) {
16580b57cec5SDimitry Andric       Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
16590b57cec5SDimitry Andric       Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
16605f757f3fSDimitry Andric       if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
16610b57cec5SDimitry Andric         return false;
16620b57cec5SDimitry Andric       }
16630b57cec5SDimitry Andric     }
16640b57cec5SDimitry Andric   }
16650b57cec5SDimitry Andric 
16665f757f3fSDimitry Andric   LLVMContext &context = aCI->getContext();
16670b57cec5SDimitry Andric   Constant *nval0, *nval1;
166881ad6265SDimitry Andric   if (FuncVecSize == 1) {
16695f757f3fSDimitry Andric     nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
16700b57cec5SDimitry Andric     if (hasTwoResults)
16715f757f3fSDimitry Andric       nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
16720b57cec5SDimitry Andric   } else {
16730b57cec5SDimitry Andric     if (getArgType(FInfo) == AMDGPULibFunc::F32) {
16740b57cec5SDimitry Andric       SmallVector <float, 0> FVal0, FVal1;
167581ad6265SDimitry Andric       for (int i = 0; i < FuncVecSize; ++i)
16760b57cec5SDimitry Andric         FVal0.push_back((float)DVal0[i]);
16770b57cec5SDimitry Andric       ArrayRef<float> tmp0(FVal0);
16780b57cec5SDimitry Andric       nval0 = ConstantDataVector::get(context, tmp0);
16790b57cec5SDimitry Andric       if (hasTwoResults) {
168081ad6265SDimitry Andric         for (int i = 0; i < FuncVecSize; ++i)
16810b57cec5SDimitry Andric           FVal1.push_back((float)DVal1[i]);
16820b57cec5SDimitry Andric         ArrayRef<float> tmp1(FVal1);
16830b57cec5SDimitry Andric         nval1 = ConstantDataVector::get(context, tmp1);
16840b57cec5SDimitry Andric       }
16850b57cec5SDimitry Andric     } else {
16860b57cec5SDimitry Andric       ArrayRef<double> tmp0(DVal0);
16870b57cec5SDimitry Andric       nval0 = ConstantDataVector::get(context, tmp0);
16880b57cec5SDimitry Andric       if (hasTwoResults) {
16890b57cec5SDimitry Andric         ArrayRef<double> tmp1(DVal1);
16900b57cec5SDimitry Andric         nval1 = ConstantDataVector::get(context, tmp1);
16910b57cec5SDimitry Andric       }
16920b57cec5SDimitry Andric     }
16930b57cec5SDimitry Andric   }
16940b57cec5SDimitry Andric 
16950b57cec5SDimitry Andric   if (hasTwoResults) {
16960b57cec5SDimitry Andric     // sincos
16970b57cec5SDimitry Andric     assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
16980b57cec5SDimitry Andric            "math function with ptr arg not supported yet");
1699*0fca6ea1SDimitry Andric     new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
17000b57cec5SDimitry Andric   }
17010b57cec5SDimitry Andric 
17025f757f3fSDimitry Andric   replaceCall(aCI, nval0);
17030b57cec5SDimitry Andric   return true;
17040b57cec5SDimitry Andric }
17050b57cec5SDimitry Andric 
1706e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
1707e8d8bef9SDimitry Andric                                                   FunctionAnalysisManager &AM) {
17085f757f3fSDimitry Andric   AMDGPULibCalls Simplifier;
1709e8d8bef9SDimitry Andric   Simplifier.initNativeFuncs();
17105f757f3fSDimitry Andric   Simplifier.initFunction(F, AM);
1711e8d8bef9SDimitry Andric 
1712e8d8bef9SDimitry Andric   bool Changed = false;
1713e8d8bef9SDimitry Andric 
1714e8d8bef9SDimitry Andric   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1715e8d8bef9SDimitry Andric              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1716e8d8bef9SDimitry Andric 
1717e8d8bef9SDimitry Andric   for (auto &BB : F) {
1718e8d8bef9SDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1719e8d8bef9SDimitry Andric       // Ignore non-calls.
1720e8d8bef9SDimitry Andric       CallInst *CI = dyn_cast<CallInst>(I);
1721e8d8bef9SDimitry Andric       ++I;
1722e8d8bef9SDimitry Andric 
17235f757f3fSDimitry Andric       if (CI) {
17245f757f3fSDimitry Andric         if (Simplifier.fold(CI))
1725e8d8bef9SDimitry Andric           Changed = true;
1726e8d8bef9SDimitry Andric       }
1727e8d8bef9SDimitry Andric     }
17285f757f3fSDimitry Andric   }
1729e8d8bef9SDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1730e8d8bef9SDimitry Andric }
1731e8d8bef9SDimitry Andric 
1732e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
1733e8d8bef9SDimitry Andric                                                 FunctionAnalysisManager &AM) {
1734e8d8bef9SDimitry Andric   if (UseNative.empty())
1735e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
1736e8d8bef9SDimitry Andric 
1737e8d8bef9SDimitry Andric   AMDGPULibCalls Simplifier;
1738e8d8bef9SDimitry Andric   Simplifier.initNativeFuncs();
17395f757f3fSDimitry Andric   Simplifier.initFunction(F, AM);
1740e8d8bef9SDimitry Andric 
1741e8d8bef9SDimitry Andric   bool Changed = false;
1742e8d8bef9SDimitry Andric   for (auto &BB : F) {
1743e8d8bef9SDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1744e8d8bef9SDimitry Andric       // Ignore non-calls.
1745e8d8bef9SDimitry Andric       CallInst *CI = dyn_cast<CallInst>(I);
1746e8d8bef9SDimitry Andric       ++I;
17475f757f3fSDimitry Andric       if (CI && Simplifier.useNative(CI))
1748e8d8bef9SDimitry Andric         Changed = true;
1749e8d8bef9SDimitry Andric     }
1750e8d8bef9SDimitry Andric   }
1751e8d8bef9SDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1752e8d8bef9SDimitry Andric }
1753