1 //===- AMDGPULibCalls.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file does AMD library function optimizations. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPULibFunc.h" 16 #include "GCNSubtarget.h" 17 #include "llvm/Analysis/AssumptionCache.h" 18 #include "llvm/Analysis/TargetLibraryInfo.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 #include "llvm/IR/AttributeMask.h" 21 #include "llvm/IR/Dominators.h" 22 #include "llvm/IR/IRBuilder.h" 23 #include "llvm/IR/IntrinsicInst.h" 24 #include "llvm/IR/IntrinsicsAMDGPU.h" 25 #include "llvm/IR/PatternMatch.h" 26 #include "llvm/InitializePasses.h" 27 #include <cmath> 28 29 #define DEBUG_TYPE "amdgpu-simplifylib" 30 31 using namespace llvm; 32 using namespace llvm::PatternMatch; 33 34 static cl::opt<bool> EnablePreLink("amdgpu-prelink", 35 cl::desc("Enable pre-link mode optimizations"), 36 cl::init(false), 37 cl::Hidden); 38 39 static cl::list<std::string> UseNative("amdgpu-use-native", 40 cl::desc("Comma separated list of functions to replace with native, or all"), 41 cl::CommaSeparated, cl::ValueOptional, 42 cl::Hidden); 43 44 #define MATH_PI numbers::pi 45 #define MATH_E numbers::e 46 #define MATH_SQRT2 numbers::sqrt2 47 #define MATH_SQRT1_2 numbers::inv_sqrt2 48 49 namespace llvm { 50 51 class AMDGPULibCalls { 52 private: 53 const TargetLibraryInfo *TLInfo = nullptr; 54 AssumptionCache *AC = nullptr; 55 DominatorTree *DT = nullptr; 56 57 typedef llvm::AMDGPULibFunc FuncInfo; 58 59 bool UnsafeFPMath = false; 60 61 // -fuse-native. 62 bool AllNative = false; 63 64 bool useNativeFunc(const StringRef F) const; 65 66 // Return a pointer (pointer expr) to the function if function definition with 67 // "FuncName" exists. It may create a new function prototype in pre-link mode. 68 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); 69 70 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo); 71 72 bool TDOFold(CallInst *CI, const FuncInfo &FInfo); 73 74 /* Specialized optimizations */ 75 76 // pow/powr/pown 77 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 78 79 // rootn 80 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 81 82 // -fuse-native for sincos 83 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); 84 85 // evaluate calls if calls' arguments are constants. 86 bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1, 87 Constant *copr0, Constant *copr1); 88 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); 89 90 // sqrt 91 bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 92 93 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value 94 /// of cos, sincos call). 95 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg, 96 FastMathFlags FMF, 97 IRBuilder<> &B, 98 FunctionCallee Fsincos); 99 100 // sin/cos 101 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 102 103 // __read_pipe/__write_pipe 104 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 105 const FuncInfo &FInfo); 106 107 // Get a scalar native builtin single argument FP function 108 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); 109 110 /// Substitute a call to a known libcall with an intrinsic call. If \p 111 /// AllowMinSize is true, allow the replacement in a minsize function. 112 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 113 bool AllowMinSizeF32 = false, 114 bool AllowF64 = false, 115 bool AllowStrictFP = false); 116 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 117 Intrinsic::ID IntrID); 118 119 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 120 Intrinsic::ID IntrID, 121 bool AllowMinSizeF32 = false, 122 bool AllowF64 = false, 123 bool AllowStrictFP = false); 124 125 protected: 126 bool isUnsafeMath(const FPMathOperator *FPOp) const; 127 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const; 128 129 bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const; 130 131 static void replaceCall(Instruction *I, Value *With) { 132 I->replaceAllUsesWith(With); 133 I->eraseFromParent(); 134 } 135 136 static void replaceCall(FPMathOperator *I, Value *With) { 137 replaceCall(cast<Instruction>(I), With); 138 } 139 140 public: 141 AMDGPULibCalls() {} 142 143 bool fold(CallInst *CI); 144 145 void initFunction(Function &F, FunctionAnalysisManager &FAM); 146 void initNativeFuncs(); 147 148 // Replace a normal math function call with that native version 149 bool useNative(CallInst *CI); 150 }; 151 152 } // end llvm namespace 153 154 template <typename IRB> 155 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, 156 const Twine &Name = "") { 157 CallInst *R = B.CreateCall(Callee, Arg, Name); 158 if (Function *F = dyn_cast<Function>(Callee.getCallee())) 159 R->setCallingConv(F->getCallingConv()); 160 return R; 161 } 162 163 template <typename IRB> 164 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, 165 Value *Arg2, const Twine &Name = "") { 166 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); 167 if (Function *F = dyn_cast<Function>(Callee.getCallee())) 168 R->setCallingConv(F->getCallingConv()); 169 return R; 170 } 171 172 static FunctionType *getPownType(FunctionType *FT) { 173 Type *PowNExpTy = Type::getInt32Ty(FT->getContext()); 174 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType())) 175 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount()); 176 177 return FunctionType::get(FT->getReturnType(), 178 {FT->getParamType(0), PowNExpTy}, false); 179 } 180 181 // Data structures for table-driven optimizations. 182 // FuncTbl works for both f32 and f64 functions with 1 input argument 183 184 struct TableEntry { 185 double result; 186 double input; 187 }; 188 189 /* a list of {result, input} */ 190 static const TableEntry tbl_acos[] = { 191 {MATH_PI / 2.0, 0.0}, 192 {MATH_PI / 2.0, -0.0}, 193 {0.0, 1.0}, 194 {MATH_PI, -1.0} 195 }; 196 static const TableEntry tbl_acosh[] = { 197 {0.0, 1.0} 198 }; 199 static const TableEntry tbl_acospi[] = { 200 {0.5, 0.0}, 201 {0.5, -0.0}, 202 {0.0, 1.0}, 203 {1.0, -1.0} 204 }; 205 static const TableEntry tbl_asin[] = { 206 {0.0, 0.0}, 207 {-0.0, -0.0}, 208 {MATH_PI / 2.0, 1.0}, 209 {-MATH_PI / 2.0, -1.0} 210 }; 211 static const TableEntry tbl_asinh[] = { 212 {0.0, 0.0}, 213 {-0.0, -0.0} 214 }; 215 static const TableEntry tbl_asinpi[] = { 216 {0.0, 0.0}, 217 {-0.0, -0.0}, 218 {0.5, 1.0}, 219 {-0.5, -1.0} 220 }; 221 static const TableEntry tbl_atan[] = { 222 {0.0, 0.0}, 223 {-0.0, -0.0}, 224 {MATH_PI / 4.0, 1.0}, 225 {-MATH_PI / 4.0, -1.0} 226 }; 227 static const TableEntry tbl_atanh[] = { 228 {0.0, 0.0}, 229 {-0.0, -0.0} 230 }; 231 static const TableEntry tbl_atanpi[] = { 232 {0.0, 0.0}, 233 {-0.0, -0.0}, 234 {0.25, 1.0}, 235 {-0.25, -1.0} 236 }; 237 static const TableEntry tbl_cbrt[] = { 238 {0.0, 0.0}, 239 {-0.0, -0.0}, 240 {1.0, 1.0}, 241 {-1.0, -1.0}, 242 }; 243 static const TableEntry tbl_cos[] = { 244 {1.0, 0.0}, 245 {1.0, -0.0} 246 }; 247 static const TableEntry tbl_cosh[] = { 248 {1.0, 0.0}, 249 {1.0, -0.0} 250 }; 251 static const TableEntry tbl_cospi[] = { 252 {1.0, 0.0}, 253 {1.0, -0.0} 254 }; 255 static const TableEntry tbl_erfc[] = { 256 {1.0, 0.0}, 257 {1.0, -0.0} 258 }; 259 static const TableEntry tbl_erf[] = { 260 {0.0, 0.0}, 261 {-0.0, -0.0} 262 }; 263 static const TableEntry tbl_exp[] = { 264 {1.0, 0.0}, 265 {1.0, -0.0}, 266 {MATH_E, 1.0} 267 }; 268 static const TableEntry tbl_exp2[] = { 269 {1.0, 0.0}, 270 {1.0, -0.0}, 271 {2.0, 1.0} 272 }; 273 static const TableEntry tbl_exp10[] = { 274 {1.0, 0.0}, 275 {1.0, -0.0}, 276 {10.0, 1.0} 277 }; 278 static const TableEntry tbl_expm1[] = { 279 {0.0, 0.0}, 280 {-0.0, -0.0} 281 }; 282 static const TableEntry tbl_log[] = { 283 {0.0, 1.0}, 284 {1.0, MATH_E} 285 }; 286 static const TableEntry tbl_log2[] = { 287 {0.0, 1.0}, 288 {1.0, 2.0} 289 }; 290 static const TableEntry tbl_log10[] = { 291 {0.0, 1.0}, 292 {1.0, 10.0} 293 }; 294 static const TableEntry tbl_rsqrt[] = { 295 {1.0, 1.0}, 296 {MATH_SQRT1_2, 2.0} 297 }; 298 static const TableEntry tbl_sin[] = { 299 {0.0, 0.0}, 300 {-0.0, -0.0} 301 }; 302 static const TableEntry tbl_sinh[] = { 303 {0.0, 0.0}, 304 {-0.0, -0.0} 305 }; 306 static const TableEntry tbl_sinpi[] = { 307 {0.0, 0.0}, 308 {-0.0, -0.0} 309 }; 310 static const TableEntry tbl_sqrt[] = { 311 {0.0, 0.0}, 312 {1.0, 1.0}, 313 {MATH_SQRT2, 2.0} 314 }; 315 static const TableEntry tbl_tan[] = { 316 {0.0, 0.0}, 317 {-0.0, -0.0} 318 }; 319 static const TableEntry tbl_tanh[] = { 320 {0.0, 0.0}, 321 {-0.0, -0.0} 322 }; 323 static const TableEntry tbl_tanpi[] = { 324 {0.0, 0.0}, 325 {-0.0, -0.0} 326 }; 327 static const TableEntry tbl_tgamma[] = { 328 {1.0, 1.0}, 329 {1.0, 2.0}, 330 {2.0, 3.0}, 331 {6.0, 4.0} 332 }; 333 334 static bool HasNative(AMDGPULibFunc::EFuncId id) { 335 switch(id) { 336 case AMDGPULibFunc::EI_DIVIDE: 337 case AMDGPULibFunc::EI_COS: 338 case AMDGPULibFunc::EI_EXP: 339 case AMDGPULibFunc::EI_EXP2: 340 case AMDGPULibFunc::EI_EXP10: 341 case AMDGPULibFunc::EI_LOG: 342 case AMDGPULibFunc::EI_LOG2: 343 case AMDGPULibFunc::EI_LOG10: 344 case AMDGPULibFunc::EI_POWR: 345 case AMDGPULibFunc::EI_RECIP: 346 case AMDGPULibFunc::EI_RSQRT: 347 case AMDGPULibFunc::EI_SIN: 348 case AMDGPULibFunc::EI_SINCOS: 349 case AMDGPULibFunc::EI_SQRT: 350 case AMDGPULibFunc::EI_TAN: 351 return true; 352 default:; 353 } 354 return false; 355 } 356 357 using TableRef = ArrayRef<TableEntry>; 358 359 static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { 360 switch(id) { 361 case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); 362 case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); 363 case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); 364 case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); 365 case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); 366 case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); 367 case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); 368 case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); 369 case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); 370 case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); 371 case AMDGPULibFunc::EI_NCOS: 372 case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); 373 case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); 374 case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); 375 case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); 376 case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); 377 case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); 378 case AMDGPULibFunc::EI_NEXP2: 379 case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); 380 case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); 381 case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); 382 case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); 383 case AMDGPULibFunc::EI_NLOG2: 384 case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); 385 case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); 386 case AMDGPULibFunc::EI_NRSQRT: 387 case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); 388 case AMDGPULibFunc::EI_NSIN: 389 case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); 390 case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); 391 case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); 392 case AMDGPULibFunc::EI_NSQRT: 393 case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); 394 case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); 395 case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); 396 case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); 397 case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); 398 default:; 399 } 400 return TableRef(); 401 } 402 403 static inline int getVecSize(const AMDGPULibFunc& FInfo) { 404 return FInfo.getLeads()[0].VectorSize; 405 } 406 407 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { 408 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; 409 } 410 411 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { 412 // If we are doing PreLinkOpt, the function is external. So it is safe to 413 // use getOrInsertFunction() at this stage. 414 415 return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) 416 : AMDGPULibFunc::getFunction(M, fInfo); 417 } 418 419 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName, 420 FuncInfo &FInfo) { 421 return AMDGPULibFunc::parse(FMangledName, FInfo); 422 } 423 424 bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const { 425 return UnsafeFPMath || FPOp->isFast(); 426 } 427 428 bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const { 429 return UnsafeFPMath || 430 (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs()); 431 } 432 433 bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold( 434 const FPMathOperator *FPOp) const { 435 // TODO: Refine to approxFunc or contract 436 return isUnsafeMath(FPOp); 437 } 438 439 void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) { 440 UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool(); 441 AC = &FAM.getResult<AssumptionAnalysis>(F); 442 TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F); 443 DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); 444 } 445 446 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { 447 return AllNative || llvm::is_contained(UseNative, F); 448 } 449 450 void AMDGPULibCalls::initNativeFuncs() { 451 AllNative = useNativeFunc("all") || 452 (UseNative.getNumOccurrences() && UseNative.size() == 1 && 453 UseNative.begin()->empty()); 454 } 455 456 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { 457 bool native_sin = useNativeFunc("sin"); 458 bool native_cos = useNativeFunc("cos"); 459 460 if (native_sin && native_cos) { 461 Module *M = aCI->getModule(); 462 Value *opr0 = aCI->getArgOperand(0); 463 464 AMDGPULibFunc nf; 465 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; 466 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; 467 468 nf.setPrefix(AMDGPULibFunc::NATIVE); 469 nf.setId(AMDGPULibFunc::EI_SIN); 470 FunctionCallee sinExpr = getFunction(M, nf); 471 472 nf.setPrefix(AMDGPULibFunc::NATIVE); 473 nf.setId(AMDGPULibFunc::EI_COS); 474 FunctionCallee cosExpr = getFunction(M, nf); 475 if (sinExpr && cosExpr) { 476 Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); 477 Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); 478 new StoreInst(cosval, aCI->getArgOperand(1), aCI); 479 480 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 481 << " with native version of sin/cos"); 482 483 replaceCall(aCI, sinval); 484 return true; 485 } 486 } 487 return false; 488 } 489 490 bool AMDGPULibCalls::useNative(CallInst *aCI) { 491 Function *Callee = aCI->getCalledFunction(); 492 if (!Callee || aCI->isNoBuiltin()) 493 return false; 494 495 FuncInfo FInfo; 496 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() || 497 FInfo.getPrefix() != AMDGPULibFunc::NOPFX || 498 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || 499 !(AllNative || useNativeFunc(FInfo.getName()))) { 500 return false; 501 } 502 503 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) 504 return sincosUseNative(aCI, FInfo); 505 506 FInfo.setPrefix(AMDGPULibFunc::NATIVE); 507 FunctionCallee F = getFunction(aCI->getModule(), FInfo); 508 if (!F) 509 return false; 510 511 aCI->setCalledFunction(F); 512 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 513 << " with native version"); 514 return true; 515 } 516 517 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe 518 // builtin, with appended type size and alignment arguments, where 2 or 4 519 // indicates the original number of arguments. The library has optimized version 520 // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same 521 // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N 522 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., 523 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. 524 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 525 const FuncInfo &FInfo) { 526 auto *Callee = CI->getCalledFunction(); 527 if (!Callee->isDeclaration()) 528 return false; 529 530 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); 531 auto *M = Callee->getParent(); 532 std::string Name = std::string(Callee->getName()); 533 auto NumArg = CI->arg_size(); 534 if (NumArg != 4 && NumArg != 6) 535 return false; 536 ConstantInt *PacketSize = 537 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2)); 538 ConstantInt *PacketAlign = 539 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1)); 540 if (!PacketSize || !PacketAlign) 541 return false; 542 543 unsigned Size = PacketSize->getZExtValue(); 544 Align Alignment = PacketAlign->getAlignValue(); 545 if (Alignment != Size) 546 return false; 547 548 unsigned PtrArgLoc = CI->arg_size() - 3; 549 Value *PtrArg = CI->getArgOperand(PtrArgLoc); 550 Type *PtrTy = PtrArg->getType(); 551 552 SmallVector<llvm::Type *, 6> ArgTys; 553 for (unsigned I = 0; I != PtrArgLoc; ++I) 554 ArgTys.push_back(CI->getArgOperand(I)->getType()); 555 ArgTys.push_back(PtrTy); 556 557 Name = Name + "_" + std::to_string(Size); 558 auto *FTy = FunctionType::get(Callee->getReturnType(), 559 ArrayRef<Type *>(ArgTys), false); 560 AMDGPULibFunc NewLibFunc(Name, FTy); 561 FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); 562 if (!F) 563 return false; 564 565 auto *BCast = B.CreatePointerCast(PtrArg, PtrTy); 566 SmallVector<Value *, 6> Args; 567 for (unsigned I = 0; I != PtrArgLoc; ++I) 568 Args.push_back(CI->getArgOperand(I)); 569 Args.push_back(BCast); 570 571 auto *NCI = B.CreateCall(F, Args); 572 NCI->setAttributes(CI->getAttributes()); 573 CI->replaceAllUsesWith(NCI); 574 CI->dropAllReferences(); 575 CI->eraseFromParent(); 576 577 return true; 578 } 579 580 static bool isKnownIntegral(const Value *V, const DataLayout &DL, 581 FastMathFlags FMF) { 582 if (isa<UndefValue>(V)) 583 return true; 584 585 if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) 586 return CF->getValueAPF().isInteger(); 587 588 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 589 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 590 Constant *ConstElt = CDV->getElementAsConstant(i); 591 if (isa<UndefValue>(ConstElt)) 592 continue; 593 const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt); 594 if (!CFP || !CFP->getValue().isInteger()) 595 return false; 596 } 597 598 return true; 599 } 600 601 const Instruction *I = dyn_cast<Instruction>(V); 602 if (!I) 603 return false; 604 605 switch (I->getOpcode()) { 606 case Instruction::SIToFP: 607 case Instruction::UIToFP: 608 // TODO: Could check nofpclass(inf) on incoming argument 609 if (FMF.noInfs()) 610 return true; 611 612 // Need to check int size cannot produce infinity, which computeKnownFPClass 613 // knows how to do already. 614 return isKnownNeverInfinity(I, DL); 615 default: 616 break; 617 } 618 619 return false; 620 } 621 622 // This function returns false if no change; return true otherwise. 623 bool AMDGPULibCalls::fold(CallInst *CI) { 624 Function *Callee = CI->getCalledFunction(); 625 // Ignore indirect calls. 626 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin()) 627 return false; 628 629 FuncInfo FInfo; 630 if (!parseFunctionName(Callee->getName(), FInfo)) 631 return false; 632 633 // Further check the number of arguments to see if they match. 634 // TODO: Check calling convention matches too 635 if (!FInfo.isCompatibleSignature(CI->getFunctionType())) 636 return false; 637 638 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n'); 639 640 if (TDOFold(CI, FInfo)) 641 return true; 642 643 IRBuilder<> B(CI); 644 645 if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) { 646 // Under unsafe-math, evaluate calls if possible. 647 // According to Brian Sumner, we can do this for all f32 function calls 648 // using host's double function calls. 649 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo)) 650 return true; 651 652 // Copy fast flags from the original call. 653 FastMathFlags FMF = FPOp->getFastMathFlags(); 654 B.setFastMathFlags(FMF); 655 656 // Specialized optimizations for each function call. 657 // 658 // TODO: Handle other simple intrinsic wrappers. Sqrt. 659 // 660 // TODO: Handle native functions 661 switch (FInfo.getId()) { 662 case AMDGPULibFunc::EI_EXP: 663 if (FMF.none()) 664 return false; 665 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp, 666 FMF.approxFunc()); 667 case AMDGPULibFunc::EI_EXP2: 668 if (FMF.none()) 669 return false; 670 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2, 671 FMF.approxFunc()); 672 case AMDGPULibFunc::EI_LOG: 673 if (FMF.none()) 674 return false; 675 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log, 676 FMF.approxFunc()); 677 case AMDGPULibFunc::EI_LOG2: 678 if (FMF.none()) 679 return false; 680 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2, 681 FMF.approxFunc()); 682 case AMDGPULibFunc::EI_LOG10: 683 if (FMF.none()) 684 return false; 685 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10, 686 FMF.approxFunc()); 687 case AMDGPULibFunc::EI_FMIN: 688 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum, 689 true, true); 690 case AMDGPULibFunc::EI_FMAX: 691 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum, 692 true, true); 693 case AMDGPULibFunc::EI_FMA: 694 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true, 695 true); 696 case AMDGPULibFunc::EI_MAD: 697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd, 698 true, true); 699 case AMDGPULibFunc::EI_FABS: 700 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true, 701 true, true); 702 case AMDGPULibFunc::EI_COPYSIGN: 703 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign, 704 true, true, true); 705 case AMDGPULibFunc::EI_FLOOR: 706 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true, 707 true); 708 case AMDGPULibFunc::EI_CEIL: 709 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true, 710 true); 711 case AMDGPULibFunc::EI_TRUNC: 712 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true, 713 true); 714 case AMDGPULibFunc::EI_RINT: 715 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true, 716 true); 717 case AMDGPULibFunc::EI_ROUND: 718 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true, 719 true); 720 case AMDGPULibFunc::EI_LDEXP: { 721 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true)) 722 return false; 723 724 Value *Arg1 = CI->getArgOperand(1); 725 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType()); 726 VecTy && !isa<VectorType>(Arg1->getType())) { 727 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1); 728 CI->setArgOperand(1, SplatArg1); 729 } 730 731 CI->setCalledFunction(Intrinsic::getDeclaration( 732 CI->getModule(), Intrinsic::ldexp, 733 {CI->getType(), CI->getArgOperand(1)->getType()})); 734 return true; 735 } 736 case AMDGPULibFunc::EI_POW: { 737 Module *M = Callee->getParent(); 738 AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo); 739 FunctionCallee PowrFunc = getFunction(M, PowrInfo); 740 CallInst *Call = cast<CallInst>(FPOp); 741 742 // pow(x, y) -> powr(x, y) for x >= -0.0 743 // TODO: Account for flags on current call 744 if (PowrFunc && 745 cannotBeOrderedLessThanZero(FPOp->getOperand(0), M->getDataLayout(), 746 TLInfo, 0, AC, Call, DT)) { 747 Call->setCalledFunction(PowrFunc); 748 return fold_pow(FPOp, B, PowrInfo) || true; 749 } 750 751 // pow(x, y) -> pown(x, y) for known integral y 752 if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(), 753 FPOp->getFastMathFlags())) { 754 FunctionType *PownType = getPownType(CI->getFunctionType()); 755 AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true); 756 FunctionCallee PownFunc = getFunction(M, PownInfo); 757 if (PownFunc) { 758 // TODO: If the incoming integral value is an sitofp/uitofp, it won't 759 // fold out without a known range. We can probably take the source 760 // value directly. 761 Value *CastedArg = 762 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1)); 763 // Have to drop any nofpclass attributes on the original call site. 764 Call->removeParamAttrs( 765 1, AttributeFuncs::typeIncompatible(CastedArg->getType())); 766 Call->setCalledFunction(PownFunc); 767 Call->setArgOperand(1, CastedArg); 768 return fold_pow(FPOp, B, PownInfo) || true; 769 } 770 } 771 772 return fold_pow(FPOp, B, FInfo); 773 } 774 case AMDGPULibFunc::EI_POWR: 775 case AMDGPULibFunc::EI_POWN: 776 return fold_pow(FPOp, B, FInfo); 777 case AMDGPULibFunc::EI_ROOTN: 778 return fold_rootn(FPOp, B, FInfo); 779 case AMDGPULibFunc::EI_SQRT: 780 return fold_sqrt(FPOp, B, FInfo); 781 case AMDGPULibFunc::EI_COS: 782 case AMDGPULibFunc::EI_SIN: 783 return fold_sincos(FPOp, B, FInfo); 784 default: 785 break; 786 } 787 } else { 788 // Specialized optimizations for each function call 789 switch (FInfo.getId()) { 790 case AMDGPULibFunc::EI_READ_PIPE_2: 791 case AMDGPULibFunc::EI_READ_PIPE_4: 792 case AMDGPULibFunc::EI_WRITE_PIPE_2: 793 case AMDGPULibFunc::EI_WRITE_PIPE_4: 794 return fold_read_write_pipe(CI, B, FInfo); 795 default: 796 break; 797 } 798 } 799 800 return false; 801 } 802 803 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { 804 // Table-Driven optimization 805 const TableRef tr = getOptTable(FInfo.getId()); 806 if (tr.empty()) 807 return false; 808 809 int const sz = (int)tr.size(); 810 Value *opr0 = CI->getArgOperand(0); 811 812 if (getVecSize(FInfo) > 1) { 813 if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { 814 SmallVector<double, 0> DVal; 815 for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { 816 ConstantFP *eltval = dyn_cast<ConstantFP>( 817 CV->getElementAsConstant((unsigned)eltNo)); 818 assert(eltval && "Non-FP arguments in math function!"); 819 bool found = false; 820 for (int i=0; i < sz; ++i) { 821 if (eltval->isExactlyValue(tr[i].input)) { 822 DVal.push_back(tr[i].result); 823 found = true; 824 break; 825 } 826 } 827 if (!found) { 828 // This vector constants not handled yet. 829 return false; 830 } 831 } 832 LLVMContext &context = CI->getParent()->getParent()->getContext(); 833 Constant *nval; 834 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 835 SmallVector<float, 0> FVal; 836 for (unsigned i = 0; i < DVal.size(); ++i) { 837 FVal.push_back((float)DVal[i]); 838 } 839 ArrayRef<float> tmp(FVal); 840 nval = ConstantDataVector::get(context, tmp); 841 } else { // F64 842 ArrayRef<double> tmp(DVal); 843 nval = ConstantDataVector::get(context, tmp); 844 } 845 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 846 replaceCall(CI, nval); 847 return true; 848 } 849 } else { 850 // Scalar version 851 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { 852 for (int i = 0; i < sz; ++i) { 853 if (CF->isExactlyValue(tr[i].input)) { 854 Value *nval = ConstantFP::get(CF->getType(), tr[i].result); 855 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 856 replaceCall(CI, nval); 857 return true; 858 } 859 } 860 } 861 } 862 863 return false; 864 } 865 866 namespace llvm { 867 static double log2(double V) { 868 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L 869 return ::log2(V); 870 #else 871 return log(V) / numbers::ln2; 872 #endif 873 } 874 } 875 876 bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, 877 const FuncInfo &FInfo) { 878 assert((FInfo.getId() == AMDGPULibFunc::EI_POW || 879 FInfo.getId() == AMDGPULibFunc::EI_POWR || 880 FInfo.getId() == AMDGPULibFunc::EI_POWN) && 881 "fold_pow: encounter a wrong function call"); 882 883 Module *M = B.GetInsertBlock()->getModule(); 884 Type *eltType = FPOp->getType()->getScalarType(); 885 Value *opr0 = FPOp->getOperand(0); 886 Value *opr1 = FPOp->getOperand(1); 887 888 const APFloat *CF = nullptr; 889 const APInt *CINT = nullptr; 890 if (!match(opr1, m_APFloatAllowUndef(CF))) 891 match(opr1, m_APIntAllowUndef(CINT)); 892 893 // 0x1111111 means that we don't do anything for this call. 894 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); 895 896 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) { 897 // pow/powr/pown(x, 0) == 1 898 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n"); 899 Constant *cnval = ConstantFP::get(eltType, 1.0); 900 if (getVecSize(FInfo) > 1) { 901 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 902 } 903 replaceCall(FPOp, cnval); 904 return true; 905 } 906 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { 907 // pow/powr/pown(x, 1.0) = x 908 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); 909 replaceCall(FPOp, opr0); 910 return true; 911 } 912 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { 913 // pow/powr/pown(x, 2.0) = x*x 914 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * " 915 << *opr0 << "\n"); 916 Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); 917 replaceCall(FPOp, nval); 918 return true; 919 } 920 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { 921 // pow/powr/pown(x, -1.0) = 1.0/x 922 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n"); 923 Constant *cnval = ConstantFP::get(eltType, 1.0); 924 if (getVecSize(FInfo) > 1) { 925 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 926 } 927 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip"); 928 replaceCall(FPOp, nval); 929 return true; 930 } 931 932 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { 933 // pow[r](x, [-]0.5) = sqrt(x) 934 bool issqrt = CF->isExactlyValue(0.5); 935 if (FunctionCallee FPExpr = 936 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT 937 : AMDGPULibFunc::EI_RSQRT, 938 FInfo))) { 939 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName() 940 << '(' << *opr0 << ")\n"); 941 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" 942 : "__pow2rsqrt"); 943 replaceCall(FPOp, nval); 944 return true; 945 } 946 } 947 948 if (!isUnsafeFiniteOnlyMath(FPOp)) 949 return false; 950 951 // Unsafe Math optimization 952 953 // Remember that ci_opr1 is set if opr1 is integral 954 if (CF) { 955 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) 956 ? (double)CF->convertToFloat() 957 : CF->convertToDouble(); 958 int ival = (int)dval; 959 if ((double)ival == dval) { 960 ci_opr1 = ival; 961 } else 962 ci_opr1 = 0x11111111; 963 } 964 965 // pow/powr/pown(x, c) = [1/](x*x*..x); where 966 // trunc(c) == c && the number of x == c && |c| <= 12 967 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1; 968 if (abs_opr1 <= 12) { 969 Constant *cnval; 970 Value *nval; 971 if (abs_opr1 == 0) { 972 cnval = ConstantFP::get(eltType, 1.0); 973 if (getVecSize(FInfo) > 1) { 974 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 975 } 976 nval = cnval; 977 } else { 978 Value *valx2 = nullptr; 979 nval = nullptr; 980 while (abs_opr1 > 0) { 981 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0; 982 if (abs_opr1 & 1) { 983 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2; 984 } 985 abs_opr1 >>= 1; 986 } 987 } 988 989 if (ci_opr1 < 0) { 990 cnval = ConstantFP::get(eltType, 1.0); 991 if (getVecSize(FInfo) > 1) { 992 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 993 } 994 nval = B.CreateFDiv(cnval, nval, "__1powprod"); 995 } 996 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 997 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 998 << ")\n"); 999 replaceCall(FPOp, nval); 1000 return true; 1001 } 1002 1003 // If we should use the generic intrinsic instead of emitting a libcall 1004 const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy(); 1005 1006 // powr ---> exp2(y * log2(x)) 1007 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) 1008 FunctionCallee ExpExpr; 1009 if (ShouldUseIntrinsic) 1010 ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()}); 1011 else { 1012 ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); 1013 if (!ExpExpr) 1014 return false; 1015 } 1016 1017 bool needlog = false; 1018 bool needabs = false; 1019 bool needcopysign = false; 1020 Constant *cnval = nullptr; 1021 if (getVecSize(FInfo) == 1) { 1022 CF = nullptr; 1023 match(opr0, m_APFloatAllowUndef(CF)); 1024 1025 if (CF) { 1026 double V = (getArgType(FInfo) == AMDGPULibFunc::F32) 1027 ? (double)CF->convertToFloat() 1028 : CF->convertToDouble(); 1029 1030 V = log2(std::abs(V)); 1031 cnval = ConstantFP::get(eltType, V); 1032 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) && 1033 CF->isNegative(); 1034 } else { 1035 needlog = true; 1036 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR && 1037 (!CF || CF->isNegative()); 1038 } 1039 } else { 1040 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); 1041 1042 if (!CDV) { 1043 needlog = true; 1044 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; 1045 } else { 1046 assert ((int)CDV->getNumElements() == getVecSize(FInfo) && 1047 "Wrong vector size detected"); 1048 1049 SmallVector<double, 0> DVal; 1050 for (int i=0; i < getVecSize(FInfo); ++i) { 1051 double V = CDV->getElementAsAPFloat(i).convertToDouble(); 1052 if (V < 0.0) needcopysign = true; 1053 V = log2(std::abs(V)); 1054 DVal.push_back(V); 1055 } 1056 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 1057 SmallVector<float, 0> FVal; 1058 for (unsigned i=0; i < DVal.size(); ++i) { 1059 FVal.push_back((float)DVal[i]); 1060 } 1061 ArrayRef<float> tmp(FVal); 1062 cnval = ConstantDataVector::get(M->getContext(), tmp); 1063 } else { 1064 ArrayRef<double> tmp(DVal); 1065 cnval = ConstantDataVector::get(M->getContext(), tmp); 1066 } 1067 } 1068 } 1069 1070 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) { 1071 // We cannot handle corner cases for a general pow() function, give up 1072 // unless y is a constant integral value. Then proceed as if it were pown. 1073 if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags())) 1074 return false; 1075 } 1076 1077 Value *nval; 1078 if (needabs) { 1079 nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs"); 1080 } else { 1081 nval = cnval ? cnval : opr0; 1082 } 1083 if (needlog) { 1084 FunctionCallee LogExpr; 1085 if (ShouldUseIntrinsic) { 1086 LogExpr = 1087 Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()}); 1088 } else { 1089 LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); 1090 if (!LogExpr) 1091 return false; 1092 } 1093 1094 nval = CreateCallEx(B,LogExpr, nval, "__log2"); 1095 } 1096 1097 if (FInfo.getId() == AMDGPULibFunc::EI_POWN) { 1098 // convert int(32) to fp(f32 or f64) 1099 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F"); 1100 } 1101 nval = B.CreateFMul(opr1, nval, "__ylogx"); 1102 nval = CreateCallEx(B,ExpExpr, nval, "__exp2"); 1103 1104 if (needcopysign) { 1105 Value *opr_n; 1106 Type* rTy = opr0->getType(); 1107 Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits()); 1108 Type *nTy = nTyS; 1109 if (const auto *vTy = dyn_cast<FixedVectorType>(rTy)) 1110 nTy = FixedVectorType::get(nTyS, vTy); 1111 unsigned size = nTy->getScalarSizeInBits(); 1112 opr_n = FPOp->getOperand(1); 1113 if (opr_n->getType()->isIntegerTy()) 1114 opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou"); 1115 else 1116 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou"); 1117 1118 Value *sign = B.CreateShl(opr_n, size-1, "__yeven"); 1119 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign"); 1120 nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign); 1121 nval = B.CreateBitCast(nval, opr0->getType()); 1122 } 1123 1124 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 1125 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"); 1126 replaceCall(FPOp, nval); 1127 1128 return true; 1129 } 1130 1131 bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, 1132 const FuncInfo &FInfo) { 1133 // skip vector function 1134 if (getVecSize(FInfo) != 1) 1135 return false; 1136 1137 Value *opr0 = FPOp->getOperand(0); 1138 Value *opr1 = FPOp->getOperand(1); 1139 1140 ConstantInt *CINT = dyn_cast<ConstantInt>(opr1); 1141 if (!CINT) { 1142 return false; 1143 } 1144 int ci_opr1 = (int)CINT->getSExtValue(); 1145 if (ci_opr1 == 1) { // rootn(x, 1) = x 1146 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); 1147 replaceCall(FPOp, opr0); 1148 return true; 1149 } 1150 1151 Module *M = B.GetInsertBlock()->getModule(); 1152 if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) 1153 if (FunctionCallee FPExpr = 1154 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { 1155 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 1156 << ")\n"); 1157 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); 1158 replaceCall(FPOp, nval); 1159 return true; 1160 } 1161 } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) 1162 if (FunctionCallee FPExpr = 1163 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { 1164 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0 1165 << ")\n"); 1166 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); 1167 replaceCall(FPOp, nval); 1168 return true; 1169 } 1170 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x 1171 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n"); 1172 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), 1173 opr0, 1174 "__rootn2div"); 1175 replaceCall(FPOp, nval); 1176 return true; 1177 } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x) 1178 if (FunctionCallee FPExpr = 1179 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { 1180 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0 1181 << ")\n"); 1182 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); 1183 replaceCall(FPOp, nval); 1184 return true; 1185 } 1186 } 1187 return false; 1188 } 1189 1190 // Get a scalar native builtin single argument FP function 1191 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, 1192 const FuncInfo &FInfo) { 1193 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) 1194 return nullptr; 1195 FuncInfo nf = FInfo; 1196 nf.setPrefix(AMDGPULibFunc::NATIVE); 1197 return getFunction(M, nf); 1198 } 1199 1200 // Some library calls are just wrappers around llvm intrinsics, but compiled 1201 // conservatively. Preserve the flags from the original call site by 1202 // substituting them with direct calls with all the flags. 1203 bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 1204 bool AllowMinSizeF32, 1205 bool AllowF64, 1206 bool AllowStrictFP) { 1207 Type *FltTy = CI->getType()->getScalarType(); 1208 const bool IsF32 = FltTy->isFloatTy(); 1209 1210 // f64 intrinsics aren't implemented for most operations. 1211 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy())) 1212 return false; 1213 1214 // We're implicitly inlining by replacing the libcall with the intrinsic, so 1215 // don't do it for noinline call sites. 1216 if (CI->isNoInline()) 1217 return false; 1218 1219 const Function *ParentF = CI->getFunction(); 1220 // TODO: Handle strictfp 1221 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP)) 1222 return false; 1223 1224 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize()) 1225 return false; 1226 return true; 1227 } 1228 1229 void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, 1230 CallInst *CI, 1231 Intrinsic::ID IntrID) { 1232 if (CI->arg_size() == 2) { 1233 Value *Arg0 = CI->getArgOperand(0); 1234 Value *Arg1 = CI->getArgOperand(1); 1235 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType()); 1236 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType()); 1237 if (Arg0VecTy && !Arg1VecTy) { 1238 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1); 1239 CI->setArgOperand(1, SplatRHS); 1240 } else if (!Arg0VecTy && Arg1VecTy) { 1241 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0); 1242 CI->setArgOperand(0, SplatLHS); 1243 } 1244 } 1245 1246 CI->setCalledFunction( 1247 Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()})); 1248 } 1249 1250 bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic( 1251 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32, 1252 bool AllowF64, bool AllowStrictFP) { 1253 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64, 1254 AllowStrictFP)) 1255 return false; 1256 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID); 1257 return true; 1258 } 1259 1260 // fold sqrt -> native_sqrt (x) 1261 bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, 1262 const FuncInfo &FInfo) { 1263 if (!isUnsafeMath(FPOp)) 1264 return false; 1265 1266 if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && 1267 (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { 1268 Module *M = B.GetInsertBlock()->getModule(); 1269 1270 if (FunctionCallee FPExpr = getNativeFunction( 1271 M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { 1272 Value *opr0 = FPOp->getOperand(0); 1273 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 1274 << "sqrt(" << *opr0 << ")\n"); 1275 Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); 1276 replaceCall(FPOp, nval); 1277 return true; 1278 } 1279 } 1280 return false; 1281 } 1282 1283 std::tuple<Value *, Value *, Value *> 1284 AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B, 1285 FunctionCallee Fsincos) { 1286 DebugLoc DL = B.getCurrentDebugLocation(); 1287 Function *F = B.GetInsertBlock()->getParent(); 1288 B.SetInsertPointPastAllocas(F); 1289 1290 AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_"); 1291 1292 if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { 1293 // If the argument is an instruction, it must dominate all uses so put our 1294 // sincos call there. Otherwise, right after the allocas works well enough 1295 // if it's an argument or constant. 1296 1297 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); 1298 1299 // SetInsertPoint unwelcomely always tries to set the debug loc. 1300 B.SetCurrentDebugLocation(DL); 1301 } 1302 1303 Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1); 1304 1305 // The allocaInst allocates the memory in private address space. This need 1306 // to be addrspacecasted to point to the address space of cos pointer type. 1307 // In OpenCL 2.0 this is generic, while in 1.2 that is private. 1308 Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy); 1309 1310 CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc); 1311 1312 // TODO: Is it worth trying to preserve the location for the cos calls for the 1313 // load? 1314 1315 LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc); 1316 return {SinCos, LoadCos, SinCos}; 1317 } 1318 1319 // fold sin, cos -> sincos. 1320 bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, 1321 const FuncInfo &fInfo) { 1322 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN || 1323 fInfo.getId() == AMDGPULibFunc::EI_COS); 1324 1325 if ((getArgType(fInfo) != AMDGPULibFunc::F32 && 1326 getArgType(fInfo) != AMDGPULibFunc::F64) || 1327 fInfo.getPrefix() != AMDGPULibFunc::NOPFX) 1328 return false; 1329 1330 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; 1331 1332 Value *CArgVal = FPOp->getOperand(0); 1333 CallInst *CI = cast<CallInst>(FPOp); 1334 1335 Function *F = B.GetInsertBlock()->getParent(); 1336 Module *M = F->getParent(); 1337 1338 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer 1339 // implementation. Prefer the private form if available. 1340 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo); 1341 SinCosLibFuncPrivate.getLeads()[0].PtrKind = 1342 AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS); 1343 1344 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo); 1345 SinCosLibFuncGeneric.getLeads()[0].PtrKind = 1346 AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); 1347 1348 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate); 1349 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric); 1350 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric; 1351 if (!FSinCos) 1352 return false; 1353 1354 SmallVector<CallInst *> SinCalls; 1355 SmallVector<CallInst *> CosCalls; 1356 SmallVector<CallInst *> SinCosCalls; 1357 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN, 1358 fInfo); 1359 const std::string PairName = PartnerInfo.mangle(); 1360 1361 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName; 1362 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName(); 1363 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle(); 1364 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle(); 1365 1366 // Intersect the two sets of flags. 1367 FastMathFlags FMF = FPOp->getFastMathFlags(); 1368 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath); 1369 1370 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()}; 1371 1372 for (User* U : CArgVal->users()) { 1373 CallInst *XI = dyn_cast<CallInst>(U); 1374 if (!XI || XI->getFunction() != F || XI->isNoBuiltin()) 1375 continue; 1376 1377 Function *UCallee = XI->getCalledFunction(); 1378 if (!UCallee) 1379 continue; 1380 1381 bool Handled = true; 1382 1383 if (UCallee->getName() == SinName) 1384 SinCalls.push_back(XI); 1385 else if (UCallee->getName() == CosName) 1386 CosCalls.push_back(XI); 1387 else if (UCallee->getName() == SinCosPrivateName || 1388 UCallee->getName() == SinCosGenericName) 1389 SinCosCalls.push_back(XI); 1390 else 1391 Handled = false; 1392 1393 if (Handled) { 1394 MergeDbgLocs.push_back(XI->getDebugLoc()); 1395 auto *OtherOp = cast<FPMathOperator>(XI); 1396 FMF &= OtherOp->getFastMathFlags(); 1397 FPMath = MDNode::getMostGenericFPMath( 1398 FPMath, XI->getMetadata(LLVMContext::MD_fpmath)); 1399 } 1400 } 1401 1402 if (SinCalls.empty() || CosCalls.empty()) 1403 return false; 1404 1405 B.setFastMathFlags(FMF); 1406 B.setDefaultFPMathTag(FPMath); 1407 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs); 1408 B.SetCurrentDebugLocation(DbgLoc); 1409 1410 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos); 1411 1412 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) { 1413 for (CallInst *C : Calls) 1414 C->replaceAllUsesWith(Res); 1415 1416 // Leave the other dead instructions to avoid clobbering iterators. 1417 }; 1418 1419 replaceTrigInsts(SinCalls, Sin); 1420 replaceTrigInsts(CosCalls, Cos); 1421 replaceTrigInsts(SinCosCalls, SinCos); 1422 1423 // It's safe to delete the original now. 1424 CI->eraseFromParent(); 1425 return true; 1426 } 1427 1428 bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, 1429 double &Res1, Constant *copr0, 1430 Constant *copr1) { 1431 // By default, opr0/opr1/opr3 holds values of float/double type. 1432 // If they are not float/double, each function has to its 1433 // operand separately. 1434 double opr0 = 0.0, opr1 = 0.0; 1435 ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0); 1436 ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1); 1437 if (fpopr0) { 1438 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64) 1439 ? fpopr0->getValueAPF().convertToDouble() 1440 : (double)fpopr0->getValueAPF().convertToFloat(); 1441 } 1442 1443 if (fpopr1) { 1444 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64) 1445 ? fpopr1->getValueAPF().convertToDouble() 1446 : (double)fpopr1->getValueAPF().convertToFloat(); 1447 } 1448 1449 switch (FInfo.getId()) { 1450 default : return false; 1451 1452 case AMDGPULibFunc::EI_ACOS: 1453 Res0 = acos(opr0); 1454 return true; 1455 1456 case AMDGPULibFunc::EI_ACOSH: 1457 // acosh(x) == log(x + sqrt(x*x - 1)) 1458 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0)); 1459 return true; 1460 1461 case AMDGPULibFunc::EI_ACOSPI: 1462 Res0 = acos(opr0) / MATH_PI; 1463 return true; 1464 1465 case AMDGPULibFunc::EI_ASIN: 1466 Res0 = asin(opr0); 1467 return true; 1468 1469 case AMDGPULibFunc::EI_ASINH: 1470 // asinh(x) == log(x + sqrt(x*x + 1)) 1471 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0)); 1472 return true; 1473 1474 case AMDGPULibFunc::EI_ASINPI: 1475 Res0 = asin(opr0) / MATH_PI; 1476 return true; 1477 1478 case AMDGPULibFunc::EI_ATAN: 1479 Res0 = atan(opr0); 1480 return true; 1481 1482 case AMDGPULibFunc::EI_ATANH: 1483 // atanh(x) == (log(x+1) - log(x-1))/2; 1484 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0; 1485 return true; 1486 1487 case AMDGPULibFunc::EI_ATANPI: 1488 Res0 = atan(opr0) / MATH_PI; 1489 return true; 1490 1491 case AMDGPULibFunc::EI_CBRT: 1492 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0); 1493 return true; 1494 1495 case AMDGPULibFunc::EI_COS: 1496 Res0 = cos(opr0); 1497 return true; 1498 1499 case AMDGPULibFunc::EI_COSH: 1500 Res0 = cosh(opr0); 1501 return true; 1502 1503 case AMDGPULibFunc::EI_COSPI: 1504 Res0 = cos(MATH_PI * opr0); 1505 return true; 1506 1507 case AMDGPULibFunc::EI_EXP: 1508 Res0 = exp(opr0); 1509 return true; 1510 1511 case AMDGPULibFunc::EI_EXP2: 1512 Res0 = pow(2.0, opr0); 1513 return true; 1514 1515 case AMDGPULibFunc::EI_EXP10: 1516 Res0 = pow(10.0, opr0); 1517 return true; 1518 1519 case AMDGPULibFunc::EI_LOG: 1520 Res0 = log(opr0); 1521 return true; 1522 1523 case AMDGPULibFunc::EI_LOG2: 1524 Res0 = log(opr0) / log(2.0); 1525 return true; 1526 1527 case AMDGPULibFunc::EI_LOG10: 1528 Res0 = log(opr0) / log(10.0); 1529 return true; 1530 1531 case AMDGPULibFunc::EI_RSQRT: 1532 Res0 = 1.0 / sqrt(opr0); 1533 return true; 1534 1535 case AMDGPULibFunc::EI_SIN: 1536 Res0 = sin(opr0); 1537 return true; 1538 1539 case AMDGPULibFunc::EI_SINH: 1540 Res0 = sinh(opr0); 1541 return true; 1542 1543 case AMDGPULibFunc::EI_SINPI: 1544 Res0 = sin(MATH_PI * opr0); 1545 return true; 1546 1547 case AMDGPULibFunc::EI_TAN: 1548 Res0 = tan(opr0); 1549 return true; 1550 1551 case AMDGPULibFunc::EI_TANH: 1552 Res0 = tanh(opr0); 1553 return true; 1554 1555 case AMDGPULibFunc::EI_TANPI: 1556 Res0 = tan(MATH_PI * opr0); 1557 return true; 1558 1559 // two-arg functions 1560 case AMDGPULibFunc::EI_POW: 1561 case AMDGPULibFunc::EI_POWR: 1562 Res0 = pow(opr0, opr1); 1563 return true; 1564 1565 case AMDGPULibFunc::EI_POWN: { 1566 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 1567 double val = (double)iopr1->getSExtValue(); 1568 Res0 = pow(opr0, val); 1569 return true; 1570 } 1571 return false; 1572 } 1573 1574 case AMDGPULibFunc::EI_ROOTN: { 1575 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 1576 double val = (double)iopr1->getSExtValue(); 1577 Res0 = pow(opr0, 1.0 / val); 1578 return true; 1579 } 1580 return false; 1581 } 1582 1583 // with ptr arg 1584 case AMDGPULibFunc::EI_SINCOS: 1585 Res0 = sin(opr0); 1586 Res1 = cos(opr0); 1587 return true; 1588 } 1589 1590 return false; 1591 } 1592 1593 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { 1594 int numArgs = (int)aCI->arg_size(); 1595 if (numArgs > 3) 1596 return false; 1597 1598 Constant *copr0 = nullptr; 1599 Constant *copr1 = nullptr; 1600 if (numArgs > 0) { 1601 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) 1602 return false; 1603 } 1604 1605 if (numArgs > 1) { 1606 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { 1607 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) 1608 return false; 1609 } 1610 } 1611 1612 // At this point, all arguments to aCI are constants. 1613 1614 // max vector size is 16, and sincos will generate two results. 1615 double DVal0[16], DVal1[16]; 1616 int FuncVecSize = getVecSize(FInfo); 1617 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); 1618 if (FuncVecSize == 1) { 1619 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) { 1620 return false; 1621 } 1622 } else { 1623 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); 1624 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); 1625 for (int i = 0; i < FuncVecSize; ++i) { 1626 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; 1627 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; 1628 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) { 1629 return false; 1630 } 1631 } 1632 } 1633 1634 LLVMContext &context = aCI->getContext(); 1635 Constant *nval0, *nval1; 1636 if (FuncVecSize == 1) { 1637 nval0 = ConstantFP::get(aCI->getType(), DVal0[0]); 1638 if (hasTwoResults) 1639 nval1 = ConstantFP::get(aCI->getType(), DVal1[0]); 1640 } else { 1641 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 1642 SmallVector <float, 0> FVal0, FVal1; 1643 for (int i = 0; i < FuncVecSize; ++i) 1644 FVal0.push_back((float)DVal0[i]); 1645 ArrayRef<float> tmp0(FVal0); 1646 nval0 = ConstantDataVector::get(context, tmp0); 1647 if (hasTwoResults) { 1648 for (int i = 0; i < FuncVecSize; ++i) 1649 FVal1.push_back((float)DVal1[i]); 1650 ArrayRef<float> tmp1(FVal1); 1651 nval1 = ConstantDataVector::get(context, tmp1); 1652 } 1653 } else { 1654 ArrayRef<double> tmp0(DVal0); 1655 nval0 = ConstantDataVector::get(context, tmp0); 1656 if (hasTwoResults) { 1657 ArrayRef<double> tmp1(DVal1); 1658 nval1 = ConstantDataVector::get(context, tmp1); 1659 } 1660 } 1661 } 1662 1663 if (hasTwoResults) { 1664 // sincos 1665 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS && 1666 "math function with ptr arg not supported yet"); 1667 new StoreInst(nval1, aCI->getArgOperand(1), aCI); 1668 } 1669 1670 replaceCall(aCI, nval0); 1671 return true; 1672 } 1673 1674 PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, 1675 FunctionAnalysisManager &AM) { 1676 AMDGPULibCalls Simplifier; 1677 Simplifier.initNativeFuncs(); 1678 Simplifier.initFunction(F, AM); 1679 1680 bool Changed = false; 1681 1682 LLVM_DEBUG(dbgs() << "AMDIC: process function "; 1683 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); 1684 1685 for (auto &BB : F) { 1686 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1687 // Ignore non-calls. 1688 CallInst *CI = dyn_cast<CallInst>(I); 1689 ++I; 1690 1691 if (CI) { 1692 if (Simplifier.fold(CI)) 1693 Changed = true; 1694 } 1695 } 1696 } 1697 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1698 } 1699 1700 PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, 1701 FunctionAnalysisManager &AM) { 1702 if (UseNative.empty()) 1703 return PreservedAnalyses::all(); 1704 1705 AMDGPULibCalls Simplifier; 1706 Simplifier.initNativeFuncs(); 1707 Simplifier.initFunction(F, AM); 1708 1709 bool Changed = false; 1710 for (auto &BB : F) { 1711 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1712 // Ignore non-calls. 1713 CallInst *CI = dyn_cast<CallInst>(I); 1714 ++I; 1715 if (CI && Simplifier.useNative(CI)) 1716 Changed = true; 1717 } 1718 } 1719 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1720 } 1721