1 //===- AMDGPULibCalls.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file does AMD library function optimizations. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPULibFunc.h" 16 #include "GCNSubtarget.h" 17 #include "llvm/Analysis/AssumptionCache.h" 18 #include "llvm/Analysis/TargetLibraryInfo.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 #include "llvm/IR/AttributeMask.h" 21 #include "llvm/IR/Dominators.h" 22 #include "llvm/IR/IRBuilder.h" 23 #include "llvm/IR/IntrinsicInst.h" 24 #include "llvm/IR/IntrinsicsAMDGPU.h" 25 #include "llvm/IR/PatternMatch.h" 26 #include "llvm/InitializePasses.h" 27 #include <cmath> 28 29 #define DEBUG_TYPE "amdgpu-simplifylib" 30 31 using namespace llvm; 32 using namespace llvm::PatternMatch; 33 34 static cl::opt<bool> EnablePreLink("amdgpu-prelink", 35 cl::desc("Enable pre-link mode optimizations"), 36 cl::init(false), 37 cl::Hidden); 38 39 static cl::list<std::string> UseNative("amdgpu-use-native", 40 cl::desc("Comma separated list of functions to replace with native, or all"), 41 cl::CommaSeparated, cl::ValueOptional, 42 cl::Hidden); 43 44 #define MATH_PI numbers::pi 45 #define MATH_E numbers::e 46 #define MATH_SQRT2 numbers::sqrt2 47 #define MATH_SQRT1_2 numbers::inv_sqrt2 48 49 namespace llvm { 50 51 class AMDGPULibCalls { 52 private: 53 const TargetLibraryInfo *TLInfo = nullptr; 54 AssumptionCache *AC = nullptr; 55 DominatorTree *DT = nullptr; 56 57 typedef llvm::AMDGPULibFunc FuncInfo; 58 59 bool UnsafeFPMath = false; 60 61 // -fuse-native. 62 bool AllNative = false; 63 64 bool useNativeFunc(const StringRef F) const; 65 66 // Return a pointer (pointer expr) to the function if function definition with 67 // "FuncName" exists. It may create a new function prototype in pre-link mode. 68 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); 69 70 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo); 71 72 bool TDOFold(CallInst *CI, const FuncInfo &FInfo); 73 74 /* Specialized optimizations */ 75 76 // pow/powr/pown 77 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 78 79 // rootn 80 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 81 82 // -fuse-native for sincos 83 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); 84 85 // evaluate calls if calls' arguments are constants. 86 bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1, 87 Constant *copr0, Constant *copr1); 88 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); 89 90 // sqrt 91 bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 92 93 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value 94 /// of cos, sincos call). 95 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg, 96 FastMathFlags FMF, 97 IRBuilder<> &B, 98 FunctionCallee Fsincos); 99 100 // sin/cos 101 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 102 103 // __read_pipe/__write_pipe 104 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 105 const FuncInfo &FInfo); 106 107 // Get a scalar native builtin single argument FP function 108 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); 109 110 /// Substitute a call to a known libcall with an intrinsic call. If \p 111 /// AllowMinSize is true, allow the replacement in a minsize function. 112 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 113 bool AllowMinSizeF32 = false, 114 bool AllowF64 = false, 115 bool AllowStrictFP = false); 116 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 117 Intrinsic::ID IntrID); 118 119 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 120 Intrinsic::ID IntrID, 121 bool AllowMinSizeF32 = false, 122 bool AllowF64 = false, 123 bool AllowStrictFP = false); 124 125 protected: 126 bool isUnsafeMath(const FPMathOperator *FPOp) const; 127 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const; 128 129 bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const; 130 131 static void replaceCall(Instruction *I, Value *With) { 132 I->replaceAllUsesWith(With); 133 I->eraseFromParent(); 134 } 135 136 static void replaceCall(FPMathOperator *I, Value *With) { 137 replaceCall(cast<Instruction>(I), With); 138 } 139 140 public: 141 AMDGPULibCalls() {} 142 143 bool fold(CallInst *CI); 144 145 void initFunction(Function &F, FunctionAnalysisManager &FAM); 146 void initNativeFuncs(); 147 148 // Replace a normal math function call with that native version 149 bool useNative(CallInst *CI); 150 }; 151 152 } // end llvm namespace 153 154 template <typename IRB> 155 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, 156 const Twine &Name = "") { 157 CallInst *R = B.CreateCall(Callee, Arg, Name); 158 if (Function *F = dyn_cast<Function>(Callee.getCallee())) 159 R->setCallingConv(F->getCallingConv()); 160 return R; 161 } 162 163 template <typename IRB> 164 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, 165 Value *Arg2, const Twine &Name = "") { 166 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); 167 if (Function *F = dyn_cast<Function>(Callee.getCallee())) 168 R->setCallingConv(F->getCallingConv()); 169 return R; 170 } 171 172 static FunctionType *getPownType(FunctionType *FT) { 173 Type *PowNExpTy = Type::getInt32Ty(FT->getContext()); 174 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType())) 175 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount()); 176 177 return FunctionType::get(FT->getReturnType(), 178 {FT->getParamType(0), PowNExpTy}, false); 179 } 180 181 // Data structures for table-driven optimizations. 182 // FuncTbl works for both f32 and f64 functions with 1 input argument 183 184 struct TableEntry { 185 double result; 186 double input; 187 }; 188 189 /* a list of {result, input} */ 190 static const TableEntry tbl_acos[] = { 191 {MATH_PI / 2.0, 0.0}, 192 {MATH_PI / 2.0, -0.0}, 193 {0.0, 1.0}, 194 {MATH_PI, -1.0} 195 }; 196 static const TableEntry tbl_acosh[] = { 197 {0.0, 1.0} 198 }; 199 static const TableEntry tbl_acospi[] = { 200 {0.5, 0.0}, 201 {0.5, -0.0}, 202 {0.0, 1.0}, 203 {1.0, -1.0} 204 }; 205 static const TableEntry tbl_asin[] = { 206 {0.0, 0.0}, 207 {-0.0, -0.0}, 208 {MATH_PI / 2.0, 1.0}, 209 {-MATH_PI / 2.0, -1.0} 210 }; 211 static const TableEntry tbl_asinh[] = { 212 {0.0, 0.0}, 213 {-0.0, -0.0} 214 }; 215 static const TableEntry tbl_asinpi[] = { 216 {0.0, 0.0}, 217 {-0.0, -0.0}, 218 {0.5, 1.0}, 219 {-0.5, -1.0} 220 }; 221 static const TableEntry tbl_atan[] = { 222 {0.0, 0.0}, 223 {-0.0, -0.0}, 224 {MATH_PI / 4.0, 1.0}, 225 {-MATH_PI / 4.0, -1.0} 226 }; 227 static const TableEntry tbl_atanh[] = { 228 {0.0, 0.0}, 229 {-0.0, -0.0} 230 }; 231 static const TableEntry tbl_atanpi[] = { 232 {0.0, 0.0}, 233 {-0.0, -0.0}, 234 {0.25, 1.0}, 235 {-0.25, -1.0} 236 }; 237 static const TableEntry tbl_cbrt[] = { 238 {0.0, 0.0}, 239 {-0.0, -0.0}, 240 {1.0, 1.0}, 241 {-1.0, -1.0}, 242 }; 243 static const TableEntry tbl_cos[] = { 244 {1.0, 0.0}, 245 {1.0, -0.0} 246 }; 247 static const TableEntry tbl_cosh[] = { 248 {1.0, 0.0}, 249 {1.0, -0.0} 250 }; 251 static const TableEntry tbl_cospi[] = { 252 {1.0, 0.0}, 253 {1.0, -0.0} 254 }; 255 static const TableEntry tbl_erfc[] = { 256 {1.0, 0.0}, 257 {1.0, -0.0} 258 }; 259 static const TableEntry tbl_erf[] = { 260 {0.0, 0.0}, 261 {-0.0, -0.0} 262 }; 263 static const TableEntry tbl_exp[] = { 264 {1.0, 0.0}, 265 {1.0, -0.0}, 266 {MATH_E, 1.0} 267 }; 268 static const TableEntry tbl_exp2[] = { 269 {1.0, 0.0}, 270 {1.0, -0.0}, 271 {2.0, 1.0} 272 }; 273 static const TableEntry tbl_exp10[] = { 274 {1.0, 0.0}, 275 {1.0, -0.0}, 276 {10.0, 1.0} 277 }; 278 static const TableEntry tbl_expm1[] = { 279 {0.0, 0.0}, 280 {-0.0, -0.0} 281 }; 282 static const TableEntry tbl_log[] = { 283 {0.0, 1.0}, 284 {1.0, MATH_E} 285 }; 286 static const TableEntry tbl_log2[] = { 287 {0.0, 1.0}, 288 {1.0, 2.0} 289 }; 290 static const TableEntry tbl_log10[] = { 291 {0.0, 1.0}, 292 {1.0, 10.0} 293 }; 294 static const TableEntry tbl_rsqrt[] = { 295 {1.0, 1.0}, 296 {MATH_SQRT1_2, 2.0} 297 }; 298 static const TableEntry tbl_sin[] = { 299 {0.0, 0.0}, 300 {-0.0, -0.0} 301 }; 302 static const TableEntry tbl_sinh[] = { 303 {0.0, 0.0}, 304 {-0.0, -0.0} 305 }; 306 static const TableEntry tbl_sinpi[] = { 307 {0.0, 0.0}, 308 {-0.0, -0.0} 309 }; 310 static const TableEntry tbl_sqrt[] = { 311 {0.0, 0.0}, 312 {1.0, 1.0}, 313 {MATH_SQRT2, 2.0} 314 }; 315 static const TableEntry tbl_tan[] = { 316 {0.0, 0.0}, 317 {-0.0, -0.0} 318 }; 319 static const TableEntry tbl_tanh[] = { 320 {0.0, 0.0}, 321 {-0.0, -0.0} 322 }; 323 static const TableEntry tbl_tanpi[] = { 324 {0.0, 0.0}, 325 {-0.0, -0.0} 326 }; 327 static const TableEntry tbl_tgamma[] = { 328 {1.0, 1.0}, 329 {1.0, 2.0}, 330 {2.0, 3.0}, 331 {6.0, 4.0} 332 }; 333 334 static bool HasNative(AMDGPULibFunc::EFuncId id) { 335 switch(id) { 336 case AMDGPULibFunc::EI_DIVIDE: 337 case AMDGPULibFunc::EI_COS: 338 case AMDGPULibFunc::EI_EXP: 339 case AMDGPULibFunc::EI_EXP2: 340 case AMDGPULibFunc::EI_EXP10: 341 case AMDGPULibFunc::EI_LOG: 342 case AMDGPULibFunc::EI_LOG2: 343 case AMDGPULibFunc::EI_LOG10: 344 case AMDGPULibFunc::EI_POWR: 345 case AMDGPULibFunc::EI_RECIP: 346 case AMDGPULibFunc::EI_RSQRT: 347 case AMDGPULibFunc::EI_SIN: 348 case AMDGPULibFunc::EI_SINCOS: 349 case AMDGPULibFunc::EI_SQRT: 350 case AMDGPULibFunc::EI_TAN: 351 return true; 352 default:; 353 } 354 return false; 355 } 356 357 using TableRef = ArrayRef<TableEntry>; 358 359 static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { 360 switch(id) { 361 case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); 362 case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); 363 case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); 364 case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); 365 case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); 366 case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); 367 case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); 368 case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); 369 case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); 370 case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); 371 case AMDGPULibFunc::EI_NCOS: 372 case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); 373 case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); 374 case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); 375 case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); 376 case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); 377 case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); 378 case AMDGPULibFunc::EI_NEXP2: 379 case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); 380 case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); 381 case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); 382 case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); 383 case AMDGPULibFunc::EI_NLOG2: 384 case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); 385 case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); 386 case AMDGPULibFunc::EI_NRSQRT: 387 case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); 388 case AMDGPULibFunc::EI_NSIN: 389 case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); 390 case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); 391 case AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); 392 case AMDGPULibFunc::EI_NSQRT: 393 case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); 394 case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); 395 case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); 396 case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); 397 case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); 398 default:; 399 } 400 return TableRef(); 401 } 402 403 static inline int getVecSize(const AMDGPULibFunc& FInfo) { 404 return FInfo.getLeads()[0].VectorSize; 405 } 406 407 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { 408 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; 409 } 410 411 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { 412 // If we are doing PreLinkOpt, the function is external. So it is safe to 413 // use getOrInsertFunction() at this stage. 414 415 return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) 416 : AMDGPULibFunc::getFunction(M, fInfo); 417 } 418 419 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName, 420 FuncInfo &FInfo) { 421 return AMDGPULibFunc::parse(FMangledName, FInfo); 422 } 423 424 bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const { 425 return UnsafeFPMath || FPOp->isFast(); 426 } 427 428 bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const { 429 return UnsafeFPMath || 430 (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs()); 431 } 432 433 bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold( 434 const FPMathOperator *FPOp) const { 435 // TODO: Refine to approxFunc or contract 436 return isUnsafeMath(FPOp); 437 } 438 439 void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) { 440 UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool(); 441 AC = &FAM.getResult<AssumptionAnalysis>(F); 442 TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F); 443 DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); 444 } 445 446 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { 447 return AllNative || llvm::is_contained(UseNative, F); 448 } 449 450 void AMDGPULibCalls::initNativeFuncs() { 451 AllNative = useNativeFunc("all") || 452 (UseNative.getNumOccurrences() && UseNative.size() == 1 && 453 UseNative.begin()->empty()); 454 } 455 456 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { 457 bool native_sin = useNativeFunc("sin"); 458 bool native_cos = useNativeFunc("cos"); 459 460 if (native_sin && native_cos) { 461 Module *M = aCI->getModule(); 462 Value *opr0 = aCI->getArgOperand(0); 463 464 AMDGPULibFunc nf; 465 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; 466 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; 467 468 nf.setPrefix(AMDGPULibFunc::NATIVE); 469 nf.setId(AMDGPULibFunc::EI_SIN); 470 FunctionCallee sinExpr = getFunction(M, nf); 471 472 nf.setPrefix(AMDGPULibFunc::NATIVE); 473 nf.setId(AMDGPULibFunc::EI_COS); 474 FunctionCallee cosExpr = getFunction(M, nf); 475 if (sinExpr && cosExpr) { 476 Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); 477 Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); 478 new StoreInst(cosval, aCI->getArgOperand(1), aCI); 479 480 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 481 << " with native version of sin/cos"); 482 483 replaceCall(aCI, sinval); 484 return true; 485 } 486 } 487 return false; 488 } 489 490 bool AMDGPULibCalls::useNative(CallInst *aCI) { 491 Function *Callee = aCI->getCalledFunction(); 492 if (!Callee || aCI->isNoBuiltin()) 493 return false; 494 495 FuncInfo FInfo; 496 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() || 497 FInfo.getPrefix() != AMDGPULibFunc::NOPFX || 498 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || 499 !(AllNative || useNativeFunc(FInfo.getName()))) { 500 return false; 501 } 502 503 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) 504 return sincosUseNative(aCI, FInfo); 505 506 FInfo.setPrefix(AMDGPULibFunc::NATIVE); 507 FunctionCallee F = getFunction(aCI->getModule(), FInfo); 508 if (!F) 509 return false; 510 511 aCI->setCalledFunction(F); 512 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 513 << " with native version"); 514 return true; 515 } 516 517 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe 518 // builtin, with appended type size and alignment arguments, where 2 or 4 519 // indicates the original number of arguments. The library has optimized version 520 // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same 521 // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N 522 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., 523 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. 524 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 525 const FuncInfo &FInfo) { 526 auto *Callee = CI->getCalledFunction(); 527 if (!Callee->isDeclaration()) 528 return false; 529 530 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); 531 auto *M = Callee->getParent(); 532 std::string Name = std::string(Callee->getName()); 533 auto NumArg = CI->arg_size(); 534 if (NumArg != 4 && NumArg != 6) 535 return false; 536 ConstantInt *PacketSize = 537 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2)); 538 ConstantInt *PacketAlign = 539 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1)); 540 if (!PacketSize || !PacketAlign) 541 return false; 542 543 unsigned Size = PacketSize->getZExtValue(); 544 Align Alignment = PacketAlign->getAlignValue(); 545 if (Alignment != Size) 546 return false; 547 548 unsigned PtrArgLoc = CI->arg_size() - 3; 549 Value *PtrArg = CI->getArgOperand(PtrArgLoc); 550 Type *PtrTy = PtrArg->getType(); 551 552 SmallVector<llvm::Type *, 6> ArgTys; 553 for (unsigned I = 0; I != PtrArgLoc; ++I) 554 ArgTys.push_back(CI->getArgOperand(I)->getType()); 555 ArgTys.push_back(PtrTy); 556 557 Name = Name + "_" + std::to_string(Size); 558 auto *FTy = FunctionType::get(Callee->getReturnType(), 559 ArrayRef<Type *>(ArgTys), false); 560 AMDGPULibFunc NewLibFunc(Name, FTy); 561 FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); 562 if (!F) 563 return false; 564 565 SmallVector<Value *, 6> Args; 566 for (unsigned I = 0; I != PtrArgLoc; ++I) 567 Args.push_back(CI->getArgOperand(I)); 568 Args.push_back(PtrArg); 569 570 auto *NCI = B.CreateCall(F, Args); 571 NCI->setAttributes(CI->getAttributes()); 572 CI->replaceAllUsesWith(NCI); 573 CI->dropAllReferences(); 574 CI->eraseFromParent(); 575 576 return true; 577 } 578 579 static bool isKnownIntegral(const Value *V, const DataLayout &DL, 580 FastMathFlags FMF) { 581 if (isa<UndefValue>(V)) 582 return true; 583 584 if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) 585 return CF->getValueAPF().isInteger(); 586 587 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 588 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 589 Constant *ConstElt = CDV->getElementAsConstant(i); 590 if (isa<UndefValue>(ConstElt)) 591 continue; 592 const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt); 593 if (!CFP || !CFP->getValue().isInteger()) 594 return false; 595 } 596 597 return true; 598 } 599 600 const Instruction *I = dyn_cast<Instruction>(V); 601 if (!I) 602 return false; 603 604 switch (I->getOpcode()) { 605 case Instruction::SIToFP: 606 case Instruction::UIToFP: 607 // TODO: Could check nofpclass(inf) on incoming argument 608 if (FMF.noInfs()) 609 return true; 610 611 // Need to check int size cannot produce infinity, which computeKnownFPClass 612 // knows how to do already. 613 return isKnownNeverInfinity(I, DL); 614 case Instruction::Call: { 615 const CallInst *CI = cast<CallInst>(I); 616 switch (CI->getIntrinsicID()) { 617 case Intrinsic::trunc: 618 case Intrinsic::floor: 619 case Intrinsic::ceil: 620 case Intrinsic::rint: 621 case Intrinsic::nearbyint: 622 case Intrinsic::round: 623 case Intrinsic::roundeven: 624 return (FMF.noInfs() && FMF.noNaNs()) || 625 isKnownNeverInfOrNaN(I, DL, nullptr); 626 default: 627 break; 628 } 629 630 break; 631 } 632 default: 633 break; 634 } 635 636 return false; 637 } 638 639 // This function returns false if no change; return true otherwise. 640 bool AMDGPULibCalls::fold(CallInst *CI) { 641 Function *Callee = CI->getCalledFunction(); 642 // Ignore indirect calls. 643 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin()) 644 return false; 645 646 FuncInfo FInfo; 647 if (!parseFunctionName(Callee->getName(), FInfo)) 648 return false; 649 650 // Further check the number of arguments to see if they match. 651 // TODO: Check calling convention matches too 652 if (!FInfo.isCompatibleSignature(CI->getFunctionType())) 653 return false; 654 655 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n'); 656 657 if (TDOFold(CI, FInfo)) 658 return true; 659 660 IRBuilder<> B(CI); 661 662 if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) { 663 // Under unsafe-math, evaluate calls if possible. 664 // According to Brian Sumner, we can do this for all f32 function calls 665 // using host's double function calls. 666 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo)) 667 return true; 668 669 // Copy fast flags from the original call. 670 FastMathFlags FMF = FPOp->getFastMathFlags(); 671 B.setFastMathFlags(FMF); 672 673 // Specialized optimizations for each function call. 674 // 675 // TODO: Handle other simple intrinsic wrappers. Sqrt. 676 // 677 // TODO: Handle native functions 678 switch (FInfo.getId()) { 679 case AMDGPULibFunc::EI_EXP: 680 if (FMF.none()) 681 return false; 682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp, 683 FMF.approxFunc()); 684 case AMDGPULibFunc::EI_EXP2: 685 if (FMF.none()) 686 return false; 687 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2, 688 FMF.approxFunc()); 689 case AMDGPULibFunc::EI_LOG: 690 if (FMF.none()) 691 return false; 692 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log, 693 FMF.approxFunc()); 694 case AMDGPULibFunc::EI_LOG2: 695 if (FMF.none()) 696 return false; 697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2, 698 FMF.approxFunc()); 699 case AMDGPULibFunc::EI_LOG10: 700 if (FMF.none()) 701 return false; 702 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10, 703 FMF.approxFunc()); 704 case AMDGPULibFunc::EI_FMIN: 705 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum, 706 true, true); 707 case AMDGPULibFunc::EI_FMAX: 708 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum, 709 true, true); 710 case AMDGPULibFunc::EI_FMA: 711 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true, 712 true); 713 case AMDGPULibFunc::EI_MAD: 714 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd, 715 true, true); 716 case AMDGPULibFunc::EI_FABS: 717 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true, 718 true, true); 719 case AMDGPULibFunc::EI_COPYSIGN: 720 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign, 721 true, true, true); 722 case AMDGPULibFunc::EI_FLOOR: 723 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true, 724 true); 725 case AMDGPULibFunc::EI_CEIL: 726 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true, 727 true); 728 case AMDGPULibFunc::EI_TRUNC: 729 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true, 730 true); 731 case AMDGPULibFunc::EI_RINT: 732 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true, 733 true); 734 case AMDGPULibFunc::EI_ROUND: 735 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true, 736 true); 737 case AMDGPULibFunc::EI_LDEXP: { 738 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true)) 739 return false; 740 741 Value *Arg1 = CI->getArgOperand(1); 742 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType()); 743 VecTy && !isa<VectorType>(Arg1->getType())) { 744 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1); 745 CI->setArgOperand(1, SplatArg1); 746 } 747 748 CI->setCalledFunction(Intrinsic::getDeclaration( 749 CI->getModule(), Intrinsic::ldexp, 750 {CI->getType(), CI->getArgOperand(1)->getType()})); 751 return true; 752 } 753 case AMDGPULibFunc::EI_POW: { 754 Module *M = Callee->getParent(); 755 AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo); 756 FunctionCallee PowrFunc = getFunction(M, PowrInfo); 757 CallInst *Call = cast<CallInst>(FPOp); 758 759 // pow(x, y) -> powr(x, y) for x >= -0.0 760 // TODO: Account for flags on current call 761 if (PowrFunc && 762 cannotBeOrderedLessThanZero(FPOp->getOperand(0), M->getDataLayout(), 763 TLInfo, 0, AC, Call, DT)) { 764 Call->setCalledFunction(PowrFunc); 765 return fold_pow(FPOp, B, PowrInfo) || true; 766 } 767 768 // pow(x, y) -> pown(x, y) for known integral y 769 if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(), 770 FPOp->getFastMathFlags())) { 771 FunctionType *PownType = getPownType(CI->getFunctionType()); 772 AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true); 773 FunctionCallee PownFunc = getFunction(M, PownInfo); 774 if (PownFunc) { 775 // TODO: If the incoming integral value is an sitofp/uitofp, it won't 776 // fold out without a known range. We can probably take the source 777 // value directly. 778 Value *CastedArg = 779 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1)); 780 // Have to drop any nofpclass attributes on the original call site. 781 Call->removeParamAttrs( 782 1, AttributeFuncs::typeIncompatible(CastedArg->getType())); 783 Call->setCalledFunction(PownFunc); 784 Call->setArgOperand(1, CastedArg); 785 return fold_pow(FPOp, B, PownInfo) || true; 786 } 787 } 788 789 return fold_pow(FPOp, B, FInfo); 790 } 791 case AMDGPULibFunc::EI_POWR: 792 case AMDGPULibFunc::EI_POWN: 793 return fold_pow(FPOp, B, FInfo); 794 case AMDGPULibFunc::EI_ROOTN: 795 return fold_rootn(FPOp, B, FInfo); 796 case AMDGPULibFunc::EI_SQRT: 797 return fold_sqrt(FPOp, B, FInfo); 798 case AMDGPULibFunc::EI_COS: 799 case AMDGPULibFunc::EI_SIN: 800 return fold_sincos(FPOp, B, FInfo); 801 default: 802 break; 803 } 804 } else { 805 // Specialized optimizations for each function call 806 switch (FInfo.getId()) { 807 case AMDGPULibFunc::EI_READ_PIPE_2: 808 case AMDGPULibFunc::EI_READ_PIPE_4: 809 case AMDGPULibFunc::EI_WRITE_PIPE_2: 810 case AMDGPULibFunc::EI_WRITE_PIPE_4: 811 return fold_read_write_pipe(CI, B, FInfo); 812 default: 813 break; 814 } 815 } 816 817 return false; 818 } 819 820 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { 821 // Table-Driven optimization 822 const TableRef tr = getOptTable(FInfo.getId()); 823 if (tr.empty()) 824 return false; 825 826 int const sz = (int)tr.size(); 827 Value *opr0 = CI->getArgOperand(0); 828 829 if (getVecSize(FInfo) > 1) { 830 if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { 831 SmallVector<double, 0> DVal; 832 for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { 833 ConstantFP *eltval = dyn_cast<ConstantFP>( 834 CV->getElementAsConstant((unsigned)eltNo)); 835 assert(eltval && "Non-FP arguments in math function!"); 836 bool found = false; 837 for (int i=0; i < sz; ++i) { 838 if (eltval->isExactlyValue(tr[i].input)) { 839 DVal.push_back(tr[i].result); 840 found = true; 841 break; 842 } 843 } 844 if (!found) { 845 // This vector constants not handled yet. 846 return false; 847 } 848 } 849 LLVMContext &context = CI->getParent()->getParent()->getContext(); 850 Constant *nval; 851 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 852 SmallVector<float, 0> FVal; 853 for (unsigned i = 0; i < DVal.size(); ++i) { 854 FVal.push_back((float)DVal[i]); 855 } 856 ArrayRef<float> tmp(FVal); 857 nval = ConstantDataVector::get(context, tmp); 858 } else { // F64 859 ArrayRef<double> tmp(DVal); 860 nval = ConstantDataVector::get(context, tmp); 861 } 862 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 863 replaceCall(CI, nval); 864 return true; 865 } 866 } else { 867 // Scalar version 868 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { 869 for (int i = 0; i < sz; ++i) { 870 if (CF->isExactlyValue(tr[i].input)) { 871 Value *nval = ConstantFP::get(CF->getType(), tr[i].result); 872 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 873 replaceCall(CI, nval); 874 return true; 875 } 876 } 877 } 878 } 879 880 return false; 881 } 882 883 namespace llvm { 884 static double log2(double V) { 885 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L 886 return ::log2(V); 887 #else 888 return log(V) / numbers::ln2; 889 #endif 890 } 891 } 892 893 bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, 894 const FuncInfo &FInfo) { 895 assert((FInfo.getId() == AMDGPULibFunc::EI_POW || 896 FInfo.getId() == AMDGPULibFunc::EI_POWR || 897 FInfo.getId() == AMDGPULibFunc::EI_POWN) && 898 "fold_pow: encounter a wrong function call"); 899 900 Module *M = B.GetInsertBlock()->getModule(); 901 Type *eltType = FPOp->getType()->getScalarType(); 902 Value *opr0 = FPOp->getOperand(0); 903 Value *opr1 = FPOp->getOperand(1); 904 905 const APFloat *CF = nullptr; 906 const APInt *CINT = nullptr; 907 if (!match(opr1, m_APFloatAllowUndef(CF))) 908 match(opr1, m_APIntAllowUndef(CINT)); 909 910 // 0x1111111 means that we don't do anything for this call. 911 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111); 912 913 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) { 914 // pow/powr/pown(x, 0) == 1 915 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n"); 916 Constant *cnval = ConstantFP::get(eltType, 1.0); 917 if (getVecSize(FInfo) > 1) { 918 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 919 } 920 replaceCall(FPOp, cnval); 921 return true; 922 } 923 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { 924 // pow/powr/pown(x, 1.0) = x 925 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); 926 replaceCall(FPOp, opr0); 927 return true; 928 } 929 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) { 930 // pow/powr/pown(x, 2.0) = x*x 931 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * " 932 << *opr0 << "\n"); 933 Value *nval = B.CreateFMul(opr0, opr0, "__pow2"); 934 replaceCall(FPOp, nval); 935 return true; 936 } 937 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { 938 // pow/powr/pown(x, -1.0) = 1.0/x 939 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n"); 940 Constant *cnval = ConstantFP::get(eltType, 1.0); 941 if (getVecSize(FInfo) > 1) { 942 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 943 } 944 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip"); 945 replaceCall(FPOp, nval); 946 return true; 947 } 948 949 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { 950 // pow[r](x, [-]0.5) = sqrt(x) 951 bool issqrt = CF->isExactlyValue(0.5); 952 if (FunctionCallee FPExpr = 953 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT 954 : AMDGPULibFunc::EI_RSQRT, 955 FInfo))) { 956 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName() 957 << '(' << *opr0 << ")\n"); 958 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" 959 : "__pow2rsqrt"); 960 replaceCall(FPOp, nval); 961 return true; 962 } 963 } 964 965 if (!isUnsafeFiniteOnlyMath(FPOp)) 966 return false; 967 968 // Unsafe Math optimization 969 970 // Remember that ci_opr1 is set if opr1 is integral 971 if (CF) { 972 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32) 973 ? (double)CF->convertToFloat() 974 : CF->convertToDouble(); 975 int ival = (int)dval; 976 if ((double)ival == dval) { 977 ci_opr1 = ival; 978 } else 979 ci_opr1 = 0x11111111; 980 } 981 982 // pow/powr/pown(x, c) = [1/](x*x*..x); where 983 // trunc(c) == c && the number of x == c && |c| <= 12 984 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1; 985 if (abs_opr1 <= 12) { 986 Constant *cnval; 987 Value *nval; 988 if (abs_opr1 == 0) { 989 cnval = ConstantFP::get(eltType, 1.0); 990 if (getVecSize(FInfo) > 1) { 991 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 992 } 993 nval = cnval; 994 } else { 995 Value *valx2 = nullptr; 996 nval = nullptr; 997 while (abs_opr1 > 0) { 998 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0; 999 if (abs_opr1 & 1) { 1000 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2; 1001 } 1002 abs_opr1 >>= 1; 1003 } 1004 } 1005 1006 if (ci_opr1 < 0) { 1007 cnval = ConstantFP::get(eltType, 1.0); 1008 if (getVecSize(FInfo) > 1) { 1009 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval); 1010 } 1011 nval = B.CreateFDiv(cnval, nval, "__1powprod"); 1012 } 1013 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 1014 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0 1015 << ")\n"); 1016 replaceCall(FPOp, nval); 1017 return true; 1018 } 1019 1020 // If we should use the generic intrinsic instead of emitting a libcall 1021 const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy(); 1022 1023 // powr ---> exp2(y * log2(x)) 1024 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) 1025 FunctionCallee ExpExpr; 1026 if (ShouldUseIntrinsic) 1027 ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()}); 1028 else { 1029 ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); 1030 if (!ExpExpr) 1031 return false; 1032 } 1033 1034 bool needlog = false; 1035 bool needabs = false; 1036 bool needcopysign = false; 1037 Constant *cnval = nullptr; 1038 if (getVecSize(FInfo) == 1) { 1039 CF = nullptr; 1040 match(opr0, m_APFloatAllowUndef(CF)); 1041 1042 if (CF) { 1043 double V = (getArgType(FInfo) == AMDGPULibFunc::F32) 1044 ? (double)CF->convertToFloat() 1045 : CF->convertToDouble(); 1046 1047 V = log2(std::abs(V)); 1048 cnval = ConstantFP::get(eltType, V); 1049 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) && 1050 CF->isNegative(); 1051 } else { 1052 needlog = true; 1053 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; 1054 } 1055 } else { 1056 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0); 1057 1058 if (!CDV) { 1059 needlog = true; 1060 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR; 1061 } else { 1062 assert ((int)CDV->getNumElements() == getVecSize(FInfo) && 1063 "Wrong vector size detected"); 1064 1065 SmallVector<double, 0> DVal; 1066 for (int i=0; i < getVecSize(FInfo); ++i) { 1067 double V = CDV->getElementAsAPFloat(i).convertToDouble(); 1068 if (V < 0.0) needcopysign = true; 1069 V = log2(std::abs(V)); 1070 DVal.push_back(V); 1071 } 1072 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 1073 SmallVector<float, 0> FVal; 1074 for (unsigned i=0; i < DVal.size(); ++i) { 1075 FVal.push_back((float)DVal[i]); 1076 } 1077 ArrayRef<float> tmp(FVal); 1078 cnval = ConstantDataVector::get(M->getContext(), tmp); 1079 } else { 1080 ArrayRef<double> tmp(DVal); 1081 cnval = ConstantDataVector::get(M->getContext(), tmp); 1082 } 1083 } 1084 } 1085 1086 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) { 1087 // We cannot handle corner cases for a general pow() function, give up 1088 // unless y is a constant integral value. Then proceed as if it were pown. 1089 if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags())) 1090 return false; 1091 } 1092 1093 Value *nval; 1094 if (needabs) { 1095 nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs"); 1096 } else { 1097 nval = cnval ? cnval : opr0; 1098 } 1099 if (needlog) { 1100 FunctionCallee LogExpr; 1101 if (ShouldUseIntrinsic) { 1102 LogExpr = 1103 Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()}); 1104 } else { 1105 LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); 1106 if (!LogExpr) 1107 return false; 1108 } 1109 1110 nval = CreateCallEx(B,LogExpr, nval, "__log2"); 1111 } 1112 1113 if (FInfo.getId() == AMDGPULibFunc::EI_POWN) { 1114 // convert int(32) to fp(f32 or f64) 1115 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F"); 1116 } 1117 nval = B.CreateFMul(opr1, nval, "__ylogx"); 1118 nval = CreateCallEx(B,ExpExpr, nval, "__exp2"); 1119 1120 if (needcopysign) { 1121 Value *opr_n; 1122 Type* rTy = opr0->getType(); 1123 Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits()); 1124 Type *nTy = nTyS; 1125 if (const auto *vTy = dyn_cast<FixedVectorType>(rTy)) 1126 nTy = FixedVectorType::get(nTyS, vTy); 1127 unsigned size = nTy->getScalarSizeInBits(); 1128 opr_n = FPOp->getOperand(1); 1129 if (opr_n->getType()->isIntegerTy()) 1130 opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou"); 1131 else 1132 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou"); 1133 1134 Value *sign = B.CreateShl(opr_n, size-1, "__yeven"); 1135 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign"); 1136 nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign); 1137 nval = B.CreateBitCast(nval, opr0->getType()); 1138 } 1139 1140 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 1141 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n"); 1142 replaceCall(FPOp, nval); 1143 1144 return true; 1145 } 1146 1147 bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, 1148 const FuncInfo &FInfo) { 1149 // skip vector function 1150 if (getVecSize(FInfo) != 1) 1151 return false; 1152 1153 Value *opr0 = FPOp->getOperand(0); 1154 Value *opr1 = FPOp->getOperand(1); 1155 1156 ConstantInt *CINT = dyn_cast<ConstantInt>(opr1); 1157 if (!CINT) { 1158 return false; 1159 } 1160 int ci_opr1 = (int)CINT->getSExtValue(); 1161 if (ci_opr1 == 1) { // rootn(x, 1) = x 1162 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); 1163 replaceCall(FPOp, opr0); 1164 return true; 1165 } 1166 1167 Module *M = B.GetInsertBlock()->getModule(); 1168 if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x) 1169 if (FunctionCallee FPExpr = 1170 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { 1171 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 1172 << ")\n"); 1173 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); 1174 replaceCall(FPOp, nval); 1175 return true; 1176 } 1177 } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) 1178 if (FunctionCallee FPExpr = 1179 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { 1180 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0 1181 << ")\n"); 1182 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); 1183 replaceCall(FPOp, nval); 1184 return true; 1185 } 1186 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x 1187 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n"); 1188 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), 1189 opr0, 1190 "__rootn2div"); 1191 replaceCall(FPOp, nval); 1192 return true; 1193 } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x) 1194 if (FunctionCallee FPExpr = 1195 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { 1196 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0 1197 << ")\n"); 1198 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); 1199 replaceCall(FPOp, nval); 1200 return true; 1201 } 1202 } 1203 return false; 1204 } 1205 1206 // Get a scalar native builtin single argument FP function 1207 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, 1208 const FuncInfo &FInfo) { 1209 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) 1210 return nullptr; 1211 FuncInfo nf = FInfo; 1212 nf.setPrefix(AMDGPULibFunc::NATIVE); 1213 return getFunction(M, nf); 1214 } 1215 1216 // Some library calls are just wrappers around llvm intrinsics, but compiled 1217 // conservatively. Preserve the flags from the original call site by 1218 // substituting them with direct calls with all the flags. 1219 bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 1220 bool AllowMinSizeF32, 1221 bool AllowF64, 1222 bool AllowStrictFP) { 1223 Type *FltTy = CI->getType()->getScalarType(); 1224 const bool IsF32 = FltTy->isFloatTy(); 1225 1226 // f64 intrinsics aren't implemented for most operations. 1227 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy())) 1228 return false; 1229 1230 // We're implicitly inlining by replacing the libcall with the intrinsic, so 1231 // don't do it for noinline call sites. 1232 if (CI->isNoInline()) 1233 return false; 1234 1235 const Function *ParentF = CI->getFunction(); 1236 // TODO: Handle strictfp 1237 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP)) 1238 return false; 1239 1240 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize()) 1241 return false; 1242 return true; 1243 } 1244 1245 void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, 1246 CallInst *CI, 1247 Intrinsic::ID IntrID) { 1248 if (CI->arg_size() == 2) { 1249 Value *Arg0 = CI->getArgOperand(0); 1250 Value *Arg1 = CI->getArgOperand(1); 1251 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType()); 1252 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType()); 1253 if (Arg0VecTy && !Arg1VecTy) { 1254 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1); 1255 CI->setArgOperand(1, SplatRHS); 1256 } else if (!Arg0VecTy && Arg1VecTy) { 1257 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0); 1258 CI->setArgOperand(0, SplatLHS); 1259 } 1260 } 1261 1262 CI->setCalledFunction( 1263 Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()})); 1264 } 1265 1266 bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic( 1267 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32, 1268 bool AllowF64, bool AllowStrictFP) { 1269 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64, 1270 AllowStrictFP)) 1271 return false; 1272 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID); 1273 return true; 1274 } 1275 1276 // fold sqrt -> native_sqrt (x) 1277 bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, 1278 const FuncInfo &FInfo) { 1279 if (!isUnsafeMath(FPOp)) 1280 return false; 1281 1282 if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && 1283 (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { 1284 Module *M = B.GetInsertBlock()->getModule(); 1285 1286 if (FunctionCallee FPExpr = getNativeFunction( 1287 M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { 1288 Value *opr0 = FPOp->getOperand(0); 1289 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " 1290 << "sqrt(" << *opr0 << ")\n"); 1291 Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt"); 1292 replaceCall(FPOp, nval); 1293 return true; 1294 } 1295 } 1296 return false; 1297 } 1298 1299 std::tuple<Value *, Value *, Value *> 1300 AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B, 1301 FunctionCallee Fsincos) { 1302 DebugLoc DL = B.getCurrentDebugLocation(); 1303 Function *F = B.GetInsertBlock()->getParent(); 1304 B.SetInsertPointPastAllocas(F); 1305 1306 AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_"); 1307 1308 if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) { 1309 // If the argument is an instruction, it must dominate all uses so put our 1310 // sincos call there. Otherwise, right after the allocas works well enough 1311 // if it's an argument or constant. 1312 1313 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); 1314 1315 // SetInsertPoint unwelcomely always tries to set the debug loc. 1316 B.SetCurrentDebugLocation(DL); 1317 } 1318 1319 Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1); 1320 1321 // The allocaInst allocates the memory in private address space. This need 1322 // to be addrspacecasted to point to the address space of cos pointer type. 1323 // In OpenCL 2.0 this is generic, while in 1.2 that is private. 1324 Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy); 1325 1326 CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc); 1327 1328 // TODO: Is it worth trying to preserve the location for the cos calls for the 1329 // load? 1330 1331 LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc); 1332 return {SinCos, LoadCos, SinCos}; 1333 } 1334 1335 // fold sin, cos -> sincos. 1336 bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, 1337 const FuncInfo &fInfo) { 1338 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN || 1339 fInfo.getId() == AMDGPULibFunc::EI_COS); 1340 1341 if ((getArgType(fInfo) != AMDGPULibFunc::F32 && 1342 getArgType(fInfo) != AMDGPULibFunc::F64) || 1343 fInfo.getPrefix() != AMDGPULibFunc::NOPFX) 1344 return false; 1345 1346 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; 1347 1348 Value *CArgVal = FPOp->getOperand(0); 1349 CallInst *CI = cast<CallInst>(FPOp); 1350 1351 Function *F = B.GetInsertBlock()->getParent(); 1352 Module *M = F->getParent(); 1353 1354 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer 1355 // implementation. Prefer the private form if available. 1356 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo); 1357 SinCosLibFuncPrivate.getLeads()[0].PtrKind = 1358 AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS); 1359 1360 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo); 1361 SinCosLibFuncGeneric.getLeads()[0].PtrKind = 1362 AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); 1363 1364 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate); 1365 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric); 1366 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric; 1367 if (!FSinCos) 1368 return false; 1369 1370 SmallVector<CallInst *> SinCalls; 1371 SmallVector<CallInst *> CosCalls; 1372 SmallVector<CallInst *> SinCosCalls; 1373 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN, 1374 fInfo); 1375 const std::string PairName = PartnerInfo.mangle(); 1376 1377 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName; 1378 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName(); 1379 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle(); 1380 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle(); 1381 1382 // Intersect the two sets of flags. 1383 FastMathFlags FMF = FPOp->getFastMathFlags(); 1384 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath); 1385 1386 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()}; 1387 1388 for (User* U : CArgVal->users()) { 1389 CallInst *XI = dyn_cast<CallInst>(U); 1390 if (!XI || XI->getFunction() != F || XI->isNoBuiltin()) 1391 continue; 1392 1393 Function *UCallee = XI->getCalledFunction(); 1394 if (!UCallee) 1395 continue; 1396 1397 bool Handled = true; 1398 1399 if (UCallee->getName() == SinName) 1400 SinCalls.push_back(XI); 1401 else if (UCallee->getName() == CosName) 1402 CosCalls.push_back(XI); 1403 else if (UCallee->getName() == SinCosPrivateName || 1404 UCallee->getName() == SinCosGenericName) 1405 SinCosCalls.push_back(XI); 1406 else 1407 Handled = false; 1408 1409 if (Handled) { 1410 MergeDbgLocs.push_back(XI->getDebugLoc()); 1411 auto *OtherOp = cast<FPMathOperator>(XI); 1412 FMF &= OtherOp->getFastMathFlags(); 1413 FPMath = MDNode::getMostGenericFPMath( 1414 FPMath, XI->getMetadata(LLVMContext::MD_fpmath)); 1415 } 1416 } 1417 1418 if (SinCalls.empty() || CosCalls.empty()) 1419 return false; 1420 1421 B.setFastMathFlags(FMF); 1422 B.setDefaultFPMathTag(FPMath); 1423 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs); 1424 B.SetCurrentDebugLocation(DbgLoc); 1425 1426 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos); 1427 1428 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) { 1429 for (CallInst *C : Calls) 1430 C->replaceAllUsesWith(Res); 1431 1432 // Leave the other dead instructions to avoid clobbering iterators. 1433 }; 1434 1435 replaceTrigInsts(SinCalls, Sin); 1436 replaceTrigInsts(CosCalls, Cos); 1437 replaceTrigInsts(SinCosCalls, SinCos); 1438 1439 // It's safe to delete the original now. 1440 CI->eraseFromParent(); 1441 return true; 1442 } 1443 1444 bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, 1445 double &Res1, Constant *copr0, 1446 Constant *copr1) { 1447 // By default, opr0/opr1/opr3 holds values of float/double type. 1448 // If they are not float/double, each function has to its 1449 // operand separately. 1450 double opr0 = 0.0, opr1 = 0.0; 1451 ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0); 1452 ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1); 1453 if (fpopr0) { 1454 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64) 1455 ? fpopr0->getValueAPF().convertToDouble() 1456 : (double)fpopr0->getValueAPF().convertToFloat(); 1457 } 1458 1459 if (fpopr1) { 1460 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64) 1461 ? fpopr1->getValueAPF().convertToDouble() 1462 : (double)fpopr1->getValueAPF().convertToFloat(); 1463 } 1464 1465 switch (FInfo.getId()) { 1466 default : return false; 1467 1468 case AMDGPULibFunc::EI_ACOS: 1469 Res0 = acos(opr0); 1470 return true; 1471 1472 case AMDGPULibFunc::EI_ACOSH: 1473 // acosh(x) == log(x + sqrt(x*x - 1)) 1474 Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0)); 1475 return true; 1476 1477 case AMDGPULibFunc::EI_ACOSPI: 1478 Res0 = acos(opr0) / MATH_PI; 1479 return true; 1480 1481 case AMDGPULibFunc::EI_ASIN: 1482 Res0 = asin(opr0); 1483 return true; 1484 1485 case AMDGPULibFunc::EI_ASINH: 1486 // asinh(x) == log(x + sqrt(x*x + 1)) 1487 Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0)); 1488 return true; 1489 1490 case AMDGPULibFunc::EI_ASINPI: 1491 Res0 = asin(opr0) / MATH_PI; 1492 return true; 1493 1494 case AMDGPULibFunc::EI_ATAN: 1495 Res0 = atan(opr0); 1496 return true; 1497 1498 case AMDGPULibFunc::EI_ATANH: 1499 // atanh(x) == (log(x+1) - log(x-1))/2; 1500 Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0; 1501 return true; 1502 1503 case AMDGPULibFunc::EI_ATANPI: 1504 Res0 = atan(opr0) / MATH_PI; 1505 return true; 1506 1507 case AMDGPULibFunc::EI_CBRT: 1508 Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0); 1509 return true; 1510 1511 case AMDGPULibFunc::EI_COS: 1512 Res0 = cos(opr0); 1513 return true; 1514 1515 case AMDGPULibFunc::EI_COSH: 1516 Res0 = cosh(opr0); 1517 return true; 1518 1519 case AMDGPULibFunc::EI_COSPI: 1520 Res0 = cos(MATH_PI * opr0); 1521 return true; 1522 1523 case AMDGPULibFunc::EI_EXP: 1524 Res0 = exp(opr0); 1525 return true; 1526 1527 case AMDGPULibFunc::EI_EXP2: 1528 Res0 = pow(2.0, opr0); 1529 return true; 1530 1531 case AMDGPULibFunc::EI_EXP10: 1532 Res0 = pow(10.0, opr0); 1533 return true; 1534 1535 case AMDGPULibFunc::EI_LOG: 1536 Res0 = log(opr0); 1537 return true; 1538 1539 case AMDGPULibFunc::EI_LOG2: 1540 Res0 = log(opr0) / log(2.0); 1541 return true; 1542 1543 case AMDGPULibFunc::EI_LOG10: 1544 Res0 = log(opr0) / log(10.0); 1545 return true; 1546 1547 case AMDGPULibFunc::EI_RSQRT: 1548 Res0 = 1.0 / sqrt(opr0); 1549 return true; 1550 1551 case AMDGPULibFunc::EI_SIN: 1552 Res0 = sin(opr0); 1553 return true; 1554 1555 case AMDGPULibFunc::EI_SINH: 1556 Res0 = sinh(opr0); 1557 return true; 1558 1559 case AMDGPULibFunc::EI_SINPI: 1560 Res0 = sin(MATH_PI * opr0); 1561 return true; 1562 1563 case AMDGPULibFunc::EI_TAN: 1564 Res0 = tan(opr0); 1565 return true; 1566 1567 case AMDGPULibFunc::EI_TANH: 1568 Res0 = tanh(opr0); 1569 return true; 1570 1571 case AMDGPULibFunc::EI_TANPI: 1572 Res0 = tan(MATH_PI * opr0); 1573 return true; 1574 1575 // two-arg functions 1576 case AMDGPULibFunc::EI_POW: 1577 case AMDGPULibFunc::EI_POWR: 1578 Res0 = pow(opr0, opr1); 1579 return true; 1580 1581 case AMDGPULibFunc::EI_POWN: { 1582 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 1583 double val = (double)iopr1->getSExtValue(); 1584 Res0 = pow(opr0, val); 1585 return true; 1586 } 1587 return false; 1588 } 1589 1590 case AMDGPULibFunc::EI_ROOTN: { 1591 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 1592 double val = (double)iopr1->getSExtValue(); 1593 Res0 = pow(opr0, 1.0 / val); 1594 return true; 1595 } 1596 return false; 1597 } 1598 1599 // with ptr arg 1600 case AMDGPULibFunc::EI_SINCOS: 1601 Res0 = sin(opr0); 1602 Res1 = cos(opr0); 1603 return true; 1604 } 1605 1606 return false; 1607 } 1608 1609 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { 1610 int numArgs = (int)aCI->arg_size(); 1611 if (numArgs > 3) 1612 return false; 1613 1614 Constant *copr0 = nullptr; 1615 Constant *copr1 = nullptr; 1616 if (numArgs > 0) { 1617 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) 1618 return false; 1619 } 1620 1621 if (numArgs > 1) { 1622 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { 1623 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) 1624 return false; 1625 } 1626 } 1627 1628 // At this point, all arguments to aCI are constants. 1629 1630 // max vector size is 16, and sincos will generate two results. 1631 double DVal0[16], DVal1[16]; 1632 int FuncVecSize = getVecSize(FInfo); 1633 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); 1634 if (FuncVecSize == 1) { 1635 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) { 1636 return false; 1637 } 1638 } else { 1639 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); 1640 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); 1641 for (int i = 0; i < FuncVecSize; ++i) { 1642 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; 1643 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr; 1644 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) { 1645 return false; 1646 } 1647 } 1648 } 1649 1650 LLVMContext &context = aCI->getContext(); 1651 Constant *nval0, *nval1; 1652 if (FuncVecSize == 1) { 1653 nval0 = ConstantFP::get(aCI->getType(), DVal0[0]); 1654 if (hasTwoResults) 1655 nval1 = ConstantFP::get(aCI->getType(), DVal1[0]); 1656 } else { 1657 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 1658 SmallVector <float, 0> FVal0, FVal1; 1659 for (int i = 0; i < FuncVecSize; ++i) 1660 FVal0.push_back((float)DVal0[i]); 1661 ArrayRef<float> tmp0(FVal0); 1662 nval0 = ConstantDataVector::get(context, tmp0); 1663 if (hasTwoResults) { 1664 for (int i = 0; i < FuncVecSize; ++i) 1665 FVal1.push_back((float)DVal1[i]); 1666 ArrayRef<float> tmp1(FVal1); 1667 nval1 = ConstantDataVector::get(context, tmp1); 1668 } 1669 } else { 1670 ArrayRef<double> tmp0(DVal0); 1671 nval0 = ConstantDataVector::get(context, tmp0); 1672 if (hasTwoResults) { 1673 ArrayRef<double> tmp1(DVal1); 1674 nval1 = ConstantDataVector::get(context, tmp1); 1675 } 1676 } 1677 } 1678 1679 if (hasTwoResults) { 1680 // sincos 1681 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS && 1682 "math function with ptr arg not supported yet"); 1683 new StoreInst(nval1, aCI->getArgOperand(1), aCI); 1684 } 1685 1686 replaceCall(aCI, nval0); 1687 return true; 1688 } 1689 1690 PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, 1691 FunctionAnalysisManager &AM) { 1692 AMDGPULibCalls Simplifier; 1693 Simplifier.initNativeFuncs(); 1694 Simplifier.initFunction(F, AM); 1695 1696 bool Changed = false; 1697 1698 LLVM_DEBUG(dbgs() << "AMDIC: process function "; 1699 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); 1700 1701 for (auto &BB : F) { 1702 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1703 // Ignore non-calls. 1704 CallInst *CI = dyn_cast<CallInst>(I); 1705 ++I; 1706 1707 if (CI) { 1708 if (Simplifier.fold(CI)) 1709 Changed = true; 1710 } 1711 } 1712 } 1713 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1714 } 1715 1716 PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, 1717 FunctionAnalysisManager &AM) { 1718 if (UseNative.empty()) 1719 return PreservedAnalyses::all(); 1720 1721 AMDGPULibCalls Simplifier; 1722 Simplifier.initNativeFuncs(); 1723 Simplifier.initFunction(F, AM); 1724 1725 bool Changed = false; 1726 for (auto &BB : F) { 1727 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1728 // Ignore non-calls. 1729 CallInst *CI = dyn_cast<CallInst>(I); 1730 ++I; 1731 if (CI && Simplifier.useNative(CI)) 1732 Changed = true; 1733 } 1734 } 1735 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1736 } 1737