Lines Matching full:builder
249 Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
253 Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I,
258 Value* expandDivRem32(IRBuilder<> &Builder, BinaryOperator &I,
261 Value *shrinkDivRem64(IRBuilder<> &Builder, BinaryOperator &I,
276 Value *applyFractPat(IRBuilder<> &Builder, Value *FractArg);
281 Value *optimizeWithRsq(IRBuilder<> &Builder, Value *Num, Value *Den,
285 Value *optimizeWithRcp(IRBuilder<> &Builder, Value *Num, Value *Den,
287 Value *optimizeWithFDivFast(IRBuilder<> &Builder, Value *Num, Value *Den,
290 Value *visitFDivElement(IRBuilder<> &Builder, Value *Num, Value *Den,
295 std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,
298 Value *emitRcpIEEE1ULP(IRBuilder<> &Builder, Value *Src,
300 Value *emitFrexpDiv(IRBuilder<> &Builder, Value *LHS, Value *RHS,
302 Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
475 IRBuilder<> Builder(&I);
476 Builder.SetCurrentDebugLocation(I.getDebugLoc());
478 Type *I32Ty = getI32Ty(Builder, I.getType());
485 ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
486 ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
488 ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
489 ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
492 ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
504 TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
516 IRBuilder<> Builder(&I);
517 Builder.SetCurrentDebugLocation(I.getDebugLoc());
519 Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
525 ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
526 ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
528 ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
529 ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
531 NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
543 IRBuilder<> Builder(&I);
544 Builder.SetCurrentDebugLocation(I.getDebugLoc());
546 Type *I32Ty = getI32Ty(Builder, I.getType());
553 ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
554 ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
556 ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
557 ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
559 ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
560 TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
575 IRBuilder<> Builder(&I);
576 Builder.SetCurrentDebugLocation(I.getDebugLoc());
578 Type *I32Ty = getI32Ty(Builder, I.getType());
581 Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
582 Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
584 Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
586 Builder.CreateTrunc(LShrOp, I.getType());
602 static void extractValues(IRBuilder<> &Builder,
611 Values.push_back(Builder.CreateExtractElement(V, I));
614 static Value *insertValues(IRBuilder<> &Builder,
624 NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
644 IRBuilder<> Builder(&I);
645 Builder.SetCurrentDebugLocation(I.getDebugLoc());
664 extractValues(Builder, LHSVals, LHS);
665 extractValues(Builder, RHSVals, RHS);
667 IntegerType *I32Ty = Builder.getInt32Ty();
668 IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
672 Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
673 : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
674 Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
675 : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
678 Value *Result = Builder.CreateIntrinsic(ID, {IntrinTy}, {LHS, RHS});
679 Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
680 : Builder.CreateZExtOrTrunc(Result, DstTy);
684 Value *NewVal = insertValues(Builder, Ty, ResultVals);
752 IRBuilder<> Builder(&BO);
753 Builder.SetCurrentDebugLocation(BO.getDebugLoc());
755 Builder.setFastMathFlags(FPOp->getFastMathFlags());
757 Value *NewSelect = Builder.CreateSelect(Sel->getCondition(),
769 AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
772 Value *Frexp = Builder.CreateIntrinsic(Intrinsic::frexp,
773 {Ty, Builder.getInt32Ty()}, Src);
774 Value *FrexpMant = Builder.CreateExtractValue(Frexp, {0});
782 ? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
783 {Builder.getInt32Ty(), Ty}, Src)
784 : Builder.CreateExtractValue(Frexp, {1});
789 Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
795 Src = Builder.CreateFNeg(Src);
806 auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);
807 Value *ScaleFactor = Builder.CreateNeg(FrexpExp);
808 Value *Rcp = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMant);
809 return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});
813 Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
825 auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);
828 Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMantRHS);
830 auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);
831 Value *Mul = Builder.CreateFMul(FrexpMantLHS, Rcp);
835 Value *ExpDiff = Builder.CreateSub(FrexpExpLHS, FrexpExpRHS);
836 return Builder.CreateCall(getLdexpF32(), {Mul, ExpDiff});
840 Value *AMDGPUCodeGenPrepareImpl::emitSqrtIEEE2ULP(IRBuilder<> &Builder,
847 Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
849 ConstantInt *Zero = Builder.getInt32(0);
851 Builder.CreateSelect(NeedScale, Builder.getInt32(32), Zero);
853 Value *Scaled = Builder.CreateCall(getLdexpF32(), {Src, InputScaleFactor});
855 Value *Sqrt = Builder.CreateCall(getSqrtF32(), Scaled);
858 Builder.CreateSelect(NeedScale, Builder.getInt32(-16), Zero);
859 return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});
863 static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
874 Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
880 Value *InputScaleFactor = Builder.CreateSelect(NeedScale, InputScale, One);
882 Value *ScaledInput = Builder.CreateFMul(Src, InputScaleFactor);
883 Value *Rsq = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, ScaledInput);
884 Value *OutputScaleFactor = Builder.CreateSelect(
887 return Builder.CreateFMul(Rsq, OutputScaleFactor);
903 IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
922 IRBuilder<>::FastMathFlagGuard Guard(Builder);
923 Builder.setFastMathFlags(DivFMF | SqrtFMF);
927 Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
929 return IsNegative ? Builder.CreateFNeg(Result) : Result;
932 return emitRsqIEEE1ULP(Builder, Den, IsNegative);
945 AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder, Value *Num,
962 Src = Builder.CreateFNeg(Src);
975 return Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, Src);
980 return emitRcpIEEE1ULP(Builder, Src, IsNegative);
990 Value *Recip = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, Den);
991 return Builder.CreateFMul(Num, Recip);
994 Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
995 return Builder.CreateFMul(Num, Recip);
1009 IRBuilder<> &Builder, Value *Num, Value *Den, float ReqdAccuracy) const {
1030 return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {}, {Num, Den});
1034 IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
1039 optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);
1044 Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
1052 Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);
1056 return emitFrexpDiv(Builder, Num, Den, DivFMF);
1121 IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
1122 Builder.setFastMathFlags(DivFMF);
1123 Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
1128 extractValues(Builder, NumVals, Num);
1129 extractValues(Builder, DenVals, Den);
1132 extractValues(Builder, RsqDenVals, RsqOp);
1141 visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,
1148 NewElt = Builder.CreateFDiv(NumElt, DenElt);
1156 Value *NewVal = insertValues(Builder, FDiv.getType(), ResultVals);
1172 static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
1174 Type *I32Ty = Builder.getInt32Ty();
1175 Type *I64Ty = Builder.getInt64Ty();
1177 Value *LHS_EXT64 = Builder.CreateZExt(LHS, I64Ty);
1178 Value *RHS_EXT64 = Builder.CreateZExt(RHS, I64Ty);
1179 Value *MUL64 = Builder.CreateMul(LHS_EXT64, RHS_EXT64);
1180 Value *Lo = Builder.CreateTrunc(MUL64, I32Ty);
1181 Value *Hi = Builder.CreateLShr(MUL64, Builder.getInt64(32));
1182 Hi = Builder.CreateTrunc(Hi, I32Ty);
1186 static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
1187 return getMul64(Builder, LHS, RHS).second;
1214 Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
1224 return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
1228 IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den,
1230 Type *I32Ty = Builder.getInt32Ty();
1231 Num = Builder.CreateTrunc(Num, I32Ty);
1232 Den = Builder.CreateTrunc(Den, I32Ty);
1234 Type *F32Ty = Builder.getFloatTy();
1235 ConstantInt *One = Builder.getInt32(1);
1240 JQ = Builder.CreateXor(Num, Den);
1243 JQ = Builder.CreateAShr(JQ, Builder.getInt32(30));
1246 JQ = Builder.CreateOr(JQ, One);
1256 Value *FA = IsSigned ? Builder.CreateSIToFP(IA, F32Ty)
1257 : Builder.CreateUIToFP(IA, F32Ty);
1260 Value *FB = IsSigned ? Builder.CreateSIToFP(IB,F32Ty)
1261 : Builder.CreateUIToFP(IB,F32Ty);
1264 Builder.getFloatTy());
1265 Value *RCP = Builder.CreateCall(RcpDecl, { FB });
1266 Value *FQM = Builder.CreateFMul(FA, RCP);
1269 CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
1270 FQ->copyFastMathFlags(Builder.getFastMathFlags());
1273 Value *FQNeg = Builder.CreateFNeg(FQ);
1279 Value *FR = Builder.CreateIntrinsic(FMAD,
1283 Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
1284 : Builder.CreateFPToUI(FQ, I32Ty);
1287 FR = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FR, FQ);
1290 FB = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FB, FQ);
1293 Value *CV = Builder.CreateFCmpOGE(FR, FB);
1296 JQ = Builder.CreateSelect(CV, JQ, Builder.getInt32(0));
1299 Value *Div = Builder.CreateAdd(IQ, JQ);
1304 Value *Rem = Builder.CreateMul(Div, Den);
1305 Res = Builder.CreateSub(Num, Rem);
1313 Res = Builder.CreateShl(Res, InRegBits);
1314 Res = Builder.CreateAShr(Res, InRegBits);
1317 = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
1318 Res = Builder.CreateAnd(Res, TruncMask);
1362 static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
1369 return Builder.CreateAShr(V, Builder.getInt32(31));
1372 Value *AMDGPUCodeGenPrepareImpl::expandDivRem32(IRBuilder<> &Builder,
1381 Builder.setFastMathFlags(FMF);
1390 Type *I32Ty = Builder.getInt32Ty();
1391 Type *F32Ty = Builder.getFloatTy();
1395 X = Builder.CreateSExtOrTrunc(X, I32Ty);
1396 Y = Builder.CreateSExtOrTrunc(Y, I32Ty);
1398 X = Builder.CreateZExtOrTrunc(X, I32Ty);
1399 Y = Builder.CreateZExtOrTrunc(Y, I32Ty);
1403 if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {
1404 return IsSigned ? Builder.CreateSExtOrTrunc(Res, Ty) :
1405 Builder.CreateZExtOrTrunc(Res, Ty);
1408 ConstantInt *Zero = Builder.getInt32(0);
1409 ConstantInt *One = Builder.getInt32(1);
1413 Value *SignX = getSign32(X, Builder, DL);
1414 Value *SignY = getSign32(Y, Builder, DL);
1416 Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;
1418 X = Builder.CreateAdd(X, SignX);
1419 Y = Builder.CreateAdd(Y, SignY);
1421 X = Builder.CreateXor(X, SignX);
1422 Y = Builder.CreateXor(Y, SignY);
1457 Value *FloatY = Builder.CreateUIToFP(Y, F32Ty);
1459 Value *RcpY = Builder.CreateCall(Rcp, {FloatY});
1461 Value *ScaledY = Builder.CreateFMul(RcpY, Scale);
1462 Value *Z = Builder.CreateFPToUI(ScaledY, I32Ty);
1465 Value *NegY = Builder.CreateSub(Zero, Y);
1466 Value *NegYZ = Builder.CreateMul(NegY, Z);
1467 Z = Builder.CreateAdd(Z, getMulHu(Builder, Z, NegYZ));
1470 Value *Q = getMulHu(Builder, X, Z);
1471 Value *R = Builder.CreateSub(X, Builder.CreateMul(Q, Y));
1474 Value *Cond = Builder.CreateICmpUGE(R, Y);
1476 Q = Builder.CreateSelect(Cond, Builder.CreateAdd(Q, One), Q);
1477 R = Builder.CreateSelect(Cond, Builder.CreateSub(R, Y), R);
1480 Cond = Builder.CreateICmpUGE(R, Y);
1483 Res = Builder.CreateSelect(Cond, Builder.CreateAdd(Q, One), Q);
1485 Res = Builder.CreateSelect(Cond, Builder.CreateSub(R, Y), R);
1488 Res = Builder.CreateXor(Res, Sign);
1489 Res = Builder.CreateSub(Res, Sign);
1490 Res = Builder.CreateSExtOrTrunc(Res, Ty);
1492 Res = Builder.CreateZExtOrTrunc(Res, Ty);
1497 Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
1514 Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
1517 Narrowed = expandDivRem32(Builder, I, Num, Den);
1521 return IsSigned ? Builder.CreateSExt(Narrowed, Num->getType()) :
1522 Builder.CreateZExt(Narrowed, Num->getType());
1569 IRBuilder<> Builder(&I);
1570 Builder.SetCurrentDebugLocation(I.getDebugLoc());
1576 Value *NumEltN = Builder.CreateExtractElement(Num, N);
1577 Value *DenEltN = Builder.CreateExtractElement(Den, N);
1581 NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
1583 NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
1587 NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);
1592 NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
1600 NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
1604 NewDiv = expandDivRem32(Builder, I, Num, Den);
1606 NewDiv = shrinkDivRem64(Builder, I, Num, Den);
1638 IRBuilder<> Builder(&I);
1639 Builder.SetCurrentDebugLocation(I.getDebugLoc());
1641 Type *I32Ty = Builder.getInt32Ty();
1642 LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, I.getPointerOperand());
1666 Type *IntNTy = Builder.getIntNTy(TySize);
1667 Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
1668 Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
1708 IRBuilder<> Builder(&I);
1709 Builder.setFastMathFlags(FPOp->getFastMathFlags());
1718 Fract = applyFractPat(Builder, CmpVal);
1722 Fract = applyFractPat(Builder, CmpVal);
2159 Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
2162 extractValues(Builder, FractVals, FractArg);
2169 Builder.CreateIntrinsic(Intrinsic::amdgcn_fract, {Ty}, {FractVals[I]});
2172 return insertValues(Builder, FractArg->getType(), ResultVals);
2186 IRBuilder<> Builder(&I);
2189 Builder.setFastMathFlags(FMF);
2191 Value *Fract = applyFractPat(Builder, FractArg);
2244 IRBuilder<> Builder(&Sqrt);
2246 extractValues(Builder, SrcVals, SrcVal);
2251 ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);
2253 ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);
2256 Value *NewSqrt = insertValues(Builder, Sqrt.getType(), ResultVals);