Lines Matching defs:Builder

261   Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I,
265 Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I,
270 Value* expandDivRem32(IRBuilder<> &Builder, BinaryOperator &I,
273 Value *shrinkDivRem64(IRBuilder<> &Builder, BinaryOperator &I,
288 Value *applyFractPat(IRBuilder<> &Builder, Value *FractArg);
293 Value *optimizeWithRsq(IRBuilder<> &Builder, Value *Num, Value *Den,
297 Value *optimizeWithRcp(IRBuilder<> &Builder, Value *Num, Value *Den,
299 Value *optimizeWithFDivFast(IRBuilder<> &Builder, Value *Num, Value *Den,
302 Value *visitFDivElement(IRBuilder<> &Builder, Value *Num, Value *Den,
307 std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,
310 Value *emitRcpIEEE1ULP(IRBuilder<> &Builder, Value *Src,
312 Value *emitFrexpDiv(IRBuilder<> &Builder, Value *LHS, Value *RHS,
314 Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
482 IRBuilder<> Builder(&I);
483 Builder.SetCurrentDebugLocation(I.getDebugLoc());
485 Type *I32Ty = getI32Ty(Builder, I.getType());
492 ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
493 ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
495 ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
496 ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
499 ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
511 TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
523 IRBuilder<> Builder(&I);
524 Builder.SetCurrentDebugLocation(I.getDebugLoc());
526 Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
532 ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
533 ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
535 ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
536 ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
538 NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
550 IRBuilder<> Builder(&I);
551 Builder.SetCurrentDebugLocation(I.getDebugLoc());
553 Type *I32Ty = getI32Ty(Builder, I.getType());
560 ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
561 ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
563 ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
564 ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
566 ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
567 TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
582 IRBuilder<> Builder(&I);
583 Builder.SetCurrentDebugLocation(I.getDebugLoc());
585 Type *I32Ty = getI32Ty(Builder, I.getType());
586 Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
588 Builder.CreateIntrinsic(Intrinsic::bitreverse, {I32Ty}, {ExtOp});
590 Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
592 Builder.CreateTrunc(LShrOp, I.getType());
608 static void extractValues(IRBuilder<> &Builder,
617 Values.push_back(Builder.CreateExtractElement(V, I));
620 static Value *insertValues(IRBuilder<> &Builder,
630 NewVal = Builder.CreateInsertElement(NewVal, Values[I], I);
650 IRBuilder<> Builder(&I);
651 Builder.SetCurrentDebugLocation(I.getDebugLoc());
670 extractValues(Builder, LHSVals, LHS);
671 extractValues(Builder, RHSVals, RHS);
673 IntegerType *I32Ty = Builder.getInt32Ty();
674 IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
678 Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
679 : Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
680 Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
681 : Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
684 Value *Result = Builder.CreateIntrinsic(ID, {IntrinTy}, {LHS, RHS});
685 Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
686 : Builder.CreateZExtOrTrunc(Result, DstTy);
690 Value *NewVal = insertValues(Builder, Ty, ResultVals);
758 IRBuilder<> Builder(&BO);
759 Builder.SetCurrentDebugLocation(BO.getDebugLoc());
761 Builder.setFastMathFlags(FPOp->getFastMathFlags());
763 Value *NewSelect = Builder.CreateSelect(Sel->getCondition(),
775 AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
778 Value *Frexp = Builder.CreateIntrinsic(Intrinsic::frexp,
779 {Ty, Builder.getInt32Ty()}, Src);
780 Value *FrexpMant = Builder.CreateExtractValue(Frexp, {0});
788 ? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
789 {Builder.getInt32Ty(), Ty}, Src)
790 : Builder.CreateExtractValue(Frexp, {1});
795 Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
801 Src = Builder.CreateFNeg(Src);
812 auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);
813 Value *ScaleFactor = Builder.CreateNeg(FrexpExp);
814 Value *Rcp = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMant);
815 return Builder.CreateCall(getLdexpF32(), {Rcp, ScaleFactor});
819 Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
831 auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);
834 Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMantRHS);
836 auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);
837 Value *Mul = Builder.CreateFMul(FrexpMantLHS, Rcp);
841 Value *ExpDiff = Builder.CreateSub(FrexpExpLHS, FrexpExpRHS);
842 return Builder.CreateCall(getLdexpF32(), {Mul, ExpDiff});
846 Value *AMDGPUCodeGenPrepareImpl::emitSqrtIEEE2ULP(IRBuilder<> &Builder,
853 Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
855 ConstantInt *Zero = Builder.getInt32(0);
857 Builder.CreateSelect(NeedScale, Builder.getInt32(32), Zero);
859 Value *Scaled = Builder.CreateCall(getLdexpF32(), {Src, InputScaleFactor});
861 Value *Sqrt = Builder.CreateCall(getSqrtF32(), Scaled);
864 Builder.CreateSelect(NeedScale, Builder.getInt32(-16), Zero);
865 return Builder.CreateCall(getLdexpF32(), {Sqrt, OutputScaleFactor});
869 static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
880 Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
886 Value *InputScaleFactor = Builder.CreateSelect(NeedScale, InputScale, One);
888 Value *ScaledInput = Builder.CreateFMul(Src, InputScaleFactor);
889 Value *Rsq = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, ScaledInput);
890 Value *OutputScaleFactor = Builder.CreateSelect(
893 return Builder.CreateFMul(Rsq, OutputScaleFactor);
909 IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
928 IRBuilder<>::FastMathFlagGuard Guard(Builder);
929 Builder.setFastMathFlags(DivFMF | SqrtFMF);
933 Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
935 return IsNegative ? Builder.CreateFNeg(Result) : Result;
938 return emitRsqIEEE1ULP(Builder, Den, IsNegative);
951 AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder, Value *Num,
968 Src = Builder.CreateFNeg(Src);
981 return Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, Src);
986 return emitRcpIEEE1ULP(Builder, Src, IsNegative);
996 Value *Recip = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, Den);
997 return Builder.CreateFMul(Num, Recip);
1000 Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
1001 return Builder.CreateFMul(Num, Recip);
1015 IRBuilder<> &Builder, Value *Num, Value *Den, float ReqdAccuracy) const {
1036 return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {}, {Num, Den});
1040 IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
1045 optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);
1050 Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
1058 Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);
1062 return emitFrexpDiv(Builder, Num, Den, DivFMF);
1127 IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
1128 Builder.setFastMathFlags(DivFMF);
1129 Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
1134 extractValues(Builder, NumVals, Num);
1135 extractValues(Builder, DenVals, Den);
1138 extractValues(Builder, RsqDenVals, RsqOp);
1147 visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,
1154 NewElt = Builder.CreateFDiv(NumElt, DenElt);
1162 Value *NewVal = insertValues(Builder, FDiv.getType(), ResultVals);
1173 static std::pair<Value*, Value*> getMul64(IRBuilder<> &Builder,
1175 Type *I32Ty = Builder.getInt32Ty();
1176 Type *I64Ty = Builder.getInt64Ty();
1178 Value *LHS_EXT64 = Builder.CreateZExt(LHS, I64Ty);
1179 Value *RHS_EXT64 = Builder.CreateZExt(RHS, I64Ty);
1180 Value *MUL64 = Builder.CreateMul(LHS_EXT64, RHS_EXT64);
1181 Value *Lo = Builder.CreateTrunc(MUL64, I32Ty);
1182 Value *Hi = Builder.CreateLShr(MUL64, Builder.getInt64(32));
1183 Hi = Builder.CreateTrunc(Hi, I32Ty);
1187 static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
1188 return getMul64(Builder, LHS, RHS).second;
1238 Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
1245 return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
1249 IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den,
1251 Type *I32Ty = Builder.getInt32Ty();
1252 Num = Builder.CreateTrunc(Num, I32Ty);
1253 Den = Builder.CreateTrunc(Den, I32Ty);
1255 Type *F32Ty = Builder.getFloatTy();
1256 ConstantInt *One = Builder.getInt32(1);
1261 JQ = Builder.CreateXor(Num, Den);
1264 JQ = Builder.CreateAShr(JQ, Builder.getInt32(30));
1267 JQ = Builder.CreateOr(JQ, One);
1277 Value *FA = IsSigned ? Builder.CreateSIToFP(IA, F32Ty)
1278 : Builder.CreateUIToFP(IA, F32Ty);
1281 Value *FB = IsSigned ? Builder.CreateSIToFP(IB,F32Ty)
1282 : Builder.CreateUIToFP(IB,F32Ty);
1284 Value *RCP = Builder.CreateIntrinsic(Intrinsic::amdgcn_rcp,
1285 Builder.getFloatTy(), {FB});
1286 Value *FQM = Builder.CreateFMul(FA, RCP);
1289 CallInst *FQ = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, FQM);
1290 FQ->copyFastMathFlags(Builder.getFastMathFlags());
1293 Value *FQNeg = Builder.CreateFNeg(FQ);
1299 Value *FR = Builder.CreateIntrinsic(FMAD,
1303 Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty)
1304 : Builder.CreateFPToUI(FQ, I32Ty);
1307 FR = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FR, FQ);
1310 FB = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FB, FQ);
1313 Value *CV = Builder.CreateFCmpOGE(FR, FB);
1316 JQ = Builder.CreateSelect(CV, JQ, Builder.getInt32(0));
1319 Value *Div = Builder.CreateAdd(IQ, JQ);
1324 Value *Rem = Builder.CreateMul(Div, Den);
1325 Res = Builder.CreateSub(Num, Rem);
1333 Res = Builder.CreateShl(Res, InRegBits);
1334 Res = Builder.CreateAShr(Res, InRegBits);
1337 = Builder.getInt32((UINT64_C(1) << DivBits) - 1);
1338 Res = Builder.CreateAnd(Res, TruncMask);
1382 static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout DL) {
1389 return Builder.CreateAShr(V, Builder.getInt32(31));
1392 Value *AMDGPUCodeGenPrepareImpl::expandDivRem32(IRBuilder<> &Builder,
1401 Builder.setFastMathFlags(FMF);
1410 Type *I32Ty = Builder.getInt32Ty();
1411 Type *F32Ty = Builder.getFloatTy();
1415 X = Builder.CreateSExtOrTrunc(X, I32Ty);
1416 Y = Builder.CreateSExtOrTrunc(Y, I32Ty);
1418 X = Builder.CreateZExtOrTrunc(X, I32Ty);
1419 Y = Builder.CreateZExtOrTrunc(Y, I32Ty);
1423 if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) {
1424 return IsSigned ? Builder.CreateSExtOrTrunc(Res, Ty) :
1425 Builder.CreateZExtOrTrunc(Res, Ty);
1428 ConstantInt *Zero = Builder.getInt32(0);
1429 ConstantInt *One = Builder.getInt32(1);
1433 Value *SignX = getSign32(X, Builder, DL);
1434 Value *SignY = getSign32(Y, Builder, DL);
1436 Sign = IsDiv ? Builder.CreateXor(SignX, SignY) : SignX;
1438 X = Builder.CreateAdd(X, SignX);
1439 Y = Builder.CreateAdd(Y, SignY);
1441 X = Builder.CreateXor(X, SignX);
1442 Y = Builder.CreateXor(Y, SignY);
1477 Value *FloatY = Builder.CreateUIToFP(Y, F32Ty);
1478 Value *RcpY = Builder.CreateIntrinsic(Intrinsic::amdgcn_rcp, F32Ty, {FloatY});
1480 Value *ScaledY = Builder.CreateFMul(RcpY, Scale);
1481 Value *Z = Builder.CreateFPToUI(ScaledY, I32Ty);
1484 Value *NegY = Builder.CreateSub(Zero, Y);
1485 Value *NegYZ = Builder.CreateMul(NegY, Z);
1486 Z = Builder.CreateAdd(Z, getMulHu(Builder, Z, NegYZ));
1489 Value *Q = getMulHu(Builder, X, Z);
1490 Value *R = Builder.CreateSub(X, Builder.CreateMul(Q, Y));
1493 Value *Cond = Builder.CreateICmpUGE(R, Y);
1495 Q = Builder.CreateSelect(Cond, Builder.CreateAdd(Q, One), Q);
1496 R = Builder.CreateSelect(Cond, Builder.CreateSub(R, Y), R);
1499 Cond = Builder.CreateICmpUGE(R, Y);
1502 Res = Builder.CreateSelect(Cond, Builder.CreateAdd(Q, One), Q);
1504 Res = Builder.CreateSelect(Cond, Builder.CreateSub(R, Y), R);
1507 Res = Builder.CreateXor(Res, Sign);
1508 Res = Builder.CreateSub(Res, Sign);
1509 Res = Builder.CreateSExtOrTrunc(Res, Ty);
1511 Res = Builder.CreateZExtOrTrunc(Res, Ty);
1516 Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
1533 Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits,
1536 Narrowed = expandDivRem32(Builder, I, Num, Den);
1540 return IsSigned ? Builder.CreateSExt(Narrowed, Num->getType()) :
1541 Builder.CreateZExt(Narrowed, Num->getType());
1588 IRBuilder<> Builder(&I);
1589 Builder.SetCurrentDebugLocation(I.getDebugLoc());
1595 Value *NumEltN = Builder.CreateExtractElement(Num, N);
1596 Value *DenEltN = Builder.CreateExtractElement(Den, N);
1600 NewElt = expandDivRem32(Builder, I, NumEltN, DenEltN);
1602 NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
1606 NewElt = shrinkDivRem64(Builder, I, NumEltN, DenEltN);
1611 NewElt = Builder.CreateBinOp(Opc, NumEltN, DenEltN);
1619 NewDiv = Builder.CreateInsertElement(NewDiv, NewElt, N);
1623 NewDiv = expandDivRem32(Builder, I, Num, Den);
1625 NewDiv = shrinkDivRem64(Builder, I, Num, Den);
1657 IRBuilder<> Builder(&I);
1658 Builder.SetCurrentDebugLocation(I.getDebugLoc());
1660 Type *I32Ty = Builder.getInt32Ty();
1661 LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, I.getPointerOperand());
1685 Type *IntNTy = Builder.getIntNTy(TySize);
1686 Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
1687 Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
1727 IRBuilder<> Builder(&I);
1728 Builder.setFastMathFlags(FPOp->getFastMathFlags());
1737 Fract = applyFractPat(Builder, CmpVal);
1741 Fract = applyFractPat(Builder, CmpVal);
2178 Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
2181 extractValues(Builder, FractVals, FractArg);
2188 Builder.CreateIntrinsic(Intrinsic::amdgcn_fract, {Ty}, {FractVals[I]});
2191 return insertValues(Builder, FractArg->getType(), ResultVals);
2205 IRBuilder<> Builder(&I);
2208 Builder.setFastMathFlags(FMF);
2210 Value *Fract = applyFractPat(Builder, FractArg);
2263 IRBuilder<> Builder(&Sqrt);
2265 extractValues(Builder, SrcVals, SrcVal);
2270 ResultVals[I] = Builder.CreateCall(getSqrtF32(), SrcVals[I]);
2272 ResultVals[I] = emitSqrtIEEE2ULP(Builder, SrcVals[I], SqrtFMF);
2275 Value *NewSqrt = insertValues(Builder, Sqrt.getType(), ResultVals);