Lines Matching "non-interleaved"

1 //===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
26 "riscv-v-register-bit-width-lmul",
33 "riscv-v-slp-max-vf",
48 InstructionCost LMULCost = TLI->getLMULCost(VT);
55 Cost += TLI->getVRGatherVICost(VT);
58 Cost += TLI->getVRGatherVVCost(VT);
62 Cost += TLI->getVSlideVICost(VT);
66 Cost += TLI->getVSlideVXCost(VT);
117 assert(Ty->isIntegerTy() &&
139 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
140 if (!BO || !BO->hasOneUse())
143 if (BO->getOpcode() != Instruction::Shl)
146 if (!isa<ConstantInt>(BO->getOperand(1)))
149 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
165 assert(Ty->isIntegerTy() &&
172 // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
193 if (!getTLI()->allowsMemoryAccessForAlignment(
194 Ty->getContext(), DL, getTLI()->getValueType(DL, Ty),
195 ST->getPointerAddressSpace(), ST->getAlign()))
206 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
209 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
212 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
214 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
225 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
234 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
253 // ... and fits into the 12-bit immediate.
255 getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
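For context on the immediate checks above: andi/ori/xori/addi take a 12-bit signed immediate, so a value in [-2048, 2047] adds no cost while, say, 2048 would have to be materialized first. The special cases line up with single-instruction forms from the bit-manipulation extensions: AND with 0xffff is zext.h (Zbb), AND with 0xffffffff is zext.w/add.uw (Zba), and clearing, setting, or flipping one bit is bclri/bseti/binvi (Zbs). A multiply by 2^n +/- 1 likewise avoids materializing the constant, since for example x * 9 = (x << 3) + x and x * 7 = (x << 3) - x.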
277 return ST->hasVInstructions();
283 return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
289 // Currently, the ExpandReductions pass can't expand scalable-vector
292 switch (II->getIntrinsicID()) {
303 if (ST->hasVInstructions())
304 return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock;
309 if (ST->hasVInstructions())
310 if (unsigned MinVLen = ST->getRealMinVLen();
322 return TypeSize::getFixed(ST->getXLen());
325 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
328 (ST->hasVInstructions() &&
329 ST->getRealMinVLen() >= RISCV::RVVBitsPerBlock)
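As a worked example of the quantities above: RISCV::RVVBitsPerBlock is 64, so a core whose real maximum VLEN is 512 reports a maximum vscale of 512 / 64 = 8. For fixed-length vectorization the register bit width is the LMUL setting times the guaranteed minimum VLEN, e.g. riscv-v-register-bit-width-lmul=2 on a Zvl128b target gives 2 * 128 = 256 bits, and scalable vectorization is only offered when the guaranteed minimum VLEN is at least one 64-bit block.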
379 if (EltTp.getScalarSizeInBits() < ST->getELen()) {
383 // li a0, -1 (ignored)
386 return 2 * LT.first * TLI->getLMULCost(LT.second);
403 VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
418 auto &C = Tp->getContext();
419 auto EC = Tp->getElementCount();
438 Tp->getElementType()->getPrimitiveSizeInBits() &&
440 cast<FixedVectorType>(Tp)->getNumElements() &&
442 cast<FixedVectorType>(Tp)->getNumElements()) ==
445 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
447 auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);
454 I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
483 // If we're extracting a subvector of at most m1 size at a sub-register
484 // boundary - which unfortunately requires knowing the exact vlen - this is
491 const unsigned MinVLen = ST->getRealMinVLen();
492 const unsigned MaxVLen = ST->getRealMaxVLen();
573 else if (Index < 0 && Index > -32)
587 // addi a0, a0, -1
595 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
598 isInt<5>(LT.second.getVectorNumElements() - 1))
603 InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
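Roughly, a vector reverse is lowered as vid.v to create an index vector, a vrsub to turn index i into VL-1-i, and a vrgather.vv. The LenCost term above models materializing VL-1: free when it fits vrsub.vi's 5-bit immediate, one li otherwise, and typically a csrr/srli/addi sequence for scalable types. i1 vectors pay the extra extend cost because the mask has to be widened before the gather and compared back to a mask afterwards.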
629 // The interleaved memory access pass will lower interleaved memory ops (i.e
634 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
640 VectorType::get(VTy->getElementType(),
641 VTy->getElementCount().divideCoefficientBy(Factor));
643 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
644 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
650 auto *LegalVTy = VectorType::get(VTy->getElementType(),
659 // TODO: Return the cost of interleaved accesses for scalable vector when
667 unsigned VF = FVTy->getNumElements() / Factor;
669 // An interleaved load will look like this for Factor=3:
678 FixedVectorType::get(FVTy->getElementType(), VF * Factor);
692 // An interleaved store for 3 vectors of 4 lanes will look like
696 // %interleaved.vec = shufflevector %13, poison, <12 x i32> <interleave mask>
697 // store <12 x i32> %interleaved.vec, ptr %10, align 4
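For reference, with Factor=3 and four lanes per member the concatenated vector has 12 elements laid out as a[0..3], b[0..3], c[0..3]; the interleave mask referred to above is <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> (result lane i takes element (i % 3) * 4 + i / 3), and the matching deinterleave shuffles on the load side use the strided masks <0, 3, 6, 9>, <1, 4, 7, 10>, and <2, 5, 8, 11>.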
849 if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
858 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
861 if (ST->hasVInstructions() && LT.second.isVector()) {
888 if (ST->hasVInstructions() && LT.second.isVector())
894 if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
900 if (ST->hasVInstructions() && LT.second.isVector()) {
908 if (ST->hasVInstructions()) {
910 ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
926 // (LT.first - 1) vector adds.
927 if (ST->hasVInstructions())
929 (LT.first - 1) *
931 return 1 + (LT.first - 1);
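In other words, when the type legalizes to LT.first vector registers, the first register costs one instruction and each remaining register adds one more vector op, so a type split four ways is costed as 1 + 3 = 4.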
935 EVT ArgType = TLI->getValueType(DL, ArgTy, true);
936 if (getTLI()->shouldExpandCttzElements(ArgType))
942 // cmp + select instructions to convert -1 to EVL.
943 Type *BoolTy = Type::getInt1Ty(RetTy->getContext());
945 cast<ConstantInt>(ICA.getArgs()[1])->isZero())
954 // The RISC-V target uses at least 5 instructions to lower rounding intrinsics.
957 if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
965 if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
979 if (TLI->isOperationCustom(VPISD, LT.second))
1011 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1017 return LT.first * Entry->Cost;
1034 (Src->getScalarSizeInBits() <= ST->getELen()) &&
1035 (Dst->getScalarSizeInBits() <= ST->getELen());
1044 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1047 int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
1048 (int)Log2_32(Src->getScalarSizeInBits());
1052 const unsigned SrcEltSize = Src->getScalarSizeInBits();
1057 // vmerge.vim v8, v8, -1, v0
1066 (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
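PowDiff here is the log2 ratio of destination to source element width, and RVV widening moves come in vf2/vf4/vf8 flavours, so PowDiff - 1 indexes the right variant: a v8i8 -> v8i32 sign extend has PowDiff = 5 - 3 = 2 and maps to vsext.vf4, while an i8 -> i64 zero extend maps to vzext.vf8.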
1070 if (Dst->getScalarSizeInBits() == 1) {
1083 unsigned SrcEltSize = Src->getScalarSizeInBits();
1084 unsigned DstEltSize = Dst->getScalarSizeInBits();
1105 if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
1110 // vmerge.vim v8, v8, -1, v0
1123 if (Src->isIntOrIntVectorTy())
1133 const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
1138 return cast<FixedVectorType>(Ty)->getNumElements();
1145 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1149 if (Ty->getScalarSizeInBits() > ST->getELen())
1153 if (Ty->getElementType()->isIntegerTy(1)) {
1155 // vector_reduce_{smin,umax}(<n x i1>) --> vector_reduce_or(<n x i1>)
1156 // vector_reduce_{smax,umin}(<n x i1>) --> vector_reduce_and(<n x i1>)
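The i1 mapping follows from how a mask lane behaves as a number: signed, the lanes are 0 or -1, and unsigned they are 0 or 1, so smin and umax are nonzero exactly when some lane is set (an OR reduction), while smax and umin are nonzero only when every lane is set (an AND reduction).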
1176 Type *DstTy = Ty->getScalarType();
1177 const unsigned EltTyBits = DstTy->getScalarSizeInBits();
1178 Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
1195 Type *DstTy = Ty->getScalarType();
1197 Type *SrcTy = IntegerType::getIntNTy(DstTy->getContext(), EltTyBits);
1241 (LT.first > 1) ? (LT.first - 1) *
1251 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1255 if (Ty->getScalarSizeInBits() > ST->getELen())
1258 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1267 Type *ElementTy = Ty->getElementType();
1268 if (ElementTy->isIntegerTy(1)) {
1276 return (LT.first - 1) +
1286 return (LT.first - 1) +
1326 (LT.first > 1) ? (LT.first - 1) *
1335 if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
1340 if (ResTy->getScalarSizeInBits() > ST->getELen())
1350 if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
1354 return (LT.first - 1) +
1361 assert(OpInfo.isConstant() && "non constant operand?");
1384 EVT VT = TLI->getValueType(DL, Src, true);
1401 BaseCost *= TLI->getLMULCost(LT.second);
1415 if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
1420 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
1425 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
1426 if (CondTy->isVectorTy()) {
1427 if (ValTy->getScalarSizeInBits() == 1) {
1441 if (ValTy->getScalarSizeInBits() == 1) {
1464 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
1472 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
1483 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
1484 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
1485 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
1525 if (ST->hasConditionalMoveFusion() && I && isa<ICmpInst>(I) &&
1526 ValTy->isIntegerTy() && !I->user_empty()) {
1527 if (all_of(I->users(), [&](const User *U) {
1529 U->getType()->isIntegerTy() &&
1530 !isa<ConstantData>(U->getOperand(1)) &&
1531 !isa<ConstantData>(U->getOperand(2));
1554 assert(Val->isVectorTy() && "This must be a vector type");
1567 if (Index != -1U)
1569 // Extract/InsertElement with non-constant index is very costly when
1573 Type *ElemTy = FixedVecTy->getElementType();
1574 auto NumElems = FixedVecTy->getNumElements();
1593 if (Val->getScalarSizeInBits() == 1) {
1595 VectorType::get(IntegerType::get(Val->getContext(), 8),
1596 cast<VectorType>(Val)->getElementCount());
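The i1 special case above first widens the mask to an i8 vector with the same element count, since individual mask bits can't be addressed directly; the element insert or extract then happens in the i8 domain before converting back to a mask.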
1623 if (Index != -1U) {
1624 // The type may be split. For fixed-width vectors we can normalize the
1639 if (Val->getScalarType()->isIntegerTy() &&
1640 ST->getXLen() < Val->getScalarSizeInBits()) {
1674 if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
1679 if (isa<VectorType>(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
1692 [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
1693 if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
1694 // Two sub-cases:
1712 switch (TLI->InstructionOpcodeToISD(Opcode)) {
1725 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
1767 if (Ty->isFPOrFPVectorTy())
1781 // pointer). Typically, if Base is not a GEP-instruction and all the
1784 // base, we just calculate the cost of each non-Base GEP as an ADD operation if
1785 // any of their indices is non-constant.
1793 if (GEP->hasAllConstantIndices())
1795 // If the chain is unit-stride and BaseReg + stride*i is a legal
1806 GEP->getType()->getPointerAddressSpace()))
1808 Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
1813 SmallVector<const Value *> Indices(GEP->indices());
1814 Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
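As an illustration of the unit-stride case: four consecutive i32 loads from one base pointer can be addressed as base + {0, 4, 8, 12}, which fits the 12-bit immediate offset of RISC-V loads and stores, so the non-base GEPs in such a chain add no extra instructions; only GEPs with non-constant indices get charged as an ADD.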
1828 if (ST->enableDefaultUnroll())
1838 if (L->getHeader()->getParent()->hasOptSize())
1842 L->getExitingBlocks(ExitingBlocks);
1844 << "Blocks: " << L->getNumBlocks() << "\n"
1853 // Allowing 4 blocks permits if-then-else diamonds in the body.
1854 if (L->getNumBlocks() > 4)
1864 for (auto *BB : L->getBlocks()) {
1866 // Initial setting - Don't unroll loops containing vectorized
1868 if (I.getType()->isVectorTy())
1906 if (Ty->isVectorTy()) {
1907 if (Size.isScalable() && ST->hasVInstructions())
1910 if (ST->useRVVForFixedLengthVectors())
1911 return divideCeil(Size, ST->getRealMinVLen());
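For example, a fixed 512-bit vector type on a target whose guaranteed minimum VLEN is 128 is counted as divideCeil(512, 128) = 4 vector registers (scalable sizes are measured against RISCV::RVVBitsPerBlock, which is 64, instead).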
1935 // The RISC-V-specific policy here is "instruction number 1st priority".
1950 if (!VTy || VTy->isScalableTy())
1960 const TargetMachine &TM = getTLI()->getTargetMachine();
1963 TM.getSubtargetImpl(*Caller)->getFeatureBits();
1965 TM.getSubtargetImpl(*Callee)->getFeatureBits();
1967 // Inline a callee if its target-features are a subset of the caller's
1968 // target-features.