Lines Matching +full:segment +full:- +full:no +full:- +full:remap
1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
34 #define DEBUG_TYPE "hexagon-isel"
39 // --------------------------------------------------------------------
43 // - Forward delta.
44 // - Reverse delta.
45 // - Benes.
56 // positions as long as there is no conflict.
62 // |- 1 ---------------|- 2 -----|- 3 -|
111 static constexpr Node Ignore = Node(-1);
141 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2; in conj()
146 return F != Colors.end() ? F->second : ColorKind::None; in getColor()
282 dbgs() << " -"; in dump()
292 dbgs() << " " << E.first << " -> {"; in dump()
313 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n"; in dump()
325 static constexpr ElemType Ignore = ElemType(-1);
358 W |= C << (Log-1-L); in getControls()
451 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2); in route()
463 P[J] -= Num/2; in route()
476 unsigned Pets = Log-1 - Step; in route()
480 // In this step half-switching occurs, so coloring can be used. in route()
535 P[J] -= Num/2; in route()
554 unsigned Pets = 2*Log-1 - Step; in route()
557 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will in route()
571 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2; in route()
606 P[J] -= Num/2; in route()
617 // --------------------------------------------------------------------
678 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {} in ResultStack()
683 return List.size()-1; in push()
694 unsigned top() const { return size()-1; } in top()
717 OpV.getNode()->print(OS, &G); in print()
755 InpNode->dumpr(&G); in print()
770 if (M == -1) in ShuffleMask()
772 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M); in ShuffleMask()
773 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M); in ShuffleMask()
778 int MinSrc = -1, MaxSrc = -1;
862 // clang-format off in vpack()
865 // clang-format on in vpack()
886 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size); in vdeal()
903 auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT { in mask()
912 // --------------------------------------------------------------------
941 assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs. in getPairVT()
1008 MaskL[I] = MaskR[I] = -1; in splitMask()
1011 MaskR[I] = -1; in splitMask()
1013 MaskL[I] = -1; in splitMask()
1014 MaskR[I] = M-VecLen; in splitMask()
1025 if (A[I] - E != Inc) in findStrip()
1034 if (Idx != -1) in isUndef()
1051 // Check if the second half of the mask is all-undef. in isLowHalfOnly()
1059 if (SM.MaxSrc == -1) in getInputSegmentList()
1080 // segments, where the first output segment has only elements of the in getOutputSegmentMap()
1081 // input segment at index 1. The next output segment only has elements in getOutputSegmentMap()
1082 // of the input segment 3, etc. in getOutputSegmentMap()
1083 // If an output segment only has undef elements, the value will be ~0u. in getOutputSegmentMap()
1084 // If an output segment has elements from more than one input segment, in getOutputSegmentMap()
1096 unsigned G = M / SegLen; // Input segment of this element. in getOutputSegmentMap()
1113 for (int I = OutSegMap.size() - 1; I >= 0; --I) { in packSegmentMask()
1127 M = (M & (SegLen-1)) + SegLen*OutIdx; in packSegmentMask()
1146 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL) in selectVectorConstants()
1148 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j) in selectVectorConstants()
1149 WorkQ.insert(W->getOperand(j).getNode()); in selectVectorConstants()
1212 OutN->dumpr(&DAG); in materialize()
1249 Amount -= VecLen; in funnels()
1256 } else if (isUInt<3>(VecLen - Amount)) { in funnels()
1257 SDValue A = getConst32(VecLen - Amount, dl); in funnels()
1262 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)}); in funnels()
1268 // pack these halves into a single vector, and remap SM into NewMask to use
1297 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) { in packs()
1299 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl); in packs()
1310 // Segment is a vector half. in packs()
1326 // (i.e. a multiple of 2*SegLen), and non-zero. in packs()
1327 // The output segment map is computed, and it may have any even number of in packs()
1329 // on the first two (non-undef) entries in the segment map. in packs()
1356 M -= SrcOp * HwLen; in packs()
1369 // If Seg0 or Seg1 are "multi-defined", pick them from the input in packs()
1370 // segment list instead. in packs()
1402 // vshuff(CD,AB,HL) -> BD:AC in packs()
1403 // vshuff(AB,CD,HL) -> DB:CA in packs()
1438 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) { in packs()
1443 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) { in packs()
1451 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) { in packs()
1456 ShiftR -= HwLen; in packs()
1462 if (M != -1) in packs()
1463 M -= SMA.MinSrc; in packs()
1469 // By here, packing by segment (half-vector) shuffling, and vector alignment in packs()
1482 if (M == -1) in packs()
1485 M -= HwLen; in packs()
1501 // pack these vectors into a pair, and remap SM into NewMask to use the
1534 // and so there is no extra cost added in case the order needs to be in packp()
1548 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)}); in vmuxs()
1549 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va}); in vmuxs()
1567 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); })); in shuffs1()
1634 if (MaskL[I] != -1) in shuffs2()
1664 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half in shuffp1()
1665 // is all-undef) may produce a perfect shuffle that generates legitimate in shuffp1()
1706 if (MaskL[I] != -1) in shuffp2()
1733 *F->second = nullptr; in erase()
1749 assert(ISelN->getOpcode() == HexagonISD::ISEL); in select()
1750 SDNode *N0 = ISelN->getOperand(0).getNode(); in select()
1759 if (!N0->isMachineOpcode()) { in select()
1762 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; }; in select()
1763 if (llvm::all_of(N0->uses(), IsISelN)) in select()
1777 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool { in select()
1780 if (T->use_empty() || NonDom.count(T)) in select()
1782 for (SDNode *U : T->uses()) { in select()
1783 // If T is reachable from a known non-dominated node, then T itself in select()
1784 // is non-dominated. in select()
1798 for (SDValue Op : SubNodes[I]->ops()) { in select()
1810 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) { in select()
1821 for (SDNode *U : S->uses()) { in select()
1826 if (F->second > 0 && !--F->second) in select()
1827 TmpQ.insert(F->first); in select()
1841 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);}); in select()
1883 M -= VecLen; in scalarizeShuffle()
1890 M -= HwLen; in scalarizeShuffle()
1916 assert(!N->use_empty()); in scalarizeShuffle()
1926 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t { in getPerfectCompletions()
1963 // by P and Q are either equal or disjoint (no partial overlap). in getPerfectCompletions()
1965 // Illustration: For 4-bit values there are 4 complete sequences: in getPerfectCompletions()
2027 uint32_t T = P ^ ((P - 1) & P); in completeToPerfect()
2043 assert(OrAll == (1u << Width) -1); in completeToPerfect()
2062 // Add WrapAt in an attempt to keep I+Dist non-negative. in rotationDistance()
2063 Dist = M - I; in rotationDistance()
2098 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool { in contracting()
2143 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb in contracting()
2144 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such in contracting()
2157 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)}); in contracting()
2158 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)}); in contracting()
2183 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so in expanding()
2189 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type"); in expanding()
2196 // vunpacku: 0, -1, L, -1, 2L, -1 ... in expanding()
2201 // The vunpackus only handle byte and half-word. in expanding()
2208 // First, check the non-ignored strips. in expanding()
2210 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); in expanding()
2216 // Check the -1s. in expanding()
2218 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I); in expanding()
2219 if (S.first != -1 || S.second != unsigned(L)) in expanding()
2258 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*] in perfect()
2273 // matrices (or "sub-matrices"), given a specific group size. For example, in perfect()
2335 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen; in perfect()
2350 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be in perfect()
2353 // order being from left to right. Any (contiguous) segment where the in perfect()
2381 // If the cycle contains LogLen-1, move it to the front of the cycle. in perfect()
2383 auto canonicalize = [LogLen](const CycleType &C) -> CycleType { in perfect()
2386 if (C[LogPos] == LogLen - 1) in perfect()
2402 if (C[0] != Len - 1) in perfect()
2404 int D = Len - C.size(); in perfect()
2409 for (unsigned I = 1; I != Len - D; ++I) { in perfect()
2410 if (C[I] != Len - 1 - I) in perfect()
2412 if (C[I] != I - (1 - D)) // I-1, I in perfect()
2415 // At most one, IsDeal or IsShuff, can be non-zero. in perfect()
2449 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5 in perfect()
2463 // artificially add "LogLen-1" at both ends of the sequence. in perfect()
2465 SwapElems.push_back(LogLen - 1); in perfect()
2467 // Do the transformation: (a1..an) -> (M a1..an)(M a1). in perfect()
2468 unsigned First = (C[0] == LogLen - 1) ? 1 : 0; in perfect()
2474 SwapElems.push_back(LogLen - 1); in perfect()
2482 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1]; in perfect()
2484 if (I < E - 1) { in perfect()
2485 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1])) in perfect()
2497 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)}; in perfect()
2523 if (M != -1 && M >= VecLen) in butterfly()
2550 {OpRef::res(-1), OpRef(CtlR)}); in butterfly()
2574 SDValue Inp = N->getOperand(0); in selectExtractSubvector()
2575 MVT ResTy = N->getValueType(0).getSimpleVT(); in selectExtractSubvector()
2576 unsigned Idx = N->getConstantOperandVal(1); in selectExtractSubvector()
2593 N->dump(&DAG); in selectShuffle()
2595 MVT ResTy = N->getValueType(0).getSimpleVT(); in selectShuffle()
2600 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end()); in selectShuffle()
2603 if (Idx != -1 && Idx < 0) in selectShuffle()
2604 Idx = -1; in selectShuffle()
2615 if (Mask[I] == -1) in selectShuffle()
2630 // If the mask is all -1's, generate "undef". in selectShuffle()
2636 SDValue Vec0 = N->getOperand(0); in selectShuffle()
2637 SDValue Vec1 = N->getOperand(1); in selectShuffle()
2668 SN->dumpr(&DAG); in selectShuffle()
2676 MVT Ty = N->getValueType(0).getSimpleVT(); in selectRor()
2678 SDValue VecV = N->getOperand(0); in selectRor()
2679 SDValue RotV = N->getOperand(1); in selectRor()
2683 unsigned S = CN->getZExtValue() % HST.getVectorLength(); in selectRor()
2699 SDValue Vv = N->getOperand(0); in selectVAlign()
2700 SDValue Vu = N->getOperand(1); in selectVAlign()
2701 SDValue Rt = N->getOperand(2); in selectVAlign()
2703 N->getValueType(0), {Vv, Vu, Rt}); in selectVAlign()
2709 auto getNodes = [this]() -> std::vector<SDNode *> { in PreprocessHvxISelDAG()
2711 T.reserve(CurDAG->allnodes_size()); in PreprocessHvxISelDAG()
2712 for (SDNode &N : CurDAG->allnodes()) in PreprocessHvxISelDAG()
2738 unsigned HwLen = HST->getVectorLength(); in ppHvxShuffleOfShuffle()
2750 const MapType &OpMap) -> int { in ppHvxShuffleOfShuffle()
2752 // Idx as a (non-undef) element of the top level shuffle's mask, that in ppHvxShuffleOfShuffle()
2759 Idx -= HwLen; in ppHvxShuffleOfShuffle()
2762 int MaybeN = OpShuff->getMaskElt(Idx); in ppHvxShuffleOfShuffle()
2764 return -1; in ppHvxShuffleOfShuffle()
2767 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0)) in ppHvxShuffleOfShuffle()
2768 : OpMap.at(OpShuff->getOperand(1)); in ppHvxShuffleOfShuffle()
2770 N -= HwLen; in ppHvxShuffleOfShuffle()
2775 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue { in ppHvxShuffleOfShuffle()
2780 ArrayRef<int> TopMask = This->getMask(); in ppHvxShuffleOfShuffle()
2783 assert(TopMask.size() == S0->getMask().size() && in ppHvxShuffleOfShuffle()
2784 TopMask.size() == S1->getMask().size()); in ppHvxShuffleOfShuffle()
2794 FoldedMask[I] = -1; in ppHvxShuffleOfShuffle()
2797 // The second half of the result will be all-undef. in ppHvxShuffleOfShuffle()
2798 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1); in ppHvxShuffleOfShuffle()
2813 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> { in ppHvxShuffleOfShuffle()
2819 !cast<ConstantSDNode>(V.getOperand(1))->isZero()); in ppHvxShuffleOfShuffle()
2823 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) in ppHvxShuffleOfShuffle()
2825 EVT ResTy = N->getValueType(0); in ppHvxShuffleOfShuffle()
2831 SDValue V0 = N->getOperand(0); in ppHvxShuffleOfShuffle()
2832 SDValue V1 = N->getOperand(1); in ppHvxShuffleOfShuffle()
2846 if (!V0B.has_value() || V0B->Src != V0A->Src) in ppHvxShuffleOfShuffle()
2849 if (!V1A.has_value() || V1A->Src != V0A->Src) in ppHvxShuffleOfShuffle()
2852 if (!V1B.has_value() || V1B->Src != V0A->Src) in ppHvxShuffleOfShuffle()
2857 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen); in ppHvxShuffleOfShuffle()
2860 {V0.getOperand(0), V0A->HalfIdx * HwLen}, in ppHvxShuffleOfShuffle()
2861 {V0.getOperand(1), V0B->HalfIdx * HwLen}, in ppHvxShuffleOfShuffle()
2862 {V1.getOperand(0), V1A->HalfIdx * HwLen}, in ppHvxShuffleOfShuffle()
2863 {V1.getOperand(1), V1B->HalfIdx * HwLen}, in ppHvxShuffleOfShuffle()
2865 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap)); in ppHvxShuffleOfShuffle()
2888 SDValue Chain = N->getOperand(0); in SelectV65GatherPred()
2889 SDValue Address = N->getOperand(2); in SelectV65GatherPred()
2890 SDValue Predicate = N->getOperand(3); in SelectV65GatherPred()
2891 SDValue Base = N->getOperand(4); in SelectV65GatherPred()
2892 SDValue Modifier = N->getOperand(5); in SelectV65GatherPred()
2893 SDValue Offset = N->getOperand(6); in SelectV65GatherPred()
2894 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32); in SelectV65GatherPred()
2897 unsigned IntNo = N->getConstantOperandVal(1); in SelectV65GatherPred()
2915 SDVTList VTs = CurDAG->getVTList(MVT::Other); in SelectV65GatherPred()
2918 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); in SelectV65GatherPred()
2920 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); in SelectV65GatherPred()
2921 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); in SelectV65GatherPred()
2928 SDValue Chain = N->getOperand(0); in SelectV65Gather()
2929 SDValue Address = N->getOperand(2); in SelectV65Gather()
2930 SDValue Base = N->getOperand(3); in SelectV65Gather()
2931 SDValue Modifier = N->getOperand(4); in SelectV65Gather()
2932 SDValue Offset = N->getOperand(5); in SelectV65Gather()
2933 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32); in SelectV65Gather()
2936 unsigned IntNo = N->getConstantOperandVal(1); in SelectV65Gather()
2954 SDVTList VTs = CurDAG->getVTList(MVT::Other); in SelectV65Gather()
2956 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); in SelectV65Gather()
2958 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); in SelectV65Gather()
2959 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); in SelectV65Gather()
2965 unsigned IID = N->getConstantOperandVal(0); in SelectHVXDualOutput()
2970 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}}; in SelectHVXDualOutput()
2971 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1); in SelectHVXDualOutput()
2972 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); in SelectHVXDualOutput()
2977 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}}; in SelectHVXDualOutput()
2978 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1); in SelectHVXDualOutput()
2979 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); in SelectHVXDualOutput()
2984 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}}; in SelectHVXDualOutput()
2985 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1); in SelectHVXDualOutput()
2986 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); in SelectHVXDualOutput()
2991 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}}; in SelectHVXDualOutput()
2992 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1); in SelectHVXDualOutput()
2993 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); in SelectHVXDualOutput()
3002 CurDAG->RemoveDeadNode(N); in SelectHVXDualOutput()