15ffd83dbSDimitry Andric //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric // 95ffd83dbSDimitry Andric // Define several functions to decode x86 specific shuffle semantics into a 105ffd83dbSDimitry Andric // generic vector mask. 115ffd83dbSDimitry Andric // 125ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 135ffd83dbSDimitry Andric 145ffd83dbSDimitry Andric #include "X86ShuffleDecode.h" 155ffd83dbSDimitry Andric #include "llvm/ADT/APInt.h" 165ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h" 175ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 185ffd83dbSDimitry Andric 195ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 205ffd83dbSDimitry Andric // Vector Mask Decoding 215ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 225ffd83dbSDimitry Andric 235ffd83dbSDimitry Andric namespace llvm { 245ffd83dbSDimitry Andric 255ffd83dbSDimitry Andric void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 265ffd83dbSDimitry Andric // Defaults the copying the dest value. 275ffd83dbSDimitry Andric ShuffleMask.push_back(0); 285ffd83dbSDimitry Andric ShuffleMask.push_back(1); 295ffd83dbSDimitry Andric ShuffleMask.push_back(2); 305ffd83dbSDimitry Andric ShuffleMask.push_back(3); 315ffd83dbSDimitry Andric 325ffd83dbSDimitry Andric // Decode the immediate. 335ffd83dbSDimitry Andric unsigned ZMask = Imm & 15; 345ffd83dbSDimitry Andric unsigned CountD = (Imm >> 4) & 3; 355ffd83dbSDimitry Andric unsigned CountS = (Imm >> 6) & 3; 365ffd83dbSDimitry Andric 375ffd83dbSDimitry Andric // CountS selects which input element to use. 385ffd83dbSDimitry Andric unsigned InVal = 4 + CountS; 395ffd83dbSDimitry Andric // CountD specifies which element of destination to update. 405ffd83dbSDimitry Andric ShuffleMask[CountD] = InVal; 415ffd83dbSDimitry Andric // ZMask zaps values, potentially overriding the CountD elt. 425ffd83dbSDimitry Andric if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 435ffd83dbSDimitry Andric if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 445ffd83dbSDimitry Andric if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 455ffd83dbSDimitry Andric if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 465ffd83dbSDimitry Andric } 475ffd83dbSDimitry Andric 485ffd83dbSDimitry Andric void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, 495ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 505ffd83dbSDimitry Andric assert((Idx + Len) <= NumElts && "Insertion out of range"); 515ffd83dbSDimitry Andric 525ffd83dbSDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 535ffd83dbSDimitry Andric ShuffleMask.push_back(i); 545ffd83dbSDimitry Andric for (unsigned i = 0; i != Len; ++i) 555ffd83dbSDimitry Andric ShuffleMask[Idx + i] = NumElts + i; 565ffd83dbSDimitry Andric } 575ffd83dbSDimitry Andric 585ffd83dbSDimitry Andric // <3,1> or <6,7,2,3> 595ffd83dbSDimitry Andric void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 605ffd83dbSDimitry Andric for (unsigned i = NElts / 2; i != NElts; ++i) 615ffd83dbSDimitry Andric ShuffleMask.push_back(NElts + i); 625ffd83dbSDimitry Andric 635ffd83dbSDimitry Andric for (unsigned i = NElts / 2; i != NElts; ++i) 645ffd83dbSDimitry Andric ShuffleMask.push_back(i); 655ffd83dbSDimitry Andric } 665ffd83dbSDimitry Andric 675ffd83dbSDimitry Andric // <0,2> or <0,1,4,5> 685ffd83dbSDimitry Andric void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 695ffd83dbSDimitry Andric for (unsigned i = 0; i != NElts / 2; ++i) 705ffd83dbSDimitry Andric ShuffleMask.push_back(i); 715ffd83dbSDimitry Andric 725ffd83dbSDimitry Andric for (unsigned i = 0; i != NElts / 2; ++i) 735ffd83dbSDimitry Andric ShuffleMask.push_back(NElts + i); 745ffd83dbSDimitry Andric } 755ffd83dbSDimitry Andric 765ffd83dbSDimitry Andric void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 775ffd83dbSDimitry Andric for (int i = 0, e = NumElts / 2; i < e; ++i) { 785ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i); 795ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i); 805ffd83dbSDimitry Andric } 815ffd83dbSDimitry Andric } 825ffd83dbSDimitry Andric 835ffd83dbSDimitry Andric void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 845ffd83dbSDimitry Andric for (int i = 0, e = NumElts / 2; i < e; ++i) { 855ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i + 1); 865ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i + 1); 875ffd83dbSDimitry Andric } 885ffd83dbSDimitry Andric } 895ffd83dbSDimitry Andric 905ffd83dbSDimitry Andric void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 915ffd83dbSDimitry Andric const unsigned NumLaneElts = 2; 925ffd83dbSDimitry Andric 935ffd83dbSDimitry Andric for (unsigned l = 0; l < NumElts; l += NumLaneElts) 945ffd83dbSDimitry Andric for (unsigned i = 0; i < NumLaneElts; ++i) 955ffd83dbSDimitry Andric ShuffleMask.push_back(l); 965ffd83dbSDimitry Andric } 975ffd83dbSDimitry Andric 985ffd83dbSDimitry Andric void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, 995ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1005ffd83dbSDimitry Andric const unsigned NumLaneElts = 16; 1015ffd83dbSDimitry Andric 1025ffd83dbSDimitry Andric for (unsigned l = 0; l < NumElts; l += NumLaneElts) 1035ffd83dbSDimitry Andric for (unsigned i = 0; i < NumLaneElts; ++i) { 1045ffd83dbSDimitry Andric int M = SM_SentinelZero; 1055ffd83dbSDimitry Andric if (i >= Imm) M = i - Imm + l; 1065ffd83dbSDimitry Andric ShuffleMask.push_back(M); 1075ffd83dbSDimitry Andric } 1085ffd83dbSDimitry Andric } 1095ffd83dbSDimitry Andric 1105ffd83dbSDimitry Andric void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, 1115ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1125ffd83dbSDimitry Andric const unsigned NumLaneElts = 16; 1135ffd83dbSDimitry Andric 1145ffd83dbSDimitry Andric for (unsigned l = 0; l < NumElts; l += NumLaneElts) 1155ffd83dbSDimitry Andric for (unsigned i = 0; i < NumLaneElts; ++i) { 1165ffd83dbSDimitry Andric unsigned Base = i + Imm; 1175ffd83dbSDimitry Andric int M = Base + l; 1185ffd83dbSDimitry Andric if (Base >= NumLaneElts) M = SM_SentinelZero; 1195ffd83dbSDimitry Andric ShuffleMask.push_back(M); 1205ffd83dbSDimitry Andric } 1215ffd83dbSDimitry Andric } 1225ffd83dbSDimitry Andric 1235ffd83dbSDimitry Andric void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, 1245ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1255ffd83dbSDimitry Andric const unsigned NumLaneElts = 16; 1265ffd83dbSDimitry Andric 1275ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 1285ffd83dbSDimitry Andric for (unsigned i = 0; i != NumLaneElts; ++i) { 1295ffd83dbSDimitry Andric unsigned Base = i + Imm; 1305ffd83dbSDimitry Andric // if i+imm is out of this lane then we actually need the other source 1315ffd83dbSDimitry Andric if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 1325ffd83dbSDimitry Andric ShuffleMask.push_back(Base + l); 1335ffd83dbSDimitry Andric } 1345ffd83dbSDimitry Andric } 1355ffd83dbSDimitry Andric } 1365ffd83dbSDimitry Andric 1375ffd83dbSDimitry Andric void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, 1385ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1395ffd83dbSDimitry Andric // Not all bits of the immediate are used so mask it. 1405ffd83dbSDimitry Andric assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2"); 1415ffd83dbSDimitry Andric Imm = Imm & (NumElts - 1); 1425ffd83dbSDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 1435ffd83dbSDimitry Andric ShuffleMask.push_back(i + Imm); 1445ffd83dbSDimitry Andric } 1455ffd83dbSDimitry Andric 1465ffd83dbSDimitry Andric void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, 1475ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1485ffd83dbSDimitry Andric unsigned Size = NumElts * ScalarBits; 1495ffd83dbSDimitry Andric unsigned NumLanes = Size / 128; 1505ffd83dbSDimitry Andric if (NumLanes == 0) NumLanes = 1; // Handle MMX 1515ffd83dbSDimitry Andric unsigned NumLaneElts = NumElts / NumLanes; 1525ffd83dbSDimitry Andric 1535ffd83dbSDimitry Andric uint32_t SplatImm = (Imm & 0xff) * 0x01010101; 1545ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 1555ffd83dbSDimitry Andric for (unsigned i = 0; i != NumLaneElts; ++i) { 1565ffd83dbSDimitry Andric ShuffleMask.push_back(SplatImm % NumLaneElts + l); 1575ffd83dbSDimitry Andric SplatImm /= NumLaneElts; 1585ffd83dbSDimitry Andric } 1595ffd83dbSDimitry Andric } 1605ffd83dbSDimitry Andric } 1615ffd83dbSDimitry Andric 1625ffd83dbSDimitry Andric void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, 1635ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1645ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += 8) { 1655ffd83dbSDimitry Andric unsigned NewImm = Imm; 1665ffd83dbSDimitry Andric for (unsigned i = 0, e = 4; i != e; ++i) { 1675ffd83dbSDimitry Andric ShuffleMask.push_back(l + i); 1685ffd83dbSDimitry Andric } 1695ffd83dbSDimitry Andric for (unsigned i = 4, e = 8; i != e; ++i) { 1705ffd83dbSDimitry Andric ShuffleMask.push_back(l + 4 + (NewImm & 3)); 1715ffd83dbSDimitry Andric NewImm >>= 2; 1725ffd83dbSDimitry Andric } 1735ffd83dbSDimitry Andric } 1745ffd83dbSDimitry Andric } 1755ffd83dbSDimitry Andric 1765ffd83dbSDimitry Andric void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, 1775ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 1785ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += 8) { 1795ffd83dbSDimitry Andric unsigned NewImm = Imm; 1805ffd83dbSDimitry Andric for (unsigned i = 0, e = 4; i != e; ++i) { 1815ffd83dbSDimitry Andric ShuffleMask.push_back(l + (NewImm & 3)); 1825ffd83dbSDimitry Andric NewImm >>= 2; 1835ffd83dbSDimitry Andric } 1845ffd83dbSDimitry Andric for (unsigned i = 4, e = 8; i != e; ++i) { 1855ffd83dbSDimitry Andric ShuffleMask.push_back(l + i); 1865ffd83dbSDimitry Andric } 1875ffd83dbSDimitry Andric } 1885ffd83dbSDimitry Andric } 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 1915ffd83dbSDimitry Andric unsigned NumHalfElts = NumElts / 2; 1925ffd83dbSDimitry Andric 1935ffd83dbSDimitry Andric for (unsigned l = 0; l != NumHalfElts; ++l) 1945ffd83dbSDimitry Andric ShuffleMask.push_back(l + NumHalfElts); 1955ffd83dbSDimitry Andric for (unsigned h = 0; h != NumHalfElts; ++h) 1965ffd83dbSDimitry Andric ShuffleMask.push_back(h); 1975ffd83dbSDimitry Andric } 1985ffd83dbSDimitry Andric 1995ffd83dbSDimitry Andric void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, 2005ffd83dbSDimitry Andric unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 2015ffd83dbSDimitry Andric unsigned NumLaneElts = 128 / ScalarBits; 2025ffd83dbSDimitry Andric 2035ffd83dbSDimitry Andric unsigned NewImm = Imm; 2045ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 2055ffd83dbSDimitry Andric // each half of a lane comes from different source 2065ffd83dbSDimitry Andric for (unsigned s = 0; s != NumElts * 2; s += NumElts) { 2075ffd83dbSDimitry Andric for (unsigned i = 0; i != NumLaneElts / 2; ++i) { 2085ffd83dbSDimitry Andric ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 2095ffd83dbSDimitry Andric NewImm /= NumLaneElts; 2105ffd83dbSDimitry Andric } 2115ffd83dbSDimitry Andric } 2125ffd83dbSDimitry Andric if (NumLaneElts == 4) NewImm = Imm; // reload imm 2135ffd83dbSDimitry Andric } 2145ffd83dbSDimitry Andric } 2155ffd83dbSDimitry Andric 2165ffd83dbSDimitry Andric void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, 2175ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2185ffd83dbSDimitry Andric // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 2195ffd83dbSDimitry Andric // independently on 128-bit lanes. 2205ffd83dbSDimitry Andric unsigned NumLanes = (NumElts * ScalarBits) / 128; 2215ffd83dbSDimitry Andric if (NumLanes == 0) NumLanes = 1; // Handle MMX 2225ffd83dbSDimitry Andric unsigned NumLaneElts = NumElts / NumLanes; 2235ffd83dbSDimitry Andric 2245ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 2255ffd83dbSDimitry Andric for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) { 2265ffd83dbSDimitry Andric ShuffleMask.push_back(i); // Reads from dest/src1 2275ffd83dbSDimitry Andric ShuffleMask.push_back(i + NumElts); // Reads from src/src2 2285ffd83dbSDimitry Andric } 2295ffd83dbSDimitry Andric } 2305ffd83dbSDimitry Andric } 2315ffd83dbSDimitry Andric 2325ffd83dbSDimitry Andric void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, 2335ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2345ffd83dbSDimitry Andric // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 2355ffd83dbSDimitry Andric // independently on 128-bit lanes. 2365ffd83dbSDimitry Andric unsigned NumLanes = (NumElts * ScalarBits) / 128; 2375ffd83dbSDimitry Andric if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 2385ffd83dbSDimitry Andric unsigned NumLaneElts = NumElts / NumLanes; 2395ffd83dbSDimitry Andric 2405ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 2415ffd83dbSDimitry Andric for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) { 2425ffd83dbSDimitry Andric ShuffleMask.push_back(i); // Reads from dest/src1 2435ffd83dbSDimitry Andric ShuffleMask.push_back(i + NumElts); // Reads from src/src2 2445ffd83dbSDimitry Andric } 2455ffd83dbSDimitry Andric } 2465ffd83dbSDimitry Andric } 2475ffd83dbSDimitry Andric 2485ffd83dbSDimitry Andric void DecodeVectorBroadcast(unsigned NumElts, 2495ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2505ffd83dbSDimitry Andric ShuffleMask.append(NumElts, 0); 2515ffd83dbSDimitry Andric } 2525ffd83dbSDimitry Andric 2535ffd83dbSDimitry Andric void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, 2545ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2555ffd83dbSDimitry Andric unsigned Scale = DstNumElts / SrcNumElts; 2565ffd83dbSDimitry Andric 2575ffd83dbSDimitry Andric for (unsigned i = 0; i != Scale; ++i) 2585ffd83dbSDimitry Andric for (unsigned j = 0; j != SrcNumElts; ++j) 2595ffd83dbSDimitry Andric ShuffleMask.push_back(j); 2605ffd83dbSDimitry Andric } 2615ffd83dbSDimitry Andric 2625ffd83dbSDimitry Andric void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, 2635ffd83dbSDimitry Andric unsigned Imm, 2645ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2655ffd83dbSDimitry Andric unsigned NumElementsInLane = 128 / ScalarSize; 2665ffd83dbSDimitry Andric unsigned NumLanes = NumElts / NumElementsInLane; 2675ffd83dbSDimitry Andric 2685ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumElementsInLane) { 2695ffd83dbSDimitry Andric unsigned Index = (Imm % NumLanes) * NumElementsInLane; 2705ffd83dbSDimitry Andric Imm /= NumLanes; // Discard the bits we just used. 2715ffd83dbSDimitry Andric // We actually need the other source. 2725ffd83dbSDimitry Andric if (l >= (NumElts / 2)) 2735ffd83dbSDimitry Andric Index += NumElts; 2745ffd83dbSDimitry Andric for (unsigned i = 0; i != NumElementsInLane; ++i) 2755ffd83dbSDimitry Andric ShuffleMask.push_back(Index + i); 2765ffd83dbSDimitry Andric } 2775ffd83dbSDimitry Andric } 2785ffd83dbSDimitry Andric 2795ffd83dbSDimitry Andric void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, 2805ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2815ffd83dbSDimitry Andric unsigned HalfSize = NumElts / 2; 2825ffd83dbSDimitry Andric 2835ffd83dbSDimitry Andric for (unsigned l = 0; l != 2; ++l) { 2845ffd83dbSDimitry Andric unsigned HalfMask = Imm >> (l * 4); 2855ffd83dbSDimitry Andric unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; 2865ffd83dbSDimitry Andric for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) 2875ffd83dbSDimitry Andric ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i); 2885ffd83dbSDimitry Andric } 2895ffd83dbSDimitry Andric } 2905ffd83dbSDimitry Andric 2915ffd83dbSDimitry Andric void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 2925ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 2935ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i < e; ++i) { 2945ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 2955ffd83dbSDimitry Andric if (UndefElts[i]) { 2965ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 2975ffd83dbSDimitry Andric continue; 2985ffd83dbSDimitry Andric } 2995ffd83dbSDimitry Andric // For 256/512-bit vectors the base of the shuffle is the 128-bit 3005ffd83dbSDimitry Andric // subvector we're inside. 3015ffd83dbSDimitry Andric int Base = (i / 16) * 16; 3025ffd83dbSDimitry Andric // If the high bit (7) of the byte is set, the element is zeroed. 3035ffd83dbSDimitry Andric if (M & (1 << 7)) 3045ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 3055ffd83dbSDimitry Andric else { 3065ffd83dbSDimitry Andric // Only the least significant 4 bits of the byte are used. 3075ffd83dbSDimitry Andric int Index = Base + (M & 0xf); 3085ffd83dbSDimitry Andric ShuffleMask.push_back(Index); 3095ffd83dbSDimitry Andric } 3105ffd83dbSDimitry Andric } 3115ffd83dbSDimitry Andric } 3125ffd83dbSDimitry Andric 3135ffd83dbSDimitry Andric void DecodeBLENDMask(unsigned NumElts, unsigned Imm, 3145ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 3155ffd83dbSDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 3165ffd83dbSDimitry Andric // If there are more than 8 elements in the vector, then any immediate blend 3175ffd83dbSDimitry Andric // mask wraps around. 3185ffd83dbSDimitry Andric unsigned Bit = i % 8; 3195ffd83dbSDimitry Andric ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i); 3205ffd83dbSDimitry Andric } 3215ffd83dbSDimitry Andric } 3225ffd83dbSDimitry Andric 3235ffd83dbSDimitry Andric void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 3245ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 3255ffd83dbSDimitry Andric assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); 3265ffd83dbSDimitry Andric 3275ffd83dbSDimitry Andric // VPPERM Operation 3285ffd83dbSDimitry Andric // Bits[4:0] - Byte Index (0 - 31) 3295ffd83dbSDimitry Andric // Bits[7:5] - Permute Operation 3305ffd83dbSDimitry Andric // 3315ffd83dbSDimitry Andric // Permute Operation: 3325ffd83dbSDimitry Andric // 0 - Source byte (no logical operation). 3335ffd83dbSDimitry Andric // 1 - Invert source byte. 3345ffd83dbSDimitry Andric // 2 - Bit reverse of source byte. 3355ffd83dbSDimitry Andric // 3 - Bit reverse of inverted source byte. 3365ffd83dbSDimitry Andric // 4 - 00h (zero - fill). 3375ffd83dbSDimitry Andric // 5 - FFh (ones - fill). 3385ffd83dbSDimitry Andric // 6 - Most significant bit of source byte replicated in all bit positions. 3395ffd83dbSDimitry Andric // 7 - Invert most significant bit of source byte and replicate in all bit positions. 3405ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i < e; ++i) { 3415ffd83dbSDimitry Andric if (UndefElts[i]) { 3425ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 3435ffd83dbSDimitry Andric continue; 3445ffd83dbSDimitry Andric } 3455ffd83dbSDimitry Andric 3465ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 3475ffd83dbSDimitry Andric uint64_t PermuteOp = (M >> 5) & 0x7; 3485ffd83dbSDimitry Andric if (PermuteOp == 4) { 3495ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 3505ffd83dbSDimitry Andric continue; 3515ffd83dbSDimitry Andric } 3525ffd83dbSDimitry Andric if (PermuteOp != 0) { 3535ffd83dbSDimitry Andric ShuffleMask.clear(); 3545ffd83dbSDimitry Andric return; 3555ffd83dbSDimitry Andric } 3565ffd83dbSDimitry Andric 3575ffd83dbSDimitry Andric uint64_t Index = M & 0x1F; 3585ffd83dbSDimitry Andric ShuffleMask.push_back((int)Index); 3595ffd83dbSDimitry Andric } 3605ffd83dbSDimitry Andric } 3615ffd83dbSDimitry Andric 3625ffd83dbSDimitry Andric void DecodeVPERMMask(unsigned NumElts, unsigned Imm, 3635ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 3645ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += 4) 3655ffd83dbSDimitry Andric for (unsigned i = 0; i != 4; ++i) 3665ffd83dbSDimitry Andric ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); 3675ffd83dbSDimitry Andric } 3685ffd83dbSDimitry Andric 3695ffd83dbSDimitry Andric void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, 3705ffd83dbSDimitry Andric unsigned NumDstElts, bool IsAnyExtend, 3715ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 3725ffd83dbSDimitry Andric unsigned Scale = DstScalarBits / SrcScalarBits; 3735ffd83dbSDimitry Andric assert(SrcScalarBits < DstScalarBits && 3745ffd83dbSDimitry Andric "Expected zero extension mask to increase scalar size"); 3755ffd83dbSDimitry Andric 3765ffd83dbSDimitry Andric int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero; 3775ffd83dbSDimitry Andric for (unsigned i = 0; i != NumDstElts; i++) { 3785ffd83dbSDimitry Andric ShuffleMask.push_back(i); 3795ffd83dbSDimitry Andric ShuffleMask.append(Scale - 1, Sentinel); 3805ffd83dbSDimitry Andric } 3815ffd83dbSDimitry Andric } 3825ffd83dbSDimitry Andric 3835ffd83dbSDimitry Andric void DecodeZeroMoveLowMask(unsigned NumElts, 3845ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 3855ffd83dbSDimitry Andric ShuffleMask.push_back(0); 3865ffd83dbSDimitry Andric ShuffleMask.append(NumElts - 1, SM_SentinelZero); 3875ffd83dbSDimitry Andric } 3885ffd83dbSDimitry Andric 3895ffd83dbSDimitry Andric void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, 3905ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 3915ffd83dbSDimitry Andric // First element comes from the first element of second source. 3925ffd83dbSDimitry Andric // Remaining elements: Load zero extends / Move copies from first source. 3935ffd83dbSDimitry Andric ShuffleMask.push_back(NumElts); 3945ffd83dbSDimitry Andric for (unsigned i = 1; i < NumElts; i++) 3955ffd83dbSDimitry Andric ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); 3965ffd83dbSDimitry Andric } 3975ffd83dbSDimitry Andric 3985ffd83dbSDimitry Andric void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, 3995ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 4005ffd83dbSDimitry Andric unsigned HalfElts = NumElts / 2; 4015ffd83dbSDimitry Andric 4025ffd83dbSDimitry Andric // Only the bottom 6 bits are valid for each immediate. 4035ffd83dbSDimitry Andric Len &= 0x3F; 4045ffd83dbSDimitry Andric Idx &= 0x3F; 4055ffd83dbSDimitry Andric 4065ffd83dbSDimitry Andric // We can only decode this bit extraction instruction as a shuffle if both the 4075ffd83dbSDimitry Andric // length and index work with whole elements. 4085ffd83dbSDimitry Andric if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) 4095ffd83dbSDimitry Andric return; 4105ffd83dbSDimitry Andric 4115ffd83dbSDimitry Andric // A length of zero is equivalent to a bit length of 64. 4125ffd83dbSDimitry Andric if (Len == 0) 4135ffd83dbSDimitry Andric Len = 64; 4145ffd83dbSDimitry Andric 4155ffd83dbSDimitry Andric // If the length + index exceeds the bottom 64 bits the result is undefined. 4165ffd83dbSDimitry Andric if ((Len + Idx) > 64) { 4175ffd83dbSDimitry Andric ShuffleMask.append(NumElts, SM_SentinelUndef); 4185ffd83dbSDimitry Andric return; 4195ffd83dbSDimitry Andric } 4205ffd83dbSDimitry Andric 4215ffd83dbSDimitry Andric // Convert index and index to work with elements. 4225ffd83dbSDimitry Andric Len /= EltSize; 4235ffd83dbSDimitry Andric Idx /= EltSize; 4245ffd83dbSDimitry Andric 4255ffd83dbSDimitry Andric // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining 4265ffd83dbSDimitry Andric // elements of the lower 64-bits. The upper 64-bits are undefined. 4275ffd83dbSDimitry Andric for (int i = 0; i != Len; ++i) 4285ffd83dbSDimitry Andric ShuffleMask.push_back(i + Idx); 4295ffd83dbSDimitry Andric for (int i = Len; i != (int)HalfElts; ++i) 4305ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 4315ffd83dbSDimitry Andric for (int i = HalfElts; i != (int)NumElts; ++i) 4325ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 4335ffd83dbSDimitry Andric } 4345ffd83dbSDimitry Andric 4355ffd83dbSDimitry Andric void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, 4365ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 4375ffd83dbSDimitry Andric unsigned HalfElts = NumElts / 2; 4385ffd83dbSDimitry Andric 4395ffd83dbSDimitry Andric // Only the bottom 6 bits are valid for each immediate. 4405ffd83dbSDimitry Andric Len &= 0x3F; 4415ffd83dbSDimitry Andric Idx &= 0x3F; 4425ffd83dbSDimitry Andric 4435ffd83dbSDimitry Andric // We can only decode this bit insertion instruction as a shuffle if both the 4445ffd83dbSDimitry Andric // length and index work with whole elements. 4455ffd83dbSDimitry Andric if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) 4465ffd83dbSDimitry Andric return; 4475ffd83dbSDimitry Andric 4485ffd83dbSDimitry Andric // A length of zero is equivalent to a bit length of 64. 4495ffd83dbSDimitry Andric if (Len == 0) 4505ffd83dbSDimitry Andric Len = 64; 4515ffd83dbSDimitry Andric 4525ffd83dbSDimitry Andric // If the length + index exceeds the bottom 64 bits the result is undefined. 4535ffd83dbSDimitry Andric if ((Len + Idx) > 64) { 4545ffd83dbSDimitry Andric ShuffleMask.append(NumElts, SM_SentinelUndef); 4555ffd83dbSDimitry Andric return; 4565ffd83dbSDimitry Andric } 4575ffd83dbSDimitry Andric 4585ffd83dbSDimitry Andric // Convert index and index to work with elements. 4595ffd83dbSDimitry Andric Len /= EltSize; 4605ffd83dbSDimitry Andric Idx /= EltSize; 4615ffd83dbSDimitry Andric 4625ffd83dbSDimitry Andric // INSERTQ: Extract lowest Len elements from lower half of second source and 4635ffd83dbSDimitry Andric // insert over first source starting at Idx element. The upper 64-bits are 4645ffd83dbSDimitry Andric // undefined. 4655ffd83dbSDimitry Andric for (int i = 0; i != Idx; ++i) 4665ffd83dbSDimitry Andric ShuffleMask.push_back(i); 4675ffd83dbSDimitry Andric for (int i = 0; i != Len; ++i) 4685ffd83dbSDimitry Andric ShuffleMask.push_back(i + NumElts); 4695ffd83dbSDimitry Andric for (int i = Idx + Len; i != (int)HalfElts; ++i) 4705ffd83dbSDimitry Andric ShuffleMask.push_back(i); 4715ffd83dbSDimitry Andric for (int i = HalfElts; i != (int)NumElts; ++i) 4725ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 4735ffd83dbSDimitry Andric } 4745ffd83dbSDimitry Andric 4755ffd83dbSDimitry Andric void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, 4765ffd83dbSDimitry Andric ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 4775ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 4785ffd83dbSDimitry Andric unsigned VecSize = NumElts * ScalarBits; 4795ffd83dbSDimitry Andric unsigned NumLanes = VecSize / 128; 4805ffd83dbSDimitry Andric unsigned NumEltsPerLane = NumElts / NumLanes; 4815ffd83dbSDimitry Andric assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && 4825ffd83dbSDimitry Andric "Unexpected vector size"); 4835ffd83dbSDimitry Andric assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); 4845ffd83dbSDimitry Andric 4855ffd83dbSDimitry Andric for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { 4865ffd83dbSDimitry Andric if (UndefElts[i]) { 4875ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 4885ffd83dbSDimitry Andric continue; 4895ffd83dbSDimitry Andric } 4905ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 4915ffd83dbSDimitry Andric M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); 4925ffd83dbSDimitry Andric unsigned LaneOffset = i & ~(NumEltsPerLane - 1); 4935ffd83dbSDimitry Andric ShuffleMask.push_back((int)(LaneOffset + M)); 4945ffd83dbSDimitry Andric } 4955ffd83dbSDimitry Andric } 4965ffd83dbSDimitry Andric 4975ffd83dbSDimitry Andric void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, 4985ffd83dbSDimitry Andric ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 4995ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 5005ffd83dbSDimitry Andric unsigned VecSize = NumElts * ScalarBits; 5015ffd83dbSDimitry Andric unsigned NumLanes = VecSize / 128; 5025ffd83dbSDimitry Andric unsigned NumEltsPerLane = NumElts / NumLanes; 5035ffd83dbSDimitry Andric assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size"); 5045ffd83dbSDimitry Andric assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); 5055ffd83dbSDimitry Andric assert((NumElts == RawMask.size()) && "Unexpected mask size"); 5065ffd83dbSDimitry Andric 5075ffd83dbSDimitry Andric for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { 5085ffd83dbSDimitry Andric if (UndefElts[i]) { 5095ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 5105ffd83dbSDimitry Andric continue; 5115ffd83dbSDimitry Andric } 5125ffd83dbSDimitry Andric 5135ffd83dbSDimitry Andric // VPERMIL2 Operation. 5145ffd83dbSDimitry Andric // Bits[3] - Match Bit. 5155ffd83dbSDimitry Andric // Bits[2:1] - (Per Lane) PD Shuffle Mask. 5165ffd83dbSDimitry Andric // Bits[2:0] - (Per Lane) PS Shuffle Mask. 5175ffd83dbSDimitry Andric uint64_t Selector = RawMask[i]; 5185ffd83dbSDimitry Andric unsigned MatchBit = (Selector >> 3) & 0x1; 5195ffd83dbSDimitry Andric 5205ffd83dbSDimitry Andric // M2Z[0:1] MatchBit 5215ffd83dbSDimitry Andric // 0Xb X Source selected by Selector index. 5225ffd83dbSDimitry Andric // 10b 0 Source selected by Selector index. 5235ffd83dbSDimitry Andric // 10b 1 Zero. 5245ffd83dbSDimitry Andric // 11b 0 Zero. 5255ffd83dbSDimitry Andric // 11b 1 Source selected by Selector index. 5265ffd83dbSDimitry Andric if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) { 5275ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 5285ffd83dbSDimitry Andric continue; 5295ffd83dbSDimitry Andric } 5305ffd83dbSDimitry Andric 5315ffd83dbSDimitry Andric int Index = i & ~(NumEltsPerLane - 1); 5325ffd83dbSDimitry Andric if (ScalarBits == 64) 5335ffd83dbSDimitry Andric Index += (Selector >> 1) & 0x1; 5345ffd83dbSDimitry Andric else 5355ffd83dbSDimitry Andric Index += Selector & 0x3; 5365ffd83dbSDimitry Andric 5375ffd83dbSDimitry Andric int Src = (Selector >> 2) & 0x1; 5385ffd83dbSDimitry Andric Index += Src * NumElts; 5395ffd83dbSDimitry Andric ShuffleMask.push_back(Index); 5405ffd83dbSDimitry Andric } 5415ffd83dbSDimitry Andric } 5425ffd83dbSDimitry Andric 5435ffd83dbSDimitry Andric void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 5445ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 5455ffd83dbSDimitry Andric uint64_t EltMaskSize = RawMask.size() - 1; 5465ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i != e; ++i) { 5475ffd83dbSDimitry Andric if (UndefElts[i]) { 5485ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 5495ffd83dbSDimitry Andric continue; 5505ffd83dbSDimitry Andric } 5515ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 5525ffd83dbSDimitry Andric M &= EltMaskSize; 5535ffd83dbSDimitry Andric ShuffleMask.push_back((int)M); 5545ffd83dbSDimitry Andric } 5555ffd83dbSDimitry Andric } 5565ffd83dbSDimitry Andric 5575ffd83dbSDimitry Andric void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 5585ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 5595ffd83dbSDimitry Andric uint64_t EltMaskSize = (RawMask.size() * 2) - 1; 5605ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i != e; ++i) { 5615ffd83dbSDimitry Andric if (UndefElts[i]) { 5625ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 5635ffd83dbSDimitry Andric continue; 5645ffd83dbSDimitry Andric } 5655ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 5665ffd83dbSDimitry Andric M &= EltMaskSize; 5675ffd83dbSDimitry Andric ShuffleMask.push_back((int)M); 5685ffd83dbSDimitry Andric } 5695ffd83dbSDimitry Andric } 5705ffd83dbSDimitry Andric 571*e8d8bef9SDimitry Andric } // namespace llvm 572