1*5ffd83dbSDimitry Andric //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric // 9*5ffd83dbSDimitry Andric // Define several functions to decode x86 specific shuffle semantics into a 10*5ffd83dbSDimitry Andric // generic vector mask. 11*5ffd83dbSDimitry Andric // 12*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 13*5ffd83dbSDimitry Andric 14*5ffd83dbSDimitry Andric #include "X86ShuffleDecode.h" 15*5ffd83dbSDimitry Andric #include "llvm/ADT/APInt.h" 16*5ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h" 17*5ffd83dbSDimitry Andric #include "llvm/ADT/SmallVector.h" 18*5ffd83dbSDimitry Andric 19*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 20*5ffd83dbSDimitry Andric // Vector Mask Decoding 21*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 22*5ffd83dbSDimitry Andric 23*5ffd83dbSDimitry Andric namespace llvm { 24*5ffd83dbSDimitry Andric 25*5ffd83dbSDimitry Andric void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 26*5ffd83dbSDimitry Andric // Defaults the copying the dest value. 27*5ffd83dbSDimitry Andric ShuffleMask.push_back(0); 28*5ffd83dbSDimitry Andric ShuffleMask.push_back(1); 29*5ffd83dbSDimitry Andric ShuffleMask.push_back(2); 30*5ffd83dbSDimitry Andric ShuffleMask.push_back(3); 31*5ffd83dbSDimitry Andric 32*5ffd83dbSDimitry Andric // Decode the immediate. 33*5ffd83dbSDimitry Andric unsigned ZMask = Imm & 15; 34*5ffd83dbSDimitry Andric unsigned CountD = (Imm >> 4) & 3; 35*5ffd83dbSDimitry Andric unsigned CountS = (Imm >> 6) & 3; 36*5ffd83dbSDimitry Andric 37*5ffd83dbSDimitry Andric // CountS selects which input element to use. 38*5ffd83dbSDimitry Andric unsigned InVal = 4 + CountS; 39*5ffd83dbSDimitry Andric // CountD specifies which element of destination to update. 40*5ffd83dbSDimitry Andric ShuffleMask[CountD] = InVal; 41*5ffd83dbSDimitry Andric // ZMask zaps values, potentially overriding the CountD elt. 42*5ffd83dbSDimitry Andric if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 43*5ffd83dbSDimitry Andric if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 44*5ffd83dbSDimitry Andric if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 45*5ffd83dbSDimitry Andric if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 46*5ffd83dbSDimitry Andric } 47*5ffd83dbSDimitry Andric 48*5ffd83dbSDimitry Andric void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len, 49*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 50*5ffd83dbSDimitry Andric assert((Idx + Len) <= NumElts && "Insertion out of range"); 51*5ffd83dbSDimitry Andric 52*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 53*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); 54*5ffd83dbSDimitry Andric for (unsigned i = 0; i != Len; ++i) 55*5ffd83dbSDimitry Andric ShuffleMask[Idx + i] = NumElts + i; 56*5ffd83dbSDimitry Andric } 57*5ffd83dbSDimitry Andric 58*5ffd83dbSDimitry Andric // <3,1> or <6,7,2,3> 59*5ffd83dbSDimitry Andric void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 60*5ffd83dbSDimitry Andric for (unsigned i = NElts / 2; i != NElts; ++i) 61*5ffd83dbSDimitry Andric ShuffleMask.push_back(NElts + i); 62*5ffd83dbSDimitry Andric 63*5ffd83dbSDimitry Andric for (unsigned i = NElts / 2; i != NElts; ++i) 64*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); 65*5ffd83dbSDimitry Andric } 66*5ffd83dbSDimitry Andric 67*5ffd83dbSDimitry Andric // <0,2> or <0,1,4,5> 68*5ffd83dbSDimitry Andric void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 69*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NElts / 2; ++i) 70*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); 71*5ffd83dbSDimitry Andric 72*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NElts / 2; ++i) 73*5ffd83dbSDimitry Andric ShuffleMask.push_back(NElts + i); 74*5ffd83dbSDimitry Andric } 75*5ffd83dbSDimitry Andric 76*5ffd83dbSDimitry Andric void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 77*5ffd83dbSDimitry Andric for (int i = 0, e = NumElts / 2; i < e; ++i) { 78*5ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i); 79*5ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i); 80*5ffd83dbSDimitry Andric } 81*5ffd83dbSDimitry Andric } 82*5ffd83dbSDimitry Andric 83*5ffd83dbSDimitry Andric void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 84*5ffd83dbSDimitry Andric for (int i = 0, e = NumElts / 2; i < e; ++i) { 85*5ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i + 1); 86*5ffd83dbSDimitry Andric ShuffleMask.push_back(2 * i + 1); 87*5ffd83dbSDimitry Andric } 88*5ffd83dbSDimitry Andric } 89*5ffd83dbSDimitry Andric 90*5ffd83dbSDimitry Andric void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 91*5ffd83dbSDimitry Andric const unsigned NumLaneElts = 2; 92*5ffd83dbSDimitry Andric 93*5ffd83dbSDimitry Andric for (unsigned l = 0; l < NumElts; l += NumLaneElts) 94*5ffd83dbSDimitry Andric for (unsigned i = 0; i < NumLaneElts; ++i) 95*5ffd83dbSDimitry Andric ShuffleMask.push_back(l); 96*5ffd83dbSDimitry Andric } 97*5ffd83dbSDimitry Andric 98*5ffd83dbSDimitry Andric void DecodePSLLDQMask(unsigned NumElts, unsigned Imm, 99*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 100*5ffd83dbSDimitry Andric const unsigned NumLaneElts = 16; 101*5ffd83dbSDimitry Andric 102*5ffd83dbSDimitry Andric for (unsigned l = 0; l < NumElts; l += NumLaneElts) 103*5ffd83dbSDimitry Andric for (unsigned i = 0; i < NumLaneElts; ++i) { 104*5ffd83dbSDimitry Andric int M = SM_SentinelZero; 105*5ffd83dbSDimitry Andric if (i >= Imm) M = i - Imm + l; 106*5ffd83dbSDimitry Andric ShuffleMask.push_back(M); 107*5ffd83dbSDimitry Andric } 108*5ffd83dbSDimitry Andric } 109*5ffd83dbSDimitry Andric 110*5ffd83dbSDimitry Andric void DecodePSRLDQMask(unsigned NumElts, unsigned Imm, 111*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 112*5ffd83dbSDimitry Andric const unsigned NumLaneElts = 16; 113*5ffd83dbSDimitry Andric 114*5ffd83dbSDimitry Andric for (unsigned l = 0; l < NumElts; l += NumLaneElts) 115*5ffd83dbSDimitry Andric for (unsigned i = 0; i < NumLaneElts; ++i) { 116*5ffd83dbSDimitry Andric unsigned Base = i + Imm; 117*5ffd83dbSDimitry Andric int M = Base + l; 118*5ffd83dbSDimitry Andric if (Base >= NumLaneElts) M = SM_SentinelZero; 119*5ffd83dbSDimitry Andric ShuffleMask.push_back(M); 120*5ffd83dbSDimitry Andric } 121*5ffd83dbSDimitry Andric } 122*5ffd83dbSDimitry Andric 123*5ffd83dbSDimitry Andric void DecodePALIGNRMask(unsigned NumElts, unsigned Imm, 124*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 125*5ffd83dbSDimitry Andric const unsigned NumLaneElts = 16; 126*5ffd83dbSDimitry Andric 127*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 128*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumLaneElts; ++i) { 129*5ffd83dbSDimitry Andric unsigned Base = i + Imm; 130*5ffd83dbSDimitry Andric // if i+imm is out of this lane then we actually need the other source 131*5ffd83dbSDimitry Andric if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 132*5ffd83dbSDimitry Andric ShuffleMask.push_back(Base + l); 133*5ffd83dbSDimitry Andric } 134*5ffd83dbSDimitry Andric } 135*5ffd83dbSDimitry Andric } 136*5ffd83dbSDimitry Andric 137*5ffd83dbSDimitry Andric void DecodeVALIGNMask(unsigned NumElts, unsigned Imm, 138*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 139*5ffd83dbSDimitry Andric // Not all bits of the immediate are used so mask it. 140*5ffd83dbSDimitry Andric assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2"); 141*5ffd83dbSDimitry Andric Imm = Imm & (NumElts - 1); 142*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumElts; ++i) 143*5ffd83dbSDimitry Andric ShuffleMask.push_back(i + Imm); 144*5ffd83dbSDimitry Andric } 145*5ffd83dbSDimitry Andric 146*5ffd83dbSDimitry Andric void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm, 147*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 148*5ffd83dbSDimitry Andric unsigned Size = NumElts * ScalarBits; 149*5ffd83dbSDimitry Andric unsigned NumLanes = Size / 128; 150*5ffd83dbSDimitry Andric if (NumLanes == 0) NumLanes = 1; // Handle MMX 151*5ffd83dbSDimitry Andric unsigned NumLaneElts = NumElts / NumLanes; 152*5ffd83dbSDimitry Andric 153*5ffd83dbSDimitry Andric uint32_t SplatImm = (Imm & 0xff) * 0x01010101; 154*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 155*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumLaneElts; ++i) { 156*5ffd83dbSDimitry Andric ShuffleMask.push_back(SplatImm % NumLaneElts + l); 157*5ffd83dbSDimitry Andric SplatImm /= NumLaneElts; 158*5ffd83dbSDimitry Andric } 159*5ffd83dbSDimitry Andric } 160*5ffd83dbSDimitry Andric } 161*5ffd83dbSDimitry Andric 162*5ffd83dbSDimitry Andric void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm, 163*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 164*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += 8) { 165*5ffd83dbSDimitry Andric unsigned NewImm = Imm; 166*5ffd83dbSDimitry Andric for (unsigned i = 0, e = 4; i != e; ++i) { 167*5ffd83dbSDimitry Andric ShuffleMask.push_back(l + i); 168*5ffd83dbSDimitry Andric } 169*5ffd83dbSDimitry Andric for (unsigned i = 4, e = 8; i != e; ++i) { 170*5ffd83dbSDimitry Andric ShuffleMask.push_back(l + 4 + (NewImm & 3)); 171*5ffd83dbSDimitry Andric NewImm >>= 2; 172*5ffd83dbSDimitry Andric } 173*5ffd83dbSDimitry Andric } 174*5ffd83dbSDimitry Andric } 175*5ffd83dbSDimitry Andric 176*5ffd83dbSDimitry Andric void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm, 177*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 178*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += 8) { 179*5ffd83dbSDimitry Andric unsigned NewImm = Imm; 180*5ffd83dbSDimitry Andric for (unsigned i = 0, e = 4; i != e; ++i) { 181*5ffd83dbSDimitry Andric ShuffleMask.push_back(l + (NewImm & 3)); 182*5ffd83dbSDimitry Andric NewImm >>= 2; 183*5ffd83dbSDimitry Andric } 184*5ffd83dbSDimitry Andric for (unsigned i = 4, e = 8; i != e; ++i) { 185*5ffd83dbSDimitry Andric ShuffleMask.push_back(l + i); 186*5ffd83dbSDimitry Andric } 187*5ffd83dbSDimitry Andric } 188*5ffd83dbSDimitry Andric } 189*5ffd83dbSDimitry Andric 190*5ffd83dbSDimitry Andric void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) { 191*5ffd83dbSDimitry Andric unsigned NumHalfElts = NumElts / 2; 192*5ffd83dbSDimitry Andric 193*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumHalfElts; ++l) 194*5ffd83dbSDimitry Andric ShuffleMask.push_back(l + NumHalfElts); 195*5ffd83dbSDimitry Andric for (unsigned h = 0; h != NumHalfElts; ++h) 196*5ffd83dbSDimitry Andric ShuffleMask.push_back(h); 197*5ffd83dbSDimitry Andric } 198*5ffd83dbSDimitry Andric 199*5ffd83dbSDimitry Andric void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits, 200*5ffd83dbSDimitry Andric unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 201*5ffd83dbSDimitry Andric unsigned NumLaneElts = 128 / ScalarBits; 202*5ffd83dbSDimitry Andric 203*5ffd83dbSDimitry Andric unsigned NewImm = Imm; 204*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 205*5ffd83dbSDimitry Andric // each half of a lane comes from different source 206*5ffd83dbSDimitry Andric for (unsigned s = 0; s != NumElts * 2; s += NumElts) { 207*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumLaneElts / 2; ++i) { 208*5ffd83dbSDimitry Andric ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 209*5ffd83dbSDimitry Andric NewImm /= NumLaneElts; 210*5ffd83dbSDimitry Andric } 211*5ffd83dbSDimitry Andric } 212*5ffd83dbSDimitry Andric if (NumLaneElts == 4) NewImm = Imm; // reload imm 213*5ffd83dbSDimitry Andric } 214*5ffd83dbSDimitry Andric } 215*5ffd83dbSDimitry Andric 216*5ffd83dbSDimitry Andric void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits, 217*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 218*5ffd83dbSDimitry Andric // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 219*5ffd83dbSDimitry Andric // independently on 128-bit lanes. 220*5ffd83dbSDimitry Andric unsigned NumLanes = (NumElts * ScalarBits) / 128; 221*5ffd83dbSDimitry Andric if (NumLanes == 0) NumLanes = 1; // Handle MMX 222*5ffd83dbSDimitry Andric unsigned NumLaneElts = NumElts / NumLanes; 223*5ffd83dbSDimitry Andric 224*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 225*5ffd83dbSDimitry Andric for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) { 226*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); // Reads from dest/src1 227*5ffd83dbSDimitry Andric ShuffleMask.push_back(i + NumElts); // Reads from src/src2 228*5ffd83dbSDimitry Andric } 229*5ffd83dbSDimitry Andric } 230*5ffd83dbSDimitry Andric } 231*5ffd83dbSDimitry Andric 232*5ffd83dbSDimitry Andric void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits, 233*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 234*5ffd83dbSDimitry Andric // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 235*5ffd83dbSDimitry Andric // independently on 128-bit lanes. 236*5ffd83dbSDimitry Andric unsigned NumLanes = (NumElts * ScalarBits) / 128; 237*5ffd83dbSDimitry Andric if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 238*5ffd83dbSDimitry Andric unsigned NumLaneElts = NumElts / NumLanes; 239*5ffd83dbSDimitry Andric 240*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 241*5ffd83dbSDimitry Andric for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) { 242*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); // Reads from dest/src1 243*5ffd83dbSDimitry Andric ShuffleMask.push_back(i + NumElts); // Reads from src/src2 244*5ffd83dbSDimitry Andric } 245*5ffd83dbSDimitry Andric } 246*5ffd83dbSDimitry Andric } 247*5ffd83dbSDimitry Andric 248*5ffd83dbSDimitry Andric void DecodeVectorBroadcast(unsigned NumElts, 249*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 250*5ffd83dbSDimitry Andric ShuffleMask.append(NumElts, 0); 251*5ffd83dbSDimitry Andric } 252*5ffd83dbSDimitry Andric 253*5ffd83dbSDimitry Andric void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts, 254*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 255*5ffd83dbSDimitry Andric unsigned Scale = DstNumElts / SrcNumElts; 256*5ffd83dbSDimitry Andric 257*5ffd83dbSDimitry Andric for (unsigned i = 0; i != Scale; ++i) 258*5ffd83dbSDimitry Andric for (unsigned j = 0; j != SrcNumElts; ++j) 259*5ffd83dbSDimitry Andric ShuffleMask.push_back(j); 260*5ffd83dbSDimitry Andric } 261*5ffd83dbSDimitry Andric 262*5ffd83dbSDimitry Andric void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize, 263*5ffd83dbSDimitry Andric unsigned Imm, 264*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 265*5ffd83dbSDimitry Andric unsigned NumElementsInLane = 128 / ScalarSize; 266*5ffd83dbSDimitry Andric unsigned NumLanes = NumElts / NumElementsInLane; 267*5ffd83dbSDimitry Andric 268*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += NumElementsInLane) { 269*5ffd83dbSDimitry Andric unsigned Index = (Imm % NumLanes) * NumElementsInLane; 270*5ffd83dbSDimitry Andric Imm /= NumLanes; // Discard the bits we just used. 271*5ffd83dbSDimitry Andric // We actually need the other source. 272*5ffd83dbSDimitry Andric if (l >= (NumElts / 2)) 273*5ffd83dbSDimitry Andric Index += NumElts; 274*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumElementsInLane; ++i) 275*5ffd83dbSDimitry Andric ShuffleMask.push_back(Index + i); 276*5ffd83dbSDimitry Andric } 277*5ffd83dbSDimitry Andric } 278*5ffd83dbSDimitry Andric 279*5ffd83dbSDimitry Andric void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm, 280*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 281*5ffd83dbSDimitry Andric unsigned HalfSize = NumElts / 2; 282*5ffd83dbSDimitry Andric 283*5ffd83dbSDimitry Andric for (unsigned l = 0; l != 2; ++l) { 284*5ffd83dbSDimitry Andric unsigned HalfMask = Imm >> (l * 4); 285*5ffd83dbSDimitry Andric unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; 286*5ffd83dbSDimitry Andric for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) 287*5ffd83dbSDimitry Andric ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i); 288*5ffd83dbSDimitry Andric } 289*5ffd83dbSDimitry Andric } 290*5ffd83dbSDimitry Andric 291*5ffd83dbSDimitry Andric void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 292*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 293*5ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i < e; ++i) { 294*5ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 295*5ffd83dbSDimitry Andric if (UndefElts[i]) { 296*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 297*5ffd83dbSDimitry Andric continue; 298*5ffd83dbSDimitry Andric } 299*5ffd83dbSDimitry Andric // For 256/512-bit vectors the base of the shuffle is the 128-bit 300*5ffd83dbSDimitry Andric // subvector we're inside. 301*5ffd83dbSDimitry Andric int Base = (i / 16) * 16; 302*5ffd83dbSDimitry Andric // If the high bit (7) of the byte is set, the element is zeroed. 303*5ffd83dbSDimitry Andric if (M & (1 << 7)) 304*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 305*5ffd83dbSDimitry Andric else { 306*5ffd83dbSDimitry Andric // Only the least significant 4 bits of the byte are used. 307*5ffd83dbSDimitry Andric int Index = Base + (M & 0xf); 308*5ffd83dbSDimitry Andric ShuffleMask.push_back(Index); 309*5ffd83dbSDimitry Andric } 310*5ffd83dbSDimitry Andric } 311*5ffd83dbSDimitry Andric } 312*5ffd83dbSDimitry Andric 313*5ffd83dbSDimitry Andric void DecodeBLENDMask(unsigned NumElts, unsigned Imm, 314*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 315*5ffd83dbSDimitry Andric for (unsigned i = 0; i < NumElts; ++i) { 316*5ffd83dbSDimitry Andric // If there are more than 8 elements in the vector, then any immediate blend 317*5ffd83dbSDimitry Andric // mask wraps around. 318*5ffd83dbSDimitry Andric unsigned Bit = i % 8; 319*5ffd83dbSDimitry Andric ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i); 320*5ffd83dbSDimitry Andric } 321*5ffd83dbSDimitry Andric } 322*5ffd83dbSDimitry Andric 323*5ffd83dbSDimitry Andric void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 324*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 325*5ffd83dbSDimitry Andric assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); 326*5ffd83dbSDimitry Andric 327*5ffd83dbSDimitry Andric // VPPERM Operation 328*5ffd83dbSDimitry Andric // Bits[4:0] - Byte Index (0 - 31) 329*5ffd83dbSDimitry Andric // Bits[7:5] - Permute Operation 330*5ffd83dbSDimitry Andric // 331*5ffd83dbSDimitry Andric // Permute Operation: 332*5ffd83dbSDimitry Andric // 0 - Source byte (no logical operation). 333*5ffd83dbSDimitry Andric // 1 - Invert source byte. 334*5ffd83dbSDimitry Andric // 2 - Bit reverse of source byte. 335*5ffd83dbSDimitry Andric // 3 - Bit reverse of inverted source byte. 336*5ffd83dbSDimitry Andric // 4 - 00h (zero - fill). 337*5ffd83dbSDimitry Andric // 5 - FFh (ones - fill). 338*5ffd83dbSDimitry Andric // 6 - Most significant bit of source byte replicated in all bit positions. 339*5ffd83dbSDimitry Andric // 7 - Invert most significant bit of source byte and replicate in all bit positions. 340*5ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i < e; ++i) { 341*5ffd83dbSDimitry Andric if (UndefElts[i]) { 342*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 343*5ffd83dbSDimitry Andric continue; 344*5ffd83dbSDimitry Andric } 345*5ffd83dbSDimitry Andric 346*5ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 347*5ffd83dbSDimitry Andric uint64_t PermuteOp = (M >> 5) & 0x7; 348*5ffd83dbSDimitry Andric if (PermuteOp == 4) { 349*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 350*5ffd83dbSDimitry Andric continue; 351*5ffd83dbSDimitry Andric } 352*5ffd83dbSDimitry Andric if (PermuteOp != 0) { 353*5ffd83dbSDimitry Andric ShuffleMask.clear(); 354*5ffd83dbSDimitry Andric return; 355*5ffd83dbSDimitry Andric } 356*5ffd83dbSDimitry Andric 357*5ffd83dbSDimitry Andric uint64_t Index = M & 0x1F; 358*5ffd83dbSDimitry Andric ShuffleMask.push_back((int)Index); 359*5ffd83dbSDimitry Andric } 360*5ffd83dbSDimitry Andric } 361*5ffd83dbSDimitry Andric 362*5ffd83dbSDimitry Andric void DecodeVPERMMask(unsigned NumElts, unsigned Imm, 363*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 364*5ffd83dbSDimitry Andric for (unsigned l = 0; l != NumElts; l += 4) 365*5ffd83dbSDimitry Andric for (unsigned i = 0; i != 4; ++i) 366*5ffd83dbSDimitry Andric ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); 367*5ffd83dbSDimitry Andric } 368*5ffd83dbSDimitry Andric 369*5ffd83dbSDimitry Andric void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits, 370*5ffd83dbSDimitry Andric unsigned NumDstElts, bool IsAnyExtend, 371*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 372*5ffd83dbSDimitry Andric unsigned Scale = DstScalarBits / SrcScalarBits; 373*5ffd83dbSDimitry Andric assert(SrcScalarBits < DstScalarBits && 374*5ffd83dbSDimitry Andric "Expected zero extension mask to increase scalar size"); 375*5ffd83dbSDimitry Andric 376*5ffd83dbSDimitry Andric int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero; 377*5ffd83dbSDimitry Andric for (unsigned i = 0; i != NumDstElts; i++) { 378*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); 379*5ffd83dbSDimitry Andric ShuffleMask.append(Scale - 1, Sentinel); 380*5ffd83dbSDimitry Andric } 381*5ffd83dbSDimitry Andric } 382*5ffd83dbSDimitry Andric 383*5ffd83dbSDimitry Andric void DecodeZeroMoveLowMask(unsigned NumElts, 384*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 385*5ffd83dbSDimitry Andric ShuffleMask.push_back(0); 386*5ffd83dbSDimitry Andric ShuffleMask.append(NumElts - 1, SM_SentinelZero); 387*5ffd83dbSDimitry Andric } 388*5ffd83dbSDimitry Andric 389*5ffd83dbSDimitry Andric void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad, 390*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 391*5ffd83dbSDimitry Andric // First element comes from the first element of second source. 392*5ffd83dbSDimitry Andric // Remaining elements: Load zero extends / Move copies from first source. 393*5ffd83dbSDimitry Andric ShuffleMask.push_back(NumElts); 394*5ffd83dbSDimitry Andric for (unsigned i = 1; i < NumElts; i++) 395*5ffd83dbSDimitry Andric ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); 396*5ffd83dbSDimitry Andric } 397*5ffd83dbSDimitry Andric 398*5ffd83dbSDimitry Andric void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, 399*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 400*5ffd83dbSDimitry Andric unsigned HalfElts = NumElts / 2; 401*5ffd83dbSDimitry Andric 402*5ffd83dbSDimitry Andric // Only the bottom 6 bits are valid for each immediate. 403*5ffd83dbSDimitry Andric Len &= 0x3F; 404*5ffd83dbSDimitry Andric Idx &= 0x3F; 405*5ffd83dbSDimitry Andric 406*5ffd83dbSDimitry Andric // We can only decode this bit extraction instruction as a shuffle if both the 407*5ffd83dbSDimitry Andric // length and index work with whole elements. 408*5ffd83dbSDimitry Andric if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) 409*5ffd83dbSDimitry Andric return; 410*5ffd83dbSDimitry Andric 411*5ffd83dbSDimitry Andric // A length of zero is equivalent to a bit length of 64. 412*5ffd83dbSDimitry Andric if (Len == 0) 413*5ffd83dbSDimitry Andric Len = 64; 414*5ffd83dbSDimitry Andric 415*5ffd83dbSDimitry Andric // If the length + index exceeds the bottom 64 bits the result is undefined. 416*5ffd83dbSDimitry Andric if ((Len + Idx) > 64) { 417*5ffd83dbSDimitry Andric ShuffleMask.append(NumElts, SM_SentinelUndef); 418*5ffd83dbSDimitry Andric return; 419*5ffd83dbSDimitry Andric } 420*5ffd83dbSDimitry Andric 421*5ffd83dbSDimitry Andric // Convert index and index to work with elements. 422*5ffd83dbSDimitry Andric Len /= EltSize; 423*5ffd83dbSDimitry Andric Idx /= EltSize; 424*5ffd83dbSDimitry Andric 425*5ffd83dbSDimitry Andric // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining 426*5ffd83dbSDimitry Andric // elements of the lower 64-bits. The upper 64-bits are undefined. 427*5ffd83dbSDimitry Andric for (int i = 0; i != Len; ++i) 428*5ffd83dbSDimitry Andric ShuffleMask.push_back(i + Idx); 429*5ffd83dbSDimitry Andric for (int i = Len; i != (int)HalfElts; ++i) 430*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 431*5ffd83dbSDimitry Andric for (int i = HalfElts; i != (int)NumElts; ++i) 432*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 433*5ffd83dbSDimitry Andric } 434*5ffd83dbSDimitry Andric 435*5ffd83dbSDimitry Andric void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx, 436*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 437*5ffd83dbSDimitry Andric unsigned HalfElts = NumElts / 2; 438*5ffd83dbSDimitry Andric 439*5ffd83dbSDimitry Andric // Only the bottom 6 bits are valid for each immediate. 440*5ffd83dbSDimitry Andric Len &= 0x3F; 441*5ffd83dbSDimitry Andric Idx &= 0x3F; 442*5ffd83dbSDimitry Andric 443*5ffd83dbSDimitry Andric // We can only decode this bit insertion instruction as a shuffle if both the 444*5ffd83dbSDimitry Andric // length and index work with whole elements. 445*5ffd83dbSDimitry Andric if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) 446*5ffd83dbSDimitry Andric return; 447*5ffd83dbSDimitry Andric 448*5ffd83dbSDimitry Andric // A length of zero is equivalent to a bit length of 64. 449*5ffd83dbSDimitry Andric if (Len == 0) 450*5ffd83dbSDimitry Andric Len = 64; 451*5ffd83dbSDimitry Andric 452*5ffd83dbSDimitry Andric // If the length + index exceeds the bottom 64 bits the result is undefined. 453*5ffd83dbSDimitry Andric if ((Len + Idx) > 64) { 454*5ffd83dbSDimitry Andric ShuffleMask.append(NumElts, SM_SentinelUndef); 455*5ffd83dbSDimitry Andric return; 456*5ffd83dbSDimitry Andric } 457*5ffd83dbSDimitry Andric 458*5ffd83dbSDimitry Andric // Convert index and index to work with elements. 459*5ffd83dbSDimitry Andric Len /= EltSize; 460*5ffd83dbSDimitry Andric Idx /= EltSize; 461*5ffd83dbSDimitry Andric 462*5ffd83dbSDimitry Andric // INSERTQ: Extract lowest Len elements from lower half of second source and 463*5ffd83dbSDimitry Andric // insert over first source starting at Idx element. The upper 64-bits are 464*5ffd83dbSDimitry Andric // undefined. 465*5ffd83dbSDimitry Andric for (int i = 0; i != Idx; ++i) 466*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); 467*5ffd83dbSDimitry Andric for (int i = 0; i != Len; ++i) 468*5ffd83dbSDimitry Andric ShuffleMask.push_back(i + NumElts); 469*5ffd83dbSDimitry Andric for (int i = Idx + Len; i != (int)HalfElts; ++i) 470*5ffd83dbSDimitry Andric ShuffleMask.push_back(i); 471*5ffd83dbSDimitry Andric for (int i = HalfElts; i != (int)NumElts; ++i) 472*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 473*5ffd83dbSDimitry Andric } 474*5ffd83dbSDimitry Andric 475*5ffd83dbSDimitry Andric void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits, 476*5ffd83dbSDimitry Andric ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 477*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 478*5ffd83dbSDimitry Andric unsigned VecSize = NumElts * ScalarBits; 479*5ffd83dbSDimitry Andric unsigned NumLanes = VecSize / 128; 480*5ffd83dbSDimitry Andric unsigned NumEltsPerLane = NumElts / NumLanes; 481*5ffd83dbSDimitry Andric assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && 482*5ffd83dbSDimitry Andric "Unexpected vector size"); 483*5ffd83dbSDimitry Andric assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); 484*5ffd83dbSDimitry Andric 485*5ffd83dbSDimitry Andric for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { 486*5ffd83dbSDimitry Andric if (UndefElts[i]) { 487*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 488*5ffd83dbSDimitry Andric continue; 489*5ffd83dbSDimitry Andric } 490*5ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 491*5ffd83dbSDimitry Andric M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); 492*5ffd83dbSDimitry Andric unsigned LaneOffset = i & ~(NumEltsPerLane - 1); 493*5ffd83dbSDimitry Andric ShuffleMask.push_back((int)(LaneOffset + M)); 494*5ffd83dbSDimitry Andric } 495*5ffd83dbSDimitry Andric } 496*5ffd83dbSDimitry Andric 497*5ffd83dbSDimitry Andric void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z, 498*5ffd83dbSDimitry Andric ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 499*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 500*5ffd83dbSDimitry Andric unsigned VecSize = NumElts * ScalarBits; 501*5ffd83dbSDimitry Andric unsigned NumLanes = VecSize / 128; 502*5ffd83dbSDimitry Andric unsigned NumEltsPerLane = NumElts / NumLanes; 503*5ffd83dbSDimitry Andric assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size"); 504*5ffd83dbSDimitry Andric assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size"); 505*5ffd83dbSDimitry Andric assert((NumElts == RawMask.size()) && "Unexpected mask size"); 506*5ffd83dbSDimitry Andric 507*5ffd83dbSDimitry Andric for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { 508*5ffd83dbSDimitry Andric if (UndefElts[i]) { 509*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 510*5ffd83dbSDimitry Andric continue; 511*5ffd83dbSDimitry Andric } 512*5ffd83dbSDimitry Andric 513*5ffd83dbSDimitry Andric // VPERMIL2 Operation. 514*5ffd83dbSDimitry Andric // Bits[3] - Match Bit. 515*5ffd83dbSDimitry Andric // Bits[2:1] - (Per Lane) PD Shuffle Mask. 516*5ffd83dbSDimitry Andric // Bits[2:0] - (Per Lane) PS Shuffle Mask. 517*5ffd83dbSDimitry Andric uint64_t Selector = RawMask[i]; 518*5ffd83dbSDimitry Andric unsigned MatchBit = (Selector >> 3) & 0x1; 519*5ffd83dbSDimitry Andric 520*5ffd83dbSDimitry Andric // M2Z[0:1] MatchBit 521*5ffd83dbSDimitry Andric // 0Xb X Source selected by Selector index. 522*5ffd83dbSDimitry Andric // 10b 0 Source selected by Selector index. 523*5ffd83dbSDimitry Andric // 10b 1 Zero. 524*5ffd83dbSDimitry Andric // 11b 0 Zero. 525*5ffd83dbSDimitry Andric // 11b 1 Source selected by Selector index. 526*5ffd83dbSDimitry Andric if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) { 527*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelZero); 528*5ffd83dbSDimitry Andric continue; 529*5ffd83dbSDimitry Andric } 530*5ffd83dbSDimitry Andric 531*5ffd83dbSDimitry Andric int Index = i & ~(NumEltsPerLane - 1); 532*5ffd83dbSDimitry Andric if (ScalarBits == 64) 533*5ffd83dbSDimitry Andric Index += (Selector >> 1) & 0x1; 534*5ffd83dbSDimitry Andric else 535*5ffd83dbSDimitry Andric Index += Selector & 0x3; 536*5ffd83dbSDimitry Andric 537*5ffd83dbSDimitry Andric int Src = (Selector >> 2) & 0x1; 538*5ffd83dbSDimitry Andric Index += Src * NumElts; 539*5ffd83dbSDimitry Andric ShuffleMask.push_back(Index); 540*5ffd83dbSDimitry Andric } 541*5ffd83dbSDimitry Andric } 542*5ffd83dbSDimitry Andric 543*5ffd83dbSDimitry Andric void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 544*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 545*5ffd83dbSDimitry Andric uint64_t EltMaskSize = RawMask.size() - 1; 546*5ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i != e; ++i) { 547*5ffd83dbSDimitry Andric if (UndefElts[i]) { 548*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 549*5ffd83dbSDimitry Andric continue; 550*5ffd83dbSDimitry Andric } 551*5ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 552*5ffd83dbSDimitry Andric M &= EltMaskSize; 553*5ffd83dbSDimitry Andric ShuffleMask.push_back((int)M); 554*5ffd83dbSDimitry Andric } 555*5ffd83dbSDimitry Andric } 556*5ffd83dbSDimitry Andric 557*5ffd83dbSDimitry Andric void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts, 558*5ffd83dbSDimitry Andric SmallVectorImpl<int> &ShuffleMask) { 559*5ffd83dbSDimitry Andric uint64_t EltMaskSize = (RawMask.size() * 2) - 1; 560*5ffd83dbSDimitry Andric for (int i = 0, e = RawMask.size(); i != e; ++i) { 561*5ffd83dbSDimitry Andric if (UndefElts[i]) { 562*5ffd83dbSDimitry Andric ShuffleMask.push_back(SM_SentinelUndef); 563*5ffd83dbSDimitry Andric continue; 564*5ffd83dbSDimitry Andric } 565*5ffd83dbSDimitry Andric uint64_t M = RawMask[i]; 566*5ffd83dbSDimitry Andric M &= EltMaskSize; 567*5ffd83dbSDimitry Andric ShuffleMask.push_back((int)M); 568*5ffd83dbSDimitry Andric } 569*5ffd83dbSDimitry Andric } 570*5ffd83dbSDimitry Andric 571*5ffd83dbSDimitry Andric } // llvm namespace 572