xref: /llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (revision 3a895c48732ab84b68e2453a8b869cc9c5050e9a)
1 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Define several functions to decode x86 specific shuffle semantics using
11 // constants from the constant pool.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86ShuffleDecodeConstantPool.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/CodeGen/MachineValueType.h"
19 #include "llvm/IR/Constants.h"
20 
21 //===----------------------------------------------------------------------===//
22 //  Vector Mask Decoding
23 //===----------------------------------------------------------------------===//
24 
25 namespace llvm {
26 
27 static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
28                                 SmallBitVector &UndefElts,
29                                 SmallVectorImpl<uint64_t> &RawMask) {
30   // It is not an error for shuffle masks to not be a vector of
31   // MaskEltSizeInBits because the constant pool uniques constants by their
32   // bit representation.
33   // e.g. the following take up the same space in the constant pool:
34   //   i128 -170141183420855150465331762880109871104
35   //
36   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
37   //
38   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
39   //              i32 -2147483648, i32 -2147483648>
40   Type *CstTy = C->getType();
41   if (!CstTy->isVectorTy())
42     return false;
43 
44   Type *CstEltTy = CstTy->getVectorElementType();
45   if (!CstEltTy->isIntegerTy())
46     return false;
47 
48   unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
49   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
50   unsigned NumCstElts = CstTy->getVectorNumElements();
51 
52   // Extract all the undef/constant element data and pack into single bitsets.
53   APInt UndefBits(CstSizeInBits, 0);
54   APInt MaskBits(CstSizeInBits, 0);
55   for (unsigned i = 0; i != NumCstElts; ++i) {
56     Constant *COp = C->getAggregateElement(i);
57     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
58       return false;
59 
60     unsigned BitOffset = i * CstEltSizeInBits;
61 
62     if (isa<UndefValue>(COp)) {
63       unsigned HiBits = BitOffset + CstEltSizeInBits;
64       UndefBits |= APInt::getBitsSet(CstSizeInBits, BitOffset, HiBits);
65       continue;
66     }
67 
68     auto *Elt = cast<ConstantInt>(COp);
69     MaskBits |= Elt->getValue().zextOrTrunc(CstSizeInBits).shl(BitOffset);
70   }
71 
72   // Now extract the undef/constant bit data into the raw shuffle masks.
73   assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
74          "Unaligned shuffle mask size");
75 
76   unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
77   UndefElts = SmallBitVector(NumMaskElts, false);
78   RawMask.resize(NumMaskElts, 0);
79 
80   for (unsigned i = 0; i != NumMaskElts; ++i) {
81     APInt EltUndef = UndefBits.lshr(i * MaskEltSizeInBits);
82     EltUndef = EltUndef.zextOrTrunc(MaskEltSizeInBits);
83 
84     // Only treat the element as UNDEF if all bits are UNDEF, otherwise
85     // treat it as zero.
86     if (EltUndef.isAllOnesValue()) {
87       UndefElts[i] = true;
88       RawMask[i] = 0;
89       continue;
90     }
91 
92     APInt EltBits = MaskBits.lshr(i * MaskEltSizeInBits);
93     EltBits = EltBits.zextOrTrunc(MaskEltSizeInBits);
94     RawMask[i] = EltBits.getZExtValue();
95   }
96 
97   return true;
98 }
99 
100 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
101   Type *MaskTy = C->getType();
102   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
103   (void)MaskTySize;
104   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
105          "Unexpected vector size.");
106 
107   // The shuffle mask requires a byte vector.
108   SmallBitVector UndefElts;
109   SmallVector<uint64_t, 32> RawMask;
110   if (!extractConstantMask(C, 8, UndefElts, RawMask))
111     return;
112 
113   unsigned NumElts = RawMask.size();
114   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
115          "Unexpected number of vector elements.");
116 
117   for (unsigned i = 0; i != NumElts; ++i) {
118     if (UndefElts[i]) {
119       ShuffleMask.push_back(SM_SentinelUndef);
120       continue;
121     }
122 
123     uint64_t Element = RawMask[i];
124     // If the high bit (7) of the byte is set, the element is zeroed.
125     if (Element & (1 << 7))
126       ShuffleMask.push_back(SM_SentinelZero);
127     else {
128       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
129       // lane of the vector we're inside.
130       unsigned Base = i & ~0xf;
131 
132       // Only the least significant 4 bits of the byte are used.
133       int Index = Base + (Element & 0xf);
134       ShuffleMask.push_back(Index);
135     }
136   }
137 }
138 
139 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
140                         SmallVectorImpl<int> &ShuffleMask) {
141   Type *MaskTy = C->getType();
142   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
143   (void)MaskTySize;
144   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
145          "Unexpected vector size.");
146   assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
147 
148   // The shuffle mask requires elements the same size as the target.
149   SmallBitVector UndefElts;
150   SmallVector<uint64_t, 8> RawMask;
151   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
152     return;
153 
154   unsigned NumElts = RawMask.size();
155   unsigned NumEltsPerLane = 128 / ElSize;
156   assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
157          "Unexpected number of vector elements.");
158 
159   for (unsigned i = 0; i != NumElts; ++i) {
160     if (UndefElts[i]) {
161       ShuffleMask.push_back(SM_SentinelUndef);
162       continue;
163     }
164 
165     int Index = i & ~(NumEltsPerLane - 1);
166     uint64_t Element = RawMask[i];
167     if (ElSize == 64)
168       Index += (Element >> 1) & 0x1;
169     else
170       Index += Element & 0x3;
171 
172     ShuffleMask.push_back(Index);
173   }
174 }
175 
176 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
177                          SmallVectorImpl<int> &ShuffleMask) {
178   Type *MaskTy = C->getType();
179   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
180   (void)MaskTySize;
181   assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
182 
183   // The shuffle mask requires elements the same size as the target.
184   SmallBitVector UndefElts;
185   SmallVector<uint64_t, 8> RawMask;
186   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
187     return;
188 
189   unsigned NumElts = RawMask.size();
190   unsigned NumEltsPerLane = 128 / ElSize;
191   assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
192          "Unexpected number of vector elements.");
193 
194   for (unsigned i = 0; i != NumElts; ++i) {
195     if (UndefElts[i]) {
196       ShuffleMask.push_back(SM_SentinelUndef);
197       continue;
198     }
199 
200     // VPERMIL2 Operation.
201     // Bits[3] - Match Bit.
202     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
203     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
204     uint64_t Selector = RawMask[i];
205     unsigned MatchBit = (Selector >> 3) & 0x1;
206 
207     // M2Z[0:1]     MatchBit
208     //   0Xb           X        Source selected by Selector index.
209     //   10b           0        Source selected by Selector index.
210     //   10b           1        Zero.
211     //   11b           0        Zero.
212     //   11b           1        Source selected by Selector index.
213     if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
214       ShuffleMask.push_back(SM_SentinelZero);
215       continue;
216     }
217 
218     int Index = i & ~(NumEltsPerLane - 1);
219     if (ElSize == 64)
220       Index += (Selector >> 1) & 0x1;
221     else
222       Index += Selector & 0x3;
223 
224     int Src = (Selector >> 2) & 0x1;
225     Index += Src * NumElts;
226     ShuffleMask.push_back(Index);
227   }
228 }
229 
230 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
231   assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
232          "Unexpected vector size.");
233 
234   // The shuffle mask requires a byte vector.
235   SmallBitVector UndefElts;
236   SmallVector<uint64_t, 32> RawMask;
237   if (!extractConstantMask(C, 8, UndefElts, RawMask))
238     return;
239 
240   unsigned NumElts = RawMask.size();
241   assert(NumElts == 16 && "Unexpected number of vector elements.");
242 
243   for (unsigned i = 0; i != NumElts; ++i) {
244     if (UndefElts[i]) {
245       ShuffleMask.push_back(SM_SentinelUndef);
246       continue;
247     }
248 
249     // VPPERM Operation
250     // Bits[4:0] - Byte Index (0 - 31)
251     // Bits[7:5] - Permute Operation
252     //
253     // Permute Operation:
254     // 0 - Source byte (no logical operation).
255     // 1 - Invert source byte.
256     // 2 - Bit reverse of source byte.
257     // 3 - Bit reverse of inverted source byte.
258     // 4 - 00h (zero - fill).
259     // 5 - FFh (ones - fill).
260     // 6 - Most significant bit of source byte replicated in all bit positions.
261     // 7 - Invert most significant bit of source byte and replicate in all bit
262     // positions.
263     uint64_t Element = RawMask[i];
264     uint64_t Index = Element & 0x1F;
265     uint64_t PermuteOp = (Element >> 5) & 0x7;
266 
267     if (PermuteOp == 4) {
268       ShuffleMask.push_back(SM_SentinelZero);
269       continue;
270     }
271     if (PermuteOp != 0) {
272       ShuffleMask.clear();
273       return;
274     }
275     ShuffleMask.push_back((int)Index);
276   }
277 }
278 
279 void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
280                       SmallVectorImpl<int> &ShuffleMask) {
281   Type *MaskTy = C->getType();
282   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
283   (void)MaskTySize;
284   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
285          "Unexpected vector size.");
286   assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
287          "Unexpected vector element size.");
288 
289   // The shuffle mask requires elements the same size as the target.
290   SmallBitVector UndefElts;
291   SmallVector<uint64_t, 8> RawMask;
292   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
293     return;
294 
295   unsigned NumElts = RawMask.size();
296 
297   for (unsigned i = 0; i != NumElts; ++i) {
298     if (UndefElts[i]) {
299       ShuffleMask.push_back(SM_SentinelUndef);
300       continue;
301     }
302     int Index = RawMask[i] & (NumElts - 1);
303     ShuffleMask.push_back(Index);
304   }
305 }
306 
307 void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
308                        SmallVectorImpl<int> &ShuffleMask) {
309   Type *MaskTy = C->getType();
310   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
311   (void)MaskTySize;
312   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
313          "Unexpected vector size.");
314   assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
315          "Unexpected vector element size.");
316 
317   // The shuffle mask requires elements the same size as the target.
318   SmallBitVector UndefElts;
319   SmallVector<uint64_t, 8> RawMask;
320   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
321     return;
322 
323   unsigned NumElts = RawMask.size();
324 
325   for (unsigned i = 0; i != NumElts; ++i) {
326     if (UndefElts[i]) {
327       ShuffleMask.push_back(SM_SentinelUndef);
328       continue;
329     }
330     int Index = RawMask[i] & (NumElts*2 - 1);
331     ShuffleMask.push_back(Index);
332   }
333 }
334 } // llvm namespace
335