xref: /llvm-project/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (revision bd9fb2ae959dc2bc0a2a6a309b56ea239d41797e)
1 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Define several functions to decode x86 specific shuffle semantics using
11 // constants from the constant pool.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86ShuffleDecodeConstantPool.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/CodeGen/MachineValueType.h"
19 #include "llvm/IR/Constants.h"
20 
21 //===----------------------------------------------------------------------===//
22 //  Vector Mask Decoding
23 //===----------------------------------------------------------------------===//
24 
25 namespace llvm {
26 
27 static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
28                                 SmallBitVector &UndefElts,
29                                 SmallVectorImpl<uint64_t> &RawMask) {
30   // It is not an error for shuffle masks to not be a vector of
31   // MaskEltSizeInBits because the constant pool uniques constants by their
32   // bit representation.
33   // e.g. the following take up the same space in the constant pool:
34   //   i128 -170141183420855150465331762880109871104
35   //
36   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
37   //
38   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
39   //              i32 -2147483648, i32 -2147483648>
40   Type *CstTy = C->getType();
41   if (!CstTy->isVectorTy())
42     return false;
43 
44   Type *CstEltTy = CstTy->getVectorElementType();
45   if (!CstEltTy->isIntegerTy())
46     return false;
47 
48   unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
49   unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
50   unsigned NumCstElts = CstTy->getVectorNumElements();
51 
52   // Extract all the undef/constant element data and pack into single bitsets.
53   APInt UndefBits(CstSizeInBits, 0);
54   APInt MaskBits(CstSizeInBits, 0);
55   for (unsigned i = 0; i != NumCstElts; ++i) {
56     Constant *COp = C->getAggregateElement(i);
57     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
58       return false;
59 
60     unsigned BitOffset = i * CstEltSizeInBits;
61 
62     if (isa<UndefValue>(COp)) {
63       UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
64       continue;
65     }
66 
67     auto *Elt = cast<ConstantInt>(COp);
68     MaskBits |= Elt->getValue().zextOrTrunc(CstSizeInBits).shl(BitOffset);
69   }
70 
71   // Now extract the undef/constant bit data into the raw shuffle masks.
72   assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
73          "Unaligned shuffle mask size");
74 
75   unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
76   UndefElts = SmallBitVector(NumMaskElts, false);
77   RawMask.resize(NumMaskElts, 0);
78 
79   for (unsigned i = 0; i != NumMaskElts; ++i) {
80     unsigned BitOffset = i * MaskEltSizeInBits;
81     APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
82 
83     // Only treat the element as UNDEF if all bits are UNDEF, otherwise
84     // treat it as zero.
85     if (EltUndef.isAllOnesValue()) {
86       UndefElts[i] = true;
87       RawMask[i] = 0;
88       continue;
89     }
90 
91     APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
92     RawMask[i] = EltBits.getZExtValue();
93   }
94 
95   return true;
96 }
97 
98 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
99   Type *MaskTy = C->getType();
100   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
101   (void)MaskTySize;
102   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
103          "Unexpected vector size.");
104 
105   // The shuffle mask requires a byte vector.
106   SmallBitVector UndefElts;
107   SmallVector<uint64_t, 32> RawMask;
108   if (!extractConstantMask(C, 8, UndefElts, RawMask))
109     return;
110 
111   unsigned NumElts = RawMask.size();
112   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
113          "Unexpected number of vector elements.");
114 
115   for (unsigned i = 0; i != NumElts; ++i) {
116     if (UndefElts[i]) {
117       ShuffleMask.push_back(SM_SentinelUndef);
118       continue;
119     }
120 
121     uint64_t Element = RawMask[i];
122     // If the high bit (7) of the byte is set, the element is zeroed.
123     if (Element & (1 << 7))
124       ShuffleMask.push_back(SM_SentinelZero);
125     else {
126       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
127       // lane of the vector we're inside.
128       unsigned Base = i & ~0xf;
129 
130       // Only the least significant 4 bits of the byte are used.
131       int Index = Base + (Element & 0xf);
132       ShuffleMask.push_back(Index);
133     }
134   }
135 }
136 
137 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
138                         SmallVectorImpl<int> &ShuffleMask) {
139   Type *MaskTy = C->getType();
140   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
141   (void)MaskTySize;
142   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
143          "Unexpected vector size.");
144   assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
145 
146   // The shuffle mask requires elements the same size as the target.
147   SmallBitVector UndefElts;
148   SmallVector<uint64_t, 8> RawMask;
149   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
150     return;
151 
152   unsigned NumElts = RawMask.size();
153   unsigned NumEltsPerLane = 128 / ElSize;
154   assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
155          "Unexpected number of vector elements.");
156 
157   for (unsigned i = 0; i != NumElts; ++i) {
158     if (UndefElts[i]) {
159       ShuffleMask.push_back(SM_SentinelUndef);
160       continue;
161     }
162 
163     int Index = i & ~(NumEltsPerLane - 1);
164     uint64_t Element = RawMask[i];
165     if (ElSize == 64)
166       Index += (Element >> 1) & 0x1;
167     else
168       Index += Element & 0x3;
169 
170     ShuffleMask.push_back(Index);
171   }
172 }
173 
174 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
175                          SmallVectorImpl<int> &ShuffleMask) {
176   Type *MaskTy = C->getType();
177   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
178   (void)MaskTySize;
179   assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
180 
181   // The shuffle mask requires elements the same size as the target.
182   SmallBitVector UndefElts;
183   SmallVector<uint64_t, 8> RawMask;
184   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
185     return;
186 
187   unsigned NumElts = RawMask.size();
188   unsigned NumEltsPerLane = 128 / ElSize;
189   assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
190          "Unexpected number of vector elements.");
191 
192   for (unsigned i = 0; i != NumElts; ++i) {
193     if (UndefElts[i]) {
194       ShuffleMask.push_back(SM_SentinelUndef);
195       continue;
196     }
197 
198     // VPERMIL2 Operation.
199     // Bits[3] - Match Bit.
200     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
201     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
202     uint64_t Selector = RawMask[i];
203     unsigned MatchBit = (Selector >> 3) & 0x1;
204 
205     // M2Z[0:1]     MatchBit
206     //   0Xb           X        Source selected by Selector index.
207     //   10b           0        Source selected by Selector index.
208     //   10b           1        Zero.
209     //   11b           0        Zero.
210     //   11b           1        Source selected by Selector index.
211     if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
212       ShuffleMask.push_back(SM_SentinelZero);
213       continue;
214     }
215 
216     int Index = i & ~(NumEltsPerLane - 1);
217     if (ElSize == 64)
218       Index += (Selector >> 1) & 0x1;
219     else
220       Index += Selector & 0x3;
221 
222     int Src = (Selector >> 2) & 0x1;
223     Index += Src * NumElts;
224     ShuffleMask.push_back(Index);
225   }
226 }
227 
228 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
229   assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
230          "Unexpected vector size.");
231 
232   // The shuffle mask requires a byte vector.
233   SmallBitVector UndefElts;
234   SmallVector<uint64_t, 32> RawMask;
235   if (!extractConstantMask(C, 8, UndefElts, RawMask))
236     return;
237 
238   unsigned NumElts = RawMask.size();
239   assert(NumElts == 16 && "Unexpected number of vector elements.");
240 
241   for (unsigned i = 0; i != NumElts; ++i) {
242     if (UndefElts[i]) {
243       ShuffleMask.push_back(SM_SentinelUndef);
244       continue;
245     }
246 
247     // VPPERM Operation
248     // Bits[4:0] - Byte Index (0 - 31)
249     // Bits[7:5] - Permute Operation
250     //
251     // Permute Operation:
252     // 0 - Source byte (no logical operation).
253     // 1 - Invert source byte.
254     // 2 - Bit reverse of source byte.
255     // 3 - Bit reverse of inverted source byte.
256     // 4 - 00h (zero - fill).
257     // 5 - FFh (ones - fill).
258     // 6 - Most significant bit of source byte replicated in all bit positions.
259     // 7 - Invert most significant bit of source byte and replicate in all bit
260     // positions.
261     uint64_t Element = RawMask[i];
262     uint64_t Index = Element & 0x1F;
263     uint64_t PermuteOp = (Element >> 5) & 0x7;
264 
265     if (PermuteOp == 4) {
266       ShuffleMask.push_back(SM_SentinelZero);
267       continue;
268     }
269     if (PermuteOp != 0) {
270       ShuffleMask.clear();
271       return;
272     }
273     ShuffleMask.push_back((int)Index);
274   }
275 }
276 
277 void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
278                       SmallVectorImpl<int> &ShuffleMask) {
279   Type *MaskTy = C->getType();
280   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
281   (void)MaskTySize;
282   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
283          "Unexpected vector size.");
284   assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
285          "Unexpected vector element size.");
286 
287   // The shuffle mask requires elements the same size as the target.
288   SmallBitVector UndefElts;
289   SmallVector<uint64_t, 8> RawMask;
290   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
291     return;
292 
293   unsigned NumElts = RawMask.size();
294 
295   for (unsigned i = 0; i != NumElts; ++i) {
296     if (UndefElts[i]) {
297       ShuffleMask.push_back(SM_SentinelUndef);
298       continue;
299     }
300     int Index = RawMask[i] & (NumElts - 1);
301     ShuffleMask.push_back(Index);
302   }
303 }
304 
305 void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
306                        SmallVectorImpl<int> &ShuffleMask) {
307   Type *MaskTy = C->getType();
308   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
309   (void)MaskTySize;
310   assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
311          "Unexpected vector size.");
312   assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
313          "Unexpected vector element size.");
314 
315   // The shuffle mask requires elements the same size as the target.
316   SmallBitVector UndefElts;
317   SmallVector<uint64_t, 8> RawMask;
318   if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
319     return;
320 
321   unsigned NumElts = RawMask.size();
322 
323   for (unsigned i = 0; i != NumElts; ++i) {
324     if (UndefElts[i]) {
325       ShuffleMask.push_back(SM_SentinelUndef);
326       continue;
327     }
328     int Index = RawMask[i] & (NumElts*2 - 1);
329     ShuffleMask.push_back(Index);
330   }
331 }
332 } // llvm namespace
333