xref: /llvm-project/llvm/utils/PerfectShuffle/PerfectShuffle.cpp (revision 4c6a070a2ce1722f53da8164b6d50d5d54fdc1d2)
1 //===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file computes an optimal sequence of instructions for doing all shuffles
10 // of two 4-element vectors.  With a release build and when configured to emit
11 // an altivec instruction table, this takes about 30s to run on a 2.7Ghz
12 // PowerPC G5.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include <cassert>
17 #include <cstdlib>
18 #include <iomanip>
19 #include <iostream>
20 #include <vector>
21 
22 #define GENERATE_NEON
23 #define GENERATE_NEON_INS
24 
25 struct Operator;
26 
27 // Masks are 4-nibble hex numbers.  Values 0-7 in any nibble means that it takes
28 // an element from that value of the input vectors.  A value of 8 means the
29 // entry is undefined.
30 
31 // Mask manipulation functions.
MakeMask(unsigned V0,unsigned V1,unsigned V2,unsigned V3)32 static inline unsigned short MakeMask(unsigned V0, unsigned V1,
33                                       unsigned V2, unsigned V3) {
34   return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
35 }
36 
37 /// getMaskElt - Return element N of the specified mask.
getMaskElt(unsigned Mask,unsigned Elt)38 static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
39   return (Mask >> ((3-Elt)*4)) & 0xF;
40 }
41 
setMaskElt(unsigned Mask,unsigned Elt,unsigned NewVal)42 static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
43   unsigned FieldShift = ((3-Elt)*4);
44   return (Mask & ~(0xF << FieldShift)) | (NewVal << FieldShift);
45 }
46 
47 // Reject elements where the values are 9-15.
isValidMask(unsigned short Mask)48 static bool isValidMask(unsigned short Mask) {
49   unsigned short UndefBits = Mask & 0x8888;
50   return (Mask & ((UndefBits >> 1)|(UndefBits>>2)|(UndefBits>>3))) == 0;
51 }
52 
53 /// hasUndefElements - Return true if any of the elements in the mask are undefs
54 ///
hasUndefElements(unsigned short Mask)55 static bool hasUndefElements(unsigned short Mask) {
56   return (Mask & 0x8888) != 0;
57 }
58 
59 /// isOnlyLHSMask - Return true if this mask only refers to its LHS, not
60 /// including undef values..
isOnlyLHSMask(unsigned short Mask)61 static bool isOnlyLHSMask(unsigned short Mask) {
62   return (Mask & 0x4444) == 0;
63 }
64 
65 /// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
66 /// refer to the LHS only (for when one argument value is passed into the same
67 /// function twice).
68 #if 0
69 static unsigned short getLHSOnlyMask(unsigned short Mask) {
70   return Mask & 0xBBBB;  // Keep only LHS and Undefs.
71 }
72 #endif
73 
74 /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
75 /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
getCompressedMask(unsigned short Mask)76 static unsigned getCompressedMask(unsigned short Mask) {
77   return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
78          getMaskElt(Mask, 2)*9     + getMaskElt(Mask, 3);
79 }
80 
PrintMask(unsigned i,std::ostream & OS)81 static void PrintMask(unsigned i, std::ostream &OS) {
82   OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0)))
83      << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1)))
84      << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2)))
85      << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3)))
86      << ">";
87 }
88 
89 /// ShuffleVal - This represents a shufflevector operation.
90 struct ShuffleVal {
91   Operator *Op;   // The Operation used to generate this value.
92   unsigned Cost;  // Number of instrs used to generate this value.
93   unsigned short Arg0, Arg1;  // Input operands for this value.
94 
ShuffleValShuffleVal95   ShuffleVal() : Cost(1000000) {}
96 };
97 
98 
99 /// ShufTab - This is the actual shuffle table that we are trying to generate.
100 ///
101 static ShuffleVal ShufTab[65536];
102 
103 /// TheOperators - All of the operators that this target supports.
104 static std::vector<Operator*> TheOperators;
105 
106 /// Operator - This is a vector operation that is available for use.
107 struct Operator {
108   const char *Name;
109   unsigned short ShuffleMask;
110   unsigned short OpNum;
111   unsigned Cost;
112 
OperatorOperator113   Operator(unsigned short shufflemask, const char *name, unsigned opnum,
114            unsigned cost = 1)
115     :  Name(name), ShuffleMask(shufflemask), OpNum(opnum),Cost(cost) {
116     TheOperators.push_back(this);
117   }
~OperatorOperator118   ~Operator() {
119     assert(TheOperators.back() == this);
120     TheOperators.pop_back();
121   }
122 
isOnlyLHSOperatorOperator123   bool isOnlyLHSOperator() const {
124     return isOnlyLHSMask(ShuffleMask);
125   }
126 
getNameOperator127   const char *getName() const { return Name; }
getCostOperator128   unsigned getCost() const { return Cost; }
129 
getTransformedMaskOperator130   unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
131     // Extract the elements from LHSMask and RHSMask, as appropriate.
132     unsigned Result = 0;
133     for (unsigned i = 0; i != 4; ++i) {
134       unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
135       unsigned ResElt;
136       if (SrcElt < 4)
137         ResElt = getMaskElt(LHSMask, SrcElt);
138       else if (SrcElt < 8)
139         ResElt = getMaskElt(RHSMask, SrcElt-4);
140       else {
141         assert(SrcElt == 8 && "Bad src elt!");
142         ResElt = 8;
143       }
144       Result |= ResElt << (4*i);
145     }
146     return Result;
147   }
148 };
149 
150 #ifdef GENERATE_NEON_INS
151 // Special case "insert" op identifier used below
152 static Operator InsOp(0, "ins", 15, 1);
153 #endif
154 
getZeroCostOpName(unsigned short Op)155 static const char *getZeroCostOpName(unsigned short Op) {
156   if (ShufTab[Op].Arg0 == 0x0123)
157     return "LHS";
158   else if (ShufTab[Op].Arg0 == 0x4567)
159     return "RHS";
160   else {
161     assert(0 && "bad zero cost operation");
162     abort();
163   }
164 }
165 
PrintOperation(unsigned ValNo,unsigned short Vals[])166 static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
167   unsigned short ThisOp = Vals[ValNo];
168   std::cerr << "t" << ValNo;
169   PrintMask(ThisOp, std::cerr);
170   std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
171 
172   if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
173     std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
174     PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
175   } else {
176     // Figure out what tmp # it is.
177     for (unsigned i = 0; ; ++i)
178       if (Vals[i] == ShufTab[ThisOp].Arg0) {
179         std::cerr << "t" << i;
180         break;
181       }
182   }
183 
184 #ifdef GENERATE_NEON_INS
185   if (ShufTab[ThisOp].Op == &InsOp) {
186     std::cerr << ", lane " << ShufTab[ThisOp].Arg1;
187   } else
188 #endif
189   if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
190     std::cerr << ", ";
191     if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
192       std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1);
193       PrintMask(ShufTab[ThisOp].Arg1, std::cerr);
194     } else {
195       // Figure out what tmp # it is.
196       for (unsigned i = 0; ; ++i)
197         if (Vals[i] == ShufTab[ThisOp].Arg1) {
198           std::cerr << "t" << i;
199           break;
200         }
201     }
202   }
203   std::cerr << ")  ";
204 }
205 
getNumEntered()206 static unsigned getNumEntered() {
207   unsigned Count = 0;
208   for (unsigned i = 0; i != 65536; ++i)
209     Count += ShufTab[i].Cost < 100;
210   return Count;
211 }
212 
EvaluateOps(unsigned short Elt,unsigned short Vals[],unsigned & NumVals)213 static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
214                         unsigned &NumVals) {
215   if (ShufTab[Elt].Cost == 0) return;
216 #ifdef GENERATE_NEON_INS
217   if (ShufTab[Elt].Op == &InsOp) {
218     EvaluateOps(ShufTab[Elt].Arg0, Vals, NumVals);
219     Vals[NumVals++] = Elt;
220     return;
221   }
222 #endif
223 
224   // If this value has already been evaluated, it is free.  FIXME: match undefs.
225   for (unsigned i = 0, e = NumVals; i != e; ++i)
226     if (Vals[i] == Elt) return;
227 
228   // Otherwise, get the operands of the value, then add it.
229   unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
230   if (ShufTab[Arg0].Cost)
231     EvaluateOps(Arg0, Vals, NumVals);
232   if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
233     EvaluateOps(Arg1, Vals, NumVals);
234 
235   Vals[NumVals++] = Elt;
236 }
237 
238 
main()239 int main() {
240   // Seed the table with accesses to the LHS and RHS.
241   ShufTab[0x0123].Cost = 0;
242   ShufTab[0x0123].Op = nullptr;
243   ShufTab[0x0123].Arg0 = 0x0123;
244   ShufTab[0x4567].Cost = 0;
245   ShufTab[0x4567].Op = nullptr;
246   ShufTab[0x4567].Arg0 = 0x4567;
247 
248   // Seed the first-level of shuffles, shuffles whose inputs are the input to
249   // the vectorshuffle operation.
250   bool MadeChange = true;
251   unsigned OpCount = 0;
252   while (MadeChange) {
253     MadeChange = false;
254     ++OpCount;
255     std::cerr << "Starting iteration #" << OpCount << " with "
256               << getNumEntered() << " entries established.\n";
257 
258     // Scan the table for two reasons: First, compute the maximum cost of any
259     // operation left in the table.  Second, make sure that values with undefs
260     // have the cheapest alternative that they match.
261     unsigned MaxCost = ShufTab[0].Cost;
262     for (unsigned i = 1; i != 0x8889; ++i) {
263       if (!isValidMask(i)) continue;
264       if (ShufTab[i].Cost > MaxCost)
265         MaxCost = ShufTab[i].Cost;
266 
267       // If this value has an undef, make it be computed the cheapest possible
268       // way of any of the things that it matches.
269       if (hasUndefElements(i)) {
270         // This code is a little bit tricky, so here's the idea: consider some
271         // permutation, like 7u4u.  To compute the lowest cost for 7u4u, we
272         // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries.  If
273         // there are 3 undefs, the number rises to 729 entries we have to scan,
274         // and for the 4 undef case, we have to scan the whole table.
275         //
276         // Instead of doing this huge amount of scanning, we process the table
277         // entries *in order*, and use the fact that 'u' is 8, larger than any
278         // valid index.  Given an entry like 7u4u then, we only need to scan
279         // 7[0-7]4u - 8 entries.  We can get away with this, because we already
280         // know that each of 704u, 714u, 724u, etc contain the minimum value of
281         // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.
282         unsigned UndefIdx;
283         if (i & 0x8000)
284           UndefIdx = 0;
285         else if (i & 0x0800)
286           UndefIdx = 1;
287         else if (i & 0x0080)
288           UndefIdx = 2;
289         else if (i & 0x0008)
290           UndefIdx = 3;
291         else
292           abort();
293 
294         unsigned MinVal  = i;
295         unsigned MinCost = ShufTab[i].Cost;
296 
297         // Scan the 8 entries.
298         for (unsigned j = 0; j != 8; ++j) {
299           unsigned NewElt = setMaskElt(i, UndefIdx, j);
300           if (ShufTab[NewElt].Cost < MinCost) {
301             MinCost = ShufTab[NewElt].Cost;
302             MinVal = NewElt;
303           }
304         }
305 
306         // If we found something cheaper than what was here before, use it.
307         if (i != MinVal) {
308           MadeChange = true;
309           ShufTab[i] = ShufTab[MinVal];
310         }
311       }
312 #ifdef GENERATE_NEON_INS
313       else {
314         // Similarly, if we take the mask (eg 3,6,1,0) and take the cost with
315         // undef for each lane (eg u,6,1,0 or 3,u,1,0 etc), we can use a single
316         // lane insert to fixup the result.
317         for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx++) {
318           if (getMaskElt(i, LaneIdx) == 8)
319             continue;
320           unsigned NewElt = setMaskElt(i, LaneIdx, 8);
321           if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) {
322             MadeChange = true;
323             ShufTab[i].Cost = ShufTab[NewElt].Cost + 1;
324             ShufTab[i].Op = &InsOp;
325             ShufTab[i].Arg0 = NewElt;
326             ShufTab[i].Arg1 = LaneIdx;
327           }
328         }
329 
330         // Similar idea for using a D register mov, masking out 2 lanes to undef
331         for (unsigned LaneIdx = 0; LaneIdx < 4; LaneIdx += 2) {
332           unsigned Ln0 = getMaskElt(i, LaneIdx);
333           unsigned Ln1 = getMaskElt(i, LaneIdx + 1);
334           if ((Ln0 == 0 && Ln1 == 1) || (Ln0 == 2 && Ln1 == 3) ||
335               (Ln0 == 4 && Ln1 == 5) || (Ln0 == 6 && Ln1 == 7)) {
336             unsigned NewElt = setMaskElt(i, LaneIdx, 8);
337             NewElt = setMaskElt(NewElt, LaneIdx + 1, 8);
338             if (ShufTab[NewElt].Cost + 1 < ShufTab[i].Cost) {
339               MadeChange = true;
340               ShufTab[i].Cost = ShufTab[NewElt].Cost + 1;
341               ShufTab[i].Op = &InsOp;
342               ShufTab[i].Arg0 = NewElt;
343               ShufTab[i].Arg1 = (LaneIdx >> 1) | 0x4;
344             }
345           }
346         }
347       }
348 #endif
349     }
350 
351     for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
352       if (!isValidMask(LHS)) continue;
353       if (ShufTab[LHS].Cost > 1000) continue;
354 
355       // If nothing involving this operand could possibly be cheaper than what
356       // we already have, don't consider it.
357       if (ShufTab[LHS].Cost + 1 >= MaxCost)
358         continue;
359 
360       for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
361         Operator *Op = TheOperators[opnum];
362 #ifdef GENERATE_NEON_INS
363         if (Op == &InsOp)
364           continue;
365 #endif
366 
367         // Evaluate op(LHS,LHS)
368         unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
369 
370         unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
371         if (Cost < ShufTab[ResultMask].Cost) {
372           ShufTab[ResultMask].Cost = Cost;
373           ShufTab[ResultMask].Op = Op;
374           ShufTab[ResultMask].Arg0 = LHS;
375           ShufTab[ResultMask].Arg1 = LHS;
376           MadeChange = true;
377         }
378 
379         // If this is a two input instruction, include the op(x,y) cases.  If
380         // this is a one input instruction, skip this.
381         if (Op->isOnlyLHSOperator()) continue;
382 
383         for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
384           if (!isValidMask(RHS)) continue;
385           if (ShufTab[RHS].Cost > 1000) continue;
386 
387           // If nothing involving this operand could possibly be cheaper than
388           // what we already have, don't consider it.
389           if (ShufTab[RHS].Cost + 1 >= MaxCost)
390             continue;
391 
392 
393           // Evaluate op(LHS,RHS)
394           unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
395 
396           if (ShufTab[ResultMask].Cost <= OpCount ||
397               ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
398               ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
399             continue;
400 
401           // Figure out the cost to evaluate this, knowing that CSE's only need
402           // to be evaluated once.
403           unsigned short Vals[30];
404           unsigned NumVals = 0;
405           EvaluateOps(LHS, Vals, NumVals);
406           EvaluateOps(RHS, Vals, NumVals);
407 
408           unsigned Cost = NumVals + Op->getCost();
409           if (Cost < ShufTab[ResultMask].Cost) {
410             ShufTab[ResultMask].Cost = Cost;
411             ShufTab[ResultMask].Op = Op;
412             ShufTab[ResultMask].Arg0 = LHS;
413             ShufTab[ResultMask].Arg1 = RHS;
414             MadeChange = true;
415           }
416         }
417       }
418     }
419   }
420 
421   std::cerr << "Finished Table has " << getNumEntered()
422             << " entries established.\n";
423 
424   unsigned CostArray[10] = { 0 };
425 
426   // Compute a cost histogram.
427   for (unsigned i = 0; i != 65536; ++i) {
428     if (!isValidMask(i)) continue;
429     if (ShufTab[i].Cost > 9)
430       ++CostArray[9];
431     else
432       ++CostArray[ShufTab[i].Cost];
433   }
434 
435   for (unsigned i = 0; i != 9; ++i)
436     if (CostArray[i])
437       std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
438   if (CostArray[9])
439     std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
440 
441 
442   // Build up the table to emit.
443   std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
444   std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";
445 
446   for (unsigned i = 0; i != 0x8889; ++i) {
447     if (!isValidMask(i)) continue;
448 
449     // CostSat - The cost of this operation saturated to two bits.
450     unsigned CostSat = ShufTab[i].Cost;
451     if (CostSat > 4) CostSat = 4;
452     if (CostSat == 0) CostSat = 1;
453     --CostSat;  // Cost is now between 0-3.
454 
455     unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
456     assert(OpNum < 16 && "Too few bits to encode operation!");
457 
458     unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
459     unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
460 
461     // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
462     // LHS, and 13 bits of RHS = 32 bits.
463     unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
464 
465     std::cout << "  " << std::setw(10) << Val << "U, // ";
466     PrintMask(i, std::cout);
467     std::cout << ": Cost " << ShufTab[i].Cost;
468     std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
469     std::cout << " ";
470     if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
471       std::cout << getZeroCostOpName(ShufTab[i].Arg0);
472     } else {
473       PrintMask(ShufTab[i].Arg0, std::cout);
474     }
475 
476     if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
477       std::cout << ", ";
478       if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
479         std::cout << getZeroCostOpName(ShufTab[i].Arg1);
480       } else {
481         PrintMask(ShufTab[i].Arg1, std::cout);
482       }
483     }
484 #ifdef GENERATE_NEON_INS
485     else if (ShufTab[i].Op == &InsOp) {
486       std::cout << ", lane " << ShufTab[i].Arg1;
487     }
488 #endif
489 
490     std::cout << "\n";
491   }
492   std::cout << "  0\n};\n";
493 
494   if (false) {
495     // Print out the table.
496     for (unsigned i = 0; i != 0x8889; ++i) {
497       if (!isValidMask(i)) continue;
498       if (ShufTab[i].Cost < 1000) {
499         PrintMask(i, std::cerr);
500         std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
501 
502         unsigned short Vals[30];
503         unsigned NumVals = 0;
504         EvaluateOps(i, Vals, NumVals);
505 
506         for (unsigned j = 0, e = NumVals; j != e; ++j)
507           PrintOperation(j, Vals);
508         std::cerr << "\n";
509       }
510     }
511   }
512 }
513 
514 
515 #ifdef GENERATE_ALTIVEC
516 
517 ///===---------------------------------------------------------------------===//
518 /// The altivec instruction definitions.  This is the altivec-specific part of
519 /// this file.
520 ///===---------------------------------------------------------------------===//
521 
522 // Note that the opcode numbers here must match those in the PPC backend.
523 enum {
524   OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
525   OP_VMRGHW,
526   OP_VMRGLW,
527   OP_VSPLTISW0,
528   OP_VSPLTISW1,
529   OP_VSPLTISW2,
530   OP_VSPLTISW3,
531   OP_VSLDOI4,
532   OP_VSLDOI8,
533   OP_VSLDOI12
534 };
535 
536 struct vmrghw : public Operator {
vmrghwvmrghw537   vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
538 } the_vmrghw;
539 
540 struct vmrglw : public Operator {
vmrglwvmrglw541   vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
542 } the_vmrglw;
543 
544 template<unsigned Elt>
545 struct vspltisw : public Operator {
vspltiswvspltisw546   vspltisw(const char *N, unsigned Opc)
547     : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
548 };
549 
550 vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
551 vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
552 vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
553 vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);
554 
555 template<unsigned N>
556 struct vsldoi : public Operator {
vsldoivsldoi557   vsldoi(const char *Name, unsigned Opc)
558     : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
559   }
560 };
561 
562 vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
563 vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
564 vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
565 
566 #endif
567 
568 #ifdef GENERATE_NEON
569 enum {
570   OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
571   OP_VREV,
572   OP_VDUP0,
573   OP_VDUP1,
574   OP_VDUP2,
575   OP_VDUP3,
576   OP_VEXT1,
577   OP_VEXT2,
578   OP_VEXT3,
579   OP_VUZPL, // VUZP, left result
580   OP_VUZPR, // VUZP, right result
581   OP_VZIPL, // VZIP, left result
582   OP_VZIPR, // VZIP, right result
583   OP_VTRNL, // VTRN, left result
584   OP_VTRNR  // VTRN, right result
585 };
586 
587 struct vrev : public Operator {
vrevvrev588   vrev() : Operator(0x1032, "vrev", OP_VREV) {}
589 } the_vrev;
590 
591 template<unsigned Elt>
592 struct vdup : public Operator {
vdupvdup593   vdup(const char *N, unsigned Opc)
594     : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
595 };
596 
597 vdup<0> the_vdup0("vdup0", OP_VDUP0);
598 vdup<1> the_vdup1("vdup1", OP_VDUP1);
599 vdup<2> the_vdup2("vdup2", OP_VDUP2);
600 vdup<3> the_vdup3("vdup3", OP_VDUP3);
601 
602 template<unsigned N>
603 struct vext : public Operator {
vextvext604   vext(const char *Name, unsigned Opc)
605     : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
606   }
607 };
608 
609 vext<1> the_vext1("vext1", OP_VEXT1);
610 vext<2> the_vext2("vext2", OP_VEXT2);
611 vext<3> the_vext3("vext3", OP_VEXT3);
612 
613 struct vuzpl : public Operator {
vuzplvuzpl614   vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 1) {}
615 } the_vuzpl;
616 
617 struct vuzpr : public Operator {
vuzprvuzpr618   vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 1) {}
619 } the_vuzpr;
620 
621 struct vzipl : public Operator {
vziplvzipl622   vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 1) {}
623 } the_vzipl;
624 
625 struct vzipr : public Operator {
vziprvzipr626   vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 1) {}
627 } the_vzipr;
628 
629 struct vtrnl : public Operator {
vtrnlvtrnl630   vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 1) {}
631 } the_vtrnl;
632 
633 struct vtrnr : public Operator {
vtrnrvtrnr634   vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 1) {}
635 } the_vtrnr;
636 
637 #endif
638