xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1e8d8bef9SDimitry Andric //===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric /// \file
/// This file implements the X86-specific instruction-combining transforms:
/// it folds and simplifies calls to X86 target intrinsics, while the
/// target-independent InstCombine logic handles everything else.
13e8d8bef9SDimitry Andric ///
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric 
16e8d8bef9SDimitry Andric #include "X86TargetTransformInfo.h"
17e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
18e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsX86.h"
19e8d8bef9SDimitry Andric #include "llvm/Support/KnownBits.h"
20e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h"
21bdd1243dSDimitry Andric #include <optional>
22e8d8bef9SDimitry Andric 
23e8d8bef9SDimitry Andric using namespace llvm;
24*0fca6ea1SDimitry Andric using namespace llvm::PatternMatch;
25e8d8bef9SDimitry Andric 
26e8d8bef9SDimitry Andric #define DEBUG_TYPE "x86tti"
27e8d8bef9SDimitry Andric 
28e8d8bef9SDimitry Andric /// Return a constant boolean vector that has true elements in all positions
29e8d8bef9SDimitry Andric /// where the input constant data vector has an element with the sign bit set.
30*0fca6ea1SDimitry Andric static Constant *getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL) {
31e8d8bef9SDimitry Andric   VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
32e8d8bef9SDimitry Andric   V = ConstantExpr::getBitCast(V, IntTy);
33*0fca6ea1SDimitry Andric   V = ConstantFoldCompareInstOperands(CmpInst::ICMP_SGT,
34*0fca6ea1SDimitry Andric                                       Constant::getNullValue(IntTy), V, DL);
35*0fca6ea1SDimitry Andric   assert(V && "Vector must be foldable");
36e8d8bef9SDimitry Andric   return V;
37e8d8bef9SDimitry Andric }
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric /// Convert the x86 XMM integer vector mask to a vector of bools based on
40e8d8bef9SDimitry Andric /// each element's most significant bit (the sign bit).
41*0fca6ea1SDimitry Andric static Value *getBoolVecFromMask(Value *Mask, const DataLayout &DL) {
42e8d8bef9SDimitry Andric   // Fold Constant Mask.
43e8d8bef9SDimitry Andric   if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
44*0fca6ea1SDimitry Andric     return getNegativeIsTrueBoolVec(ConstantMask, DL);
45e8d8bef9SDimitry Andric 
46e8d8bef9SDimitry Andric   // Mask was extended from a boolean vector.
47e8d8bef9SDimitry Andric   Value *ExtMask;
48*0fca6ea1SDimitry Andric   if (match(Mask, m_SExt(m_Value(ExtMask))) &&
49e8d8bef9SDimitry Andric       ExtMask->getType()->isIntOrIntVectorTy(1))
50e8d8bef9SDimitry Andric     return ExtMask;
51e8d8bef9SDimitry Andric 
52e8d8bef9SDimitry Andric   return nullptr;
53e8d8bef9SDimitry Andric }
54e8d8bef9SDimitry Andric 
55e8d8bef9SDimitry Andric // TODO: If the x86 backend knew how to convert a bool vector mask back to an
56e8d8bef9SDimitry Andric // XMM register mask efficiently, we could transform all x86 masked intrinsics
57e8d8bef9SDimitry Andric // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
58e8d8bef9SDimitry Andric static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
59e8d8bef9SDimitry Andric   Value *Ptr = II.getOperand(0);
60e8d8bef9SDimitry Andric   Value *Mask = II.getOperand(1);
61e8d8bef9SDimitry Andric   Constant *ZeroVec = Constant::getNullValue(II.getType());
62e8d8bef9SDimitry Andric 
63e8d8bef9SDimitry Andric   // Zero Mask - masked load instruction creates a zero vector.
64e8d8bef9SDimitry Andric   if (isa<ConstantAggregateZero>(Mask))
65e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, ZeroVec);
66e8d8bef9SDimitry Andric 
67e8d8bef9SDimitry Andric   // The mask is constant or extended from a bool vector. Convert this x86
68e8d8bef9SDimitry Andric   // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
69*0fca6ea1SDimitry Andric   if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
70e8d8bef9SDimitry Andric     // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
71e8d8bef9SDimitry Andric     // the LLVM intrinsic definition for the pointer argument.
72e8d8bef9SDimitry Andric     unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
73e8d8bef9SDimitry Andric     PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
74e8d8bef9SDimitry Andric     Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
75e8d8bef9SDimitry Andric 
76e8d8bef9SDimitry Andric     // The pass-through vector for an x86 masked load is a zero vector.
77fe6060f1SDimitry Andric     CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
78fe6060f1SDimitry Andric         II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
79e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, NewMaskedLoad);
80e8d8bef9SDimitry Andric   }
81e8d8bef9SDimitry Andric 
82e8d8bef9SDimitry Andric   return nullptr;
83e8d8bef9SDimitry Andric }
84e8d8bef9SDimitry Andric 
85e8d8bef9SDimitry Andric // TODO: If the x86 backend knew how to convert a bool vector mask back to an
86e8d8bef9SDimitry Andric // XMM register mask efficiently, we could transform all x86 masked intrinsics
87e8d8bef9SDimitry Andric // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
88e8d8bef9SDimitry Andric static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
89e8d8bef9SDimitry Andric   Value *Ptr = II.getOperand(0);
90e8d8bef9SDimitry Andric   Value *Mask = II.getOperand(1);
91e8d8bef9SDimitry Andric   Value *Vec = II.getOperand(2);
92e8d8bef9SDimitry Andric 
93e8d8bef9SDimitry Andric   // Zero Mask - this masked store instruction does nothing.
94e8d8bef9SDimitry Andric   if (isa<ConstantAggregateZero>(Mask)) {
95e8d8bef9SDimitry Andric     IC.eraseInstFromFunction(II);
96e8d8bef9SDimitry Andric     return true;
97e8d8bef9SDimitry Andric   }
98e8d8bef9SDimitry Andric 
99e8d8bef9SDimitry Andric   // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
100e8d8bef9SDimitry Andric   // anything else at this level.
101e8d8bef9SDimitry Andric   if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
102e8d8bef9SDimitry Andric     return false;
103e8d8bef9SDimitry Andric 
104e8d8bef9SDimitry Andric   // The mask is constant or extended from a bool vector. Convert this x86
105e8d8bef9SDimitry Andric   // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
106*0fca6ea1SDimitry Andric   if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
107e8d8bef9SDimitry Andric     unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
108e8d8bef9SDimitry Andric     PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
109e8d8bef9SDimitry Andric     Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
110e8d8bef9SDimitry Andric 
111e8d8bef9SDimitry Andric     IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
112e8d8bef9SDimitry Andric 
113e8d8bef9SDimitry Andric     // 'Replace uses' doesn't work for stores. Erase the original masked store.
114e8d8bef9SDimitry Andric     IC.eraseInstFromFunction(II);
115e8d8bef9SDimitry Andric     return true;
116e8d8bef9SDimitry Andric   }
117e8d8bef9SDimitry Andric 
118e8d8bef9SDimitry Andric   return false;
119e8d8bef9SDimitry Andric }
120e8d8bef9SDimitry Andric 
/// Attempt to fold an x86 vector shift intrinsic (shift-by-i32-immediate or
/// shift-by-64-bit-scalar forms) into a generic IR shift or a constant.
/// Returns the replacement value, or nullptr if no simplification applies.
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  // Decode the intrinsic: logical vs. arithmetic, left vs. right, and whether
  // the amount operand is an i32 immediate (IsImm) or a 128-bit vector whose
  // low 64 bits hold the scalar shift amount.
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift. If its guaranteed to be out of range, logical shifts combine
  // to zero and arithmetic shifts are clamped to (BitWidth - 1).
  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
    KnownBits KnownAmtBits =
        llvm::computeKnownBits(Amt, II.getDataLayout());
    if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
      // Amount is provably in-range: splat it and emit a plain IR shift.
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
    if (KnownAmtBits.getMinValue().uge(BitWidth)) {
      // Amount is provably out-of-range: logical shifts yield zero, while
      // arithmetic shifts splat the sign bit (shift by BitWidth - 1).
      if (LogicalShift)
        return ConstantAggregateZero::get(VT);
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
    // Ensure the first element has an in-range value and the rest of the
    // elements in the bottom 64 bits are zero.
    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
    // Elements [1, NumAmtElts/2) cover the rest of the low 64 bits. (This set
    // is empty for a v2i64 amount, where element 0 is the whole low half.)
    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
    KnownBits KnownLowerBits = llvm::computeKnownBits(
        Amt, DemandedLower, II.getDataLayout());
    KnownBits KnownUpperBits = llvm::computeKnownBits(
        Amt, DemandedUpper, II.getDataLayout());
    if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      // Broadcast element 0 of the amount vector and use a generic shift.
      SmallVector<int, 16> ZeroSplat(VWidth, 0);
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if count is constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
  // operand to compute the shift amount.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit value.
  // Iterates from the highest sub-element down so each element lands in the
  // correct (little-endian) position of Count.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If shift-by-zero then just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
299e8d8bef9SDimitry Andric 
// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
// Returns the replacement value, or nullptr if no simplification applies.
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  // Decode shift kind (logical vs. arithmetic) and direction.
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift.
  KnownBits KnownAmt =
      llvm::computeKnownBits(Amt, II.getDataLayout());
  if (KnownAmt.getMaxValue().ult(BitWidth)) {
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(Amt);
  if (!CShift)
    return nullptr;

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    // Bail out on any element that is not a plain constant integer.
    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
438e8d8bef9SDimitry Andric 
439e8d8bef9SDimitry Andric static Value *simplifyX86pack(IntrinsicInst &II,
440e8d8bef9SDimitry Andric                               InstCombiner::BuilderTy &Builder, bool IsSigned) {
441e8d8bef9SDimitry Andric   Value *Arg0 = II.getArgOperand(0);
442e8d8bef9SDimitry Andric   Value *Arg1 = II.getArgOperand(1);
443e8d8bef9SDimitry Andric   Type *ResTy = II.getType();
444e8d8bef9SDimitry Andric 
445e8d8bef9SDimitry Andric   // Fast all undef handling.
446e8d8bef9SDimitry Andric   if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
447e8d8bef9SDimitry Andric     return UndefValue::get(ResTy);
448e8d8bef9SDimitry Andric 
449e8d8bef9SDimitry Andric   auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
450e8d8bef9SDimitry Andric   unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
451e8d8bef9SDimitry Andric   unsigned NumSrcElts = ArgTy->getNumElements();
452e8d8bef9SDimitry Andric   assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
453e8d8bef9SDimitry Andric          "Unexpected packing types");
454e8d8bef9SDimitry Andric 
455e8d8bef9SDimitry Andric   unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
456e8d8bef9SDimitry Andric   unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
457e8d8bef9SDimitry Andric   unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
458e8d8bef9SDimitry Andric   assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
459e8d8bef9SDimitry Andric          "Unexpected packing types");
460e8d8bef9SDimitry Andric 
461e8d8bef9SDimitry Andric   // Constant folding.
462e8d8bef9SDimitry Andric   if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
463e8d8bef9SDimitry Andric     return nullptr;
464e8d8bef9SDimitry Andric 
465e8d8bef9SDimitry Andric   // Clamp Values - signed/unsigned both use signed clamp values, but they
466e8d8bef9SDimitry Andric   // differ on the min/max values.
467e8d8bef9SDimitry Andric   APInt MinValue, MaxValue;
468e8d8bef9SDimitry Andric   if (IsSigned) {
469e8d8bef9SDimitry Andric     // PACKSS: Truncate signed value with signed saturation.
470e8d8bef9SDimitry Andric     // Source values less than dst minint are saturated to minint.
471e8d8bef9SDimitry Andric     // Source values greater than dst maxint are saturated to maxint.
472e8d8bef9SDimitry Andric     MinValue =
473e8d8bef9SDimitry Andric         APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
474e8d8bef9SDimitry Andric     MaxValue =
475e8d8bef9SDimitry Andric         APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
476e8d8bef9SDimitry Andric   } else {
477e8d8bef9SDimitry Andric     // PACKUS: Truncate signed value with unsigned saturation.
478e8d8bef9SDimitry Andric     // Source values less than zero are saturated to zero.
479e8d8bef9SDimitry Andric     // Source values greater than dst maxuint are saturated to maxuint.
480349cc55cSDimitry Andric     MinValue = APInt::getZero(SrcScalarSizeInBits);
481e8d8bef9SDimitry Andric     MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
482e8d8bef9SDimitry Andric   }
483e8d8bef9SDimitry Andric 
484e8d8bef9SDimitry Andric   auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
485e8d8bef9SDimitry Andric   auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
486e8d8bef9SDimitry Andric   Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
487e8d8bef9SDimitry Andric   Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
488e8d8bef9SDimitry Andric   Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
489e8d8bef9SDimitry Andric   Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
490e8d8bef9SDimitry Andric 
491e8d8bef9SDimitry Andric   // Shuffle clamped args together at the lane level.
492e8d8bef9SDimitry Andric   SmallVector<int, 32> PackMask;
493e8d8bef9SDimitry Andric   for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
494e8d8bef9SDimitry Andric     for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
495e8d8bef9SDimitry Andric       PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
496e8d8bef9SDimitry Andric     for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
497e8d8bef9SDimitry Andric       PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
498e8d8bef9SDimitry Andric   }
499e8d8bef9SDimitry Andric   auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
500e8d8bef9SDimitry Andric 
501e8d8bef9SDimitry Andric   // Truncate to dst size.
502e8d8bef9SDimitry Andric   return Builder.CreateTrunc(Shuffle, ResTy);
503e8d8bef9SDimitry Andric }
504e8d8bef9SDimitry Andric 
505*0fca6ea1SDimitry Andric static Value *simplifyX86pmulh(IntrinsicInst &II,
506*0fca6ea1SDimitry Andric                                InstCombiner::BuilderTy &Builder, bool IsSigned,
507*0fca6ea1SDimitry Andric                                bool IsRounding) {
508*0fca6ea1SDimitry Andric   Value *Arg0 = II.getArgOperand(0);
509*0fca6ea1SDimitry Andric   Value *Arg1 = II.getArgOperand(1);
510*0fca6ea1SDimitry Andric   auto *ResTy = cast<FixedVectorType>(II.getType());
511*0fca6ea1SDimitry Andric   auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
512*0fca6ea1SDimitry Andric   assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
513*0fca6ea1SDimitry Andric          "Unexpected PMULH types");
514*0fca6ea1SDimitry Andric   assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");
515*0fca6ea1SDimitry Andric 
516*0fca6ea1SDimitry Andric   // Multiply by undef -> zero (NOT undef!) as other arg could still be zero.
517*0fca6ea1SDimitry Andric   if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
518*0fca6ea1SDimitry Andric     return ConstantAggregateZero::get(ResTy);
519*0fca6ea1SDimitry Andric 
520*0fca6ea1SDimitry Andric   // Multiply by zero.
521*0fca6ea1SDimitry Andric   if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
522*0fca6ea1SDimitry Andric     return ConstantAggregateZero::get(ResTy);
523*0fca6ea1SDimitry Andric 
524*0fca6ea1SDimitry Andric   // Multiply by one.
525*0fca6ea1SDimitry Andric   if (!IsRounding) {
526*0fca6ea1SDimitry Andric     if (match(Arg0, m_One()))
527*0fca6ea1SDimitry Andric       return IsSigned ? Builder.CreateAShr(Arg1, 15)
528*0fca6ea1SDimitry Andric                       : ConstantAggregateZero::get(ResTy);
529*0fca6ea1SDimitry Andric     if (match(Arg1, m_One()))
530*0fca6ea1SDimitry Andric       return IsSigned ? Builder.CreateAShr(Arg0, 15)
531*0fca6ea1SDimitry Andric                       : ConstantAggregateZero::get(ResTy);
532*0fca6ea1SDimitry Andric   }
533*0fca6ea1SDimitry Andric 
534*0fca6ea1SDimitry Andric   // Constant folding.
535*0fca6ea1SDimitry Andric   if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
536*0fca6ea1SDimitry Andric     return nullptr;
537*0fca6ea1SDimitry Andric 
538*0fca6ea1SDimitry Andric   // Extend to twice the width and multiply.
539*0fca6ea1SDimitry Andric   auto Cast =
540*0fca6ea1SDimitry Andric       IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
541*0fca6ea1SDimitry Andric   auto *ExtTy = FixedVectorType::getExtendedElementVectorType(ArgTy);
542*0fca6ea1SDimitry Andric   Value *LHS = Builder.CreateCast(Cast, Arg0, ExtTy);
543*0fca6ea1SDimitry Andric   Value *RHS = Builder.CreateCast(Cast, Arg1, ExtTy);
544*0fca6ea1SDimitry Andric   Value *Mul = Builder.CreateMul(LHS, RHS);
545*0fca6ea1SDimitry Andric 
546*0fca6ea1SDimitry Andric   if (IsRounding) {
547*0fca6ea1SDimitry Andric     // PMULHRSW: truncate to vXi18 of the most significant bits, add one and
548*0fca6ea1SDimitry Andric     // extract bits[16:1].
549*0fca6ea1SDimitry Andric     auto *RndEltTy = IntegerType::get(ExtTy->getContext(), 18);
550*0fca6ea1SDimitry Andric     auto *RndTy = FixedVectorType::get(RndEltTy, ExtTy);
551*0fca6ea1SDimitry Andric     Mul = Builder.CreateLShr(Mul, 14);
552*0fca6ea1SDimitry Andric     Mul = Builder.CreateTrunc(Mul, RndTy);
553*0fca6ea1SDimitry Andric     Mul = Builder.CreateAdd(Mul, ConstantInt::get(RndTy, 1));
554*0fca6ea1SDimitry Andric     Mul = Builder.CreateLShr(Mul, 1);
555*0fca6ea1SDimitry Andric   } else {
556*0fca6ea1SDimitry Andric     // PMULH/PMULHU: extract the vXi16 most significant bits.
557*0fca6ea1SDimitry Andric     Mul = Builder.CreateLShr(Mul, 16);
558*0fca6ea1SDimitry Andric   }
559*0fca6ea1SDimitry Andric 
560*0fca6ea1SDimitry Andric   return Builder.CreateTrunc(Mul, ResTy);
561*0fca6ea1SDimitry Andric }
562*0fca6ea1SDimitry Andric 
// Simplify the X86 PMADDWD/PMADDUBSW intrinsics. Trivial operands (undef or
// zero) fold straight to a zero vector; otherwise, when BOTH operands are
// constants, the intrinsic is expanded into shuffles/extends/multiplies/adds
// so InstCombine's constant folder can evaluate it. Returns nullptr when no
// simplification applies.
static Value *simplifyX86pmadd(IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder,
                               bool IsPMADDWD) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  // Only referenced by the assert below; [[maybe_unused]] silences NDEBUG
  // -Wunused-variable warnings.
  [[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());

  unsigned NumDstElts = ResTy->getNumElements();
  // PMADD halves the element count while doubling the element width.
  assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
         ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
         "Unexpected PMADD types");

  // Multiply by undef -> zero (NOT undef!) as other arg could still be zero.
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by zero.
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Constant folding.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Split Lo/Hi elements pairs, extend and add together.
  // PMADDWD(X,Y) =
  // add(mul(sext(lhs[0]),sext(rhs[0])),mul(sext(lhs[1]),sext(rhs[1])))
  // PMADDUBSW(X,Y) =
  // sadd_sat(mul(zext(lhs[0]),sext(rhs[0])),mul(zext(lhs[1]),sext(rhs[1])))
  SmallVector<int> LoMask, HiMask;
  for (unsigned I = 0; I != NumDstElts; ++I) {
    LoMask.push_back(2 * I + 0); // even source elements
    HiMask.push_back(2 * I + 1); // odd source elements
  }

  auto *LHSLo = Builder.CreateShuffleVector(Arg0, LoMask);
  auto *LHSHi = Builder.CreateShuffleVector(Arg0, HiMask);
  auto *RHSLo = Builder.CreateShuffleVector(Arg1, LoMask);
  auto *RHSHi = Builder.CreateShuffleVector(Arg1, HiMask);

  // NOTE: the extensions are deliberately asymmetric for PMADDUBSW — its LHS
  // holds unsigned bytes (zext) while its RHS holds signed bytes (sext).
  // PMADDWD sign-extends both sides.
  auto LHSCast =
      IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
  LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
  LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
  RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
  RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
  Value *Lo = Builder.CreateMul(LHSLo, RHSLo);
  Value *Hi = Builder.CreateMul(LHSHi, RHSHi);
  // PMADDWD uses a plain wrapping add; PMADDUBSW saturates the sum.
  return IsPMADDWD
             ? Builder.CreateAdd(Lo, Hi)
             : Builder.CreateIntrinsic(ResTy, Intrinsic::sadd_sat, {Lo, Hi});
}
616*0fca6ea1SDimitry Andric 
617e8d8bef9SDimitry Andric static Value *simplifyX86movmsk(const IntrinsicInst &II,
618e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
619e8d8bef9SDimitry Andric   Value *Arg = II.getArgOperand(0);
620e8d8bef9SDimitry Andric   Type *ResTy = II.getType();
621e8d8bef9SDimitry Andric 
622e8d8bef9SDimitry Andric   // movmsk(undef) -> zero as we must ensure the upper bits are zero.
623e8d8bef9SDimitry Andric   if (isa<UndefValue>(Arg))
624e8d8bef9SDimitry Andric     return Constant::getNullValue(ResTy);
625e8d8bef9SDimitry Andric 
626e8d8bef9SDimitry Andric   auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
627e8d8bef9SDimitry Andric   // We can't easily peek through x86_mmx types.
628e8d8bef9SDimitry Andric   if (!ArgTy)
629e8d8bef9SDimitry Andric     return nullptr;
630e8d8bef9SDimitry Andric 
631e8d8bef9SDimitry Andric   // Expand MOVMSK to compare/bitcast/zext:
632e8d8bef9SDimitry Andric   // e.g. PMOVMSKB(v16i8 x):
633e8d8bef9SDimitry Andric   // %cmp = icmp slt <16 x i8> %x, zeroinitializer
634e8d8bef9SDimitry Andric   // %int = bitcast <16 x i1> %cmp to i16
635e8d8bef9SDimitry Andric   // %res = zext i16 %int to i32
636e8d8bef9SDimitry Andric   unsigned NumElts = ArgTy->getNumElements();
637e8d8bef9SDimitry Andric   Type *IntegerTy = Builder.getIntNTy(NumElts);
638e8d8bef9SDimitry Andric 
63981ad6265SDimitry Andric   Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
64081ad6265SDimitry Andric   Res = Builder.CreateIsNeg(Res);
641e8d8bef9SDimitry Andric   Res = Builder.CreateBitCast(Res, IntegerTy);
642e8d8bef9SDimitry Andric   Res = Builder.CreateZExtOrTrunc(Res, ResTy);
643e8d8bef9SDimitry Andric   return Res;
644e8d8bef9SDimitry Andric }
645e8d8bef9SDimitry Andric 
646e8d8bef9SDimitry Andric static Value *simplifyX86addcarry(const IntrinsicInst &II,
647e8d8bef9SDimitry Andric                                   InstCombiner::BuilderTy &Builder) {
648e8d8bef9SDimitry Andric   Value *CarryIn = II.getArgOperand(0);
649e8d8bef9SDimitry Andric   Value *Op1 = II.getArgOperand(1);
650e8d8bef9SDimitry Andric   Value *Op2 = II.getArgOperand(2);
651e8d8bef9SDimitry Andric   Type *RetTy = II.getType();
652e8d8bef9SDimitry Andric   Type *OpTy = Op1->getType();
653e8d8bef9SDimitry Andric   assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
654e8d8bef9SDimitry Andric          RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
655e8d8bef9SDimitry Andric          "Unexpected types for x86 addcarry");
656e8d8bef9SDimitry Andric 
657e8d8bef9SDimitry Andric   // If carry-in is zero, this is just an unsigned add with overflow.
658*0fca6ea1SDimitry Andric   if (match(CarryIn, m_ZeroInt())) {
659e8d8bef9SDimitry Andric     Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
660e8d8bef9SDimitry Andric                                           {Op1, Op2});
661e8d8bef9SDimitry Andric     // The types have to be adjusted to match the x86 call types.
662e8d8bef9SDimitry Andric     Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
663e8d8bef9SDimitry Andric     Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
664e8d8bef9SDimitry Andric                                        Builder.getInt8Ty());
665bdd1243dSDimitry Andric     Value *Res = PoisonValue::get(RetTy);
666e8d8bef9SDimitry Andric     Res = Builder.CreateInsertValue(Res, UAddOV, 0);
667e8d8bef9SDimitry Andric     return Builder.CreateInsertValue(Res, UAddResult, 1);
668e8d8bef9SDimitry Andric   }
669e8d8bef9SDimitry Andric 
670e8d8bef9SDimitry Andric   return nullptr;
671e8d8bef9SDimitry Andric }
672e8d8bef9SDimitry Andric 
67306c3fb27SDimitry Andric static Value *simplifyTernarylogic(const IntrinsicInst &II,
67406c3fb27SDimitry Andric                                    InstCombiner::BuilderTy &Builder) {
67506c3fb27SDimitry Andric 
67606c3fb27SDimitry Andric   auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
67706c3fb27SDimitry Andric   if (!ArgImm || ArgImm->getValue().uge(256))
67806c3fb27SDimitry Andric     return nullptr;
67906c3fb27SDimitry Andric 
68006c3fb27SDimitry Andric   Value *ArgA = II.getArgOperand(0);
68106c3fb27SDimitry Andric   Value *ArgB = II.getArgOperand(1);
68206c3fb27SDimitry Andric   Value *ArgC = II.getArgOperand(2);
68306c3fb27SDimitry Andric 
68406c3fb27SDimitry Andric   Type *Ty = II.getType();
68506c3fb27SDimitry Andric 
68606c3fb27SDimitry Andric   auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
68706c3fb27SDimitry Andric     return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
68806c3fb27SDimitry Andric   };
68906c3fb27SDimitry Andric   auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
69006c3fb27SDimitry Andric     return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
69106c3fb27SDimitry Andric   };
69206c3fb27SDimitry Andric   auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
69306c3fb27SDimitry Andric     return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
69406c3fb27SDimitry Andric   };
69506c3fb27SDimitry Andric   auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
69606c3fb27SDimitry Andric     return {Builder.CreateNot(V.first), ~V.second};
69706c3fb27SDimitry Andric   };
69806c3fb27SDimitry Andric   auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
69906c3fb27SDimitry Andric   auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
70006c3fb27SDimitry Andric   auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
70106c3fb27SDimitry Andric 
702*0fca6ea1SDimitry Andric   bool AIsConst = match(ArgA, m_ImmConstant());
703*0fca6ea1SDimitry Andric   bool BIsConst = match(ArgB, m_ImmConstant());
704*0fca6ea1SDimitry Andric   bool CIsConst = match(ArgC, m_ImmConstant());
70506c3fb27SDimitry Andric 
70606c3fb27SDimitry Andric   bool ABIsConst = AIsConst && BIsConst;
70706c3fb27SDimitry Andric   bool ACIsConst = AIsConst && CIsConst;
70806c3fb27SDimitry Andric   bool BCIsConst = BIsConst && CIsConst;
70906c3fb27SDimitry Andric   bool ABCIsConst = AIsConst && BIsConst && CIsConst;
71006c3fb27SDimitry Andric 
71106c3fb27SDimitry Andric   // Use for verification. Its a big table. Its difficult to go from Imm ->
71206c3fb27SDimitry Andric   // logic ops, but easy to verify that a set of logic ops is correct. We track
71306c3fb27SDimitry Andric   // the logic ops through the second value in the pair. At the end it should
71406c3fb27SDimitry Andric   // equal Imm.
71506c3fb27SDimitry Andric   std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
71606c3fb27SDimitry Andric   std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
71706c3fb27SDimitry Andric   std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
71806c3fb27SDimitry Andric   std::pair<Value *, uint8_t> Res = {nullptr, 0};
71906c3fb27SDimitry Andric 
72006c3fb27SDimitry Andric   // Currently we only handle cases that convert directly to another instruction
72106c3fb27SDimitry Andric   // or cases where all the ops are constant.  This is because we don't properly
72206c3fb27SDimitry Andric   // handle creating ternary ops in the backend, so splitting them here may
72306c3fb27SDimitry Andric   // cause regressions. As the backend improves, uncomment more cases.
72406c3fb27SDimitry Andric 
72506c3fb27SDimitry Andric   uint8_t Imm = ArgImm->getValue().getZExtValue();
72606c3fb27SDimitry Andric   switch (Imm) {
72706c3fb27SDimitry Andric   case 0x0:
72806c3fb27SDimitry Andric     Res = {Constant::getNullValue(Ty), 0};
72906c3fb27SDimitry Andric     break;
73006c3fb27SDimitry Andric   case 0x1:
73106c3fb27SDimitry Andric     if (ABCIsConst)
73206c3fb27SDimitry Andric       Res = Nor(Or(A, B), C);
73306c3fb27SDimitry Andric     break;
73406c3fb27SDimitry Andric   case 0x2:
73506c3fb27SDimitry Andric     if (ABCIsConst)
73606c3fb27SDimitry Andric       Res = And(Nor(A, B), C);
73706c3fb27SDimitry Andric     break;
73806c3fb27SDimitry Andric   case 0x3:
73906c3fb27SDimitry Andric     if (ABIsConst)
74006c3fb27SDimitry Andric       Res = Nor(A, B);
74106c3fb27SDimitry Andric     break;
74206c3fb27SDimitry Andric   case 0x4:
74306c3fb27SDimitry Andric     if (ABCIsConst)
74406c3fb27SDimitry Andric       Res = And(Nor(A, C), B);
74506c3fb27SDimitry Andric     break;
74606c3fb27SDimitry Andric   case 0x5:
74706c3fb27SDimitry Andric     if (ACIsConst)
74806c3fb27SDimitry Andric       Res = Nor(A, C);
74906c3fb27SDimitry Andric     break;
75006c3fb27SDimitry Andric   case 0x6:
75106c3fb27SDimitry Andric     if (ABCIsConst)
75206c3fb27SDimitry Andric       Res = Nor(A, Xnor(B, C));
75306c3fb27SDimitry Andric     break;
75406c3fb27SDimitry Andric   case 0x7:
75506c3fb27SDimitry Andric     if (ABCIsConst)
75606c3fb27SDimitry Andric       Res = Nor(A, And(B, C));
75706c3fb27SDimitry Andric     break;
75806c3fb27SDimitry Andric   case 0x8:
75906c3fb27SDimitry Andric     if (ABCIsConst)
76006c3fb27SDimitry Andric       Res = Nor(A, Nand(B, C));
76106c3fb27SDimitry Andric     break;
76206c3fb27SDimitry Andric   case 0x9:
76306c3fb27SDimitry Andric     if (ABCIsConst)
76406c3fb27SDimitry Andric       Res = Nor(A, Xor(B, C));
76506c3fb27SDimitry Andric     break;
76606c3fb27SDimitry Andric   case 0xa:
76706c3fb27SDimitry Andric     if (ACIsConst)
76806c3fb27SDimitry Andric       Res = Nor(A, Not(C));
76906c3fb27SDimitry Andric     break;
77006c3fb27SDimitry Andric   case 0xb:
77106c3fb27SDimitry Andric     if (ABCIsConst)
77206c3fb27SDimitry Andric       Res = Nor(A, Nor(C, Not(B)));
77306c3fb27SDimitry Andric     break;
77406c3fb27SDimitry Andric   case 0xc:
77506c3fb27SDimitry Andric     if (ABIsConst)
77606c3fb27SDimitry Andric       Res = Nor(A, Not(B));
77706c3fb27SDimitry Andric     break;
77806c3fb27SDimitry Andric   case 0xd:
77906c3fb27SDimitry Andric     if (ABCIsConst)
78006c3fb27SDimitry Andric       Res = Nor(A, Nor(B, Not(C)));
78106c3fb27SDimitry Andric     break;
78206c3fb27SDimitry Andric   case 0xe:
78306c3fb27SDimitry Andric     if (ABCIsConst)
78406c3fb27SDimitry Andric       Res = Nor(A, Nor(B, C));
78506c3fb27SDimitry Andric     break;
78606c3fb27SDimitry Andric   case 0xf:
78706c3fb27SDimitry Andric     Res = Not(A);
78806c3fb27SDimitry Andric     break;
78906c3fb27SDimitry Andric   case 0x10:
79006c3fb27SDimitry Andric     if (ABCIsConst)
79106c3fb27SDimitry Andric       Res = And(A, Nor(B, C));
79206c3fb27SDimitry Andric     break;
79306c3fb27SDimitry Andric   case 0x11:
79406c3fb27SDimitry Andric     if (BCIsConst)
79506c3fb27SDimitry Andric       Res = Nor(B, C);
79606c3fb27SDimitry Andric     break;
79706c3fb27SDimitry Andric   case 0x12:
79806c3fb27SDimitry Andric     if (ABCIsConst)
79906c3fb27SDimitry Andric       Res = Nor(Xnor(A, C), B);
80006c3fb27SDimitry Andric     break;
80106c3fb27SDimitry Andric   case 0x13:
80206c3fb27SDimitry Andric     if (ABCIsConst)
80306c3fb27SDimitry Andric       Res = Nor(And(A, C), B);
80406c3fb27SDimitry Andric     break;
80506c3fb27SDimitry Andric   case 0x14:
80606c3fb27SDimitry Andric     if (ABCIsConst)
80706c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), C);
80806c3fb27SDimitry Andric     break;
80906c3fb27SDimitry Andric   case 0x15:
81006c3fb27SDimitry Andric     if (ABCIsConst)
81106c3fb27SDimitry Andric       Res = Nor(And(A, B), C);
81206c3fb27SDimitry Andric     break;
81306c3fb27SDimitry Andric   case 0x16:
81406c3fb27SDimitry Andric     if (ABCIsConst)
81506c3fb27SDimitry Andric       Res = Xor(Xor(A, B), And(Nand(A, B), C));
81606c3fb27SDimitry Andric     break;
81706c3fb27SDimitry Andric   case 0x17:
81806c3fb27SDimitry Andric     if (ABCIsConst)
81906c3fb27SDimitry Andric       Res = Xor(Or(A, B), Or(Xnor(A, B), C));
82006c3fb27SDimitry Andric     break;
82106c3fb27SDimitry Andric   case 0x18:
82206c3fb27SDimitry Andric     if (ABCIsConst)
82306c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Xnor(A, C));
82406c3fb27SDimitry Andric     break;
82506c3fb27SDimitry Andric   case 0x19:
82606c3fb27SDimitry Andric     if (ABCIsConst)
82706c3fb27SDimitry Andric       Res = And(Nand(A, B), Xnor(B, C));
82806c3fb27SDimitry Andric     break;
82906c3fb27SDimitry Andric   case 0x1a:
83006c3fb27SDimitry Andric     if (ABCIsConst)
83106c3fb27SDimitry Andric       Res = Xor(A, Or(And(A, B), C));
83206c3fb27SDimitry Andric     break;
83306c3fb27SDimitry Andric   case 0x1b:
83406c3fb27SDimitry Andric     if (ABCIsConst)
83506c3fb27SDimitry Andric       Res = Xor(A, Or(Xnor(A, B), C));
83606c3fb27SDimitry Andric     break;
83706c3fb27SDimitry Andric   case 0x1c:
83806c3fb27SDimitry Andric     if (ABCIsConst)
83906c3fb27SDimitry Andric       Res = Xor(A, Or(And(A, C), B));
84006c3fb27SDimitry Andric     break;
84106c3fb27SDimitry Andric   case 0x1d:
84206c3fb27SDimitry Andric     if (ABCIsConst)
84306c3fb27SDimitry Andric       Res = Xor(A, Or(Xnor(A, C), B));
84406c3fb27SDimitry Andric     break;
84506c3fb27SDimitry Andric   case 0x1e:
84606c3fb27SDimitry Andric     if (ABCIsConst)
84706c3fb27SDimitry Andric       Res = Xor(A, Or(B, C));
84806c3fb27SDimitry Andric     break;
84906c3fb27SDimitry Andric   case 0x1f:
85006c3fb27SDimitry Andric     if (ABCIsConst)
85106c3fb27SDimitry Andric       Res = Nand(A, Or(B, C));
85206c3fb27SDimitry Andric     break;
85306c3fb27SDimitry Andric   case 0x20:
85406c3fb27SDimitry Andric     if (ABCIsConst)
85506c3fb27SDimitry Andric       Res = Nor(Nand(A, C), B);
85606c3fb27SDimitry Andric     break;
85706c3fb27SDimitry Andric   case 0x21:
85806c3fb27SDimitry Andric     if (ABCIsConst)
85906c3fb27SDimitry Andric       Res = Nor(Xor(A, C), B);
86006c3fb27SDimitry Andric     break;
86106c3fb27SDimitry Andric   case 0x22:
86206c3fb27SDimitry Andric     if (BCIsConst)
86306c3fb27SDimitry Andric       Res = Nor(B, Not(C));
86406c3fb27SDimitry Andric     break;
86506c3fb27SDimitry Andric   case 0x23:
86606c3fb27SDimitry Andric     if (ABCIsConst)
86706c3fb27SDimitry Andric       Res = Nor(B, Nor(C, Not(A)));
86806c3fb27SDimitry Andric     break;
86906c3fb27SDimitry Andric   case 0x24:
87006c3fb27SDimitry Andric     if (ABCIsConst)
87106c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Xor(A, C));
87206c3fb27SDimitry Andric     break;
87306c3fb27SDimitry Andric   case 0x25:
87406c3fb27SDimitry Andric     if (ABCIsConst)
87506c3fb27SDimitry Andric       Res = Xor(A, Nand(Nand(A, B), C));
87606c3fb27SDimitry Andric     break;
87706c3fb27SDimitry Andric   case 0x26:
87806c3fb27SDimitry Andric     if (ABCIsConst)
87906c3fb27SDimitry Andric       Res = And(Nand(A, B), Xor(B, C));
88006c3fb27SDimitry Andric     break;
88106c3fb27SDimitry Andric   case 0x27:
88206c3fb27SDimitry Andric     if (ABCIsConst)
88306c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), C), B);
88406c3fb27SDimitry Andric     break;
88506c3fb27SDimitry Andric   case 0x28:
88606c3fb27SDimitry Andric     if (ABCIsConst)
88706c3fb27SDimitry Andric       Res = And(Xor(A, B), C);
88806c3fb27SDimitry Andric     break;
88906c3fb27SDimitry Andric   case 0x29:
89006c3fb27SDimitry Andric     if (ABCIsConst)
89106c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Nor(And(A, B), C));
89206c3fb27SDimitry Andric     break;
89306c3fb27SDimitry Andric   case 0x2a:
89406c3fb27SDimitry Andric     if (ABCIsConst)
89506c3fb27SDimitry Andric       Res = And(Nand(A, B), C);
89606c3fb27SDimitry Andric     break;
89706c3fb27SDimitry Andric   case 0x2b:
89806c3fb27SDimitry Andric     if (ABCIsConst)
89906c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), Xor(A, C)), A);
90006c3fb27SDimitry Andric     break;
90106c3fb27SDimitry Andric   case 0x2c:
90206c3fb27SDimitry Andric     if (ABCIsConst)
90306c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Nor(B, C));
90406c3fb27SDimitry Andric     break;
90506c3fb27SDimitry Andric   case 0x2d:
90606c3fb27SDimitry Andric     if (ABCIsConst)
90706c3fb27SDimitry Andric       Res = Xor(A, Or(B, Not(C)));
90806c3fb27SDimitry Andric     break;
90906c3fb27SDimitry Andric   case 0x2e:
91006c3fb27SDimitry Andric     if (ABCIsConst)
91106c3fb27SDimitry Andric       Res = Xor(A, Or(Xor(A, C), B));
91206c3fb27SDimitry Andric     break;
91306c3fb27SDimitry Andric   case 0x2f:
91406c3fb27SDimitry Andric     if (ABCIsConst)
91506c3fb27SDimitry Andric       Res = Nand(A, Or(B, Not(C)));
91606c3fb27SDimitry Andric     break;
91706c3fb27SDimitry Andric   case 0x30:
91806c3fb27SDimitry Andric     if (ABIsConst)
91906c3fb27SDimitry Andric       Res = Nor(B, Not(A));
92006c3fb27SDimitry Andric     break;
92106c3fb27SDimitry Andric   case 0x31:
92206c3fb27SDimitry Andric     if (ABCIsConst)
92306c3fb27SDimitry Andric       Res = Nor(Nor(A, Not(C)), B);
92406c3fb27SDimitry Andric     break;
92506c3fb27SDimitry Andric   case 0x32:
92606c3fb27SDimitry Andric     if (ABCIsConst)
92706c3fb27SDimitry Andric       Res = Nor(Nor(A, C), B);
92806c3fb27SDimitry Andric     break;
92906c3fb27SDimitry Andric   case 0x33:
93006c3fb27SDimitry Andric     Res = Not(B);
93106c3fb27SDimitry Andric     break;
93206c3fb27SDimitry Andric   case 0x34:
93306c3fb27SDimitry Andric     if (ABCIsConst)
93406c3fb27SDimitry Andric       Res = And(Xor(A, B), Nand(B, C));
93506c3fb27SDimitry Andric     break;
93606c3fb27SDimitry Andric   case 0x35:
93706c3fb27SDimitry Andric     if (ABCIsConst)
93806c3fb27SDimitry Andric       Res = Xor(B, Or(A, Xnor(B, C)));
93906c3fb27SDimitry Andric     break;
94006c3fb27SDimitry Andric   case 0x36:
94106c3fb27SDimitry Andric     if (ABCIsConst)
94206c3fb27SDimitry Andric       Res = Xor(Or(A, C), B);
94306c3fb27SDimitry Andric     break;
94406c3fb27SDimitry Andric   case 0x37:
94506c3fb27SDimitry Andric     if (ABCIsConst)
94606c3fb27SDimitry Andric       Res = Nand(Or(A, C), B);
94706c3fb27SDimitry Andric     break;
94806c3fb27SDimitry Andric   case 0x38:
94906c3fb27SDimitry Andric     if (ABCIsConst)
95006c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Nor(A, C));
95106c3fb27SDimitry Andric     break;
95206c3fb27SDimitry Andric   case 0x39:
95306c3fb27SDimitry Andric     if (ABCIsConst)
95406c3fb27SDimitry Andric       Res = Xor(Or(A, Not(C)), B);
95506c3fb27SDimitry Andric     break;
95606c3fb27SDimitry Andric   case 0x3a:
95706c3fb27SDimitry Andric     if (ABCIsConst)
95806c3fb27SDimitry Andric       Res = Xor(B, Or(A, Xor(B, C)));
95906c3fb27SDimitry Andric     break;
96006c3fb27SDimitry Andric   case 0x3b:
96106c3fb27SDimitry Andric     if (ABCIsConst)
96206c3fb27SDimitry Andric       Res = Nand(Or(A, Not(C)), B);
96306c3fb27SDimitry Andric     break;
96406c3fb27SDimitry Andric   case 0x3c:
96506c3fb27SDimitry Andric     Res = Xor(A, B);
96606c3fb27SDimitry Andric     break;
96706c3fb27SDimitry Andric   case 0x3d:
96806c3fb27SDimitry Andric     if (ABCIsConst)
96906c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, C), B));
97006c3fb27SDimitry Andric     break;
97106c3fb27SDimitry Andric   case 0x3e:
97206c3fb27SDimitry Andric     if (ABCIsConst)
97306c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, Not(C)), B));
97406c3fb27SDimitry Andric     break;
97506c3fb27SDimitry Andric   case 0x3f:
97606c3fb27SDimitry Andric     if (ABIsConst)
97706c3fb27SDimitry Andric       Res = Nand(A, B);
97806c3fb27SDimitry Andric     break;
97906c3fb27SDimitry Andric   case 0x40:
98006c3fb27SDimitry Andric     if (ABCIsConst)
98106c3fb27SDimitry Andric       Res = Nor(Nand(A, B), C);
98206c3fb27SDimitry Andric     break;
98306c3fb27SDimitry Andric   case 0x41:
98406c3fb27SDimitry Andric     if (ABCIsConst)
98506c3fb27SDimitry Andric       Res = Nor(Xor(A, B), C);
98606c3fb27SDimitry Andric     break;
98706c3fb27SDimitry Andric   case 0x42:
98806c3fb27SDimitry Andric     if (ABCIsConst)
98906c3fb27SDimitry Andric       Res = Nor(Xor(A, B), Xnor(A, C));
99006c3fb27SDimitry Andric     break;
99106c3fb27SDimitry Andric   case 0x43:
99206c3fb27SDimitry Andric     if (ABCIsConst)
99306c3fb27SDimitry Andric       Res = Xor(A, Nand(Nand(A, C), B));
99406c3fb27SDimitry Andric     break;
99506c3fb27SDimitry Andric   case 0x44:
99606c3fb27SDimitry Andric     if (BCIsConst)
99706c3fb27SDimitry Andric       Res = Nor(C, Not(B));
99806c3fb27SDimitry Andric     break;
99906c3fb27SDimitry Andric   case 0x45:
100006c3fb27SDimitry Andric     if (ABCIsConst)
100106c3fb27SDimitry Andric       Res = Nor(Nor(B, Not(A)), C);
100206c3fb27SDimitry Andric     break;
100306c3fb27SDimitry Andric   case 0x46:
100406c3fb27SDimitry Andric     if (ABCIsConst)
100506c3fb27SDimitry Andric       Res = Xor(Or(And(A, C), B), C);
100606c3fb27SDimitry Andric     break;
100706c3fb27SDimitry Andric   case 0x47:
100806c3fb27SDimitry Andric     if (ABCIsConst)
100906c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, C), B), C);
101006c3fb27SDimitry Andric     break;
101106c3fb27SDimitry Andric   case 0x48:
101206c3fb27SDimitry Andric     if (ABCIsConst)
101306c3fb27SDimitry Andric       Res = And(Xor(A, C), B);
101406c3fb27SDimitry Andric     break;
101506c3fb27SDimitry Andric   case 0x49:
101606c3fb27SDimitry Andric     if (ABCIsConst)
101706c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), And(A, C)), C);
101806c3fb27SDimitry Andric     break;
101906c3fb27SDimitry Andric   case 0x4a:
102006c3fb27SDimitry Andric     if (ABCIsConst)
102106c3fb27SDimitry Andric       Res = Nor(Xnor(A, C), Nor(B, C));
102206c3fb27SDimitry Andric     break;
102306c3fb27SDimitry Andric   case 0x4b:
102406c3fb27SDimitry Andric     if (ABCIsConst)
102506c3fb27SDimitry Andric       Res = Xor(A, Or(C, Not(B)));
102606c3fb27SDimitry Andric     break;
102706c3fb27SDimitry Andric   case 0x4c:
102806c3fb27SDimitry Andric     if (ABCIsConst)
102906c3fb27SDimitry Andric       Res = And(Nand(A, C), B);
103006c3fb27SDimitry Andric     break;
103106c3fb27SDimitry Andric   case 0x4d:
103206c3fb27SDimitry Andric     if (ABCIsConst)
103306c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, B), Xnor(A, C)), A);
103406c3fb27SDimitry Andric     break;
103506c3fb27SDimitry Andric   case 0x4e:
103606c3fb27SDimitry Andric     if (ABCIsConst)
103706c3fb27SDimitry Andric       Res = Xor(A, Or(Xor(A, B), C));
103806c3fb27SDimitry Andric     break;
103906c3fb27SDimitry Andric   case 0x4f:
104006c3fb27SDimitry Andric     if (ABCIsConst)
104106c3fb27SDimitry Andric       Res = Nand(A, Nand(B, Not(C)));
104206c3fb27SDimitry Andric     break;
104306c3fb27SDimitry Andric   case 0x50:
104406c3fb27SDimitry Andric     if (ACIsConst)
104506c3fb27SDimitry Andric       Res = Nor(C, Not(A));
104606c3fb27SDimitry Andric     break;
104706c3fb27SDimitry Andric   case 0x51:
104806c3fb27SDimitry Andric     if (ABCIsConst)
104906c3fb27SDimitry Andric       Res = Nor(Nor(A, Not(B)), C);
105006c3fb27SDimitry Andric     break;
105106c3fb27SDimitry Andric   case 0x52:
105206c3fb27SDimitry Andric     if (ABCIsConst)
105306c3fb27SDimitry Andric       Res = And(Xor(A, C), Nand(B, C));
105406c3fb27SDimitry Andric     break;
105506c3fb27SDimitry Andric   case 0x53:
105606c3fb27SDimitry Andric     if (ABCIsConst)
105706c3fb27SDimitry Andric       Res = Xor(Or(Xnor(B, C), A), C);
105806c3fb27SDimitry Andric     break;
105906c3fb27SDimitry Andric   case 0x54:
106006c3fb27SDimitry Andric     if (ABCIsConst)
106106c3fb27SDimitry Andric       Res = Nor(Nor(A, B), C);
106206c3fb27SDimitry Andric     break;
106306c3fb27SDimitry Andric   case 0x55:
106406c3fb27SDimitry Andric     Res = Not(C);
106506c3fb27SDimitry Andric     break;
106606c3fb27SDimitry Andric   case 0x56:
106706c3fb27SDimitry Andric     if (ABCIsConst)
106806c3fb27SDimitry Andric       Res = Xor(Or(A, B), C);
106906c3fb27SDimitry Andric     break;
107006c3fb27SDimitry Andric   case 0x57:
107106c3fb27SDimitry Andric     if (ABCIsConst)
107206c3fb27SDimitry Andric       Res = Nand(Or(A, B), C);
107306c3fb27SDimitry Andric     break;
107406c3fb27SDimitry Andric   case 0x58:
107506c3fb27SDimitry Andric     if (ABCIsConst)
107606c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xnor(A, C));
107706c3fb27SDimitry Andric     break;
107806c3fb27SDimitry Andric   case 0x59:
107906c3fb27SDimitry Andric     if (ABCIsConst)
108006c3fb27SDimitry Andric       Res = Xor(Or(A, Not(B)), C);
108106c3fb27SDimitry Andric     break;
108206c3fb27SDimitry Andric   case 0x5a:
108306c3fb27SDimitry Andric     Res = Xor(A, C);
108406c3fb27SDimitry Andric     break;
108506c3fb27SDimitry Andric   case 0x5b:
108606c3fb27SDimitry Andric     if (ABCIsConst)
108706c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, B), C));
108806c3fb27SDimitry Andric     break;
108906c3fb27SDimitry Andric   case 0x5c:
109006c3fb27SDimitry Andric     if (ABCIsConst)
109106c3fb27SDimitry Andric       Res = Xor(Or(Xor(B, C), A), C);
109206c3fb27SDimitry Andric     break;
109306c3fb27SDimitry Andric   case 0x5d:
109406c3fb27SDimitry Andric     if (ABCIsConst)
109506c3fb27SDimitry Andric       Res = Nand(Or(A, Not(B)), C);
109606c3fb27SDimitry Andric     break;
109706c3fb27SDimitry Andric   case 0x5e:
109806c3fb27SDimitry Andric     if (ABCIsConst)
109906c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, Not(B)), C));
110006c3fb27SDimitry Andric     break;
110106c3fb27SDimitry Andric   case 0x5f:
110206c3fb27SDimitry Andric     if (ACIsConst)
110306c3fb27SDimitry Andric       Res = Nand(A, C);
110406c3fb27SDimitry Andric     break;
110506c3fb27SDimitry Andric   case 0x60:
110606c3fb27SDimitry Andric     if (ABCIsConst)
110706c3fb27SDimitry Andric       Res = And(A, Xor(B, C));
110806c3fb27SDimitry Andric     break;
110906c3fb27SDimitry Andric   case 0x61:
111006c3fb27SDimitry Andric     if (ABCIsConst)
111106c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), And(B, C)), C);
111206c3fb27SDimitry Andric     break;
111306c3fb27SDimitry Andric   case 0x62:
111406c3fb27SDimitry Andric     if (ABCIsConst)
111506c3fb27SDimitry Andric       Res = Nor(Nor(A, C), Xnor(B, C));
111606c3fb27SDimitry Andric     break;
111706c3fb27SDimitry Andric   case 0x63:
111806c3fb27SDimitry Andric     if (ABCIsConst)
111906c3fb27SDimitry Andric       Res = Xor(B, Or(C, Not(A)));
112006c3fb27SDimitry Andric     break;
112106c3fb27SDimitry Andric   case 0x64:
112206c3fb27SDimitry Andric     if (ABCIsConst)
112306c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xnor(B, C));
112406c3fb27SDimitry Andric     break;
112506c3fb27SDimitry Andric   case 0x65:
112606c3fb27SDimitry Andric     if (ABCIsConst)
112706c3fb27SDimitry Andric       Res = Xor(Or(B, Not(A)), C);
112806c3fb27SDimitry Andric     break;
112906c3fb27SDimitry Andric   case 0x66:
113006c3fb27SDimitry Andric     Res = Xor(B, C);
113106c3fb27SDimitry Andric     break;
113206c3fb27SDimitry Andric   case 0x67:
113306c3fb27SDimitry Andric     if (ABCIsConst)
113406c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xor(B, C));
113506c3fb27SDimitry Andric     break;
113606c3fb27SDimitry Andric   case 0x68:
113706c3fb27SDimitry Andric     if (ABCIsConst)
113806c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
113906c3fb27SDimitry Andric     break;
114006c3fb27SDimitry Andric   case 0x69:
114106c3fb27SDimitry Andric     if (ABCIsConst)
114206c3fb27SDimitry Andric       Res = Xor(Xnor(A, B), C);
114306c3fb27SDimitry Andric     break;
114406c3fb27SDimitry Andric   case 0x6a:
114506c3fb27SDimitry Andric     if (ABCIsConst)
114606c3fb27SDimitry Andric       Res = Xor(And(A, B), C);
114706c3fb27SDimitry Andric     break;
114806c3fb27SDimitry Andric   case 0x6b:
114906c3fb27SDimitry Andric     if (ABCIsConst)
115006c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
115106c3fb27SDimitry Andric     break;
115206c3fb27SDimitry Andric   case 0x6c:
115306c3fb27SDimitry Andric     if (ABCIsConst)
115406c3fb27SDimitry Andric       Res = Xor(And(A, C), B);
115506c3fb27SDimitry Andric     break;
115606c3fb27SDimitry Andric   case 0x6d:
115706c3fb27SDimitry Andric     if (ABCIsConst)
115806c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
115906c3fb27SDimitry Andric     break;
116006c3fb27SDimitry Andric   case 0x6e:
116106c3fb27SDimitry Andric     if (ABCIsConst)
116206c3fb27SDimitry Andric       Res = Or(Nor(A, Not(B)), Xor(B, C));
116306c3fb27SDimitry Andric     break;
116406c3fb27SDimitry Andric   case 0x6f:
116506c3fb27SDimitry Andric     if (ABCIsConst)
116606c3fb27SDimitry Andric       Res = Nand(A, Xnor(B, C));
116706c3fb27SDimitry Andric     break;
116806c3fb27SDimitry Andric   case 0x70:
116906c3fb27SDimitry Andric     if (ABCIsConst)
117006c3fb27SDimitry Andric       Res = And(A, Nand(B, C));
117106c3fb27SDimitry Andric     break;
117206c3fb27SDimitry Andric   case 0x71:
117306c3fb27SDimitry Andric     if (ABCIsConst)
117406c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, B), Xor(A, C)), A);
117506c3fb27SDimitry Andric     break;
117606c3fb27SDimitry Andric   case 0x72:
117706c3fb27SDimitry Andric     if (ABCIsConst)
117806c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, B), C), B);
117906c3fb27SDimitry Andric     break;
118006c3fb27SDimitry Andric   case 0x73:
118106c3fb27SDimitry Andric     if (ABCIsConst)
118206c3fb27SDimitry Andric       Res = Nand(Nand(A, Not(C)), B);
118306c3fb27SDimitry Andric     break;
118406c3fb27SDimitry Andric   case 0x74:
118506c3fb27SDimitry Andric     if (ABCIsConst)
118606c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, C), B), C);
118706c3fb27SDimitry Andric     break;
118806c3fb27SDimitry Andric   case 0x75:
118906c3fb27SDimitry Andric     if (ABCIsConst)
119006c3fb27SDimitry Andric       Res = Nand(Nand(A, Not(B)), C);
119106c3fb27SDimitry Andric     break;
119206c3fb27SDimitry Andric   case 0x76:
119306c3fb27SDimitry Andric     if (ABCIsConst)
119406c3fb27SDimitry Andric       Res = Xor(B, Or(Nor(B, Not(A)), C));
119506c3fb27SDimitry Andric     break;
119606c3fb27SDimitry Andric   case 0x77:
119706c3fb27SDimitry Andric     if (BCIsConst)
119806c3fb27SDimitry Andric       Res = Nand(B, C);
119906c3fb27SDimitry Andric     break;
120006c3fb27SDimitry Andric   case 0x78:
120106c3fb27SDimitry Andric     if (ABCIsConst)
120206c3fb27SDimitry Andric       Res = Xor(A, And(B, C));
120306c3fb27SDimitry Andric     break;
120406c3fb27SDimitry Andric   case 0x79:
120506c3fb27SDimitry Andric     if (ABCIsConst)
120606c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);
120706c3fb27SDimitry Andric     break;
120806c3fb27SDimitry Andric   case 0x7a:
120906c3fb27SDimitry Andric     if (ABCIsConst)
121006c3fb27SDimitry Andric       Res = Or(Xor(A, C), Nor(B, Not(A)));
121106c3fb27SDimitry Andric     break;
121206c3fb27SDimitry Andric   case 0x7b:
121306c3fb27SDimitry Andric     if (ABCIsConst)
121406c3fb27SDimitry Andric       Res = Nand(Xnor(A, C), B);
121506c3fb27SDimitry Andric     break;
121606c3fb27SDimitry Andric   case 0x7c:
121706c3fb27SDimitry Andric     if (ABCIsConst)
121806c3fb27SDimitry Andric       Res = Or(Xor(A, B), Nor(C, Not(A)));
121906c3fb27SDimitry Andric     break;
122006c3fb27SDimitry Andric   case 0x7d:
122106c3fb27SDimitry Andric     if (ABCIsConst)
122206c3fb27SDimitry Andric       Res = Nand(Xnor(A, B), C);
122306c3fb27SDimitry Andric     break;
122406c3fb27SDimitry Andric   case 0x7e:
122506c3fb27SDimitry Andric     if (ABCIsConst)
122606c3fb27SDimitry Andric       Res = Or(Xor(A, B), Xor(A, C));
122706c3fb27SDimitry Andric     break;
122806c3fb27SDimitry Andric   case 0x7f:
122906c3fb27SDimitry Andric     if (ABCIsConst)
123006c3fb27SDimitry Andric       Res = Nand(And(A, B), C);
123106c3fb27SDimitry Andric     break;
123206c3fb27SDimitry Andric   case 0x80:
123306c3fb27SDimitry Andric     if (ABCIsConst)
123406c3fb27SDimitry Andric       Res = And(And(A, B), C);
123506c3fb27SDimitry Andric     break;
123606c3fb27SDimitry Andric   case 0x81:
123706c3fb27SDimitry Andric     if (ABCIsConst)
123806c3fb27SDimitry Andric       Res = Nor(Xor(A, B), Xor(A, C));
123906c3fb27SDimitry Andric     break;
124006c3fb27SDimitry Andric   case 0x82:
124106c3fb27SDimitry Andric     if (ABCIsConst)
124206c3fb27SDimitry Andric       Res = And(Xnor(A, B), C);
124306c3fb27SDimitry Andric     break;
124406c3fb27SDimitry Andric   case 0x83:
124506c3fb27SDimitry Andric     if (ABCIsConst)
124606c3fb27SDimitry Andric       Res = Nor(Xor(A, B), Nor(C, Not(A)));
124706c3fb27SDimitry Andric     break;
124806c3fb27SDimitry Andric   case 0x84:
124906c3fb27SDimitry Andric     if (ABCIsConst)
125006c3fb27SDimitry Andric       Res = And(Xnor(A, C), B);
125106c3fb27SDimitry Andric     break;
125206c3fb27SDimitry Andric   case 0x85:
125306c3fb27SDimitry Andric     if (ABCIsConst)
125406c3fb27SDimitry Andric       Res = Nor(Xor(A, C), Nor(B, Not(A)));
125506c3fb27SDimitry Andric     break;
125606c3fb27SDimitry Andric   case 0x86:
125706c3fb27SDimitry Andric     if (ABCIsConst)
125806c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
125906c3fb27SDimitry Andric     break;
126006c3fb27SDimitry Andric   case 0x87:
126106c3fb27SDimitry Andric     if (ABCIsConst)
126206c3fb27SDimitry Andric       Res = Xor(A, Nand(B, C));
126306c3fb27SDimitry Andric     break;
126406c3fb27SDimitry Andric   case 0x88:
126506c3fb27SDimitry Andric     Res = And(B, C);
126606c3fb27SDimitry Andric     break;
126706c3fb27SDimitry Andric   case 0x89:
126806c3fb27SDimitry Andric     if (ABCIsConst)
126906c3fb27SDimitry Andric       Res = Xor(B, Nor(Nor(B, Not(A)), C));
127006c3fb27SDimitry Andric     break;
127106c3fb27SDimitry Andric   case 0x8a:
127206c3fb27SDimitry Andric     if (ABCIsConst)
127306c3fb27SDimitry Andric       Res = And(Nand(A, Not(B)), C);
127406c3fb27SDimitry Andric     break;
127506c3fb27SDimitry Andric   case 0x8b:
127606c3fb27SDimitry Andric     if (ABCIsConst)
127706c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, C), B), C);
127806c3fb27SDimitry Andric     break;
127906c3fb27SDimitry Andric   case 0x8c:
128006c3fb27SDimitry Andric     if (ABCIsConst)
128106c3fb27SDimitry Andric       Res = And(Nand(A, Not(C)), B);
128206c3fb27SDimitry Andric     break;
128306c3fb27SDimitry Andric   case 0x8d:
128406c3fb27SDimitry Andric     if (ABCIsConst)
128506c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, B), C), B);
128606c3fb27SDimitry Andric     break;
128706c3fb27SDimitry Andric   case 0x8e:
128806c3fb27SDimitry Andric     if (ABCIsConst)
128906c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, B), Xor(A, C)), A);
129006c3fb27SDimitry Andric     break;
129106c3fb27SDimitry Andric   case 0x8f:
129206c3fb27SDimitry Andric     if (ABCIsConst)
129306c3fb27SDimitry Andric       Res = Nand(A, Nand(B, C));
129406c3fb27SDimitry Andric     break;
129506c3fb27SDimitry Andric   case 0x90:
129606c3fb27SDimitry Andric     if (ABCIsConst)
129706c3fb27SDimitry Andric       Res = And(A, Xnor(B, C));
129806c3fb27SDimitry Andric     break;
129906c3fb27SDimitry Andric   case 0x91:
130006c3fb27SDimitry Andric     if (ABCIsConst)
130106c3fb27SDimitry Andric       Res = Nor(Nor(A, Not(B)), Xor(B, C));
130206c3fb27SDimitry Andric     break;
130306c3fb27SDimitry Andric   case 0x92:
130406c3fb27SDimitry Andric     if (ABCIsConst)
130506c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
130606c3fb27SDimitry Andric     break;
130706c3fb27SDimitry Andric   case 0x93:
130806c3fb27SDimitry Andric     if (ABCIsConst)
130906c3fb27SDimitry Andric       Res = Xor(Nand(A, C), B);
131006c3fb27SDimitry Andric     break;
131106c3fb27SDimitry Andric   case 0x94:
131206c3fb27SDimitry Andric     if (ABCIsConst)
131306c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
131406c3fb27SDimitry Andric     break;
131506c3fb27SDimitry Andric   case 0x95:
131606c3fb27SDimitry Andric     if (ABCIsConst)
131706c3fb27SDimitry Andric       Res = Xor(Nand(A, B), C);
131806c3fb27SDimitry Andric     break;
131906c3fb27SDimitry Andric   case 0x96:
132006c3fb27SDimitry Andric     if (ABCIsConst)
132106c3fb27SDimitry Andric       Res = Xor(Xor(A, B), C);
132206c3fb27SDimitry Andric     break;
132306c3fb27SDimitry Andric   case 0x97:
132406c3fb27SDimitry Andric     if (ABCIsConst)
132506c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Or(Nor(A, B), C));
132606c3fb27SDimitry Andric     break;
132706c3fb27SDimitry Andric   case 0x98:
132806c3fb27SDimitry Andric     if (ABCIsConst)
132906c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xor(B, C));
133006c3fb27SDimitry Andric     break;
133106c3fb27SDimitry Andric   case 0x99:
133206c3fb27SDimitry Andric     if (BCIsConst)
133306c3fb27SDimitry Andric       Res = Xnor(B, C);
133406c3fb27SDimitry Andric     break;
133506c3fb27SDimitry Andric   case 0x9a:
133606c3fb27SDimitry Andric     if (ABCIsConst)
133706c3fb27SDimitry Andric       Res = Xor(Nor(B, Not(A)), C);
133806c3fb27SDimitry Andric     break;
133906c3fb27SDimitry Andric   case 0x9b:
134006c3fb27SDimitry Andric     if (ABCIsConst)
134106c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xnor(B, C));
134206c3fb27SDimitry Andric     break;
134306c3fb27SDimitry Andric   case 0x9c:
134406c3fb27SDimitry Andric     if (ABCIsConst)
134506c3fb27SDimitry Andric       Res = Xor(B, Nor(C, Not(A)));
134606c3fb27SDimitry Andric     break;
134706c3fb27SDimitry Andric   case 0x9d:
134806c3fb27SDimitry Andric     if (ABCIsConst)
134906c3fb27SDimitry Andric       Res = Or(Nor(A, C), Xnor(B, C));
135006c3fb27SDimitry Andric     break;
135106c3fb27SDimitry Andric   case 0x9e:
135206c3fb27SDimitry Andric     if (ABCIsConst)
135306c3fb27SDimitry Andric       Res = Xor(And(Xor(A, B), Nand(B, C)), C);
135406c3fb27SDimitry Andric     break;
135506c3fb27SDimitry Andric   case 0x9f:
135606c3fb27SDimitry Andric     if (ABCIsConst)
135706c3fb27SDimitry Andric       Res = Nand(A, Xor(B, C));
135806c3fb27SDimitry Andric     break;
135906c3fb27SDimitry Andric   case 0xa0:
136006c3fb27SDimitry Andric     Res = And(A, C);
136106c3fb27SDimitry Andric     break;
136206c3fb27SDimitry Andric   case 0xa1:
136306c3fb27SDimitry Andric     if (ABCIsConst)
136406c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, Not(B)), C));
136506c3fb27SDimitry Andric     break;
136606c3fb27SDimitry Andric   case 0xa2:
136706c3fb27SDimitry Andric     if (ABCIsConst)
136806c3fb27SDimitry Andric       Res = And(Or(A, Not(B)), C);
136906c3fb27SDimitry Andric     break;
137006c3fb27SDimitry Andric   case 0xa3:
137106c3fb27SDimitry Andric     if (ABCIsConst)
137206c3fb27SDimitry Andric       Res = Xor(Nor(Xor(B, C), A), C);
137306c3fb27SDimitry Andric     break;
137406c3fb27SDimitry Andric   case 0xa4:
137506c3fb27SDimitry Andric     if (ABCIsConst)
137606c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, B), C));
137706c3fb27SDimitry Andric     break;
137806c3fb27SDimitry Andric   case 0xa5:
137906c3fb27SDimitry Andric     if (ACIsConst)
138006c3fb27SDimitry Andric       Res = Xnor(A, C);
138106c3fb27SDimitry Andric     break;
138206c3fb27SDimitry Andric   case 0xa6:
138306c3fb27SDimitry Andric     if (ABCIsConst)
138406c3fb27SDimitry Andric       Res = Xor(Nor(A, Not(B)), C);
138506c3fb27SDimitry Andric     break;
138606c3fb27SDimitry Andric   case 0xa7:
138706c3fb27SDimitry Andric     if (ABCIsConst)
138806c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xnor(A, C));
138906c3fb27SDimitry Andric     break;
139006c3fb27SDimitry Andric   case 0xa8:
139106c3fb27SDimitry Andric     if (ABCIsConst)
139206c3fb27SDimitry Andric       Res = And(Or(A, B), C);
139306c3fb27SDimitry Andric     break;
139406c3fb27SDimitry Andric   case 0xa9:
139506c3fb27SDimitry Andric     if (ABCIsConst)
139606c3fb27SDimitry Andric       Res = Xor(Nor(A, B), C);
139706c3fb27SDimitry Andric     break;
139806c3fb27SDimitry Andric   case 0xaa:
139906c3fb27SDimitry Andric     Res = C;
140006c3fb27SDimitry Andric     break;
140106c3fb27SDimitry Andric   case 0xab:
140206c3fb27SDimitry Andric     if (ABCIsConst)
140306c3fb27SDimitry Andric       Res = Or(Nor(A, B), C);
140406c3fb27SDimitry Andric     break;
140506c3fb27SDimitry Andric   case 0xac:
140606c3fb27SDimitry Andric     if (ABCIsConst)
140706c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(B, C), A), C);
140806c3fb27SDimitry Andric     break;
140906c3fb27SDimitry Andric   case 0xad:
141006c3fb27SDimitry Andric     if (ABCIsConst)
141106c3fb27SDimitry Andric       Res = Or(Xnor(A, C), And(B, C));
141206c3fb27SDimitry Andric     break;
141306c3fb27SDimitry Andric   case 0xae:
141406c3fb27SDimitry Andric     if (ABCIsConst)
141506c3fb27SDimitry Andric       Res = Or(Nor(A, Not(B)), C);
141606c3fb27SDimitry Andric     break;
141706c3fb27SDimitry Andric   case 0xaf:
141806c3fb27SDimitry Andric     if (ACIsConst)
141906c3fb27SDimitry Andric       Res = Or(C, Not(A));
142006c3fb27SDimitry Andric     break;
142106c3fb27SDimitry Andric   case 0xb0:
142206c3fb27SDimitry Andric     if (ABCIsConst)
142306c3fb27SDimitry Andric       Res = And(A, Nand(B, Not(C)));
142406c3fb27SDimitry Andric     break;
142506c3fb27SDimitry Andric   case 0xb1:
142606c3fb27SDimitry Andric     if (ABCIsConst)
142706c3fb27SDimitry Andric       Res = Xor(A, Nor(Xor(A, B), C));
142806c3fb27SDimitry Andric     break;
142906c3fb27SDimitry Andric   case 0xb2:
143006c3fb27SDimitry Andric     if (ABCIsConst)
143106c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);
143206c3fb27SDimitry Andric     break;
143306c3fb27SDimitry Andric   case 0xb3:
143406c3fb27SDimitry Andric     if (ABCIsConst)
143506c3fb27SDimitry Andric       Res = Nand(Nand(A, C), B);
143606c3fb27SDimitry Andric     break;
143706c3fb27SDimitry Andric   case 0xb4:
143806c3fb27SDimitry Andric     if (ABCIsConst)
143906c3fb27SDimitry Andric       Res = Xor(A, Nor(C, Not(B)));
144006c3fb27SDimitry Andric     break;
144106c3fb27SDimitry Andric   case 0xb5:
144206c3fb27SDimitry Andric     if (ABCIsConst)
144306c3fb27SDimitry Andric       Res = Or(Xnor(A, C), Nor(B, C));
144406c3fb27SDimitry Andric     break;
144506c3fb27SDimitry Andric   case 0xb6:
144606c3fb27SDimitry Andric     if (ABCIsConst)
144706c3fb27SDimitry Andric       Res = Xor(And(Xor(A, B), Nand(A, C)), C);
144806c3fb27SDimitry Andric     break;
144906c3fb27SDimitry Andric   case 0xb7:
145006c3fb27SDimitry Andric     if (ABCIsConst)
145106c3fb27SDimitry Andric       Res = Nand(Xor(A, C), B);
145206c3fb27SDimitry Andric     break;
145306c3fb27SDimitry Andric   case 0xb8:
145406c3fb27SDimitry Andric     if (ABCIsConst)
145506c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, C), B), C);
145606c3fb27SDimitry Andric     break;
145706c3fb27SDimitry Andric   case 0xb9:
145806c3fb27SDimitry Andric     if (ABCIsConst)
145906c3fb27SDimitry Andric       Res = Xor(Nor(And(A, C), B), C);
146006c3fb27SDimitry Andric     break;
146106c3fb27SDimitry Andric   case 0xba:
146206c3fb27SDimitry Andric     if (ABCIsConst)
146306c3fb27SDimitry Andric       Res = Or(Nor(B, Not(A)), C);
146406c3fb27SDimitry Andric     break;
146506c3fb27SDimitry Andric   case 0xbb:
146606c3fb27SDimitry Andric     if (BCIsConst)
146706c3fb27SDimitry Andric       Res = Or(C, Not(B));
146806c3fb27SDimitry Andric     break;
146906c3fb27SDimitry Andric   case 0xbc:
147006c3fb27SDimitry Andric     if (ABCIsConst)
147106c3fb27SDimitry Andric       Res = Xor(A, And(Nand(A, C), B));
147206c3fb27SDimitry Andric     break;
147306c3fb27SDimitry Andric   case 0xbd:
147406c3fb27SDimitry Andric     if (ABCIsConst)
147506c3fb27SDimitry Andric       Res = Or(Xor(A, B), Xnor(A, C));
147606c3fb27SDimitry Andric     break;
147706c3fb27SDimitry Andric   case 0xbe:
147806c3fb27SDimitry Andric     if (ABCIsConst)
147906c3fb27SDimitry Andric       Res = Or(Xor(A, B), C);
148006c3fb27SDimitry Andric     break;
148106c3fb27SDimitry Andric   case 0xbf:
148206c3fb27SDimitry Andric     if (ABCIsConst)
148306c3fb27SDimitry Andric       Res = Or(Nand(A, B), C);
148406c3fb27SDimitry Andric     break;
148506c3fb27SDimitry Andric   case 0xc0:
148606c3fb27SDimitry Andric     Res = And(A, B);
148706c3fb27SDimitry Andric     break;
148806c3fb27SDimitry Andric   case 0xc1:
148906c3fb27SDimitry Andric     if (ABCIsConst)
149006c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, Not(C)), B));
149106c3fb27SDimitry Andric     break;
149206c3fb27SDimitry Andric   case 0xc2:
149306c3fb27SDimitry Andric     if (ABCIsConst)
149406c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, C), B));
149506c3fb27SDimitry Andric     break;
149606c3fb27SDimitry Andric   case 0xc3:
149706c3fb27SDimitry Andric     if (ABIsConst)
149806c3fb27SDimitry Andric       Res = Xnor(A, B);
149906c3fb27SDimitry Andric     break;
150006c3fb27SDimitry Andric   case 0xc4:
150106c3fb27SDimitry Andric     if (ABCIsConst)
150206c3fb27SDimitry Andric       Res = And(Or(A, Not(C)), B);
150306c3fb27SDimitry Andric     break;
150406c3fb27SDimitry Andric   case 0xc5:
150506c3fb27SDimitry Andric     if (ABCIsConst)
150606c3fb27SDimitry Andric       Res = Xor(B, Nor(A, Xor(B, C)));
150706c3fb27SDimitry Andric     break;
150806c3fb27SDimitry Andric   case 0xc6:
150906c3fb27SDimitry Andric     if (ABCIsConst)
151006c3fb27SDimitry Andric       Res = Xor(Nor(A, Not(C)), B);
151106c3fb27SDimitry Andric     break;
151206c3fb27SDimitry Andric   case 0xc7:
151306c3fb27SDimitry Andric     if (ABCIsConst)
151406c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Nor(A, C));
151506c3fb27SDimitry Andric     break;
151606c3fb27SDimitry Andric   case 0xc8:
151706c3fb27SDimitry Andric     if (ABCIsConst)
151806c3fb27SDimitry Andric       Res = And(Or(A, C), B);
151906c3fb27SDimitry Andric     break;
152006c3fb27SDimitry Andric   case 0xc9:
152106c3fb27SDimitry Andric     if (ABCIsConst)
152206c3fb27SDimitry Andric       Res = Xor(Nor(A, C), B);
152306c3fb27SDimitry Andric     break;
152406c3fb27SDimitry Andric   case 0xca:
152506c3fb27SDimitry Andric     if (ABCIsConst)
152606c3fb27SDimitry Andric       Res = Xor(B, Nor(A, Xnor(B, C)));
152706c3fb27SDimitry Andric     break;
152806c3fb27SDimitry Andric   case 0xcb:
152906c3fb27SDimitry Andric     if (ABCIsConst)
153006c3fb27SDimitry Andric       Res = Or(Xnor(A, B), And(B, C));
153106c3fb27SDimitry Andric     break;
153206c3fb27SDimitry Andric   case 0xcc:
153306c3fb27SDimitry Andric     Res = B;
153406c3fb27SDimitry Andric     break;
153506c3fb27SDimitry Andric   case 0xcd:
153606c3fb27SDimitry Andric     if (ABCIsConst)
153706c3fb27SDimitry Andric       Res = Or(Nor(A, C), B);
153806c3fb27SDimitry Andric     break;
153906c3fb27SDimitry Andric   case 0xce:
154006c3fb27SDimitry Andric     if (ABCIsConst)
154106c3fb27SDimitry Andric       Res = Or(Nor(A, Not(C)), B);
154206c3fb27SDimitry Andric     break;
154306c3fb27SDimitry Andric   case 0xcf:
154406c3fb27SDimitry Andric     if (ABIsConst)
154506c3fb27SDimitry Andric       Res = Or(B, Not(A));
154606c3fb27SDimitry Andric     break;
154706c3fb27SDimitry Andric   case 0xd0:
154806c3fb27SDimitry Andric     if (ABCIsConst)
154906c3fb27SDimitry Andric       Res = And(A, Or(B, Not(C)));
155006c3fb27SDimitry Andric     break;
155106c3fb27SDimitry Andric   case 0xd1:
155206c3fb27SDimitry Andric     if (ABCIsConst)
155306c3fb27SDimitry Andric       Res = Xor(A, Nor(Xor(A, C), B));
155406c3fb27SDimitry Andric     break;
155506c3fb27SDimitry Andric   case 0xd2:
155606c3fb27SDimitry Andric     if (ABCIsConst)
155706c3fb27SDimitry Andric       Res = Xor(A, Nor(B, Not(C)));
155806c3fb27SDimitry Andric     break;
155906c3fb27SDimitry Andric   case 0xd3:
156006c3fb27SDimitry Andric     if (ABCIsConst)
156106c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Nor(B, C));
156206c3fb27SDimitry Andric     break;
156306c3fb27SDimitry Andric   case 0xd4:
156406c3fb27SDimitry Andric     if (ABCIsConst)
156506c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);
156606c3fb27SDimitry Andric     break;
156706c3fb27SDimitry Andric   case 0xd5:
156806c3fb27SDimitry Andric     if (ABCIsConst)
156906c3fb27SDimitry Andric       Res = Nand(Nand(A, B), C);
157006c3fb27SDimitry Andric     break;
157106c3fb27SDimitry Andric   case 0xd6:
157206c3fb27SDimitry Andric     if (ABCIsConst)
157306c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Or(And(A, B), C));
157406c3fb27SDimitry Andric     break;
157506c3fb27SDimitry Andric   case 0xd7:
157606c3fb27SDimitry Andric     if (ABCIsConst)
157706c3fb27SDimitry Andric       Res = Nand(Xor(A, B), C);
157806c3fb27SDimitry Andric     break;
157906c3fb27SDimitry Andric   case 0xd8:
158006c3fb27SDimitry Andric     if (ABCIsConst)
158106c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), C), B);
158206c3fb27SDimitry Andric     break;
158306c3fb27SDimitry Andric   case 0xd9:
158406c3fb27SDimitry Andric     if (ABCIsConst)
158506c3fb27SDimitry Andric       Res = Or(And(A, B), Xnor(B, C));
158606c3fb27SDimitry Andric     break;
158706c3fb27SDimitry Andric   case 0xda:
158806c3fb27SDimitry Andric     if (ABCIsConst)
158906c3fb27SDimitry Andric       Res = Xor(A, And(Nand(A, B), C));
159006c3fb27SDimitry Andric     break;
159106c3fb27SDimitry Andric   case 0xdb:
159206c3fb27SDimitry Andric     if (ABCIsConst)
159306c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Xor(A, C));
159406c3fb27SDimitry Andric     break;
159506c3fb27SDimitry Andric   case 0xdc:
159606c3fb27SDimitry Andric     if (ABCIsConst)
159706c3fb27SDimitry Andric       Res = Or(B, Nor(C, Not(A)));
159806c3fb27SDimitry Andric     break;
159906c3fb27SDimitry Andric   case 0xdd:
160006c3fb27SDimitry Andric     if (BCIsConst)
160106c3fb27SDimitry Andric       Res = Or(B, Not(C));
160206c3fb27SDimitry Andric     break;
160306c3fb27SDimitry Andric   case 0xde:
160406c3fb27SDimitry Andric     if (ABCIsConst)
160506c3fb27SDimitry Andric       Res = Or(Xor(A, C), B);
160606c3fb27SDimitry Andric     break;
160706c3fb27SDimitry Andric   case 0xdf:
160806c3fb27SDimitry Andric     if (ABCIsConst)
160906c3fb27SDimitry Andric       Res = Or(Nand(A, C), B);
161006c3fb27SDimitry Andric     break;
161106c3fb27SDimitry Andric   case 0xe0:
161206c3fb27SDimitry Andric     if (ABCIsConst)
161306c3fb27SDimitry Andric       Res = And(A, Or(B, C));
161406c3fb27SDimitry Andric     break;
161506c3fb27SDimitry Andric   case 0xe1:
161606c3fb27SDimitry Andric     if (ABCIsConst)
161706c3fb27SDimitry Andric       Res = Xor(A, Nor(B, C));
161806c3fb27SDimitry Andric     break;
161906c3fb27SDimitry Andric   case 0xe2:
162006c3fb27SDimitry Andric     if (ABCIsConst)
162106c3fb27SDimitry Andric       Res = Xor(A, Nor(Xnor(A, C), B));
162206c3fb27SDimitry Andric     break;
162306c3fb27SDimitry Andric   case 0xe3:
162406c3fb27SDimitry Andric     if (ABCIsConst)
162506c3fb27SDimitry Andric       Res = Xor(A, Nor(And(A, C), B));
162606c3fb27SDimitry Andric     break;
162706c3fb27SDimitry Andric   case 0xe4:
162806c3fb27SDimitry Andric     if (ABCIsConst)
162906c3fb27SDimitry Andric       Res = Xor(A, Nor(Xnor(A, B), C));
163006c3fb27SDimitry Andric     break;
163106c3fb27SDimitry Andric   case 0xe5:
163206c3fb27SDimitry Andric     if (ABCIsConst)
163306c3fb27SDimitry Andric       Res = Xor(A, Nor(And(A, B), C));
163406c3fb27SDimitry Andric     break;
163506c3fb27SDimitry Andric   case 0xe6:
163606c3fb27SDimitry Andric     if (ABCIsConst)
163706c3fb27SDimitry Andric       Res = Or(And(A, B), Xor(B, C));
163806c3fb27SDimitry Andric     break;
163906c3fb27SDimitry Andric   case 0xe7:
164006c3fb27SDimitry Andric     if (ABCIsConst)
164106c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Xnor(A, C));
164206c3fb27SDimitry Andric     break;
164306c3fb27SDimitry Andric   case 0xe8:
164406c3fb27SDimitry Andric     if (ABCIsConst)
164506c3fb27SDimitry Andric       Res = Xor(Or(A, B), Nor(Xnor(A, B), C));
164606c3fb27SDimitry Andric     break;
164706c3fb27SDimitry Andric   case 0xe9:
164806c3fb27SDimitry Andric     if (ABCIsConst)
164906c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Nand(Nand(A, B), C));
165006c3fb27SDimitry Andric     break;
165106c3fb27SDimitry Andric   case 0xea:
165206c3fb27SDimitry Andric     if (ABCIsConst)
165306c3fb27SDimitry Andric       Res = Or(And(A, B), C);
165406c3fb27SDimitry Andric     break;
165506c3fb27SDimitry Andric   case 0xeb:
165606c3fb27SDimitry Andric     if (ABCIsConst)
165706c3fb27SDimitry Andric       Res = Or(Xnor(A, B), C);
165806c3fb27SDimitry Andric     break;
165906c3fb27SDimitry Andric   case 0xec:
166006c3fb27SDimitry Andric     if (ABCIsConst)
166106c3fb27SDimitry Andric       Res = Or(And(A, C), B);
166206c3fb27SDimitry Andric     break;
166306c3fb27SDimitry Andric   case 0xed:
166406c3fb27SDimitry Andric     if (ABCIsConst)
166506c3fb27SDimitry Andric       Res = Or(Xnor(A, C), B);
166606c3fb27SDimitry Andric     break;
166706c3fb27SDimitry Andric   case 0xee:
166806c3fb27SDimitry Andric     Res = Or(B, C);
166906c3fb27SDimitry Andric     break;
167006c3fb27SDimitry Andric   case 0xef:
167106c3fb27SDimitry Andric     if (ABCIsConst)
167206c3fb27SDimitry Andric       Res = Nand(A, Nor(B, C));
167306c3fb27SDimitry Andric     break;
167406c3fb27SDimitry Andric   case 0xf0:
167506c3fb27SDimitry Andric     Res = A;
167606c3fb27SDimitry Andric     break;
167706c3fb27SDimitry Andric   case 0xf1:
167806c3fb27SDimitry Andric     if (ABCIsConst)
167906c3fb27SDimitry Andric       Res = Or(A, Nor(B, C));
168006c3fb27SDimitry Andric     break;
168106c3fb27SDimitry Andric   case 0xf2:
168206c3fb27SDimitry Andric     if (ABCIsConst)
168306c3fb27SDimitry Andric       Res = Or(A, Nor(B, Not(C)));
168406c3fb27SDimitry Andric     break;
168506c3fb27SDimitry Andric   case 0xf3:
168606c3fb27SDimitry Andric     if (ABIsConst)
168706c3fb27SDimitry Andric       Res = Or(A, Not(B));
168806c3fb27SDimitry Andric     break;
168906c3fb27SDimitry Andric   case 0xf4:
169006c3fb27SDimitry Andric     if (ABCIsConst)
169106c3fb27SDimitry Andric       Res = Or(A, Nor(C, Not(B)));
169206c3fb27SDimitry Andric     break;
169306c3fb27SDimitry Andric   case 0xf5:
169406c3fb27SDimitry Andric     if (ACIsConst)
169506c3fb27SDimitry Andric       Res = Or(A, Not(C));
169606c3fb27SDimitry Andric     break;
169706c3fb27SDimitry Andric   case 0xf6:
169806c3fb27SDimitry Andric     if (ABCIsConst)
169906c3fb27SDimitry Andric       Res = Or(A, Xor(B, C));
170006c3fb27SDimitry Andric     break;
170106c3fb27SDimitry Andric   case 0xf7:
170206c3fb27SDimitry Andric     if (ABCIsConst)
170306c3fb27SDimitry Andric       Res = Or(A, Nand(B, C));
170406c3fb27SDimitry Andric     break;
170506c3fb27SDimitry Andric   case 0xf8:
170606c3fb27SDimitry Andric     if (ABCIsConst)
170706c3fb27SDimitry Andric       Res = Or(A, And(B, C));
170806c3fb27SDimitry Andric     break;
170906c3fb27SDimitry Andric   case 0xf9:
171006c3fb27SDimitry Andric     if (ABCIsConst)
171106c3fb27SDimitry Andric       Res = Or(A, Xnor(B, C));
171206c3fb27SDimitry Andric     break;
171306c3fb27SDimitry Andric   case 0xfa:
171406c3fb27SDimitry Andric     Res = Or(A, C);
171506c3fb27SDimitry Andric     break;
171606c3fb27SDimitry Andric   case 0xfb:
171706c3fb27SDimitry Andric     if (ABCIsConst)
171806c3fb27SDimitry Andric       Res = Nand(Nor(A, C), B);
171906c3fb27SDimitry Andric     break;
172006c3fb27SDimitry Andric   case 0xfc:
172106c3fb27SDimitry Andric     Res = Or(A, B);
172206c3fb27SDimitry Andric     break;
172306c3fb27SDimitry Andric   case 0xfd:
172406c3fb27SDimitry Andric     if (ABCIsConst)
172506c3fb27SDimitry Andric       Res = Nand(Nor(A, B), C);
172606c3fb27SDimitry Andric     break;
172706c3fb27SDimitry Andric   case 0xfe:
172806c3fb27SDimitry Andric     if (ABCIsConst)
172906c3fb27SDimitry Andric       Res = Or(Or(A, B), C);
173006c3fb27SDimitry Andric     break;
173106c3fb27SDimitry Andric   case 0xff:
173206c3fb27SDimitry Andric     Res = {Constant::getAllOnesValue(Ty), 0xff};
173306c3fb27SDimitry Andric     break;
173406c3fb27SDimitry Andric   }
173506c3fb27SDimitry Andric 
173606c3fb27SDimitry Andric   assert((Res.first == nullptr || Res.second == Imm) &&
173706c3fb27SDimitry Andric          "Simplification of ternary logic does not verify!");
173806c3fb27SDimitry Andric   return Res.first;
173906c3fb27SDimitry Andric }
174006c3fb27SDimitry Andric 
/// Attempt to simplify SSE4.1 INSERTPS with a constant control byte into a
/// generic shufflevector (or a plain zero vector) so later generic shuffle
/// combines can see through it. Returns nullptr if no simplification applies.
static Value *simplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  // We can only model the operation as a shuffle when the control byte is a
  // compile-time constant.
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CInt)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

  // The immediate permute control byte looks like this:
  //    [3:0] - zero mask for each 32-bit lane
  //    [5:4] - select one 32-bit destination lane
  //    [7:6] - select one 32-bit source lane

  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;

  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

  // If all zero mask bits are set, this was just a weird way to
  // generate a zero vector.
  if (ZMask == 0xf)
    return ZeroVector;

  // Initialize by passing all of the first source bits through.
  int ShuffleMask[4] = {0, 1, 2, 3};

  // We may replace the second operand with the zero vector.
  Value *V1 = II.getArgOperand(1);

  if (ZMask) {
    // If the zero mask is being used with a single input or the zero mask
    // overrides the destination lane, this is a shuffle with the zero vector.
    if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
        (ZMask & (1 << DestLane))) {
      V1 = ZeroVector;
      // We may still move 32-bits of the first source vector from one lane
      // to another.
      ShuffleMask[DestLane] = SourceLane;
      // The zero mask may override the previous insert operation.
      for (unsigned i = 0; i < 4; ++i)
        if ((ZMask >> i) & 0x1)
          ShuffleMask[i] = i + 4; // Indices >= 4 select from V1 (zero vector).
    } else {
      // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
      return nullptr;
    }
  } else {
    // Replace the selected destination lane with the selected source lane.
    ShuffleMask[DestLane] = SourceLane + 4;
  }

  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}
1797e8d8bef9SDimitry Andric 
/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
/// or conversion to a shuffle vector.
///
/// \p Op0 is the vector being extracted from. \p CILength / \p CIIndex are
/// the constant field length and bit index when known (may be null for the
/// register form); most folds require both.
static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
                               ConstantInt *CILength, ConstantInt *CIIndex,
                               InstCombiner::BuilderTy &Builder) {
  // Build a two-element i64 vector result: a known low element and an
  // undefined high element (only the low 64 bits of the result are defined).
  auto LowConstantHighUndef = [&](uint64_t Val) {
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  };

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *CI0 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;

  // Attempt to constant fold.
  if (CILength && CIIndex) {
    // From AMD documentation: "The bit index and field length are each six
    // bits in length other bits of the field are ignored."
    APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
    APInt APLength = CILength->getValue().zextOrTrunc(6);

    unsigned Index = APIndex.getZExtValue();

    // From AMD documentation: "a value of zero in the field length is
    // defined as length of 64".
    unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

    // From AMD documentation: "If the sum of the bit index + length field
    // is greater than 64, the results are undefined".
    unsigned End = Index + Length;

    // Note that both field index and field length are 8-bit quantities.
    // Since variables 'Index' and 'Length' are unsigned values
    // obtained from zero-extending field index and field length
    // respectively, their sum should never wrap around.
    if (End > 64)
      return UndefValue::get(II.getType());

    // If we are inserting whole bytes, we can convert this to a shuffle.
    // Lowering can recognize EXTRQI shuffle masks.
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      // Convert bit indices to byte indices.
      Length /= 8;
      Index /= 8;

      Type *IntTy8 = Type::getInt8Ty(II.getContext());
      auto *ShufTy = FixedVectorType::get(IntTy8, 16);

      // Mask layout: extracted field bytes, zero-fill up to byte 8, undef
      // for the (undefined) upper 64 bits.
      SmallVector<int, 16> ShuffleMask;
      for (int i = 0; i != (int)Length; ++i)
        ShuffleMask.push_back(i + Index);
      for (int i = Length; i != 8; ++i)
        ShuffleMask.push_back(i + 16); // Indices >= 16 pick from the zero vector.
      for (int i = 8; i != 16; ++i)
        ShuffleMask.push_back(-1);

      Value *SV = Builder.CreateShuffleVector(
          Builder.CreateBitCast(Op0, ShufTy),
          ConstantAggregateZero::get(ShufTy), ShuffleMask);
      return Builder.CreateBitCast(SV, II.getType());
    }

    // Constant Fold - shift Index'th bit to lowest position and mask off
    // Length bits.
    if (CI0) {
      APInt Elt = CI0->getValue();
      Elt.lshrInPlace(Index);
      Elt = Elt.zextOrTrunc(Length);
      return LowConstantHighUndef(Elt.getZExtValue());
    }

    // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
    if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
      Value *Args[] = {Op0, CILength, CIIndex};
      Module *M = II.getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
      return Builder.CreateCall(F, Args);
    }
  }

  // Constant Fold - extraction from zero is always {zero, undef}.
  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);

  return nullptr;
}
1888e8d8bef9SDimitry Andric 
/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
/// folding or conversion to a shuffle vector.
///
/// \p Op0 is the destination vector, \p Op1 the source of the inserted field.
/// \p APLength / \p APIndex are the field length and bit index operands.
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
  // From AMD documentation: "The bit index and field length are each six bits
  // in length other bits of the field are ignored."
  APIndex = APIndex.zextOrTrunc(6);
  APLength = APLength.zextOrTrunc(6);

  // Attempt to constant fold.
  unsigned Index = APIndex.getZExtValue();

  // From AMD documentation: "a value of zero in the field length is
  // defined as length of 64".
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  // From AMD documentation: "If the sum of the bit index + length field
  // is greater than 64, the results are undefined".
  unsigned End = Index + Length;

  // Note that both field index and field length are 8-bit quantities.
  // Since variables 'Index' and 'Length' are unsigned values
  // obtained from zero-extending field index and field length
  // respectively, their sum should never wrap around.
  if (End > 64)
    return UndefValue::get(II.getType());

  // If we are inserting whole bytes, we can convert this to a shuffle.
  // Lowering can recognize INSERTQI shuffle masks.
  if ((Length % 8) == 0 && (Index % 8) == 0) {
    // Convert bit indices to byte indices.
    Length /= 8;
    Index /= 8;

    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    auto *ShufTy = FixedVectorType::get(IntTy8, 16);

    // Mask layout: Op0 bytes below the field, Op1 bytes for the field
    // (indices >= 16 select from Op1), remaining Op0 bytes up to byte 8,
    // then undef for the (undefined) upper 64 bits.
    SmallVector<int, 16> ShuffleMask;
    for (int i = 0; i != (int)Index; ++i)
      ShuffleMask.push_back(i);
    for (int i = 0; i != (int)Length; ++i)
      ShuffleMask.push_back(i + 16);
    for (int i = Index + Length; i != 8; ++i)
      ShuffleMask.push_back(i);
    for (int i = 8; i != 16; ++i)
      ShuffleMask.push_back(-1);

    Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
                                            Builder.CreateBitCast(Op1, ShufTy),
                                            ShuffleMask);
    return Builder.CreateBitCast(SV, II.getType());
  }

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
  auto *CI00 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;
  auto *CI10 =
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
         : nullptr;

  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
  if (CI00 && CI10) {
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
    V00 = V00 & ~Mask; // Clear the destination field.
    V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
    APInt Val = V00 | V10;
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  }

  // If we were an INSERTQ call, we'll save demanded elements if we convert to
  // INSERTQI.
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Constant *CILength = ConstantInt::get(IntTy8, Length, false);
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    Module *M = II.getModule();
    Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
    return Builder.CreateCall(F, Args);
  }

  return nullptr;
}
1982e8d8bef9SDimitry Andric 
1983e8d8bef9SDimitry Andric /// Attempt to convert pshufb* to shufflevector if the mask is constant.
1984e8d8bef9SDimitry Andric static Value *simplifyX86pshufb(const IntrinsicInst &II,
1985e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
1986fe6060f1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1987e8d8bef9SDimitry Andric   if (!V)
1988e8d8bef9SDimitry Andric     return nullptr;
1989e8d8bef9SDimitry Andric 
1990e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
1991e8d8bef9SDimitry Andric   unsigned NumElts = VecTy->getNumElements();
1992e8d8bef9SDimitry Andric   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
1993e8d8bef9SDimitry Andric          "Unexpected number of elements in shuffle mask!");
1994e8d8bef9SDimitry Andric 
1995e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
1996e8d8bef9SDimitry Andric   int Indexes[64];
1997e8d8bef9SDimitry Andric 
1998e8d8bef9SDimitry Andric   // Each byte in the shuffle control mask forms an index to permute the
1999e8d8bef9SDimitry Andric   // corresponding byte in the destination operand.
2000e8d8bef9SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I) {
2001e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
2002e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
2003e8d8bef9SDimitry Andric       return nullptr;
2004e8d8bef9SDimitry Andric 
2005e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
2006e8d8bef9SDimitry Andric       Indexes[I] = -1;
2007e8d8bef9SDimitry Andric       continue;
2008e8d8bef9SDimitry Andric     }
2009e8d8bef9SDimitry Andric 
2010e8d8bef9SDimitry Andric     int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
2011e8d8bef9SDimitry Andric 
2012e8d8bef9SDimitry Andric     // If the most significant bit (bit[7]) of each byte of the shuffle
2013e8d8bef9SDimitry Andric     // control mask is set, then zero is written in the result byte.
2014e8d8bef9SDimitry Andric     // The zero vector is in the right-hand side of the resulting
2015e8d8bef9SDimitry Andric     // shufflevector.
2016e8d8bef9SDimitry Andric 
2017e8d8bef9SDimitry Andric     // The value of each index for the high 128-bit lane is the least
2018e8d8bef9SDimitry Andric     // significant 4 bits of the respective shuffle control byte.
2019e8d8bef9SDimitry Andric     Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
2020e8d8bef9SDimitry Andric     Indexes[I] = Index;
2021e8d8bef9SDimitry Andric   }
2022e8d8bef9SDimitry Andric 
2023e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
2024e8d8bef9SDimitry Andric   auto V2 = Constant::getNullValue(VecTy);
2025bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
2026e8d8bef9SDimitry Andric }
2027e8d8bef9SDimitry Andric 
2028e8d8bef9SDimitry Andric /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
2029e8d8bef9SDimitry Andric static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
2030e8d8bef9SDimitry Andric                                     InstCombiner::BuilderTy &Builder) {
2031fe6060f1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
2032e8d8bef9SDimitry Andric   if (!V)
2033e8d8bef9SDimitry Andric     return nullptr;
2034e8d8bef9SDimitry Andric 
2035e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
2036e8d8bef9SDimitry Andric   unsigned NumElts = VecTy->getNumElements();
2037e8d8bef9SDimitry Andric   bool IsPD = VecTy->getScalarType()->isDoubleTy();
2038e8d8bef9SDimitry Andric   unsigned NumLaneElts = IsPD ? 2 : 4;
2039e8d8bef9SDimitry Andric   assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
2040e8d8bef9SDimitry Andric 
2041e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
2042e8d8bef9SDimitry Andric   int Indexes[16];
2043e8d8bef9SDimitry Andric 
2044e8d8bef9SDimitry Andric   // The intrinsics only read one or two bits, clear the rest.
2045e8d8bef9SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I) {
2046e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
2047e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
2048e8d8bef9SDimitry Andric       return nullptr;
2049e8d8bef9SDimitry Andric 
2050e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
2051e8d8bef9SDimitry Andric       Indexes[I] = -1;
2052e8d8bef9SDimitry Andric       continue;
2053e8d8bef9SDimitry Andric     }
2054e8d8bef9SDimitry Andric 
2055e8d8bef9SDimitry Andric     APInt Index = cast<ConstantInt>(COp)->getValue();
2056e8d8bef9SDimitry Andric     Index = Index.zextOrTrunc(32).getLoBits(2);
2057e8d8bef9SDimitry Andric 
2058e8d8bef9SDimitry Andric     // The PD variants uses bit 1 to select per-lane element index, so
2059e8d8bef9SDimitry Andric     // shift down to convert to generic shuffle mask index.
2060e8d8bef9SDimitry Andric     if (IsPD)
2061e8d8bef9SDimitry Andric       Index.lshrInPlace(1);
2062e8d8bef9SDimitry Andric 
2063e8d8bef9SDimitry Andric     // The _256 variants are a bit trickier since the mask bits always index
2064e8d8bef9SDimitry Andric     // into the corresponding 128 half. In order to convert to a generic
2065e8d8bef9SDimitry Andric     // shuffle, we have to make that explicit.
2066e8d8bef9SDimitry Andric     Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
2067e8d8bef9SDimitry Andric 
2068e8d8bef9SDimitry Andric     Indexes[I] = Index.getZExtValue();
2069e8d8bef9SDimitry Andric   }
2070e8d8bef9SDimitry Andric 
2071e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
2072bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts));
2073e8d8bef9SDimitry Andric }
2074e8d8bef9SDimitry Andric 
2075e8d8bef9SDimitry Andric /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
2076e8d8bef9SDimitry Andric static Value *simplifyX86vpermv(const IntrinsicInst &II,
2077e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
2078e8d8bef9SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
2079e8d8bef9SDimitry Andric   if (!V)
2080e8d8bef9SDimitry Andric     return nullptr;
2081e8d8bef9SDimitry Andric 
2082e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
2083e8d8bef9SDimitry Andric   unsigned Size = VecTy->getNumElements();
2084e8d8bef9SDimitry Andric   assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
2085e8d8bef9SDimitry Andric          "Unexpected shuffle mask size");
2086e8d8bef9SDimitry Andric 
2087e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
2088e8d8bef9SDimitry Andric   int Indexes[64];
2089e8d8bef9SDimitry Andric 
2090e8d8bef9SDimitry Andric   for (unsigned I = 0; I < Size; ++I) {
2091e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
2092e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
2093e8d8bef9SDimitry Andric       return nullptr;
2094e8d8bef9SDimitry Andric 
2095e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
2096e8d8bef9SDimitry Andric       Indexes[I] = -1;
2097e8d8bef9SDimitry Andric       continue;
2098e8d8bef9SDimitry Andric     }
2099e8d8bef9SDimitry Andric 
2100e8d8bef9SDimitry Andric     uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
2101e8d8bef9SDimitry Andric     Index &= Size - 1;
2102e8d8bef9SDimitry Andric     Indexes[I] = Index;
2103e8d8bef9SDimitry Andric   }
2104e8d8bef9SDimitry Andric 
2105e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
2106bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size));
2107e8d8bef9SDimitry Andric }
2108e8d8bef9SDimitry Andric 
2109*0fca6ea1SDimitry Andric /// Attempt to convert vpermi2/vpermt2 to shufflevector if the mask is constant.
2110*0fca6ea1SDimitry Andric static Value *simplifyX86vpermv3(const IntrinsicInst &II,
2111*0fca6ea1SDimitry Andric                                  InstCombiner::BuilderTy &Builder) {
2112*0fca6ea1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
2113*0fca6ea1SDimitry Andric   if (!V)
2114*0fca6ea1SDimitry Andric     return nullptr;
2115*0fca6ea1SDimitry Andric 
2116*0fca6ea1SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
2117*0fca6ea1SDimitry Andric   unsigned Size = VecTy->getNumElements();
2118*0fca6ea1SDimitry Andric   assert((Size == 2 || Size == 4 || Size == 8 || Size == 16 || Size == 32 ||
2119*0fca6ea1SDimitry Andric           Size == 64) &&
2120*0fca6ea1SDimitry Andric          "Unexpected shuffle mask size");
2121*0fca6ea1SDimitry Andric 
2122*0fca6ea1SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
2123*0fca6ea1SDimitry Andric   int Indexes[64];
2124*0fca6ea1SDimitry Andric 
2125*0fca6ea1SDimitry Andric   for (unsigned I = 0; I < Size; ++I) {
2126*0fca6ea1SDimitry Andric     Constant *COp = V->getAggregateElement(I);
2127*0fca6ea1SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
2128*0fca6ea1SDimitry Andric       return nullptr;
2129*0fca6ea1SDimitry Andric 
2130*0fca6ea1SDimitry Andric     if (isa<UndefValue>(COp)) {
2131*0fca6ea1SDimitry Andric       Indexes[I] = -1;
2132*0fca6ea1SDimitry Andric       continue;
2133*0fca6ea1SDimitry Andric     }
2134*0fca6ea1SDimitry Andric 
2135*0fca6ea1SDimitry Andric     uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
2136*0fca6ea1SDimitry Andric     Index &= (2 * Size) - 1;
2137*0fca6ea1SDimitry Andric     Indexes[I] = Index;
2138*0fca6ea1SDimitry Andric   }
2139*0fca6ea1SDimitry Andric 
2140*0fca6ea1SDimitry Andric   auto V1 = II.getArgOperand(0);
2141*0fca6ea1SDimitry Andric   auto V2 = II.getArgOperand(2);
2142*0fca6ea1SDimitry Andric   return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, Size));
2143*0fca6ea1SDimitry Andric }
2144*0fca6ea1SDimitry Andric 
2145bdd1243dSDimitry Andric std::optional<Instruction *>
2146e8d8bef9SDimitry Andric X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
2147e8d8bef9SDimitry Andric   auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
2148e8d8bef9SDimitry Andric                                              unsigned DemandedWidth) {
2149e8d8bef9SDimitry Andric     APInt UndefElts(Width, 0);
2150e8d8bef9SDimitry Andric     APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
2151e8d8bef9SDimitry Andric     return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
2152e8d8bef9SDimitry Andric   };
2153e8d8bef9SDimitry Andric 
2154e8d8bef9SDimitry Andric   Intrinsic::ID IID = II.getIntrinsicID();
2155e8d8bef9SDimitry Andric   switch (IID) {
2156e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bextr_32:
2157e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bextr_64:
2158e8d8bef9SDimitry Andric   case Intrinsic::x86_tbm_bextri_u32:
2159e8d8bef9SDimitry Andric   case Intrinsic::x86_tbm_bextri_u64:
2160e8d8bef9SDimitry Andric     // If the RHS is a constant we can try some simplifications.
2161e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2162e8d8bef9SDimitry Andric       uint64_t Shift = C->getZExtValue();
2163e8d8bef9SDimitry Andric       uint64_t Length = (Shift >> 8) & 0xff;
2164e8d8bef9SDimitry Andric       Shift &= 0xff;
2165e8d8bef9SDimitry Andric       unsigned BitWidth = II.getType()->getIntegerBitWidth();
2166e8d8bef9SDimitry Andric       // If the length is 0 or the shift is out of range, replace with zero.
2167e8d8bef9SDimitry Andric       if (Length == 0 || Shift >= BitWidth) {
2168e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2169e8d8bef9SDimitry Andric       }
2170e8d8bef9SDimitry Andric       // If the LHS is also a constant, we can completely constant fold this.
2171e8d8bef9SDimitry Andric       if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2172e8d8bef9SDimitry Andric         uint64_t Result = InC->getZExtValue() >> Shift;
2173e8d8bef9SDimitry Andric         if (Length > BitWidth)
2174e8d8bef9SDimitry Andric           Length = BitWidth;
2175e8d8bef9SDimitry Andric         Result &= maskTrailingOnes<uint64_t>(Length);
2176e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2177e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2178e8d8bef9SDimitry Andric       }
2179e8d8bef9SDimitry Andric       // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2180e8d8bef9SDimitry Andric       // are only masking bits that a shift already cleared?
2181e8d8bef9SDimitry Andric     }
2182e8d8bef9SDimitry Andric     break;
2183e8d8bef9SDimitry Andric 
2184e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bzhi_32:
2185e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bzhi_64:
2186e8d8bef9SDimitry Andric     // If the RHS is a constant we can try some simplifications.
2187e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2188e8d8bef9SDimitry Andric       uint64_t Index = C->getZExtValue() & 0xff;
2189e8d8bef9SDimitry Andric       unsigned BitWidth = II.getType()->getIntegerBitWidth();
2190e8d8bef9SDimitry Andric       if (Index >= BitWidth) {
2191e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2192e8d8bef9SDimitry Andric       }
2193e8d8bef9SDimitry Andric       if (Index == 0) {
2194e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2195e8d8bef9SDimitry Andric       }
2196e8d8bef9SDimitry Andric       // If the LHS is also a constant, we can completely constant fold this.
2197e8d8bef9SDimitry Andric       if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2198e8d8bef9SDimitry Andric         uint64_t Result = InC->getZExtValue();
2199e8d8bef9SDimitry Andric         Result &= maskTrailingOnes<uint64_t>(Index);
2200e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2201e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2202e8d8bef9SDimitry Andric       }
2203e8d8bef9SDimitry Andric       // TODO should we convert this to an AND if the RHS is constant?
2204e8d8bef9SDimitry Andric     }
2205e8d8bef9SDimitry Andric     break;
2206e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pext_32:
2207e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pext_64:
2208e8d8bef9SDimitry Andric     if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2209e8d8bef9SDimitry Andric       if (MaskC->isNullValue()) {
2210e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2211e8d8bef9SDimitry Andric       }
2212e8d8bef9SDimitry Andric       if (MaskC->isAllOnesValue()) {
2213e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2214e8d8bef9SDimitry Andric       }
2215e8d8bef9SDimitry Andric 
221681ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
221781ad6265SDimitry Andric       if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // Any single contiguous sequence of 1s anywhere in the mask simply
        // describes a subset of the input bits shifted to the appropriate
        // position.  Replace with the straightforward IR.
2221e8d8bef9SDimitry Andric         Value *Input = II.getArgOperand(0);
2222e8d8bef9SDimitry Andric         Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
222381ad6265SDimitry Andric         Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
222481ad6265SDimitry Andric         Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
2225e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Shifted);
2226e8d8bef9SDimitry Andric       }
2227e8d8bef9SDimitry Andric 
2228e8d8bef9SDimitry Andric       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2229e8d8bef9SDimitry Andric         uint64_t Src = SrcC->getZExtValue();
2230e8d8bef9SDimitry Andric         uint64_t Mask = MaskC->getZExtValue();
2231e8d8bef9SDimitry Andric         uint64_t Result = 0;
2232e8d8bef9SDimitry Andric         uint64_t BitToSet = 1;
2233e8d8bef9SDimitry Andric 
2234e8d8bef9SDimitry Andric         while (Mask) {
2235e8d8bef9SDimitry Andric           // Isolate lowest set bit.
2236e8d8bef9SDimitry Andric           uint64_t BitToTest = Mask & -Mask;
2237e8d8bef9SDimitry Andric           if (BitToTest & Src)
2238e8d8bef9SDimitry Andric             Result |= BitToSet;
2239e8d8bef9SDimitry Andric 
2240e8d8bef9SDimitry Andric           BitToSet <<= 1;
2241e8d8bef9SDimitry Andric           // Clear lowest set bit.
2242e8d8bef9SDimitry Andric           Mask &= Mask - 1;
2243e8d8bef9SDimitry Andric         }
2244e8d8bef9SDimitry Andric 
2245e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2246e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2247e8d8bef9SDimitry Andric       }
2248e8d8bef9SDimitry Andric     }
2249e8d8bef9SDimitry Andric     break;
2250e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pdep_32:
2251e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pdep_64:
2252e8d8bef9SDimitry Andric     if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2253e8d8bef9SDimitry Andric       if (MaskC->isNullValue()) {
2254e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2255e8d8bef9SDimitry Andric       }
2256e8d8bef9SDimitry Andric       if (MaskC->isAllOnesValue()) {
2257e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2258e8d8bef9SDimitry Andric       }
225981ad6265SDimitry Andric 
226081ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
226181ad6265SDimitry Andric       if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2262e8d8bef9SDimitry Andric         // Any single contiguous sequence of 1s anywhere in the mask simply
2263e8d8bef9SDimitry Andric         // describes a subset of the input bits shifted to the appropriate
2264e8d8bef9SDimitry Andric         // position.  Replace with the straightforward IR.
2265e8d8bef9SDimitry Andric         Value *Input = II.getArgOperand(0);
226681ad6265SDimitry Andric         Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
226781ad6265SDimitry Andric         Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
2268e8d8bef9SDimitry Andric         Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
2269e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Masked);
2270e8d8bef9SDimitry Andric       }
2271e8d8bef9SDimitry Andric 
2272e8d8bef9SDimitry Andric       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2273e8d8bef9SDimitry Andric         uint64_t Src = SrcC->getZExtValue();
2274e8d8bef9SDimitry Andric         uint64_t Mask = MaskC->getZExtValue();
2275e8d8bef9SDimitry Andric         uint64_t Result = 0;
2276e8d8bef9SDimitry Andric         uint64_t BitToTest = 1;
2277e8d8bef9SDimitry Andric 
2278e8d8bef9SDimitry Andric         while (Mask) {
2279e8d8bef9SDimitry Andric           // Isolate lowest set bit.
2280e8d8bef9SDimitry Andric           uint64_t BitToSet = Mask & -Mask;
2281e8d8bef9SDimitry Andric           if (BitToTest & Src)
2282e8d8bef9SDimitry Andric             Result |= BitToSet;
2283e8d8bef9SDimitry Andric 
2284e8d8bef9SDimitry Andric           BitToTest <<= 1;
2285e8d8bef9SDimitry Andric           // Clear lowest set bit.
2286e8d8bef9SDimitry Andric           Mask &= Mask - 1;
2287e8d8bef9SDimitry Andric         }
2288e8d8bef9SDimitry Andric 
2289e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2290e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2291e8d8bef9SDimitry Andric       }
2292e8d8bef9SDimitry Andric     }
2293e8d8bef9SDimitry Andric     break;
2294e8d8bef9SDimitry Andric 
2295e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvtss2si:
2296e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvtss2si64:
2297e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvttss2si:
2298e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvttss2si64:
2299e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvtsd2si:
2300e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvtsd2si64:
2301e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si:
2302e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si64:
2303e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2si32:
2304e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2si64:
2305e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2usi32:
2306e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2usi64:
2307e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2si32:
2308e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2si64:
2309e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2usi32:
2310e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2usi64:
2311e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2si:
2312e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2si64:
2313e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2usi:
2314e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2usi64:
2315e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2si:
2316e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2si64:
2317e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2usi:
2318e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2usi64: {
2319e8d8bef9SDimitry Andric     // These intrinsics only demand the 0th element of their input vectors. If
2320e8d8bef9SDimitry Andric     // we can simplify the input based on that, do so now.
2321e8d8bef9SDimitry Andric     Value *Arg = II.getArgOperand(0);
2322e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
2323e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2324e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
2325e8d8bef9SDimitry Andric     }
2326e8d8bef9SDimitry Andric     break;
2327e8d8bef9SDimitry Andric   }
2328e8d8bef9SDimitry Andric 
2329e8d8bef9SDimitry Andric   case Intrinsic::x86_mmx_pmovmskb:
2330e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_movmsk_ps:
2331e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_movmsk_pd:
2332e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pmovmskb_128:
2333e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_pd_256:
2334e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_ps_256:
2335e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pmovmskb:
2336e8d8bef9SDimitry Andric     if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
2337e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2338e8d8bef9SDimitry Andric     }
2339e8d8bef9SDimitry Andric     break;
2340e8d8bef9SDimitry Andric 
2341e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comieq_ss:
2342e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comige_ss:
2343e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comigt_ss:
2344e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comile_ss:
2345e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comilt_ss:
2346e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_comineq_ss:
2347e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomieq_ss:
2348e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomige_ss:
2349e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomigt_ss:
2350e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomile_ss:
2351e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomilt_ss:
2352e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_ucomineq_ss:
2353e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comieq_sd:
2354e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comige_sd:
2355e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comigt_sd:
2356e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comile_sd:
2357e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comilt_sd:
2358e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_comineq_sd:
2359e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomieq_sd:
2360e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomige_sd:
2361e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomigt_sd:
2362e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomile_sd:
2363e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomilt_sd:
2364e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_ucomineq_sd:
2365e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcomi_ss:
2366e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcomi_sd:
2367e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_cmp_ss:
2368e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_cmp_sd: {
2369e8d8bef9SDimitry Andric     // These intrinsics only demand the 0th element of their input vectors. If
2370e8d8bef9SDimitry Andric     // we can simplify the input based on that, do so now.
2371e8d8bef9SDimitry Andric     bool MadeChange = false;
2372e8d8bef9SDimitry Andric     Value *Arg0 = II.getArgOperand(0);
2373e8d8bef9SDimitry Andric     Value *Arg1 = II.getArgOperand(1);
2374e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
2375e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2376e8d8bef9SDimitry Andric       IC.replaceOperand(II, 0, V);
2377e8d8bef9SDimitry Andric       MadeChange = true;
2378e8d8bef9SDimitry Andric     }
2379e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2380e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, V);
2381e8d8bef9SDimitry Andric       MadeChange = true;
2382e8d8bef9SDimitry Andric     }
2383e8d8bef9SDimitry Andric     if (MadeChange) {
2384e8d8bef9SDimitry Andric       return &II;
2385e8d8bef9SDimitry Andric     }
2386e8d8bef9SDimitry Andric     break;
2387e8d8bef9SDimitry Andric   }
2388e8d8bef9SDimitry Andric 
2389e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_add_ps_512:
2390e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_div_ps_512:
2391e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mul_ps_512:
2392e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_sub_ps_512:
2393e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_add_pd_512:
2394e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_div_pd_512:
2395e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mul_pd_512:
2396e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_sub_pd_512:
2397e8d8bef9SDimitry Andric     // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2398e8d8bef9SDimitry Andric     // IR operations.
2399e8d8bef9SDimitry Andric     if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
2400e8d8bef9SDimitry Andric       if (R->getValue() == 4) {
2401e8d8bef9SDimitry Andric         Value *Arg0 = II.getArgOperand(0);
2402e8d8bef9SDimitry Andric         Value *Arg1 = II.getArgOperand(1);
2403e8d8bef9SDimitry Andric 
2404e8d8bef9SDimitry Andric         Value *V;
2405e8d8bef9SDimitry Andric         switch (IID) {
2406e8d8bef9SDimitry Andric         default:
2407e8d8bef9SDimitry Andric           llvm_unreachable("Case stmts out of sync!");
2408e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_add_ps_512:
2409e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_add_pd_512:
2410e8d8bef9SDimitry Andric           V = IC.Builder.CreateFAdd(Arg0, Arg1);
2411e8d8bef9SDimitry Andric           break;
2412e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_sub_ps_512:
2413e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_sub_pd_512:
2414e8d8bef9SDimitry Andric           V = IC.Builder.CreateFSub(Arg0, Arg1);
2415e8d8bef9SDimitry Andric           break;
2416e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mul_ps_512:
2417e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mul_pd_512:
2418e8d8bef9SDimitry Andric           V = IC.Builder.CreateFMul(Arg0, Arg1);
2419e8d8bef9SDimitry Andric           break;
2420e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_div_ps_512:
2421e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_div_pd_512:
2422e8d8bef9SDimitry Andric           V = IC.Builder.CreateFDiv(Arg0, Arg1);
2423e8d8bef9SDimitry Andric           break;
2424e8d8bef9SDimitry Andric         }
2425e8d8bef9SDimitry Andric 
2426e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
2427e8d8bef9SDimitry Andric       }
2428e8d8bef9SDimitry Andric     }
2429e8d8bef9SDimitry Andric     break;
2430e8d8bef9SDimitry Andric 
2431e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_ss_round:
2432e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_ss_round:
2433e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_ss_round:
2434e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_ss_round:
2435e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_sd_round:
2436e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_sd_round:
2437e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_sd_round:
2438e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_sd_round:
2439e8d8bef9SDimitry Andric     // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2440e8d8bef9SDimitry Andric     // IR operations.
2441e8d8bef9SDimitry Andric     if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
2442e8d8bef9SDimitry Andric       if (R->getValue() == 4) {
2443e8d8bef9SDimitry Andric         // Extract the element as scalars.
2444e8d8bef9SDimitry Andric         Value *Arg0 = II.getArgOperand(0);
2445e8d8bef9SDimitry Andric         Value *Arg1 = II.getArgOperand(1);
2446e8d8bef9SDimitry Andric         Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
2447e8d8bef9SDimitry Andric         Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
2448e8d8bef9SDimitry Andric 
2449e8d8bef9SDimitry Andric         Value *V;
2450e8d8bef9SDimitry Andric         switch (IID) {
2451e8d8bef9SDimitry Andric         default:
2452e8d8bef9SDimitry Andric           llvm_unreachable("Case stmts out of sync!");
2453e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_add_ss_round:
2454e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_add_sd_round:
2455e8d8bef9SDimitry Andric           V = IC.Builder.CreateFAdd(LHS, RHS);
2456e8d8bef9SDimitry Andric           break;
2457e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_sub_ss_round:
2458e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_sub_sd_round:
2459e8d8bef9SDimitry Andric           V = IC.Builder.CreateFSub(LHS, RHS);
2460e8d8bef9SDimitry Andric           break;
2461e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_mul_ss_round:
2462e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_mul_sd_round:
2463e8d8bef9SDimitry Andric           V = IC.Builder.CreateFMul(LHS, RHS);
2464e8d8bef9SDimitry Andric           break;
2465e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_div_ss_round:
2466e8d8bef9SDimitry Andric         case Intrinsic::x86_avx512_mask_div_sd_round:
2467e8d8bef9SDimitry Andric           V = IC.Builder.CreateFDiv(LHS, RHS);
2468e8d8bef9SDimitry Andric           break;
2469e8d8bef9SDimitry Andric         }
2470e8d8bef9SDimitry Andric 
2471e8d8bef9SDimitry Andric         // Handle the masking aspect of the intrinsic.
2472e8d8bef9SDimitry Andric         Value *Mask = II.getArgOperand(3);
2473e8d8bef9SDimitry Andric         auto *C = dyn_cast<ConstantInt>(Mask);
2474e8d8bef9SDimitry Andric         // We don't need a select if we know the mask bit is a 1.
2475e8d8bef9SDimitry Andric         if (!C || !C->getValue()[0]) {
2476e8d8bef9SDimitry Andric           // Cast the mask to an i1 vector and then extract the lowest element.
2477e8d8bef9SDimitry Andric           auto *MaskTy = FixedVectorType::get(
2478e8d8bef9SDimitry Andric               IC.Builder.getInt1Ty(),
2479e8d8bef9SDimitry Andric               cast<IntegerType>(Mask->getType())->getBitWidth());
2480e8d8bef9SDimitry Andric           Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
2481e8d8bef9SDimitry Andric           Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
2482e8d8bef9SDimitry Andric           // Extract the lowest element from the passthru operand.
2483e8d8bef9SDimitry Andric           Value *Passthru =
2484e8d8bef9SDimitry Andric               IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0);
2485e8d8bef9SDimitry Andric           V = IC.Builder.CreateSelect(Mask, V, Passthru);
2486e8d8bef9SDimitry Andric         }
2487e8d8bef9SDimitry Andric 
2488e8d8bef9SDimitry Andric         // Insert the result back into the original argument 0.
2489e8d8bef9SDimitry Andric         V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2490e8d8bef9SDimitry Andric 
2491e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
2492e8d8bef9SDimitry Andric       }
2493e8d8bef9SDimitry Andric     }
2494e8d8bef9SDimitry Andric     break;
2495e8d8bef9SDimitry Andric 
2496e8d8bef9SDimitry Andric   // Constant fold ashr( <A x Bi>, Ci ).
2497e8d8bef9SDimitry Andric   // Constant fold lshr( <A x Bi>, Ci ).
2498e8d8bef9SDimitry Andric   // Constant fold shl( <A x Bi>, Ci ).
2499e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrai_d:
2500e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrai_w:
2501e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrai_d:
2502e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrai_w:
2503e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_q_128:
2504e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_q_256:
2505e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_d_512:
2506e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_q_512:
2507e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrai_w_512:
2508e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrli_d:
2509e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrli_q:
2510e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrli_w:
2511e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrli_d:
2512e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrli_q:
2513e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrli_w:
2514e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrli_d_512:
2515e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrli_q_512:
2516e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrli_w_512:
2517e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pslli_d:
2518e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pslli_q:
2519e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pslli_w:
2520e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pslli_d:
2521e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pslli_q:
2522e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pslli_w:
2523e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pslli_d_512:
2524e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pslli_q_512:
2525e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pslli_w_512:
2526e8d8bef9SDimitry Andric     if (Value *V = simplifyX86immShift(II, IC.Builder)) {
2527e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2528e8d8bef9SDimitry Andric     }
2529e8d8bef9SDimitry Andric     break;
2530e8d8bef9SDimitry Andric 
2531e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psra_d:
2532e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psra_w:
2533e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psra_d:
2534e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psra_w:
2535e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_q_128:
2536e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_q_256:
2537e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_d_512:
2538e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_q_512:
2539e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psra_w_512:
2540e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrl_d:
2541e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrl_q:
2542e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psrl_w:
2543e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrl_d:
2544e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrl_q:
2545e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrl_w:
2546e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrl_d_512:
2547e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrl_q_512:
2548e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrl_w_512:
2549e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psll_d:
2550e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psll_q:
2551e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_psll_w:
2552e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psll_d:
2553e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psll_q:
2554e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psll_w:
2555e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psll_d_512:
2556e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psll_q_512:
2557e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psll_w_512: {
2558e8d8bef9SDimitry Andric     if (Value *V = simplifyX86immShift(II, IC.Builder)) {
2559e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2560e8d8bef9SDimitry Andric     }
2561e8d8bef9SDimitry Andric 
2562e8d8bef9SDimitry Andric     // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2563e8d8bef9SDimitry Andric     // operand to compute the shift amount.
2564e8d8bef9SDimitry Andric     Value *Arg1 = II.getArgOperand(1);
2565e8d8bef9SDimitry Andric     assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2566e8d8bef9SDimitry Andric            "Unexpected packed shift size");
2567e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
2568e8d8bef9SDimitry Andric 
2569e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2570e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 1, V);
2571e8d8bef9SDimitry Andric     }
2572e8d8bef9SDimitry Andric     break;
2573e8d8bef9SDimitry Andric   }
2574e8d8bef9SDimitry Andric 
2575e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_d:
2576e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_d_256:
2577e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_q:
2578e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psllv_q_256:
2579e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_d_512:
2580e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_q_512:
2581e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_w_128:
2582e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_w_256:
2583e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psllv_w_512:
2584e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrav_d:
2585e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrav_d_256:
2586e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_q_128:
2587e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_q_256:
2588e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_d_512:
2589e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_q_512:
2590e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_w_128:
2591e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_w_256:
2592e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrav_w_512:
2593e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d:
2594e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d_256:
2595e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q:
2596e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q_256:
2597e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_d_512:
2598e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_q_512:
2599e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_w_128:
2600e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_w_256:
2601e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_psrlv_w_512:
2602e8d8bef9SDimitry Andric     if (Value *V = simplifyX86varShift(II, IC.Builder)) {
2603e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2604e8d8bef9SDimitry Andric     }
2605e8d8bef9SDimitry Andric     break;
2606e8d8bef9SDimitry Andric 
2607e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packssdw_128:
2608e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packsswb_128:
2609e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packssdw:
2610e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packsswb:
2611e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packssdw_512:
2612e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packsswb_512:
2613e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
2614e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2615e8d8bef9SDimitry Andric     }
2616e8d8bef9SDimitry Andric     break;
2617e8d8bef9SDimitry Andric 
2618e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packuswb_128:
2619e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_packusdw:
2620e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packusdw:
2621e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packuswb:
2622e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packusdw_512:
2623e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packuswb_512:
2624e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
2625e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2626e8d8bef9SDimitry Andric     }
2627e8d8bef9SDimitry Andric     break;
2628e8d8bef9SDimitry Andric 
2629*0fca6ea1SDimitry Andric   case Intrinsic::x86_sse2_pmulh_w:
2630*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmulh_w:
2631*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmulh_w_512:
2632*0fca6ea1SDimitry Andric     if (Value *V = simplifyX86pmulh(II, IC.Builder, true, false)) {
2633*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2634*0fca6ea1SDimitry Andric     }
2635*0fca6ea1SDimitry Andric     break;
2636*0fca6ea1SDimitry Andric 
2637*0fca6ea1SDimitry Andric   case Intrinsic::x86_sse2_pmulhu_w:
2638*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmulhu_w:
2639*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmulhu_w_512:
2640*0fca6ea1SDimitry Andric     if (Value *V = simplifyX86pmulh(II, IC.Builder, false, false)) {
2641*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2642*0fca6ea1SDimitry Andric     }
2643*0fca6ea1SDimitry Andric     break;
2644*0fca6ea1SDimitry Andric 
2645*0fca6ea1SDimitry Andric   case Intrinsic::x86_ssse3_pmul_hr_sw_128:
2646*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmul_hr_sw:
2647*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmul_hr_sw_512:
2648*0fca6ea1SDimitry Andric     if (Value *V = simplifyX86pmulh(II, IC.Builder, true, true)) {
2649*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2650*0fca6ea1SDimitry Andric     }
2651*0fca6ea1SDimitry Andric     break;
2652*0fca6ea1SDimitry Andric 
2653*0fca6ea1SDimitry Andric   case Intrinsic::x86_sse2_pmadd_wd:
2654*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmadd_wd:
2655*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmaddw_d_512:
2656*0fca6ea1SDimitry Andric     if (Value *V = simplifyX86pmadd(II, IC.Builder, true)) {
2657*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2658*0fca6ea1SDimitry Andric     }
2659*0fca6ea1SDimitry Andric     break;
2660*0fca6ea1SDimitry Andric 
2661*0fca6ea1SDimitry Andric   case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
2662*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmadd_ub_sw:
2663*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmaddubs_w_512:
2664*0fca6ea1SDimitry Andric     if (Value *V = simplifyX86pmadd(II, IC.Builder, false)) {
2665*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2666*0fca6ea1SDimitry Andric     }
2667*0fca6ea1SDimitry Andric     break;
2668*0fca6ea1SDimitry Andric 
2669e8d8bef9SDimitry Andric   case Intrinsic::x86_pclmulqdq:
2670e8d8bef9SDimitry Andric   case Intrinsic::x86_pclmulqdq_256:
2671e8d8bef9SDimitry Andric   case Intrinsic::x86_pclmulqdq_512: {
2672e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
2673e8d8bef9SDimitry Andric       unsigned Imm = C->getZExtValue();
2674e8d8bef9SDimitry Andric 
2675e8d8bef9SDimitry Andric       bool MadeChange = false;
2676e8d8bef9SDimitry Andric       Value *Arg0 = II.getArgOperand(0);
2677e8d8bef9SDimitry Andric       Value *Arg1 = II.getArgOperand(1);
2678e8d8bef9SDimitry Andric       unsigned VWidth =
2679e8d8bef9SDimitry Andric           cast<FixedVectorType>(Arg0->getType())->getNumElements();
2680e8d8bef9SDimitry Andric 
2681e8d8bef9SDimitry Andric       APInt UndefElts1(VWidth, 0);
2682e8d8bef9SDimitry Andric       APInt DemandedElts1 =
2683e8d8bef9SDimitry Andric           APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
2684e8d8bef9SDimitry Andric       if (Value *V =
2685e8d8bef9SDimitry Andric               IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
2686e8d8bef9SDimitry Andric         IC.replaceOperand(II, 0, V);
2687e8d8bef9SDimitry Andric         MadeChange = true;
2688e8d8bef9SDimitry Andric       }
2689e8d8bef9SDimitry Andric 
2690e8d8bef9SDimitry Andric       APInt UndefElts2(VWidth, 0);
2691e8d8bef9SDimitry Andric       APInt DemandedElts2 =
2692e8d8bef9SDimitry Andric           APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
2693e8d8bef9SDimitry Andric       if (Value *V =
2694e8d8bef9SDimitry Andric               IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
2695e8d8bef9SDimitry Andric         IC.replaceOperand(II, 1, V);
2696e8d8bef9SDimitry Andric         MadeChange = true;
2697e8d8bef9SDimitry Andric       }
2698e8d8bef9SDimitry Andric 
2699e8d8bef9SDimitry Andric       // If either input elements are undef, the result is zero.
2700e8d8bef9SDimitry Andric       if (DemandedElts1.isSubsetOf(UndefElts1) ||
2701e8d8bef9SDimitry Andric           DemandedElts2.isSubsetOf(UndefElts2)) {
2702e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2703e8d8bef9SDimitry Andric                                       ConstantAggregateZero::get(II.getType()));
2704e8d8bef9SDimitry Andric       }
2705e8d8bef9SDimitry Andric 
2706e8d8bef9SDimitry Andric       if (MadeChange) {
2707e8d8bef9SDimitry Andric         return &II;
2708e8d8bef9SDimitry Andric       }
2709e8d8bef9SDimitry Andric     }
2710e8d8bef9SDimitry Andric     break;
2711e8d8bef9SDimitry Andric   }
2712e8d8bef9SDimitry Andric 
2713e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_insertps:
2714e8d8bef9SDimitry Andric     if (Value *V = simplifyX86insertps(II, IC.Builder)) {
2715e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2716e8d8bef9SDimitry Andric     }
2717e8d8bef9SDimitry Andric     break;
2718e8d8bef9SDimitry Andric 
2719e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrq: {
2720e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
2721e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
2722e8d8bef9SDimitry Andric     unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
2723e8d8bef9SDimitry Andric     unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
2724e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2725e8d8bef9SDimitry Andric            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2726e8d8bef9SDimitry Andric            VWidth1 == 16 && "Unexpected operand sizes");
2727e8d8bef9SDimitry Andric 
2728e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
2729fe6060f1SDimitry Andric     auto *C1 = dyn_cast<Constant>(Op1);
2730fe6060f1SDimitry Andric     auto *CILength =
2731e8d8bef9SDimitry Andric         C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2732e8d8bef9SDimitry Andric            : nullptr;
2733fe6060f1SDimitry Andric     auto *CIIndex =
2734e8d8bef9SDimitry Andric         C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2735e8d8bef9SDimitry Andric            : nullptr;
2736e8d8bef9SDimitry Andric 
2737e8d8bef9SDimitry Andric     // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2738e8d8bef9SDimitry Andric     if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
2739e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2740e8d8bef9SDimitry Andric     }
2741e8d8bef9SDimitry Andric 
2742e8d8bef9SDimitry Andric     // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2743e8d8bef9SDimitry Andric     // operands and the lowest 16-bits of the second.
2744e8d8bef9SDimitry Andric     bool MadeChange = false;
2745e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2746e8d8bef9SDimitry Andric       IC.replaceOperand(II, 0, V);
2747e8d8bef9SDimitry Andric       MadeChange = true;
2748e8d8bef9SDimitry Andric     }
2749e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2750e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, V);
2751e8d8bef9SDimitry Andric       MadeChange = true;
2752e8d8bef9SDimitry Andric     }
2753e8d8bef9SDimitry Andric     if (MadeChange) {
2754e8d8bef9SDimitry Andric       return &II;
2755e8d8bef9SDimitry Andric     }
2756e8d8bef9SDimitry Andric     break;
2757e8d8bef9SDimitry Andric   }
2758e8d8bef9SDimitry Andric 
2759e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrqi: {
2760e8d8bef9SDimitry Andric     // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2761e8d8bef9SDimitry Andric     // bits of the lower 64-bits. The upper 64-bits are undefined.
2762e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
2763e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
2764e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2765e8d8bef9SDimitry Andric            "Unexpected operand size");
2766e8d8bef9SDimitry Andric 
2767e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
2768fe6060f1SDimitry Andric     auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
2769fe6060f1SDimitry Andric     auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));
2770e8d8bef9SDimitry Andric 
2771e8d8bef9SDimitry Andric     // Attempt to simplify to a constant or shuffle vector.
2772e8d8bef9SDimitry Andric     if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
2773e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2774e8d8bef9SDimitry Andric     }
2775e8d8bef9SDimitry Andric 
2776e8d8bef9SDimitry Andric     // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2777e8d8bef9SDimitry Andric     // operand.
2778e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2779e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
2780e8d8bef9SDimitry Andric     }
2781e8d8bef9SDimitry Andric     break;
2782e8d8bef9SDimitry Andric   }
2783e8d8bef9SDimitry Andric 
2784e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertq: {
2785e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
2786e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
2787e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
2788e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2789e8d8bef9SDimitry Andric            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2790e8d8bef9SDimitry Andric            cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
2791e8d8bef9SDimitry Andric            "Unexpected operand size");
2792e8d8bef9SDimitry Andric 
2793e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
2794fe6060f1SDimitry Andric     auto *C1 = dyn_cast<Constant>(Op1);
2795fe6060f1SDimitry Andric     auto *CI11 =
2796e8d8bef9SDimitry Andric         C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2797e8d8bef9SDimitry Andric            : nullptr;
2798e8d8bef9SDimitry Andric 
2799e8d8bef9SDimitry Andric     // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2800e8d8bef9SDimitry Andric     if (CI11) {
2801e8d8bef9SDimitry Andric       const APInt &V11 = CI11->getValue();
2802e8d8bef9SDimitry Andric       APInt Len = V11.zextOrTrunc(6);
2803e8d8bef9SDimitry Andric       APInt Idx = V11.lshr(8).zextOrTrunc(6);
2804e8d8bef9SDimitry Andric       if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
2805e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
2806e8d8bef9SDimitry Andric       }
2807e8d8bef9SDimitry Andric     }
2808e8d8bef9SDimitry Andric 
2809e8d8bef9SDimitry Andric     // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2810e8d8bef9SDimitry Andric     // operand.
2811e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2812e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
2813e8d8bef9SDimitry Andric     }
2814e8d8bef9SDimitry Andric     break;
2815e8d8bef9SDimitry Andric   }
2816e8d8bef9SDimitry Andric 
2817e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertqi: {
2818e8d8bef9SDimitry Andric     // INSERTQI: Extract lowest Length bits from lower half of second source and
2819e8d8bef9SDimitry Andric     // insert over first source starting at Index bit. The upper 64-bits are
2820e8d8bef9SDimitry Andric     // undefined.
2821e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
2822e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
2823e8d8bef9SDimitry Andric     unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
2824e8d8bef9SDimitry Andric     unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
2825e8d8bef9SDimitry Andric     assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2826e8d8bef9SDimitry Andric            Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2827e8d8bef9SDimitry Andric            VWidth1 == 2 && "Unexpected operand sizes");
2828e8d8bef9SDimitry Andric 
2829e8d8bef9SDimitry Andric     // See if we're dealing with constant values.
2830fe6060f1SDimitry Andric     auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
2831fe6060f1SDimitry Andric     auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
2832e8d8bef9SDimitry Andric 
2833e8d8bef9SDimitry Andric     // Attempt to simplify to a constant or shuffle vector.
2834e8d8bef9SDimitry Andric     if (CILength && CIIndex) {
2835e8d8bef9SDimitry Andric       APInt Len = CILength->getValue().zextOrTrunc(6);
2836e8d8bef9SDimitry Andric       APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2837e8d8bef9SDimitry Andric       if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
2838e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, V);
2839e8d8bef9SDimitry Andric       }
2840e8d8bef9SDimitry Andric     }
2841e8d8bef9SDimitry Andric 
2842e8d8bef9SDimitry Andric     // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2843e8d8bef9SDimitry Andric     // operands.
2844e8d8bef9SDimitry Andric     bool MadeChange = false;
2845e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2846e8d8bef9SDimitry Andric       IC.replaceOperand(II, 0, V);
2847e8d8bef9SDimitry Andric       MadeChange = true;
2848e8d8bef9SDimitry Andric     }
2849e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2850e8d8bef9SDimitry Andric       IC.replaceOperand(II, 1, V);
2851e8d8bef9SDimitry Andric       MadeChange = true;
2852e8d8bef9SDimitry Andric     }
2853e8d8bef9SDimitry Andric     if (MadeChange) {
2854e8d8bef9SDimitry Andric       return &II;
2855e8d8bef9SDimitry Andric     }
2856e8d8bef9SDimitry Andric     break;
2857e8d8bef9SDimitry Andric   }
2858e8d8bef9SDimitry Andric 
2859e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_pblendvb:
2860e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_blendvps:
2861e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_blendvpd:
2862e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_blendv_ps_256:
2863e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_blendv_pd_256:
2864e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pblendvb: {
2865e8d8bef9SDimitry Andric     // fold (blend A, A, Mask) -> A
2866e8d8bef9SDimitry Andric     Value *Op0 = II.getArgOperand(0);
2867e8d8bef9SDimitry Andric     Value *Op1 = II.getArgOperand(1);
2868e8d8bef9SDimitry Andric     Value *Mask = II.getArgOperand(2);
2869e8d8bef9SDimitry Andric     if (Op0 == Op1) {
2870e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
2871e8d8bef9SDimitry Andric     }
2872e8d8bef9SDimitry Andric 
2873e8d8bef9SDimitry Andric     // Zero Mask - select 1st argument.
2874e8d8bef9SDimitry Andric     if (isa<ConstantAggregateZero>(Mask)) {
2875e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, Op0);
2876e8d8bef9SDimitry Andric     }
2877e8d8bef9SDimitry Andric 
2878e8d8bef9SDimitry Andric     // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2879e8d8bef9SDimitry Andric     if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2880*0fca6ea1SDimitry Andric       Constant *NewSelector =
2881*0fca6ea1SDimitry Andric           getNegativeIsTrueBoolVec(ConstantMask, IC.getDataLayout());
2882e8d8bef9SDimitry Andric       return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2883e8d8bef9SDimitry Andric     }
2884e8d8bef9SDimitry Andric 
2885*0fca6ea1SDimitry Andric     Mask = InstCombiner::peekThroughBitcast(Mask);
2886*0fca6ea1SDimitry Andric 
2887*0fca6ea1SDimitry Andric     // Peek through a one-use shuffle - VectorCombine should have simplified
2888*0fca6ea1SDimitry Andric     // this for cases where we're splitting wider vectors to use blendv
2889*0fca6ea1SDimitry Andric     // intrinsics.
2890*0fca6ea1SDimitry Andric     Value *MaskSrc = nullptr;
2891*0fca6ea1SDimitry Andric     ArrayRef<int> ShuffleMask;
2892*0fca6ea1SDimitry Andric     if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
2893*0fca6ea1SDimitry Andric                                        m_Mask(ShuffleMask))))) {
2894*0fca6ea1SDimitry Andric       // Bail if the shuffle was irregular or contains undefs.
2895*0fca6ea1SDimitry Andric       int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
2896*0fca6ea1SDimitry Andric       if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
2897*0fca6ea1SDimitry Andric           any_of(ShuffleMask,
2898*0fca6ea1SDimitry Andric                  [NumElts](int M) { return M < 0 || M >= NumElts; }))
2899*0fca6ea1SDimitry Andric         break;
2900*0fca6ea1SDimitry Andric       Mask = InstCombiner::peekThroughBitcast(MaskSrc);
2901*0fca6ea1SDimitry Andric     }
2902*0fca6ea1SDimitry Andric 
2903e8d8bef9SDimitry Andric     // Convert to a vector select if we can bypass casts and find a boolean
2904e8d8bef9SDimitry Andric     // vector condition value.
2905e8d8bef9SDimitry Andric     Value *BoolVec;
2906*0fca6ea1SDimitry Andric     if (match(Mask, m_SExt(m_Value(BoolVec))) &&
2907e8d8bef9SDimitry Andric         BoolVec->getType()->isVectorTy() &&
2908e8d8bef9SDimitry Andric         BoolVec->getType()->getScalarSizeInBits() == 1) {
2909*0fca6ea1SDimitry Andric       auto *MaskTy = cast<FixedVectorType>(Mask->getType());
2910*0fca6ea1SDimitry Andric       auto *OpTy = cast<FixedVectorType>(II.getType());
2911*0fca6ea1SDimitry Andric       unsigned NumMaskElts = MaskTy->getNumElements();
2912*0fca6ea1SDimitry Andric       unsigned NumOperandElts = OpTy->getNumElements();
2913*0fca6ea1SDimitry Andric 
2914*0fca6ea1SDimitry Andric       // If we peeked through a shuffle, reapply the shuffle to the bool vector.
2915*0fca6ea1SDimitry Andric       if (MaskSrc) {
2916*0fca6ea1SDimitry Andric         unsigned NumMaskSrcElts =
2917*0fca6ea1SDimitry Andric             cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
2918*0fca6ea1SDimitry Andric         NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
2919*0fca6ea1SDimitry Andric         // Multiple mask bits maps to the same operand element - bail out.
2920*0fca6ea1SDimitry Andric         if (NumMaskElts > NumOperandElts)
2921*0fca6ea1SDimitry Andric           break;
2922*0fca6ea1SDimitry Andric         SmallVector<int> ScaledMask;
2923*0fca6ea1SDimitry Andric         if (!llvm::scaleShuffleMaskElts(NumMaskElts, ShuffleMask, ScaledMask))
2924*0fca6ea1SDimitry Andric           break;
2925*0fca6ea1SDimitry Andric         BoolVec = IC.Builder.CreateShuffleVector(BoolVec, ScaledMask);
2926*0fca6ea1SDimitry Andric         MaskTy = FixedVectorType::get(MaskTy->getElementType(), NumMaskElts);
2927*0fca6ea1SDimitry Andric       }
2928*0fca6ea1SDimitry Andric       assert(MaskTy->getPrimitiveSizeInBits() ==
2929*0fca6ea1SDimitry Andric                  OpTy->getPrimitiveSizeInBits() &&
2930e8d8bef9SDimitry Andric              "Not expecting mask and operands with different sizes");
2931e8d8bef9SDimitry Andric 
2932e8d8bef9SDimitry Andric       if (NumMaskElts == NumOperandElts) {
2933e8d8bef9SDimitry Andric         return SelectInst::Create(BoolVec, Op1, Op0);
2934e8d8bef9SDimitry Andric       }
2935e8d8bef9SDimitry Andric 
2936e8d8bef9SDimitry Andric       // If the mask has less elements than the operands, each mask bit maps to
2937e8d8bef9SDimitry Andric       // multiple elements of the operands. Bitcast back and forth.
2938e8d8bef9SDimitry Andric       if (NumMaskElts < NumOperandElts) {
2939*0fca6ea1SDimitry Andric         Value *CastOp0 = IC.Builder.CreateBitCast(Op0, MaskTy);
2940*0fca6ea1SDimitry Andric         Value *CastOp1 = IC.Builder.CreateBitCast(Op1, MaskTy);
2941e8d8bef9SDimitry Andric         Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
2942e8d8bef9SDimitry Andric         return new BitCastInst(Sel, II.getType());
2943e8d8bef9SDimitry Andric       }
2944e8d8bef9SDimitry Andric     }
2945e8d8bef9SDimitry Andric 
2946e8d8bef9SDimitry Andric     break;
2947e8d8bef9SDimitry Andric   }
2948e8d8bef9SDimitry Andric 
2949e8d8bef9SDimitry Andric   case Intrinsic::x86_ssse3_pshuf_b_128:
2950e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pshuf_b:
2951e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pshuf_b_512:
2952e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
2953e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2954e8d8bef9SDimitry Andric     }
2955e8d8bef9SDimitry Andric     break;
2956e8d8bef9SDimitry Andric 
2957e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps:
2958e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps_256:
2959e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_ps_512:
2960e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd:
2961e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd_256:
2962e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_pd_512:
2963e8d8bef9SDimitry Andric     if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
2964e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2965e8d8bef9SDimitry Andric     }
2966e8d8bef9SDimitry Andric     break;
2967e8d8bef9SDimitry Andric 
2968e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permd:
2969e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permps:
2970e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_df_256:
2971e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_df_512:
2972e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_di_256:
2973e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_di_512:
2974e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_128:
2975e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_256:
2976e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_512:
2977e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_128:
2978e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_256:
2979e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_512:
2980e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_sf_512:
2981e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_si_512:
2982e8d8bef9SDimitry Andric     if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
2983e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2984e8d8bef9SDimitry Andric     }
2985e8d8bef9SDimitry Andric     break;
2986e8d8bef9SDimitry Andric 
2987*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_d_128:
2988*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_d_256:
2989*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_d_512:
2990*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_hi_128:
2991*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_hi_256:
2992*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_hi_512:
2993*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_pd_128:
2994*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_pd_256:
2995*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_pd_512:
2996*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_ps_128:
2997*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_ps_256:
2998*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_ps_512:
2999*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_q_128:
3000*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_q_256:
3001*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_q_512:
3002*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_qi_128:
3003*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_qi_256:
3004*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_vpermi2var_qi_512:
3005*0fca6ea1SDimitry Andric     if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
3006*0fca6ea1SDimitry Andric       return IC.replaceInstUsesWith(II, V);
3007*0fca6ea1SDimitry Andric     }
3008*0fca6ea1SDimitry Andric     break;
3009*0fca6ea1SDimitry Andric 
3010e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_ps:
3011e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_pd:
3012e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_ps_256:
3013e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_pd_256:
3014e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_d:
3015e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_q:
3016e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_d_256:
3017e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_q_256:
3018e8d8bef9SDimitry Andric     if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
3019e8d8bef9SDimitry Andric       return I;
3020e8d8bef9SDimitry Andric     }
3021e8d8bef9SDimitry Andric     break;
3022e8d8bef9SDimitry Andric 
3023e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_maskmov_dqu:
3024e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_ps:
3025e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_pd:
3026e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_ps_256:
3027e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_pd_256:
3028e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_d:
3029e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_q:
3030e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_d_256:
3031e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_q_256:
3032e8d8bef9SDimitry Andric     if (simplifyX86MaskedStore(II, IC)) {
3033e8d8bef9SDimitry Andric       return nullptr;
3034e8d8bef9SDimitry Andric     }
3035e8d8bef9SDimitry Andric     break;
3036e8d8bef9SDimitry Andric 
3037e8d8bef9SDimitry Andric   case Intrinsic::x86_addcarry_32:
3038e8d8bef9SDimitry Andric   case Intrinsic::x86_addcarry_64:
3039e8d8bef9SDimitry Andric     if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
3040e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
3041e8d8bef9SDimitry Andric     }
3042e8d8bef9SDimitry Andric     break;
3043e8d8bef9SDimitry Andric 
304406c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_d_128:
304506c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_d_256:
304606c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_d_512:
304706c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_q_128:
304806c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_q_256:
304906c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_q_512:
305006c3fb27SDimitry Andric     if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
305106c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, V);
305206c3fb27SDimitry Andric     }
305306c3fb27SDimitry Andric     break;
3054e8d8bef9SDimitry Andric   default:
3055e8d8bef9SDimitry Andric     break;
3056e8d8bef9SDimitry Andric   }
3057bdd1243dSDimitry Andric   return std::nullopt;
3058e8d8bef9SDimitry Andric }
3059e8d8bef9SDimitry Andric 
3060bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
3061e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
3062e8d8bef9SDimitry Andric     bool &KnownBitsComputed) const {
3063e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
3064e8d8bef9SDimitry Andric   default:
3065e8d8bef9SDimitry Andric     break;
3066e8d8bef9SDimitry Andric   case Intrinsic::x86_mmx_pmovmskb:
3067e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_movmsk_ps:
3068e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_movmsk_pd:
3069e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pmovmskb_128:
3070e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_ps_256:
3071e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_pd_256:
3072e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pmovmskb: {
3073e8d8bef9SDimitry Andric     // MOVMSK copies the vector elements' sign bits to the low bits
3074e8d8bef9SDimitry Andric     // and zeros the high bits.
3075e8d8bef9SDimitry Andric     unsigned ArgWidth;
3076e8d8bef9SDimitry Andric     if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
3077e8d8bef9SDimitry Andric       ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
3078e8d8bef9SDimitry Andric     } else {
3079fe6060f1SDimitry Andric       auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
3080e8d8bef9SDimitry Andric       ArgWidth = ArgType->getNumElements();
3081e8d8bef9SDimitry Andric     }
3082e8d8bef9SDimitry Andric 
3083e8d8bef9SDimitry Andric     // If we don't need any of low bits then return zero,
3084e8d8bef9SDimitry Andric     // we know that DemandedMask is non-zero already.
3085e8d8bef9SDimitry Andric     APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
3086e8d8bef9SDimitry Andric     Type *VTy = II.getType();
3087349cc55cSDimitry Andric     if (DemandedElts.isZero()) {
3088e8d8bef9SDimitry Andric       return ConstantInt::getNullValue(VTy);
3089e8d8bef9SDimitry Andric     }
3090e8d8bef9SDimitry Andric 
3091e8d8bef9SDimitry Andric     // We know that the upper bits are set to zero.
3092e8d8bef9SDimitry Andric     Known.Zero.setBitsFrom(ArgWidth);
3093e8d8bef9SDimitry Andric     KnownBitsComputed = true;
3094e8d8bef9SDimitry Andric     break;
3095e8d8bef9SDimitry Andric   }
3096e8d8bef9SDimitry Andric   }
3097bdd1243dSDimitry Andric   return std::nullopt;
3098e8d8bef9SDimitry Andric }
3099e8d8bef9SDimitry Andric 
3100bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
3101e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
3102e8d8bef9SDimitry Andric     APInt &UndefElts2, APInt &UndefElts3,
3103e8d8bef9SDimitry Andric     std::function<void(Instruction *, unsigned, APInt, APInt &)>
3104e8d8bef9SDimitry Andric         simplifyAndSetOp) const {
3105e8d8bef9SDimitry Andric   unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
3106e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
3107e8d8bef9SDimitry Andric   default:
3108e8d8bef9SDimitry Andric     break;
3109e8d8bef9SDimitry Andric   case Intrinsic::x86_xop_vfrcz_ss:
3110e8d8bef9SDimitry Andric   case Intrinsic::x86_xop_vfrcz_sd:
3111e8d8bef9SDimitry Andric     // The instructions for these intrinsics are speced to zero upper bits not
3112e8d8bef9SDimitry Andric     // pass them through like other scalar intrinsics. So we shouldn't just
3113e8d8bef9SDimitry Andric     // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
3114e8d8bef9SDimitry Andric     // Instead we should return a zero vector.
3115e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
3116e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
3117e8d8bef9SDimitry Andric       return ConstantAggregateZero::get(II.getType());
3118e8d8bef9SDimitry Andric     }
3119e8d8bef9SDimitry Andric 
3120e8d8bef9SDimitry Andric     // Only the lower element is used.
3121e8d8bef9SDimitry Andric     DemandedElts = 1;
3122e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3123e8d8bef9SDimitry Andric 
3124e8d8bef9SDimitry Andric     // Only the lower element is undefined. The high elements are zero.
3125e8d8bef9SDimitry Andric     UndefElts = UndefElts[0];
3126e8d8bef9SDimitry Andric     break;
3127e8d8bef9SDimitry Andric 
3128e8d8bef9SDimitry Andric   // Unary scalar-as-vector operations that work column-wise.
3129e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_rcp_ss:
3130e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_rsqrt_ss:
3131e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3132e8d8bef9SDimitry Andric 
3133e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
3134e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
3135e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
3136e8d8bef9SDimitry Andric       return II.getArgOperand(0);
3137e8d8bef9SDimitry Andric     }
3138e8d8bef9SDimitry Andric     // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
3139e8d8bef9SDimitry Andric     // checks).
3140e8d8bef9SDimitry Andric     break;
3141e8d8bef9SDimitry Andric 
3142e8d8bef9SDimitry Andric   // Binary scalar-as-vector operations that work column-wise. The high
3143e8d8bef9SDimitry Andric   // elements come from operand 0. The low element is a function of both
3144e8d8bef9SDimitry Andric   // operands.
3145e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_min_ss:
3146e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_max_ss:
3147e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cmp_ss:
3148e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_min_sd:
3149e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_max_sd:
3150e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cmp_sd: {
3151e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3152e8d8bef9SDimitry Andric 
3153e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
3154e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
3155e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
3156e8d8bef9SDimitry Andric       return II.getArgOperand(0);
3157e8d8bef9SDimitry Andric     }
3158e8d8bef9SDimitry Andric 
3159e8d8bef9SDimitry Andric     // Only lower element is used for operand 1.
3160e8d8bef9SDimitry Andric     DemandedElts = 1;
3161e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3162e8d8bef9SDimitry Andric 
3163e8d8bef9SDimitry Andric     // Lower element is undefined if both lower elements are undefined.
3164e8d8bef9SDimitry Andric     // Consider things like undef&0.  The result is known zero, not undef.
3165e8d8bef9SDimitry Andric     if (!UndefElts2[0])
3166e8d8bef9SDimitry Andric       UndefElts.clearBit(0);
3167e8d8bef9SDimitry Andric 
3168e8d8bef9SDimitry Andric     break;
3169e8d8bef9SDimitry Andric   }
3170e8d8bef9SDimitry Andric 
3171e8d8bef9SDimitry Andric   // Binary scalar-as-vector operations that work column-wise. The high
3172e8d8bef9SDimitry Andric   // elements come from operand 0 and the low element comes from operand 1.
3173e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_round_ss:
3174e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_round_sd: {
3175e8d8bef9SDimitry Andric     // Don't use the low element of operand 0.
3176e8d8bef9SDimitry Andric     APInt DemandedElts2 = DemandedElts;
3177e8d8bef9SDimitry Andric     DemandedElts2.clearBit(0);
3178e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
3179e8d8bef9SDimitry Andric 
3180e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
3181e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
3182e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
3183e8d8bef9SDimitry Andric       return II.getArgOperand(0);
3184e8d8bef9SDimitry Andric     }
3185e8d8bef9SDimitry Andric 
3186e8d8bef9SDimitry Andric     // Only lower element is used for operand 1.
3187e8d8bef9SDimitry Andric     DemandedElts = 1;
3188e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3189e8d8bef9SDimitry Andric 
3190e8d8bef9SDimitry Andric     // Take the high undef elements from operand 0 and take the lower element
3191e8d8bef9SDimitry Andric     // from operand 1.
3192e8d8bef9SDimitry Andric     UndefElts.clearBit(0);
3193e8d8bef9SDimitry Andric     UndefElts |= UndefElts2[0];
3194e8d8bef9SDimitry Andric     break;
3195e8d8bef9SDimitry Andric   }
3196e8d8bef9SDimitry Andric 
3197e8d8bef9SDimitry Andric   // Three input scalar-as-vector operations that work column-wise. The high
3198e8d8bef9SDimitry Andric   // elements come from operand 0 and the low element is a function of all
3199e8d8bef9SDimitry Andric   // three inputs.
3200e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_ss_round:
3201e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_ss_round:
3202e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_ss_round:
3203e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_ss_round:
3204e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_max_ss_round:
3205e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_min_ss_round:
3206e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_sd_round:
3207e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_sd_round:
3208e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_sd_round:
3209e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_sd_round:
3210e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_max_sd_round:
3211e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_min_sd_round:
3212e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3213e8d8bef9SDimitry Andric 
3214e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
3215e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
3216e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
3217e8d8bef9SDimitry Andric       return II.getArgOperand(0);
3218e8d8bef9SDimitry Andric     }
3219e8d8bef9SDimitry Andric 
3220e8d8bef9SDimitry Andric     // Only lower element is used for operand 1 and 2.
3221e8d8bef9SDimitry Andric     DemandedElts = 1;
3222e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3223e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
3224e8d8bef9SDimitry Andric 
3225e8d8bef9SDimitry Andric     // Lower element is undefined if all three lower elements are undefined.
3226e8d8bef9SDimitry Andric     // Consider things like undef&0.  The result is known zero, not undef.
3227e8d8bef9SDimitry Andric     if (!UndefElts2[0] || !UndefElts3[0])
3228e8d8bef9SDimitry Andric       UndefElts.clearBit(0);
3229e8d8bef9SDimitry Andric     break;
3230e8d8bef9SDimitry Andric 
3231e8d8bef9SDimitry Andric   // TODO: Add fmaddsub support?
3232e8d8bef9SDimitry Andric   case Intrinsic::x86_sse3_addsub_pd:
3233e8d8bef9SDimitry Andric   case Intrinsic::x86_sse3_addsub_ps:
3234e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_addsub_pd_256:
3235e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_addsub_ps_256: {
3236e8d8bef9SDimitry Andric     // If none of the even or none of the odd lanes are required, turn this
3237e8d8bef9SDimitry Andric     // into a generic FP math instruction.
3238e8d8bef9SDimitry Andric     APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
3239e8d8bef9SDimitry Andric     APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
3240e8d8bef9SDimitry Andric     bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
3241e8d8bef9SDimitry Andric     bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
3242e8d8bef9SDimitry Andric     if (IsSubOnly || IsAddOnly) {
3243e8d8bef9SDimitry Andric       assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
3244e8d8bef9SDimitry Andric       IRBuilderBase::InsertPointGuard Guard(IC.Builder);
3245e8d8bef9SDimitry Andric       IC.Builder.SetInsertPoint(&II);
3246e8d8bef9SDimitry Andric       Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
3247e8d8bef9SDimitry Andric       return IC.Builder.CreateBinOp(
3248e8d8bef9SDimitry Andric           IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3249e8d8bef9SDimitry Andric     }
3250e8d8bef9SDimitry Andric 
3251e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3252e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3253e8d8bef9SDimitry Andric     UndefElts &= UndefElts2;
3254e8d8bef9SDimitry Andric     break;
3255e8d8bef9SDimitry Andric   }
3256e8d8bef9SDimitry Andric 
325781ad6265SDimitry Andric   // General per-element vector operations.
325881ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_d:
325981ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_d_256:
326081ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_q:
326181ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_q_256:
326281ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d:
326381ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d_256:
326481ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q:
326581ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q_256:
326681ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrav_d:
326781ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrav_d_256: {
326881ad6265SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
326981ad6265SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
327081ad6265SDimitry Andric     UndefElts &= UndefElts2;
327181ad6265SDimitry Andric     break;
327281ad6265SDimitry Andric   }
327381ad6265SDimitry Andric 
3274*0fca6ea1SDimitry Andric   case Intrinsic::x86_sse2_pmulh_w:
3275*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmulh_w:
3276*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmulh_w_512:
3277*0fca6ea1SDimitry Andric   case Intrinsic::x86_sse2_pmulhu_w:
3278*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmulhu_w:
3279*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmulhu_w_512:
3280*0fca6ea1SDimitry Andric   case Intrinsic::x86_ssse3_pmul_hr_sw_128:
3281*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmul_hr_sw:
3282*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmul_hr_sw_512: {
3283*0fca6ea1SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3284*0fca6ea1SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3285*0fca6ea1SDimitry Andric     // NOTE: mulh(undef,undef) != undef.
3286*0fca6ea1SDimitry Andric     break;
3287*0fca6ea1SDimitry Andric   }
3288*0fca6ea1SDimitry Andric 
3289e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packssdw_128:
3290e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packsswb_128:
3291e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packuswb_128:
3292e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_packusdw:
3293e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packssdw:
3294e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packsswb:
3295e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packusdw:
3296e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packuswb:
3297e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packssdw_512:
3298e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packsswb_512:
3299e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packusdw_512:
3300e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packuswb_512: {
3301e8d8bef9SDimitry Andric     auto *Ty0 = II.getArgOperand(0)->getType();
3302e8d8bef9SDimitry Andric     unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
3303e8d8bef9SDimitry Andric     assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
3304e8d8bef9SDimitry Andric 
3305e8d8bef9SDimitry Andric     unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3306e8d8bef9SDimitry Andric     unsigned VWidthPerLane = VWidth / NumLanes;
3307e8d8bef9SDimitry Andric     unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3308e8d8bef9SDimitry Andric 
3309e8d8bef9SDimitry Andric     // Per lane, pack the elements of the first input and then the second.
3310e8d8bef9SDimitry Andric     // e.g.
3311e8d8bef9SDimitry Andric     // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
3312e8d8bef9SDimitry Andric     // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
3313e8d8bef9SDimitry Andric     for (int OpNum = 0; OpNum != 2; ++OpNum) {
3314e8d8bef9SDimitry Andric       APInt OpDemandedElts(InnerVWidth, 0);
3315e8d8bef9SDimitry Andric       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3316e8d8bef9SDimitry Andric         unsigned LaneIdx = Lane * VWidthPerLane;
3317e8d8bef9SDimitry Andric         for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3318e8d8bef9SDimitry Andric           unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3319e8d8bef9SDimitry Andric           if (DemandedElts[Idx])
3320e8d8bef9SDimitry Andric             OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
3321e8d8bef9SDimitry Andric         }
3322e8d8bef9SDimitry Andric       }
3323e8d8bef9SDimitry Andric 
3324e8d8bef9SDimitry Andric       // Demand elements from the operand.
3325e8d8bef9SDimitry Andric       APInt OpUndefElts(InnerVWidth, 0);
3326e8d8bef9SDimitry Andric       simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
3327e8d8bef9SDimitry Andric 
3328e8d8bef9SDimitry Andric       // Pack the operand's UNDEF elements, one lane at a time.
3329e8d8bef9SDimitry Andric       OpUndefElts = OpUndefElts.zext(VWidth);
3330e8d8bef9SDimitry Andric       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3331e8d8bef9SDimitry Andric         APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
3332e8d8bef9SDimitry Andric         LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
3333e8d8bef9SDimitry Andric         LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3334e8d8bef9SDimitry Andric         UndefElts |= LaneElts;
3335e8d8bef9SDimitry Andric       }
3336e8d8bef9SDimitry Andric     }
3337e8d8bef9SDimitry Andric     break;
3338e8d8bef9SDimitry Andric   }
3339e8d8bef9SDimitry Andric 
3340*0fca6ea1SDimitry Andric   case Intrinsic::x86_sse2_pmadd_wd:
3341*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmadd_wd:
3342*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmaddw_d_512:
3343*0fca6ea1SDimitry Andric   case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3344*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx2_pmadd_ub_sw:
3345*0fca6ea1SDimitry Andric   case Intrinsic::x86_avx512_pmaddubs_w_512: {
3346*0fca6ea1SDimitry Andric     // PMADD - demand both src elements that map to each dst element.
3347*0fca6ea1SDimitry Andric     auto *ArgTy = II.getArgOperand(0)->getType();
3348*0fca6ea1SDimitry Andric     unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
3349*0fca6ea1SDimitry Andric     assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
3350*0fca6ea1SDimitry Andric     APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
3351*0fca6ea1SDimitry Andric     APInt Op0UndefElts(InnerVWidth, 0);
3352*0fca6ea1SDimitry Andric     APInt Op1UndefElts(InnerVWidth, 0);
3353*0fca6ea1SDimitry Andric     simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
3354*0fca6ea1SDimitry Andric     simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
3355*0fca6ea1SDimitry Andric     // NOTE: madd(undef,undef) != undef.
3356*0fca6ea1SDimitry Andric     break;
3357*0fca6ea1SDimitry Andric   }
3358*0fca6ea1SDimitry Andric 
3359e8d8bef9SDimitry Andric   // PSHUFB
3360e8d8bef9SDimitry Andric   case Intrinsic::x86_ssse3_pshuf_b_128:
3361e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pshuf_b:
3362e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pshuf_b_512:
3363e8d8bef9SDimitry Andric   // PERMILVAR
3364e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps:
3365e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps_256:
3366e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_ps_512:
3367e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd:
3368e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd_256:
3369e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_pd_512:
3370e8d8bef9SDimitry Andric   // PERMV
3371e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permd:
3372e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permps: {
3373e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
3374e8d8bef9SDimitry Andric     break;
3375e8d8bef9SDimitry Andric   }
3376e8d8bef9SDimitry Andric 
3377e8d8bef9SDimitry Andric   // SSE4A instructions leave the upper 64-bits of the 128-bit result
3378e8d8bef9SDimitry Andric   // in an undefined state.
3379e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrq:
3380e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrqi:
3381e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertq:
3382e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertqi:
3383e8d8bef9SDimitry Andric     UndefElts.setHighBits(VWidth / 2);
3384e8d8bef9SDimitry Andric     break;
3385e8d8bef9SDimitry Andric   }
3386bdd1243dSDimitry Andric   return std::nullopt;
3387e8d8bef9SDimitry Andric }
3388