//===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// X86 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "X86TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "x86tti"

/// Return a constant boolean vector that has true elements in all positions
/// where the input constant data vector has an element with the sign bit set.
static Constant *getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL) {
  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
  V = ConstantExpr::getBitCast(V, IntTy);
  V = ConstantFoldCompareInstOperands(CmpInst::ICMP_SGT,
                                      Constant::getNullValue(IntTy), V, DL);
  assert(V && "Vector must be foldable");
  return V;
}

/// Convert the x86 XMM integer vector mask to a vector of bools based on
/// each element's most significant bit (the sign bit).
static Value *getBoolVecFromMask(Value *Mask, const DataLayout &DL) {
  // Fold Constant Mask.
  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
    return getNegativeIsTrueBoolVec(ConstantMask, DL);

  // Mask was extended from a boolean vector.
  Value *ExtMask;
  if (match(Mask, m_SExt(m_Value(ExtMask))) &&
      ExtMask->getType()->isIntOrIntVectorTy(1))
    return ExtMask;

  return nullptr;
}

// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
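//
// Illustrative sketch of the conversion performed below (hand-written IR;
// %p, %m and %b are placeholder names, not taken from a test):
//   %v = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %p, <4 x i32> %m)
// becomes, when %m = sext <4 x i1> %b to <4 x i32>:
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 1,
//            <4 x i1> %b, <4 x float> zeroinitializer)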
static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Constant *ZeroVec = Constant::getNullValue(II.getType());

  // Zero Mask - masked load instruction creates a zero vector.
  if (isa<ConstantAggregateZero>(Mask))
    return IC.replaceInstUsesWith(II, ZeroVec);

  // The mask is constant or extended from a bool vector. Convert this x86
  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
  if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
    // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
    // the LLVM intrinsic definition for the pointer argument.
    unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
    PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
    Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");

    // The pass-through vector for an x86 masked load is a zero vector.
    CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
        II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
    return IC.replaceInstUsesWith(II, NewMaskedLoad);
  }

  return nullptr;
}

// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
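//
// Sketch of the analogous store-side rewrite (illustrative IR; %p, %m and %v
// are placeholder names):
//   call void @llvm.x86.avx.maskstore.ps(ptr %p, <4 x i32> %m, <4 x float> %v)
// becomes, when %m = sext <4 x i1> %b to <4 x i32>:
//   call void @llvm.masked.store.v4f32.p0(<4 x float> %v, ptr %p, i32 1,
//                                         <4 x i1> %b)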
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *Vec = II.getOperand(2);

  // Zero Mask - this masked store instruction does nothing.
  if (isa<ConstantAggregateZero>(Mask)) {
    IC.eraseInstFromFunction(II);
    return true;
  }

  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
  // anything else at this level.
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
    return false;

  // The mask is constant or extended from a bool vector. Convert this x86
  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
  if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
    unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
    PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
    Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");

    IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);

    // 'Replace uses' doesn't work for stores. Erase the original masked store.
    IC.eraseInstFromFunction(II);
    return true;
  }

  return false;
}

static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift. If it's guaranteed to be out of range, logical shifts
  // combine to zero and arithmetic shifts are clamped to (BitWidth - 1).
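  //
  // For instance (hand-worked, not from a test), with v8i16 elements:
  //   psrai.w(x, 3)  -> ashr x, <splat 3>   (3 < 16, in range)
  //   psrli.w(x, 17) -> zeroinitializer     (17 >= 16, logical)
  //   psrai.w(x, 17) -> ashr x, <splat 15>  (clamped to BitWidth - 1)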
  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
    KnownBits KnownAmtBits =
        llvm::computeKnownBits(Amt, II.getDataLayout());
    if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
    if (KnownAmtBits.getMinValue().uge(BitWidth)) {
      if (LogicalShift)
        return ConstantAggregateZero::get(VT);
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
    // Ensure the first element has an in-range value and the rest of the
    // elements in the bottom 64 bits are zero.
    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
    KnownBits KnownLowerBits = llvm::computeKnownBits(
        Amt, DemandedLower, II.getDataLayout());
    KnownBits KnownUpperBits = llvm::computeKnownBits(
        Amt, DemandedUpper, II.getDataLayout());
    if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      SmallVector<int, 16> ZeroSplat(VWidth, 0);
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if count is constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 uses the entire bottom 64 bits of the 128-bit vector operand to
  // compute the shift amount.
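  //
  // E.g. for v8i16 shifts the four lowest i16 elements are concatenated
  // (a hand-worked example): Amt = <1, 0, 0, 0, ?, ?, ?, ?> yields
  // Count = 0x0000000000000001, i.e. a shift by 1.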
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit value.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If shift-by-zero then just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
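//
// E.g. (hardware semantics, hand-worked): avx2.psrlv.d(x, <1, 33, 8, 40>)
// produces <x[0] lshr 1, 0, x[2] lshr 8, 0>, since lanes 1 and 3 are out of
// range for i32 elements.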
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift.
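  // E.g. if every amount is known to be of the form (y & 31) for i32
  // elements, the maximum possible amount is 31 and the intrinsic can fold
  // to a plain shl/lshr/ashr (an assumed illustration).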
  KnownBits KnownAmt =
      llvm::computeKnownBits(Amt, II.getDataLayout());
  if (KnownAmt.getMaxValue().ult(BitWidth)) {
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(Amt);
  if (!CShift)
    return nullptr;

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

static Value *simplifyX86pack(IntrinsicInst &II,
                              InstCombiner::BuilderTy &Builder, bool IsSigned) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();

  // Fast all undef handling.
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
    return UndefValue::get(ResTy);

  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
  unsigned NumSrcElts = ArgTy->getNumElements();
  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");

  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  // Constant folding.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Clamp Values - signed/unsigned both use signed clamp values, but they
  // differ on the min/max values.
  APInt MinValue, MaxValue;
  if (IsSigned) {
    // PACKSS: Truncate signed value with signed saturation.
    // Source values less than dst minint are saturated to minint.
    // Source values greater than dst maxint are saturated to maxint.
    MinValue =
        APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
    MaxValue =
        APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
  } else {
    // PACKUS: Truncate signed value with unsigned saturation.
    // Source values less than zero are saturated to zero.
    // Source values greater than dst maxuint are saturated to maxuint.
    MinValue = APInt::getZero(SrcScalarSizeInBits);
    MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
  }

  auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
  auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

  // Shuffle clamped args together at the lane level.
  SmallVector<int, 32> PackMask;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
  }
  auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);

  // Truncate to dst size.
  return Builder.CreateTrunc(Shuffle, ResTy);
}
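// A hand-worked simplifyX86pack example (ours): for sse2.packsswb a source
// i16 value of 300 is clamped to 127 and -300 to -128 before the lane-wise
// shuffle and the trunc to i8, matching PACKSSWB saturation.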
static Value *simplifyX86pmulh(IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder, bool IsSigned,
                               bool IsRounding) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
         "Unexpected PMULH types");
  assert((!IsRounding || IsSigned) && "PMULHRS instruction must be signed");

  // Multiply by undef -> zero (NOT undef!) as other arg could still be zero.
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by zero.
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by one.
  if (!IsRounding) {
    if (match(Arg0, m_One()))
      return IsSigned ? Builder.CreateAShr(Arg1, 15)
                      : ConstantAggregateZero::get(ResTy);
    if (match(Arg1, m_One()))
      return IsSigned ? Builder.CreateAShr(Arg0, 15)
                      : ConstantAggregateZero::get(ResTy);
  }

  // Constant folding.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Extend to twice the width and multiply.
  auto Cast =
      IsSigned ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
  auto *ExtTy = FixedVectorType::getExtendedElementVectorType(ArgTy);
  Value *LHS = Builder.CreateCast(Cast, Arg0, ExtTy);
  Value *RHS = Builder.CreateCast(Cast, Arg1, ExtTy);
  Value *Mul = Builder.CreateMul(LHS, RHS);

  if (IsRounding) {
    // PMULHRSW: truncate to vXi18 of the most significant bits, add one and
    // extract bits[16:1].
    auto *RndEltTy = IntegerType::get(ExtTy->getContext(), 18);
    auto *RndTy = FixedVectorType::get(RndEltTy, ExtTy);
    Mul = Builder.CreateLShr(Mul, 14);
    Mul = Builder.CreateTrunc(Mul, RndTy);
    Mul = Builder.CreateAdd(Mul, ConstantInt::get(RndTy, 1));
    Mul = Builder.CreateLShr(Mul, 1);
  } else {
    // PMULH/PMULHU: extract the vXi16 most significant bits.
    Mul = Builder.CreateLShr(Mul, 16);
  }

  return Builder.CreateTrunc(Mul, ResTy);
}
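// A hand-worked simplifyX86pmulh lane (PMULHW): 0x4000 * 0x0200 = 0x00800000,
// whose high 16 bits are 0x0080, so the result lane is 0x0080.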
static Value *simplifyX86pmadd(IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder,
                               bool IsPMADDWD) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  auto *ResTy = cast<FixedVectorType>(II.getType());
  [[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(Arg0->getType());

  unsigned NumDstElts = ResTy->getNumElements();
  assert(ArgTy->getNumElements() == (2 * NumDstElts) &&
         ResTy->getScalarSizeInBits() == (2 * ArgTy->getScalarSizeInBits()) &&
         "Unexpected PMADD types");

  // Multiply by undef -> zero (NOT undef!) as other arg could still be zero.
  if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Multiply by zero.
  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
    return ConstantAggregateZero::get(ResTy);

  // Constant folding.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Split Lo/Hi elements pairs, extend and add together.
  // PMADDWD(X,Y) =
  //   add(mul(sext(lhs[0]),sext(rhs[0])),mul(sext(lhs[1]),sext(rhs[1])))
  // PMADDUBSW(X,Y) =
  //   sadd_sat(mul(zext(lhs[0]),sext(rhs[0])),mul(zext(lhs[1]),sext(rhs[1])))
  SmallVector<int> LoMask, HiMask;
  for (unsigned I = 0; I != NumDstElts; ++I) {
    LoMask.push_back(2 * I + 0);
    HiMask.push_back(2 * I + 1);
  }

  auto *LHSLo = Builder.CreateShuffleVector(Arg0, LoMask);
  auto *LHSHi = Builder.CreateShuffleVector(Arg0, HiMask);
  auto *RHSLo = Builder.CreateShuffleVector(Arg1, LoMask);
  auto *RHSHi = Builder.CreateShuffleVector(Arg1, HiMask);

  auto LHSCast =
      IsPMADDWD ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt;
  LHSLo = Builder.CreateCast(LHSCast, LHSLo, ResTy);
  LHSHi = Builder.CreateCast(LHSCast, LHSHi, ResTy);
  RHSLo = Builder.CreateCast(Instruction::CastOps::SExt, RHSLo, ResTy);
  RHSHi = Builder.CreateCast(Instruction::CastOps::SExt, RHSHi, ResTy);
  Value *Lo = Builder.CreateMul(LHSLo, RHSLo);
  Value *Hi = Builder.CreateMul(LHSHi, RHSHi);
  return IsPMADDWD
             ? Builder.CreateAdd(Lo, Hi)
             : Builder.CreateIntrinsic(ResTy, Intrinsic::sadd_sat, {Lo, Hi});
}
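// A hand-worked PMADDWD lane for simplifyX86pmadd: adjacent input pairs
// <2, 3> and <10, 100> produce the i32 result 2*10 + 3*100 = 320.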
static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Value *Arg = II.getArgOperand(0);
  Type *ResTy = II.getType();

  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
  if (isa<UndefValue>(Arg))
    return Constant::getNullValue(ResTy);

  auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
  // We can't easily peek through x86_mmx types.
  if (!ArgTy)
    return nullptr;

  // Expand MOVMSK to compare/bitcast/zext:
  // e.g. PMOVMSKB(v16i8 x):
  //   %cmp = icmp slt <16 x i8> %x, zeroinitializer
  //   %int = bitcast <16 x i1> %cmp to i16
  //   %res = zext i16 %int to i32
  unsigned NumElts = ArgTy->getNumElements();
  Type *IntegerTy = Builder.getIntNTy(NumElts);

  Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
  Res = Builder.CreateIsNeg(Res);
  Res = Builder.CreateBitCast(Res, IntegerTy);
  Res = Builder.CreateZExtOrTrunc(Res, ResTy);
  return Res;
}

static Value *simplifyX86addcarry(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  Value *CarryIn = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *Op2 = II.getArgOperand(2);
  Type *RetTy = II.getType();
  Type *OpTy = Op1->getType();
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");

  // If carry-in is zero, this is just an unsigned add with overflow.
  if (match(CarryIn, m_ZeroInt())) {
    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
                                          {Op1, Op2});
    // The types have to be adjusted to match the x86 call types.
    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
                                       Builder.getInt8Ty());
    Value *Res = PoisonValue::get(RetTy);
    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
    return Builder.CreateInsertValue(Res, UAddResult, 1);
  }

  return nullptr;
}
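// Sketch of the simplifyX86addcarry rewrite (illustrative IR; %a and %b are
// placeholder names):
//   call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a, i32 %b)
// becomes
//   %u = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
// with the i1 overflow zext'd to i8 and the pair repacked as { i8, i32 }.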
static Value *simplifyTernarylogic(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {

  auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
  if (!ArgImm || ArgImm->getValue().uge(256))
    return nullptr;

  Value *ArgA = II.getArgOperand(0);
  Value *ArgB = II.getArgOperand(1);
  Value *ArgC = II.getArgOperand(2);

  Type *Ty = II.getType();

  auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
  };
  auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
  };
  auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
  };
  auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateNot(V.first), ~V.second};
  };
  auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
  auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
  auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

  bool AIsConst = match(ArgA, m_ImmConstant());
  bool BIsConst = match(ArgB, m_ImmConstant());
  bool CIsConst = match(ArgC, m_ImmConstant());

  bool ABIsConst = AIsConst && BIsConst;
  bool ACIsConst = AIsConst && CIsConst;
  bool BCIsConst = BIsConst && CIsConst;
  bool ABCIsConst = AIsConst && BIsConst && CIsConst;

  // Use for verification. It's a big table. It's difficult to go from Imm ->
  // logic ops, but easy to verify that a set of logic ops is correct. We track
  // the logic ops through the second value in the pair. At the end it should
  // equal Imm.
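  //
  // The seeds below are the standard ternary truth-table columns: bit i of
  // each constant holds that operand's value for input combination i. A
  // hand-worked check: for Imm = 0x80 (A & B & C), 0xf0 & 0xcc & 0xaa == 0x80.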
  std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
  std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
  std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
  std::pair<Value *, uint8_t> Res = {nullptr, 0};

  // Currently we only handle cases that convert directly to another
  // instruction or cases where all the ops are constant. This is because we
  // don't properly handle creating ternary ops in the backend, so splitting
  // them here may cause regressions. As the backend improves, uncomment more
  // cases.

  uint8_t Imm = ArgImm->getValue().getZExtValue();
  switch (Imm) {
  case 0x0:
    Res = {Constant::getNullValue(Ty), 0};
    break;
  case 0x1:
    if (ABCIsConst)
      Res = Nor(Or(A, B), C);
    break;
  case 0x2:
    if (ABCIsConst)
      Res = And(Nor(A, B), C);
    break;
  case 0x3:
    if (ABIsConst)
      Res = Nor(A, B);
    break;
  case 0x4:
    if (ABCIsConst)
      Res = And(Nor(A, C), B);
    break;
  case 0x5:
    if (ACIsConst)
      Res = Nor(A, C);
    break;
  case 0x6:
    if (ABCIsConst)
      Res = Nor(A, Xnor(B, C));
    break;
  case 0x7:
    if (ABCIsConst)
      Res = Nor(A, And(B, C));
    break;
  case 0x8:
    if (ABCIsConst)
      Res = Nor(A, Nand(B, C));
    break;
  case 0x9:
    if (ABCIsConst)
      Res = Nor(A, Xor(B, C));
    break;
  case 0xa:
    if (ACIsConst)
      Res = Nor(A, Not(C));
    break;
  case 0xb:
    if (ABCIsConst)
      Res = Nor(A, Nor(C, Not(B)));
    break;
  case 0xc:
    if (ABIsConst)
      Res = Nor(A, Not(B));
    break;
  case 0xd:
    if (ABCIsConst)
      Res = Nor(A, Nor(B, Not(C)));
    break;
  case 0xe:
    if (ABCIsConst)
      Res = Nor(A, Nor(B, C));
    break;
  case 0xf:
    Res = Not(A);
    break;
  case 0x10:
    if (ABCIsConst)
      Res = And(A, Nor(B, C));
    break;
  case 0x11:
    if (BCIsConst)
      Res = Nor(B, C);
    break;
  case 0x12:
    if (ABCIsConst)
      Res = Nor(Xnor(A, C), B);
    break;
  case 0x13:
    if (ABCIsConst)
      Res = Nor(And(A, C), B);
    break;
  case 0x14:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), C);
    break;
  case 0x15:
    if (ABCIsConst)
      Res = Nor(And(A, B), C);
    break;
  case 0x16:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), And(Nand(A, B), C));
    break;
  case 0x17:
    if (ABCIsConst)
      Res = Xor(Or(A, B), Or(Xnor(A, B), C));
    break;
  case 0x18:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Xnor(A, C));
    break;
  case 0x19:
    if (ABCIsConst)
      Res = And(Nand(A, B), Xnor(B, C));
    break;
  case 0x1a:
    if (ABCIsConst)
      Res = Xor(A, Or(And(A, B), C));
    break;
  case 0x1b:
    if (ABCIsConst)
      Res = Xor(A, Or(Xnor(A, B), C));
    break;
  case 0x1c:
    if (ABCIsConst)
      Res = Xor(A, Or(And(A, C), B));
    break;
  case 0x1d:
    if (ABCIsConst)
      Res = Xor(A, Or(Xnor(A, C), B));
    break;
  case 0x1e:
    if (ABCIsConst)
      Res = Xor(A, Or(B, C));
    break;
  case 0x1f:
    if (ABCIsConst)
      Res = Nand(A, Or(B, C));
    break;
  case 0x20:
    if (ABCIsConst)
      Res = Nor(Nand(A, C), B);
    break;
  case 0x21:
    if (ABCIsConst)
      Res = Nor(Xor(A, C), B);
    break;
  case 0x22:
    if (BCIsConst)
      Res = Nor(B, Not(C));
    break;
  case 0x23:
    if (ABCIsConst)
      Res = Nor(B, Nor(C, Not(A)));
    break;
  case 0x24:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Xor(A, C));
    break;
  case 0x25:
    if (ABCIsConst)
      Res = Xor(A, Nand(Nand(A, B), C));
    break;
  case 0x26:
    if (ABCIsConst)
      Res = And(Nand(A, B), Xor(B, C));
    break;
  case 0x27:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), C), B);
    break;
  case 0x28:
    if (ABCIsConst)
      Res = And(Xor(A, B), C);
    break;
  case 0x29:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nor(And(A, B), C));
    break;
  case 0x2a:
    if (ABCIsConst)
      Res = And(Nand(A, B), C);
    break;
  case 0x2b:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Xor(A, C)), A);
    break;
  case 0x2c:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Nor(B, C));
    break;
  case 0x2d:
    if (ABCIsConst)
      Res = Xor(A, Or(B, Not(C)));
    break;
  case 0x2e:
    if (ABCIsConst)
      Res = Xor(A, Or(Xor(A, C), B));
    break;
  case 0x2f:
    if (ABCIsConst)
      Res = Nand(A, Or(B, Not(C)));
    break;
  case 0x30:
    if (ABIsConst)
      Res = Nor(B, Not(A));
    break;
  case 0x31:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(C)), B);
    break;
  case 0x32:
    if (ABCIsConst)
      Res = Nor(Nor(A, C), B);
    break;
  case 0x33:
    Res = Not(B);
    break;
  case 0x34:
    if (ABCIsConst)
      Res = And(Xor(A, B), Nand(B, C));
    break;
  case 0x35:
    if (ABCIsConst)
      Res = Xor(B, Or(A, Xnor(B, C)));
    break;
  case 0x36:
    if (ABCIsConst)
      Res = Xor(Or(A, C), B);
    break;
  case 0x37:
    if (ABCIsConst)
      Res = Nand(Or(A, C), B);
    break;
  case 0x38:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Nor(A, C));
    break;
  case 0x39:
    if (ABCIsConst)
      Res = Xor(Or(A, Not(C)), B);
    break;
  case 0x3a:
    if (ABCIsConst)
      Res = Xor(B, Or(A, Xor(B, C)));
    break;
  case 0x3b:
    if (ABCIsConst)
      Res = Nand(Or(A, Not(C)), B);
    break;
  case 0x3c:
    Res = Xor(A, B);
    break;
  case 0x3d:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, C), B));
    break;
  case 0x3e:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, Not(C)), B));
    break;
  case 0x3f:
    if (ABIsConst)
      Res = Nand(A, B);
    break;
  case 0x40:
    if (ABCIsConst)
      Res = Nor(Nand(A, B), C);
    break;
  case 0x41:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), C);
    break;
  case 0x42:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), Xnor(A, C));
    break;
  case 0x43:
    if (ABCIsConst)
      Res = Xor(A, Nand(Nand(A, C), B));
    break;
  case 0x44:
    if (BCIsConst)
      Res = Nor(C, Not(B));
    break;
  case 0x45:
    if (ABCIsConst)
      Res = Nor(Nor(B, Not(A)), C);
    break;
  case 0x46:
    if (ABCIsConst)
      Res = Xor(Or(And(A, C), B), C);
    break;
  case 0x47:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, C), B), C);
    break;
  case 0x48:
    if (ABCIsConst)
      Res = And(Xor(A, C), B);
    break;
  case 0x49:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), And(A, C)), C);
    break;
  case 0x4a:
    if (ABCIsConst)
      Res = Nor(Xnor(A, C), Nor(B, C));
    break;
  case 0x4b:
    if (ABCIsConst)
      Res = Xor(A, Or(C, Not(B)));
    break;
  case 0x4c:
    if (ABCIsConst)
      Res = And(Nand(A, C), B);
    break;
  case 0x4d:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, B), Xnor(A, C)), A);
    break;
  case 0x4e:
    if (ABCIsConst)
      Res = Xor(A, Or(Xor(A, B), C));
    break;
  case 0x4f:
    if (ABCIsConst)
      Res = Nand(A, Nand(B, Not(C)));
    break;
  case 0x50:
    if (ACIsConst)
      Res = Nor(C, Not(A));
    break;
  case 0x51:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(B)), C);
    break;
  case 0x52:
    if (ABCIsConst)
      Res = And(Xor(A, C), Nand(B, C));
    break;
  case 0x53:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(B, C), A), C);
    break;
  case 0x54:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), C);
    break;
  case 0x55:
    Res = Not(C);
    break;
  case 0x56:
    if (ABCIsConst)
      Res = Xor(Or(A, B), C);
    break;
  case 0x57:
    if (ABCIsConst)
      Res = Nand(Or(A, B), C);
    break;
  case 0x58:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xnor(A, C));
    break;
  case 0x59:
    if (ABCIsConst)
      Res = Xor(Or(A, Not(B)), C);
    break;
  case 0x5a:
    Res = Xor(A, C);
    break;
  case 0x5b:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, B), C));
    break;
  case 0x5c:
    if (ABCIsConst)
      Res = Xor(Or(Xor(B, C), A), C);
    break;
  case 0x5d:
    if (ABCIsConst)
      Res = Nand(Or(A, Not(B)), C);
    break;
  case 0x5e:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, Not(B)), C));
    break;
  case 0x5f:
    if (ACIsConst)
      Res = Nand(A, C);
    break;
  case 0x60:
    if (ABCIsConst)
      Res = And(A, Xor(B, C));
    break;
  case 0x61:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), And(B, C)), C);
    break;
  case 0x62:
    if (ABCIsConst)
      Res = Nor(Nor(A, C), Xnor(B, C));
    break;
  case 0x63:
    if (ABCIsConst)
      Res = Xor(B, Or(C, Not(A)));
    break;
  case 0x64:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xnor(B, C));
    break;
  case 0x65:
    if (ABCIsConst)
      Res = Xor(Or(B, Not(A)), C);
    break;
  case 0x66:
    Res = Xor(B, C);
    break;
  case 0x67:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xor(B, C));
    break;
  case 0x68:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
    break;
  case 0x69:
    if (ABCIsConst)
      Res = Xor(Xnor(A, B), C);
    break;
  case 0x6a:
    if (ABCIsConst)
      Res = Xor(And(A, B), C);
    break;
  case 0x6b:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
    break;
  case 0x6c:
    if (ABCIsConst)
      Res = Xor(And(A, C), B);
    break;
  case 0x6d:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
    break;
  case 0x6e:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(B)), Xor(B, C));
    break;
  case 0x6f:
    if (ABCIsConst)
      Res = Nand(A, Xnor(B, C));
    break;
  case 0x70:
    if (ABCIsConst)
      Res = And(A, Nand(B, C));
    break;
  case 0x71:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(A, B), Xor(A, C)), A);
    break;
  case 0x72:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, B), C), B);
    break;
  case 0x73:
    if (ABCIsConst)
      Res = Nand(Nand(A, Not(C)), B);
    break;
  case 0x74:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, C), B), C);
    break;
  case 0x75:
    if (ABCIsConst)
      Res = Nand(Nand(A, Not(B)), C);
    break;
  case 0x76:
    if (ABCIsConst)
      Res = Xor(B, Or(Nor(B, Not(A)), C));
    break;
  case 0x77:
    if (BCIsConst)
      Res = Nand(B, C);
    break;
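  // As above, expansions that need more than one IR instruction are guarded
  // on the relevant operands being constants, so the whole expression is
  // guaranteed to fold away instead of growing the instruction count.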
  case 0x78:
    if (ABCIsConst)
      Res = Xor(A, And(B, C));
    break;
  case 0x79:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);
    break;
  case 0x7a:
    if (ABCIsConst)
      Res = Or(Xor(A, C), Nor(B, Not(A)));
    break;
  case 0x7b:
    if (ABCIsConst)
      Res = Nand(Xnor(A, C), B);
    break;
  case 0x7c:
    if (ABCIsConst)
      Res = Or(Xor(A, B), Nor(C, Not(A)));
    break;
  case 0x7d:
    if (ABCIsConst)
      Res = Nand(Xnor(A, B), C);
    break;
  case 0x7e:
    if (ABCIsConst)
      Res = Or(Xor(A, B), Xor(A, C));
    break;
  case 0x7f:
    if (ABCIsConst)
      Res = Nand(And(A, B), C);
    break;
  case 0x80:
    if (ABCIsConst)
      Res = And(And(A, B), C);
    break;
  case 0x81:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), Xor(A, C));
    break;
  case 0x82:
    if (ABCIsConst)
      Res = And(Xnor(A, B), C);
    break;
  case 0x83:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), Nor(C, Not(A)));
    break;
  case 0x84:
    if (ABCIsConst)
      Res = And(Xnor(A, C), B);
    break;
  case 0x85:
    if (ABCIsConst)
      Res = Nor(Xor(A, C), Nor(B, Not(A)));
    break;
  case 0x86:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
    break;
  case 0x87:
    if (ABCIsConst)
      Res = Xor(A, Nand(B, C));
    break;
  case 0x88:
    Res = And(B, C);
    break;
  case 0x89:
    if (ABCIsConst)
      Res = Xor(B, Nor(Nor(B, Not(A)), C));
    break;
  case 0x8a:
    if (ABCIsConst)
      Res = And(Nand(A, Not(B)), C);
    break;
  case 0x8b:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(A, C), B), C);
    break;
  case 0x8c:
    if (ABCIsConst)
      Res = And(Nand(A, Not(C)), B);
    break;
  case 0x8d:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(A, B), C), B);
    break;
  case 0x8e:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, B), Xor(A, C)), A);
    break;
  case 0x8f:
    if (ABCIsConst)
      Res = Nand(A, Nand(B, C));
    break;
  case 0x90:
    if (ABCIsConst)
      Res = And(A, Xnor(B, C));
    break;
  case 0x91:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(B)), Xor(B, C));
    break;
  case 0x92:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
    break;
  case 0x93:
    if (ABCIsConst)
      Res = Xor(Nand(A, C), B);
    break;
  case 0x94:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
    break;
  case 0x95:
    if (ABCIsConst)
      Res = Xor(Nand(A, B), C);
    break;
  case 0x96:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), C);
    break;
  case 0x97:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Or(Nor(A, B), C));
    break;
  case 0x98:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xor(B, C));
    break;
  case 0x99:
    if (BCIsConst)
      Res = Xnor(B, C);
    break;
  case 0x9a:
    if (ABCIsConst)
      Res = Xor(Nor(B, Not(A)), C);
    break;
  case 0x9b:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xnor(B, C));
    break;
  case 0x9c:
    if (ABCIsConst)
      Res = Xor(B, Nor(C, Not(A)));
    break;
  case 0x9d:
    if (ABCIsConst)
      Res = Or(Nor(A, C), Xnor(B, C));
    break;
  case 0x9e:
    if (ABCIsConst)
      Res = Xor(And(Xor(A, B), Nand(B, C)), C);
    break;
  case 0x9f:
    if (ABCIsConst)
      Res = Nand(A, Xor(B, C));
    break;
  case 0xa0:
    Res = And(A, C);
    break;
  case 0xa1:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, Not(B)), C));
    break;
  case 0xa2:
    if (ABCIsConst)
      Res = And(Or(A, Not(B)), C);
    break;
  case 0xa3:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(B, C), A), C);
    break;
  case 0xa4:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, B), C));
    break;
  case 0xa5:
    if (ACIsConst)
      Res = Xnor(A, C);
    break;
  case 0xa6:
    if (ABCIsConst)
      Res = Xor(Nor(A, Not(B)), C);
    break;
  case 0xa7:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xnor(A, C));
    break;
  case 0xa8:
    if (ABCIsConst)
      Res = And(Or(A, B), C);
    break;
  case 0xa9:
    if (ABCIsConst)
      Res = Xor(Nor(A, B), C);
    break;
  case 0xaa:
    Res = C;
    break;
  case 0xab:
    if (ABCIsConst)
      Res = Or(Nor(A, B), C);
    break;
  case 0xac:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(B, C), A), C);
    break;
  case 0xad:
    if (ABCIsConst)
      Res = Or(Xnor(A, C), And(B, C));
    break;
  case 0xae:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(B)), C);
    break;
  case 0xaf:
    if (ACIsConst)
      Res = Or(C, Not(A));
    break;
  case 0xb0:
    if (ABCIsConst)
      Res = And(A, Nand(B, Not(C)));
    break;
  case 0xb1:
    if (ABCIsConst)
      Res = Xor(A, Nor(Xor(A, B), C));
    break;
  case 0xb2:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);
    break;
  case 0xb3:
    if (ABCIsConst)
      Res = Nand(Nand(A, C), B);
    break;
  case 0xb4:
    if (ABCIsConst)
      Res = Xor(A, Nor(C, Not(B)));
    break;
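  // Some familiar functions hide in this table: 0x96 above is the full-adder
  // sum bit A^B^C, and 0xe8 below is the 3-input majority function.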
  case 0xb5:
    if (ABCIsConst)
      Res = Or(Xnor(A, C), Nor(B, C));
    break;
  case 0xb6:
    if (ABCIsConst)
      Res = Xor(And(Xor(A, B), Nand(A, C)), C);
    break;
  case 0xb7:
    if (ABCIsConst)
      Res = Nand(Xor(A, C), B);
    break;
  case 0xb8:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, C), B), C);
    break;
  case 0xb9:
    if (ABCIsConst)
      Res = Xor(Nor(And(A, C), B), C);
    break;
  case 0xba:
    if (ABCIsConst)
      Res = Or(Nor(B, Not(A)), C);
    break;
  case 0xbb:
    if (BCIsConst)
      Res = Or(C, Not(B));
    break;
  case 0xbc:
    if (ABCIsConst)
      Res = Xor(A, And(Nand(A, C), B));
    break;
  case 0xbd:
    if (ABCIsConst)
      Res = Or(Xor(A, B), Xnor(A, C));
    break;
  case 0xbe:
    if (ABCIsConst)
      Res = Or(Xor(A, B), C);
    break;
  case 0xbf:
    if (ABCIsConst)
      Res = Or(Nand(A, B), C);
    break;
  case 0xc0:
    Res = And(A, B);
    break;
  case 0xc1:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, Not(C)), B));
    break;
  case 0xc2:
    if (ABCIsConst)
      Res = Xor(A, Nor(Nor(A, C), B));
    break;
  case 0xc3:
    if (ABIsConst)
      Res = Xnor(A, B);
    break;
  case 0xc4:
    if (ABCIsConst)
      Res = And(Or(A, Not(C)), B);
    break;
  case 0xc5:
    if (ABCIsConst)
      Res = Xor(B, Nor(A, Xor(B, C)));
    break;
  case 0xc6:
    if (ABCIsConst)
      Res = Xor(Nor(A, Not(C)), B);
    break;
  case 0xc7:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Nor(A, C));
    break;
  case 0xc8:
    if (ABCIsConst)
      Res = And(Or(A, C), B);
    break;
  case 0xc9:
    if (ABCIsConst)
      Res = Xor(Nor(A, C), B);
    break;
  case 0xca:
    if (ABCIsConst)
      Res = Xor(B, Nor(A, Xnor(B, C)));
    break;
  case 0xcb:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), And(B, C));
    break;
  case 0xcc:
    Res = B;
    break;
  case 0xcd:
    if (ABCIsConst)
      Res = Or(Nor(A, C), B);
    break;
  case 0xce:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(C)), B);
    break;
  case 0xcf:
    if (ABIsConst)
      Res = Or(B, Not(A));
    break;
  case 0xd0:
    if (ABCIsConst)
      Res = And(A, Or(B, Not(C)));
    break;
  case 0xd1:
    if (ABCIsConst)
      Res = Xor(A, Nor(Xor(A, C), B));
    break;
  case 0xd2:
    if (ABCIsConst)
      Res = Xor(A, Nor(B, Not(C)));
    break;
  case 0xd3:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Nor(B, C));
    break;
  case 0xd4:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);
    break;
  case 0xd5:
    if (ABCIsConst)
      Res = Nand(Nand(A, B), C);
    break;
  case 0xd6:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Or(And(A, B), C));
    break;
  case 0xd7:
    if (ABCIsConst)
      Res = Nand(Xor(A, B), C);
    break;
  case 0xd8:
    if (ABCIsConst)
      Res = Xor(Nor(Xnor(A, B), C), B);
    break;
  case 0xd9:
    if (ABCIsConst)
      Res = Or(And(A, B), Xnor(B, C));
    break;
  case 0xda:
    if (ABCIsConst)
      Res = Xor(A, And(Nand(A, B), C));
    break;
  case 0xdb:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Xor(A, C));
    break;
  case 0xdc:
    if (ABCIsConst)
      Res = Or(B, Nor(C, Not(A)));
    break;
  case 0xdd:
    if (BCIsConst)
      Res = Or(B, Not(C));
    break;
  case 0xde:
    if (ABCIsConst)
      Res = Or(Xor(A, C), B);
    break;
  case 0xdf:
    if (ABCIsConst)
      Res = Or(Nand(A, C), B);
    break;
  case 0xe0:
    if (ABCIsConst)
      Res = And(A, Or(B, C));
    break;
  case 0xe1:
    if (ABCIsConst)
      Res = Xor(A, Nor(B, C));
    break;
  case 0xe2:
    if (ABCIsConst)
      Res = Xor(A, Nor(Xnor(A, C), B));
    break;
  case 0xe3:
    if (ABCIsConst)
      Res = Xor(A, Nor(And(A, C), B));
    break;
  case 0xe4:
    if (ABCIsConst)
      Res = Xor(A, Nor(Xnor(A, B), C));
    break;
  case 0xe5:
    if (ABCIsConst)
      Res = Xor(A, Nor(And(A, B), C));
    break;
  case 0xe6:
    if (ABCIsConst)
      Res = Or(And(A, B), Xor(B, C));
    break;
  case 0xe7:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), Xnor(A, C));
    break;
  case 0xe8:
    if (ABCIsConst)
      Res = Xor(Or(A, B), Nor(Xnor(A, B), C));
    break;
  case 0xe9:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nand(Nand(A, B), C));
    break;
  case 0xea:
    if (ABCIsConst)
      Res = Or(And(A, B), C);
    break;
  case 0xeb:
    if (ABCIsConst)
      Res = Or(Xnor(A, B), C);
    break;
  case 0xec:
    if (ABCIsConst)
      Res = Or(And(A, C), B);
    break;
  case 0xed:
    if (ABCIsConst)
      Res = Or(Xnor(A, C), B);
    break;
  case 0xee:
    Res = Or(B, C);
    break;
  case 0xef:
    if (ABCIsConst)
      Res = Nand(A, Nor(B, C));
    break;
  case 0xf0:
    Res = A;
    break;
  case 0xf1:
    if (ABCIsConst)
      Res = Or(A, Nor(B, C));
    break;
  case 0xf2:
    if (ABCIsConst)
      Res = Or(A, Nor(B, Not(C)));
    break;
  case 0xf3:
    if (ABIsConst)
      Res = Or(A, Not(B));
    break;
  case 0xf4:
    if (ABCIsConst)
      Res = Or(A, Nor(C, Not(B)));
    break;
  case 0xf5:
    if (ACIsConst)
      Res = Or(A, Not(C));
    break;
  case 0xf6:
    if (ABCIsConst)
      Res = Or(A, Xor(B, C));
    break;
  case 0xf7:
    if (ABCIsConst)
      Res = Or(A, Nand(B, C));
    break;
  case 0xf8:
    if (ABCIsConst)
      Res = Or(A, And(B, C));
    break;
  case 0xf9:
    if (ABCIsConst)
      Res = Or(A, Xnor(B, C));
    break;
  case 0xfa:
    Res = Or(A, C);
    break;
  case 0xfb:
    if (ABCIsConst)
      Res = Nand(Nor(A, C), B);
    break;
  case 0xfc:
    Res = Or(A, B);
    break;
  case 0xfd:
    if (ABCIsConst)
      Res = Nand(Nor(A, B), C);
    break;
  case 0xfe:
    if (ABCIsConst)
      Res = Or(Or(A, B), C);
    break;
  case 0xff:
    Res = {Constant::getAllOnesValue(Ty), 0xff};
    break;
  }

  assert((Res.first == nullptr || Res.second == Imm) &&
         "Simplification of ternary logic does not verify!");
  return Res.first;
}

static Value *simplifyX86insertps(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (!CInt)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

  // The immediate permute control byte looks like this:
  //  [3:0] - zero mask for each 32-bit lane
  //  [5:4] - select one 32-bit destination lane
  //  [7:6] - select one 32-bit source lane

  uint8_t Imm = CInt->getZExtValue();
  uint8_t ZMask = Imm & 0xf;
  uint8_t DestLane = (Imm >> 4) & 0x3;
  uint8_t SourceLane = (Imm >> 6) & 0x3;

  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);

  // If all zero mask bits are set, this was just a weird way to
  // generate a zero vector.
  if (ZMask == 0xf)
    return ZeroVector;

  // Initialize by passing all of the first source bits through.
  int ShuffleMask[4] = {0, 1, 2, 3};

  // We may replace the second operand with the zero vector.
  Value *V1 = II.getArgOperand(1);

  if (ZMask) {
    // If the zero mask is being used with a single input or the zero mask
    // overrides the destination lane, this is a shuffle with the zero vector.
    if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
        (ZMask & (1 << DestLane))) {
      V1 = ZeroVector;
      // We may still move 32 bits of the first source vector from one lane
      // to another.
      ShuffleMask[DestLane] = SourceLane;
      // The zero mask may override the previous insert operation.
      for (unsigned i = 0; i < 4; ++i)
        if ((ZMask >> i) & 0x1)
          ShuffleMask[i] = i + 4;
    } else {
      // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
      return nullptr;
    }
  } else {
    // Replace the selected destination lane with the selected source lane.
    ShuffleMask[DestLane] = SourceLane + 4;
  }

  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
}

/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
/// or conversion to a shuffle vector.
static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
                               ConstantInt *CILength, ConstantInt *CIIndex,
                               InstCombiner::BuilderTy &Builder) {
  auto LowConstantHighUndef = [&](uint64_t Val) {
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  };

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *CI0 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;

  // Attempt to constant fold.
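  // For example, EXTRQI(x, /*Length=*/16, /*Index=*/8) returns bits [23:8] of
  // x's low element, zero-extended to 64 bits; the high result element is
  // undefined.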
  if (CILength && CIIndex) {
    // From AMD documentation: "The bit index and field length are each six
    // bits in length; other bits of the field are ignored."
    APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
    APInt APLength = CILength->getValue().zextOrTrunc(6);

    unsigned Index = APIndex.getZExtValue();

    // From AMD documentation: "a value of zero in the field length is
    // defined as length of 64".
    unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

    // From AMD documentation: "If the sum of the bit index + length field
    // is greater than 64, the results are undefined".
    unsigned End = Index + Length;

    // Note that both field index and field length are 8-bit quantities.
    // Since variables 'Index' and 'Length' are unsigned values
    // obtained from zero-extending field index and field length
    // respectively, their sum should never wrap around.
    if (End > 64)
      return UndefValue::get(II.getType());

    // If we are extracting whole bytes, we can convert this to a shuffle.
    // Lowering can recognize EXTRQI shuffle masks.
    if ((Length % 8) == 0 && (Index % 8) == 0) {
      // Convert bit indices to byte indices.
      Length /= 8;
      Index /= 8;

      Type *IntTy8 = Type::getInt8Ty(II.getContext());
      auto *ShufTy = FixedVectorType::get(IntTy8, 16);

      SmallVector<int, 16> ShuffleMask;
      for (int i = 0; i != (int)Length; ++i)
        ShuffleMask.push_back(i + Index);
      for (int i = Length; i != 8; ++i)
        ShuffleMask.push_back(i + 16);
      for (int i = 8; i != 16; ++i)
        ShuffleMask.push_back(-1);

      Value *SV = Builder.CreateShuffleVector(
          Builder.CreateBitCast(Op0, ShufTy),
          ConstantAggregateZero::get(ShufTy), ShuffleMask);
      return Builder.CreateBitCast(SV, II.getType());
    }

    // Constant Fold - shift Index'th bit to lowest position and mask off
    // Length bits.
    if (CI0) {
      APInt Elt = CI0->getValue();
      Elt.lshrInPlace(Index);
      Elt = Elt.zextOrTrunc(Length);
      return LowConstantHighUndef(Elt.getZExtValue());
    }

    // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
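    // (EXTRQ reads the length/index fields from the low bits of a second XMM
    // operand; once both are known constants the immediate form needs one
    // register fewer.)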
    if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
      Value *Args[] = {Op0, CILength, CIIndex};
      Module *M = II.getModule();
      Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
      return Builder.CreateCall(F, Args);
    }
  }

  // Constant Fold - extraction from zero is always {zero, undef}.
  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);

  return nullptr;
}

/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
/// folding or conversion to a shuffle vector.
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
  // From AMD documentation: "The bit index and field length are each six bits
  // in length; other bits of the field are ignored."
  APIndex = APIndex.zextOrTrunc(6);
  APLength = APLength.zextOrTrunc(6);

  // Attempt to constant fold.
  unsigned Index = APIndex.getZExtValue();

  // From AMD documentation: "a value of zero in the field length is
  // defined as length of 64".
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  // From AMD documentation: "If the sum of the bit index + length field
  // is greater than 64, the results are undefined".
  unsigned End = Index + Length;

  // Note that both field index and field length are 8-bit quantities.
  // Since variables 'Index' and 'Length' are unsigned values
  // obtained from zero-extending field index and field length
  // respectively, their sum should never wrap around.
  if (End > 64)
    return UndefValue::get(II.getType());

  // If we are inserting whole bytes, we can convert this to a shuffle.
  // Lowering can recognize INSERTQI shuffle masks.
  if ((Length % 8) == 0 && (Index % 8) == 0) {
    // Convert bit indices to byte indices.
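    // e.g. Length == 16, Index == 8 becomes a byte shuffle that keeps byte 0
    // of Op0, inserts bytes 0..1 of Op1 at bytes 1..2, and keeps bytes 3..7.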
    Length /= 8;
    Index /= 8;

    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    auto *ShufTy = FixedVectorType::get(IntTy8, 16);

    SmallVector<int, 16> ShuffleMask;
    for (int i = 0; i != (int)Index; ++i)
      ShuffleMask.push_back(i);
    for (int i = 0; i != (int)Length; ++i)
      ShuffleMask.push_back(i + 16);
    for (int i = Index + Length; i != 8; ++i)
      ShuffleMask.push_back(i);
    for (int i = 8; i != 16; ++i)
      ShuffleMask.push_back(-1);

    Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
                                            Builder.CreateBitCast(Op1, ShufTy),
                                            ShuffleMask);
    return Builder.CreateBitCast(SV, II.getType());
  }

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
  auto *CI00 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;
  auto *CI10 =
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
         : nullptr;

  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
  if (CI00 && CI10) {
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
    V00 = V00 & ~Mask;
    V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
    APInt Val = V00 | V10;
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  }

  // If we were an INSERTQ call, we'll save demanded elements if we convert to
  // INSERTQI.
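  // (INSERTQ reads the length/index fields from the second element of Op1, so
  // the immediate form also shrinks the set of demanded elements.)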
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Constant *CILength = ConstantInt::get(IntTy8, Length, false);
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    Module *M = II.getModule();
    Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
    return Builder.CreateCall(F, Args);
  }

  return nullptr;
}

/// Attempt to convert pshufb* to shufflevector if the mask is constant.
static Value *simplifyX86pshufb(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[64];

  // Each byte in the shuffle control mask forms an index to permute the
  // corresponding byte in the destination operand.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

    // If the most significant bit (bit[7]) of each byte of the shuffle
    // control mask is set, then zero is written in the result byte.
    // The zero vector is in the right-hand side of the resulting
    // shufflevector.

    // The value of each index for the high 128-bit lane is the least
    // significant 4 bits of the respective shuffle control byte.
    Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
    Indexes[I] = Index;
  }

  auto V1 = II.getArgOperand(0);
  auto V2 = Constant::getNullValue(VecTy);
  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
}

/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
                                    InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[16];

  // The intrinsics only read one or two bits, clear the rest.
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    APInt Index = cast<ConstantInt>(COp)->getValue();
    Index = Index.zextOrTrunc(32).getLoBits(2);

    // The PD variants use bit 1 to select the per-lane element index, so
    // shift down to convert to a generic shuffle mask index.
    if (IsPD)
      Index.lshrInPlace(1);

    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128-bit half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

    Indexes[I] = Index.getZExtValue();
  }

  auto V1 = II.getArgOperand(0);
  return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts));
}

/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
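/// Unlike pshufb, vperm* selects whole elements from anywhere in the source
/// vector (the shuffle may cross 128-bit lanes), so each constant mask
/// element maps directly onto one shufflevector index.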
static Value *simplifyX86vpermv(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
         "Unexpected shuffle mask size");

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[64];

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
    Index &= Size - 1;
    Indexes[I] = Index;
  }

  auto V1 = II.getArgOperand(0);
  return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size));
}

/// Attempt to convert vpermi2/vpermt2 to shufflevector if the mask is
/// constant.
static Value *simplifyX86vpermv3(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
  assert((Size == 2 || Size == 4 || Size == 8 || Size == 16 || Size == 32 ||
          Size == 64) &&
         "Unexpected shuffle mask size");

  // Construct a shuffle mask from constant integers or UNDEFs.
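  // The mask selects from the concatenation of both sources, so valid indices
  // span [0, 2 * Size) and are reduced modulo 2 * Size below.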
2123*0fca6ea1SDimitry Andric int Indexes[64]; 2124*0fca6ea1SDimitry Andric 2125*0fca6ea1SDimitry Andric for (unsigned I = 0; I < Size; ++I) { 2126*0fca6ea1SDimitry Andric Constant *COp = V->getAggregateElement(I); 2127*0fca6ea1SDimitry Andric if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 2128*0fca6ea1SDimitry Andric return nullptr; 2129*0fca6ea1SDimitry Andric 2130*0fca6ea1SDimitry Andric if (isa<UndefValue>(COp)) { 2131*0fca6ea1SDimitry Andric Indexes[I] = -1; 2132*0fca6ea1SDimitry Andric continue; 2133*0fca6ea1SDimitry Andric } 2134*0fca6ea1SDimitry Andric 2135*0fca6ea1SDimitry Andric uint32_t Index = cast<ConstantInt>(COp)->getZExtValue(); 2136*0fca6ea1SDimitry Andric Index &= (2 * Size) - 1; 2137*0fca6ea1SDimitry Andric Indexes[I] = Index; 2138*0fca6ea1SDimitry Andric } 2139*0fca6ea1SDimitry Andric 2140*0fca6ea1SDimitry Andric auto V1 = II.getArgOperand(0); 2141*0fca6ea1SDimitry Andric auto V2 = II.getArgOperand(2); 2142*0fca6ea1SDimitry Andric return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, Size)); 2143*0fca6ea1SDimitry Andric } 2144*0fca6ea1SDimitry Andric 2145bdd1243dSDimitry Andric std::optional<Instruction *> 2146e8d8bef9SDimitry Andric X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { 2147e8d8bef9SDimitry Andric auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width, 2148e8d8bef9SDimitry Andric unsigned DemandedWidth) { 2149e8d8bef9SDimitry Andric APInt UndefElts(Width, 0); 2150e8d8bef9SDimitry Andric APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); 2151e8d8bef9SDimitry Andric return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); 2152e8d8bef9SDimitry Andric }; 2153e8d8bef9SDimitry Andric 2154e8d8bef9SDimitry Andric Intrinsic::ID IID = II.getIntrinsicID(); 2155e8d8bef9SDimitry Andric switch (IID) { 2156e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bextr_32: 2157e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bextr_64: 2158e8d8bef9SDimitry Andric case Intrinsic::x86_tbm_bextri_u32: 2159e8d8bef9SDimitry Andric case Intrinsic::x86_tbm_bextri_u64: 2160e8d8bef9SDimitry Andric // If the RHS is a constant we can try some simplifications. 2161e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2162e8d8bef9SDimitry Andric uint64_t Shift = C->getZExtValue(); 2163e8d8bef9SDimitry Andric uint64_t Length = (Shift >> 8) & 0xff; 2164e8d8bef9SDimitry Andric Shift &= 0xff; 2165e8d8bef9SDimitry Andric unsigned BitWidth = II.getType()->getIntegerBitWidth(); 2166e8d8bef9SDimitry Andric // If the length is 0 or the shift is out of range, replace with zero. 2167e8d8bef9SDimitry Andric if (Length == 0 || Shift >= BitWidth) { 2168e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2169e8d8bef9SDimitry Andric } 2170e8d8bef9SDimitry Andric // If the LHS is also a constant, we can completely constant fold this. 2171e8d8bef9SDimitry Andric if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2172e8d8bef9SDimitry Andric uint64_t Result = InC->getZExtValue() >> Shift; 2173e8d8bef9SDimitry Andric if (Length > BitWidth) 2174e8d8bef9SDimitry Andric Length = BitWidth; 2175e8d8bef9SDimitry Andric Result &= maskTrailingOnes<uint64_t>(Length); 2176e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2177e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2178e8d8bef9SDimitry Andric } 2179e8d8bef9SDimitry Andric // TODO should we turn this into 'and' if shift is 0? 
Or 'shl' if we 2180e8d8bef9SDimitry Andric // are only masking bits that a shift already cleared? 2181e8d8bef9SDimitry Andric } 2182e8d8bef9SDimitry Andric break; 2183e8d8bef9SDimitry Andric 2184e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bzhi_32: 2185e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bzhi_64: 2186e8d8bef9SDimitry Andric // If the RHS is a constant we can try some simplifications. 2187e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2188e8d8bef9SDimitry Andric uint64_t Index = C->getZExtValue() & 0xff; 2189e8d8bef9SDimitry Andric unsigned BitWidth = II.getType()->getIntegerBitWidth(); 2190e8d8bef9SDimitry Andric if (Index >= BitWidth) { 2191e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 2192e8d8bef9SDimitry Andric } 2193e8d8bef9SDimitry Andric if (Index == 0) { 2194e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2195e8d8bef9SDimitry Andric } 2196e8d8bef9SDimitry Andric // If the LHS is also a constant, we can completely constant fold this. 2197e8d8bef9SDimitry Andric if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2198e8d8bef9SDimitry Andric uint64_t Result = InC->getZExtValue(); 2199e8d8bef9SDimitry Andric Result &= maskTrailingOnes<uint64_t>(Index); 2200e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2201e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2202e8d8bef9SDimitry Andric } 2203e8d8bef9SDimitry Andric // TODO should we convert this to an AND if the RHS is constant? 2204e8d8bef9SDimitry Andric } 2205e8d8bef9SDimitry Andric break; 2206e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pext_32: 2207e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pext_64: 2208e8d8bef9SDimitry Andric if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2209e8d8bef9SDimitry Andric if (MaskC->isNullValue()) { 2210e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2211e8d8bef9SDimitry Andric } 2212e8d8bef9SDimitry Andric if (MaskC->isAllOnesValue()) { 2213e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 2214e8d8bef9SDimitry Andric } 2215e8d8bef9SDimitry Andric 221681ad6265SDimitry Andric unsigned MaskIdx, MaskLen; 221781ad6265SDimitry Andric if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { 2218e8d8bef9SDimitry Andric // Any single contiguous sequence of 1s anywhere in the mask simply 2219e8d8bef9SDimitry Andric // describes a subset of the input bits shifted to the appropriate 2220e8d8bef9SDimitry Andric // position. Replace with the straightforward IR. 2221e8d8bef9SDimitry Andric Value *Input = II.getArgOperand(0); 2222e8d8bef9SDimitry Andric Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1)); 222381ad6265SDimitry Andric Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); 222481ad6265SDimitry Andric Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt); 2225e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Shifted); 2226e8d8bef9SDimitry Andric } 2227e8d8bef9SDimitry Andric 2228e8d8bef9SDimitry Andric if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2229e8d8bef9SDimitry Andric uint64_t Src = SrcC->getZExtValue(); 2230e8d8bef9SDimitry Andric uint64_t Mask = MaskC->getZExtValue(); 2231e8d8bef9SDimitry Andric uint64_t Result = 0; 2232e8d8bef9SDimitry Andric uint64_t BitToSet = 1; 2233e8d8bef9SDimitry Andric 2234e8d8bef9SDimitry Andric while (Mask) { 2235e8d8bef9SDimitry Andric // Isolate lowest set bit.
2236e8d8bef9SDimitry Andric uint64_t BitToTest = Mask & -Mask; 2237e8d8bef9SDimitry Andric if (BitToTest & Src) 2238e8d8bef9SDimitry Andric Result |= BitToSet; 2239e8d8bef9SDimitry Andric 2240e8d8bef9SDimitry Andric BitToSet <<= 1; 2241e8d8bef9SDimitry Andric // Clear lowest set bit. 2242e8d8bef9SDimitry Andric Mask &= Mask - 1; 2243e8d8bef9SDimitry Andric } 2244e8d8bef9SDimitry Andric 2245e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2246e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2247e8d8bef9SDimitry Andric } 2248e8d8bef9SDimitry Andric } 2249e8d8bef9SDimitry Andric break; 2250e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pdep_32: 2251e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pdep_64: 2252e8d8bef9SDimitry Andric if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2253e8d8bef9SDimitry Andric if (MaskC->isNullValue()) { 2254e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2255e8d8bef9SDimitry Andric } 2256e8d8bef9SDimitry Andric if (MaskC->isAllOnesValue()) { 2257e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 2258e8d8bef9SDimitry Andric } 225981ad6265SDimitry Andric 226081ad6265SDimitry Andric unsigned MaskIdx, MaskLen; 226181ad6265SDimitry Andric if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { 2262e8d8bef9SDimitry Andric // Any single contiguous sequence of 1s anywhere in the mask simply 2263e8d8bef9SDimitry Andric // describes a subset of the input bits shifted to the appropriate 2264e8d8bef9SDimitry Andric // position. Replace with the straightforward IR. 2265e8d8bef9SDimitry Andric Value *Input = II.getArgOperand(0); 226681ad6265SDimitry Andric Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); 226781ad6265SDimitry Andric Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt); 2268e8d8bef9SDimitry Andric Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1)); 2269e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Masked); 2270e8d8bef9SDimitry Andric } 2271e8d8bef9SDimitry Andric 2272e8d8bef9SDimitry Andric if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2273e8d8bef9SDimitry Andric uint64_t Src = SrcC->getZExtValue(); 2274e8d8bef9SDimitry Andric uint64_t Mask = MaskC->getZExtValue(); 2275e8d8bef9SDimitry Andric uint64_t Result = 0; 2276e8d8bef9SDimitry Andric uint64_t BitToTest = 1; 2277e8d8bef9SDimitry Andric 2278e8d8bef9SDimitry Andric while (Mask) { 2279e8d8bef9SDimitry Andric // Isolate lowest set bit.
2280e8d8bef9SDimitry Andric uint64_t BitToSet = Mask & -Mask; 2281e8d8bef9SDimitry Andric if (BitToTest & Src) 2282e8d8bef9SDimitry Andric Result |= BitToSet; 2283e8d8bef9SDimitry Andric 2284e8d8bef9SDimitry Andric BitToTest <<= 1; 2285e8d8bef9SDimitry Andric // Clear lowest set bit. 2286e8d8bef9SDimitry Andric Mask &= Mask - 1; 2287e8d8bef9SDimitry Andric } 2288e8d8bef9SDimitry Andric 2289e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2290e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2291e8d8bef9SDimitry Andric } 2292e8d8bef9SDimitry Andric } 2293e8d8bef9SDimitry Andric break; 2294e8d8bef9SDimitry Andric 2295e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvtss2si: 2296e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvtss2si64: 2297e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvttss2si: 2298e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvttss2si64: 2299e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvtsd2si: 2300e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvtsd2si64: 2301e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvttsd2si: 2302e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvttsd2si64: 2303e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2si32: 2304e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2si64: 2305e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2usi32: 2306e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2usi64: 2307e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2si32: 2308e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2si64: 2309e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2usi32: 2310e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2usi64: 2311e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2si: 2312e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2si64: 2313e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2usi: 2314e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2usi64: 2315e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2si: 2316e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2si64: 2317e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2usi: 2318e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2usi64: { 2319e8d8bef9SDimitry Andric // These intrinsics only demand the 0th element of their input vectors. If 2320e8d8bef9SDimitry Andric // we can simplify the input based on that, do so now.
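// For example (values illustrative), lanes other than 0 can be dropped:
//   %v = insertelement <4 x float> %x, float %f, i64 1
//   %r = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v)
// becomes
//   %r = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %x)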
2321e8d8bef9SDimitry Andric Value *Arg = II.getArgOperand(0); 2322e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements(); 2323e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) { 2324e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, V); 2325e8d8bef9SDimitry Andric } 2326e8d8bef9SDimitry Andric break; 2327e8d8bef9SDimitry Andric } 2328e8d8bef9SDimitry Andric 2329e8d8bef9SDimitry Andric case Intrinsic::x86_mmx_pmovmskb: 2330e8d8bef9SDimitry Andric case Intrinsic::x86_sse_movmsk_ps: 2331e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_movmsk_pd: 2332e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pmovmskb_128: 2333e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_pd_256: 2334e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_ps_256: 2335e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pmovmskb: 2336e8d8bef9SDimitry Andric if (Value *V = simplifyX86movmsk(II, IC.Builder)) { 2337e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2338e8d8bef9SDimitry Andric } 2339e8d8bef9SDimitry Andric break; 2340e8d8bef9SDimitry Andric 2341e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comieq_ss: 2342e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comige_ss: 2343e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comigt_ss: 2344e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comile_ss: 2345e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comilt_ss: 2346e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comineq_ss: 2347e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomieq_ss: 2348e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomige_ss: 2349e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomigt_ss: 2350e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomile_ss: 2351e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomilt_ss: 2352e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomineq_ss: 2353e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comieq_sd: 2354e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comige_sd: 2355e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comigt_sd: 2356e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comile_sd: 2357e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comilt_sd: 2358e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comineq_sd: 2359e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomieq_sd: 2360e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomige_sd: 2361e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomigt_sd: 2362e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomile_sd: 2363e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomilt_sd: 2364e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomineq_sd: 2365e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcomi_ss: 2366e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcomi_sd: 2367e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_cmp_ss: 2368e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_cmp_sd: { 2369e8d8bef9SDimitry Andric // These intrinsics only demand the 0th element of their input vectors. If 2370e8d8bef9SDimitry Andric // we can simplify the input based on that, do so now. 
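// Likewise, e.g. comieq.ss compares only element 0 of its two <4 x float>
// operands, so anything feeding the upper lanes of Arg0/Arg1 is dead and
// can be simplified away below.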
2371e8d8bef9SDimitry Andric bool MadeChange = false; 2372e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2373e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2374e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements(); 2375e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) { 2376e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2377e8d8bef9SDimitry Andric MadeChange = true; 2378e8d8bef9SDimitry Andric } 2379e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { 2380e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2381e8d8bef9SDimitry Andric MadeChange = true; 2382e8d8bef9SDimitry Andric } 2383e8d8bef9SDimitry Andric if (MadeChange) { 2384e8d8bef9SDimitry Andric return &II; 2385e8d8bef9SDimitry Andric } 2386e8d8bef9SDimitry Andric break; 2387e8d8bef9SDimitry Andric } 2388e8d8bef9SDimitry Andric 2389e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_ps_512: 2390e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_ps_512: 2391e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_ps_512: 2392e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_ps_512: 2393e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_pd_512: 2394e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_pd_512: 2395e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_pd_512: 2396e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_pd_512: 2397e8d8bef9SDimitry Andric // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular 2398e8d8bef9SDimitry Andric // IR operations. 2399e8d8bef9SDimitry Andric if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) { 2400e8d8bef9SDimitry Andric if (R->getValue() == 4) { 2401e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2402e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2403e8d8bef9SDimitry Andric 2404e8d8bef9SDimitry Andric Value *V; 2405e8d8bef9SDimitry Andric switch (IID) { 2406e8d8bef9SDimitry Andric default: 2407e8d8bef9SDimitry Andric llvm_unreachable("Case stmts out of sync!"); 2408e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_ps_512: 2409e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_pd_512: 2410e8d8bef9SDimitry Andric V = IC.Builder.CreateFAdd(Arg0, Arg1); 2411e8d8bef9SDimitry Andric break; 2412e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_ps_512: 2413e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_pd_512: 2414e8d8bef9SDimitry Andric V = IC.Builder.CreateFSub(Arg0, Arg1); 2415e8d8bef9SDimitry Andric break; 2416e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_ps_512: 2417e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_pd_512: 2418e8d8bef9SDimitry Andric V = IC.Builder.CreateFMul(Arg0, Arg1); 2419e8d8bef9SDimitry Andric break; 2420e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_ps_512: 2421e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_pd_512: 2422e8d8bef9SDimitry Andric V = IC.Builder.CreateFDiv(Arg0, Arg1); 2423e8d8bef9SDimitry Andric break; 2424e8d8bef9SDimitry Andric } 2425e8d8bef9SDimitry Andric 2426e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2427e8d8bef9SDimitry Andric } 2428e8d8bef9SDimitry Andric } 2429e8d8bef9SDimitry Andric break; 2430e8d8bef9SDimitry Andric 2431e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_ss_round: 2432e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_ss_round: 2433e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_ss_round: 2434e8d8bef9SDimitry 
Andric case Intrinsic::x86_avx512_mask_sub_ss_round: 2435e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_sd_round: 2436e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_sd_round: 2437e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_sd_round: 2438e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_sd_round: 2439e8d8bef9SDimitry Andric // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular 2440e8d8bef9SDimitry Andric // IR operations. 2441e8d8bef9SDimitry Andric if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) { 2442e8d8bef9SDimitry Andric if (R->getValue() == 4) { 2443e8d8bef9SDimitry Andric // Extract the elements as scalars. 2444e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2445e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2446e8d8bef9SDimitry Andric Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0); 2447e8d8bef9SDimitry Andric Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0); 2448e8d8bef9SDimitry Andric 2449e8d8bef9SDimitry Andric Value *V; 2450e8d8bef9SDimitry Andric switch (IID) { 2451e8d8bef9SDimitry Andric default: 2452e8d8bef9SDimitry Andric llvm_unreachable("Case stmts out of sync!"); 2453e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_ss_round: 2454e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_sd_round: 2455e8d8bef9SDimitry Andric V = IC.Builder.CreateFAdd(LHS, RHS); 2456e8d8bef9SDimitry Andric break; 2457e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_ss_round: 2458e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_sd_round: 2459e8d8bef9SDimitry Andric V = IC.Builder.CreateFSub(LHS, RHS); 2460e8d8bef9SDimitry Andric break; 2461e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_ss_round: 2462e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_sd_round: 2463e8d8bef9SDimitry Andric V = IC.Builder.CreateFMul(LHS, RHS); 2464e8d8bef9SDimitry Andric break; 2465e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_ss_round: 2466e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_sd_round: 2467e8d8bef9SDimitry Andric V = IC.Builder.CreateFDiv(LHS, RHS); 2468e8d8bef9SDimitry Andric break; 2469e8d8bef9SDimitry Andric } 2470e8d8bef9SDimitry Andric 2471e8d8bef9SDimitry Andric // Handle the masking aspect of the intrinsic. 2472e8d8bef9SDimitry Andric Value *Mask = II.getArgOperand(3); 2473e8d8bef9SDimitry Andric auto *C = dyn_cast<ConstantInt>(Mask); 2474e8d8bef9SDimitry Andric // We don't need a select if we know the mask bit is a 1. 2475e8d8bef9SDimitry Andric if (!C || !C->getValue()[0]) { 2476e8d8bef9SDimitry Andric // Cast the mask to an i1 vector and then extract the lowest element. 2477e8d8bef9SDimitry Andric auto *MaskTy = FixedVectorType::get( 2478e8d8bef9SDimitry Andric IC.Builder.getInt1Ty(), 2479e8d8bef9SDimitry Andric cast<IntegerType>(Mask->getType())->getBitWidth()); 2480e8d8bef9SDimitry Andric Mask = IC.Builder.CreateBitCast(Mask, MaskTy); 2481e8d8bef9SDimitry Andric Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0); 2482e8d8bef9SDimitry Andric // Extract the lowest element from the passthru operand. 2483e8d8bef9SDimitry Andric Value *Passthru = 2484e8d8bef9SDimitry Andric IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0); 2485e8d8bef9SDimitry Andric V = IC.Builder.CreateSelect(Mask, V, Passthru); 2486e8d8bef9SDimitry Andric } 2487e8d8bef9SDimitry Andric 2488e8d8bef9SDimitry Andric // Insert the result back into the original argument 0.
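// Net effect for, e.g., mask.add.ss.round(%a, %b, %passthru, %mask, 4)
// (names illustrative):
//   %x = extractelement <4 x float> %a, i64 0
//   %y = extractelement <4 x float> %b, i64 0
//   %op = fadd float %x, %y
//   %sel = select i1 <bit 0 of %mask>, float %op,
//                 float (extractelement <4 x float> %passthru, i64 0)
//   %res = insertelement <4 x float> %a, float %sel, i64 0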
2489e8d8bef9SDimitry Andric V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0); 2490e8d8bef9SDimitry Andric 2491e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2492e8d8bef9SDimitry Andric } 2493e8d8bef9SDimitry Andric } 2494e8d8bef9SDimitry Andric break; 2495e8d8bef9SDimitry Andric 2496e8d8bef9SDimitry Andric // Constant fold ashr( <A x Bi>, Ci ). 2497e8d8bef9SDimitry Andric // Constant fold lshr( <A x Bi>, Ci ). 2498e8d8bef9SDimitry Andric // Constant fold shl( <A x Bi>, Ci ). 2499e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrai_d: 2500e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrai_w: 2501e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrai_d: 2502e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrai_w: 2503e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_q_128: 2504e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_q_256: 2505e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_d_512: 2506e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_q_512: 2507e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_w_512: 2508e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrli_d: 2509e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrli_q: 2510e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrli_w: 2511e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrli_d: 2512e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrli_q: 2513e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrli_w: 2514e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrli_d_512: 2515e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrli_q_512: 2516e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrli_w_512: 2517e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pslli_d: 2518e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pslli_q: 2519e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pslli_w: 2520e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pslli_d: 2521e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pslli_q: 2522e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pslli_w: 2523e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pslli_d_512: 2524e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pslli_q_512: 2525e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pslli_w_512: 2526e8d8bef9SDimitry Andric if (Value *V = simplifyX86immShift(II, IC.Builder)) { 2527e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2528e8d8bef9SDimitry Andric } 2529e8d8bef9SDimitry Andric break; 2530e8d8bef9SDimitry Andric 2531e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psra_d: 2532e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psra_w: 2533e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psra_d: 2534e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psra_w: 2535e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_q_128: 2536e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_q_256: 2537e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_d_512: 2538e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_q_512: 2539e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_w_512: 2540e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrl_d: 2541e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrl_q: 2542e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrl_w: 2543e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrl_d: 2544e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrl_q: 2545e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrl_w: 2546e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrl_d_512: 2547e8d8bef9SDimitry Andric case 
Intrinsic::x86_avx512_psrl_q_512: 2548e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrl_w_512: 2549e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psll_d: 2550e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psll_q: 2551e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psll_w: 2552e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psll_d: 2553e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psll_q: 2554e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psll_w: 2555e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psll_d_512: 2556e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psll_q_512: 2557e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psll_w_512: { 2558e8d8bef9SDimitry Andric if (Value *V = simplifyX86immShift(II, IC.Builder)) { 2559e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2560e8d8bef9SDimitry Andric } 2561e8d8bef9SDimitry Andric 2562e8d8bef9SDimitry Andric // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector 2563e8d8bef9SDimitry Andric // operand to compute the shift amount. 2564e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2565e8d8bef9SDimitry Andric assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 && 2566e8d8bef9SDimitry Andric "Unexpected packed shift size"); 2567e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements(); 2568e8d8bef9SDimitry Andric 2569e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) { 2570e8d8bef9SDimitry Andric return IC.replaceOperand(II, 1, V); 2571e8d8bef9SDimitry Andric } 2572e8d8bef9SDimitry Andric break; 2573e8d8bef9SDimitry Andric } 2574e8d8bef9SDimitry Andric 2575e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_d: 2576e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_d_256: 2577e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_q: 2578e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_q_256: 2579e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_d_512: 2580e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_q_512: 2581e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_w_128: 2582e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_w_256: 2583e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_w_512: 2584e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrav_d: 2585e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrav_d_256: 2586e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_q_128: 2587e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_q_256: 2588e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_d_512: 2589e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_q_512: 2590e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_w_128: 2591e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_w_256: 2592e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_w_512: 2593e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_d: 2594e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_d_256: 2595e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_q: 2596e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_q_256: 2597e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_d_512: 2598e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_q_512: 2599e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_w_128: 2600e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_w_256: 2601e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_w_512: 2602e8d8bef9SDimitry Andric if (Value *V = simplifyX86varShift(II, IC.Builder)) { 
2603e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2604e8d8bef9SDimitry Andric } 2605e8d8bef9SDimitry Andric break; 2606e8d8bef9SDimitry Andric 2607e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packssdw_128: 2608e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packsswb_128: 2609e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packssdw: 2610e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packsswb: 2611e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packssdw_512: 2612e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packsswb_512: 2613e8d8bef9SDimitry Andric if (Value *V = simplifyX86pack(II, IC.Builder, true)) { 2614e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2615e8d8bef9SDimitry Andric } 2616e8d8bef9SDimitry Andric break; 2617e8d8bef9SDimitry Andric 2618e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packuswb_128: 2619e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_packusdw: 2620e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packusdw: 2621e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packuswb: 2622e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packusdw_512: 2623e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packuswb_512: 2624e8d8bef9SDimitry Andric if (Value *V = simplifyX86pack(II, IC.Builder, false)) { 2625e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2626e8d8bef9SDimitry Andric } 2627e8d8bef9SDimitry Andric break; 2628e8d8bef9SDimitry Andric 2629*0fca6ea1SDimitry Andric case Intrinsic::x86_sse2_pmulh_w: 2630*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmulh_w: 2631*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmulh_w_512: 2632*0fca6ea1SDimitry Andric if (Value *V = simplifyX86pmulh(II, IC.Builder, true, false)) { 2633*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 2634*0fca6ea1SDimitry Andric } 2635*0fca6ea1SDimitry Andric break; 2636*0fca6ea1SDimitry Andric 2637*0fca6ea1SDimitry Andric case Intrinsic::x86_sse2_pmulhu_w: 2638*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmulhu_w: 2639*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmulhu_w_512: 2640*0fca6ea1SDimitry Andric if (Value *V = simplifyX86pmulh(II, IC.Builder, false, false)) { 2641*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 2642*0fca6ea1SDimitry Andric } 2643*0fca6ea1SDimitry Andric break; 2644*0fca6ea1SDimitry Andric 2645*0fca6ea1SDimitry Andric case Intrinsic::x86_ssse3_pmul_hr_sw_128: 2646*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmul_hr_sw: 2647*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmul_hr_sw_512: 2648*0fca6ea1SDimitry Andric if (Value *V = simplifyX86pmulh(II, IC.Builder, true, true)) { 2649*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 2650*0fca6ea1SDimitry Andric } 2651*0fca6ea1SDimitry Andric break; 2652*0fca6ea1SDimitry Andric 2653*0fca6ea1SDimitry Andric case Intrinsic::x86_sse2_pmadd_wd: 2654*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmadd_wd: 2655*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmaddw_d_512: 2656*0fca6ea1SDimitry Andric if (Value *V = simplifyX86pmadd(II, IC.Builder, true)) { 2657*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 2658*0fca6ea1SDimitry Andric } 2659*0fca6ea1SDimitry Andric break; 2660*0fca6ea1SDimitry Andric 2661*0fca6ea1SDimitry Andric case Intrinsic::x86_ssse3_pmadd_ub_sw_128: 2662*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmadd_ub_sw: 2663*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmaddubs_w_512: 2664*0fca6ea1SDimitry Andric if (Value *V = simplifyX86pmadd(II, IC.Builder, false)) { 
2665*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 2666*0fca6ea1SDimitry Andric } 2667*0fca6ea1SDimitry Andric break; 2668*0fca6ea1SDimitry Andric 2669e8d8bef9SDimitry Andric case Intrinsic::x86_pclmulqdq: 2670e8d8bef9SDimitry Andric case Intrinsic::x86_pclmulqdq_256: 2671e8d8bef9SDimitry Andric case Intrinsic::x86_pclmulqdq_512: { 2672e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) { 2673e8d8bef9SDimitry Andric unsigned Imm = C->getZExtValue(); 2674e8d8bef9SDimitry Andric 2675e8d8bef9SDimitry Andric bool MadeChange = false; 2676e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2677e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2678e8d8bef9SDimitry Andric unsigned VWidth = 2679e8d8bef9SDimitry Andric cast<FixedVectorType>(Arg0->getType())->getNumElements(); 2680e8d8bef9SDimitry Andric 2681e8d8bef9SDimitry Andric APInt UndefElts1(VWidth, 0); 2682e8d8bef9SDimitry Andric APInt DemandedElts1 = 2683e8d8bef9SDimitry Andric APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1)); 2684e8d8bef9SDimitry Andric if (Value *V = 2685e8d8bef9SDimitry Andric IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) { 2686e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2687e8d8bef9SDimitry Andric MadeChange = true; 2688e8d8bef9SDimitry Andric } 2689e8d8bef9SDimitry Andric 2690e8d8bef9SDimitry Andric APInt UndefElts2(VWidth, 0); 2691e8d8bef9SDimitry Andric APInt DemandedElts2 = 2692e8d8bef9SDimitry Andric APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1)); 2693e8d8bef9SDimitry Andric if (Value *V = 2694e8d8bef9SDimitry Andric IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) { 2695e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2696e8d8bef9SDimitry Andric MadeChange = true; 2697e8d8bef9SDimitry Andric } 2698e8d8bef9SDimitry Andric 2699e8d8bef9SDimitry Andric // If either operand's demanded elements are all undef, the result is zero.
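// (E.g. with Imm bit 0 set, each 128-bit product reads only the upper
// quadword of its lane of Arg0; if that quadword is undef, the whole call
// folds to zeroinitializer.)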
2700e8d8bef9SDimitry Andric if (DemandedElts1.isSubsetOf(UndefElts1) || 2701e8d8bef9SDimitry Andric DemandedElts2.isSubsetOf(UndefElts2)) { 2702e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2703e8d8bef9SDimitry Andric ConstantAggregateZero::get(II.getType())); 2704e8d8bef9SDimitry Andric } 2705e8d8bef9SDimitry Andric 2706e8d8bef9SDimitry Andric if (MadeChange) { 2707e8d8bef9SDimitry Andric return &II; 2708e8d8bef9SDimitry Andric } 2709e8d8bef9SDimitry Andric } 2710e8d8bef9SDimitry Andric break; 2711e8d8bef9SDimitry Andric } 2712e8d8bef9SDimitry Andric 2713e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_insertps: 2714e8d8bef9SDimitry Andric if (Value *V = simplifyX86insertps(II, IC.Builder)) { 2715e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2716e8d8bef9SDimitry Andric } 2717e8d8bef9SDimitry Andric break; 2718e8d8bef9SDimitry Andric 2719e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrq: { 2720e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2721e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2722e8d8bef9SDimitry Andric unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2723e8d8bef9SDimitry Andric unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements(); 2724e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && 2725e8d8bef9SDimitry Andric Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && 2726e8d8bef9SDimitry Andric VWidth1 == 16 && "Unexpected operand sizes"); 2727e8d8bef9SDimitry Andric 2728e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2729fe6060f1SDimitry Andric auto *C1 = dyn_cast<Constant>(Op1); 2730fe6060f1SDimitry Andric auto *CILength = 2731e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0)) 2732e8d8bef9SDimitry Andric : nullptr; 2733fe6060f1SDimitry Andric auto *CIIndex = 2734e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1)) 2735e8d8bef9SDimitry Andric : nullptr; 2736e8d8bef9SDimitry Andric 2737e8d8bef9SDimitry Andric // Attempt to simplify to a constant, shuffle vector or EXTRQI call. 2738e8d8bef9SDimitry Andric if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) { 2739e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2740e8d8bef9SDimitry Andric } 2741e8d8bef9SDimitry Andric 2742e8d8bef9SDimitry Andric // EXTRQ only uses the lowest 64-bits of the first 128-bit vector 2743e8d8bef9SDimitry Andric // operand and the lowest 16-bits of the second. 2744e8d8bef9SDimitry Andric bool MadeChange = false; 2745e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { 2746e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2747e8d8bef9SDimitry Andric MadeChange = true; 2748e8d8bef9SDimitry Andric } 2749e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) { 2750e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2751e8d8bef9SDimitry Andric MadeChange = true; 2752e8d8bef9SDimitry Andric } 2753e8d8bef9SDimitry Andric if (MadeChange) { 2754e8d8bef9SDimitry Andric return &II; 2755e8d8bef9SDimitry Andric } 2756e8d8bef9SDimitry Andric break; 2757e8d8bef9SDimitry Andric } 2758e8d8bef9SDimitry Andric 2759e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrqi: { 2760e8d8bef9SDimitry Andric // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining 2761e8d8bef9SDimitry Andric // bits of the lower 64-bits.
The upper 64-bits are undefined. 2762e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2763e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2764e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && 2765e8d8bef9SDimitry Andric "Unexpected operand size"); 2766e8d8bef9SDimitry Andric 2767e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2768fe6060f1SDimitry Andric auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1)); 2769fe6060f1SDimitry Andric auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2)); 2770e8d8bef9SDimitry Andric 2771e8d8bef9SDimitry Andric // Attempt to simplify to a constant or shuffle vector. 2772e8d8bef9SDimitry Andric if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) { 2773e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2774e8d8bef9SDimitry Andric } 2775e8d8bef9SDimitry Andric 2776e8d8bef9SDimitry Andric // EXTRQI only uses the lowest 64-bits of the first 128-bit vector 2777e8d8bef9SDimitry Andric // operand. 2778e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { 2779e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, V); 2780e8d8bef9SDimitry Andric } 2781e8d8bef9SDimitry Andric break; 2782e8d8bef9SDimitry Andric } 2783e8d8bef9SDimitry Andric 2784e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertq: { 2785e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2786e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2787e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2788e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && 2789e8d8bef9SDimitry Andric Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && 2790e8d8bef9SDimitry Andric cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 && 2791e8d8bef9SDimitry Andric "Unexpected operand size"); 2792e8d8bef9SDimitry Andric 2793e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2794fe6060f1SDimitry Andric auto *C1 = dyn_cast<Constant>(Op1); 2795fe6060f1SDimitry Andric auto *CI11 = 2796e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1)) 2797e8d8bef9SDimitry Andric : nullptr; 2798e8d8bef9SDimitry Andric 2799e8d8bef9SDimitry Andric // Attempt to simplify to a constant, shuffle vector or INSERTQI call. 2800e8d8bef9SDimitry Andric if (CI11) { 2801e8d8bef9SDimitry Andric const APInt &V11 = CI11->getValue(); 2802e8d8bef9SDimitry Andric APInt Len = V11.zextOrTrunc(6); 2803e8d8bef9SDimitry Andric APInt Idx = V11.lshr(8).zextOrTrunc(6); 2804e8d8bef9SDimitry Andric if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) { 2805e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2806e8d8bef9SDimitry Andric } 2807e8d8bef9SDimitry Andric } 2808e8d8bef9SDimitry Andric 2809e8d8bef9SDimitry Andric // INSERTQ only uses the lowest 64-bits of the first 128-bit vector 2810e8d8bef9SDimitry Andric // operand. 
2811e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { 2812e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, V); 2813e8d8bef9SDimitry Andric } 2814e8d8bef9SDimitry Andric break; 2815e8d8bef9SDimitry Andric } 2816e8d8bef9SDimitry Andric 2817e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertqi: { 2818e8d8bef9SDimitry Andric // INSERTQI: Extract lowest Length bits from lower half of second source and 2819e8d8bef9SDimitry Andric // insert over first source starting at Index bit. The upper 64-bits are 2820e8d8bef9SDimitry Andric // undefined. 2821e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2822e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2823e8d8bef9SDimitry Andric unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2824e8d8bef9SDimitry Andric unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements(); 2825e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && 2826e8d8bef9SDimitry Andric Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && 2827e8d8bef9SDimitry Andric VWidth1 == 2 && "Unexpected operand sizes"); 2828e8d8bef9SDimitry Andric 2829e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2830fe6060f1SDimitry Andric auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2)); 2831fe6060f1SDimitry Andric auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3)); 2832e8d8bef9SDimitry Andric 2833e8d8bef9SDimitry Andric // Attempt to simplify to a constant or shuffle vector. 2834e8d8bef9SDimitry Andric if (CILength && CIIndex) { 2835e8d8bef9SDimitry Andric APInt Len = CILength->getValue().zextOrTrunc(6); 2836e8d8bef9SDimitry Andric APInt Idx = CIIndex->getValue().zextOrTrunc(6); 2837e8d8bef9SDimitry Andric if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) { 2838e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2839e8d8bef9SDimitry Andric } 2840e8d8bef9SDimitry Andric } 2841e8d8bef9SDimitry Andric 2842e8d8bef9SDimitry Andric // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector 2843e8d8bef9SDimitry Andric // operands. 
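// E.g. with Length = 8 and Index = 16 (values illustrative), the low 64
// bits of the result are
//   (Op0.lo64 & ~(0xFFull << 16)) | ((Op1.lo64 & 0xFF) << 16)
// and the upper 64 bits are undefined.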
2844e8d8bef9SDimitry Andric bool MadeChange = false; 2845e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { 2846e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2847e8d8bef9SDimitry Andric MadeChange = true; 2848e8d8bef9SDimitry Andric } 2849e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) { 2850e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2851e8d8bef9SDimitry Andric MadeChange = true; 2852e8d8bef9SDimitry Andric } 2853e8d8bef9SDimitry Andric if (MadeChange) { 2854e8d8bef9SDimitry Andric return &II; 2855e8d8bef9SDimitry Andric } 2856e8d8bef9SDimitry Andric break; 2857e8d8bef9SDimitry Andric } 2858e8d8bef9SDimitry Andric 2859e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_pblendvb: 2860e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_blendvps: 2861e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_blendvpd: 2862e8d8bef9SDimitry Andric case Intrinsic::x86_avx_blendv_ps_256: 2863e8d8bef9SDimitry Andric case Intrinsic::x86_avx_blendv_pd_256: 2864e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pblendvb: { 2865e8d8bef9SDimitry Andric // fold (blend A, A, Mask) -> A 2866e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2867e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2868e8d8bef9SDimitry Andric Value *Mask = II.getArgOperand(2); 2869e8d8bef9SDimitry Andric if (Op0 == Op1) { 2870e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Op0); 2871e8d8bef9SDimitry Andric } 2872e8d8bef9SDimitry Andric 2873e8d8bef9SDimitry Andric // Zero Mask - select 1st argument. 2874e8d8bef9SDimitry Andric if (isa<ConstantAggregateZero>(Mask)) { 2875e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Op0); 2876e8d8bef9SDimitry Andric } 2877e8d8bef9SDimitry Andric 2878e8d8bef9SDimitry Andric // Constant Mask - select 1st/2nd argument lane based on top bit of mask. 2879e8d8bef9SDimitry Andric if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) { 2880*0fca6ea1SDimitry Andric Constant *NewSelector = 2881*0fca6ea1SDimitry Andric getNegativeIsTrueBoolVec(ConstantMask, IC.getDataLayout()); 2882e8d8bef9SDimitry Andric return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); 2883e8d8bef9SDimitry Andric } 2884e8d8bef9SDimitry Andric 2885*0fca6ea1SDimitry Andric Mask = InstCombiner::peekThroughBitcast(Mask); 2886*0fca6ea1SDimitry Andric 2887*0fca6ea1SDimitry Andric // Peek through a one-use shuffle - VectorCombine should have simplified 2888*0fca6ea1SDimitry Andric // this for cases where we're splitting wider vectors to use blendv 2889*0fca6ea1SDimitry Andric // intrinsics. 2890*0fca6ea1SDimitry Andric Value *MaskSrc = nullptr; 2891*0fca6ea1SDimitry Andric ArrayRef<int> ShuffleMask; 2892*0fca6ea1SDimitry Andric if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(), 2893*0fca6ea1SDimitry Andric m_Mask(ShuffleMask))))) { 2894*0fca6ea1SDimitry Andric // Bail if the shuffle was irregular or contains undefs. 
2895*0fca6ea1SDimitry Andric int NumElts = cast<FixedVectorType>(MaskSrc->getType())->getNumElements(); 2896*0fca6ea1SDimitry Andric if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) || 2897*0fca6ea1SDimitry Andric any_of(ShuffleMask, 2898*0fca6ea1SDimitry Andric [NumElts](int M) { return M < 0 || M >= NumElts; })) 2899*0fca6ea1SDimitry Andric break; 2900*0fca6ea1SDimitry Andric Mask = InstCombiner::peekThroughBitcast(MaskSrc); 2901*0fca6ea1SDimitry Andric } 2902*0fca6ea1SDimitry Andric 2903e8d8bef9SDimitry Andric // Convert to a vector select if we can bypass casts and find a boolean 2904e8d8bef9SDimitry Andric // vector condition value. 2905e8d8bef9SDimitry Andric Value *BoolVec; 2906*0fca6ea1SDimitry Andric if (match(Mask, m_SExt(m_Value(BoolVec))) && 2907e8d8bef9SDimitry Andric BoolVec->getType()->isVectorTy() && 2908e8d8bef9SDimitry Andric BoolVec->getType()->getScalarSizeInBits() == 1) { 2909*0fca6ea1SDimitry Andric auto *MaskTy = cast<FixedVectorType>(Mask->getType()); 2910*0fca6ea1SDimitry Andric auto *OpTy = cast<FixedVectorType>(II.getType()); 2911*0fca6ea1SDimitry Andric unsigned NumMaskElts = MaskTy->getNumElements(); 2912*0fca6ea1SDimitry Andric unsigned NumOperandElts = OpTy->getNumElements(); 2913*0fca6ea1SDimitry Andric 2914*0fca6ea1SDimitry Andric // If we peeked through a shuffle, reapply the shuffle to the bool vector. 2915*0fca6ea1SDimitry Andric if (MaskSrc) { 2916*0fca6ea1SDimitry Andric unsigned NumMaskSrcElts = 2917*0fca6ea1SDimitry Andric cast<FixedVectorType>(MaskSrc->getType())->getNumElements(); 2918*0fca6ea1SDimitry Andric NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts; 2919*0fca6ea1SDimitry Andric // Multiple mask bits map to the same operand element - bail out. 2920*0fca6ea1SDimitry Andric if (NumMaskElts > NumOperandElts) 2921*0fca6ea1SDimitry Andric break; 2922*0fca6ea1SDimitry Andric SmallVector<int> ScaledMask; 2923*0fca6ea1SDimitry Andric if (!llvm::scaleShuffleMaskElts(NumMaskElts, ShuffleMask, ScaledMask)) 2924*0fca6ea1SDimitry Andric break; 2925*0fca6ea1SDimitry Andric BoolVec = IC.Builder.CreateShuffleVector(BoolVec, ScaledMask); 2926*0fca6ea1SDimitry Andric MaskTy = FixedVectorType::get(MaskTy->getElementType(), NumMaskElts); 2927*0fca6ea1SDimitry Andric } 2928*0fca6ea1SDimitry Andric assert(MaskTy->getPrimitiveSizeInBits() == 2929*0fca6ea1SDimitry Andric OpTy->getPrimitiveSizeInBits() && 2930e8d8bef9SDimitry Andric "Not expecting mask and operands with different sizes"); 2931e8d8bef9SDimitry Andric 2932e8d8bef9SDimitry Andric if (NumMaskElts == NumOperandElts) { 2933e8d8bef9SDimitry Andric return SelectInst::Create(BoolVec, Op1, Op0); 2934e8d8bef9SDimitry Andric } 2935e8d8bef9SDimitry Andric 2936e8d8bef9SDimitry Andric // If the mask has fewer elements than the operands, each mask bit maps to 2937e8d8bef9SDimitry Andric // multiple elements of the operands. Bitcast back and forth.
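// E.g. a <2 x i1> condition sign-extended to <2 x i64> but selecting
// between <16 x i8> operands: bitcast the operands to <2 x i64>, select,
// and bitcast the result back to <16 x i8>.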
2938e8d8bef9SDimitry Andric if (NumMaskElts < NumOperandElts) { 2939*0fca6ea1SDimitry Andric Value *CastOp0 = IC.Builder.CreateBitCast(Op0, MaskTy); 2940*0fca6ea1SDimitry Andric Value *CastOp1 = IC.Builder.CreateBitCast(Op1, MaskTy); 2941e8d8bef9SDimitry Andric Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0); 2942e8d8bef9SDimitry Andric return new BitCastInst(Sel, II.getType()); 2943e8d8bef9SDimitry Andric } 2944e8d8bef9SDimitry Andric } 2945e8d8bef9SDimitry Andric 2946e8d8bef9SDimitry Andric break; 2947e8d8bef9SDimitry Andric } 2948e8d8bef9SDimitry Andric 2949e8d8bef9SDimitry Andric case Intrinsic::x86_ssse3_pshuf_b_128: 2950e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pshuf_b: 2951e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pshuf_b_512: 2952e8d8bef9SDimitry Andric if (Value *V = simplifyX86pshufb(II, IC.Builder)) { 2953e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2954e8d8bef9SDimitry Andric } 2955e8d8bef9SDimitry Andric break; 2956e8d8bef9SDimitry Andric 2957e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps: 2958e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps_256: 2959e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_ps_512: 2960e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd: 2961e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd_256: 2962e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_pd_512: 2963e8d8bef9SDimitry Andric if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) { 2964e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2965e8d8bef9SDimitry Andric } 2966e8d8bef9SDimitry Andric break; 2967e8d8bef9SDimitry Andric 2968e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permd: 2969e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permps: 2970e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_df_256: 2971e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_df_512: 2972e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_di_256: 2973e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_di_512: 2974e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_hi_128: 2975e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_hi_256: 2976e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_hi_512: 2977e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_qi_128: 2978e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_qi_256: 2979e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_qi_512: 2980e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_sf_512: 2981e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_si_512: 2982e8d8bef9SDimitry Andric if (Value *V = simplifyX86vpermv(II, IC.Builder)) { 2983e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2984e8d8bef9SDimitry Andric } 2985e8d8bef9SDimitry Andric break; 2986e8d8bef9SDimitry Andric 2987*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_d_128: 2988*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_d_256: 2989*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_d_512: 2990*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_hi_128: 2991*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_hi_256: 2992*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_hi_512: 2993*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_pd_128: 2994*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_pd_256: 2995*0fca6ea1SDimitry Andric case 
Intrinsic::x86_avx512_vpermi2var_pd_512: 2996*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_ps_128: 2997*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_ps_256: 2998*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_ps_512: 2999*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_q_128: 3000*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_q_256: 3001*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_q_512: 3002*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_qi_128: 3003*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_qi_256: 3004*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_vpermi2var_qi_512: 3005*0fca6ea1SDimitry Andric if (Value *V = simplifyX86vpermv3(II, IC.Builder)) { 3006*0fca6ea1SDimitry Andric return IC.replaceInstUsesWith(II, V); 3007*0fca6ea1SDimitry Andric } 3008*0fca6ea1SDimitry Andric break; 3009*0fca6ea1SDimitry Andric 3010e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_ps: 3011e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_pd: 3012e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_ps_256: 3013e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_pd_256: 3014e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_d: 3015e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_q: 3016e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_d_256: 3017e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_q_256: 3018e8d8bef9SDimitry Andric if (Instruction *I = simplifyX86MaskedLoad(II, IC)) { 3019e8d8bef9SDimitry Andric return I; 3020e8d8bef9SDimitry Andric } 3021e8d8bef9SDimitry Andric break; 3022e8d8bef9SDimitry Andric 3023e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_maskmov_dqu: 3024e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_ps: 3025e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_pd: 3026e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_ps_256: 3027e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_pd_256: 3028e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_d: 3029e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_q: 3030e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_d_256: 3031e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_q_256: 3032e8d8bef9SDimitry Andric if (simplifyX86MaskedStore(II, IC)) { 3033e8d8bef9SDimitry Andric return nullptr; 3034e8d8bef9SDimitry Andric } 3035e8d8bef9SDimitry Andric break; 3036e8d8bef9SDimitry Andric 3037e8d8bef9SDimitry Andric case Intrinsic::x86_addcarry_32: 3038e8d8bef9SDimitry Andric case Intrinsic::x86_addcarry_64: 3039e8d8bef9SDimitry Andric if (Value *V = simplifyX86addcarry(II, IC.Builder)) { 3040e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 3041e8d8bef9SDimitry Andric } 3042e8d8bef9SDimitry Andric break; 3043e8d8bef9SDimitry Andric 304406c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_d_128: 304506c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_d_256: 304606c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_d_512: 304706c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_q_128: 304806c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_q_256: 304906c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_q_512: 305006c3fb27SDimitry Andric if (Value *V = simplifyTernarylogic(II, IC.Builder)) { 305106c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, V); 305206c3fb27SDimitry Andric } 305306c3fb27SDimitry Andric break; 3054e8d8bef9SDimitry Andric 
default: 3055e8d8bef9SDimitry Andric break; 3056e8d8bef9SDimitry Andric } 3057bdd1243dSDimitry Andric return std::nullopt; 3058e8d8bef9SDimitry Andric } 3059e8d8bef9SDimitry Andric 3060bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic( 3061e8d8bef9SDimitry Andric InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, 3062e8d8bef9SDimitry Andric bool &KnownBitsComputed) const { 3063e8d8bef9SDimitry Andric switch (II.getIntrinsicID()) { 3064e8d8bef9SDimitry Andric default: 3065e8d8bef9SDimitry Andric break; 3066e8d8bef9SDimitry Andric case Intrinsic::x86_mmx_pmovmskb: 3067e8d8bef9SDimitry Andric case Intrinsic::x86_sse_movmsk_ps: 3068e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_movmsk_pd: 3069e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pmovmskb_128: 3070e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_ps_256: 3071e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_pd_256: 3072e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pmovmskb: { 3073e8d8bef9SDimitry Andric // MOVMSK copies the vector elements' sign bits to the low bits 3074e8d8bef9SDimitry Andric // and zeros the high bits. 3075e8d8bef9SDimitry Andric unsigned ArgWidth; 3076e8d8bef9SDimitry Andric if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) { 3077e8d8bef9SDimitry Andric ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>. 3078e8d8bef9SDimitry Andric } else { 3079fe6060f1SDimitry Andric auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType()); 3080e8d8bef9SDimitry Andric ArgWidth = ArgType->getNumElements(); 3081e8d8bef9SDimitry Andric } 3082e8d8bef9SDimitry Andric 3083e8d8bef9SDimitry Andric // If we don't need any of the low bits then return zero; 3084e8d8bef9SDimitry Andric // we know that DemandedMask is non-zero already. 3085e8d8bef9SDimitry Andric APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth); 3086e8d8bef9SDimitry Andric Type *VTy = II.getType(); 3087349cc55cSDimitry Andric if (DemandedElts.isZero()) { 3088e8d8bef9SDimitry Andric return ConstantInt::getNullValue(VTy); 3089e8d8bef9SDimitry Andric } 3090e8d8bef9SDimitry Andric 3091e8d8bef9SDimitry Andric // We know that the upper bits are set to zero. 3092e8d8bef9SDimitry Andric Known.Zero.setBitsFrom(ArgWidth); 3093e8d8bef9SDimitry Andric KnownBitsComputed = true; 3094e8d8bef9SDimitry Andric break; 3095e8d8bef9SDimitry Andric } 3096e8d8bef9SDimitry Andric } 3097bdd1243dSDimitry Andric return std::nullopt; 3098e8d8bef9SDimitry Andric } 3099e8d8bef9SDimitry Andric 3100bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( 3101e8d8bef9SDimitry Andric InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 3102e8d8bef9SDimitry Andric APInt &UndefElts2, APInt &UndefElts3, 3103e8d8bef9SDimitry Andric std::function<void(Instruction *, unsigned, APInt, APInt &)> 3104e8d8bef9SDimitry Andric simplifyAndSetOp) const { 3105e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements(); 3106e8d8bef9SDimitry Andric switch (II.getIntrinsicID()) { 3107e8d8bef9SDimitry Andric default: 3108e8d8bef9SDimitry Andric break; 3109e8d8bef9SDimitry Andric case Intrinsic::x86_xop_vfrcz_ss: 3110e8d8bef9SDimitry Andric case Intrinsic::x86_xop_vfrcz_sd: 3111e8d8bef9SDimitry Andric // The instructions for these intrinsics are specified to zero the upper bits rather than 3112e8d8bef9SDimitry Andric // pass them through like other scalar intrinsics.
So we shouldn't just 3113e8d8bef9SDimitry Andric // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics. 3114e8d8bef9SDimitry Andric // Instead we should return a zero vector. 3115e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 3116e8d8bef9SDimitry Andric IC.addToWorklist(&II); 3117e8d8bef9SDimitry Andric return ConstantAggregateZero::get(II.getType()); 3118e8d8bef9SDimitry Andric } 3119e8d8bef9SDimitry Andric 3120e8d8bef9SDimitry Andric // Only the lower element is used. 3121e8d8bef9SDimitry Andric DemandedElts = 1; 3122e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 3123e8d8bef9SDimitry Andric 3124e8d8bef9SDimitry Andric // Only the lower element is undefined. The high elements are zero. 3125e8d8bef9SDimitry Andric UndefElts = UndefElts[0]; 3126e8d8bef9SDimitry Andric break; 3127e8d8bef9SDimitry Andric 3128e8d8bef9SDimitry Andric // Unary scalar-as-vector operations that work column-wise. 3129e8d8bef9SDimitry Andric case Intrinsic::x86_sse_rcp_ss: 3130e8d8bef9SDimitry Andric case Intrinsic::x86_sse_rsqrt_ss: 3131e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 3132e8d8bef9SDimitry Andric 3133e8d8bef9SDimitry Andric // If lowest element of a scalar op isn't used then use Arg0. 3134e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 3135e8d8bef9SDimitry Andric IC.addToWorklist(&II); 3136e8d8bef9SDimitry Andric return II.getArgOperand(0); 3137e8d8bef9SDimitry Andric } 3138e8d8bef9SDimitry Andric // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions 3139e8d8bef9SDimitry Andric // checks). 3140e8d8bef9SDimitry Andric break; 3141e8d8bef9SDimitry Andric 3142e8d8bef9SDimitry Andric // Binary scalar-as-vector operations that work column-wise. The high 3143e8d8bef9SDimitry Andric // elements come from operand 0. The low element is a function of both 3144e8d8bef9SDimitry Andric // operands. 3145e8d8bef9SDimitry Andric case Intrinsic::x86_sse_min_ss: 3146e8d8bef9SDimitry Andric case Intrinsic::x86_sse_max_ss: 3147e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cmp_ss: 3148e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_min_sd: 3149e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_max_sd: 3150e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cmp_sd: { 3151e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 3152e8d8bef9SDimitry Andric 3153e8d8bef9SDimitry Andric // If lowest element of a scalar op isn't used then use Arg0. 3154e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 3155e8d8bef9SDimitry Andric IC.addToWorklist(&II); 3156e8d8bef9SDimitry Andric return II.getArgOperand(0); 3157e8d8bef9SDimitry Andric } 3158e8d8bef9SDimitry Andric 3159e8d8bef9SDimitry Andric // Only lower element is used for operand 1. 3160e8d8bef9SDimitry Andric DemandedElts = 1; 3161e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); 3162e8d8bef9SDimitry Andric 3163e8d8bef9SDimitry Andric // Lower element is undefined if both lower elements are undefined. 3164e8d8bef9SDimitry Andric // Consider things like undef&0. The result is known zero, not undef. 3165e8d8bef9SDimitry Andric if (!UndefElts2[0]) 3166e8d8bef9SDimitry Andric UndefElts.clearBit(0); 3167e8d8bef9SDimitry Andric 3168e8d8bef9SDimitry Andric break; 3169e8d8bef9SDimitry Andric } 3170e8d8bef9SDimitry Andric 3171e8d8bef9SDimitry Andric // Binary scalar-as-vector operations that work column-wise. The high 3172e8d8bef9SDimitry Andric // elements come from operand 0 and the low element comes from operand 1. 
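// E.g. round.sd(%a, %b, imm) computes round(%b[0]) in lane 0 and passes
// %a[1] through in lane 1, so lane 0 of %a is never read and only lane 0
// of %b is.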
3173e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_round_ss:
3174e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_round_sd: {
3175e8d8bef9SDimitry Andric // Don't use the low element of operand 0.
3176e8d8bef9SDimitry Andric APInt DemandedElts2 = DemandedElts;
3177e8d8bef9SDimitry Andric DemandedElts2.clearBit(0);
3178e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
3179e8d8bef9SDimitry Andric 
3180e8d8bef9SDimitry Andric // If the lowest element of a scalar op isn't used then use Arg0.
3181e8d8bef9SDimitry Andric if (!DemandedElts[0]) {
3182e8d8bef9SDimitry Andric IC.addToWorklist(&II);
3183e8d8bef9SDimitry Andric return II.getArgOperand(0);
3184e8d8bef9SDimitry Andric }
3185e8d8bef9SDimitry Andric 
3186e8d8bef9SDimitry Andric // Only the lower element is used for operand 1.
3187e8d8bef9SDimitry Andric DemandedElts = 1;
3188e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3189e8d8bef9SDimitry Andric 
3190e8d8bef9SDimitry Andric // Take the high undef elements from operand 0 and take the lower element
3191e8d8bef9SDimitry Andric // from operand 1.
3192e8d8bef9SDimitry Andric UndefElts.clearBit(0);
3193e8d8bef9SDimitry Andric UndefElts |= UndefElts2[0];
3194e8d8bef9SDimitry Andric break;
3195e8d8bef9SDimitry Andric }
3196e8d8bef9SDimitry Andric 
3197e8d8bef9SDimitry Andric // Three-input scalar-as-vector operations that work column-wise. The high
3198e8d8bef9SDimitry Andric // elements come from operand 0 and the low element is a function of all
3199e8d8bef9SDimitry Andric // three inputs.
3200e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_ss_round:
3201e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_ss_round:
3202e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_ss_round:
3203e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_ss_round:
3204e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_max_ss_round:
3205e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_min_ss_round:
3206e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_sd_round:
3207e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_sd_round:
3208e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_sd_round:
3209e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_sd_round:
3210e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_max_sd_round:
3211e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_min_sd_round:
3212e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3213e8d8bef9SDimitry Andric 
3214e8d8bef9SDimitry Andric // If the lowest element of a scalar op isn't used then use Arg0.
3215e8d8bef9SDimitry Andric if (!DemandedElts[0]) {
3216e8d8bef9SDimitry Andric IC.addToWorklist(&II);
3217e8d8bef9SDimitry Andric return II.getArgOperand(0);
3218e8d8bef9SDimitry Andric }
3219e8d8bef9SDimitry Andric 
3220e8d8bef9SDimitry Andric // Only the lower element is used for operands 1 and 2.
3221e8d8bef9SDimitry Andric DemandedElts = 1;
3222e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3223e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
3224e8d8bef9SDimitry Andric 
3225e8d8bef9SDimitry Andric // The lower element is undefined only if all three lower elements are undefined.
3226e8d8bef9SDimitry Andric // Consider things like undef&0. The result is known zero, not undef.
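// Illustrative note (editorial addition): a defined low element in
// operand 1 or operand 2 can pin the low result lane to a defined value
// (as with undef&0 above), so the result lane stays marked undef only
// when all three low input lanes are undef.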
3227e8d8bef9SDimitry Andric if (!UndefElts2[0] || !UndefElts3[0])
3228e8d8bef9SDimitry Andric UndefElts.clearBit(0);
3229e8d8bef9SDimitry Andric break;
3230e8d8bef9SDimitry Andric 
3231e8d8bef9SDimitry Andric // TODO: Add fmaddsub support?
3232e8d8bef9SDimitry Andric case Intrinsic::x86_sse3_addsub_pd:
3233e8d8bef9SDimitry Andric case Intrinsic::x86_sse3_addsub_ps:
3234e8d8bef9SDimitry Andric case Intrinsic::x86_avx_addsub_pd_256:
3235e8d8bef9SDimitry Andric case Intrinsic::x86_avx_addsub_ps_256: {
3236e8d8bef9SDimitry Andric // If none of the even or none of the odd lanes are required, turn this
3237e8d8bef9SDimitry Andric // into a generic FP math instruction.
3238e8d8bef9SDimitry Andric APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
3239e8d8bef9SDimitry Andric APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
3240e8d8bef9SDimitry Andric bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
3241e8d8bef9SDimitry Andric bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
3242e8d8bef9SDimitry Andric if (IsSubOnly || IsAddOnly) {
3243e8d8bef9SDimitry Andric assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
3244e8d8bef9SDimitry Andric IRBuilderBase::InsertPointGuard Guard(IC.Builder);
3245e8d8bef9SDimitry Andric IC.Builder.SetInsertPoint(&II);
3246e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
3247e8d8bef9SDimitry Andric return IC.Builder.CreateBinOp(
3248e8d8bef9SDimitry Andric IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3249e8d8bef9SDimitry Andric }
3250e8d8bef9SDimitry Andric 
3251e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3252e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3253e8d8bef9SDimitry Andric UndefElts &= UndefElts2;
3254e8d8bef9SDimitry Andric break;
3255e8d8bef9SDimitry Andric }
3256e8d8bef9SDimitry Andric 
325781ad6265SDimitry Andric // General per-element vector operations.
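// Illustrative note (editorial addition): the variable shifts below
// operate lane-wise, so demanded result elements map one-to-one onto
// both the source vector and the shift-amount vector, and a result lane
// is undef only if that lane is undef in both operands.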
325881ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_d:
325981ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_d_256:
326081ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_q:
326181ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_q_256:
326281ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_d:
326381ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_d_256:
326481ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_q:
326581ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_q_256:
326681ad6265SDimitry Andric case Intrinsic::x86_avx2_psrav_d:
326781ad6265SDimitry Andric case Intrinsic::x86_avx2_psrav_d_256: {
326881ad6265SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
326981ad6265SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
327081ad6265SDimitry Andric UndefElts &= UndefElts2;
327181ad6265SDimitry Andric break;
327281ad6265SDimitry Andric }
327381ad6265SDimitry Andric 
3274*0fca6ea1SDimitry Andric case Intrinsic::x86_sse2_pmulh_w:
3275*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmulh_w:
3276*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmulh_w_512:
3277*0fca6ea1SDimitry Andric case Intrinsic::x86_sse2_pmulhu_w:
3278*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmulhu_w:
3279*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmulhu_w_512:
3280*0fca6ea1SDimitry Andric case Intrinsic::x86_ssse3_pmul_hr_sw_128:
3281*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmul_hr_sw:
3282*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmul_hr_sw_512: {
3283*0fca6ea1SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3284*0fca6ea1SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3285*0fca6ea1SDimitry Andric // NOTE: mulh(undef,undef) != undef.
3286*0fca6ea1SDimitry Andric break;
3287*0fca6ea1SDimitry Andric }
3288*0fca6ea1SDimitry Andric 
3289e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packssdw_128:
3290e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packsswb_128:
3291e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packuswb_128:
3292e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_packusdw:
3293e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packssdw:
3294e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packsswb:
3295e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packusdw:
3296e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packuswb:
3297e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packssdw_512:
3298e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packsswb_512:
3299e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packusdw_512:
3300e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packuswb_512: {
3301e8d8bef9SDimitry Andric auto *Ty0 = II.getArgOperand(0)->getType();
3302e8d8bef9SDimitry Andric unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
3303e8d8bef9SDimitry Andric assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
3304e8d8bef9SDimitry Andric 
3305e8d8bef9SDimitry Andric unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3306e8d8bef9SDimitry Andric unsigned VWidthPerLane = VWidth / NumLanes;
3307e8d8bef9SDimitry Andric unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3308e8d8bef9SDimitry Andric 
3309e8d8bef9SDimitry Andric // Per lane, pack the elements of the first input and then the second.
3310e8d8bef9SDimitry Andric // e.g.
3311e8d8bef9SDimitry Andric // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
3312e8d8bef9SDimitry Andric // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
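// Illustrative example (editorial addition): if only elements 0..7 of a
// v32i8 PACK result are demanded, that maps to X[0..7] in lane 0, while
// all of Y and X[8..15] become don't-cares for the operands.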
3313e8d8bef9SDimitry Andric for (int OpNum = 0; OpNum != 2; ++OpNum) {
3314e8d8bef9SDimitry Andric APInt OpDemandedElts(InnerVWidth, 0);
3315e8d8bef9SDimitry Andric for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3316e8d8bef9SDimitry Andric unsigned LaneIdx = Lane * VWidthPerLane;
3317e8d8bef9SDimitry Andric for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3318e8d8bef9SDimitry Andric unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3319e8d8bef9SDimitry Andric if (DemandedElts[Idx])
3320e8d8bef9SDimitry Andric OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
3321e8d8bef9SDimitry Andric }
3322e8d8bef9SDimitry Andric }
3323e8d8bef9SDimitry Andric 
3324e8d8bef9SDimitry Andric // Demand elements from the operand.
3325e8d8bef9SDimitry Andric APInt OpUndefElts(InnerVWidth, 0);
3326e8d8bef9SDimitry Andric simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
3327e8d8bef9SDimitry Andric 
3328e8d8bef9SDimitry Andric // Pack the operand's UNDEF elements, one lane at a time.
3329e8d8bef9SDimitry Andric OpUndefElts = OpUndefElts.zext(VWidth);
3330e8d8bef9SDimitry Andric for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3331e8d8bef9SDimitry Andric APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
3332e8d8bef9SDimitry Andric LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
3333e8d8bef9SDimitry Andric LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3334e8d8bef9SDimitry Andric UndefElts |= LaneElts;
3335e8d8bef9SDimitry Andric }
3336e8d8bef9SDimitry Andric }
3337e8d8bef9SDimitry Andric break;
3338e8d8bef9SDimitry Andric }
3339e8d8bef9SDimitry Andric 
3340*0fca6ea1SDimitry Andric case Intrinsic::x86_sse2_pmadd_wd:
3341*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmadd_wd:
3342*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmaddw_d_512:
3343*0fca6ea1SDimitry Andric case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3344*0fca6ea1SDimitry Andric case Intrinsic::x86_avx2_pmadd_ub_sw:
3345*0fca6ea1SDimitry Andric case Intrinsic::x86_avx512_pmaddubs_w_512: {
3346*0fca6ea1SDimitry Andric // PMADD - demand both src elements that map to each dst element.
3347*0fca6ea1SDimitry Andric auto *ArgTy = II.getArgOperand(0)->getType();
3348*0fca6ea1SDimitry Andric unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
3349*0fca6ea1SDimitry Andric assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
3350*0fca6ea1SDimitry Andric APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
3351*0fca6ea1SDimitry Andric APInt Op0UndefElts(InnerVWidth, 0);
3352*0fca6ea1SDimitry Andric APInt Op1UndefElts(InnerVWidth, 0);
3353*0fca6ea1SDimitry Andric simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
3354*0fca6ea1SDimitry Andric simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
3355*0fca6ea1SDimitry Andric // NOTE: madd(undef,undef) != undef.
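// Illustrative example (editorial addition): for pmadd_wd, result
// element I is computed from source elements 2*I and 2*I+1 of each
// operand, which is exactly the doubling that ScaleBitMask performs on
// the demanded-element mask.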
3356*0fca6ea1SDimitry Andric break;
3357*0fca6ea1SDimitry Andric }
3358*0fca6ea1SDimitry Andric 
3359e8d8bef9SDimitry Andric // PSHUFB
3360e8d8bef9SDimitry Andric case Intrinsic::x86_ssse3_pshuf_b_128:
3361e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pshuf_b:
3362e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pshuf_b_512:
3363e8d8bef9SDimitry Andric // PERMILVAR
3364e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps:
3365e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps_256:
3366e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_ps_512:
3367e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd:
3368e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd_256:
3369e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_pd_512:
3370e8d8bef9SDimitry Andric // PERMV
3371e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permd:
3372e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permps: {
3373e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
3374e8d8bef9SDimitry Andric break;
3375e8d8bef9SDimitry Andric }
3376e8d8bef9SDimitry Andric 
3377e8d8bef9SDimitry Andric // SSE4A instructions leave the upper 64 bits of the 128-bit result
3378e8d8bef9SDimitry Andric // in an undefined state.
3379e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrq:
3380e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrqi:
3381e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertq:
3382e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertqi:
3383e8d8bef9SDimitry Andric UndefElts.setHighBits(VWidth / 2);
3384e8d8bef9SDimitry Andric break;
3385e8d8bef9SDimitry Andric }
3386bdd1243dSDimitry Andric return std::nullopt;
3387e8d8bef9SDimitry Andric }
3388