//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <numeric>
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
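/// For example, decomposing "(X * 4) + 8" yields X with Scale = 4 and
/// Offset = 8, and decomposing "Y << 1" yields Y with Scale = 2 and
/// Offset = 0.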
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale  = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C.  Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
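/// For example (a sketch, assuming i32 has 4-byte ABI alignment on the target
/// and the bitcast is the alloca's only use):
///   %t = shl nuw i32 %n, 2
///   %A = alloca i8, i32 %t
///   %B = bitcast i8* %A to i32*
/// --->
///   %A = alloca i32, i32 %n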
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                       AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type casted to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the casted type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation.  If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast.  This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
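/// For example, asked to evaluate "(add i32 (zext i16 %x), 42)" in i16, this
/// produces "add i16 %x, 42": the zext's source is returned directly and the
/// constant is recast to the new type.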
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}

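/// Determine whether the pair of casts (CI1 feeding CI2) can be replaced by a
/// single cast. For example, "zext i8 -> i16" followed by "zext i16 -> i32"
/// folds to one "zext i8 -> i32". Returns the replacement opcode, or zero if
/// the pair cannot be eliminated.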
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Ty = CI.getType();
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V.  We should return true if trunc(V)
/// can be computed by computing V in the smaller type.  If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
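/// For example, "trunc (add i32 %x, %y) to i8" can be evaluated as
/// "add i8 (trunc %x), (trunc %y)", because add produces the same low bits
/// regardless of the high bits of its operands.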
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type down to the sign bit of the truncated type are the same.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to the sign bit of the truncated type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
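/// For example (a sketch; %v is i8 and the arithmetic was promoted to i32):
///   trunc (or (shl (zext %v), %a), (lshr (zext %v), (sub 8, %a))) to i8
///   --->
///   llvm.fshl.i8(%v, %v, trunc(%a))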
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount cannot over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal0), trunc(ShVal1), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
/// TODO: Transforms for truncated shifts should be moved into here.
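/// For example, "trunc (and i64 %x, 15) to i32" becomes
/// "and i32 (trunc %x), 15": the constant is truncated and the binop is
/// performed in the narrow type.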
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }

  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      is_splat(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
    Constant *NarrowUndef = UndefValue::get(Trunc.getType());
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, NarrowUndef, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.   Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
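  // For example, a "trunc i64 %t to i8" whose input tree can be evaluated in
  // i16 is rewritten as "trunc i16 (narrowed tree) to i8".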
758*82d56013Sjoerg if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
759*82d56013Sjoerg if (DestWidth * 2 < SrcWidth) {
760*82d56013Sjoerg auto *NewDestTy = DestITy->getExtendedType();
761*82d56013Sjoerg if (shouldChangeType(SrcTy, NewDestTy) &&
762*82d56013Sjoerg canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
763*82d56013Sjoerg LLVM_DEBUG(
764*82d56013Sjoerg dbgs() << "ICE: EvaluateInDifferentType converting expression type"
765*82d56013Sjoerg " to reduce the width of operand of"
766*82d56013Sjoerg << Trunc << '\n');
767*82d56013Sjoerg Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
768*82d56013Sjoerg return new TruncInst(Res, DestTy);
769*82d56013Sjoerg }
770*82d56013Sjoerg }
7717330f729Sjoerg }
7727330f729Sjoerg
7737330f729Sjoerg // Test if the trunc is the user of a select which is part of a
7747330f729Sjoerg // minimum or maximum operation. If so, don't do any more simplification.
7757330f729Sjoerg // Even simplifying demanded bits can break the canonical form of a
7767330f729Sjoerg // min/max.
7777330f729Sjoerg Value *LHS, *RHS;
778*82d56013Sjoerg if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
779*82d56013Sjoerg if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
7807330f729Sjoerg return nullptr;
7817330f729Sjoerg
7827330f729Sjoerg // See if we can simplify any instructions used by the input whose sole
7837330f729Sjoerg // purpose is to compute bits we don't care about.
784*82d56013Sjoerg if (SimplifyDemandedInstructionBits(Trunc))
785*82d56013Sjoerg return &Trunc;
7867330f729Sjoerg
787*82d56013Sjoerg if (DestWidth == 1) {
788*82d56013Sjoerg Value *Zero = Constant::getNullValue(SrcTy);
7897330f729Sjoerg if (DestTy->isIntegerTy()) {
7907330f729Sjoerg // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
7917330f729Sjoerg // TODO: We canonicalize to more instructions here because we are probably
7927330f729Sjoerg // lacking equivalent analysis for trunc relative to icmp. There may also
7937330f729Sjoerg // be codegen concerns. If those trunc limitations were removed, we could
7947330f729Sjoerg // remove this transform.
7957330f729Sjoerg Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
7967330f729Sjoerg return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
7977330f729Sjoerg }
7987330f729Sjoerg
7997330f729Sjoerg // For vectors, we do not canonicalize all truncs to icmp, so optimize
8007330f729Sjoerg // patterns that would be covered within visitICmpInst.
8017330f729Sjoerg Value *X;
802*82d56013Sjoerg Constant *C;
803*82d56013Sjoerg if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
8047330f729Sjoerg // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
805*82d56013Sjoerg Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
806*82d56013Sjoerg Constant *MaskC = ConstantExpr::getShl(One, C);
807*82d56013Sjoerg Value *And = Builder.CreateAnd(X, MaskC);
8087330f729Sjoerg return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
8097330f729Sjoerg }
810*82d56013Sjoerg if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
8117330f729Sjoerg m_Deferred(X))))) {
8127330f729Sjoerg // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
813*82d56013Sjoerg Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
814*82d56013Sjoerg Constant *MaskC = ConstantExpr::getShl(One, C);
815*82d56013Sjoerg MaskC = ConstantExpr::getOr(MaskC, One);
816*82d56013Sjoerg Value *And = Builder.CreateAnd(X, MaskC);
8177330f729Sjoerg return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
8187330f729Sjoerg }
8197330f729Sjoerg }
8207330f729Sjoerg
821*82d56013Sjoerg Value *A;
822*82d56013Sjoerg Constant *C;
823*82d56013Sjoerg if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
824*82d56013Sjoerg unsigned AWidth = A->getType()->getScalarSizeInBits();
825*82d56013Sjoerg unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
826*82d56013Sjoerg auto *OldSh = cast<Instruction>(Src);
827*82d56013Sjoerg bool IsExact = OldSh->isExact();
8287330f729Sjoerg
829*82d56013Sjoerg // If the shift is small enough, all zero bits created by the shift are
830*82d56013Sjoerg // removed by the trunc.
831*82d56013Sjoerg if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
832*82d56013Sjoerg APInt(SrcWidth, MaxShiftAmt)))) {
833*82d56013Sjoerg // trunc (lshr (sext A), C) --> ashr A, C
834*82d56013Sjoerg if (A->getType() == DestTy) {
835*82d56013Sjoerg Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
836*82d56013Sjoerg Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
837*82d56013Sjoerg ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
838*82d56013Sjoerg ShAmt = Constant::mergeUndefsWith(ShAmt, C);
839*82d56013Sjoerg return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
840*82d56013Sjoerg : BinaryOperator::CreateAShr(A, ShAmt);
841*82d56013Sjoerg }
842*82d56013Sjoerg // The types are mismatched, so create a cast after shifting:
843*82d56013Sjoerg // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
844*82d56013Sjoerg if (Src->hasOneUse()) {
845*82d56013Sjoerg Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
846*82d56013Sjoerg Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
847*82d56013Sjoerg ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
848*82d56013Sjoerg Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
849*82d56013Sjoerg return CastInst::CreateIntegerCast(Shift, DestTy, true);
850*82d56013Sjoerg }
851*82d56013Sjoerg }
852*82d56013Sjoerg // TODO: Mask high bits with 'and'.
853*82d56013Sjoerg }
854*82d56013Sjoerg
855*82d56013Sjoerg // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
856*82d56013Sjoerg if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
857*82d56013Sjoerg unsigned MaxShiftAmt = SrcWidth - DestWidth;
858*82d56013Sjoerg
859*82d56013Sjoerg // If the shift is small enough, all zero/sign bits created by the shift are
860*82d56013Sjoerg // removed by the trunc.
861*82d56013Sjoerg if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
862*82d56013Sjoerg APInt(SrcWidth, MaxShiftAmt)))) {
863*82d56013Sjoerg auto *OldShift = cast<Instruction>(Src);
864*82d56013Sjoerg bool IsExact = OldShift->isExact();
865*82d56013Sjoerg auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
866*82d56013Sjoerg ShAmt = Constant::mergeUndefsWith(ShAmt, C);
867*82d56013Sjoerg Value *Shift =
868*82d56013Sjoerg OldShift->getOpcode() == Instruction::AShr
869*82d56013Sjoerg ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
870*82d56013Sjoerg : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
871*82d56013Sjoerg return CastInst::CreateTruncOrBitCast(Shift, DestTy);
872*82d56013Sjoerg }
873*82d56013Sjoerg }
874*82d56013Sjoerg
875*82d56013Sjoerg if (Instruction *I = narrowBinOp(Trunc))
876*82d56013Sjoerg return I;
877*82d56013Sjoerg
878*82d56013Sjoerg if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
879*82d56013Sjoerg return I;
880*82d56013Sjoerg
881*82d56013Sjoerg if (Instruction *I = shrinkInsertElt(Trunc, Builder))
882*82d56013Sjoerg return I;
883*82d56013Sjoerg
8847330f729Sjoerg if (Src->hasOneUse() &&
885*82d56013Sjoerg (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
8867330f729Sjoerg // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
8877330f729Sjoerg // dest type is native and cst < dest size.
888*82d56013Sjoerg if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
8897330f729Sjoerg !match(A, m_Shr(m_Value(), m_Constant()))) {
8907330f729Sjoerg // Skip shifts of shift by constants. It undoes a combine in
8917330f729Sjoerg // FoldShiftByConstant and is the extend in reg pattern.
892*82d56013Sjoerg APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
893*82d56013Sjoerg if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
8947330f729Sjoerg Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
895*82d56013Sjoerg return BinaryOperator::Create(Instruction::Shl, NewTrunc,
896*82d56013Sjoerg ConstantExpr::getTrunc(C, DestTy));
8977330f729Sjoerg }
8987330f729Sjoerg }
8997330f729Sjoerg }
9007330f729Sjoerg
901*82d56013Sjoerg if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
9027330f729Sjoerg return I;
9037330f729Sjoerg
904*82d56013Sjoerg // Whenever an element is extracted from a vector, and then truncated,
905*82d56013Sjoerg // canonicalize by converting it to a bitcast followed by an
906*82d56013Sjoerg // extractelement.
907*82d56013Sjoerg //
908*82d56013Sjoerg // Example (little endian):
909*82d56013Sjoerg // trunc (extractelement <4 x i64> %X, 0) to i32
910*82d56013Sjoerg // --->
911*82d56013Sjoerg // extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
912*82d56013Sjoerg Value *VecOp;
913*82d56013Sjoerg ConstantInt *Cst;
914*82d56013Sjoerg if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
915*82d56013Sjoerg auto *VecOpTy = cast<VectorType>(VecOp->getType());
916*82d56013Sjoerg auto VecElts = VecOpTy->getElementCount();
917*82d56013Sjoerg
918*82d56013Sjoerg // A badly fit destination size would result in an invalid cast.
919*82d56013Sjoerg if (SrcWidth % DestWidth == 0) {
920*82d56013Sjoerg uint64_t TruncRatio = SrcWidth / DestWidth;
921*82d56013Sjoerg uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
922*82d56013Sjoerg uint64_t VecOpIdx = Cst->getZExtValue();
923*82d56013Sjoerg uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
924*82d56013Sjoerg : VecOpIdx * TruncRatio;
925*82d56013Sjoerg assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
926*82d56013Sjoerg "overflow 32-bits");
927*82d56013Sjoerg
928*82d56013Sjoerg auto *BitCastTo =
929*82d56013Sjoerg VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
930*82d56013Sjoerg Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
931*82d56013Sjoerg return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
932*82d56013Sjoerg }
933*82d56013Sjoerg }
934*82d56013Sjoerg
9357330f729Sjoerg return nullptr;
9367330f729Sjoerg }
9377330f729Sjoerg
transformZExtICmp(ICmpInst * Cmp,ZExtInst & Zext,bool DoTransform)938*82d56013Sjoerg Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
9397330f729Sjoerg bool DoTransform) {
9407330f729Sjoerg // If we are just checking for a icmp eq of a single bit and zext'ing it
9417330f729Sjoerg // to an integer, then shift the bit to the appropriate place and then
9427330f729Sjoerg // cast to integer to avoid the comparison.
9437330f729Sjoerg const APInt *Op1CV;
944*82d56013Sjoerg if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
9457330f729Sjoerg
9467330f729Sjoerg // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
9477330f729Sjoerg // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
948*82d56013Sjoerg if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
949*82d56013Sjoerg (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
950*82d56013Sjoerg if (!DoTransform) return Cmp;
9517330f729Sjoerg
952*82d56013Sjoerg Value *In = Cmp->getOperand(0);
9537330f729Sjoerg Value *Sh = ConstantInt::get(In->getType(),
9547330f729Sjoerg In->getType()->getScalarSizeInBits() - 1);
9557330f729Sjoerg In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
956*82d56013Sjoerg if (In->getType() != Zext.getType())
957*82d56013Sjoerg In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
9587330f729Sjoerg
959*82d56013Sjoerg if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
9607330f729Sjoerg Constant *One = ConstantInt::get(In->getType(), 1);
9617330f729Sjoerg In = Builder.CreateXor(In, One, In->getName() + ".not");
9627330f729Sjoerg }
9637330f729Sjoerg
964*82d56013Sjoerg return replaceInstUsesWith(Zext, In);
9657330f729Sjoerg }
9667330f729Sjoerg
9677330f729Sjoerg // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
9687330f729Sjoerg // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
9697330f729Sjoerg // zext (X == 1) to i32 --> X iff X has only the low bit set.
9707330f729Sjoerg // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
9717330f729Sjoerg // zext (X != 0) to i32 --> X iff X has only the low bit set.
9727330f729Sjoerg // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
9737330f729Sjoerg // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
9747330f729Sjoerg // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
9757330f729Sjoerg if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
9767330f729Sjoerg // This only works for EQ and NE
977*82d56013Sjoerg Cmp->isEquality()) {
9787330f729Sjoerg // If Op1C some other power of two, convert:
979*82d56013Sjoerg KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
9807330f729Sjoerg
9817330f729Sjoerg APInt KnownZeroMask(~Known.Zero);
9827330f729Sjoerg if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
983*82d56013Sjoerg if (!DoTransform) return Cmp;
9847330f729Sjoerg
985*82d56013Sjoerg bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
9867330f729Sjoerg if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
9877330f729Sjoerg // (X&4) == 2 --> false
9887330f729Sjoerg // (X&4) != 2 --> true
989*82d56013Sjoerg Constant *Res = ConstantInt::get(Zext.getType(), isNE);
990*82d56013Sjoerg return replaceInstUsesWith(Zext, Res);
9917330f729Sjoerg }
9927330f729Sjoerg
9937330f729Sjoerg uint32_t ShAmt = KnownZeroMask.logBase2();
994*82d56013Sjoerg Value *In = Cmp->getOperand(0);
9957330f729Sjoerg if (ShAmt) {
9967330f729Sjoerg // Perform a logical shr by shiftamt.
9977330f729Sjoerg // Insert the shift to put the result in the low bit.
9987330f729Sjoerg In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
9997330f729Sjoerg In->getName() + ".lobit");
10007330f729Sjoerg }
10017330f729Sjoerg
10027330f729Sjoerg if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
10037330f729Sjoerg Constant *One = ConstantInt::get(In->getType(), 1);
10047330f729Sjoerg In = Builder.CreateXor(In, One);
10057330f729Sjoerg }
10067330f729Sjoerg
1007*82d56013Sjoerg if (Zext.getType() == In->getType())
1008*82d56013Sjoerg return replaceInstUsesWith(Zext, In);
10097330f729Sjoerg
1010*82d56013Sjoerg Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1011*82d56013Sjoerg return replaceInstUsesWith(Zext, IntCast);
10127330f729Sjoerg }
10137330f729Sjoerg }
10147330f729Sjoerg }
10157330f729Sjoerg
10167330f729Sjoerg // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
10177330f729Sjoerg // It is also profitable to transform icmp eq into not(xor(A, B)) because that
10187330f729Sjoerg // may lead to additional simplifications.
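// Illustrative example (assuming every bit of %a and %b except bit 3 is
// known to be equal):
//   %z = zext (icmp ne i8 %a, %b) to i8
// becomes:
//   %x = xor i8 %a, %b
//   %z = lshr i8 %x, 3
// For icmp eq, the result is additionally xor'd with 1.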
1019*82d56013Sjoerg if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
1020*82d56013Sjoerg if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
1021*82d56013Sjoerg Value *LHS = Cmp->getOperand(0);
1022*82d56013Sjoerg Value *RHS = Cmp->getOperand(1);
10237330f729Sjoerg
1024*82d56013Sjoerg KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
1025*82d56013Sjoerg KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
10267330f729Sjoerg
10277330f729Sjoerg if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
10287330f729Sjoerg APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
10297330f729Sjoerg APInt UnknownBit = ~KnownBits;
10307330f729Sjoerg if (UnknownBit.countPopulation() == 1) {
1031*82d56013Sjoerg if (!DoTransform) return Cmp;
10327330f729Sjoerg
10337330f729Sjoerg Value *Result = Builder.CreateXor(LHS, RHS);
10347330f729Sjoerg
10357330f729Sjoerg // Mask off any bits that are set and won't be shifted away.
10367330f729Sjoerg if (KnownLHS.One.uge(UnknownBit))
10377330f729Sjoerg Result = Builder.CreateAnd(Result,
10387330f729Sjoerg ConstantInt::get(ITy, UnknownBit));
10397330f729Sjoerg
10407330f729Sjoerg // Shift the bit we're testing down to the lsb.
10417330f729Sjoerg Result = Builder.CreateLShr(
10427330f729Sjoerg Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
10437330f729Sjoerg
1044*82d56013Sjoerg if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
10457330f729Sjoerg Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
1046*82d56013Sjoerg Result->takeName(Cmp);
1047*82d56013Sjoerg return replaceInstUsesWith(Zext, Result);
10487330f729Sjoerg }
10497330f729Sjoerg }
10507330f729Sjoerg }
10517330f729Sjoerg }
10527330f729Sjoerg
10537330f729Sjoerg return nullptr;
10547330f729Sjoerg }
10557330f729Sjoerg
10567330f729Sjoerg /// Determine if the specified value can be computed in the specified wider type
10577330f729Sjoerg /// and produce the same low bits. If not, return false.
10587330f729Sjoerg ///
10597330f729Sjoerg /// If this function returns true, it can also return a non-zero number of bits
10607330f729Sjoerg /// (in BitsToClear) which indicates that the value it computes is correct for
10617330f729Sjoerg /// the zero extend, but that the additional BitsToClear bits need to be zero'd
10627330f729Sjoerg /// out. For example, to promote something like:
10637330f729Sjoerg ///
10647330f729Sjoerg /// %B = trunc i64 %A to i32
10657330f729Sjoerg /// %C = lshr i32 %B, 8
10667330f729Sjoerg /// %E = zext i32 %C to i64
10677330f729Sjoerg ///
10687330f729Sjoerg /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
10697330f729Sjoerg /// set to 8 to indicate that the promoted value needs to have bits 24-31
10707330f729Sjoerg /// cleared in addition to bits 32-63. Since an 'and' will be generated to
10717330f729Sjoerg /// clear the top bits anyway, doing this has no extra cost.
10727330f729Sjoerg ///
10737330f729Sjoerg /// This function works on both vectors and scalars.
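/// Conceptually, the example above is promoted to:
///
///   %C = lshr i64 %A, 8
///   %E = and i64 %C, 16777215 ; 0xFFFFFF clears bits 24-63
///
/// where the single 'and' handles both BitsToClear and the extension bits.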
10747330f729Sjoerg static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
1075*82d56013Sjoerg InstCombinerImpl &IC, Instruction *CxtI) {
10767330f729Sjoerg BitsToClear = 0;
10777330f729Sjoerg if (canAlwaysEvaluateInType(V, Ty))
10787330f729Sjoerg return true;
10797330f729Sjoerg if (canNotEvaluateInType(V, Ty))
10807330f729Sjoerg return false;
10817330f729Sjoerg
10827330f729Sjoerg auto *I = cast<Instruction>(V);
10837330f729Sjoerg unsigned Tmp;
10847330f729Sjoerg switch (I->getOpcode()) {
10857330f729Sjoerg case Instruction::ZExt: // zext(zext(x)) -> zext(x).
10867330f729Sjoerg case Instruction::SExt: // zext(sext(x)) -> sext(x).
10877330f729Sjoerg case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
10887330f729Sjoerg return true;
10897330f729Sjoerg case Instruction::And:
10907330f729Sjoerg case Instruction::Or:
10917330f729Sjoerg case Instruction::Xor:
10927330f729Sjoerg case Instruction::Add:
10937330f729Sjoerg case Instruction::Sub:
10947330f729Sjoerg case Instruction::Mul:
10957330f729Sjoerg if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
10967330f729Sjoerg !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
10977330f729Sjoerg return false;
10987330f729Sjoerg // These can all be promoted if neither operand has 'bits to clear'.
10997330f729Sjoerg if (BitsToClear == 0 && Tmp == 0)
11007330f729Sjoerg return true;
11017330f729Sjoerg
11027330f729Sjoerg // If the operation is an AND/OR/XOR and the bits to clear are zero on the
11037330f729Sjoerg // other side, BitsToClear is ok.
11047330f729Sjoerg if (Tmp == 0 && I->isBitwiseLogicOp()) {
11057330f729Sjoerg // We use MaskedValueIsZero here for generality, but the case we care
11067330f729Sjoerg // about the most is constant RHS.
11077330f729Sjoerg unsigned VSize = V->getType()->getScalarSizeInBits();
11087330f729Sjoerg if (IC.MaskedValueIsZero(I->getOperand(1),
11097330f729Sjoerg APInt::getHighBitsSet(VSize, BitsToClear),
11107330f729Sjoerg 0, CxtI)) {
11117330f729Sjoerg // If this is an And instruction and all of the BitsToClear are
11127330f729Sjoerg // known to be zero we can reset BitsToClear.
11137330f729Sjoerg if (I->getOpcode() == Instruction::And)
11147330f729Sjoerg BitsToClear = 0;
11157330f729Sjoerg return true;
11167330f729Sjoerg }
11177330f729Sjoerg }
11187330f729Sjoerg
11197330f729Sjoerg // Otherwise, we don't know how to analyze this BitsToClear case yet.
11207330f729Sjoerg return false;
11217330f729Sjoerg
11227330f729Sjoerg case Instruction::Shl: {
11237330f729Sjoerg // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
11247330f729Sjoerg // upper bits we can reduce BitsToClear by the shift amount.
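// For example, if x needed 8 bits cleared and the shift amount is 3, only
// the top 5 bits still need clearing afterwards.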
11257330f729Sjoerg const APInt *Amt;
11267330f729Sjoerg if (match(I->getOperand(1), m_APInt(Amt))) {
11277330f729Sjoerg if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
11287330f729Sjoerg return false;
11297330f729Sjoerg uint64_t ShiftAmt = Amt->getZExtValue();
11307330f729Sjoerg BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
11317330f729Sjoerg return true;
11327330f729Sjoerg }
11337330f729Sjoerg return false;
11347330f729Sjoerg }
11357330f729Sjoerg case Instruction::LShr: {
11367330f729Sjoerg // We can promote lshr(x, cst) if we can promote x. The shift pulls in high
11377330f729Sjoerg // bits that must be zero, so the ultimate 'and' has more bits to clear out.
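// For example, if x needed 8 bits cleared and the shift amount is 3, the
// final 'and' must now clear 11 high bits (clamped to the bit width).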
11387330f729Sjoerg const APInt *Amt;
11397330f729Sjoerg if (match(I->getOperand(1), m_APInt(Amt))) {
11407330f729Sjoerg if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
11417330f729Sjoerg return false;
11427330f729Sjoerg BitsToClear += Amt->getZExtValue();
11437330f729Sjoerg if (BitsToClear > V->getType()->getScalarSizeInBits())
11447330f729Sjoerg BitsToClear = V->getType()->getScalarSizeInBits();
11457330f729Sjoerg return true;
11467330f729Sjoerg }
11477330f729Sjoerg // Cannot promote variable LSHR.
11487330f729Sjoerg return false;
11497330f729Sjoerg }
11507330f729Sjoerg case Instruction::Select:
11517330f729Sjoerg if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
11527330f729Sjoerg !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
11537330f729Sjoerg // TODO: If important, we could handle the case when the BitsToClear are
11547330f729Sjoerg // known zero in the disagreeing side.
11557330f729Sjoerg Tmp != BitsToClear)
11567330f729Sjoerg return false;
11577330f729Sjoerg return true;
11587330f729Sjoerg
11597330f729Sjoerg case Instruction::PHI: {
11607330f729Sjoerg // We can change a phi if we can change all operands. Note that we never
11617330f729Sjoerg // get into trouble with cyclic PHIs here because we only consider
11627330f729Sjoerg // instructions with a single use.
11637330f729Sjoerg PHINode *PN = cast<PHINode>(I);
11647330f729Sjoerg if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
11657330f729Sjoerg return false;
11667330f729Sjoerg for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
11677330f729Sjoerg if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
11687330f729Sjoerg // TODO: If important, we could handle the case when the BitsToClear
11697330f729Sjoerg // are known zero in the disagreeing input.
11707330f729Sjoerg Tmp != BitsToClear)
11717330f729Sjoerg return false;
11727330f729Sjoerg return true;
11737330f729Sjoerg }
11747330f729Sjoerg default:
11757330f729Sjoerg // TODO: Can handle more cases here.
11767330f729Sjoerg return false;
11777330f729Sjoerg }
11787330f729Sjoerg }
11797330f729Sjoerg
1180*82d56013Sjoerg Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
11817330f729Sjoerg // If this zero extend is only used by a truncate, let the truncate be
11827330f729Sjoerg // eliminated before we try to optimize this zext.
11837330f729Sjoerg if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
11847330f729Sjoerg return nullptr;
11857330f729Sjoerg
11867330f729Sjoerg // If one of the common conversions will work, do it.
11877330f729Sjoerg if (Instruction *Result = commonCastTransforms(CI))
11887330f729Sjoerg return Result;
11897330f729Sjoerg
11907330f729Sjoerg Value *Src = CI.getOperand(0);
11917330f729Sjoerg Type *SrcTy = Src->getType(), *DestTy = CI.getType();
11927330f729Sjoerg
11937330f729Sjoerg // Try to extend the entire expression tree to the wide destination type.
11947330f729Sjoerg unsigned BitsToClear;
11957330f729Sjoerg if (shouldChangeType(SrcTy, DestTy) &&
11967330f729Sjoerg canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
11977330f729Sjoerg assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
11987330f729Sjoerg "Can't clear more bits than in SrcTy");
11997330f729Sjoerg
12007330f729Sjoerg // Okay, we can transform this! Insert the new expression now.
12017330f729Sjoerg LLVM_DEBUG(
12027330f729Sjoerg dbgs() << "ICE: EvaluateInDifferentType converting expression type"
12037330f729Sjoerg " to avoid zero extend: "
12047330f729Sjoerg << CI << '\n');
12057330f729Sjoerg Value *Res = EvaluateInDifferentType(Src, DestTy, false);
12067330f729Sjoerg assert(Res->getType() == DestTy);
12077330f729Sjoerg
12087330f729Sjoerg // Preserve debug values referring to Src if the zext is its last use.
12097330f729Sjoerg if (auto *SrcOp = dyn_cast<Instruction>(Src))
12107330f729Sjoerg if (SrcOp->hasOneUse())
12117330f729Sjoerg replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);
12127330f729Sjoerg
12137330f729Sjoerg uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
12147330f729Sjoerg uint32_t DestBitSize = DestTy->getScalarSizeInBits();
12157330f729Sjoerg
12167330f729Sjoerg // If the high bits are already filled with zeros, just replace this
12177330f729Sjoerg // cast with the result.
12187330f729Sjoerg if (MaskedValueIsZero(Res,
12197330f729Sjoerg APInt::getHighBitsSet(DestBitSize,
12207330f729Sjoerg DestBitSize-SrcBitsKept),
12217330f729Sjoerg 0, &CI))
12227330f729Sjoerg return replaceInstUsesWith(CI, Res);
12237330f729Sjoerg
12247330f729Sjoerg // We need to emit an AND to clear the high bits.
12257330f729Sjoerg Constant *C = ConstantInt::get(Res->getType(),
12267330f729Sjoerg APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
12277330f729Sjoerg return BinaryOperator::CreateAnd(Res, C);
12287330f729Sjoerg }
12297330f729Sjoerg
12307330f729Sjoerg // If this is a TRUNC followed by a ZEXT, then we are dealing with integral
12317330f729Sjoerg // types and, if the sizes are just right, we can convert this into a logical
12327330f729Sjoerg // 'and' which will be much cheaper than the pair of casts.
12337330f729Sjoerg if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
12347330f729Sjoerg // TODO: Subsume this into EvaluateInDifferentType.
12357330f729Sjoerg
12367330f729Sjoerg // Get the sizes of the types involved. We know that the intermediate type
12377330f729Sjoerg // will be smaller than A or C, but don't know the relation between A and C.
12387330f729Sjoerg Value *A = CSrc->getOperand(0);
12397330f729Sjoerg unsigned SrcSize = A->getType()->getScalarSizeInBits();
12407330f729Sjoerg unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
12417330f729Sjoerg unsigned DstSize = CI.getType()->getScalarSizeInBits();
12427330f729Sjoerg // If we're actually extending zero bits, then if
12437330f729Sjoerg // SrcSize < DstSize: zext(a & mask)
12447330f729Sjoerg // SrcSize == DstSize: a & mask
12457330f729Sjoerg // SrcSize > DstSize: trunc(a) & mask
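// For example, in the SrcSize == DstSize case:
//   %b = trunc i32 %a to i8
//   %c = zext i8 %b to i32
// becomes:
//   %c = and i32 %a, 255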
12467330f729Sjoerg if (SrcSize < DstSize) {
12477330f729Sjoerg APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
12487330f729Sjoerg Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
12497330f729Sjoerg Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
12507330f729Sjoerg return new ZExtInst(And, CI.getType());
12517330f729Sjoerg }
12527330f729Sjoerg
12537330f729Sjoerg if (SrcSize == DstSize) {
12547330f729Sjoerg APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
12557330f729Sjoerg return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
12567330f729Sjoerg AndValue));
12577330f729Sjoerg }
12587330f729Sjoerg if (SrcSize > DstSize) {
12597330f729Sjoerg Value *Trunc = Builder.CreateTrunc(A, CI.getType());
12607330f729Sjoerg APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
12617330f729Sjoerg return BinaryOperator::CreateAnd(Trunc,
12627330f729Sjoerg ConstantInt::get(Trunc->getType(),
12637330f729Sjoerg AndValue));
12647330f729Sjoerg }
12657330f729Sjoerg }
12667330f729Sjoerg
1267*82d56013Sjoerg if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
1268*82d56013Sjoerg return transformZExtICmp(Cmp, CI);
12697330f729Sjoerg
12707330f729Sjoerg BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
12717330f729Sjoerg if (SrcI && SrcI->getOpcode() == Instruction::Or) {
12727330f729Sjoerg // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
12737330f729Sjoerg // of the (zext icmp)s can be eliminated. If so, immediately perform the
12747330f729Sjoerg // corresponding elimination.
12757330f729Sjoerg ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
12767330f729Sjoerg ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
12777330f729Sjoerg if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
1278*82d56013Sjoerg LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
12797330f729Sjoerg (transformZExtICmp(LHS, CI, false) ||
12807330f729Sjoerg transformZExtICmp(RHS, CI, false))) {
12817330f729Sjoerg // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
12827330f729Sjoerg Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
12837330f729Sjoerg Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
1284*82d56013Sjoerg Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
1285*82d56013Sjoerg if (auto *OrInst = dyn_cast<Instruction>(Or))
1286*82d56013Sjoerg Builder.SetInsertPoint(OrInst);
12877330f729Sjoerg
12887330f729Sjoerg // Perform the elimination.
12897330f729Sjoerg if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
12907330f729Sjoerg transformZExtICmp(LHS, *LZExt);
12917330f729Sjoerg if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
12927330f729Sjoerg transformZExtICmp(RHS, *RZExt);
12937330f729Sjoerg
1294*82d56013Sjoerg return replaceInstUsesWith(CI, Or);
12957330f729Sjoerg }
12967330f729Sjoerg }
12977330f729Sjoerg
12987330f729Sjoerg // zext(trunc(X) & C) -> (X & zext(C)).
12997330f729Sjoerg Constant *C;
13007330f729Sjoerg Value *X;
13017330f729Sjoerg if (SrcI &&
13027330f729Sjoerg match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
13037330f729Sjoerg X->getType() == CI.getType())
13047330f729Sjoerg return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
13057330f729Sjoerg
13067330f729Sjoerg // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
13077330f729Sjoerg Value *And;
13087330f729Sjoerg if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
13097330f729Sjoerg match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
13107330f729Sjoerg X->getType() == CI.getType()) {
13117330f729Sjoerg Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
13127330f729Sjoerg return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
13137330f729Sjoerg }
13147330f729Sjoerg
13157330f729Sjoerg return nullptr;
13167330f729Sjoerg }
13177330f729Sjoerg
13187330f729Sjoerg /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1319*82d56013Sjoerg Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
1320*82d56013Sjoerg Instruction &CI) {
13217330f729Sjoerg Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
13227330f729Sjoerg ICmpInst::Predicate Pred = ICI->getPredicate();
13237330f729Sjoerg
13247330f729Sjoerg // Don't bother if Op1 isn't of vector or integer type.
13257330f729Sjoerg if (!Op1->getType()->isIntOrIntVectorTy())
13267330f729Sjoerg return nullptr;
13277330f729Sjoerg
13287330f729Sjoerg if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
13297330f729Sjoerg (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
13307330f729Sjoerg // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
13317330f729Sjoerg // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
13327330f729Sjoerg Value *Sh = ConstantInt::get(Op0->getType(),
13337330f729Sjoerg Op0->getType()->getScalarSizeInBits() - 1);
13347330f729Sjoerg Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
13357330f729Sjoerg if (In->getType() != CI.getType())
13367330f729Sjoerg In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);
13377330f729Sjoerg
13387330f729Sjoerg if (Pred == ICmpInst::ICMP_SGT)
13397330f729Sjoerg In = Builder.CreateNot(In, In->getName() + ".not");
13407330f729Sjoerg return replaceInstUsesWith(CI, In);
13417330f729Sjoerg }
13427330f729Sjoerg
13437330f729Sjoerg if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
13447330f729Sjoerg // If we know that only one bit of the LHS of the icmp can be set and we
13457330f729Sjoerg // have an equality comparison with zero or a power of 2, we can transform
13467330f729Sjoerg // the icmp and sext into bitwise/integer operations.
13477330f729Sjoerg if (ICI->hasOneUse() &&
13487330f729Sjoerg ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
13497330f729Sjoerg KnownBits Known = computeKnownBits(Op0, 0, &CI);
13507330f729Sjoerg
13517330f729Sjoerg APInt KnownZeroMask(~Known.Zero);
13527330f729Sjoerg if (KnownZeroMask.isPowerOf2()) {
13537330f729Sjoerg Value *In = ICI->getOperand(0);
13547330f729Sjoerg
13557330f729Sjoerg // If the icmp tests for a known zero bit we can constant fold it.
13567330f729Sjoerg if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
13577330f729Sjoerg Value *V = Pred == ICmpInst::ICMP_NE ?
13587330f729Sjoerg ConstantInt::getAllOnesValue(CI.getType()) :
13597330f729Sjoerg ConstantInt::getNullValue(CI.getType());
13607330f729Sjoerg return replaceInstUsesWith(CI, V);
13617330f729Sjoerg }
13627330f729Sjoerg
13637330f729Sjoerg if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
13647330f729Sjoerg // sext ((x & 2^n) == 0) -> (x >> n) - 1
13657330f729Sjoerg // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
13667330f729Sjoerg unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
13677330f729Sjoerg // Perform a right shift to place the desired bit in the LSB.
13687330f729Sjoerg if (ShiftAmt)
13697330f729Sjoerg In = Builder.CreateLShr(In,
13707330f729Sjoerg ConstantInt::get(In->getType(), ShiftAmt));
13717330f729Sjoerg
13727330f729Sjoerg // At this point "In" is either 1 or 0. Subtract 1 to turn
13737330f729Sjoerg // {1, 0} -> {0, -1}.
13747330f729Sjoerg In = Builder.CreateAdd(In,
13757330f729Sjoerg ConstantInt::getAllOnesValue(In->getType()),
13767330f729Sjoerg "sext");
13777330f729Sjoerg } else {
13787330f729Sjoerg // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
13797330f729Sjoerg // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
13807330f729Sjoerg unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
13817330f729Sjoerg // Perform a left shift to place the desired bit in the MSB.
13827330f729Sjoerg if (ShiftAmt)
13837330f729Sjoerg In = Builder.CreateShl(In,
13847330f729Sjoerg ConstantInt::get(In->getType(), ShiftAmt));
13857330f729Sjoerg
13867330f729Sjoerg // Distribute the bit over the whole bit width.
13877330f729Sjoerg In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
13887330f729Sjoerg KnownZeroMask.getBitWidth() - 1), "sext");
13897330f729Sjoerg }
13907330f729Sjoerg
13917330f729Sjoerg if (CI.getType() == In->getType())
13927330f729Sjoerg return replaceInstUsesWith(CI, In);
13937330f729Sjoerg return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
13947330f729Sjoerg }
13957330f729Sjoerg }
13967330f729Sjoerg }
13977330f729Sjoerg
13987330f729Sjoerg return nullptr;
13997330f729Sjoerg }
14007330f729Sjoerg
14017330f729Sjoerg /// Return true if we can take the specified value and return it as type Ty
14027330f729Sjoerg /// without inserting any new casts and without changing the value of the common
14037330f729Sjoerg /// low bits. This is used by code that tries to promote integer operations to
14047330f729Sjoerg /// a wider type, which will allow us to eliminate the extension.
14057330f729Sjoerg ///
14067330f729Sjoerg /// This function works on both vectors and scalars.
14077330f729Sjoerg ///
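/// For example, in (sext (add (sext i8 %x to i16), 5) to i32), the 'add'
/// can be evaluated directly in i32, because both of its operands (a sext
/// and a constant) can be.
///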
14087330f729Sjoerg static bool canEvaluateSExtd(Value *V, Type *Ty) {
14097330f729Sjoerg assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
14107330f729Sjoerg "Can't sign extend type to a smaller type");
14117330f729Sjoerg if (canAlwaysEvaluateInType(V, Ty))
14127330f729Sjoerg return true;
14137330f729Sjoerg if (canNotEvaluateInType(V, Ty))
14147330f729Sjoerg return false;
14157330f729Sjoerg
14167330f729Sjoerg auto *I = cast<Instruction>(V);
14177330f729Sjoerg switch (I->getOpcode()) {
14187330f729Sjoerg case Instruction::SExt: // sext(sext(x)) -> sext(x)
14197330f729Sjoerg case Instruction::ZExt: // sext(zext(x)) -> zext(x)
14207330f729Sjoerg case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
14217330f729Sjoerg return true;
14227330f729Sjoerg case Instruction::And:
14237330f729Sjoerg case Instruction::Or:
14247330f729Sjoerg case Instruction::Xor:
14257330f729Sjoerg case Instruction::Add:
14267330f729Sjoerg case Instruction::Sub:
14277330f729Sjoerg case Instruction::Mul:
14287330f729Sjoerg // These operators can all arbitrarily be extended if their inputs can.
14297330f729Sjoerg return canEvaluateSExtd(I->getOperand(0), Ty) &&
14307330f729Sjoerg canEvaluateSExtd(I->getOperand(1), Ty);
14317330f729Sjoerg
14327330f729Sjoerg //case Instruction::Shl: TODO
14337330f729Sjoerg //case Instruction::LShr: TODO
14347330f729Sjoerg
14357330f729Sjoerg case Instruction::Select:
14367330f729Sjoerg return canEvaluateSExtd(I->getOperand(1), Ty) &&
14377330f729Sjoerg canEvaluateSExtd(I->getOperand(2), Ty);
14387330f729Sjoerg
14397330f729Sjoerg case Instruction::PHI: {
14407330f729Sjoerg // We can change a phi if we can change all operands. Note that we never
14417330f729Sjoerg // get into trouble with cyclic PHIs here because we only consider
14427330f729Sjoerg // instructions with a single use.
14437330f729Sjoerg PHINode *PN = cast<PHINode>(I);
14447330f729Sjoerg for (Value *IncValue : PN->incoming_values())
14457330f729Sjoerg if (!canEvaluateSExtd(IncValue, Ty)) return false;
14467330f729Sjoerg return true;
14477330f729Sjoerg }
14487330f729Sjoerg default:
14497330f729Sjoerg // TODO: Can handle more cases here.
14507330f729Sjoerg break;
14517330f729Sjoerg }
14527330f729Sjoerg
14537330f729Sjoerg return false;
14547330f729Sjoerg }
14557330f729Sjoerg
1456*82d56013Sjoerg Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
14577330f729Sjoerg // If this sign extend is only used by a truncate, let the truncate be
14587330f729Sjoerg // eliminated before we try to optimize this sext.
14597330f729Sjoerg if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
14607330f729Sjoerg return nullptr;
14617330f729Sjoerg
14627330f729Sjoerg if (Instruction *I = commonCastTransforms(CI))
14637330f729Sjoerg return I;
14647330f729Sjoerg
14657330f729Sjoerg Value *Src = CI.getOperand(0);
14667330f729Sjoerg Type *SrcTy = Src->getType(), *DestTy = CI.getType();
14677330f729Sjoerg
14687330f729Sjoerg // If we know that the value being extended is positive, we can use a zext
14697330f729Sjoerg // instead.
14707330f729Sjoerg KnownBits Known = computeKnownBits(Src, 0, &CI);
14717330f729Sjoerg if (Known.isNonNegative())
14727330f729Sjoerg return CastInst::Create(Instruction::ZExt, Src, DestTy);
14737330f729Sjoerg
14747330f729Sjoerg // Try to extend the entire expression tree to the wide destination type.
14757330f729Sjoerg if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
14767330f729Sjoerg // Okay, we can transform this! Insert the new expression now.
14777330f729Sjoerg LLVM_DEBUG(
14787330f729Sjoerg dbgs() << "ICE: EvaluateInDifferentType converting expression type"
14797330f729Sjoerg " to avoid sign extend: "
14807330f729Sjoerg << CI << '\n');
14817330f729Sjoerg Value *Res = EvaluateInDifferentType(Src, DestTy, true);
14827330f729Sjoerg assert(Res->getType() == DestTy);
14837330f729Sjoerg
14847330f729Sjoerg uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
14857330f729Sjoerg uint32_t DestBitSize = DestTy->getScalarSizeInBits();
14867330f729Sjoerg
14877330f729Sjoerg // If the high bits are already filled with the sign bit, just replace this
14887330f729Sjoerg // cast with the result.
14897330f729Sjoerg if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
14907330f729Sjoerg return replaceInstUsesWith(CI, Res);
14917330f729Sjoerg
14927330f729Sjoerg // We need to emit a shl + ashr to do the sign extend.
14937330f729Sjoerg Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
14947330f729Sjoerg return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
14957330f729Sjoerg ShAmt);
14967330f729Sjoerg }
14977330f729Sjoerg
14987330f729Sjoerg // If the input is a trunc from the destination type, then turn sext(trunc(x))
14997330f729Sjoerg // into shifts.
15007330f729Sjoerg Value *X;
15017330f729Sjoerg if (match(Src, m_OneUse(m_Trunc(m_Value(X)))) && X->getType() == DestTy) {
15027330f729Sjoerg // sext(trunc(X)) --> ashr(shl(X, C), C)
15037330f729Sjoerg unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
15047330f729Sjoerg unsigned DestBitSize = DestTy->getScalarSizeInBits();
15057330f729Sjoerg Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
15067330f729Sjoerg return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
15077330f729Sjoerg }
15087330f729Sjoerg
15097330f729Sjoerg if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
15107330f729Sjoerg return transformSExtICmp(ICI, CI);
15117330f729Sjoerg
15127330f729Sjoerg // If the input is a shl/ashr pair with the same constant, then this is a sign
15137330f729Sjoerg // extension from a smaller value. If we could trust arbitrary bitwidth
15147330f729Sjoerg // integers, we could turn this into a truncate to the smaller bitwidth and then
15157330f729Sjoerg // use a sext for the whole extension. Since we don't, look deeper and check
15167330f729Sjoerg // for a truncate. If the source and dest are the same type, eliminate the
15177330f729Sjoerg // trunc and extend and just do shifts. For example, turn:
15187330f729Sjoerg // %a = trunc i32 %i to i8
1519*82d56013Sjoerg // %b = shl i8 %a, C
1520*82d56013Sjoerg // %c = ashr i8 %b, C
15217330f729Sjoerg // %d = sext i8 %c to i32
15227330f729Sjoerg // into:
1523*82d56013Sjoerg // %a = shl i32 %i, 32-(8-C)
1524*82d56013Sjoerg // %d = ashr i32 %a, 32-(8-C)
15257330f729Sjoerg Value *A = nullptr;
15267330f729Sjoerg // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1527*82d56013Sjoerg Constant *BA = nullptr, *CA = nullptr;
1528*82d56013Sjoerg if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1529*82d56013Sjoerg m_Constant(CA))) &&
1530*82d56013Sjoerg BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1531*82d56013Sjoerg Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1532*82d56013Sjoerg Constant *NumLowbitsLeft = ConstantExpr::getSub(
1533*82d56013Sjoerg ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1534*82d56013Sjoerg Constant *NewShAmt = ConstantExpr::getSub(
1535*82d56013Sjoerg ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1536*82d56013Sjoerg NumLowbitsLeft);
1537*82d56013Sjoerg NewShAmt =
1538*82d56013Sjoerg Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1539*82d56013Sjoerg A = Builder.CreateShl(A, NewShAmt, CI.getName());
1540*82d56013Sjoerg return BinaryOperator::CreateAShr(A, NewShAmt);
15417330f729Sjoerg }
15427330f729Sjoerg
15437330f729Sjoerg return nullptr;
15447330f729Sjoerg }
15457330f729Sjoerg
15467330f729Sjoerg /// Return true if the specified floating-point constant fits in the specified
15477330f729Sjoerg /// FP type without changing its value.
15487330f729Sjoerg static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
15497330f729Sjoerg bool losesInfo;
15507330f729Sjoerg APFloat F = CFP->getValueAPF();
15517330f729Sjoerg (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
15527330f729Sjoerg return !losesInfo;
15537330f729Sjoerg }
15547330f729Sjoerg
15557330f729Sjoerg static Type *shrinkFPConstant(ConstantFP *CFP) {
15567330f729Sjoerg if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
15577330f729Sjoerg return nullptr; // No constant folding of this.
15587330f729Sjoerg // See if the value can be truncated to half and then reextended.
15597330f729Sjoerg if (fitsInFPType(CFP, APFloat::IEEEhalf()))
15607330f729Sjoerg return Type::getHalfTy(CFP->getContext());
15617330f729Sjoerg // See if the value can be truncated to float and then reextended.
15627330f729Sjoerg if (fitsInFPType(CFP, APFloat::IEEEsingle()))
15637330f729Sjoerg return Type::getFloatTy(CFP->getContext());
15647330f729Sjoerg if (CFP->getType()->isDoubleTy())
15657330f729Sjoerg return nullptr; // Won't shrink.
15667330f729Sjoerg if (fitsInFPType(CFP, APFloat::IEEEdouble()))
15677330f729Sjoerg return Type::getDoubleTy(CFP->getContext());
15687330f729Sjoerg // Don't try to shrink to various long double types.
15697330f729Sjoerg return nullptr;
15707330f729Sjoerg }
15717330f729Sjoerg
15727330f729Sjoerg // Determine if this is a vector of ConstantFPs and if so, return the minimal
15737330f729Sjoerg // type we can safely truncate all elements to.
15747330f729Sjoerg // TODO: Make these support undef elements.
15757330f729Sjoerg static Type *shrinkFPConstantVector(Value *V) {
15767330f729Sjoerg auto *CV = dyn_cast<Constant>(V);
1577*82d56013Sjoerg auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1578*82d56013Sjoerg if (!CV || !CVVTy)
15797330f729Sjoerg return nullptr;
15807330f729Sjoerg
15817330f729Sjoerg Type *MinType = nullptr;
15827330f729Sjoerg
1583*82d56013Sjoerg unsigned NumElts = CVVTy->getNumElements();
1584*82d56013Sjoerg
1585*82d56013Sjoerg // For fixed-width vectors we find the minimal type by looking
1586*82d56013Sjoerg // through the constant values of the vector.
15877330f729Sjoerg for (unsigned i = 0; i != NumElts; ++i) {
15887330f729Sjoerg auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
15897330f729Sjoerg if (!CFP)
15907330f729Sjoerg return nullptr;
15917330f729Sjoerg
15927330f729Sjoerg Type *T = shrinkFPConstant(CFP);
15937330f729Sjoerg if (!T)
15947330f729Sjoerg return nullptr;
15957330f729Sjoerg
15967330f729Sjoerg // If we haven't found a type yet or this type has a larger mantissa than
15977330f729Sjoerg // our previous type, this is our new minimal type.
15987330f729Sjoerg if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
15997330f729Sjoerg MinType = T;
16007330f729Sjoerg }
16017330f729Sjoerg
16027330f729Sjoerg // Make a vector type from the minimal type.
1603*82d56013Sjoerg return FixedVectorType::get(MinType, NumElts);
16047330f729Sjoerg }
16057330f729Sjoerg
16067330f729Sjoerg /// Find the minimum FP type we can safely truncate to.
16077330f729Sjoerg static Type *getMinimumFPType(Value *V) {
16087330f729Sjoerg if (auto *FPExt = dyn_cast<FPExtInst>(V))
16097330f729Sjoerg return FPExt->getOperand(0)->getType();
16107330f729Sjoerg
16117330f729Sjoerg // If this value is a constant, return the constant in the smallest FP type
16127330f729Sjoerg // that can accurately represent it. This allows us to turn
16137330f729Sjoerg // (float)((double)X+2.0) into x+2.0f.
16147330f729Sjoerg if (auto *CFP = dyn_cast<ConstantFP>(V))
16157330f729Sjoerg if (Type *T = shrinkFPConstant(CFP))
16167330f729Sjoerg return T;
16177330f729Sjoerg
1618*82d56013Sjoerg // We can only correctly find a minimum type for a scalable vector when it is
1619*82d56013Sjoerg // a splat. For splats of constant values the fpext is wrapped up as a
1620*82d56013Sjoerg // ConstantExpr.
1621*82d56013Sjoerg if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1622*82d56013Sjoerg if (FPCExt->getOpcode() == Instruction::FPExt)
1623*82d56013Sjoerg return FPCExt->getOperand(0)->getType();
1624*82d56013Sjoerg
1625*82d56013Sjoerg // Try to shrink a vector of FP constants. This returns nullptr on scalable
1626*82d56013Sjoerg // vectors.
16277330f729Sjoerg if (Type *T = shrinkFPConstantVector(V))
16287330f729Sjoerg return T;
16297330f729Sjoerg
16307330f729Sjoerg return V->getType();
16317330f729Sjoerg }
16327330f729Sjoerg
1633*82d56013Sjoerg /// Return true if the cast from integer to FP can be proven to be exact for all
1634*82d56013Sjoerg /// possible inputs (the conversion does not lose any precision).
1635*82d56013Sjoerg static bool isKnownExactCastIntToFP(CastInst &I) {
1636*82d56013Sjoerg CastInst::CastOps Opcode = I.getOpcode();
1637*82d56013Sjoerg assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1638*82d56013Sjoerg "Unexpected cast");
1639*82d56013Sjoerg Value *Src = I.getOperand(0);
1640*82d56013Sjoerg Type *SrcTy = Src->getType();
1641*82d56013Sjoerg Type *FPTy = I.getType();
1642*82d56013Sjoerg bool IsSigned = Opcode == Instruction::SIToFP;
1643*82d56013Sjoerg int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1644*82d56013Sjoerg
1645*82d56013Sjoerg // Easy case - if the source integer type has less bits than the FP mantissa,
1646*82d56013Sjoerg // then the cast must be exact.
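// For example, 'uitofp i16 %x to float' is always exact, since all 16
// value bits fit in the 24-bit significand of an IEEE single.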
1647*82d56013Sjoerg int DestNumSigBits = FPTy->getFPMantissaWidth();
1648*82d56013Sjoerg if (SrcSize <= DestNumSigBits)
1649*82d56013Sjoerg return true;
1650*82d56013Sjoerg
1651*82d56013Sjoerg // Cast from FP to integer and back to FP is independent of the intermediate
1652*82d56013Sjoerg // integer width because of poison on overflow.
1653*82d56013Sjoerg Value *F;
1654*82d56013Sjoerg if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1655*82d56013Sjoerg // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1656*82d56013Sjoerg // potential rounding of negative FP input values.
1657*82d56013Sjoerg int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1658*82d56013Sjoerg if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1659*82d56013Sjoerg SrcNumSigBits++;
1660*82d56013Sjoerg
1661*82d56013Sjoerg // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1662*82d56013Sjoerg // significant bits than the destination (and make sure neither type is
1663*82d56013Sjoerg // weird -- ppc_fp128).
1664*82d56013Sjoerg if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1665*82d56013Sjoerg SrcNumSigBits <= DestNumSigBits)
1666*82d56013Sjoerg return true;
1667*82d56013Sjoerg }
1668*82d56013Sjoerg
1669*82d56013Sjoerg // TODO:
1670*82d56013Sjoerg // Try harder to find if the source integer type has less significant bits.
1671*82d56013Sjoerg // For example, compute number of sign bits or compute low bit mask.
1672*82d56013Sjoerg return false;
1673*82d56013Sjoerg }
1674*82d56013Sjoerg
1675*82d56013Sjoerg Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
16767330f729Sjoerg if (Instruction *I = commonCastTransforms(FPT))
16777330f729Sjoerg return I;
16787330f729Sjoerg
16797330f729Sjoerg // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
16807330f729Sjoerg // simplify this expression to avoid one or more of the trunc/extend
16817330f729Sjoerg // operations if we can do so without changing the numerical results.
16827330f729Sjoerg //
16837330f729Sjoerg // The exact manner in which the widths of the operands interact to limit
16847330f729Sjoerg // what we can and cannot do safely varies from operation to operation, and
16857330f729Sjoerg // is explained below in the various case statements.
16867330f729Sjoerg Type *Ty = FPT.getType();
16877330f729Sjoerg auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
16887330f729Sjoerg if (BO && BO->hasOneUse()) {
16897330f729Sjoerg Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
16907330f729Sjoerg Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
16917330f729Sjoerg unsigned OpWidth = BO->getType()->getFPMantissaWidth();
16927330f729Sjoerg unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
16937330f729Sjoerg unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
16947330f729Sjoerg unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
16957330f729Sjoerg unsigned DstWidth = Ty->getFPMantissaWidth();
16967330f729Sjoerg switch (BO->getOpcode()) {
16977330f729Sjoerg default: break;
16987330f729Sjoerg case Instruction::FAdd:
16997330f729Sjoerg case Instruction::FSub:
17007330f729Sjoerg // For addition and subtraction, the infinitely precise result can
17017330f729Sjoerg // essentially be arbitrarily wide; proving that double rounding
17027330f729Sjoerg // will not occur because the result of OpI is exact (as we will for
17037330f729Sjoerg // FMul, for example) is hopeless. However, we *can* nonetheless
17047330f729Sjoerg // frequently know that double rounding cannot occur (or that it is
17057330f729Sjoerg // innocuous) by taking advantage of the specific structure of
17067330f729Sjoerg // infinitely-precise results that admit double rounding.
17077330f729Sjoerg //
17087330f729Sjoerg // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
17097330f729Sjoerg // to represent both sources, we can guarantee that the double
17107330f729Sjoerg // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
17117330f729Sjoerg // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
17127330f729Sjoerg // for proof of this fact).
17137330f729Sjoerg //
17147330f729Sjoerg // Note: Figueroa does not consider the case where DstFormat !=
17157330f729Sjoerg // SrcFormat. It's possible (likely even!) that this analysis
17167330f729Sjoerg // could be tightened for those cases, but they are rare (the main
17177330f729Sjoerg // case of interest here is (float)((double)float + float)).
17187330f729Sjoerg if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
17197330f729Sjoerg Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
17207330f729Sjoerg Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
17217330f729Sjoerg Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
17227330f729Sjoerg RI->copyFastMathFlags(BO);
17237330f729Sjoerg return RI;
17247330f729Sjoerg }
17257330f729Sjoerg break;
17267330f729Sjoerg case Instruction::FMul:
17277330f729Sjoerg // For multiplication, the infinitely precise result has at most
17287330f729Sjoerg // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
17297330f729Sjoerg // that such a value can be exactly represented, then no double
17307330f729Sjoerg // rounding can possibly occur; we can safely perform the operation
17317330f729Sjoerg // in the destination format if it can represent both sources.
17327330f729Sjoerg if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
17337330f729Sjoerg Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
17347330f729Sjoerg Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
17357330f729Sjoerg return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
17367330f729Sjoerg }
17377330f729Sjoerg break;
17387330f729Sjoerg case Instruction::FDiv:
17397330f729Sjoerg // For division, we again use the bound from Figueroa's
17407330f729Sjoerg // dissertation. I am entirely certain that this bound can be
17417330f729Sjoerg // tightened in the unbalanced operand case by an analysis based on
17427330f729Sjoerg // the diophantine rational approximation bound, but the well-known
17437330f729Sjoerg // condition used here is a good conservative first pass.
17447330f729Sjoerg // TODO: Tighten bound via rigorous analysis of the unbalanced case.
17457330f729Sjoerg if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
17467330f729Sjoerg Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
17477330f729Sjoerg Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
17487330f729Sjoerg return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
17497330f729Sjoerg }
17507330f729Sjoerg break;
17517330f729Sjoerg case Instruction::FRem: {
17527330f729Sjoerg // Remainder is straightforward: it is always exact, so the
17537330f729Sjoerg // type of OpI doesn't enter into things at all. We simply evaluate
17547330f729Sjoerg // in whichever source type is larger, then convert to the
17557330f729Sjoerg // destination type.
17567330f729Sjoerg if (SrcWidth == OpWidth)
17577330f729Sjoerg break;
17587330f729Sjoerg Value *LHS, *RHS;
17597330f729Sjoerg if (LHSWidth == SrcWidth) {
17607330f729Sjoerg LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
17617330f729Sjoerg RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
17627330f729Sjoerg } else {
17637330f729Sjoerg LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
17647330f729Sjoerg RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
17657330f729Sjoerg }
17667330f729Sjoerg
17677330f729Sjoerg Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
17687330f729Sjoerg return CastInst::CreateFPCast(ExactResult, Ty);
17697330f729Sjoerg }
17707330f729Sjoerg }
17717330f729Sjoerg }
17727330f729Sjoerg
17737330f729Sjoerg // (fptrunc (fneg x)) -> (fneg (fptrunc x))
17747330f729Sjoerg Value *X;
17757330f729Sjoerg Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
17767330f729Sjoerg if (Op && Op->hasOneUse()) {
1777*82d56013Sjoerg // FIXME: The FMF should propagate from the fptrunc, not the source op.
1778*82d56013Sjoerg IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1779*82d56013Sjoerg if (isa<FPMathOperator>(Op))
1780*82d56013Sjoerg Builder.setFastMathFlags(Op->getFastMathFlags());
1781*82d56013Sjoerg
17827330f729Sjoerg if (match(Op, m_FNeg(m_Value(X)))) {
17837330f729Sjoerg Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
17847330f729Sjoerg
17857330f729Sjoerg return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
17867330f729Sjoerg }
1787*82d56013Sjoerg
1788*82d56013Sjoerg // If we are truncating a select that has an extended operand, we can
1789*82d56013Sjoerg // narrow the other operand and do the select as a narrow op.
1790*82d56013Sjoerg Value *Cond, *X, *Y;
1791*82d56013Sjoerg if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1792*82d56013Sjoerg X->getType() == Ty) {
1793*82d56013Sjoerg // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1794*82d56013Sjoerg Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1795*82d56013Sjoerg Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1796*82d56013Sjoerg return replaceInstUsesWith(FPT, Sel);
1797*82d56013Sjoerg }
1798*82d56013Sjoerg if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1799*82d56013Sjoerg X->getType() == Ty) {
1800*82d56013Sjoerg // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1801*82d56013Sjoerg Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1802*82d56013Sjoerg Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1803*82d56013Sjoerg return replaceInstUsesWith(FPT, Sel);
1804*82d56013Sjoerg }
18057330f729Sjoerg }
18067330f729Sjoerg
18077330f729Sjoerg if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
18087330f729Sjoerg switch (II->getIntrinsicID()) {
18097330f729Sjoerg default: break;
18107330f729Sjoerg case Intrinsic::ceil:
18117330f729Sjoerg case Intrinsic::fabs:
18127330f729Sjoerg case Intrinsic::floor:
18137330f729Sjoerg case Intrinsic::nearbyint:
18147330f729Sjoerg case Intrinsic::rint:
18157330f729Sjoerg case Intrinsic::round:
1816*82d56013Sjoerg case Intrinsic::roundeven:
18177330f729Sjoerg case Intrinsic::trunc: {
18187330f729Sjoerg Value *Src = II->getArgOperand(0);
18197330f729Sjoerg if (!Src->hasOneUse())
18207330f729Sjoerg break;
18217330f729Sjoerg
18227330f729Sjoerg // Except for fabs, this transformation requires the input of the unary FP
18237330f729Sjoerg // operation to be itself an fpext from the type to which we're
18247330f729Sjoerg // truncating.
18257330f729Sjoerg if (II->getIntrinsicID() != Intrinsic::fabs) {
18267330f729Sjoerg FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
18277330f729Sjoerg if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
18287330f729Sjoerg break;
18297330f729Sjoerg }
18307330f729Sjoerg
18317330f729Sjoerg // Do unary FP operation on smaller type.
18327330f729Sjoerg // (fptrunc (fabs x)) -> (fabs (fptrunc x))
18337330f729Sjoerg Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
18347330f729Sjoerg Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
18357330f729Sjoerg II->getIntrinsicID(), Ty);
18367330f729Sjoerg SmallVector<OperandBundleDef, 1> OpBundles;
18377330f729Sjoerg II->getOperandBundlesAsDefs(OpBundles);
18387330f729Sjoerg CallInst *NewCI =
18397330f729Sjoerg CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
18407330f729Sjoerg NewCI->copyFastMathFlags(II);
18417330f729Sjoerg return NewCI;
18427330f729Sjoerg }
18437330f729Sjoerg }
18447330f729Sjoerg }
18457330f729Sjoerg
18467330f729Sjoerg if (Instruction *I = shrinkInsertElt(FPT, Builder))
18477330f729Sjoerg return I;
18487330f729Sjoerg
1849*82d56013Sjoerg Value *Src = FPT.getOperand(0);
1850*82d56013Sjoerg if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1851*82d56013Sjoerg auto *FPCast = cast<CastInst>(Src);
1852*82d56013Sjoerg if (isKnownExactCastIntToFP(*FPCast))
1853*82d56013Sjoerg return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1854*82d56013Sjoerg }
1855*82d56013Sjoerg
18567330f729Sjoerg return nullptr;
18577330f729Sjoerg }
18587330f729Sjoerg
1859*82d56013Sjoerg Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1860*82d56013Sjoerg // If the source operand is a cast from integer to FP and known exact, then
1861*82d56013Sjoerg // cast the integer operand directly to the destination type.
1862*82d56013Sjoerg Type *Ty = FPExt.getType();
1863*82d56013Sjoerg Value *Src = FPExt.getOperand(0);
1864*82d56013Sjoerg if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1865*82d56013Sjoerg auto *FPCast = cast<CastInst>(Src);
1866*82d56013Sjoerg if (isKnownExactCastIntToFP(*FPCast))
1867*82d56013Sjoerg return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
18687330f729Sjoerg }
18697330f729Sjoerg
1870*82d56013Sjoerg return commonCastTransforms(FPExt);
1871*82d56013Sjoerg }
1872*82d56013Sjoerg
1873*82d56013Sjoerg /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1874*82d56013Sjoerg /// This is safe if the intermediate type has enough bits in its mantissa to
1875*82d56013Sjoerg /// accurately represent all values of X. For example, this won't work with
1876*82d56013Sjoerg /// i64 -> float -> i64.
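/// A case that does work (illustrative):
///   %f = sitofp i16 %x to float
///   %i = fptosi float %f to i32   --> sext i16 %x to i32
/// because every i16 value is exactly representable in a float's 24-bit
/// significand.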
1877*82d56013Sjoerg Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
18787330f729Sjoerg if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
18797330f729Sjoerg return nullptr;
18807330f729Sjoerg
1881*82d56013Sjoerg auto *OpI = cast<CastInst>(FI.getOperand(0));
1882*82d56013Sjoerg Value *X = OpI->getOperand(0);
1883*82d56013Sjoerg Type *XType = X->getType();
1884*82d56013Sjoerg Type *DestType = FI.getType();
18857330f729Sjoerg bool IsOutputSigned = isa<FPToSIInst>(FI);
18867330f729Sjoerg
18877330f729Sjoerg // Since we can assume the conversion won't overflow, our decision as to
18887330f729Sjoerg // whether the input will fit in the float should depend on the minimum
18897330f729Sjoerg // of the input range and output range.
18907330f729Sjoerg
18917330f729Sjoerg // This means this is also safe for a signed input and unsigned output, since
18927330f729Sjoerg // a negative input would lead to undefined behavior.
1893*82d56013Sjoerg if (!isKnownExactCastIntToFP(*OpI)) {
1894*82d56013Sjoerg // The first cast may not round exactly based on the source integer width
1895*82d56013Sjoerg // and FP width, but the overflow UB rules can still allow this to fold.
1896*82d56013Sjoerg // If the destination type is narrow, that means the intermediate FP value
1897*82d56013Sjoerg // must be large enough to hold the source value exactly.
1898*82d56013Sjoerg // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior: 2^24+1 is not exactly representable as a float.
1899*82d56013Sjoerg int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
1900*82d56013Sjoerg if (OutputSize > OpI->getType()->getFPMantissaWidth())
19017330f729Sjoerg return nullptr;
19027330f729Sjoerg }
19037330f729Sjoerg
1904*82d56013Sjoerg if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1905*82d56013Sjoerg bool IsInputSigned = isa<SIToFPInst>(OpI);
1906*82d56013Sjoerg if (IsInputSigned && IsOutputSigned)
1907*82d56013Sjoerg return new SExtInst(X, DestType);
1908*82d56013Sjoerg return new ZExtInst(X, DestType);
1909*82d56013Sjoerg }
1910*82d56013Sjoerg if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1911*82d56013Sjoerg return new TruncInst(X, DestType);
19127330f729Sjoerg
1913*82d56013Sjoerg assert(XType == DestType && "Unexpected types for int to FP to int casts");
1914*82d56013Sjoerg return replaceInstUsesWith(FI, X);
1915*82d56013Sjoerg }
1916*82d56013Sjoerg
1917*82d56013Sjoerg Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1918*82d56013Sjoerg if (Instruction *I = foldItoFPtoI(FI))
19197330f729Sjoerg return I;
19207330f729Sjoerg
19217330f729Sjoerg return commonCastTransforms(FI);
19227330f729Sjoerg }
19237330f729Sjoerg
1924*82d56013Sjoerg Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
1925*82d56013Sjoerg if (Instruction *I = foldItoFPtoI(FI))
19267330f729Sjoerg return I;
19277330f729Sjoerg
19287330f729Sjoerg return commonCastTransforms(FI);
19297330f729Sjoerg }
19307330f729Sjoerg
1931*82d56013Sjoerg Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
19327330f729Sjoerg return commonCastTransforms(CI);
19337330f729Sjoerg }
19347330f729Sjoerg
1935*82d56013Sjoerg Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
19367330f729Sjoerg return commonCastTransforms(CI);
19377330f729Sjoerg }
19387330f729Sjoerg
1939*82d56013Sjoerg Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
19407330f729Sjoerg // If the source integer type is not the intptr_t type for this target, do a
19417330f729Sjoerg // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
19427330f729Sjoerg // cast to be exposed to other transforms.
19437330f729Sjoerg unsigned AS = CI.getAddressSpace();
19447330f729Sjoerg if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
19457330f729Sjoerg DL.getPointerSizeInBits(AS)) {
1946*82d56013Sjoerg Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
1947*82d56013Sjoerg DL.getIntPtrType(CI.getContext(), AS));
19487330f729Sjoerg Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
19497330f729Sjoerg return new IntToPtrInst(P, CI.getType());
19507330f729Sjoerg }
19517330f729Sjoerg
19527330f729Sjoerg if (Instruction *I = commonCastTransforms(CI))
19537330f729Sjoerg return I;
19547330f729Sjoerg
19557330f729Sjoerg return nullptr;
19567330f729Sjoerg }
19577330f729Sjoerg
19587330f729Sjoerg /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
1959*82d56013Sjoerg Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
19607330f729Sjoerg Value *Src = CI.getOperand(0);
19617330f729Sjoerg
19627330f729Sjoerg if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
19637330f729Sjoerg // If casting the result of a getelementptr instruction with no offset, turn
19647330f729Sjoerg // this into a cast of the original pointer!
19657330f729Sjoerg if (GEP->hasAllZeroIndices() &&
19667330f729Sjoerg // If CI is an addrspacecast and GEP changes the pointer type, merging
19677330f729Sjoerg // GEP into CI would undo canonicalizing addrspacecast with different
19687330f729Sjoerg // pointer types, causing infinite loops.
19697330f729Sjoerg (!isa<AddrSpaceCastInst>(CI) ||
19707330f729Sjoerg GEP->getType() == GEP->getPointerOperandType())) {
19717330f729Sjoerg // Changing the cast operand is usually not a good idea but it is safe
19727330f729Sjoerg // here because the pointer operand is being replaced with another
19737330f729Sjoerg // pointer operand so the opcode doesn't need to change.
1974*82d56013Sjoerg return replaceOperand(CI, 0, GEP->getOperand(0));
19757330f729Sjoerg }
19767330f729Sjoerg }
19777330f729Sjoerg
19787330f729Sjoerg return commonCastTransforms(CI);
19797330f729Sjoerg }
19807330f729Sjoerg
1981*82d56013Sjoerg Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
19827330f729Sjoerg // If the destination integer type is not the intptr_t type for this target,
19837330f729Sjoerg // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
19847330f729Sjoerg // to be exposed to other transforms.
1985*82d56013Sjoerg Value *SrcOp = CI.getPointerOperand();
1986*82d56013Sjoerg Type *SrcTy = SrcOp->getType();
19877330f729Sjoerg Type *Ty = CI.getType();
19887330f729Sjoerg unsigned AS = CI.getPointerAddressSpace();
1989*82d56013Sjoerg unsigned TySize = Ty->getScalarSizeInBits();
1990*82d56013Sjoerg unsigned PtrSize = DL.getPointerSizeInBits(AS);
1991*82d56013Sjoerg if (TySize != PtrSize) {
1992*82d56013Sjoerg Type *IntPtrTy =
1993*82d56013Sjoerg SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
1994*82d56013Sjoerg Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
19957330f729Sjoerg return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
19967330f729Sjoerg }
19977330f729Sjoerg
1998*82d56013Sjoerg Value *Vec, *Scalar, *Index;
1999*82d56013Sjoerg if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
2000*82d56013Sjoerg m_Value(Scalar), m_Value(Index)))) &&
2001*82d56013Sjoerg Vec->getType() == Ty) {
2002*82d56013Sjoerg assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2003*82d56013Sjoerg // Convert the scalar to int followed by insert to eliminate one cast:
2004*82d56013Sjoerg // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2005*82d56013Sjoerg Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2006*82d56013Sjoerg return InsertElementInst::Create(Vec, NewCast, Index);
2007*82d56013Sjoerg }
2008*82d56013Sjoerg
2009*82d56013Sjoerg return commonPointerCastTransforms(CI);
2010*82d56013Sjoerg }
2011*82d56013Sjoerg
20127330f729Sjoerg /// This input value (which is known to have vector type) is being zero extended
2013*82d56013Sjoerg /// or truncated to the specified vector type. Since the zext/trunc is done
2014*82d56013Sjoerg /// using an integer type, we have a (bitcast(cast(bitcast))) pattern, so
2015*82d56013Sjoerg /// endianness will impact which end of the vector is extended or
2016*82d56013Sjoerg /// truncated.
2017*82d56013Sjoerg ///
2018*82d56013Sjoerg /// A vector is always stored with index 0 at the lowest address, which
2019*82d56013Sjoerg /// corresponds to the most significant bits for a big endian stored integer and
2020*82d56013Sjoerg /// the least significant bits for little endian. A trunc/zext of an integer
2021*82d56013Sjoerg /// impacts the big end of the integer. Thus, we need to add/remove elements at
2022*82d56013Sjoerg /// the front of the vector for big endian targets, and the back of the vector
2023*82d56013Sjoerg /// for little endian targets.
2024*82d56013Sjoerg ///
20257330f729Sjoerg /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
20267330f729Sjoerg ///
20277330f729Sjoerg /// The source and destination vector types may have different element types.
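///
/// For example, on a little endian target (an illustrative sketch; %x is
/// hypothetical):
///   %i = bitcast <4 x i16> %x to i64
///   %t = trunc i64 %i to i32
///   %r = bitcast i32 %t to <2 x i16>
/// can become
///   %r = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
/// while a big endian target would keep the high elements <i32 2, i32 3>.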
static Instruction *
optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
                                        InstCombinerImpl &IC) {
  // We can only do this optimization if the output is a multiple of the input
  // element size, or the input is a multiple of the output element size.
  // Convert the input type to have the same element type as the output.
  VectorType *SrcTy = cast<VectorType>(InVal->getType());

  if (SrcTy->getElementType() != DestTy->getElementType()) {
    // The input types don't need to be identical, but for now they must be the
    // same size. There is no specific reason we couldn't handle things like
    // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
    // there yet.
    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
        DestTy->getElementType()->getPrimitiveSizeInBits())
      return nullptr;

    SrcTy =
        FixedVectorType::get(DestTy->getElementType(),
                             cast<FixedVectorType>(SrcTy)->getNumElements());
    InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
  }

  bool IsBigEndian = IC.getDataLayout().isBigEndian();
  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();

  assert(SrcElts != DestElts && "Element counts should be different.");

  // Now that the element types match, get the shuffle mask and RHS of the
  // shuffle to use, which depends on whether we're increasing or decreasing
  // the size of the input.
  SmallVector<int, 16> ShuffleMaskStorage;
  ArrayRef<int> ShuffleMask;
  Value *V2;

  // Produce an identity shuffle mask for the src vector.
  ShuffleMaskStorage.resize(SrcElts);
  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);

  if (SrcElts > DestElts) {
    // If we're shrinking the number of elements (rewriting an integer
    // truncate), just shuffle in the elements corresponding to the least
    // significant bits from the input and use undef as the second shuffle
    // input.
    V2 = UndefValue::get(SrcTy);
    // Make sure the shuffle mask selects the "least significant bits" by
    // keeping elements from the back of the src vector for big endian, and
    // from the front for little endian.
    ShuffleMask = ShuffleMaskStorage;
    if (IsBigEndian)
      ShuffleMask = ShuffleMask.take_back(DestElts);
    else
      ShuffleMask = ShuffleMask.take_front(DestElts);
  } else {
    // If we're increasing the number of elements (rewriting an integer zext),
    // shuffle in all of the elements from InVal. Fill the rest of the result
    // elements with zeros from a constant zero.
    V2 = Constant::getNullValue(SrcTy);
    // Use the first elt from V2 when indicating zero in the shuffle mask.
    uint32_t NullElt = SrcElts;
    // Extend with null values in the "most significant bits" by adding
    // elements in front of the src vector for big endian, and at the back for
    // little endian.
    unsigned DeltaElts = DestElts - SrcElts;
    if (IsBigEndian)
      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
    else
      ShuffleMaskStorage.append(DeltaElts, NullElt);
    ShuffleMask = ShuffleMaskStorage;
  }

  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
}

static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
  return Value % Ty->getPrimitiveSizeInBits() == 0;
}

static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
  return Value / Ty->getPrimitiveSizeInBits();
}

/// V is a value which is inserted into a vector of VecEltTy.
/// Look through the value to see if we can decompose it into
/// insertions into the vector. See the example in the comment for
/// OptimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
/// Shift is the number of bits between the lsb of V and the lsb of
/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
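///
/// For example (an illustrative sketch), with VecEltTy = i16 and a value V of
/// type i32 at Shift = 32, V covers element slots 2 and 3 of a <4 x i16>
/// vector on a little endian target (slots 1 and 0 on big endian).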
static bool collectInsertionElements(Value *V, unsigned Shift,
                                     SmallVectorImpl<Value *> &Elements,
                                     Type *VecEltTy, bool isBigEndian) {
  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
         "Shift should be a multiple of the element type size");

  // Undef values never contribute useful bits to the result.
  if (isa<UndefValue>(V)) return true;

  // If we got down to a value of the right type, we win; try inserting it into
  // the right element.
  if (V->getType() == VecEltTy) {
    // Inserting null doesn't actually insert any elements.
    if (Constant *C = dyn_cast<Constant>(V))
      if (C->isNullValue())
        return true;

    unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
    if (isBigEndian)
      ElementIndex = Elements.size() - ElementIndex - 1;

    // Fail if multiple elements are inserted into this slot.
    if (Elements[ElementIndex])
      return false;

    Elements[ElementIndex] = V;
    return true;
  }

  if (Constant *C = dyn_cast<Constant>(V)) {
    // Figure out the # elements this provides, and bitcast it or slice it up
    // as required.
    unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
                                        VecEltTy);
    // If the constant is the size of a vector element, we just need to bitcast
    // it to the right type so it gets properly inserted.
    if (NumElts == 1)
      return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
                                      Shift, Elements, VecEltTy, isBigEndian);

    // Okay, this is a constant that covers multiple elements. Slice it up into
    // pieces and insert each element-sized piece into the vector.
    if (!isa<IntegerType>(C->getType()))
      C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
                                       C->getType()->getPrimitiveSizeInBits()));
    unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
    Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);

    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned ShiftI = Shift + i * ElementSize;
      Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
                                                                  ShiftI));
      Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
      if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
                                    isBigEndian))
        return false;
    }
    return true;
  }

  if (!V->hasOneUse()) return false;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;
  switch (I->getOpcode()) {
  default: return false; // Unhandled case.
  case Instruction::BitCast:
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::ZExt:
    if (!isMultipleOfTypeSize(
            I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy))
      return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::Or:
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian) &&
           collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::Shl: {
    // Must be shifting by a constant that is a multiple of the element size.
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (!CI) return false;
    Shift += CI->getZExtValue();
    if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  }
  }
}

/// If the input is an 'or' instruction, we may be doing shifts and ors to
/// assemble the elements of the vector manually.
/// Try to rip the code out and replace it with insertelements. This is to
/// optimize code like this:
///
///    %tmp37 = bitcast float %inc to i32
///    %tmp38 = zext i32 %tmp37 to i64
///    %tmp31 = bitcast float %inc5 to i32
///    %tmp32 = zext i32 %tmp31 to i64
///    %tmp33 = shl i64 %tmp32, 32
///    %ins35 = or i64 %tmp33, %tmp38
///    %tmp43 = bitcast i64 %ins35 to <2 x float>
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
                                                InstCombinerImpl &IC) {
  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
  Value *IntInput = CI.getOperand(0);

  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
  if (!collectInsertionElements(IntInput, 0, Elements,
                                DestVecTy->getElementType(),
                                IC.getDataLayout().isBigEndian()))
    return nullptr;

  // If we succeeded, we know that all of the elements are specified by
  // Elements or are zero if Elements has a null entry. Recast this as a set
  // of insertions.
  Value *Result = Constant::getNullValue(CI.getType());
  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
    if (!Elements[i]) continue;  // Unset element.

    Result = IC.Builder.CreateInsertElement(Result, Elements[i],
                                            IC.Builder.getInt32(i));
  }

  return Result;
}

/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
/// vector followed by extract element. The backend tends to handle bitcasts of
/// vectors better than bitcasts of scalars because vector registers are
/// usually not type-specific like scalar integer or scalar floating-point.
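///
/// For example (an illustrative sketch; %v is hypothetical):
///   %e = extractelement <2 x float> %v, i32 0
///   %r = bitcast float %e to i32
/// becomes
///   %bc = bitcast <2 x float> %v to <2 x i32>
///   %r = extractelement <2 x i32> %bc, i32 0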
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
                                              InstCombinerImpl &IC) {
  // TODO: Create and use a pattern matcher for ExtractElementInst.
  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
  if (!ExtElt || !ExtElt->hasOneUse())
    return nullptr;

  // The bitcast must be to a vectorizable type, otherwise we can't make a new
  // type to extract from.
  Type *DestType = BitCast.getType();
  if (!VectorType::isValidElementType(DestType))
    return nullptr;

  auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
  auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
                                         NewVecType, "bc");
  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}

/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
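///
/// For example (an illustrative sketch; %x and %y are hypothetical):
///   %b = bitcast <4 x i32> %x to <2 x i64>
///   %a = and <2 x i64> %b, %y
///   %r = bitcast <2 x i64> %a to <4 x i32>
/// becomes
///   %by = bitcast <2 x i64> %y to <4 x i32>
///   %r = and <4 x i32> %x, %by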
static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
                                            InstCombiner::BuilderTy &Builder) {
  Type *DestTy = BitCast.getType();
  BinaryOperator *BO;
  if (!DestTy->isIntOrIntVectorTy() ||
      !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
      !BO->isBitwiseLogicOp())
    return nullptr;

  // FIXME: This transform is restricted to vector types to avoid backend
  // problems caused by creating potentially illegal operations. If a fix-up is
  // added to handle that situation, we can remove this check.
  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
    return nullptr;

  Value *X;
  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
    Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
  }

  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
  }

  // Canonicalize vector bitcasts to come before vector bitwise logic with a
  // constant. This eases recognition of special constants for later ops.
  // Example:
  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
  Constant *C;
  if (match(BO->getOperand(1), m_Constant(C))) {
    // bitcast (logic X, C) --> logic (bitcast X, C')
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    Value *CastedC = Builder.CreateBitCast(C, DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
  }

  return nullptr;
}

/// Change the type of a select if we can eliminate a bitcast.
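///
/// For example (an illustrative sketch; %x and %y are hypothetical):
///   %t = bitcast <2 x i64> %x to <4 x i32>
///   %s = select i1 %c, <4 x i32> %t, <4 x i32> %y
///   %r = bitcast <4 x i32> %s to <2 x i64>
/// becomes
///   %cy = bitcast <4 x i32> %y to <2 x i64>
///   %r = select i1 %c, <2 x i64> %x, <2 x i64> %cy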
static Instruction *foldBitCastSelect(BitCastInst &BitCast,
                                      InstCombiner::BuilderTy &Builder) {
  Value *Cond, *TVal, *FVal;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
    return nullptr;

  // A vector select must maintain the same number of elements in its operands.
  Type *CondTy = Cond->getType();
  Type *DestTy = BitCast.getType();
  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
    if (!DestTy->isVectorTy() ||
        CondVTy->getElementCount() !=
            cast<VectorType>(DestTy)->getElementCount())
      return nullptr;

  // FIXME: This transform is restricted from changing the select between
  // scalars and vectors to avoid backend problems caused by creating
  // potentially illegal operations. If a fix-up is added to handle that
  // situation, we can remove this check.
  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
    return nullptr;

  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
  Value *X;
  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
    Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
    return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
  }

  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
    Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
    return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
  }

  return nullptr;
}

/// Check if all users of CI are StoreInsts.
static bool hasStoreUsersOnly(CastInst &CI) {
  for (User *U : CI.users()) {
    if (!isa<StoreInst>(U))
      return false;
  }
  return true;
}

/// This function handles the following case:
///
///     A -> B cast
///     PHI
///     B -> A cast
///
/// All the related PHI nodes can be replaced by new PHI nodes with type A.
/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
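///
/// For example (an illustrative sketch; %x and the block names are
/// hypothetical):
///   %b = bitcast float %x to i32            ; A -> B cast
///   %p = phi i32 [ %b, %bb1 ], [ 0, %bb2 ]
///   %r = bitcast i32 %p to float            ; B -> A cast
/// can become
///   %p = phi float [ %x, %bb1 ], [ 0.0, %bb2 ]
/// with the users of %r rewritten to use the new PHI directly.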
Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
                                                      PHINode *PN) {
  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
  if (hasStoreUsersOnly(CI))
    return nullptr;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();   // Type B
  Type *DestTy = CI.getType();    // Type A

  SmallVector<PHINode *, 4> PhiWorklist;
  SmallSetVector<PHINode *, 4> OldPhiNodes;

  // Find all of the A->B casts and PHI nodes.
  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
  // OldPhiNodes is used to track all known PHI nodes; before adding a new
  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
  PhiWorklist.push_back(PN);
  OldPhiNodes.insert(PN);
  while (!PhiWorklist.empty()) {
    auto *OldPN = PhiWorklist.pop_back_val();
    for (Value *IncValue : OldPN->incoming_values()) {
      if (isa<Constant>(IncValue))
        continue;

      if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
        // If there is a sequence of one or more load instructions, where each
        // loaded value is used as the address of a later load, a bitcast is
        // necessary to change the value type; don't optimize it. For
        // simplicity we give up if the load address comes from another load.
        Value *Addr = LI->getOperand(0);
        if (Addr == &CI || isa<LoadInst>(Addr))
          return nullptr;
        // Don't transform "load <256 x i32>, <256 x i32>*" to
        // "load x86_amx, x86_amx*", because x86_amx* is invalid.
        // TODO: Remove this check when bitcast between vector and x86_amx
        // is replaced with a specific intrinsic.
        if (DestTy->isX86_AMXTy())
          return nullptr;
        if (LI->hasOneUse() && LI->isSimple())
          continue;
        // If a LoadInst has more than one use, changing the type of loaded
        // value may create another bitcast.
        return nullptr;
      }

      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }

      auto *BCI = dyn_cast<BitCastInst>(IncValue);
      // We can't handle other instructions.
      if (!BCI)
        return nullptr;

      // Verify it's an A->B cast.
      Type *TyA = BCI->getOperand(0)->getType();
      Type *TyB = BCI->getType();
      if (TyA != DestTy || TyB != SrcTy)
        return nullptr;
    }
  }

  // Check that each user of each old PHI node is something that we can
  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
  for (auto *OldPN : OldPhiNodes) {
    for (User *V : OldPN->users()) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
          return nullptr;
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        // Verify it's a B->A cast.
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        if (TyA != DestTy || TyB != SrcTy)
          return nullptr;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        // As long as the user is another old PHI node, then even if we don't
        // rewrite it, the PHI web we're considering won't have any users
        // outside itself, so it'll be dead.
        if (OldPhiNodes.count(PHI) == 0)
          return nullptr;
      } else {
        return nullptr;
      }
    }
  }

  // For each old PHI node, create a corresponding new PHI node with type A.
  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      if (auto *C = dyn_cast<Constant>(V)) {
        NewV = ConstantExpr::getBitCast(C, DestTy);
      } else if (auto *LI = dyn_cast<LoadInst>(V)) {
        // Explicitly perform load combine to make sure no opposing transform
        // can remove the bitcast in the meantime and trigger an infinite loop.
        Builder.SetInsertPoint(LI);
        NewV = combineLoadToNewType(*LI, DestTy);
        // Remove the old load and its use in the old phi, which itself becomes
        // dead once the whole transform finishes.
        replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
        eraseInstFromFunction(*LI);
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        NewV = BCI->getOperand(0);
      } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
        NewV = NewPNodes[PrevPN];
      }
      assert(NewV);
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Traverse all accumulated PHI nodes and process their users,
  // which are Stores and BitCasts. Without this processing
  // NewPHI nodes could be replicated and could lead to extra
  // moves generated after DeSSA.
  // If there is a store with type B, change it to type A.

  // Replace users of BitCast B->A with NewPHI. These will help
  // later to get rid of a closure formed by OldPHI nodes.
  Instruction *RetVal = nullptr;
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (User *V : make_early_inc_range(OldPN->users())) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
        Builder.SetInsertPoint(SI);
        auto *NewBC =
            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
        SI->setOperand(0, NewBC);
        Worklist.push(SI);
        assert(hasStoreUsersOnly(*NewBC));
      }
      else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        assert(TyA == DestTy && TyB == SrcTy);
        (void) TyA;
        (void) TyB;
        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
        if (BCI == &CI)
          RetVal = I;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        assert(OldPhiNodes.contains(PHI));
        (void) PHI;
      } else {
        llvm_unreachable("all uses should be handled");
      }
    }
  }

  return RetVal;
}

Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
  // If the operands are integer typed then apply the integer transforms,
  // otherwise just apply the common ones.
  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *DestTy = CI.getType();

  // Get rid of casts from one type to the same type. These are useless and can
  // be replaced by the operand.
  if (DestTy == Src->getType())
    return replaceInstUsesWith(CI, Src);

  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
    PointerType *SrcPTy = cast<PointerType>(SrcTy);
    PointerType *DstPTy = cast<PointerType>(DestTy);
    Type *DstElTy = DstPTy->getElementType();
    Type *SrcElTy = SrcPTy->getElementType();

    // Casting pointers between the same type but with different address
    // spaces is an addrspace cast rather than a bitcast.
    if ((DstElTy == SrcElTy) &&
        (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
      return new AddrSpaceCastInst(Src, DestTy);

    // If we are casting an alloca to a pointer to a type of the same
    // size, rewrite the allocation instruction to allocate the "right" type.
    // There is no need to modify malloc calls because it is their bitcast that
    // needs to be cleaned up.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
        return V;

    // When the type pointed to is not sized the cast cannot be
    // turned into a gep.
    Type *PointeeType =
        cast<PointerType>(Src->getType()->getScalarType())->getElementType();
    if (!PointeeType->isSized())
      return nullptr;

    // If the source and destination are pointers, and this cast is equivalent
    // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
    // This can enhance SROA and other transforms that want type-safe pointers.
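    // For example (an illustrative sketch; %s is hypothetical):
    //   %p = bitcast { [4 x i32] }* %s to i32*
    // becomes
    //   %p = getelementptr { [4 x i32] }, { [4 x i32] }* %s, i32 0, i32 0, i32 0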
    unsigned NumZeros = 0;
    while (SrcElTy && SrcElTy != DstElTy) {
      SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
      ++NumZeros;
    }

    // If we found a path from the src to dest, create the getelementptr now.
    if (SrcElTy == DstElTy) {
      SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
      GetElementPtrInst *GEP =
          GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);

      // If the source pointer is dereferenceable, then assume it points to an
      // allocated object and apply "inbounds" to the GEP.
      bool CanBeNull, CanBeFreed;
      if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
        // In a non-default address space (not 0), a null pointer can not be
        // assumed inbounds, so ignore that case (dereferenceable_or_null).
        // The reason is that 'null' is not treated differently in these
        // address spaces, and we consequently ignore the 'gep inbounds'
        // special case for 'null' which allows 'inbounds' on 'null' if the
        // indices are zeros.
        if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
          GEP->setIsInBounds();
      }
      return GEP;
    }
  }

  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
    // Beware: messing with this target-specific oddity may cause trouble.
    if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
      Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
      return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
    }

    if (isa<IntegerType>(SrcTy)) {
      // If this is a cast from an integer to vector, check to see if the input
      // is a trunc or zext of a bitcast from vector. If so, we can replace all
      // the casts with a shuffle and (potentially) a bitcast.
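      // For example (an illustrative sketch; %v is hypothetical):
      //   %b = bitcast <2 x i16> %v to i32
      //   %z = zext i32 %b to i64
      //   %r = bitcast i64 %z to <4 x i16>
      // can become a shuffle of %v with a zero vector (see
      // optimizeVectorResizeWithIntegerBitCasts above).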
      if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
        CastInst *SrcCast = cast<CastInst>(Src);
        if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
          if (isa<VectorType>(BCIn->getOperand(0)->getType()))
            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
              return I;
      }

      // If the input is an 'or' instruction, we may be doing shifts and ors to
      // assemble the elements of the vector manually. Try to rip the code out
      // and replace it with insertelements.
      if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
        return replaceInstUsesWith(CI, V);
    }
  }

  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
    if (SrcVTy->getNumElements() == 1) {
      // If our destination is not a vector, then make this a straight
      // scalar-scalar cast.
      if (!DestTy->isVectorTy()) {
        Value *Elem =
            Builder.CreateExtractElement(Src,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
      }

      // Otherwise, see if our source is an insert. If so, then use the scalar
      // component directly:
      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
        return new BitCastInst(InsElt->getOperand(1), DestTy);
    }
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
    // Okay, we have (bitcast (shuffle ..)). Check to see if this is
    // a bitcast to a vector with the same # elts.
    Value *ShufOp0 = Shuf->getOperand(0);
    Value *ShufOp1 = Shuf->getOperand(1);
    auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
    auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
    if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
        cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
        ShufElts == SrcVecElts) {
      BitCastInst *Tmp;
      // If either of the operands is a cast from CI.getType(), then
      // evaluating the shuffle in the casted destination's type will allow
      // us to eliminate at least one cast.
      if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
           Tmp->getOperand(0)->getType() == DestTy) ||
          ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
           Tmp->getOperand(0)->getType() == DestTy)) {
        Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
        Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
        // Return a new shuffle vector. Use the same element IDs, as we
        // know the vector types match #elts.
        return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
      }
    }

    // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
    // a byte-swap:
    // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
    // TODO: We should match the related pattern for bitreverse.
    if (DestTy->isIntegerTy() &&
        DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
        SrcTy->getScalarSizeInBits() == 8 &&
        ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
        Shuf->isReverse()) {
      assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
      assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
      Function *Bswap =
          Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
      Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
      return CallInst::Create(Bswap, { ScalarX });
    }
  }

  // Handle the A->B->A cast when there is an intervening PHI node.
  if (PHINode *PN = dyn_cast<PHINode>(Src))
    if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
      return I;

  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
    return I;

  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
    return I;

  if (Instruction *I = foldBitCastSelect(CI, Builder))
    return I;

  if (SrcTy->isPointerTy())
    return commonPointerCastTransforms(CI);
  return commonCastTransforms(CI);
}

Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
  // If the destination pointer element type is not the same as the source's,
  // first do a bitcast to the destination type, and then the addrspacecast.
  // This allows the cast to be exposed to other transforms.
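  // For example (an illustrative sketch; %p is hypothetical):
  //   %r = addrspacecast i32 addrspace(1)* %p to i8*
  // becomes
  //   %b = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
  //   %r = addrspacecast i8 addrspace(1)* %b to i8*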
  Value *Src = CI.getOperand(0);
  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());

  Type *DestElemTy = DestTy->getElementType();
  if (SrcTy->getElementType() != DestElemTy) {
    Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
    // Handle vectors of pointers.
    if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
      MidTy = VectorType::get(MidTy, VT->getElementCount());

    Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
    return new AddrSpaceCastInst(NewBitCast, CI.getType());
  }

  return commonPointerCastTransforms(CI);
}