Lines Matching +full:row +full:- +full:stride

1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
23 #include "llvm/Config/llvm-config.h"
39 #define DEBUG_TYPE "x86-isel"
40 #define PASS_NAME "X86 DAG->DAG Instruction Selection"
44 static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
49 "x86-promote-anyext-load", cl::init(true),
54 //===----------------------------------------------------------------------===//
56 //===----------------------------------------------------------------------===//
80 int JT = -1;
89 MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
97 /// Return true if this addressing mode is already RIP-relative.
102 return RegNode->getReg() == X86::RIP;
116 Base_Reg.getNode()->dump(DAG);
126 IndexReg.getNode()->dump(DAG);
132 GV->dump();
137 CP->dump();
158 //===--------------------------------------------------------------------===//
159 /// ISel - X86-specific code to select X86 machine instructions for
184 "indirect-tls-seg-refs");
270 Base = CurDAG->getTargetFrameIndex(
271 AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
275 Base = CurDAG->getRegister(0, VT);
279 #define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
284 SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
292 Index = CurDAG->getRegister(0, VT);
294 // These are 32-bit even in 64-bit mode since RIP-relative offset
295 // is 32-bit.
297 Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
301 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
304 assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
305 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
307 assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
309 Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
310 } else if (AM.JT != -1) {
311 assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
312 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
314 Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
317 Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);
322 Segment = CurDAG->getRegister(0, MVT::i16);
337 if (!CurDAG->shouldOptForSize())
341 for (const SDNode *User : N->uses()) {
347 if (User->isMachineOpcode()) {
353 if (User->getOpcode() == ISD::STORE &&
354 User->getOperand(1).getNode() == N) {
365 if (User->getNumOperands() != 2)
368 // If this is a sign-extended 8-bit integer immediate used in an ALU
371 if (C && isInt<8>(C->getSExtValue()))
378 if (User->getOpcode() == X86ISD::ADD ||
379 User->getOpcode() == ISD::ADD ||
380 User->getOpcode() == X86ISD::SUB ||
381 User->getOpcode() == ISD::SUB) {
384 SDValue OtherOp = User->getOperand(0);
386 OtherOp = User->getOperand(1);
390 if (OtherOp->getOpcode() == ISD::CopyFromReg &&
392 OtherOp->getOperand(1).getNode())))
393 if ((RegNode->getReg() == X86::ESP) ||
394 (RegNode->getReg() == X86::RSP))
408 return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
413 return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
418 return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
424 uint64_t Index = N->getConstantOperandVal(1);
425 MVT VecVT = N->getOperand(0).getSimpleValueType();
432 uint64_t Index = N->getConstantOperandVal(2);
433 MVT VecVT = N->getSimpleValueType(0);
440 uint64_t Index = N->getConstantOperandVal(2);
441 MVT VecVT = N->getSimpleValueType(0);
444 // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
445 // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
451 MVT VT = N->getSimpleValueType(0);
454 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
456 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
459 CurDAG->getMachineNode(
461 CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
462 CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
467 unsigned Opcode = N->getOpcode();
472 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
473 N->getOperand(FlagOpIndex), SDValue());
475 // Create a 64-bit instruction if the result is 64-bits otherwise use the
476 // 32-bit version.
479 VTs = CurDAG->getVTList(SBBVT, MVT::i32);
481 CurDAG->getMachineNode(Opc, dl, VTs,
489 assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
490 const APInt &Val = N->getConstantOperandAPInt(1);
495 APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
504 /// Return a reference to the TargetMachine, cast to the target-specific
510 /// Return a reference to the TargetInstrInfo, cast to the target-specific
513 return Subtarget->getInstrInfo();
519 /// Address-mode matching performs shift-of-and to and-of-shift
528 // Indicates we should prefer to use a non-temporal load for this load.
530 if (!N->isNonTemporal())
533 unsigned StoreSize = N->getMemoryVT().getStoreSize();
535 if (N->getAlign().value() < StoreSize)
544 return Subtarget->hasSSE41();
546 return Subtarget->hasAVX2();
548 return Subtarget->hasAVX512();
596 unsigned Opcode = N->getOpcode();
600 // We can get 256-bit 8 element types here without VLX being enabled. When
601 // this happens we will use 512-bit operations and the mask will not be
603 EVT OpVT = N->getOperand(0).getValueType();
607 OpVT = N->getOperand(1).getValueType();
609 return Subtarget->hasVLX();
627 if (N->getOpcode() == ISD::AND)
628 return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
629 isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);
645 // Don't fold non-temporal loads if we have an instruction for them.
651 switch (U->getOpcode()) {
665 SDValue Op1 = U->getOperand(1);
667 // If the other operand is an 8-bit immediate we should fold the immediate
678 if (Imm->getAPIntValue().isSignedIntN(8))
681 // If this is a 64-bit AND with an immediate that fits in 32-bits,
686 if (U->getOpcode() == ISD::AND &&
687 Imm->getAPIntValue().getBitWidth() == 64 &&
688 Imm->getAPIntValue().isIntN(32))
693 // TODO: We could shrink the load and fold if it is non-volatile.
694 if (U->getOpcode() == ISD::AND &&
695 (Imm->getAPIntValue() == UINT8_MAX ||
696 Imm->getAPIntValue() == UINT16_MAX ||
697 Imm->getAPIntValue() == UINT32_MAX))
702 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
703 (-Imm->getAPIntValue()).isSignedIntN(8))
706 if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
707 (-Imm->getAPIntValue()).isSignedIntN(8) &&
721 // FIXME: This is probably also true for non-TLS addresses.
730 // BTR: (and X, (rotl -2, n))
732 if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
733 if (U->getOperand(0).getOpcode() == ISD::SHL &&
734 isOneConstant(U->getOperand(0).getOperand(0)))
737 if (U->getOperand(1).getOpcode() == ISD::SHL &&
738 isOneConstant(U->getOperand(1).getOperand(0)))
741 if (U->getOpcode() == ISD::AND) {
742 SDValue U0 = U->getOperand(0);
743 SDValue U1 = U->getOperand(1);
746 if (C && C->getSExtValue() == -2)
752 if (C && C->getSExtValue() == -2)
766 if (isa<ConstantSDNode>(U->getOperand(1)))
775 if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
776 isNullConstant(Root->getOperand(2)) &&
777 (Root->getOperand(0).isUndef() ||
778 ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
785 // false will favor a masked register-register masked move or vblendm and the
789 (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&
795 return N->getOperand(1).hasOneUse();
815 CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
819 Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
820 CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
821 CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
826 Ops.append(Call->op_begin() + 1, Call->op_end());
827 CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
844 !LD->isSimple() ||
845 LD->getAddressingMode() != ISD::UNINDEXED ||
846 LD->getExtensionType() != ISD::NON_EXTLOAD)
861 cast<MemSDNode>(Chain.getNode())->writeMem())
899 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
900 E = CurDAG->allnodes_end(); I != E; ) {
918 if (N->getOpcode() == ISD::Constant) {
919 MVT VT = N->getSimpleValueType(0);
920 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
921 int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
923 // Check that the cf-protection-branch is enabled.
925 MF->getFunction().getParent()->getModuleFlag(
926 "cf-protection-branch");
929 SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
930 Complement = CurDAG->getNOT(dl, Complement, VT);
931 --I;
932 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
942 if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
943 SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
944 N->getOperand(0), N->getOperand(1));
945 --I;
946 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
952 // Convert vector increment or decrement to sub/add with an all-ones
954 // add X, <1, 1...> --> sub X, <-1, -1...>
955 // sub X, <1, 1...> --> add X, <-1, -1...>
956 // The all-ones vector constant can be materialized using a pcmpeq
971 return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
972 N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
973 !N->getOperand(1).hasOneUse();
975 if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
976 N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
978 if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
982 MVT VT = N->getSimpleValueType(0);
985 CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
986 AllOnes = CurDAG->getBitcast(VT, AllOnes);
988 unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
990 CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
991 --I;
992 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
999 switch (N->getOpcode()) {
1001 MVT VT = N->getSimpleValueType(0);
1003 if (!Subtarget->hasBWI() && needBWI(VT)) {
1007 CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
1009 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1010 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1012 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1013 CurDAG->getIntPtrConstant(Index, dl));
1015 --I;
1016 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1025 MVT VT = N->getSimpleValueType(0);
1027 if (!Subtarget->hasBWI() && needBWI(VT)) {
1031 SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
1032 SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
1033 SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
1034 X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
1035 MemNode->getMemOperand());
1037 CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
1038 NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
1040 Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
1041 CurDAG->getIntPtrConstant(Index, dl));
1043 --I;
1045 CurDAG->ReplaceAllUsesWith(N, To);
1057 MVT VT = N->getSimpleValueType(0);
1058 if (!ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
1064 SDValue Ptr = Ld->getBasePtr();
1065 SDValue Chain = Ld->getChain();
1066 for (SDNode *User : Ptr->uses()) {
1068 MVT UserVT = User->getSimpleValueType(0);
1070 UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
1071 !User->hasAnyUseOfValue(1) &&
1083 SDValue Extract = CurDAG->getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT,
1085 CurDAG->getIntPtrConstant(0, dl));
1086 SDValue Res = CurDAG->getBitcast(VT, Extract);
1088 --I;
1090 CurDAG->ReplaceAllUsesWith(N, To);
1098 // Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
1099 EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
1103 assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
1104 assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
1107 if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
1109 R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
1110 N->getOperand(0), N->getOperand(1), N->getOperand(2),
1111 CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
1113 R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
1114 N->getOperand(0), N->getOperand(1),
1115 N->getOperand(2));
1117 --I;
1118 CurDAG->ReplaceAllUsesWith(N, R.getNode());
1131 if (!N->getSimpleValueType(0).isVector())
1135 switch (N->getOpcode()) {
1145 if (N->isStrictFPOpcode())
1147 CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
1148 {N->getOperand(0), N->getOperand(1)});
1151 CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1152 N->getOperand(0));
1153 --I;
1154 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1164 if (!N->getValueType(0).isVector())
1168 switch (N->getOpcode()) {
1174 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1175 N->getOperand(0), N->getOperand(1));
1176 --I;
1177 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1186 if (!N->getValueType(0).isVector())
1190 if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
1191 assert(N->getOpcode() == ISD::ANY_EXTEND &&
1195 NewOpc = N->getOpcode() == ISD::ANY_EXTEND
1200 SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
1201 N->getOperand(0));
1202 --I;
1203 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1223 switch (N->getOpcode()) {
1239 bool IsStrict = N->isStrictFPOpcode();
1242 Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
1243 {N->getValueType(0), MVT::Other},
1244 {N->getOperand(0), N->getOperand(1),
1245 CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
1247 Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
1248 N->getOperand(0),
1249 CurDAG->getTargetConstant(Imm, dl, MVT::i32));
1250 --I;
1251 CurDAG->ReplaceAllUsesWith(N, Res.getNode());
1262 MVT VT = N->getSimpleValueType(0);
1271 SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1272 N->getOperand(0));
1273 SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
1274 N->getOperand(1));
1277 if (Subtarget->hasSSE2()) {
1279 Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
1280 Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
1282 switch (N->getOpcode()) {
1289 Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
1290 Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
1292 Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
1294 Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
1295 CurDAG->getIntPtrConstant(0, dl));
1296 --I;
1297 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
1307 !Subtarget->useIndirectThunkCalls() &&
1308 ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
1309 (N->getOpcode() == X86ISD::TC_RETURN &&
1310 (Subtarget->is64Bit() ||
1321 /// / \--
1331 bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
1332 SDValue Chain = N->getOperand(0);
1333 SDValue Load = N->getOperand(1);
1349 // FIXME: This should only happen when not compiled with -O0.
1350 switch (N->getOpcode()) {
1355 MVT SrcVT = N->getOperand(0).getSimpleValueType();
1356 MVT DstVT = N->getSimpleValueType(0);
1366 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1367 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1373 if (N->getOpcode() == ISD::FP_EXTEND)
1375 // If this is a value-preserving FPStack truncation, it is a noop.
1376 if (N->getConstantOperandVal(1))
1380 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1383 MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
1384 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1385 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1387 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1392 SDValue Store = CurDAG->getTruncStore(
1393 CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
1394 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
1401 --I;
1402 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1411 MVT SrcVT = N->getOperand(1).getSimpleValueType();
1412 MVT DstVT = N->getSimpleValueType(0);
1422 bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
1423 bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
1429 if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
1431 // If this is a value-preserving FPStack truncation, it is a noop.
1432 if (N->getConstantOperandVal(2))
1436 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
1439 MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
1440 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
1441 int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
1443 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
1451 SDVTList VTs = CurDAG->getVTList(MVT::Other);
1452 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
1453 Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
1456 if (N->getFlags().hasNoFPExcept()) {
1457 SDNodeFlags Flags = Store->getFlags();
1459 Store->setFlags(Flags);
1463 Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
1468 SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
1470 Result = CurDAG->getMemIntrinsicNode(
1473 if (N->getFlags().hasNoFPExcept()) {
1474 SDNodeFlags Flags = Result->getFlags();
1476 Result->setFlags(Flags);
1480 Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
1487 --I;
1488 CurDAG->ReplaceAllUsesWith(N, Result.getNode());
1502 CurDAG->RemoveDeadNodes();
1505 // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
1507 unsigned Opc = N->getMachineOpcode();
1512 SDValue N0 = N->getOperand(0);
1530 MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
1542 // Skip peepholes at -O0.
1546 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
1549 while (Position != CurDAG->allnodes_begin()) {
1550 SDNode *N = &*--Position;
1551 // Skip dead nodes and any non-machine opcodes.
1552 if (N->use_empty() || !N->isMachineOpcode())
1560 unsigned Opc = N->getMachineOpcode();
1564 // ANDrr/rm + TESTrr+ -> TESTrr/TESTmr
1569 // ANDrr/rm + CTESTrr -> CTESTrr/CTESTmr
1574 auto &Op0 = N->getOperand(0);
1575 if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
1578 SDValue And = N->getOperand(0);
1589 if (And->hasAnyUseOfValue(1))
1591 SmallVector<SDValue> Ops(N->op_values());
1595 CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
1604 if (And->hasAnyUseOfValue(1))
1625 Ops.push_back(N->getOperand(2));
1626 Ops.push_back(N->getOperand(3));
1632 Ops.push_back(N->getOperand(4));
1634 MachineSDNode *Test = CurDAG->getMachineNode(
1636 CurDAG->setNodeMemRefs(
1637 Test, cast<MachineSDNode>(And.getNode())->memoperands());
1653 SDValue Op0 = N->getOperand(0);
1654 if (Op0 != N->getOperand(1) || !N->isOnlyUserOf(Op0.getNode()) ||
1681 if (NewOpc == X86::KTESTWrr && !Subtarget->hasDQI())
1684 MachineSDNode *KTest = CurDAG->getMachineNode(
1692 unsigned SubRegIdx = N->getConstantOperandVal(2);
1696 SDValue Move = N->getOperand(1);
1728 uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
1736 CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
1743 CurDAG->RemoveDeadNodes();
1749 if (Subtarget->isTargetCygMing()) {
1751 auto &DL = CurDAG->getDataLayout();
1754 CLI.setChain(CurDAG->getRoot())
1755 .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
1756 CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
1758 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
1760 CurDAG->setRoot(Result.second);
1766 const Function &F = MF->getFunction();
1772 // On 64-bit platforms, we can run into an issue where a frame index
1775 // displacement fits into a 31-bit integer (which is only slightly more
1777 // a 32-bit integer), a 31-bit disp should always be safe.
1794 if (Subtarget->is64Bit()) {
1804 // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to
1805 // 64 bits. Instructions with 32-bit register addresses perform this zero
1807 // Instructions with only a 32-bit immediate address do not, though: they
1816 // addresses in LP64 mode, by adding the EIZ pseudo-register as an operand
1818 // pseudo-register is not part of any register class and therefore causes
1820 if (Subtarget->isTarget64BitILP32() && !isUInt<31>(Val) &&
1830 SDValue Address = N->getOperand(1);
1832 // load gs:0 -> GS segment register.
1833 // load fs:0 -> FS segment register.
1836 // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
1837 // with 32-bit registers, as we get in ILP32 mode, those registers are first
1838 // zero-extended to 64 bits and then added it to the base address, which gives
1843 (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
1844 Subtarget->isTargetFuchsia())) {
1845 if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
1847 switch (N->getPointerInfo().getAddrSpace()) {
1849 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
1852 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
1879 // We can't use an addressing mode in the 64-bit large code model.
1885 if (Subtarget->is64Bit() && M == CodeModel::Large && !IsRIPRelTLS)
1898 AM.GV = G->getGlobal();
1899 AM.SymbolFlags = G->getTargetFlags();
1900 Offset = G->getOffset();
1902 AM.CP = CP->getConstVal();
1903 AM.Alignment = CP->getAlign();
1904 AM.SymbolFlags = CP->getTargetFlags();
1905 Offset = CP->getOffset();
1907 AM.ES = S->getSymbol();
1908 AM.SymbolFlags = S->getTargetFlags();
1910 AM.MCSym = S->getMCSymbol();
1912 AM.JT = J->getIndex();
1913 AM.SymbolFlags = J->getTargetFlags();
1915 AM.BlockAddr = BA->getBlockAddress();
1916 AM.SymbolFlags = BA->getTargetFlags();
1917 Offset = BA->getOffset();
1922 if (Subtarget->is64Bit() && !IsRIPRel && AM.GV &&
1934 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
1946 // Post-processing: Make a second attempt to fold a load, if we now know
1948 // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
1950 if (Subtarget->isTarget64BitILP32() &&
1961 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1962 // a smaller encoding and avoids a scaled-index.
1970 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
1973 (!AM.GV || !TM.isLargeGlobalValue(AM.GV)) && Subtarget->is64Bit() &&
1977 AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
2024 if (N->getNodeId() == -1 ||
2027 DAG.RepositionNode(Pos->getIterator(), N.getNode());
2030 // Conservatively mark it with the same -abs(Id) to assure node id
2032 N->setNodeId(Pos->getNodeId());
2037 // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
2038 // safe. This allows us to convert the shift and and into an h-register
2050 int ScaleLog = 8 - Shift.getConstantOperandVal(1);
2067 // a valid topological ordering as nothing is going to go back and re-sort
2069 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2095 int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
2138 // a valid topological ordering as nothing is going to go back and re-sort
2140 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2192 unsigned MaskLZ = 64 - (MaskIdx + MaskLen);
2206 unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
2209 MaskLZ -= ScaleDown;
2219 unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
2221 // Assume that we'll replace the any-extend with a zero-extend, and
2224 MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
2252 // a valid topological ordering as nothing is going to go back and re-sort
2254 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2314 // a valid topological ordering as nothing is going to go back and re-sort
2316 // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
2349 // index: add(x,c) -> index: x, disp + c
2350 if (CurDAG->isBaseWithConstantOffset(N)) {
2352 uint64_t Offset = (uint64_t)AddVal->getSExtValue() * AM.Scale;
2357 // index: add(x,x) -> index: x, scale * 2
2365 // index: shl(x,i) -> index: x, scale * (1 << i)
2375 // index: sext(add_nsw(x,c)) -> index: sext(x), disp + sext(c)
2379 if (Src.getOpcode() == ISD::ADD && Src->getFlags().hasNoSignedWrap() &&
2381 if (CurDAG->isBaseWithConstantOffset(Src)) {
2384 uint64_t Offset = (uint64_t)AddVal->getSExtValue();
2387 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2388 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2389 SDValue ExtAdd = CurDAG->getNode(ISD::ADD, DL, VT, ExtSrc, ExtVal);
2393 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2394 CurDAG->RemoveDeadNode(N.getNode());
2401 // index: zext(add_nuw(x,c)) -> index: zext(x), disp + zext(c)
2402 // index: zext(addlike(x,c)) -> index: zext(x), disp + zext(c)
2407 if (((SrcOpc == ISD::ADD && Src->getFlags().hasNoUnsignedWrap()) ||
2408 CurDAG->isADDLike(Src, /*NoWrap=*/true)) &&
2410 if (CurDAG->isBaseWithConstantOffset(Src)) {
2425 (AddSrc->getFlags().hasNoUnsignedWrap() ||
2426 CurDAG->MaskedValueIsZero(ShVal, HiBits))) {
2428 SDValue ExtShVal = CurDAG->getNode(Opc, DL, VT, ShVal);
2429 SDValue ExtShift = CurDAG->getNode(ISD::SHL, DL, VT, ExtShVal,
2437 SDValue ExtSrc = CurDAG->getNode(Opc, DL, VT, AddSrc);
2438 SDValue ExtVal = CurDAG->getConstant(Offset, DL, VT);
2439 SDValue ExtAdd = CurDAG->getNode(SrcOpc, DL, VT, ExtSrc, ExtVal);
2443 CurDAG->ReplaceAllUsesWith(N, ExtAdd);
2444 CurDAG->RemoveDeadNode(N.getNode());
2468 // RIP relative addressing: %rip + 32-bit displacement!
2473 if (!(AM.ES || AM.MCSym) && AM.JT != -1)
2477 if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
2488 AM.MCSym = ESNode->getMCSymbol();
2494 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2514 (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
2516 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
2526 unsigned Val = CN->getZExtValue();
2529 // the base doesn't end up getting used, a post-processing step
2544 // We only handle up to 64-bit values here as those are what matter for
2553 // The mask used for the transform is expected to be post-shift, but we
2575 // X*[3,5,9] -> X+X*[2,4,8]
2580 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
2581 CN->getZExtValue() == 9) {
2582 AM.Scale = unsigned(CN->getZExtValue())-1;
2590 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
2594 uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
2608 // Given A-B, if A can be completely folded into the address and
2609 // the index field with the index field unused, use -B as the index.
2612 // other uses, since it avoids a two-address sub instruction, however
2638 if (!RHS.getNode()->hasOneUse() ||
2639 RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
2640 RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
2641 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
2642 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
2648 !AM.Base_Reg.getNode()->hasOneUse()) ||
2650 --Cost;
2656 --Cost;
2675 if (!CurDAG->isADDLike(N))
2684 // Perform some heroic transforms on an and of a constant-count shift
2690 // We only handle up to 64-bit values here as those are what matter for
2745 Mask = MaskC->getAPIntValue();
2749 if (Src.getOpcode() == ISD::SHL && Src.hasOneUse() && N->hasOneUse()) {
2756 unsigned ShAmtV = ShAmtC->getZExtValue();
2764 if (!Src->getFlags().hasNoUnsignedWrap() &&
2765 !CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
2769 // --> shl (zext i8 %x to i32), (zext C1)
2771 // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
2778 Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
2780 Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
2783 SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
2785 SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
2787 CurDAG->ReplaceAllUsesWith(N, NewShl);
2788 CurDAG->RemoveDeadNode(N.getNode());
2860 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
2910 AM.Scale = ScaleOp->getAsZExtVal();
2913 // sign-extension, which is performed BEFORE scale.
2919 unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
2921 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2923 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2925 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2953 Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
2954 Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
2955 Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
2956 Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
2957 Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
2958 Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
2959 Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
2961 cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
2963 AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
2965 AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
2967 AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
2990 if (N->getOpcode() != X86ISD::Wrapper)
2997 if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
3001 // Small/medium code model can reference non-TargetGlobalAddress objects with
3003 if (N->getOpcode() != ISD::TargetGlobalAddress) {
3008 const GlobalValue *GV = cast<GlobalAddressSDNode>(N)->getGlobal();
3009 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
3010 return CR->getUnsignedMax().ult(1ull << 32);
3025 if (RN && RN->getReg() == 0)
3026 Base = CurDAG->getRegister(0, MVT::i64);
3029 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3031 Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
3036 if (RN && RN->getReg() == 0)
3037 Index = CurDAG->getRegister(0, MVT::i64);
3040 "Expect to be extending 32-bit registers for use in LEA");
3041 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
3043 Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
3065 SDValue T = CurDAG->getRegister(0, MVT::i32);
3089 // its three-address nature. Tweak the cost function again when we can run
3092 // For X86-64, always use LEA to materialize RIP-relative addresses.
3093 if (Subtarget->is64Bit())
3101 // duplicating flag-producing instructions later in the pipeline.
3112 their inclusion for different reasons (better for reg-alloc).
3117 // Value 1 is the flag output of the node - verify it's not dead.
3149 AM.GV = GA->getGlobal();
3150 AM.Disp += GA->getOffset();
3151 AM.SymbolFlags = GA->getTargetFlags();
3154 AM.ES = SA->getSymbol();
3155 AM.SymbolFlags = SA->getTargetFlags();
3158 if (Subtarget->is32Bit()) {
3160 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
3182 // We can only use non-GlobalValues as immediates if they were not truncated,
3185 unsigned Opc = N.getOperand(0)->getOpcode();
3195 std::optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
3196 if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
3200 Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
3201 GA->getOffset(), GA->getTargetFlags());
3224 if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
3237 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
3238 auto &DL = MF->getDataLayout();
3239 return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
3243 if (N->getOpcode() == ISD::TRUNCATE)
3244 N = N->getOperand(0).getNode();
3245 if (N->getOpcode() != X86ISD::Wrapper)
3248 auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
3252 auto *GV = GA->getGlobal();
3253 std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange();
3255 return CR->getSignedMin().sge(-1ull << Width) &&
3256 CR->getSignedMax().slt(1ull << Width);
3258 // space, so globals can be a sign extended 32-bit immediate.
3265 assert(N->isMachineOpcode() && "Unexpected node");
3266 unsigned Opc = N->getMachineOpcode();
3267 const MCInstrDesc &MCID = getInstrInfo()->get(Opc);
3272 return static_cast<X86::CondCode>(N->getConstantOperandVal(CondNo));
3279 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3285 if (UI->getOpcode() != ISD::CopyToReg ||
3286 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3289 for (SDNode::use_iterator FlagUI = UI->use_begin(),
3290 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
3294 if (!FlagUI->isMachineOpcode()) return false;
3315 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3321 if (UI->getOpcode() != ISD::CopyToReg ||
3322 cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3325 for (SDNode::use_iterator FlagUI = UI->use_begin(),
3326 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
3330 if (!FlagUI->isMachineOpcode()) return false;
3371 for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
3377 unsigned UIOpc = UI->getOpcode();
3381 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
3384 for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
3390 if (!FlagUI->isMachineOpcode())
3403 // This might be an unselected node. So look for the pre-isel opcodes that
3416 X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
3434 if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
3436 // Is the store non-extending and non-indexed?
3437 if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
3440 SDValue Load = StoredVal->getOperand(LoadOpNo);
3441 // Is the stored value a non-extending and non-indexed load?
3452 if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
3453 LoadNode->getOffset() != StoreNode->getOffset())
3462 // Visualization of Load-Op-Store fusion:
3463 // -------------------------
3465 // *-lines = Chain operand dependencies.
3466 // |-lines = Normal operand dependencies.
3467 // Dependencies flow down and right. n-suffix references multiple nodes.
3472 // Xn A-LD Yn TF Yn
3475 // * * \ | => A--LD_OP_ST
3480 // A-ST Zn
3483 // This merge induced dependences from: #1: Xn -> LD, OP, Zn
3484 // #2: Yn -> LD
3485 // #3: ST -> Zn
3499 SDValue Chain = StoreNode->getChain();
3523 for (SDValue Op : StoredVal->ops())
3533 CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
3548 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3552 // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
3559 SDValue StoredVal = StoreNode->getOperand(1);
3560 unsigned Opc = StoredVal->getOpcode();
3565 EVT MemVT = StoreNode->getMemoryVT();
3605 if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
3633 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3640 if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
3643 // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
3650 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
3716 SDValue Operand = StoredVal->getOperand(1-LoadOpNo);
3721 int64_t OperandV = OperandC->getSExtValue();
3727 ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
3729 isInt<32>(-OperandV))) &&
3731 OperandV = -OperandV;
3736 Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
3743 CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
3748 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3753 Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
3762 MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
3763 LoadNode->getMemOperand()};
3764 CurDAG->setNodeMemRefs(Result, MemOps);
3770 CurDAG->RemoveDeadNode(Node);
3776 // a) x & (1 << nbits) - 1
3777 // b) x & ~(-1 << nbits)
3778 // c) x & (-1 >> (32 - y))
3779 // d) x << (32 - y) >> (32 - y)
3780 // e) (1 << nbits) - 1
3783 (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND ||
3784 Node->getOpcode() == ISD::SRL) &&
3785 "Should be either an and-mask, or right-shift after clearing high bits.");
3788 if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
3791 MVT NVT = Node->getSimpleValueType(0);
3800 // If we have BMI2's BZHI, we are ok with multi-use patterns.
3801 // Else, if we only have BMI1's BEXTR, we require one-use.
3802 const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
3807 Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
3821 if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
3824 "Expected i64 -> i32 truncation");
3830 // a) x & ((1 << nbits) + (-1))
3832 &NegateNBits](SDValue Mask) -> bool {
3834 if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
3836 // We should be adding all-ones constant (i.e. subtracting one.)
3837 if (!isAllOnesConstant(Mask->getOperand(1)))
3840 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3841 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3843 if (!isOneConstant(M0->getOperand(0)))
3845 NBits = M0->getOperand(1);
3852 return CurDAG->MaskedValueIsAllOnes(
3857 // b) x & ~(-1 << nbits)
3859 &NBits, &NegateNBits](SDValue Mask) -> bool {
3863 // The -1 only has to be all-ones for the final Node's NVT.
3864 if (!isAllOnes(Mask->getOperand(1)))
3866 // Match `-1 << nbits`. Might be truncated. Must only have one use!
3867 SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
3868 if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
3870 // The -1 only has to be all-ones for the final Node's NVT.
3871 if (!isAllOnes(M0->getOperand(0)))
3873 NBits = M0->getOperand(1);
3878 // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
3879 // or leave the shift amount as-is, but then we'll have to negate it.
3887 // Try to match the shift amount as (bitwidth - y). It should go away, too.
3892 if (!V0 || V0->getZExtValue() != Bitwidth)
3898 // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
3900 // c) x & (-1 >> (32 - y))
3902 canonicalizeShiftAmt](SDValue Mask) -> bool {
3909 // We should be shifting truly all-ones constant.
3917 // Pattern c. is non-canonical, and is expanded into pattern d. iff there
3928 // d) x << (32 - y) >> (32 - y)
3931 &X](SDNode *Node) -> bool {
3932 if (Node->getOpcode() != ISD::SRL)
3934 SDValue N0 = Node->getOperand(0);
3935 if (N0->getOpcode() != ISD::SHL)
3938 SDValue N1 = Node->getOperand(1);
3939 SDValue N01 = N0->getOperand(1);
3950 X = N0->getOperand(0);
3955 matchPatternC](SDValue Mask) -> bool {
3959 if (Node->getOpcode() == ISD::AND) {
3960 X = Node->getOperand(0);
3961 SDValue Mask = Node->getOperand(1);
3971 X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
3977 if (NegateNBits && !Subtarget->hasBMI2())
3983 NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
3986 // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
3989 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
3992 SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
3994 NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
4002 SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
4005 NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
4009 if (Subtarget->hasBMI2()) {
4012 // But have to place the bit count into the wide-enough register first.
4013 NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
4017 SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
4024 // *logically* shifted (potentially with one-use trunc in between),
4026 // and if so look past one-use truncation.
4029 // FIXME: only if the shift is one-use?
4044 SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
4046 SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
4050 // FIXME: only if the shift is one-use?
4058 // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
4061 ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
4065 Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
4069 // But have to place the 'control' into the wide-enough register first.
4071 Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
4076 SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);
4081 Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
4092 MVT NVT = Node->getSimpleValueType(0);
4095 SDValue N0 = Node->getOperand(0);
4096 SDValue N1 = Node->getOperand(1);
4101 // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
4105 Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
4106 if (!PreferBEXTR && !Subtarget->hasBMI2())
4110 if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
4114 if (!N0->hasOneUse())
4123 auto *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
4128 uint64_t Mask = MaskCst->getZExtValue();
4132 uint64_t Shift = ShiftCst->getZExtValue();
4154 #define GET_EGPR_IF_ENABLED(OPC) (Subtarget->hasEGPR() ? OPC##_EVEX : OPC)
4156 assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
4160 Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
4166 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4172 Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
4173 if (Subtarget->hasTBM()) {
4177 assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
4184 Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
4189 SDValue Input = N0->getOperand(0);
4194 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
4195 NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4198 // Record the mem-refs
4199 CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
4201 NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
4206 SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
4210 CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
4220 SDValue N0 = Node->getOperand(0);
4221 SDValue N1 = Node->getOperand(1);
4222 SDValue Imm = Node->getOperand(2);
4223 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4224 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4231 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
4232 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4235 // Record the mem-refs
4236 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
4241 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
4242 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4253 SDValue N0 = Node->getOperand(0);
4254 SDValue N2 = Node->getOperand(2);
4255 SDValue Imm = Node->getOperand(4);
4256 auto *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
4257 Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
4264 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
4265 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
4269 // Record the mem-refs
4270 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
4275 SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
4276 MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
4282 EVT VT = N->getValueType(0);
4291 SDValue OrigShiftAmt = N->getOperand(1);
4296 if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
4297 ShiftAmt = ShiftAmt->getOperand(0);
4303 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB ||
4304 ShiftAmt->getOpcode() == ISD::XOR) {
4305 SDValue Add0 = ShiftAmt->getOperand(0);
4306 SDValue Add1 = ShiftAmt->getOperand(1);
4309 // If we are shifting by X+/-/^N where N == 0 mod Size, then just shift by X
4311 if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
4314 } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() &&
4315 ((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
4316 (Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
4317 // If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X
4322 // We can only do N-X, not X-N
4323 if (ShiftAmt->getOpcode() == ISD::SUB && Add0C == nullptr)
4328 SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
4329 NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
4333 // If we are shifting by N-X where N == 0 mod Size, then just shift by
4334 // -X to generate a NEG instead of a SUB of a constant.
4335 } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
4336 Add0C->getZExtValue() != 0) {
4339 if (Add0C->getZExtValue() % Size == 0)
4342 Add0C->getZExtValue() % 32 == 0) {
4343 // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
4350 Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
4354 X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);
4361 SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
4362 SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
4376 NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
4383 NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
4384 CurDAG->getConstant(Size - 1, DL, MVT::i8));
4388 SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
4399 if (OrigShiftAmt.getNode()->use_empty())
4400 CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
4409 MVT NVT = N->getSimpleValueType(0);
4410 unsigned Opcode = N->getOpcode();
4415 SDValue Shift = N->getOperand(0);
4416 SDValue N1 = N->getOperand(1);
4422 int64_t Val = Cst->getSExtValue();
4446 uint64_t ShAmt = ShlCst->getZExtValue();
4450 uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
4491 unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
4497 NeededMask &= ~Cst->getAPIntValue();
4499 if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
4505 SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
4510 SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
4512 SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
4514 SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
4545 unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
4582 SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
4584 MVT NVT = Root->getSimpleValueType(0);
4588 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
4593 unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
4618 MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
4622 // Record the mem-refs
4623 CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
4636 MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
4640 CurDAG->RemoveDeadNode(Root);
4647 MVT NVT = N->getSimpleValueType(0);
4650 if (!NVT.isVector() || !Subtarget->hasAVX512() ||
4654 // We need VLX for 128/256-bit.
4655 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
4658 SDValue N0 = N->getOperand(0);
4659 SDValue N1 = N->getOperand(1);
4722 switch (N->getOpcode()) {
4740 /// creating a small, sign-extended negative immediate rather than a large
4743 /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
4748 MVT VT = And->getSimpleValueType(0);
4752 auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
4758 // patterns to use a 32-bit and instead of a 64-bit and by relying on the
4761 APInt MaskVal = And1C->getAPIntValue();
4768 MaskLZ -= 32;
4772 SDValue And0 = And->getOperand(0);
4790 if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
4793 // Check if the mask is -1. In that case, this is an unnecessary instruction
4801 SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
4803 SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
4861 assert(Subtarget->hasAVX512() && "Expected AVX512!");
4866 ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
4904 bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
4929 if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
4967 SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
4969 Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
4972 Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
4976 unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID();
4977 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
4978 InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
4989 SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
4994 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
4998 CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5003 // Record the mem-refs
5004 CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
5007 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
5009 CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
5014 unsigned RegClass = TLI->getRegClassFor(ResVT)->getID();
5015 SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
5016 CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
5021 CurDAG->RemoveDeadNode(Root);
5028 assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
5030 MVT NVT = N->getSimpleValueType(0);
5033 if (!NVT.isVector() || !Subtarget->hasAVX512())
5036 // We need VLX for 128/256-bit.
5037 if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
5040 SDValue N0 = N->getOperand(0);
5041 SDValue N1 = N->getOperand(1);
5067 SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
5068 SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
5076 MVT NVT = Node->getSimpleValueType(0);
5077 unsigned Opcode = Node->getOpcode();
5080 if (Node->isMachineOpcode()) {
5081 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
5082 Node->setNodeId(-1);
5089 unsigned IntNo = Node->getConstantOperandVal(1);
5094 if (!Subtarget->hasKL())
5108 SDValue Chain = Node->getOperand(0);
5109 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
5112 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
5115 MachineSDNode *Res = CurDAG->getMachineNode(
5116 Opcode, dl, Node->getVTList(),
5117 {Node->getOperand(2), Chain, Chain.getValue(1)});
5123 if (!Subtarget->hasAMXTILE())
5126 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5127 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5131 // _tile_loadd_internal(row, col, buf, STRIDE)
5132 SDValue Base = Node->getOperand(4);
5134 SDValue Index = Node->getOperand(5);
5135 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5136 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5137 SDValue Chain = Node->getOperand(0);
5139 SDValue Ops[] = {Node->getOperand(2),
5140 Node->getOperand(3),
5147 CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
5155 unsigned IntNo = Node->getConstantOperandVal(1);
5161 bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
5167 if (!Subtarget->hasSSE3())
5172 if (!Subtarget->hasMWAITX())
5177 if (!Subtarget->hasCLZERO())
5185 SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
5186 Node->getOperand(2), SDValue());
5192 Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
5195 Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
5200 MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
5210 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5211 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
5213 // _tile_stored_internal(row, col, buf, STRIDE, c)
5214 SDValue Base = Node->getOperand(4);
5216 SDValue Index = Node->getOperand(5);
5217 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5218 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5219 SDValue Chain = Node->getOperand(0);
5221 SDValue Ops[] = {Node->getOperand(2),
5222 Node->getOperand(3),
5228 Node->getOperand(6),
5230 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5237 if (!Subtarget->hasAMXTILE())
5240 CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5241 MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
5250 unsigned TIndex = Node->getConstantOperandVal(2);
5252 SDValue Base = Node->getOperand(3);
5254 SDValue Index = Node->getOperand(4);
5255 SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
5256 SDValue Segment = CurDAG->getRegister(0, MVT::i16);
5257 SDValue Chain = Node->getOperand(0);
5261 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5264 CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
5274 if (Subtarget->isTargetNaCl())
5278 if (Subtarget->isTarget64BitILP32()) {
5279 // Converts a 32-bit register to a 64-bit, zero-extended version of
5280 // it. This is needed because x86-64 can do many things, but jmp %r32
5282 SDValue Target = Node->getOperand(1);
5284 SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
5285 SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other,
5286 Node->getOperand(0), ZextTarget);
5299 // Just drop all 128/256/512-bit bitcasts.
5302 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
5303 CurDAG->RemoveDeadNode(Node);
5319 uint8_t Imm = Node->getConstantOperandVal(3);
5320 if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
5321 Node->getOperand(1), Node->getOperand(2), Imm))
5334 SDValue N0 = Node->getOperand(0);
5335 SDValue N1 = Node->getOperand(1);
5346 CurDAG->RemoveDeadNode(Node);
5375 // unavailable to the fast-isel table.
5376 if (!CurDAG->shouldOptForSize())
5383 SDValue N0 = Node->getOperand(0);
5384 SDValue N1 = Node->getOperand(1);
5390 int64_t Val = Cst->getSExtValue();
5393 // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
5398 if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
5518 SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5519 MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5522 // Record the mem-refs
5523 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
5525 CurDAG->RemoveDeadNode(Node);
5530 CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
5540 SDValue N0 = Node->getOperand(0);
5541 SDValue N1 = Node->getOperand(1);
5577 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5586 VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
5588 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
5592 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5596 // Record the mem-refs
5597 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5603 VTs = CurDAG->getVTList(NVT, MVT::i32);
5605 VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
5607 CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue});
5612 CurDAG->RemoveDeadNode(Node);
5618 SDValue N0 = Node->getOperand(0);
5619 SDValue N1 = Node->getOperand(1);
5624 bool UseMULX = !IsSigned && Subtarget->hasBMI2();
5663 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
5672 SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
5673 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5677 SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
5678 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5683 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5684 CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
5691 // Record the mem-refs
5692 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5696 SDVTList VTs = CurDAG->getVTList(NVT);
5697 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5700 SDVTList VTs = CurDAG->getVTList(NVT, NVT);
5701 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5705 SDVTList VTs = CurDAG->getVTList(MVT::Glue);
5706 SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
5715 ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
5720 LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
5727 ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
5732 LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
5736 CurDAG->RemoveDeadNode(Node);
5742 SDValue N0 = Node->getOperand(0);
5743 SDValue N1 = Node->getOperand(1);
5790 bool signBitIsZero = CurDAG->SignBitIsZero(N0);
5802 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
5805 // Record the mem-refs
5806 CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
5810 Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
5811 Chain = CurDAG->getEntryNode();
5813 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
5818 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
5823 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0);
5826 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
5828 CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, std::nullopt), 0);
5832 SDValue(CurDAG->getMachineNode(
5834 CurDAG->getTargetConstant(X86::sub_16bit, dl,
5842 SDValue(CurDAG->getMachineNode(
5844 CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
5845 CurDAG->getTargetConstant(X86::sub_32bit, dl,
5853 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
5862 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
5866 // Record the mem-refs
5867 CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
5870 SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0);
5881 SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
5885 SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
5891 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
5894 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5899 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5903 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5908 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
5912 LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
5915 CurDAG->RemoveDeadNode(Node);
5922 bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
5923 Node->getOpcode() == X86ISD::STRICT_FCMPS;
5924 SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
5925 SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
5931 if (Subtarget->canUseCMOV())
5934 bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
5951 IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
5954 SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
5955 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
5958 Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0);
5963 SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0);
5965 // Extract upper 8-bits of AX.
5967 CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
5970 // Some 64-bit targets lack SAHF support, but they do support FCOMI.
5971 assert(Subtarget->canUseLAHFSAHF() &&
5973 SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
5976 CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
5982 CurDAG->RemoveDeadNode(Node);
5987 SDValue N0 = Node->getOperand(0);
5988 SDValue N1 = Node->getOperand(1);
6006 NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
6008 CurDAG->RemoveDeadNode(Node);
6020 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6028 uint64_t Mask = MaskC->getZExtValue();
6045 // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
6064 } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
6067 unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
6089 SDValue ShiftC = CurDAG->getTargetConstant(ShiftAmt, dl, MVT::i64);
6091 CurDAG->getMachineNode(ShiftOpcode, dl, MVT::i64, MVT::i32,
6096 CurDAG->getTargetExtractSubreg(SubRegIdx, dl, SubRegVT, Shift);
6099 CurDAG->getMachineNode(TestOpcode, dl, MVT::i32, Shift, Shift);
6135 // Without minsize 16-bit Cmps can get here so we need to
6154 SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
6162 if (!LoadN->isSimple()) {
6163 unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
6172 NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
6175 // Record the mem-refs
6176 CurDAG->setNodeMemRefs(NewNode,
6177 {cast<LoadSDNode>(Reg)->getMemOperand()});
6181 Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
6183 NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
6192 if (!Subtarget->hasSSE42())
6203 Subtarget->hasAVX() ? X86::VPCMPISTRMrri : X86::PCMPISTRMrri;
6205 Subtarget->hasAVX() ? X86::VPCMPISTRMrmi : X86::PCMPISTRMrmi;
6211 Subtarget->hasAVX() ? X86::VPCMPISTRIrri : X86::PCMPISTRIrri;
6213 Subtarget->hasAVX() ? X86::VPCMPISTRIrmi : X86::PCMPISTRIrmi;
6220 CurDAG->RemoveDeadNode(Node);
6224 if (!Subtarget->hasSSE42())
6228 SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
6229 Node->getOperand(1),
6231 InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
6232 Node->getOperand(3), InGlue).getValue(1);
6242 Subtarget->hasAVX() ? X86::VPCMPESTRMrri : X86::PCMPESTRMrri;
6244 Subtarget->hasAVX() ? X86::VPCMPESTRMrmi : X86::PCMPESTRMrmi;
6251 Subtarget->hasAVX() ? X86::VPCMPESTRIrri : X86::PCMPESTRIrri;
6253 Subtarget->hasAVX() ? X86::VPCMPESTRIrmi : X86::PCMPESTRIrmi;
6259 CurDAG->RemoveDeadNode(Node);
6276 MVT VT = Node->getSimpleValueType(0);
6278 if (Subtarget->hasSBBDepBreaking()) {
6283 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
6284 Node->getOperand(1), SDValue());
6286 // Create a 64-bit instruction if the result is 64-bits otherwise use the
6287 // 32-bit version.
6291 CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)),
6295 // no-source idiom, so we explicitly zero the input values.
6299 // For less than 32-bits we need to extract from the 32-bit node.
6302 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6306 CurDAG->RemoveDeadNode(Node);
6310 if (isNullConstant(Node->getOperand(0)) &&
6311 isNullConstant(Node->getOperand(1))) {
6319 // For less than 32-bits we need to extract from the 32-bit node.
6320 MVT VT = Node->getSimpleValueType(0);
6323 Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
6328 CurDAG->RemoveDeadNode(Node);
6335 SDValue IndexOp = Mgt->getIndex();
6336 SDValue Mask = Mgt->getMask();
6338 MVT ValueVT = Node->getSimpleValueType(0);
6405 if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
6409 SDValue PassThru = Mgt->getPassThru();
6410 SDValue Chain = Mgt->getChain();
6412 SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
6418 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6422 NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6424 CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
6427 CurDAG->RemoveDeadNode(Node);
6432 SDValue Value = Sc->getValue();
6433 SDValue IndexOp = Sc->getIndex();
6478 if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
6482 SDValue Mask = Sc->getMask();
6483 SDValue Chain = Sc->getChain();
6485 SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
6488 MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
6489 CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
6491 CurDAG->RemoveDeadNode(Node);
6495 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6496 auto CallId = MFI->getPreallocatedIdForCallSite(
6497 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6498 SDValue Chain = Node->getOperand(0);
6499 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6500 MachineSDNode *New = CurDAG->getMachineNode(
6503 CurDAG->RemoveDeadNode(Node);
6507 auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
6508 auto CallId = MFI->getPreallocatedIdForCallSite(
6509 cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
6510 SDValue Chain = Node->getOperand(0);
6511 SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
6512 SDValue ArgIndex = Node->getOperand(2);
6517 MachineSDNode *New = CurDAG->getMachineNode(
6519 CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
6524 CurDAG->RemoveDeadNode(Node);
6531 if (!Subtarget->hasWIDEKL())
6535 switch (Node->getOpcode()) {
6552 SDValue Chain = Node->getOperand(0);
6553 SDValue Addr = Node->getOperand(1);
6559 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
6561 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
6563 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
6565 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
6567 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
6569 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
6571 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
6573 Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
6576 MachineSDNode *Res = CurDAG->getMachineNode(
6577 Opcode, dl, Node->getVTList(),
6579 CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
6617 /// This pass converts a legalized DAG into a X86-specific DAG,