//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAG class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <deque>
#include <limits>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::SDPatternMatch;

/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
  SDVTList Res = {VTs, NumVTs};
  return Res;
}

// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}

void SelectionDAG::DAGNodeDeletedListener::anchor() {}
void SelectionDAG::DAGNodeInsertedListener::anchor() {}

#define DEBUG_TYPE "selectiondag"

static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt",
       cl::Hidden, cl::init(true),
       cl::desc("Gang up loads and stores generated by inlining of memcpy"));

static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max",
       cl::desc("Number limit for gluing ld/st of memcpy."),
       cl::Hidden, cl::init(0));

static cl::opt<unsigned>
    MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192),
             cl::desc("DAG combiner limit number of steps when searching DAG "
                      "for predecessor nodes"));

static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
  LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G););
}

unsigned SelectionDAG::getHasPredecessorMaxSteps() { return MaxSteps; }

//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
//===----------------------------------------------------------------------===//

/// isExactlyValue - We don't rely on operator== working on double values, as
/// it returns true for things that are clearly not equal, like -0.0 and 0.0.
/// As such, this method can be used to do an exact bit-for-bit comparison of
/// two floating point values.
bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const {
  return getValueAPF().bitwiseIsEqual(V);
}

bool ConstantFPSDNode::isValueValidForType(EVT VT,
                                           const APFloat& Val) {
  assert(VT.isFloatingPoint() && "Can only convert between FP types");

  // convert modifies in place, so make a copy.
  APFloat Val2 = APFloat(Val);
  bool losesInfo;
  (void)Val2.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven,
                     &losesInfo);
  return !losesInfo;
}

//===----------------------------------------------------------------------===//
// ISD Namespace
//===----------------------------------------------------------------------===//

bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
  if (N->getOpcode() == ISD::SPLAT_VECTOR) {
    unsigned EltSize =
        N->getValueType(0).getVectorElementType().getSizeInBits();
    if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      SplatVal = Op0->getAPIntValue().trunc(EltSize);
      return true;
    }
    if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
      SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize);
      return true;
    }
  }

  auto *BV = dyn_cast<BuildVectorSDNode>(N);
  if (!BV)
    return false;

  APInt SplatUndef;
  unsigned SplatBitSize;
  bool HasUndefs;
  unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
  // Endianness does not matter here. We are checking for a splat given the
  // element size of the vector, and if we find such a splat for little endian
  // layout, then that should be valid also for big endian (as the full vector
  // size is known to be a multiple of the element size).
  const bool IsBigEndian = false;
  return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
                             EltSize, IsBigEndian) &&
         EltSize == SplatBitSize;
}

// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
// specializations of the more general isConstantSplatVector()?

bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
  // Look through a bit convert.
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
    APInt SplatVal;
    return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
  }

  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

  unsigned i = 0, e = N->getNumOperands();

  // Skip over all of the undef values.
  while (i != e && N->getOperand(i).isUndef())
    ++i;

  // Do not accept an all-undef vector.
  if (i == e) return false;

  // Do not accept build_vectors that aren't all constants or which have non-~0
  // elements. We have to be a bit careful here, as the type of the constant
  // may not be the same as the type of the vector elements due to type
  // legalization (the elements are promoted to a legal type for the target and
  // a vector of a type may be legal when the base element type is not).
  // We only want to check enough bits to cover the vector elements, because
  // we care if the resultant vector is all ones, not whether the individual
  // constants are.
  SDValue NotZero = N->getOperand(i);
  unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
    if (CN->getAPIntValue().countr_one() < EltSize)
      return false;
  } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
    if (CFPN->getValueAPF().bitcastToAPInt().countr_one() < EltSize)
      return false;
  } else
    return false;

  // Okay, we have at least one ~0 value, check to see if the rest match or are
  // undefs. Even with the above element type twiddling, this should be OK, as
  // the same type legalization should have applied to all the elements.
  for (++i; i != e; ++i)
    if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef())
      return false;
  return true;
}

bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
  // Look through a bit convert.
  while (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0).getNode();

  if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
    APInt SplatVal;
    return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
  }

  if (N->getOpcode() != ISD::BUILD_VECTOR) return false;

  bool IsAllUndef = true;
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    IsAllUndef = false;
    // Do not accept build_vectors that aren't all constants or which have non-0
    // elements. We have to be a bit careful here, as the type of the constant
    // may not be the same as the type of the vector elements due to type
    // legalization (the elements are promoted to a legal type for the target
    // and a vector of a type may be legal when the base element type is not).
    // We only want to check enough bits to cover the vector elements, because
    // we care if the resultant vector is all zeros, not whether the individual
    // constants are.
    unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
      if (CN->getAPIntValue().countr_zero() < EltSize)
        return false;
    } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
      if (CFPN->getValueAPF().bitcastToAPInt().countr_zero() < EltSize)
        return false;
    } else
      return false;
  }

  // Do not accept an all-undef vector.
  if (IsAllUndef)
    return false;
  return true;
}

bool ISD::isBuildVectorAllOnes(const SDNode *N) {
  return isConstantSplatVectorAllOnes(N, /*BuildVectorOnly*/ true);
}

bool ISD::isBuildVectorAllZeros(const SDNode *N) {
  return isConstantSplatVectorAllZeros(N, /*BuildVectorOnly*/ true);
}

bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    if (!isa<ConstantSDNode>(Op))
      return false;
  }
  return true;
}

bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    if (!isa<ConstantFPSDNode>(Op))
      return false;
  }
  return true;
}

bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize,
                             bool Signed) {
  assert(N->getValueType(0).isVector() && "Expected a vector!");

  unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
  if (EltSize <= NewEltSize)
    return false;

  if (N->getOpcode() == ISD::ZERO_EXTEND) {
    return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
            NewEltSize) &&
           !Signed;
  }
  if (N->getOpcode() == ISD::SIGN_EXTEND) {
    return (N->getOperand(0).getValueType().getScalarSizeInBits() <=
            NewEltSize) &&
           Signed;
  }
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    if (!isa<ConstantSDNode>(Op))
      return false;

    APInt C = Op->getAsAPIntVal().trunc(EltSize);
    if (Signed && C.trunc(NewEltSize).sext(EltSize) != C)
      return false;
    if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C)
      return false;
  }

  return true;
}

bool ISD::allOperandsUndef(const SDNode *N) {
  // Return false if the node has no operands.
  // This is "logically inconsistent" with the definition of "all" but
  // is probably the desired behavior.
  if (N->getNumOperands() == 0)
    return false;
  return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
}

bool ISD::isFreezeUndef(const SDNode *N) {
  return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
}

template <typename ConstNodeType>
bool ISD::matchUnaryPredicateImpl(SDValue Op,
                                  std::function<bool(ConstNodeType *)> Match,
                                  bool AllowUndefs) {
  // FIXME: Add support for scalar UNDEF cases?
  if (auto *C = dyn_cast<ConstNodeType>(Op))
    return Match(C);

  // FIXME: Add support for vector UNDEF cases?
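  // Only BUILD_VECTOR and SPLAT_VECTOR nodes expose their elements as
  // constant operands that can be matched one at a time; everything else is
  // rejected here.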
  if (ISD::BUILD_VECTOR != Op.getOpcode() &&
      ISD::SPLAT_VECTOR != Op.getOpcode())
    return false;

  EVT SVT = Op.getValueType().getScalarType();
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
    if (AllowUndefs && Op.getOperand(i).isUndef()) {
      if (!Match(nullptr))
        return false;
      continue;
    }

    auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i));
    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
      return false;
  }
  return true;
}
// Build used template types.
template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>(
    SDValue, std::function<bool(ConstantSDNode *)>, bool);
template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>(
    SDValue, std::function<bool(ConstantFPSDNode *)>, bool);

bool ISD::matchBinaryPredicate(
    SDValue LHS, SDValue RHS,
    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
    bool AllowUndefs, bool AllowTypeMismatch) {
  if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
    return false;

  // TODO: Add support for scalar UNDEF cases?
  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
      return Match(LHSCst, RHSCst);

  // TODO: Add support for vector UNDEF cases?
  if (LHS.getOpcode() != RHS.getOpcode() ||
      (LHS.getOpcode() != ISD::BUILD_VECTOR &&
       LHS.getOpcode() != ISD::SPLAT_VECTOR))
    return false;

  EVT SVT = LHS.getValueType().getScalarType();
  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
    SDValue LHSOp = LHS.getOperand(i);
    SDValue RHSOp = RHS.getOperand(i);
    bool LHSUndef = AllowUndefs && LHSOp.isUndef();
    bool RHSUndef = AllowUndefs && RHSOp.isUndef();
    auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp);
    auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
    if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
      return false;
    if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
                               LHSOp.getValueType() != RHSOp.getValueType()))
      return false;
    if (!Match(LHSCst, RHSCst))
      return false;
  }
  return true;
}

ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) {
  switch (MinMaxOpc) {
  default:
    llvm_unreachable("unrecognized opcode");
  case ISD::UMIN:
    return ISD::UMAX;
  case ISD::UMAX:
    return ISD::UMIN;
  case ISD::SMIN:
    return ISD::SMAX;
  case ISD::SMAX:
    return ISD::SMIN;
  }
}

ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
  switch (VecReduceOpcode) {
  default:
    llvm_unreachable("Expected VECREDUCE opcode");
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
    return ISD::FADD;
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_SEQ_FMUL:
  case ISD::VP_REDUCE_FMUL:
  case ISD::VP_REDUCE_SEQ_FMUL:
    return ISD::FMUL;
  case ISD::VECREDUCE_ADD:
  case ISD::VP_REDUCE_ADD:
    return ISD::ADD;
  case ISD::VECREDUCE_MUL:
  case ISD::VP_REDUCE_MUL:
    return ISD::MUL;
  case ISD::VECREDUCE_AND:
  case ISD::VP_REDUCE_AND:
    return ISD::AND;
  case ISD::VECREDUCE_OR:
  case ISD::VP_REDUCE_OR:
    return ISD::OR;
  case ISD::VECREDUCE_XOR:
  case ISD::VP_REDUCE_XOR:
    return ISD::XOR;
  case ISD::VECREDUCE_SMAX:
  case ISD::VP_REDUCE_SMAX:
    return ISD::SMAX;
  case ISD::VECREDUCE_SMIN:
  case ISD::VP_REDUCE_SMIN:
    return ISD::SMIN;
  case ISD::VECREDUCE_UMAX:
  case ISD::VP_REDUCE_UMAX:
    return ISD::UMAX;
  case ISD::VECREDUCE_UMIN:
  case ISD::VP_REDUCE_UMIN:
    return ISD::UMIN;
  case ISD::VECREDUCE_FMAX:
  case ISD::VP_REDUCE_FMAX:
    return ISD::FMAXNUM;
  case ISD::VECREDUCE_FMIN:
  case ISD::VP_REDUCE_FMIN:
    return ISD::FMINNUM;
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMAXIMUM:
    return ISD::FMAXIMUM;
  case ISD::VECREDUCE_FMINIMUM:
  case ISD::VP_REDUCE_FMINIMUM:
    return ISD::FMINIMUM;
  }
}

bool ISD::isVPOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...)                                    \
  case ISD::VPSD:                                                              \
    return true;
#include "llvm/IR/VPIntrinsics.def"
  }
}

bool ISD::isVPBinaryOp(unsigned Opcode) {
  switch (Opcode) {
  default:
    break;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
#define VP_PROPERTY_BINARYOP return true;
#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
  }
  return false;
}

bool ISD::isVPReduction(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_MUL:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_FMAX:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMINIMUM:
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_FMUL:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_SEQ_FMUL:
    return true;
  }
}

/// The operand position of the vector mask.
std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...)         \
  case ISD::VPSD:                                                              \
    return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
  }
}

/// The operand position of the explicit vector length parameter.
std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS)      \
  case ISD::VPSD:                                                              \
    return EVLPOS;
#include "llvm/IR/VPIntrinsics.def"
  }
}

std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
                                                bool hasFPExcept) {
  // FIXME: Return strict opcodes in case of fp exceptions.
  switch (VPOpcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) case ISD::VPOPC:
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return ISD::SDOPC;
#define END_REGISTER_VP_SDNODE(VPOPC) break;
#include "llvm/IR/VPIntrinsics.def"
  }
  return std::nullopt;
}

std::optional<unsigned> ISD::getVPForBaseOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;
#include "llvm/IR/VPIntrinsics.def"
  }
}

ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
  switch (ExtType) {
  case ISD::EXTLOAD:
    return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
  case ISD::SEXTLOAD:
    return ISD::SIGN_EXTEND;
  case ISD::ZEXTLOAD:
    return ISD::ZERO_EXTEND;
  default:
    break;
  }

  llvm_unreachable("Invalid LoadExtType");
}

ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
  // To perform this operation, we just need to swap the L and G bits of the
  // operation.
  unsigned OldL = (Operation >> 2) & 1;
  unsigned OldG = (Operation >> 1) & 1;
  return ISD::CondCode((Operation & ~6) |  // Keep the N, U, E bits
                       (OldL << 1) |       // New G bit
                       (OldG << 2));       // New L bit.
}

static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) {
  unsigned Operation = Op;
  if (isIntegerLike)
    Operation ^= 7;   // Flip L, G, E bits, but not U.
  else
    Operation ^= 15;  // Flip all of the condition bits.

  if (Operation > ISD::SETTRUE2)
    Operation &= ~8;  // Don't let N and U bits get set.

  return ISD::CondCode(Operation);
}

ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) {
  return getSetCCInverseImpl(Op, Type.isInteger());
}

ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op,
                                               bool isIntegerLike) {
  return getSetCCInverseImpl(Op, isIntegerLike);
}

/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if it is an unsigned comparison. Return zero if the operation does
/// not depend on the sign of the input (setne and seteq).
static int isSignedOp(ISD::CondCode Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Illegal integer setcc operation!");
  case ISD::SETEQ:
  case ISD::SETNE: return 0;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE: return 1;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return 2;
  }
}

ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
                                       EVT Type) {
  bool IsInteger = Type.isInteger();
  if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
    // Cannot fold a signed integer setcc with an unsigned integer setcc.
    return ISD::SETCC_INVALID;

  unsigned Op = Op1 | Op2;  // Combine all of the condition bits.

  // If the N and U bits get set, then the resultant comparison DOES suddenly
  // care about orderedness, and it is true when ordered.
  if (Op > ISD::SETTRUE2)
    Op &= ~16;  // Clear the U bit if the N bit is set.

  // Canonicalize illegal integer setcc's.
  if (IsInteger && Op == ISD::SETUNE)  // e.g. SETUGT | SETULT
    Op = ISD::SETNE;

  return ISD::CondCode(Op);
}

ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
                                        EVT Type) {
  bool IsInteger = Type.isInteger();
  if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
    // Cannot fold a signed setcc with an unsigned setcc.
    return ISD::SETCC_INVALID;

  // Combine all of the condition bits.
  ISD::CondCode Result = ISD::CondCode(Op1 & Op2);

  // Canonicalize illegal integer setcc's.
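  // The bitwise AND of two integer condition codes can yield a code that is
  // only legal for floating point (e.g. SETUGT & SETULT == SETUO); the switch
  // below remaps such results onto equivalent integer-legal codes.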
  if (IsInteger) {
    switch (Result) {
    default: break;
    case ISD::SETUO : Result = ISD::SETFALSE; break;  // SETUGT & SETULT
    case ISD::SETOEQ:                                 // SETEQ  & SETU[LG]E
    case ISD::SETUEQ: Result = ISD::SETEQ   ; break;  // SETUGE & SETULE
    case ISD::SETOLT: Result = ISD::SETULT  ; break;  // SETULT & SETNE
    case ISD::SETOGT: Result = ISD::SETUGT  ; break;  // SETUGT & SETNE
    }
  }

  return Result;
}

//===----------------------------------------------------------------------===//
// SDNode Profile Support
//===----------------------------------------------------------------------===//

/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
  ID.AddInteger(OpC);
}

/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them
/// solely with their pointer.
static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
  ID.AddPointer(VTList.VTs);
}

/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
                              ArrayRef<SDValue> Ops) {
  for (const auto &Op : Ops) {
    ID.AddPointer(Op.getNode());
    ID.AddInteger(Op.getResNo());
  }
}

/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
static void AddNodeIDOperands(FoldingSetNodeID &ID,
                              ArrayRef<SDUse> Ops) {
  for (const auto &Op : Ops) {
    ID.AddPointer(Op.getNode());
    ID.AddInteger(Op.getResNo());
  }
}

static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC,
                          SDVTList VTList, ArrayRef<SDValue> OpList) {
  AddNodeIDOpcode(ID, OpC);
  AddNodeIDValueTypes(ID, VTList);
  AddNodeIDOperands(ID, OpList);
}

/// If this is an SDNode with special info, add this info to the NodeID data.
static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::TargetExternalSymbol:
  case ISD::ExternalSymbol:
  case ISD::MCSymbol:
    llvm_unreachable("Should only be used on nodes with operands");
  default: break;  // Normal nodes don't need extra info.
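  // Constant nodes are uniqued on the underlying ConstantInt pointer plus the
  // opaque flag, so structurally identical constants fold to one node.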
  case ISD::TargetConstant:
  case ISD::Constant: {
    const ConstantSDNode *C = cast<ConstantSDNode>(N);
    ID.AddPointer(C->getConstantIntValue());
    ID.AddBoolean(C->isOpaque());
    break;
  }
  case ISD::TargetConstantFP:
  case ISD::ConstantFP:
    ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
    break;
  case ISD::TargetGlobalAddress:
  case ISD::GlobalAddress:
  case ISD::TargetGlobalTLSAddress:
  case ISD::GlobalTLSAddress: {
    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
    ID.AddPointer(GA->getGlobal());
    ID.AddInteger(GA->getOffset());
    ID.AddInteger(GA->getTargetFlags());
    break;
  }
  case ISD::BasicBlock:
    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
    break;
  case ISD::Register:
    ID.AddInteger(cast<RegisterSDNode>(N)->getReg().id());
    break;
  case ISD::RegisterMask:
    ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
    break;
  case ISD::SRCVALUE:
    ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
    break;
  case ISD::FrameIndex:
  case ISD::TargetFrameIndex:
    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
    break;
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
    if (cast<LifetimeSDNode>(N)->hasOffset()) {
      ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
      ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
    }
    break;
  case ISD::PSEUDO_PROBE:
    ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid());
    ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex());
    ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes());
    break;
  case ISD::JumpTable:
  case ISD::TargetJumpTable:
    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
    break;
  case ISD::ConstantPool:
  case ISD::TargetConstantPool: {
    const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
    ID.AddInteger(CP->getAlign().value());
    ID.AddInteger(CP->getOffset());
    if (CP->isMachineConstantPoolEntry())
      CP->getMachineCPVal()->addSelectionDAGCSEId(ID);
    else
      ID.AddPointer(CP->getConstVal());
    ID.AddInteger(CP->getTargetFlags());
    break;
  }
  case ISD::TargetIndex: {
    const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N);
    ID.AddInteger(TI->getIndex());
    ID.AddInteger(TI->getOffset());
    ID.AddInteger(TI->getTargetFlags());
    break;
  }
  case ISD::LOAD: {
    const LoadSDNode *LD = cast<LoadSDNode>(N);
    ID.AddInteger(LD->getMemoryVT().getRawBits());
    ID.AddInteger(LD->getRawSubclassData());
    ID.AddInteger(LD->getPointerInfo().getAddrSpace());
    ID.AddInteger(LD->getMemOperand()->getFlags());
    break;
  }
  case ISD::STORE: {
    const StoreSDNode *ST = cast<StoreSDNode>(N);
    ID.AddInteger(ST->getMemoryVT().getRawBits());
    ID.AddInteger(ST->getRawSubclassData());
    ID.AddInteger(ST->getPointerInfo().getAddrSpace());
    ID.AddInteger(ST->getMemOperand()->getFlags());
    break;
  }
  case ISD::VP_LOAD: {
    const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
    ID.AddInteger(ELD->getMemoryVT().getRawBits());
    ID.AddInteger(ELD->getRawSubclassData());
    ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
    ID.AddInteger(ELD->getMemOperand()->getFlags());
    break;
  }
  case ISD::VP_STORE: {
    const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
    ID.AddInteger(EST->getMemoryVT().getRawBits());
    ID.AddInteger(EST->getRawSubclassData());
    ID.AddInteger(EST->getPointerInfo().getAddrSpace());
    ID.AddInteger(EST->getMemOperand()->getFlags());
    break;
  }
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: {
    const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N);
    ID.AddInteger(SLD->getMemoryVT().getRawBits());
    ID.AddInteger(SLD->getRawSubclassData());
    ID.AddInteger(SLD->getPointerInfo().getAddrSpace());
    break;
  }
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE: {
    const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
    ID.AddInteger(SST->getMemoryVT().getRawBits());
    ID.AddInteger(SST->getRawSubclassData());
    ID.AddInteger(SST->getPointerInfo().getAddrSpace());
    break;
  }
  case ISD::VP_GATHER: {
    const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
    ID.AddInteger(EG->getMemoryVT().getRawBits());
    ID.AddInteger(EG->getRawSubclassData());
    ID.AddInteger(EG->getPointerInfo().getAddrSpace());
    ID.AddInteger(EG->getMemOperand()->getFlags());
    break;
  }
  case ISD::VP_SCATTER: {
    const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
    ID.AddInteger(ES->getMemoryVT().getRawBits());
    ID.AddInteger(ES->getRawSubclassData());
    ID.AddInteger(ES->getPointerInfo().getAddrSpace());
    ID.AddInteger(ES->getMemOperand()->getFlags());
    break;
  }
  case ISD::MLOAD: {
    const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
    ID.AddInteger(MLD->getMemoryVT().getRawBits());
    ID.AddInteger(MLD->getRawSubclassData());
    ID.AddInteger(MLD->getPointerInfo().getAddrSpace());
    ID.AddInteger(MLD->getMemOperand()->getFlags());
    break;
  }
  case ISD::MSTORE: {
    const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
    ID.AddInteger(MST->getMemoryVT().getRawBits());
    ID.AddInteger(MST->getRawSubclassData());
    ID.AddInteger(MST->getPointerInfo().getAddrSpace());
    ID.AddInteger(MST->getMemOperand()->getFlags());
    break;
  }
  case ISD::MGATHER: {
    const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N);
    ID.AddInteger(MG->getMemoryVT().getRawBits());
    ID.AddInteger(MG->getRawSubclassData());
    ID.AddInteger(MG->getPointerInfo().getAddrSpace());
    ID.AddInteger(MG->getMemOperand()->getFlags());
    break;
  }
  case ISD::MSCATTER: {
    const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N);
    ID.AddInteger(MS->getMemoryVT().getRawBits());
    ID.AddInteger(MS->getRawSubclassData());
    ID.AddInteger(MS->getPointerInfo().getAddrSpace());
    ID.AddInteger(MS->getMemOperand()->getFlags());
    break;
  }
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_CLR:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
    ID.AddInteger(AT->getMemoryVT().getRawBits());
    ID.AddInteger(AT->getRawSubclassData());
    ID.AddInteger(AT->getPointerInfo().getAddrSpace());
    ID.AddInteger(AT->getMemOperand()->getFlags());
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
    for (int M : Mask)
      ID.AddInteger(M);
    break;
  }
  case ISD::ADDRSPACECAST: {
    const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
    ID.AddInteger(ASC->getSrcAddressSpace());
    ID.AddInteger(ASC->getDestAddressSpace());
    break;
  }
  case ISD::TargetBlockAddress:
  case ISD::BlockAddress: {
    const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N);
    ID.AddPointer(BA->getBlockAddress());
    ID.AddInteger(BA->getOffset());
    ID.AddInteger(BA->getTargetFlags());
    break;
  }
  case ISD::AssertAlign:
    ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
    break;
  case ISD::PREFETCH:
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    // Handled by MemIntrinsicSDNode check after the switch.
    break;
  } // end switch (N->getOpcode())

  // MemIntrinsic nodes could also have subclass data, address spaces, and flags
  // to check.
  if (auto *MN = dyn_cast<MemIntrinsicSDNode>(N)) {
    ID.AddInteger(MN->getRawSubclassData());
    ID.AddInteger(MN->getPointerInfo().getAddrSpace());
    ID.AddInteger(MN->getMemOperand()->getFlags());
    ID.AddInteger(MN->getMemoryVT().getRawBits());
  }
}

/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
/// data.
static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
  AddNodeIDOpcode(ID, N->getOpcode());
  // Add the return value info.
  AddNodeIDValueTypes(ID, N->getVTList());
  // Add the operand info.
  AddNodeIDOperands(ID, N->ops());

  // Handle SDNode leaves with special info.
  AddNodeIDCustom(ID, N);
}

//===----------------------------------------------------------------------===//
// SelectionDAG Class
//===----------------------------------------------------------------------===//

/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
  if (N->getValueType(0) == MVT::Glue)
    return true; // Never CSE anything that produces a glue result.

  switch (N->getOpcode()) {
  default: break;
  case ISD::HANDLENODE:
  case ISD::EH_LABEL:
    return true;   // Never CSE these nodes.
  }

  // Check that remaining values produced are not flags.
  for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
    if (N->getValueType(i) == MVT::Glue)
      return true; // Never CSE anything that produces a glue result.

  return false;
}

/// RemoveDeadNodes - This method deletes all unreachable nodes in the
/// SelectionDAG.
void SelectionDAG::RemoveDeadNodes() {
  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted.
  HandleSDNode Dummy(getRoot());

  SmallVector<SDNode*, 128> DeadNodes;

  // Add all obviously-dead nodes to the DeadNodes worklist.
  for (SDNode &Node : allnodes())
    if (Node.use_empty())
      DeadNodes.push_back(&Node);

  RemoveDeadNodes(DeadNodes);

  // If the root changed (e.g. it was a dead load), update the root.
  setRoot(Dummy.getValue());
}

/// RemoveDeadNodes - This method deletes the unreachable nodes in the
/// given list, and any nodes that become unreachable as a result.
void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {

  // Process the worklist, deleting the nodes and adding their uses to the
  // worklist.
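  // Dropping a dead node's operands may leave those operands without any
  // remaining uses; such operands are pushed onto the same worklist so the
  // deletion cascades through the DAG.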
  while (!DeadNodes.empty()) {
    SDNode *N = DeadNodes.pop_back_val();
    // Skip to next node if we've already managed to delete the node. This could
    // happen if replacing a node causes a node previously added to the worklist
    // to be deleted.
    if (N->getOpcode() == ISD::DELETED_NODE)
      continue;

    for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
      DUL->NodeDeleted(N, nullptr);

    // Take the node out of the appropriate CSE map.
    RemoveNodeFromCSEMaps(N);

    // Next, brutally remove the operand list. This is safe to do, as there are
    // no cycles in the graph.
    for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
      SDUse &Use = *I++;
      SDNode *Operand = Use.getNode();
      Use.set(SDValue());

      // Now that we removed this operand, see if there are no uses of it left.
      if (Operand->use_empty())
        DeadNodes.push_back(Operand);
    }

    DeallocateNode(N);
  }
}

void SelectionDAG::RemoveDeadNode(SDNode *N){
  SmallVector<SDNode*, 16> DeadNodes(1, N);

  // Create a dummy node that adds a reference to the root node, preventing
  // it from being deleted.  (This matters if the root is an operand of the
  // dead node.)
  HandleSDNode Dummy(getRoot());

  RemoveDeadNodes(DeadNodes);
}

void SelectionDAG::DeleteNode(SDNode *N) {
  // First take this out of the appropriate CSE map.
  RemoveNodeFromCSEMaps(N);

  // Finally, remove uses due to operands of this node, remove from the
  // AllNodes list, and delete the node.
  DeleteNodeNotInCSEMaps(N);
}

void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
  assert(N->getIterator() != AllNodes.begin() &&
         "Cannot delete the entry node!");
  assert(N->use_empty() && "Cannot delete a node that is not dead!");

  // Drop all of the operands and decrement used node's use counts.
  N->DropOperands();

  DeallocateNode(N);
}

void SDDbgInfo::add(SDDbgValue *V, bool isParameter) {
  assert(!(V->isVariadic() && isParameter));
  if (isParameter)
    ByvalParmDbgValues.push_back(V);
  else
    DbgValues.push_back(V);
  for (const SDNode *Node : V->getSDNodes())
    if (Node)
      DbgValMap[Node].push_back(V);
}

void SDDbgInfo::erase(const SDNode *Node) {
  DbgValMapType::iterator I = DbgValMap.find(Node);
  if (I == DbgValMap.end())
    return;
  for (auto &Val: I->second)
    Val->setIsInvalidated();
  DbgValMap.erase(I);
}

void SelectionDAG::DeallocateNode(SDNode *N) {
  // If we have operands, deallocate them.
  removeOperands(N);

  NodeAllocator.Deallocate(AllNodes.remove(N));

  // Set the opcode to DELETED_NODE to help catch bugs when node
  // memory is reallocated.
  // FIXME: There are places in SDag that have grown a dependency on the opcode
  // value in the released node.
  __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType));
  N->NodeType = ISD::DELETED_NODE;

  // If any of the SDDbgValue nodes refer to this SDNode, invalidate
  // them and forget about that node.
  DbgInfo->erase(N);

  // Invalidate extra info.
  SDEI.erase(N);
}

#ifndef NDEBUG
/// VerifySDNode - Check the given SDNode.  Aborts if it is invalid.
static void VerifySDNode(SDNode *N, const TargetLowering *TLI) {
  switch (N->getOpcode()) {
  default:
    if (N->getOpcode() > ISD::BUILTIN_OP_END)
      TLI->verifyTargetSDNode(N);
    break;
  case ISD::BUILD_PAIR: {
    EVT VT = N->getValueType(0);
    assert(N->getNumValues() == 1 && "Too many results!");
    assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) &&
           "Wrong return type!");
    assert(N->getNumOperands() == 2 && "Wrong number of operands!");
    assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() &&
           "Mismatched operand types!");
    assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() &&
           "Wrong operand type!");
    assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() &&
           "Wrong return type size");
    break;
  }
  case ISD::BUILD_VECTOR: {
    assert(N->getNumValues() == 1 && "Too many results!");
    assert(N->getValueType(0).isVector() && "Wrong return type!");
    assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() &&
           "Wrong number of operands!");
    EVT EltVT = N->getValueType(0).getVectorElementType();
    for (const SDUse &Op : N->ops()) {
      assert((Op.getValueType() == EltVT ||
              (EltVT.isInteger() && Op.getValueType().isInteger() &&
               EltVT.bitsLE(Op.getValueType()))) &&
             "Wrong operand type!");
      assert(Op.getValueType() == N->getOperand(0).getValueType() &&
             "Operands must all have the same type");
    }
    break;
  }
  }
}
#endif // NDEBUG

/// Insert a newly allocated node into the DAG.
///
/// Handles insertion into the all nodes list and CSE map, as well as
/// verification and other common operations when a new node is allocated.
void SelectionDAG::InsertNode(SDNode *N) {
  AllNodes.push_back(N);
#ifndef NDEBUG
  N->PersistentId = NextPersistentId++;
  VerifySDNode(N, TLI);
#endif
  for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
    DUL->NodeInserted(N);
}

/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
/// corresponds to it. This is useful when we're about to delete or repurpose
/// the node. We don't want future requests for structurally identical nodes
/// to return N anymore.
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
  bool Erased = false;
  switch (N->getOpcode()) {
  case ISD::HANDLENODE: return false;  // noop.
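  // Condition codes, symbols, and value types are kept in dedicated side
  // tables rather than in the folding set, so they are removed from those
  // tables instead of the CSE map.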
  case ISD::CONDCODE:
    assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
           "Cond code doesn't exist!");
    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
    break;
  case ISD::ExternalSymbol:
    Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
    break;
  case ISD::TargetExternalSymbol: {
    ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
    Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>(
        ESN->getSymbol(), ESN->getTargetFlags()));
    break;
  }
  case ISD::MCSymbol: {
    auto *MCSN = cast<MCSymbolSDNode>(N);
    Erased = MCSymbols.erase(MCSN->getMCSymbol());
    break;
  }
  case ISD::VALUETYPE: {
    EVT VT = cast<VTSDNode>(N)->getVT();
    if (VT.isExtended()) {
      Erased = ExtendedValueTypeNodes.erase(VT);
    } else {
      Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
      ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
    }
    break;
  }
  default:
    // Remove it from the CSE Map.
    assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
    assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
    Erased = CSEMap.RemoveNode(N);
    break;
  }
#ifndef NDEBUG
  // Verify that the node was actually in one of the CSE maps, unless it has a
  // glue result (which cannot be CSE'd) or is one of the special cases that are
  // not subject to CSE.
  if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
      !N->isMachineOpcode() && !doNotCSE(N)) {
    N->dump(this);
    dbgs() << "\n";
    llvm_unreachable("Node is not in map!");
  }
#endif
  return Erased;
}

/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE
/// maps and modified in place. Add it back to the CSE maps, unless an identical
/// node already exists, in which case transfer all its users to the existing
/// node. This transfer can potentially trigger recursive merging.
void
SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
  // For node types that aren't CSE'd, just act as if no identical node
  // already exists.
  if (!doNotCSE(N)) {
    SDNode *Existing = CSEMap.GetOrInsertNode(N);
    if (Existing != N) {
      // If there was already an existing matching node, use ReplaceAllUsesWith
      // to replace the dead one with the existing one. This can cause
      // recursive merging of other unrelated nodes down the line.
      Existing->intersectFlagsWith(N->getFlags());
      ReplaceAllUsesWith(N, Existing);

      // N is now dead. Inform the listeners and delete it.
      for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
        DUL->NodeDeleted(N, Existing);
      DeleteNodeNotInCSEMaps(N);
      return;
    }
  }

  // If the node doesn't already exist, we updated it. Inform listeners.
  for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
    DUL->NodeUpdated(N);
}

/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
                                           void *&InsertPos) {
  if (doNotCSE(N))
    return nullptr;

  SDValue Ops[] = { Op };
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
  AddNodeIDCustom(ID, N);
  SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
  if (Node)
    Node->intersectFlagsWith(N->getFlags());
  return Node;
}

/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
                                           SDValue Op1, SDValue Op2,
                                           void *&InsertPos) {
  if (doNotCSE(N))
    return nullptr;

  SDValue Ops[] = { Op1, Op2 };
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
  AddNodeIDCustom(ID, N);
  SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
  if (Node)
    Node->intersectFlagsWith(N->getFlags());
  return Node;
}

/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
                                           void *&InsertPos) {
  if (doNotCSE(N))
    return nullptr;

  FoldingSetNodeID ID;
  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
  AddNodeIDCustom(ID, N);
  SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos);
  if (Node)
    Node->intersectFlagsWith(N->getFlags());
  return Node;
}

Align SelectionDAG::getEVTAlign(EVT VT) const {
  Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0)
                             : VT.getTypeForEVT(*getContext());

  return getDataLayout().getABITypeAlign(Ty);
}

// EntryNode could meaningfully have debug info if we can find it...
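// The constructor below only creates and registers the EntryToken node and
// makes it the initial root; the remaining per-function state is filled in by
// init() once the MachineFunction is known.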
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL)
    : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(),
                                      getVTList(MVT::Other, MVT::Glue)),
      Root(getEntryNode()) {
  InsertNode(&EntryNode);
  DbgInfo = new SDDbgInfo();
}

void SelectionDAG::init(MachineFunction &NewMF,
                        OptimizationRemarkEmitter &NewORE, Pass *PassPtr,
                        const TargetLibraryInfo *LibraryInfo,
                        UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
                        BlockFrequencyInfo *BFIin, MachineModuleInfo &MMIin,
                        FunctionVarLocs const *VarLocs) {
  MF = &NewMF;
  SDAGISelPass = PassPtr;
  ORE = &NewORE;
  TLI = getSubtarget().getTargetLowering();
  TSI = getSubtarget().getSelectionDAGInfo();
  LibInfo = LibraryInfo;
  Context = &MF->getFunction().getContext();
  UA = NewUA;
  PSI = PSIin;
  BFI = BFIin;
  MMI = &MMIin;
  FnVarLocs = VarLocs;
}

SelectionDAG::~SelectionDAG() {
  assert(!UpdateListeners && "Dangling registered DAGUpdateListeners");
  allnodes_clear();
  OperandRecycler.clear(OperandAllocator);
  delete DbgInfo;
}

bool SelectionDAG::shouldOptForSize() const {
  return llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI);
}

void SelectionDAG::allnodes_clear() {
  assert(&*AllNodes.begin() == &EntryNode);
  AllNodes.remove(AllNodes.begin());
  while (!AllNodes.empty())
    DeallocateNode(&AllNodes.front());
#ifndef NDEBUG
  NextPersistentId = 0;
#endif
}

SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
                                          void *&InsertPos) {
  SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
  if (N) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::Constant:
    case ISD::ConstantFP:
      llvm_unreachable("Querying for Constant and ConstantFP nodes requires "
                       "debug location. Use another overload.");
    }
  }
  return N;
}

SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
                                          const SDLoc &DL, void *&InsertPos) {
  SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
  if (N) {
    switch (N->getOpcode()) {
    case ISD::Constant:
    case ISD::ConstantFP:
      // Erase debug location from the node if the node is used at several
      // different places. Do not propagate one location to all uses as it
      // will cause a worse single stepping debugging experience.
      if (N->getDebugLoc() != DL.getDebugLoc())
        N->setDebugLoc(DebugLoc());
      break;
    default:
      // When the node's point of use is located earlier in the instruction
      // sequence than its prior point of use, update its debug info to the
      // earlier location.
      if (DL.getIROrder() && DL.getIROrder() < N->getIROrder())
        N->setDebugLoc(DL.getDebugLoc());
      break;
    }
  }
  return N;
}

void SelectionDAG::clear() {
  allnodes_clear();
  OperandRecycler.clear(OperandAllocator);
  OperandAllocator.Reset();
  CSEMap.clear();

  ExtendedValueTypeNodes.clear();
  ExternalSymbols.clear();
  TargetExternalSymbols.clear();
  MCSymbols.clear();
  SDEI.clear();
  std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), nullptr);
  std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), nullptr);

  EntryNode.UseList = nullptr;
  InsertNode(&EntryNode);
  Root = getEntryNode();
  DbgInfo->clear();
}

SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
  return VT.bitsGT(Op.getValueType())
             ? getNode(ISD::FP_EXTEND, DL, VT, Op)
             : getNode(ISD::FP_ROUND, DL, VT, Op,
                       getIntPtrConstant(0, DL, /*isTarget=*/true));
}

std::pair<SDValue, SDValue>
SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
                                       const SDLoc &DL, EVT VT) {
  assert(!VT.bitsEq(Op.getValueType()) &&
         "Strict no-op FP extend/round not allowed.");
  SDValue Res =
      VT.bitsGT(Op.getValueType())
          ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
          : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
                    {Chain, Op, getIntPtrConstant(0, DL, /*isTarget=*/true)});

  return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
}

SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
  return VT.bitsGT(Op.getValueType()) ?
    getNode(ISD::ANY_EXTEND, DL, VT, Op) :
    getNode(ISD::TRUNCATE, DL, VT, Op);
}

SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
  return VT.bitsGT(Op.getValueType()) ?
    getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
    getNode(ISD::TRUNCATE, DL, VT, Op);
}

SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
  return VT.bitsGT(Op.getValueType()) ?
    getNode(ISD::ZERO_EXTEND, DL, VT, Op) :
    getNode(ISD::TRUNCATE, DL, VT, Op);
}

SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL,
                                                EVT VT) {
  assert(!VT.isVector());
  auto Type = Op.getValueType();
  SDValue DestOp;
  if (Type == VT)
    return Op;
  auto Size = Op.getValueSizeInBits();
  DestOp = getBitcast(EVT::getIntegerVT(*Context, Size), Op);
  if (DestOp.getValueType() == VT)
    return DestOp;

  return getAnyExtOrTrunc(DestOp, DL, VT);
}

SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL,
                                              EVT VT) {
  assert(!VT.isVector());
  auto Type = Op.getValueType();
  SDValue DestOp;
  if (Type == VT)
    return Op;
  auto Size = Op.getValueSizeInBits();
  DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
  if (DestOp.getValueType() == VT)
    return DestOp;

  return getSExtOrTrunc(DestOp, DL, VT);
}

SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL,
                                              EVT VT) {
  assert(!VT.isVector());
  auto Type = Op.getValueType();
  SDValue DestOp;
  if (Type == VT)
    return Op;
  auto Size = Op.getValueSizeInBits();
  DestOp = getBitcast(MVT::getIntegerVT(Size), Op);
  if (DestOp.getValueType() == VT)
    return DestOp;

  return getZExtOrTrunc(DestOp, DL, VT);
}

SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT,
                                        EVT OpVT) {
  if (VT.bitsLE(Op.getValueType()))
    return getNode(ISD::TRUNCATE, SL, VT, Op);

  TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
  return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
}

SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
  EVT OpVT = Op.getValueType();
  assert(VT.isInteger() && OpVT.isInteger() &&
         "Cannot getZeroExtendInReg FP types");
  assert(VT.isVector() == OpVT.isVector() &&
         "getZeroExtendInReg type should be vector iff the operand "
         "type is vector!");
  assert((!VT.isVector() ||
          VT.getVectorElementCount() == OpVT.getVectorElementCount()) &&
         "Vector element counts must match in getZeroExtendInReg");
  assert(VT.bitsLE(OpVT) && "Not extending!");
  if (OpVT == VT)
    return Op;
  APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
                                   VT.getScalarSizeInBits());
  return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
}

SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask,
                                           SDValue EVL, const SDLoc &DL,
                                           EVT VT) {
  EVT OpVT = Op.getValueType();
  assert(VT.isInteger() && OpVT.isInteger() &&
         "Cannot getVPZeroExtendInReg FP types");
  assert(VT.isVector() && OpVT.isVector() &&
         "getVPZeroExtendInReg type and operand type should be vector!");
  assert(VT.getVectorElementCount() == OpVT.getVectorElementCount() &&
         "Vector element counts must match in getZeroExtendInReg");
  assert(VT.bitsLE(OpVT) && "Not extending!");
  if (OpVT == VT)
    return Op;
  APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
                                   VT.getScalarSizeInBits());
  return getNode(ISD::VP_AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT), Mask,
                 EVL);
}

SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
  // Only unsigned pointer semantics are supported right now. In the future this
  // might delegate to TLI to check pointer signedness.
  return getZExtOrTrunc(Op, DL, VT);
}

SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
  // Only unsigned pointer semantics are supported right now. In the future this
  // might delegate to TLI to check pointer signedness.
  return getZeroExtendInReg(Op, DL, VT);
}

SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) {
  return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val);
}

/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
  return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
}

SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
  SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
  return getNode(ISD::XOR, DL, VT, Val, TrueValue);
}

SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val,
                                      SDValue Mask, SDValue EVL, EVT VT) {
  SDValue TrueValue = getBoolConstant(true, DL, VT, VT);
  return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL);
}

SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
                                         SDValue Mask, SDValue EVL) {
  return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL);
}

SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op,
                                       SDValue Mask, SDValue EVL) {
  if (VT.bitsGT(Op.getValueType()))
    return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL);
  if (VT.bitsLT(Op.getValueType()))
    return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL);
  return Op;
}

SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT,
                                      EVT OpVT) {
  if (!V)
    return getConstant(0, DL, VT);

  switch (TLI->getBooleanContents(OpVT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    return getConstant(1, DL, VT);
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return getAllOnesConstant(DL, VT);
  }
  llvm_unreachable("Unexpected boolean content enum!");
}

SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
                                  bool isT, bool isO) {
  return getConstant(APInt(VT.getScalarSizeInBits(), Val, /*isSigned=*/false),
                     DL, VT, isT, isO);
}

SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT,
                                  bool isT, bool isO) {
  return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO);
}

SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
                                  EVT VT, bool isT, bool isO) {
  assert(VT.isInteger() && "Cannot create FP integer constant!");

  EVT EltVT = VT.getScalarType();
  const ConstantInt *Elt = &Val;

  // Vector splats are explicit within the DAG, with ConstantSDNode holding the
  // to-be-splatted scalar ConstantInt.
  if (isa<VectorType>(Elt->getType()))
    Elt = ConstantInt::get(*getContext(), Elt->getValue());

  // In some cases the vector type is legal but the element type is illegal and
  // needs to be promoted, for example v8i8 on ARM. In this case, promote the
  // inserted value (the type does not need to match the vector element type).
  // Any extra bits introduced will be truncated away.
1693 if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == 1694 TargetLowering::TypePromoteInteger) { 1695 EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); 1696 APInt NewVal; 1697 if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT)) 1698 NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits()); 1699 else 1700 NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); 1701 Elt = ConstantInt::get(*getContext(), NewVal); 1702 } 1703 // In other cases the element type is illegal and needs to be expanded, for 1704 // example v2i64 on MIPS32. In this case, find the nearest legal type, split 1705 // the value into n parts and use a vector type with n-times the elements. 1706 // Then bitcast to the type requested. 1707 // Legalizing constants too early makes the DAGCombiner's job harder so we 1708 // only legalize if the DAG tells us we must produce legal types. 1709 else if (NewNodesMustHaveLegalTypes && VT.isVector() && 1710 TLI->getTypeAction(*getContext(), EltVT) == 1711 TargetLowering::TypeExpandInteger) { 1712 const APInt &NewVal = Elt->getValue(); 1713 EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); 1714 unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); 1715 1716 // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. 1717 if (VT.isScalableVector() || 1718 TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) { 1719 assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && 1720 "Can only handle an even split!"); 1721 unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; 1722 1723 SmallVector<SDValue, 2> ScalarParts; 1724 for (unsigned i = 0; i != Parts; ++i) 1725 ScalarParts.push_back(getConstant( 1726 NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, 1727 ViaEltVT, isT, isO)); 1728 1729 return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts); 1730 } 1731 1732 unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; 1733 EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); 1734 1735 // Check the temporary vector is the correct size. If this fails then 1736 // getTypeToTransformTo() probably returned a type whose size (in bits) 1737 // isn't a power-of-2 factor of the requested type size. 1738 assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); 1739 1740 SmallVector<SDValue, 2> EltParts; 1741 for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) 1742 EltParts.push_back(getConstant( 1743 NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL, 1744 ViaEltVT, isT, isO)); 1745 1746 // EltParts is currently in little endian order. If we actually want 1747 // big-endian order then reverse it now. 1748 if (getDataLayout().isBigEndian()) 1749 std::reverse(EltParts.begin(), EltParts.end()); 1750 1751 // The elements must be reversed when the element order is different 1752 // to the endianness of the elements (because the BITCAST is itself a 1753 // vector shuffle in this situation). However, we do not need any code to 1754 // perform this reversal because getConstant() is producing a vector 1755 // splat. 1756 // This situation occurs in MIPS MSA. 1757 1758 SmallVector<SDValue, 8> Ops; 1759 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) 1760 llvm::append_range(Ops, EltParts); 1761 1762 SDValue V = 1763 getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); 1764 return V; 1765 } 1766 1767 assert(Elt->getBitWidth() == EltVT.getSizeInBits() && 1768 "APInt size does not match type size!"); 1769 unsigned Opc = isT ? 
ISD::TargetConstant : ISD::Constant; 1770 SDVTList VTs = getVTList(EltVT); 1771 FoldingSetNodeID ID; 1772 AddNodeIDNode(ID, Opc, VTs, {}); 1773 ID.AddPointer(Elt); 1774 ID.AddBoolean(isO); 1775 void *IP = nullptr; 1776 SDNode *N = nullptr; 1777 if ((N = FindNodeOrInsertPos(ID, DL, IP))) 1778 if (!VT.isVector()) 1779 return SDValue(N, 0); 1780 1781 if (!N) { 1782 N = newSDNode<ConstantSDNode>(isT, isO, Elt, VTs); 1783 CSEMap.InsertNode(N, IP); 1784 InsertNode(N); 1785 NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); 1786 } 1787 1788 SDValue Result(N, 0); 1789 if (VT.isVector()) 1790 Result = getSplat(VT, DL, Result); 1791 return Result; 1792 } 1793 1794 SDValue SelectionDAG::getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, 1795 bool isT, bool isO) { 1796 unsigned Size = VT.getScalarSizeInBits(); 1797 return getConstant(APInt(Size, Val, /*isSigned=*/true), DL, VT, isT, isO); 1798 } 1799 1800 SDValue SelectionDAG::getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget, 1801 bool IsOpaque) { 1802 return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT, 1803 IsTarget, IsOpaque); 1804 } 1805 1806 SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, 1807 bool isTarget) { 1808 return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); 1809 } 1810 1811 SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, 1812 const SDLoc &DL) { 1813 assert(VT.isInteger() && "Shift amount is not an integer type!"); 1814 EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout()); 1815 return getConstant(Val, DL, ShiftVT); 1816 } 1817 1818 SDValue SelectionDAG::getShiftAmountConstant(const APInt &Val, EVT VT, 1819 const SDLoc &DL) { 1820 assert(Val.ult(VT.getScalarSizeInBits()) && "Out of range shift"); 1821 return getShiftAmountConstant(Val.getZExtValue(), VT, DL); 1822 } 1823 1824 SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL, 1825 bool isTarget) { 1826 return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget); 1827 } 1828 1829 SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, 1830 bool isTarget) { 1831 return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); 1832 } 1833 1834 SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, 1835 EVT VT, bool isTarget) { 1836 assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); 1837 1838 EVT EltVT = VT.getScalarType(); 1839 const ConstantFP *Elt = &V; 1840 1841 // Vector splats are explicit within the DAG, with ConstantFPSDNode holding 1842 // the to-be-splatted scalar ConstantFP. 1843 if (isa<VectorType>(Elt->getType())) 1844 Elt = ConstantFP::get(*getContext(), Elt->getValue()); 1845 1846 // Do the map lookup using the actual bit pattern for the floating point 1847 // value, so that we don't have problems with 0.0 comparing equal to -0.0, and 1848 // we don't have issues with SNANs. 1849 unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; 1850 SDVTList VTs = getVTList(EltVT); 1851 FoldingSetNodeID ID; 1852 AddNodeIDNode(ID, Opc, VTs, {}); 1853 ID.AddPointer(Elt); 1854 void *IP = nullptr; 1855 SDNode *N = nullptr; 1856 if ((N = FindNodeOrInsertPos(ID, DL, IP))) 1857 if (!VT.isVector()) 1858 return SDValue(N, 0); 1859 1860 if (!N) { 1861 N = newSDNode<ConstantFPSDNode>(isTarget, Elt, VTs); 1862 CSEMap.InsertNode(N, IP); 1863 InsertNode(N); 1864 } 1865 1866 SDValue Result(N, 0); 1867 if (VT.isVector()) 1868 Result = getSplat(VT, DL, Result); 1869 NewSDValueDbgMsg(Result, "Creating fp constant: ", this); 1870 return Result; 1871 } 1872 1873 SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, 1874 bool isTarget) { 1875 EVT EltVT = VT.getScalarType(); 1876 if (EltVT == MVT::f32) 1877 return getConstantFP(APFloat((float)Val), DL, VT, isTarget); 1878 if (EltVT == MVT::f64) 1879 return getConstantFP(APFloat(Val), DL, VT, isTarget); 1880 if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || 1881 EltVT == MVT::f16 || EltVT == MVT::bf16) { 1882 bool Ignored; 1883 APFloat APF = APFloat(Val); 1884 APF.convert(EltVT.getFltSemantics(), APFloat::rmNearestTiesToEven, 1885 &Ignored); 1886 return getConstantFP(APF, DL, VT, isTarget); 1887 } 1888 llvm_unreachable("Unsupported type in getConstantFP"); 1889 } 1890 1891 SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, 1892 EVT VT, int64_t Offset, bool isTargetGA, 1893 unsigned TargetFlags) { 1894 assert((TargetFlags == 0 || isTargetGA) && 1895 "Cannot set target flags on target-independent globals"); 1896 1897 // Truncate (with sign-extension) the offset value to the pointer size. 1898 unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); 1899 if (BitWidth < 64) 1900 Offset = SignExtend64(Offset, BitWidth); 1901 1902 unsigned Opc; 1903 if (GV->isThreadLocal()) 1904 Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; 1905 else 1906 Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; 1907 1908 SDVTList VTs = getVTList(VT); 1909 FoldingSetNodeID ID; 1910 AddNodeIDNode(ID, Opc, VTs, {}); 1911 ID.AddPointer(GV); 1912 ID.AddInteger(Offset); 1913 ID.AddInteger(TargetFlags); 1914 void *IP = nullptr; 1915 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) 1916 return SDValue(E, 0); 1917 1918 auto *N = newSDNode<GlobalAddressSDNode>( 1919 Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VTs, Offset, TargetFlags); 1920 CSEMap.InsertNode(N, IP); 1921 InsertNode(N); 1922 return SDValue(N, 0); 1923 } 1924 1925 SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { 1926 unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; 1927 SDVTList VTs = getVTList(VT); 1928 FoldingSetNodeID ID; 1929 AddNodeIDNode(ID, Opc, VTs, {}); 1930 ID.AddInteger(FI); 1931 void *IP = nullptr; 1932 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 1933 return SDValue(E, 0); 1934 1935 auto *N = newSDNode<FrameIndexSDNode>(FI, VTs, isTarget); 1936 CSEMap.InsertNode(N, IP); 1937 InsertNode(N); 1938 return SDValue(N, 0); 1939 } 1940 1941 SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, 1942 unsigned TargetFlags) { 1943 assert((TargetFlags == 0 || isTarget) && 1944 "Cannot set target flags on target-independent jump tables"); 1945 unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; 1946 SDVTList VTs = getVTList(VT); 1947 FoldingSetNodeID ID; 1948 AddNodeIDNode(ID, Opc, VTs, {}); 1949 ID.AddInteger(JTI); 1950 ID.AddInteger(TargetFlags); 1951 void *IP = nullptr; 1952 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 1953 return SDValue(E, 0); 1954 1955 auto *N = newSDNode<JumpTableSDNode>(JTI, VTs, isTarget, TargetFlags); 1956 CSEMap.InsertNode(N, IP); 1957 InsertNode(N); 1958 return SDValue(N, 0); 1959 } 1960 1961 SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain, 1962 const SDLoc &DL) { 1963 EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout()); 1964 return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain, 1965 getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true)); 1966 } 1967 1968 SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, 1969 MaybeAlign Alignment, int Offset, 1970 bool isTarget, unsigned TargetFlags) { 1971 assert((TargetFlags == 0 || isTarget) && 1972 "Cannot set target flags on target-independent globals"); 1973 if (!Alignment) 1974 Alignment = shouldOptForSize() 1975 ? getDataLayout().getABITypeAlign(C->getType()) 1976 : getDataLayout().getPrefTypeAlign(C->getType()); 1977 unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; 1978 SDVTList VTs = getVTList(VT); 1979 FoldingSetNodeID ID; 1980 AddNodeIDNode(ID, Opc, VTs, {}); 1981 ID.AddInteger(Alignment->value()); 1982 ID.AddInteger(Offset); 1983 ID.AddPointer(C); 1984 ID.AddInteger(TargetFlags); 1985 void *IP = nullptr; 1986 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 1987 return SDValue(E, 0); 1988 1989 auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment, 1990 TargetFlags); 1991 CSEMap.InsertNode(N, IP); 1992 InsertNode(N); 1993 SDValue V = SDValue(N, 0); 1994 NewSDValueDbgMsg(V, "Creating new constant pool: ", this); 1995 return V; 1996 } 1997 1998 SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, 1999 MaybeAlign Alignment, int Offset, 2000 bool isTarget, unsigned TargetFlags) { 2001 assert((TargetFlags == 0 || isTarget) && 2002 "Cannot set target flags on target-independent globals"); 2003 if (!Alignment) 2004 Alignment = getDataLayout().getPrefTypeAlign(C->getType()); 2005 unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; 2006 SDVTList VTs = getVTList(VT); 2007 FoldingSetNodeID ID; 2008 AddNodeIDNode(ID, Opc, VTs, {}); 2009 ID.AddInteger(Alignment->value()); 2010 ID.AddInteger(Offset); 2011 C->addSelectionDAGCSEId(ID); 2012 ID.AddInteger(TargetFlags); 2013 void *IP = nullptr; 2014 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2015 return SDValue(E, 0); 2016 2017 auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VTs, Offset, *Alignment, 2018 TargetFlags); 2019 CSEMap.InsertNode(N, IP); 2020 InsertNode(N); 2021 return SDValue(N, 0); 2022 } 2023 2024 SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { 2025 FoldingSetNodeID ID; 2026 AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), {}); 2027 ID.AddPointer(MBB); 2028 void *IP = nullptr; 2029 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2030 return SDValue(E, 0); 2031 2032 auto *N = newSDNode<BasicBlockSDNode>(MBB); 2033 CSEMap.InsertNode(N, IP); 2034 InsertNode(N); 2035 return SDValue(N, 0); 2036 } 2037 2038 SDValue SelectionDAG::getValueType(EVT VT) { 2039 if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= 2040 ValueTypeNodes.size()) 2041 ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); 2042 2043 SDNode *&N = VT.isExtended() ? 
2044 ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; 2045 2046 if (N) return SDValue(N, 0); 2047 N = newSDNode<VTSDNode>(VT); 2048 InsertNode(N); 2049 return SDValue(N, 0); 2050 } 2051 2052 SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { 2053 SDNode *&N = ExternalSymbols[Sym]; 2054 if (N) return SDValue(N, 0); 2055 N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, getVTList(VT)); 2056 InsertNode(N); 2057 return SDValue(N, 0); 2058 } 2059 2060 SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { 2061 SDNode *&N = MCSymbols[Sym]; 2062 if (N) 2063 return SDValue(N, 0); 2064 N = newSDNode<MCSymbolSDNode>(Sym, getVTList(VT)); 2065 InsertNode(N); 2066 return SDValue(N, 0); 2067 } 2068 2069 SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, 2070 unsigned TargetFlags) { 2071 SDNode *&N = 2072 TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; 2073 if (N) return SDValue(N, 0); 2074 N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, getVTList(VT)); 2075 InsertNode(N); 2076 return SDValue(N, 0); 2077 } 2078 2079 SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { 2080 if ((unsigned)Cond >= CondCodeNodes.size()) 2081 CondCodeNodes.resize(Cond+1); 2082 2083 if (!CondCodeNodes[Cond]) { 2084 auto *N = newSDNode<CondCodeSDNode>(Cond); 2085 CondCodeNodes[Cond] = N; 2086 InsertNode(N); 2087 } 2088 2089 return SDValue(CondCodeNodes[Cond], 0); 2090 } 2091 2092 SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm, 2093 bool ConstantFold) { 2094 assert(MulImm.getBitWidth() == VT.getSizeInBits() && 2095 "APInt size does not match type size!"); 2096 2097 if (MulImm == 0) 2098 return getConstant(0, DL, VT); 2099 2100 if (ConstantFold) { 2101 const MachineFunction &MF = getMachineFunction(); 2102 const Function &F = MF.getFunction(); 2103 ConstantRange CR = getVScaleRange(&F, 64); 2104 if (const APInt *C = CR.getSingleElement()) 2105 return getConstant(MulImm * C->getZExtValue(), DL, VT); 2106 } 2107 2108 return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT)); 2109 } 2110 2111 SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, 2112 bool ConstantFold) { 2113 if (EC.isScalable()) 2114 return getVScale(DL, VT, 2115 APInt(VT.getSizeInBits(), EC.getKnownMinValue())); 2116 2117 return getConstant(EC.getKnownMinValue(), DL, VT); 2118 } 2119 2120 SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) { 2121 APInt One(ResVT.getScalarSizeInBits(), 1); 2122 return getStepVector(DL, ResVT, One); 2123 } 2124 2125 SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, 2126 const APInt &StepVal) { 2127 assert(ResVT.getScalarSizeInBits() == StepVal.getBitWidth()); 2128 if (ResVT.isScalableVector()) 2129 return getNode( 2130 ISD::STEP_VECTOR, DL, ResVT, 2131 getTargetConstant(StepVal, DL, ResVT.getVectorElementType())); 2132 2133 SmallVector<SDValue, 16> OpsStepConstants; 2134 for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++) 2135 OpsStepConstants.push_back( 2136 getConstant(StepVal * i, DL, ResVT.getVectorElementType())); 2137 return getBuildVector(ResVT, DL, OpsStepConstants); 2138 } 2139 2140 /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that 2141 /// point at N1 to point at N2 and indices that point at N2 to point at N1. 
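/// For example, with 4-element vectors and M = {0, 5, 2, 7}, commuting swaps
/// N1 with N2 and rewrites the mask to {4, 1, 6, 3}.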
2142 static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { 2143 std::swap(N1, N2); 2144 ShuffleVectorSDNode::commuteMask(M); 2145 } 2146 2147 SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, 2148 SDValue N2, ArrayRef<int> Mask) { 2149 assert(VT.getVectorNumElements() == Mask.size() && 2150 "Must have the same number of vector elements as mask elements!"); 2151 assert(VT == N1.getValueType() && VT == N2.getValueType() && 2152 "Invalid VECTOR_SHUFFLE"); 2153 2154 // Canonicalize shuffle undef, undef -> undef 2155 if (N1.isUndef() && N2.isUndef()) 2156 return getUNDEF(VT); 2157 2158 // Validate that all indices in Mask are within the range of the elements 2159 // input to the shuffle. 2160 int NElts = Mask.size(); 2161 assert(llvm::all_of(Mask, 2162 [&](int M) { return M < (NElts * 2) && M >= -1; }) && 2163 "Index out of range"); 2164 2165 // Copy the mask so we can do any needed cleanup. 2166 SmallVector<int, 8> MaskVec(Mask); 2167 2168 // Canonicalize shuffle v, v -> v, undef 2169 if (N1 == N2) { 2170 N2 = getUNDEF(VT); 2171 for (int i = 0; i != NElts; ++i) 2172 if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; 2173 } 2174 2175 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 2176 if (N1.isUndef()) 2177 commuteShuffle(N1, N2, MaskVec); 2178 2179 if (TLI->hasVectorBlend()) { 2180 // If shuffling a splat, try to blend the splat instead. We do this here so 2181 // that even when this arises during lowering we don't have to re-handle it. 2182 auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { 2183 BitVector UndefElements; 2184 SDValue Splat = BV->getSplatValue(&UndefElements); 2185 if (!Splat) 2186 return; 2187 2188 for (int i = 0; i < NElts; ++i) { 2189 if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) 2190 continue; 2191 2192 // If this input comes from undef, mark it as such. 2193 if (UndefElements[MaskVec[i] - Offset]) { 2194 MaskVec[i] = -1; 2195 continue; 2196 } 2197 2198 // If we can blend a non-undef lane, use that instead. 2199 if (!UndefElements[i]) 2200 MaskVec[i] = i + Offset; 2201 } 2202 }; 2203 if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) 2204 BlendSplat(N1BV, 0); 2205 if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) 2206 BlendSplat(N2BV, NElts); 2207 } 2208 2209 // Canonicalize all index into lhs, -> shuffle lhs, undef 2210 // Canonicalize all index into rhs, -> shuffle rhs, undef 2211 bool AllLHS = true, AllRHS = true; 2212 bool N2Undef = N2.isUndef(); 2213 for (int i = 0; i != NElts; ++i) { 2214 if (MaskVec[i] >= NElts) { 2215 if (N2Undef) 2216 MaskVec[i] = -1; 2217 else 2218 AllLHS = false; 2219 } else if (MaskVec[i] >= 0) { 2220 AllRHS = false; 2221 } 2222 } 2223 if (AllLHS && AllRHS) 2224 return getUNDEF(VT); 2225 if (AllLHS && !N2Undef) 2226 N2 = getUNDEF(VT); 2227 if (AllRHS) { 2228 N1 = getUNDEF(VT); 2229 commuteShuffle(N1, N2, MaskVec); 2230 } 2231 // Reset our undef status after accounting for the mask. 2232 N2Undef = N2.isUndef(); 2233 // Re-check whether both sides ended up undef. 2234 if (N1.isUndef() && N2Undef) 2235 return getUNDEF(VT); 2236 2237 // If Identity shuffle return that node. 2238 bool Identity = true, AllSame = true; 2239 for (int i = 0; i != NElts; ++i) { 2240 if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; 2241 if (MaskVec[i] != MaskVec[0]) AllSame = false; 2242 } 2243 if (Identity && NElts) 2244 return N1; 2245 2246 // Shuffling a constant splat doesn't change the result. 
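  // e.g. shuffling (build_vector X, X, X, X) with any mask still yields a
  // vector of all X, so the splat operand can be returned directly (handled
  // below, provided no undef lanes are involved).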
2247 if (N2Undef) { 2248 SDValue V = N1; 2249 2250 // Look through any bitcasts. We check that these don't change the number 2251 // (and size) of elements and just change their types. 2252 while (V.getOpcode() == ISD::BITCAST) 2253 V = V->getOperand(0); 2254 2255 // A splat should always show up as a build vector node. 2256 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { 2257 BitVector UndefElements; 2258 SDValue Splat = BV->getSplatValue(&UndefElements); 2259 // If this is a splat of an undef, shuffling it is also undef. 2260 if (Splat && Splat.isUndef()) 2261 return getUNDEF(VT); 2262 2263 bool SameNumElts = 2264 V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); 2265 2266 // We can only skip the shuffle if there is a splatted 2267 // value and no undef lanes are rearranged by the shuffle. 2268 if (Splat && UndefElements.none()) { 2269 // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the 2270 // element counts match or the splatted value is a zero constant. 2271 if (SameNumElts || isNullConstant(Splat)) 2272 return N1; 2273 } 2274 2275 // If the shuffle itself creates a splat, build the vector directly. 2276 if (AllSame && SameNumElts) { 2277 EVT BuildVT = BV->getValueType(0); 2278 const SDValue &Splatted = BV->getOperand(MaskVec[0]); 2279 SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted); 2280 2281 // We may have jumped through bitcasts, so the type of the 2282 // BUILD_VECTOR may not match the type of the shuffle. 2283 if (BuildVT != VT) 2284 NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); 2285 return NewBV; 2286 } 2287 } 2288 } 2289 2290 SDVTList VTs = getVTList(VT); 2291 FoldingSetNodeID ID; 2292 SDValue Ops[2] = { N1, N2 }; 2293 AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, VTs, Ops); 2294 for (int i = 0; i != NElts; ++i) 2295 ID.AddInteger(MaskVec[i]); 2296 2297 void* IP = nullptr; 2298 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 2299 return SDValue(E, 0); 2300 2301 // Allocate the mask array for the node out of the BumpPtrAllocator, since 2302 // SDNode doesn't have access to it. This memory will be "leaked" when 2303 // the node is deallocated, but recovered when the NodeAllocator is released.
2304 int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); 2305 llvm::copy(MaskVec, MaskAlloc); 2306 2307 auto *N = newSDNode<ShuffleVectorSDNode>(VTs, dl.getIROrder(), 2308 dl.getDebugLoc(), MaskAlloc); 2309 createOperands(N, Ops); 2310 2311 CSEMap.InsertNode(N, IP); 2312 InsertNode(N); 2313 SDValue V = SDValue(N, 0); 2314 NewSDValueDbgMsg(V, "Creating new node: ", this); 2315 return V; 2316 } 2317 2318 SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { 2319 EVT VT = SV.getValueType(0); 2320 SmallVector<int, 8> MaskVec(SV.getMask()); 2321 ShuffleVectorSDNode::commuteMask(MaskVec); 2322 2323 SDValue Op0 = SV.getOperand(0); 2324 SDValue Op1 = SV.getOperand(1); 2325 return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); 2326 } 2327 2328 SDValue SelectionDAG::getRegister(Register Reg, EVT VT) { 2329 SDVTList VTs = getVTList(VT); 2330 FoldingSetNodeID ID; 2331 AddNodeIDNode(ID, ISD::Register, VTs, {}); 2332 ID.AddInteger(Reg.id()); 2333 void *IP = nullptr; 2334 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2335 return SDValue(E, 0); 2336 2337 auto *N = newSDNode<RegisterSDNode>(Reg, VTs); 2338 N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA); 2339 CSEMap.InsertNode(N, IP); 2340 InsertNode(N); 2341 return SDValue(N, 0); 2342 } 2343 2344 SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { 2345 FoldingSetNodeID ID; 2346 AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), {}); 2347 ID.AddPointer(RegMask); 2348 void *IP = nullptr; 2349 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2350 return SDValue(E, 0); 2351 2352 auto *N = newSDNode<RegisterMaskSDNode>(RegMask); 2353 CSEMap.InsertNode(N, IP); 2354 InsertNode(N); 2355 return SDValue(N, 0); 2356 } 2357 2358 SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, 2359 MCSymbol *Label) { 2360 return getLabelNode(ISD::EH_LABEL, dl, Root, Label); 2361 } 2362 2363 SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, 2364 SDValue Root, MCSymbol *Label) { 2365 FoldingSetNodeID ID; 2366 SDValue Ops[] = { Root }; 2367 AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); 2368 ID.AddPointer(Label); 2369 void *IP = nullptr; 2370 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2371 return SDValue(E, 0); 2372 2373 auto *N = 2374 newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label); 2375 createOperands(N, Ops); 2376 2377 CSEMap.InsertNode(N, IP); 2378 InsertNode(N); 2379 return SDValue(N, 0); 2380 } 2381 2382 SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, 2383 int64_t Offset, bool isTarget, 2384 unsigned TargetFlags) { 2385 unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; 2386 SDVTList VTs = getVTList(VT); 2387 2388 FoldingSetNodeID ID; 2389 AddNodeIDNode(ID, Opc, VTs, {}); 2390 ID.AddPointer(BA); 2391 ID.AddInteger(Offset); 2392 ID.AddInteger(TargetFlags); 2393 void *IP = nullptr; 2394 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2395 return SDValue(E, 0); 2396 2397 auto *N = newSDNode<BlockAddressSDNode>(Opc, VTs, BA, Offset, TargetFlags); 2398 CSEMap.InsertNode(N, IP); 2399 InsertNode(N); 2400 return SDValue(N, 0); 2401 } 2402 2403 SDValue SelectionDAG::getSrcValue(const Value *V) { 2404 FoldingSetNodeID ID; 2405 AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), {}); 2406 ID.AddPointer(V); 2407 2408 void *IP = nullptr; 2409 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2410 return SDValue(E, 0); 2411 2412 auto *N = newSDNode<SrcValueSDNode>(V); 2413 CSEMap.InsertNode(N, IP); 2414 InsertNode(N); 2415 return SDValue(N, 0); 2416 } 2417 2418 SDValue SelectionDAG::getMDNode(const MDNode *MD) { 2419 FoldingSetNodeID ID; 2420 AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), {}); 2421 ID.AddPointer(MD); 2422 2423 void *IP = nullptr; 2424 if (SDNode *E = FindNodeOrInsertPos(ID, IP)) 2425 return SDValue(E, 0); 2426 2427 auto *N = newSDNode<MDNodeSDNode>(MD); 2428 CSEMap.InsertNode(N, IP); 2429 InsertNode(N); 2430 return SDValue(N, 0); 2431 } 2432 2433 SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { 2434 if (VT == V.getValueType()) 2435 return V; 2436 2437 return getNode(ISD::BITCAST, SDLoc(V), VT, V); 2438 } 2439 2440 SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, 2441 unsigned SrcAS, unsigned DestAS) { 2442 SDVTList VTs = getVTList(VT); 2443 SDValue Ops[] = {Ptr}; 2444 FoldingSetNodeID ID; 2445 AddNodeIDNode(ID, ISD::ADDRSPACECAST, VTs, Ops); 2446 ID.AddInteger(SrcAS); 2447 ID.AddInteger(DestAS); 2448 2449 void *IP = nullptr; 2450 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 2451 return SDValue(E, 0); 2452 2453 auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), 2454 VTs, SrcAS, DestAS); 2455 createOperands(N, Ops); 2456 2457 CSEMap.InsertNode(N, IP); 2458 InsertNode(N); 2459 return SDValue(N, 0); 2460 } 2461 2462 SDValue SelectionDAG::getFreeze(SDValue V) { 2463 return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V); 2464 } 2465 2466 /// getShiftAmountOperand - Return the specified value casted to 2467 /// the target's desired shift amount type. 
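/// For example, an i8 shift amount is zero-extended or truncated to the
/// target's preferred shift-amount type for the shifted value; vector shift
/// amounts are returned unchanged.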
2468 SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { 2469 EVT OpTy = Op.getValueType(); 2470 EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); 2471 if (OpTy == ShTy || OpTy.isVector()) return Op; 2472 2473 return getZExtOrTrunc(Op, SDLoc(Op), ShTy); 2474 } 2475 2476 SDValue SelectionDAG::getPartialReduceAdd(SDLoc DL, EVT ReducedTy, SDValue Op1, 2477 SDValue Op2) { 2478 EVT FullTy = Op2.getValueType(); 2479 2480 unsigned Stride = ReducedTy.getVectorMinNumElements(); 2481 unsigned ScaleFactor = FullTy.getVectorMinNumElements() / Stride; 2482 2483 // Collect all of the subvectors 2484 std::deque<SDValue> Subvectors = {Op1}; 2485 for (unsigned I = 0; I < ScaleFactor; I++) { 2486 auto SourceIndex = getVectorIdxConstant(I * Stride, DL); 2487 Subvectors.push_back( 2488 getNode(ISD::EXTRACT_SUBVECTOR, DL, ReducedTy, {Op2, SourceIndex})); 2489 } 2490 2491 // Flatten the subvector tree 2492 while (Subvectors.size() > 1) { 2493 Subvectors.push_back( 2494 getNode(ISD::ADD, DL, ReducedTy, {Subvectors[0], Subvectors[1]})); 2495 Subvectors.pop_front(); 2496 Subvectors.pop_front(); 2497 } 2498 2499 assert(Subvectors.size() == 1 && 2500 "There should only be one subvector after tree flattening"); 2501 2502 return Subvectors[0]; 2503 } 2504 2505 /// Given a store node \p StoreNode, return true if it is safe to fold that node 2506 /// into \p FPNode, which expands to a library call with output pointers. 2507 static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, 2508 SDNode *FPNode) { 2509 SmallVector<const SDNode *, 8> Worklist; 2510 SmallVector<const SDNode *, 8> DeferredNodes; 2511 SmallPtrSet<const SDNode *, 16> Visited; 2512 2513 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode). 2514 for (SDValue Op : StoreNode->ops()) 2515 if (Op.getNode() != FPNode) 2516 Worklist.push_back(Op.getNode()); 2517 2518 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); 2519 while (!Worklist.empty()) { 2520 const SDNode *Node = Worklist.pop_back_val(); 2521 auto [_, Inserted] = Visited.insert(Node); 2522 if (!Inserted) 2523 continue; 2524 2525 if (MaxSteps > 0 && Visited.size() >= MaxSteps) 2526 return false; 2527 2528 // Reached the FPNode (would result in a cycle). 2529 // OR Reached CALLSEQ_START (would result in nested call sequences). 2530 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START) 2531 return false; 2532 2533 if (Node->getOpcode() == ISD::CALLSEQ_END) { 2534 // Defer looking into call sequences (so we can check we're outside one). 2535 // We still need to look through these for the predecessor check. 2536 DeferredNodes.push_back(Node); 2537 continue; 2538 } 2539 2540 for (SDValue Op : Node->ops()) 2541 Worklist.push_back(Op.getNode()); 2542 } 2543 2544 // True if we're outside a call sequence and don't have the FPNode as a 2545 // predecessor. No cycles or nested call sequences possible. 
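  // The deferred CALLSEQ_END nodes are handed to hasPredecessorHelper as its
  // initial worklist, so chains reachable only through completed call
  // sequences are still searched for FPNode.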
2546 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes, 2547 MaxSteps); 2548 } 2549 2550 bool SelectionDAG::expandMultipleResultFPLibCall( 2551 RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl<SDValue> &Results, 2552 std::optional<unsigned> CallRetResNo) { 2553 LLVMContext &Ctx = *getContext(); 2554 EVT VT = Node->getValueType(0); 2555 unsigned NumResults = Node->getNumValues(); 2556 2557 const char *LCName = TLI->getLibcallName(LC); 2558 if (!LC || !LCName) 2559 return false; 2560 2561 auto getVecDesc = [&]() -> VecDesc const * { 2562 for (bool Masked : {false, true}) { 2563 if (VecDesc const *VD = getLibInfo().getVectorMappingInfo( 2564 LCName, VT.getVectorElementCount(), Masked)) { 2565 return VD; 2566 } 2567 } 2568 return nullptr; 2569 }; 2570 2571 // For vector types, we must find a vector mapping for the libcall. 2572 VecDesc const *VD = nullptr; 2573 if (VT.isVector() && !(VD = getVecDesc())) 2574 return false; 2575 2576 // Find users of the node that store the results (and share input chains). The 2577 // destination pointers can be used instead of creating stack allocations. 2578 SDValue StoresInChain; 2579 SmallVector<StoreSDNode *, 2> ResultStores(NumResults); 2580 for (SDNode *User : Node->users()) { 2581 if (!ISD::isNormalStore(User)) 2582 continue; 2583 auto *ST = cast<StoreSDNode>(User); 2584 SDValue StoreValue = ST->getValue(); 2585 unsigned ResNo = StoreValue.getResNo(); 2586 // Ensure the store corresponds to an output pointer. 2587 if (CallRetResNo == ResNo) 2588 continue; 2589 // Ensure the store to the default address space and not atomic or volatile. 2590 if (!ST->isSimple() || ST->getAddressSpace() != 0) 2591 continue; 2592 // Ensure all store chains are the same (so they don't alias). 2593 if (StoresInChain && ST->getChain() != StoresInChain) 2594 continue; 2595 // Ensure the store is properly aligned. 2596 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx); 2597 if (ST->getAlign() < 2598 getDataLayout().getABITypeAlign(StoreType->getScalarType())) 2599 continue; 2600 // Avoid: 2601 // 1. Creating cyclic dependencies. 2602 // 2. Expanding the node to a call within a call sequence. 2603 if (!canFoldStoreIntoLibCallOutputPointers(ST, Node)) 2604 continue; 2605 ResultStores[ResNo] = ST; 2606 StoresInChain = ST->getChain(); 2607 } 2608 2609 TargetLowering::ArgListTy Args; 2610 auto AddArgListEntry = [&](SDValue Node, Type *Ty) { 2611 TargetLowering::ArgListEntry Entry{}; 2612 Entry.Ty = Ty; 2613 Entry.Node = Node; 2614 Args.push_back(Entry); 2615 }; 2616 2617 // Pass the arguments. 2618 for (const SDValue &Op : Node->op_values()) { 2619 EVT ArgVT = Op.getValueType(); 2620 Type *ArgTy = ArgVT.getTypeForEVT(Ctx); 2621 AddArgListEntry(Op, ArgTy); 2622 } 2623 2624 // Pass the output pointers. 2625 SmallVector<SDValue, 2> ResultPtrs(NumResults); 2626 Type *PointerTy = PointerType::getUnqual(Ctx); 2627 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) { 2628 if (ResNo == CallRetResNo) 2629 continue; 2630 EVT ResVT = Node->getValueType(ResNo); 2631 SDValue ResultPtr = ST ? ST->getBasePtr() : CreateStackTemporary(ResVT); 2632 ResultPtrs[ResNo] = ResultPtr; 2633 AddArgListEntry(ResultPtr, PointerTy); 2634 } 2635 2636 SDLoc DL(Node); 2637 2638 // Pass the vector mask (if required). 
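  // For a masked vector variant, an all-true mask of the target's setcc
  // result type for VT is appended as the final call argument.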
2639 if (VD && VD->isMasked()) { 2640 EVT MaskVT = TLI->getSetCCResultType(getDataLayout(), Ctx, VT); 2641 SDValue Mask = getBoolConstant(true, DL, MaskVT, VT); 2642 AddArgListEntry(Mask, MaskVT.getTypeForEVT(Ctx)); 2643 } 2644 2645 Type *RetType = CallRetResNo.has_value() 2646 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) 2647 : Type::getVoidTy(Ctx); 2648 SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); 2649 SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName, 2650 TLI->getPointerTy(getDataLayout())); 2651 TargetLowering::CallLoweringInfo CLI(*this); 2652 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee( 2653 TLI->getLibcallCallingConv(LC), RetType, Callee, std::move(Args)); 2654 2655 auto [Call, CallChain] = TLI->LowerCallTo(CLI); 2656 2657 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) { 2658 if (ResNo == CallRetResNo) { 2659 Results.push_back(Call); 2660 continue; 2661 } 2662 MachinePointerInfo PtrInfo; 2663 if (StoreSDNode *ST = ResultStores[ResNo]) { 2664 // Replace store with the library call. 2665 ReplaceAllUsesOfValueWith(SDValue(ST, 0), CallChain); 2666 PtrInfo = ST->getPointerInfo(); 2667 } else { 2668 PtrInfo = MachinePointerInfo::getFixedStack( 2669 getMachineFunction(), cast<FrameIndexSDNode>(ResultPtr)->getIndex()); 2670 } 2671 SDValue LoadResult = 2672 getLoad(Node->getValueType(ResNo), DL, CallChain, ResultPtr, PtrInfo); 2673 Results.push_back(LoadResult); 2674 } 2675 2676 if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) { 2677 // FIXME: Find a way to avoid updating the root. This is needed for x86, 2678 // which uses a floating-point stack. If (for example) the node to be 2679 // expanded has two results one floating-point which is returned by the 2680 // call, and one integer result, returned via an output pointer. If only the 2681 // integer result is used then the `CopyFromReg` for the FP result may be 2682 // optimized out. This prevents an FP stack pop from being emitted for it. 2683 // Setting the root like this ensures there will be a use of the 2684 // `CopyFromReg` chain, and ensures the FP pop will be emitted. 2685 SDValue NewRoot = 2686 getNode(ISD::TokenFactor, DL, MVT::Other, getRoot(), CallChain); 2687 setRoot(NewRoot); 2688 // Ensure the new root is reachable from the results. 
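  // Merging the new root into the first result keeps the TokenFactor
  // reachable even when the caller only uses the loaded results.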
2689 Results[0] = getMergeValues({Results[0], NewRoot}, DL); 2690 } 2691 2692 return true; 2693 } 2694 2695 SDValue SelectionDAG::expandVAArg(SDNode *Node) { 2696 SDLoc dl(Node); 2697 const TargetLowering &TLI = getTargetLoweringInfo(); 2698 const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 2699 EVT VT = Node->getValueType(0); 2700 SDValue Tmp1 = Node->getOperand(0); 2701 SDValue Tmp2 = Node->getOperand(1); 2702 const MaybeAlign MA(Node->getConstantOperandVal(3)); 2703 2704 SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, 2705 Tmp2, MachinePointerInfo(V)); 2706 SDValue VAList = VAListLoad; 2707 2708 if (MA && *MA > TLI.getMinStackArgumentAlignment()) { 2709 VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, 2710 getConstant(MA->value() - 1, dl, VAList.getValueType())); 2711 2712 VAList = getNode( 2713 ISD::AND, dl, VAList.getValueType(), VAList, 2714 getSignedConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); 2715 } 2716 2717 // Increment the pointer, VAList, to the next vaarg 2718 Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, 2719 getConstant(getDataLayout().getTypeAllocSize( 2720 VT.getTypeForEVT(*getContext())), 2721 dl, VAList.getValueType())); 2722 // Store the incremented VAList to the legalized pointer 2723 Tmp1 = 2724 getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V)); 2725 // Load the actual argument out of the pointer VAList 2726 return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo()); 2727 } 2728 2729 SDValue SelectionDAG::expandVACopy(SDNode *Node) { 2730 SDLoc dl(Node); 2731 const TargetLowering &TLI = getTargetLoweringInfo(); 2732 // This defaults to loading a pointer from the input and storing it to the 2733 // output, returning the chain. 2734 const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); 2735 const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); 2736 SDValue Tmp1 = 2737 getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0), 2738 Node->getOperand(2), MachinePointerInfo(VS)); 2739 return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), 2740 MachinePointerInfo(VD)); 2741 } 2742 2743 Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) { 2744 const DataLayout &DL = getDataLayout(); 2745 Type *Ty = VT.getTypeForEVT(*getContext()); 2746 Align RedAlign = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); 2747 2748 if (TLI->isTypeLegal(VT) || !VT.isVector()) 2749 return RedAlign; 2750 2751 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); 2752 const Align StackAlign = TFI->getStackAlign(); 2753 2754 // See if we can choose a smaller ABI alignment in cases where it's an 2755 // illegal vector type that will get broken down. 2756 if (RedAlign > StackAlign) { 2757 EVT IntermediateVT; 2758 MVT RegisterVT; 2759 unsigned NumIntermediates; 2760 TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT, 2761 NumIntermediates, RegisterVT); 2762 Ty = IntermediateVT.getTypeForEVT(*getContext()); 2763 Align RedAlign2 = UseABI ? 
DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); 2764 if (RedAlign2 < RedAlign) 2765 RedAlign = RedAlign2; 2766 2767 if (!getMachineFunction().getFrameInfo().isStackRealignable()) 2768 // If the stack is not realignable, the alignment should be limited to the 2769 // StackAlignment 2770 RedAlign = std::min(RedAlign, StackAlign); 2771 } 2772 2773 return RedAlign; 2774 } 2775 2776 SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { 2777 MachineFrameInfo &MFI = MF->getFrameInfo(); 2778 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); 2779 int StackID = 0; 2780 if (Bytes.isScalable()) 2781 StackID = TFI->getStackIDForScalableVectors(); 2782 // The stack id gives an indication of whether the object is scalable or 2783 // not, so it's safe to pass in the minimum size here. 2784 int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment, 2785 false, nullptr, StackID); 2786 return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); 2787 } 2788 2789 SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { 2790 Type *Ty = VT.getTypeForEVT(*getContext()); 2791 Align StackAlign = 2792 std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign)); 2793 return CreateStackTemporary(VT.getStoreSize(), StackAlign); 2794 } 2795 2796 SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { 2797 TypeSize VT1Size = VT1.getStoreSize(); 2798 TypeSize VT2Size = VT2.getStoreSize(); 2799 assert(VT1Size.isScalable() == VT2Size.isScalable() && 2800 "Don't know how to choose the maximum size when creating a stack " 2801 "temporary"); 2802 TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue() 2803 ? VT1Size 2804 : VT2Size; 2805 2806 Type *Ty1 = VT1.getTypeForEVT(*getContext()); 2807 Type *Ty2 = VT2.getTypeForEVT(*getContext()); 2808 const DataLayout &DL = getDataLayout(); 2809 Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2)); 2810 return CreateStackTemporary(Bytes, Align); 2811 } 2812 2813 SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, 2814 ISD::CondCode Cond, const SDLoc &dl) { 2815 EVT OpVT = N1.getValueType(); 2816 2817 auto GetUndefBooleanConstant = [&]() { 2818 if (VT.getScalarType() == MVT::i1 || 2819 TLI->getBooleanContents(OpVT) == 2820 TargetLowering::UndefinedBooleanContent) 2821 return getUNDEF(VT); 2822 // ZeroOrOne / ZeroOrNegative require specific values for the high bits, 2823 // so we cannot use getUNDEF(). Return zero instead. 2824 return getConstant(0, dl, VT); 2825 }; 2826 2827 // These setcc operations always fold. 2828 switch (Cond) { 2829 default: break; 2830 case ISD::SETFALSE: 2831 case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); 2832 case ISD::SETTRUE: 2833 case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); 2834 2835 case ISD::SETOEQ: 2836 case ISD::SETOGT: 2837 case ISD::SETOGE: 2838 case ISD::SETOLT: 2839 case ISD::SETOLE: 2840 case ISD::SETONE: 2841 case ISD::SETO: 2842 case ISD::SETUO: 2843 case ISD::SETUEQ: 2844 case ISD::SETUNE: 2845 assert(!OpVT.isInteger() && "Illegal setcc for integer!"); 2846 break; 2847 } 2848 2849 if (OpVT.isInteger()) { 2850 // For EQ and NE, we can always pick a value for the undef to make the 2851 // predicate pass or fail, so we can return undef. 2852 // Matches behavior in llvm::ConstantFoldCompareInstruction. 2853 // icmp eq/ne X, undef -> undef. 
2854 if ((N1.isUndef() || N2.isUndef()) && 2855 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) 2856 return GetUndefBooleanConstant(); 2857 2858 // If both operands are undef, we can return undef for int comparison. 2859 // icmp undef, undef -> undef. 2860 if (N1.isUndef() && N2.isUndef()) 2861 return GetUndefBooleanConstant(); 2862 2863 // icmp X, X -> true/false 2864 // icmp X, undef -> true/false because undef could be X. 2865 if (N1.isUndef() || N2.isUndef() || N1 == N2) 2866 return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); 2867 } 2868 2869 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { 2870 const APInt &C2 = N2C->getAPIntValue(); 2871 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { 2872 const APInt &C1 = N1C->getAPIntValue(); 2873 2874 return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)), 2875 dl, VT, OpVT); 2876 } 2877 } 2878 2879 auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 2880 auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); 2881 2882 if (N1CFP && N2CFP) { 2883 APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); 2884 switch (Cond) { 2885 default: break; 2886 case ISD::SETEQ: if (R==APFloat::cmpUnordered) 2887 return GetUndefBooleanConstant(); 2888 [[fallthrough]]; 2889 case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, 2890 OpVT); 2891 case ISD::SETNE: if (R==APFloat::cmpUnordered) 2892 return GetUndefBooleanConstant(); 2893 [[fallthrough]]; 2894 case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || 2895 R==APFloat::cmpLessThan, dl, VT, 2896 OpVT); 2897 case ISD::SETLT: if (R==APFloat::cmpUnordered) 2898 return GetUndefBooleanConstant(); 2899 [[fallthrough]]; 2900 case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, 2901 OpVT); 2902 case ISD::SETGT: if (R==APFloat::cmpUnordered) 2903 return GetUndefBooleanConstant(); 2904 [[fallthrough]]; 2905 case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, 2906 VT, OpVT); 2907 case ISD::SETLE: if (R==APFloat::cmpUnordered) 2908 return GetUndefBooleanConstant(); 2909 [[fallthrough]]; 2910 case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || 2911 R==APFloat::cmpEqual, dl, VT, 2912 OpVT); 2913 case ISD::SETGE: if (R==APFloat::cmpUnordered) 2914 return GetUndefBooleanConstant(); 2915 [[fallthrough]]; 2916 case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || 2917 R==APFloat::cmpEqual, dl, VT, OpVT); 2918 case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, 2919 OpVT); 2920 case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, 2921 OpVT); 2922 case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || 2923 R==APFloat::cmpEqual, dl, VT, 2924 OpVT); 2925 case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, 2926 OpVT); 2927 case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || 2928 R==APFloat::cmpLessThan, dl, VT, 2929 OpVT); 2930 case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || 2931 R==APFloat::cmpUnordered, dl, VT, 2932 OpVT); 2933 case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, 2934 VT, OpVT); 2935 case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, 2936 OpVT); 2937 } 2938 } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) { 2939 // Ensure that the constant occurs on the RHS. 
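  // e.g. (setcc 4.0, X, setolt) becomes (setcc X, 4.0, setogt), provided the
  // swapped condition code is legal for this operand type.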
2940 ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); 2941 if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) 2942 return SDValue(); 2943 return getSetCC(dl, VT, N2, N1, SwappedCond); 2944 } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) || 2945 (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) { 2946 // If an operand is known to be a nan (or undef that could be a nan), we can 2947 // fold it. 2948 // Choosing NaN for the undef will always make unordered comparison succeed 2949 // and ordered comparison fails. 2950 // Matches behavior in llvm::ConstantFoldCompareInstruction. 2951 switch (ISD::getUnorderedFlavor(Cond)) { 2952 default: 2953 llvm_unreachable("Unknown flavor!"); 2954 case 0: // Known false. 2955 return getBoolConstant(false, dl, VT, OpVT); 2956 case 1: // Known true. 2957 return getBoolConstant(true, dl, VT, OpVT); 2958 case 2: // Undefined. 2959 return GetUndefBooleanConstant(); 2960 } 2961 } 2962 2963 // Could not fold it. 2964 return SDValue(); 2965 } 2966 2967 /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We 2968 /// use this predicate to simplify operations downstream. 2969 bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { 2970 unsigned BitWidth = Op.getScalarValueSizeInBits(); 2971 return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); 2972 } 2973 2974 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use 2975 /// this predicate to simplify operations downstream. Mask is known to be zero 2976 /// for bits that V cannot have. 2977 bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, 2978 unsigned Depth) const { 2979 return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero); 2980 } 2981 2982 /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in 2983 /// DemandedElts. We use this predicate to simplify operations downstream. 2984 /// Mask is known to be zero for bits that V cannot have. 2985 bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, 2986 const APInt &DemandedElts, 2987 unsigned Depth) const { 2988 return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); 2989 } 2990 2991 /// MaskedVectorIsZero - Return true if 'Op' is known to be zero in 2992 /// DemandedElts. We use this predicate to simplify operations downstream. 2993 bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts, 2994 unsigned Depth /* = 0 */) const { 2995 return computeKnownBits(V, DemandedElts, Depth).isZero(); 2996 } 2997 2998 /// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. 2999 bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, 3000 unsigned Depth) const { 3001 return Mask.isSubsetOf(computeKnownBits(V, Depth).One); 3002 } 3003 3004 APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op, 3005 const APInt &DemandedElts, 3006 unsigned Depth) const { 3007 EVT VT = Op.getValueType(); 3008 assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!"); 3009 3010 unsigned NumElts = VT.getVectorNumElements(); 3011 assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask."); 3012 3013 APInt KnownZeroElements = APInt::getZero(NumElts); 3014 for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) { 3015 if (!DemandedElts[EltIdx]) 3016 continue; // Don't query elements that are not demanded. 
3017 APInt Mask = APInt::getOneBitSet(NumElts, EltIdx); 3018 if (MaskedVectorIsZero(Op, Mask, Depth)) 3019 KnownZeroElements.setBit(EltIdx); 3020 } 3021 return KnownZeroElements; 3022 } 3023 3024 /// isSplatValue - Return true if the vector V has the same value 3025 /// across all DemandedElts. For scalable vectors, we don't know the 3026 /// number of lanes at compile time. Instead, we use a 1 bit APInt 3027 /// to represent a conservative value for all lanes; that is, that 3028 /// one bit value is implicitly splatted across all lanes. 3029 bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, 3030 APInt &UndefElts, unsigned Depth) const { 3031 unsigned Opcode = V.getOpcode(); 3032 EVT VT = V.getValueType(); 3033 assert(VT.isVector() && "Vector type expected"); 3034 assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) && 3035 "scalable demanded bits are ignored"); 3036 3037 if (!DemandedElts) 3038 return false; // No demanded elts, better to assume we don't know anything. 3039 3040 if (Depth >= MaxRecursionDepth) 3041 return false; // Limit search depth. 3042 3043 // Deal with some common cases here that work for both fixed and scalable 3044 // vector types. 3045 switch (Opcode) { 3046 case ISD::SPLAT_VECTOR: 3047 UndefElts = V.getOperand(0).isUndef() 3048 ? APInt::getAllOnes(DemandedElts.getBitWidth()) 3049 : APInt(DemandedElts.getBitWidth(), 0); 3050 return true; 3051 case ISD::ADD: 3052 case ISD::SUB: 3053 case ISD::AND: 3054 case ISD::XOR: 3055 case ISD::OR: { 3056 APInt UndefLHS, UndefRHS; 3057 SDValue LHS = V.getOperand(0); 3058 SDValue RHS = V.getOperand(1); 3059 if (isSplatValue(LHS, DemandedElts, UndefLHS, Depth + 1) && 3060 isSplatValue(RHS, DemandedElts, UndefRHS, Depth + 1)) { 3061 UndefElts = UndefLHS | UndefRHS; 3062 return true; 3063 } 3064 return false; 3065 } 3066 case ISD::ABS: 3067 case ISD::TRUNCATE: 3068 case ISD::SIGN_EXTEND: 3069 case ISD::ZERO_EXTEND: 3070 return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1); 3071 default: 3072 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 3073 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) 3074 return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this, 3075 Depth); 3076 break; 3077 } 3078 3079 // We don't support other cases than those above for scalable vectors at 3080 // the moment. 3081 if (VT.isScalableVector()) 3082 return false; 3083 3084 unsigned NumElts = VT.getVectorNumElements(); 3085 assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); 3086 UndefElts = APInt::getZero(NumElts); 3087 3088 switch (Opcode) { 3089 case ISD::BUILD_VECTOR: { 3090 SDValue Scl; 3091 for (unsigned i = 0; i != NumElts; ++i) { 3092 SDValue Op = V.getOperand(i); 3093 if (Op.isUndef()) { 3094 UndefElts.setBit(i); 3095 continue; 3096 } 3097 if (!DemandedElts[i]) 3098 continue; 3099 if (Scl && Scl != Op) 3100 return false; 3101 Scl = Op; 3102 } 3103 return true; 3104 } 3105 case ISD::VECTOR_SHUFFLE: { 3106 // Check if this is a shuffle node doing a splat or a shuffle of a splat. 
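  // e.g. shuffle V1, V2, <1,1,1,1> splats element 1 of V1; a shuffle whose
  // demanded lanes all read from an operand that is itself a splat counts too.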
3107 APInt DemandedLHS = APInt::getZero(NumElts); 3108 APInt DemandedRHS = APInt::getZero(NumElts); 3109 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask(); 3110 for (int i = 0; i != (int)NumElts; ++i) { 3111 int M = Mask[i]; 3112 if (M < 0) { 3113 UndefElts.setBit(i); 3114 continue; 3115 } 3116 if (!DemandedElts[i]) 3117 continue; 3118 if (M < (int)NumElts) 3119 DemandedLHS.setBit(M); 3120 else 3121 DemandedRHS.setBit(M - NumElts); 3122 } 3123 3124 // If we aren't demanding either op, assume there's no splat. 3125 // If we are demanding both ops, assume there's no splat. 3126 if ((DemandedLHS.isZero() && DemandedRHS.isZero()) || 3127 (!DemandedLHS.isZero() && !DemandedRHS.isZero())) 3128 return false; 3129 3130 // See if the demanded elts of the source op is a splat or we only demand 3131 // one element, which should always be a splat. 3132 // TODO: Handle source ops splats with undefs. 3133 auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) { 3134 APInt SrcUndefs; 3135 return (SrcElts.popcount() == 1) || 3136 (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) && 3137 (SrcElts & SrcUndefs).isZero()); 3138 }; 3139 if (!DemandedLHS.isZero()) 3140 return CheckSplatSrc(V.getOperand(0), DemandedLHS); 3141 return CheckSplatSrc(V.getOperand(1), DemandedRHS); 3142 } 3143 case ISD::EXTRACT_SUBVECTOR: { 3144 // Offset the demanded elts by the subvector index. 3145 SDValue Src = V.getOperand(0); 3146 // We don't support scalable vectors at the moment. 3147 if (Src.getValueType().isScalableVector()) 3148 return false; 3149 uint64_t Idx = V.getConstantOperandVal(1); 3150 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 3151 APInt UndefSrcElts; 3152 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); 3153 if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { 3154 UndefElts = UndefSrcElts.extractBits(NumElts, Idx); 3155 return true; 3156 } 3157 break; 3158 } 3159 case ISD::ANY_EXTEND_VECTOR_INREG: 3160 case ISD::SIGN_EXTEND_VECTOR_INREG: 3161 case ISD::ZERO_EXTEND_VECTOR_INREG: { 3162 // Widen the demanded elts by the src element count. 3163 SDValue Src = V.getOperand(0); 3164 // We don't support scalable vectors at the moment. 3165 if (Src.getValueType().isScalableVector()) 3166 return false; 3167 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 3168 APInt UndefSrcElts; 3169 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); 3170 if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { 3171 UndefElts = UndefSrcElts.trunc(NumElts); 3172 return true; 3173 } 3174 break; 3175 } 3176 case ISD::BITCAST: { 3177 SDValue Src = V.getOperand(0); 3178 EVT SrcVT = Src.getValueType(); 3179 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits(); 3180 unsigned BitWidth = VT.getScalarSizeInBits(); 3181 3182 // Ignore bitcasts from unsupported types. 3183 // TODO: Add fp support? 3184 if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger()) 3185 break; 3186 3187 // Bitcast 'small element' vector to 'large element' vector. 3188 if ((BitWidth % SrcBitWidth) == 0) { 3189 // See if each sub element is a splat. 
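  // e.g. for a v2i64 value bitcast from v8i16, the result is only treated as
  // a splat if each of the four i16 sub-element positions is itself splatted
  // across the demanded i64 elements.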
3190 unsigned Scale = BitWidth / SrcBitWidth; 3191 unsigned NumSrcElts = SrcVT.getVectorNumElements(); 3192 APInt ScaledDemandedElts = 3193 APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); 3194 for (unsigned I = 0; I != Scale; ++I) { 3195 APInt SubUndefElts; 3196 APInt SubDemandedElt = APInt::getOneBitSet(Scale, I); 3197 APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt); 3198 SubDemandedElts &= ScaledDemandedElts; 3199 if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) 3200 return false; 3201 // TODO: Add support for merging sub undef elements. 3202 if (!SubUndefElts.isZero()) 3203 return false; 3204 } 3205 return true; 3206 } 3207 break; 3208 } 3209 } 3210 3211 return false; 3212 } 3213 3214 /// Helper wrapper to main isSplatValue function. 3215 bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const { 3216 EVT VT = V.getValueType(); 3217 assert(VT.isVector() && "Vector type expected"); 3218 3219 APInt UndefElts; 3220 // Since the number of lanes in a scalable vector is unknown at compile time, 3221 // we track one bit which is implicitly broadcast to all lanes. This means 3222 // that all lanes in a scalable vector are considered demanded. 3223 APInt DemandedElts 3224 = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); 3225 return isSplatValue(V, DemandedElts, UndefElts) && 3226 (AllowUndefs || !UndefElts); 3227 } 3228 3229 SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { 3230 V = peekThroughExtractSubvectors(V); 3231 3232 EVT VT = V.getValueType(); 3233 unsigned Opcode = V.getOpcode(); 3234 switch (Opcode) { 3235 default: { 3236 APInt UndefElts; 3237 // Since the number of lanes in a scalable vector is unknown at compile time, 3238 // we track one bit which is implicitly broadcast to all lanes. This means 3239 // that all lanes in a scalable vector are considered demanded. 3240 APInt DemandedElts 3241 = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); 3242 3243 if (isSplatValue(V, DemandedElts, UndefElts)) { 3244 if (VT.isScalableVector()) { 3245 // DemandedElts and UndefElts are ignored for scalable vectors, since 3246 // the only supported cases are SPLAT_VECTOR nodes. 3247 SplatIdx = 0; 3248 } else { 3249 // Handle case where all demanded elements are UNDEF. 3250 if (DemandedElts.isSubsetOf(UndefElts)) { 3251 SplatIdx = 0; 3252 return getUNDEF(VT); 3253 } 3254 SplatIdx = (UndefElts & DemandedElts).countr_one(); 3255 } 3256 return V; 3257 } 3258 break; 3259 } 3260 case ISD::SPLAT_VECTOR: 3261 SplatIdx = 0; 3262 return V; 3263 case ISD::VECTOR_SHUFFLE: { 3264 assert(!VT.isScalableVector()); 3265 // Check if this is a shuffle node doing a splat. 3266 // TODO - remove this and rely purely on SelectionDAG::isSplatValue, 3267 // getTargetVShiftNode currently struggles without the splat source. 
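  // e.g. for shuffle V1, V2, <5,5,5,5> with 4-element operands, the splat
  // source is V2 and the splat index within it is 1.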
3268 auto *SVN = cast<ShuffleVectorSDNode>(V); 3269 if (!SVN->isSplat()) 3270 break; 3271 int Idx = SVN->getSplatIndex(); 3272 int NumElts = V.getValueType().getVectorNumElements(); 3273 SplatIdx = Idx % NumElts; 3274 return V.getOperand(Idx / NumElts); 3275 } 3276 } 3277 3278 return SDValue(); 3279 } 3280 3281 SDValue SelectionDAG::getSplatValue(SDValue V, bool LegalTypes) { 3282 int SplatIdx; 3283 if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) { 3284 EVT SVT = SrcVector.getValueType().getScalarType(); 3285 EVT LegalSVT = SVT; 3286 if (LegalTypes && !TLI->isTypeLegal(SVT)) { 3287 if (!SVT.isInteger()) 3288 return SDValue(); 3289 LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); 3290 if (LegalSVT.bitsLT(SVT)) 3291 return SDValue(); 3292 } 3293 return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), LegalSVT, SrcVector, 3294 getVectorIdxConstant(SplatIdx, SDLoc(V))); 3295 } 3296 return SDValue(); 3297 } 3298 3299 std::optional<ConstantRange> 3300 SelectionDAG::getValidShiftAmountRange(SDValue V, const APInt &DemandedElts, 3301 unsigned Depth) const { 3302 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3303 V.getOpcode() == ISD::SRA) && 3304 "Unknown shift node"); 3305 // Shifting more than the bitwidth is not valid. 3306 unsigned BitWidth = V.getScalarValueSizeInBits(); 3307 3308 if (auto *Cst = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 3309 const APInt &ShAmt = Cst->getAPIntValue(); 3310 if (ShAmt.uge(BitWidth)) 3311 return std::nullopt; 3312 return ConstantRange(ShAmt); 3313 } 3314 3315 if (auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1))) { 3316 const APInt *MinAmt = nullptr, *MaxAmt = nullptr; 3317 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 3318 if (!DemandedElts[i]) 3319 continue; 3320 auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); 3321 if (!SA) { 3322 MinAmt = MaxAmt = nullptr; 3323 break; 3324 } 3325 const APInt &ShAmt = SA->getAPIntValue(); 3326 if (ShAmt.uge(BitWidth)) 3327 return std::nullopt; 3328 if (!MinAmt || MinAmt->ugt(ShAmt)) 3329 MinAmt = &ShAmt; 3330 if (!MaxAmt || MaxAmt->ult(ShAmt)) 3331 MaxAmt = &ShAmt; 3332 } 3333 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) && 3334 "Failed to find matching min/max shift amounts"); 3335 if (MinAmt && MaxAmt) 3336 return ConstantRange(*MinAmt, *MaxAmt + 1); 3337 } 3338 3339 // Use computeKnownBits to find a hidden constant/knownbits (usually type 3340 // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc. 3341 KnownBits KnownAmt = computeKnownBits(V.getOperand(1), DemandedElts, Depth); 3342 if (KnownAmt.getMaxValue().ult(BitWidth)) 3343 return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false); 3344 3345 return std::nullopt; 3346 } 3347 3348 std::optional<uint64_t> 3349 SelectionDAG::getValidShiftAmount(SDValue V, const APInt &DemandedElts, 3350 unsigned Depth) const { 3351 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3352 V.getOpcode() == ISD::SRA) && 3353 "Unknown shift node"); 3354 if (std::optional<ConstantRange> AmtRange = 3355 getValidShiftAmountRange(V, DemandedElts, Depth)) 3356 if (const APInt *ShAmt = AmtRange->getSingleElement()) 3357 return ShAmt->getZExtValue(); 3358 return std::nullopt; 3359 } 3360 3361 std::optional<uint64_t> 3362 SelectionDAG::getValidShiftAmount(SDValue V, unsigned Depth) const { 3363 EVT VT = V.getValueType(); 3364 APInt DemandedElts = VT.isFixedLengthVector() 3365 ? 
APInt::getAllOnes(VT.getVectorNumElements()) 3366 : APInt(1, 1); 3367 return getValidShiftAmount(V, DemandedElts, Depth); 3368 } 3369 3370 std::optional<uint64_t> 3371 SelectionDAG::getValidMinimumShiftAmount(SDValue V, const APInt &DemandedElts, 3372 unsigned Depth) const { 3373 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3374 V.getOpcode() == ISD::SRA) && 3375 "Unknown shift node"); 3376 if (std::optional<ConstantRange> AmtRange = 3377 getValidShiftAmountRange(V, DemandedElts, Depth)) 3378 return AmtRange->getUnsignedMin().getZExtValue(); 3379 return std::nullopt; 3380 } 3381 3382 std::optional<uint64_t> 3383 SelectionDAG::getValidMinimumShiftAmount(SDValue V, unsigned Depth) const { 3384 EVT VT = V.getValueType(); 3385 APInt DemandedElts = VT.isFixedLengthVector() 3386 ? APInt::getAllOnes(VT.getVectorNumElements()) 3387 : APInt(1, 1); 3388 return getValidMinimumShiftAmount(V, DemandedElts, Depth); 3389 } 3390 3391 std::optional<uint64_t> 3392 SelectionDAG::getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, 3393 unsigned Depth) const { 3394 assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || 3395 V.getOpcode() == ISD::SRA) && 3396 "Unknown shift node"); 3397 if (std::optional<ConstantRange> AmtRange = 3398 getValidShiftAmountRange(V, DemandedElts, Depth)) 3399 return AmtRange->getUnsignedMax().getZExtValue(); 3400 return std::nullopt; 3401 } 3402 3403 std::optional<uint64_t> 3404 SelectionDAG::getValidMaximumShiftAmount(SDValue V, unsigned Depth) const { 3405 EVT VT = V.getValueType(); 3406 APInt DemandedElts = VT.isFixedLengthVector() 3407 ? APInt::getAllOnes(VT.getVectorNumElements()) 3408 : APInt(1, 1); 3409 return getValidMaximumShiftAmount(V, DemandedElts, Depth); 3410 } 3411 3412 /// Determine which bits of Op are known to be either zero or one and return 3413 /// them in Known. For vectors, the known bits are those that are shared by 3414 /// every vector element. 3415 KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { 3416 EVT VT = Op.getValueType(); 3417 3418 // Since the number of lanes in a scalable vector is unknown at compile time, 3419 // we track one bit which is implicitly broadcast to all lanes. This means 3420 // that all lanes in a scalable vector are considered demanded. 3421 APInt DemandedElts = VT.isFixedLengthVector() 3422 ? APInt::getAllOnes(VT.getVectorNumElements()) 3423 : APInt(1, 1); 3424 return computeKnownBits(Op, DemandedElts, Depth); 3425 } 3426 3427 /// Determine which bits of Op are known to be either zero or one and return 3428 /// them in Known. The DemandedElts argument allows us to only collect the known 3429 /// bits that are shared by the requested vector elements. 3430 KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, 3431 unsigned Depth) const { 3432 unsigned BitWidth = Op.getScalarValueSizeInBits(); 3433 3434 KnownBits Known(BitWidth); // Don't know anything. 3435 3436 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 3437 // We know all of the bits for a constant! 3438 return KnownBits::makeConstant(C->getAPIntValue()); 3439 } 3440 if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { 3441 // We know all of the bits for a constant fp! 3442 return KnownBits::makeConstant(C->getValueAPF().bitcastToAPInt()); 3443 } 3444 3445 if (Depth >= MaxRecursionDepth) 3446 return Known; // Limit search depth. 
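  // Known.Zero has a bit set for each bit of Op known to be zero and
  // Known.One for each bit known to be one; e.g. a value known to match
  // 0b?1?0 has Known.One == 0b0100 and Known.Zero == 0b0001.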
3447 3448 KnownBits Known2; 3449 unsigned NumElts = DemandedElts.getBitWidth(); 3450 assert((!Op.getValueType().isFixedLengthVector() || 3451 NumElts == Op.getValueType().getVectorNumElements()) && 3452 "Unexpected vector size"); 3453 3454 if (!DemandedElts) 3455 return Known; // No demanded elts, better to assume we don't know anything. 3456 3457 unsigned Opcode = Op.getOpcode(); 3458 switch (Opcode) { 3459 case ISD::MERGE_VALUES: 3460 return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, 3461 Depth + 1); 3462 case ISD::SPLAT_VECTOR: { 3463 SDValue SrcOp = Op.getOperand(0); 3464 assert(SrcOp.getValueSizeInBits() >= BitWidth && 3465 "Expected SPLAT_VECTOR implicit truncation"); 3466 // Implicitly truncate the bits to match the official semantics of 3467 // SPLAT_VECTOR. 3468 Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth); 3469 break; 3470 } 3471 case ISD::SPLAT_VECTOR_PARTS: { 3472 unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits(); 3473 assert(ScalarSize * Op.getNumOperands() == BitWidth && 3474 "Expected SPLAT_VECTOR_PARTS scalars to cover element width"); 3475 for (auto [I, SrcOp] : enumerate(Op->ops())) { 3476 Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I); 3477 } 3478 break; 3479 } 3480 case ISD::STEP_VECTOR: { 3481 const APInt &Step = Op.getConstantOperandAPInt(0); 3482 3483 if (Step.isPowerOf2()) 3484 Known.Zero.setLowBits(Step.logBase2()); 3485 3486 const Function &F = getMachineFunction().getFunction(); 3487 3488 if (!isUIntN(BitWidth, Op.getValueType().getVectorMinNumElements())) 3489 break; 3490 const APInt MinNumElts = 3491 APInt(BitWidth, Op.getValueType().getVectorMinNumElements()); 3492 3493 bool Overflow; 3494 const APInt MaxNumElts = getVScaleRange(&F, BitWidth) 3495 .getUnsignedMax() 3496 .umul_ov(MinNumElts, Overflow); 3497 if (Overflow) 3498 break; 3499 3500 const APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow); 3501 if (Overflow) 3502 break; 3503 3504 Known.Zero.setHighBits(MaxValue.countl_zero()); 3505 break; 3506 } 3507 case ISD::BUILD_VECTOR: 3508 assert(!Op.getValueType().isScalableVector()); 3509 // Collect the known bits that are shared by every demanded vector element. 3510 Known.Zero.setAllBits(); Known.One.setAllBits(); 3511 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { 3512 if (!DemandedElts[i]) 3513 continue; 3514 3515 SDValue SrcOp = Op.getOperand(i); 3516 Known2 = computeKnownBits(SrcOp, Depth + 1); 3517 3518 // BUILD_VECTOR can implicitly truncate sources, we must handle this. 3519 if (SrcOp.getValueSizeInBits() != BitWidth) { 3520 assert(SrcOp.getValueSizeInBits() > BitWidth && 3521 "Expected BUILD_VECTOR implicit truncation"); 3522 Known2 = Known2.trunc(BitWidth); 3523 } 3524 3525 // Known bits are the values that are shared by every demanded element. 3526 Known = Known.intersectWith(Known2); 3527 3528 // If we don't know any bits, early out. 3529 if (Known.isUnknown()) 3530 break; 3531 } 3532 break; 3533 case ISD::VECTOR_SHUFFLE: { 3534 assert(!Op.getValueType().isScalableVector()); 3535 // Collect the known bits that are shared by every vector element referenced 3536 // by the shuffle. 3537 APInt DemandedLHS, DemandedRHS; 3538 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 3539 assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); 3540 if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, 3541 DemandedLHS, DemandedRHS)) 3542 break; 3543 3544 // Known bits are the values that are shared by every demanded element. 
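    // E.g. intersecting a lane known to be 0b1?00 with a lane known to be
    // 0b1?1? leaves only the top bit known (0b1???).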
3545     Known.Zero.setAllBits(); Known.One.setAllBits();
3546     if (!!DemandedLHS) {
3547       SDValue LHS = Op.getOperand(0);
3548       Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
3549       Known = Known.intersectWith(Known2);
3550     }
3551     // If we don't know any bits, early out.
3552     if (Known.isUnknown())
3553       break;
3554     if (!!DemandedRHS) {
3555       SDValue RHS = Op.getOperand(1);
3556       Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
3557       Known = Known.intersectWith(Known2);
3558     }
3559     break;
3560   }
3561   case ISD::VSCALE: {
3562     const Function &F = getMachineFunction().getFunction();
3563     const APInt &Multiplier = Op.getConstantOperandAPInt(0);
3564     Known = getVScaleRange(&F, BitWidth).multiply(Multiplier).toKnownBits();
3565     break;
3566   }
3567   case ISD::CONCAT_VECTORS: {
3568     if (Op.getValueType().isScalableVector())
3569       break;
3570     // Split DemandedElts and test each of the demanded subvectors.
3571     Known.Zero.setAllBits(); Known.One.setAllBits();
3572     EVT SubVectorVT = Op.getOperand(0).getValueType();
3573     unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
3574     unsigned NumSubVectors = Op.getNumOperands();
3575     for (unsigned i = 0; i != NumSubVectors; ++i) {
3576       APInt DemandedSub =
3577           DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts);
3578       if (!!DemandedSub) {
3579         SDValue Sub = Op.getOperand(i);
3580         Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
3581         Known = Known.intersectWith(Known2);
3582       }
3583       // If we don't know any bits, early out.
3584       if (Known.isUnknown())
3585         break;
3586     }
3587     break;
3588   }
3589   case ISD::INSERT_SUBVECTOR: {
3590     if (Op.getValueType().isScalableVector())
3591       break;
3592     // Demand any elements from the subvector and the remainder from the src
3593     // it's inserted into.
3594     SDValue Src = Op.getOperand(0);
3595     SDValue Sub = Op.getOperand(1);
3596     uint64_t Idx = Op.getConstantOperandVal(2);
3597     unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3598     APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3599     APInt DemandedSrcElts = DemandedElts;
3600     DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3601
3602     Known.One.setAllBits();
3603     Known.Zero.setAllBits();
3604     if (!!DemandedSubElts) {
3605       Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1);
3606       if (Known.isUnknown())
3607         break; // early-out.
3608     }
3609     if (!!DemandedSrcElts) {
3610       Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
3611       Known = Known.intersectWith(Known2);
3612     }
3613     break;
3614   }
3615   case ISD::EXTRACT_SUBVECTOR: {
3616     // Offset the demanded elts by the subvector index.
3617     SDValue Src = Op.getOperand(0);
3618     // Bail until we can represent demanded elements for scalable vectors.
3619     if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector())
3620       break;
3621     uint64_t Idx = Op.getConstantOperandVal(1);
3622     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3623     APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3624     Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
3625     break;
3626   }
3627   case ISD::SCALAR_TO_VECTOR: {
3628     if (Op.getValueType().isScalableVector())
3629       break;
3630     // We know about scalar_to_vector as much as we know about its source,
3631     // which becomes the first element of an otherwise unknown vector.
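    // Bail out unless lane 0 is the only demanded lane (DemandedElts == 1);
    // the remaining lanes are undefined, so nothing is known about them.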
3632 if (DemandedElts != 1) 3633 break; 3634 3635 SDValue N0 = Op.getOperand(0); 3636 Known = computeKnownBits(N0, Depth + 1); 3637 if (N0.getValueSizeInBits() != BitWidth) 3638 Known = Known.trunc(BitWidth); 3639 3640 break; 3641 } 3642 case ISD::BITCAST: { 3643 if (Op.getValueType().isScalableVector()) 3644 break; 3645 3646 SDValue N0 = Op.getOperand(0); 3647 EVT SubVT = N0.getValueType(); 3648 unsigned SubBitWidth = SubVT.getScalarSizeInBits(); 3649 3650 // Ignore bitcasts from unsupported types. 3651 if (!(SubVT.isInteger() || SubVT.isFloatingPoint())) 3652 break; 3653 3654 // Fast handling of 'identity' bitcasts. 3655 if (BitWidth == SubBitWidth) { 3656 Known = computeKnownBits(N0, DemandedElts, Depth + 1); 3657 break; 3658 } 3659 3660 bool IsLE = getDataLayout().isLittleEndian(); 3661 3662 // Bitcast 'small element' vector to 'large element' scalar/vector. 3663 if ((BitWidth % SubBitWidth) == 0) { 3664 assert(N0.getValueType().isVector() && "Expected bitcast from vector"); 3665 3666 // Collect known bits for the (larger) output by collecting the known 3667 // bits from each set of sub elements and shift these into place. 3668 // We need to separately call computeKnownBits for each set of 3669 // sub elements as the knownbits for each is likely to be different. 3670 unsigned SubScale = BitWidth / SubBitWidth; 3671 APInt SubDemandedElts(NumElts * SubScale, 0); 3672 for (unsigned i = 0; i != NumElts; ++i) 3673 if (DemandedElts[i]) 3674 SubDemandedElts.setBit(i * SubScale); 3675 3676 for (unsigned i = 0; i != SubScale; ++i) { 3677 Known2 = computeKnownBits(N0, SubDemandedElts.shl(i), 3678 Depth + 1); 3679 unsigned Shifts = IsLE ? i : SubScale - 1 - i; 3680 Known.insertBits(Known2, SubBitWidth * Shifts); 3681 } 3682 } 3683 3684 // Bitcast 'large element' scalar/vector to 'small element' vector. 3685 if ((SubBitWidth % BitWidth) == 0) { 3686 assert(Op.getValueType().isVector() && "Expected bitcast to vector"); 3687 3688 // Collect known bits for the (smaller) output by collecting the known 3689 // bits from the overlapping larger input elements and extracting the 3690 // sub sections we actually care about. 3691 unsigned SubScale = SubBitWidth / BitWidth; 3692 APInt SubDemandedElts = 3693 APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale); 3694 Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); 3695 3696 Known.Zero.setAllBits(); Known.One.setAllBits(); 3697 for (unsigned i = 0; i != NumElts; ++i) 3698 if (DemandedElts[i]) { 3699 unsigned Shifts = IsLE ? i : NumElts - 1 - i; 3700 unsigned Offset = (Shifts % SubScale) * BitWidth; 3701 Known = Known.intersectWith(Known2.extractBits(BitWidth, Offset)); 3702 // If we don't know any bits, early out. 
3703           if (Known.isUnknown())
3704             break;
3705         }
3706     }
3707     break;
3708   }
3709   case ISD::AND:
3710     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3711     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3712
3713     Known &= Known2;
3714     break;
3715   case ISD::OR:
3716     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3717     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3718
3719     Known |= Known2;
3720     break;
3721   case ISD::XOR:
3722     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3723     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3724
3725     Known ^= Known2;
3726     break;
3727   case ISD::MUL: {
3728     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3729     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3730     bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
3731     // TODO: SelfMultiply can be poison, but not undef.
3732     if (SelfMultiply)
3733       SelfMultiply &= isGuaranteedNotToBeUndefOrPoison(
3734           Op.getOperand(0), DemandedElts, false, Depth + 1);
3735     Known = KnownBits::mul(Known, Known2, SelfMultiply);
3736
3737     // If the multiplication is known not to overflow, the product of a number
3738     // with itself is non-negative. Only do this if we haven't already computed
3739     // the opposite value for the sign bit.
3740     if (Op->getFlags().hasNoSignedWrap() &&
3741         Op.getOperand(0) == Op.getOperand(1) &&
3742         !Known.isNegative())
3743       Known.makeNonNegative();
3744     break;
3745   }
3746   case ISD::MULHU: {
3747     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3748     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3749     Known = KnownBits::mulhu(Known, Known2);
3750     break;
3751   }
3752   case ISD::MULHS: {
3753     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3754     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3755     Known = KnownBits::mulhs(Known, Known2);
3756     break;
3757   }
3758   case ISD::ABDU: {
3759     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3760     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3761     Known = KnownBits::abdu(Known, Known2);
3762     break;
3763   }
3764   case ISD::ABDS: {
3765     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3766     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3767     Known = KnownBits::abds(Known, Known2);
3768     unsigned SignBits1 =
3769         ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
3770     if (SignBits1 == 1)
3771       break;
3772     unsigned SignBits0 =
3773         ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
3774     Known.Zero.setHighBits(std::min(SignBits0, SignBits1) - 1);
3775     break;
3776   }
3777   case ISD::UMUL_LOHI: {
3778     assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
3779     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3780     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3781     bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
3782     if (Op.getResNo() == 0)
3783       Known = KnownBits::mul(Known, Known2, SelfMultiply);
3784     else
3785       Known = KnownBits::mulhu(Known, Known2);
3786     break;
3787   }
3788   case ISD::SMUL_LOHI: {
3789     assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
3790     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
3791     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
3792     bool SelfMultiply =
Op.getOperand(0) == Op.getOperand(1); 3793 if (Op.getResNo() == 0) 3794 Known = KnownBits::mul(Known, Known2, SelfMultiply); 3795 else 3796 Known = KnownBits::mulhs(Known, Known2); 3797 break; 3798 } 3799 case ISD::AVGFLOORU: { 3800 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3801 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3802 Known = KnownBits::avgFloorU(Known, Known2); 3803 break; 3804 } 3805 case ISD::AVGCEILU: { 3806 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3807 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3808 Known = KnownBits::avgCeilU(Known, Known2); 3809 break; 3810 } 3811 case ISD::AVGFLOORS: { 3812 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3813 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3814 Known = KnownBits::avgFloorS(Known, Known2); 3815 break; 3816 } 3817 case ISD::AVGCEILS: { 3818 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3819 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3820 Known = KnownBits::avgCeilS(Known, Known2); 3821 break; 3822 } 3823 case ISD::SELECT: 3824 case ISD::VSELECT: 3825 Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); 3826 // If we don't know any bits, early out. 3827 if (Known.isUnknown()) 3828 break; 3829 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1); 3830 3831 // Only known if known in both the LHS and RHS. 3832 Known = Known.intersectWith(Known2); 3833 break; 3834 case ISD::SELECT_CC: 3835 Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1); 3836 // If we don't know any bits, early out. 3837 if (Known.isUnknown()) 3838 break; 3839 Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); 3840 3841 // Only known if known in both the LHS and RHS. 3842 Known = Known.intersectWith(Known2); 3843 break; 3844 case ISD::SMULO: 3845 case ISD::UMULO: 3846 if (Op.getResNo() != 1) 3847 break; 3848 // The boolean result conforms to getBooleanContents. 3849 // If we know the result of a setcc has the top bits zero, use this info. 3850 // We know that we have an integer-based boolean since these operations 3851 // are only available for integer. 3852 if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == 3853 TargetLowering::ZeroOrOneBooleanContent && 3854 BitWidth > 1) 3855 Known.Zero.setBitsFrom(1); 3856 break; 3857 case ISD::SETCC: 3858 case ISD::SETCCCARRY: 3859 case ISD::STRICT_FSETCC: 3860 case ISD::STRICT_FSETCCS: { 3861 unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; 3862 // If we know the result of a setcc has the top bits zero, use this info. 3863 if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == 3864 TargetLowering::ZeroOrOneBooleanContent && 3865 BitWidth > 1) 3866 Known.Zero.setBitsFrom(1); 3867 break; 3868 } 3869 case ISD::SHL: { 3870 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3871 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3872 3873 bool NUW = Op->getFlags().hasNoUnsignedWrap(); 3874 bool NSW = Op->getFlags().hasNoSignedWrap(); 3875 3876 bool ShAmtNonZero = Known2.isNonZero(); 3877 3878 Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero); 3879 3880 // Minimum shift low bits are known zero. 
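    // E.g. for (shl X, C) where C is known to be at least 3, bits [2:0] of
    // the result are always zero, whatever we know about X.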
3881 if (std::optional<uint64_t> ShMinAmt = 3882 getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) 3883 Known.Zero.setLowBits(*ShMinAmt); 3884 break; 3885 } 3886 case ISD::SRL: 3887 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3888 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3889 Known = KnownBits::lshr(Known, Known2, /*ShAmtNonZero=*/false, 3890 Op->getFlags().hasExact()); 3891 3892 // Minimum shift high bits are known zero. 3893 if (std::optional<uint64_t> ShMinAmt = 3894 getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) 3895 Known.Zero.setHighBits(*ShMinAmt); 3896 break; 3897 case ISD::SRA: 3898 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3899 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3900 Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false, 3901 Op->getFlags().hasExact()); 3902 break; 3903 case ISD::FSHL: 3904 case ISD::FSHR: 3905 if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { 3906 unsigned Amt = C->getAPIntValue().urem(BitWidth); 3907 3908 // For fshl, 0-shift returns the 1st arg. 3909 // For fshr, 0-shift returns the 2nd arg. 3910 if (Amt == 0) { 3911 Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1), 3912 DemandedElts, Depth + 1); 3913 break; 3914 } 3915 3916 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) 3917 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) 3918 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3919 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3920 if (Opcode == ISD::FSHL) { 3921 Known.One <<= Amt; 3922 Known.Zero <<= Amt; 3923 Known2.One.lshrInPlace(BitWidth - Amt); 3924 Known2.Zero.lshrInPlace(BitWidth - Amt); 3925 } else { 3926 Known.One <<= BitWidth - Amt; 3927 Known.Zero <<= BitWidth - Amt; 3928 Known2.One.lshrInPlace(Amt); 3929 Known2.Zero.lshrInPlace(Amt); 3930 } 3931 Known = Known.unionWith(Known2); 3932 } 3933 break; 3934 case ISD::SHL_PARTS: 3935 case ISD::SRA_PARTS: 3936 case ISD::SRL_PARTS: { 3937 assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); 3938 3939 // Collect lo/hi source values and concatenate. 3940 unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); 3941 unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); 3942 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3943 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 3944 Known = Known2.concat(Known); 3945 3946 // Collect shift amount. 3947 Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); 3948 3949 if (Opcode == ISD::SHL_PARTS) 3950 Known = KnownBits::shl(Known, Known2); 3951 else if (Opcode == ISD::SRA_PARTS) 3952 Known = KnownBits::ashr(Known, Known2); 3953 else // if (Opcode == ISD::SRL_PARTS) 3954 Known = KnownBits::lshr(Known, Known2); 3955 3956 // TODO: Minimum shift low/high bits are known zero. 
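    // Result 0 is the low part, bits [LoBits-1:0] of the shifted
    // concatenation; result 1 is the high part, bits [LoBits+HiBits-1:LoBits].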
3957 3958 if (Op.getResNo() == 0) 3959 Known = Known.extractBits(LoBits, 0); 3960 else 3961 Known = Known.extractBits(HiBits, LoBits); 3962 break; 3963 } 3964 case ISD::SIGN_EXTEND_INREG: { 3965 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3966 EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 3967 Known = Known.sextInReg(EVT.getScalarSizeInBits()); 3968 break; 3969 } 3970 case ISD::CTTZ: 3971 case ISD::CTTZ_ZERO_UNDEF: { 3972 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3973 // If we have a known 1, its position is our upper bound. 3974 unsigned PossibleTZ = Known2.countMaxTrailingZeros(); 3975 unsigned LowBits = llvm::bit_width(PossibleTZ); 3976 Known.Zero.setBitsFrom(LowBits); 3977 break; 3978 } 3979 case ISD::CTLZ: 3980 case ISD::CTLZ_ZERO_UNDEF: { 3981 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3982 // If we have a known 1, its position is our upper bound. 3983 unsigned PossibleLZ = Known2.countMaxLeadingZeros(); 3984 unsigned LowBits = llvm::bit_width(PossibleLZ); 3985 Known.Zero.setBitsFrom(LowBits); 3986 break; 3987 } 3988 case ISD::CTPOP: { 3989 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 3990 // If we know some of the bits are zero, they can't be one. 3991 unsigned PossibleOnes = Known2.countMaxPopulation(); 3992 Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes)); 3993 break; 3994 } 3995 case ISD::PARITY: { 3996 // Parity returns 0 everywhere but the LSB. 3997 Known.Zero.setBitsFrom(1); 3998 break; 3999 } 4000 case ISD::MGATHER: 4001 case ISD::MLOAD: { 4002 ISD::LoadExtType ETy = 4003 (Opcode == ISD::MGATHER) 4004 ? cast<MaskedGatherSDNode>(Op)->getExtensionType() 4005 : cast<MaskedLoadSDNode>(Op)->getExtensionType(); 4006 if (ETy == ISD::ZEXTLOAD) { 4007 EVT MemVT = cast<MemSDNode>(Op)->getMemoryVT(); 4008 KnownBits Known0(MemVT.getScalarSizeInBits()); 4009 return Known0.zext(BitWidth); 4010 } 4011 break; 4012 } 4013 case ISD::LOAD: { 4014 LoadSDNode *LD = cast<LoadSDNode>(Op); 4015 const Constant *Cst = TLI->getTargetConstantFromLoad(LD); 4016 if (ISD::isNON_EXTLoad(LD) && Cst) { 4017 // Determine any common known bits from the loaded constant pool value. 4018 Type *CstTy = Cst->getType(); 4019 if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() && 4020 !Op.getValueType().isScalableVector()) { 4021 // If its a vector splat, then we can (quickly) reuse the scalar path. 4022 // NOTE: We assume all elements match and none are UNDEF. 4023 if (CstTy->isVectorTy()) { 4024 if (const Constant *Splat = Cst->getSplatValue()) { 4025 Cst = Splat; 4026 CstTy = Cst->getType(); 4027 } 4028 } 4029 // TODO - do we need to handle different bitwidths? 4030 if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) { 4031 // Iterate across all vector elements finding common known bits. 
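          // E.g. if the demanded constant elements are {1, 3, 1, 3}, bit 0 is
          // known one, bit 1 is unknown and all higher bits are known zero.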
4032           Known.One.setAllBits();
4033           Known.Zero.setAllBits();
4034           for (unsigned i = 0; i != NumElts; ++i) {
4035             if (!DemandedElts[i])
4036               continue;
4037             if (Constant *Elt = Cst->getAggregateElement(i)) {
4038               if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
4039                 const APInt &Value = CInt->getValue();
4040                 Known.One &= Value;
4041                 Known.Zero &= ~Value;
4042                 continue;
4043               }
4044               if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
4045                 APInt Value = CFP->getValueAPF().bitcastToAPInt();
4046                 Known.One &= Value;
4047                 Known.Zero &= ~Value;
4048                 continue;
4049               }
4050             }
4051             Known.One.clearAllBits();
4052             Known.Zero.clearAllBits();
4053             break;
4054           }
4055         } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
4056           if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
4057             Known = KnownBits::makeConstant(CInt->getValue());
4058           } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
4059             Known =
4060                 KnownBits::makeConstant(CFP->getValueAPF().bitcastToAPInt());
4061           }
4062         }
4063       }
4064     } else if (Op.getResNo() == 0) {
4065       KnownBits Known0(!LD->getMemoryVT().isScalableVT()
4066                            ? LD->getMemoryVT().getFixedSizeInBits()
4067                            : BitWidth);
4068       EVT VT = Op.getValueType();
4069       // Fill in any known bits from range information. There are 3 types being
4070       // used: the result VT (same vector elt size as BitWidth), the loaded
4071       // MemoryVT (which may or may not be a vector) and the range metadata's
4072       // original type. The range metadata needs the full range (i.e.
4073       // MemoryVT().getSizeInBits()), which is truncated to the correct elt size
4074       // if it is known. These are then extended to the original VT sizes below.
4075       if (const MDNode *MD = LD->getRanges()) {
4076         computeKnownBitsFromRangeMetadata(*MD, Known0);
4077         if (VT.isVector()) {
4078           // Handle truncation to the first demanded element.
4079           // TODO: Figure out which demanded elements are covered
4080           if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
4081             break;
4082           Known0 = Known0.trunc(BitWidth);
4083         }
4084       }
4085
4086       if (LD->getMemoryVT().isVector())
4087         Known0 = Known0.trunc(LD->getMemoryVT().getScalarSizeInBits());
4088
4089       // Extend the Known bits from memory to the size of the result.
4090       if (ISD::isZEXTLoad(Op.getNode()))
4091         Known = Known0.zext(BitWidth);
4092       else if (ISD::isSEXTLoad(Op.getNode()))
4093         Known = Known0.sext(BitWidth);
4094       else if (ISD::isEXTLoad(Op.getNode()))
4095         Known = Known0.anyext(BitWidth);
4096       else
4097         Known = Known0;
4098       assert(Known.getBitWidth() == BitWidth);
4099       return Known;
4100     }
4101     break;
4102   }
4103   case ISD::ZERO_EXTEND_VECTOR_INREG: {
4104     if (Op.getValueType().isScalableVector())
4105       break;
4106     EVT InVT = Op.getOperand(0).getValueType();
4107     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
4108     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
4109     Known = Known.zext(BitWidth);
4110     break;
4111   }
4112   case ISD::ZERO_EXTEND: {
4113     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
4114     Known = Known.zext(BitWidth);
4115     break;
4116   }
4117   case ISD::SIGN_EXTEND_VECTOR_INREG: {
4118     if (Op.getValueType().isScalableVector())
4119       break;
4120     EVT InVT = Op.getOperand(0).getValueType();
4121     APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
4122     Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
4123     // If the sign bit is known to be zero or one, then sext will extend
4124     // it to the top bits, else it will just zext.
4125 Known = Known.sext(BitWidth); 4126 break; 4127 } 4128 case ISD::SIGN_EXTEND: { 4129 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4130 // If the sign bit is known to be zero or one, then sext will extend 4131 // it to the top bits, else it will just zext. 4132 Known = Known.sext(BitWidth); 4133 break; 4134 } 4135 case ISD::ANY_EXTEND_VECTOR_INREG: { 4136 if (Op.getValueType().isScalableVector()) 4137 break; 4138 EVT InVT = Op.getOperand(0).getValueType(); 4139 APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); 4140 Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); 4141 Known = Known.anyext(BitWidth); 4142 break; 4143 } 4144 case ISD::ANY_EXTEND: { 4145 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4146 Known = Known.anyext(BitWidth); 4147 break; 4148 } 4149 case ISD::TRUNCATE: { 4150 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4151 Known = Known.trunc(BitWidth); 4152 break; 4153 } 4154 case ISD::AssertZext: { 4155 EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 4156 APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); 4157 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4158 Known.Zero |= (~InMask); 4159 Known.One &= (~Known.Zero); 4160 break; 4161 } 4162 case ISD::AssertAlign: { 4163 unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign()); 4164 assert(LogOfAlign != 0); 4165 4166 // TODO: Should use maximum with source 4167 // If a node is guaranteed to be aligned, set low zero bits accordingly as 4168 // well as clearing one bits. 4169 Known.Zero.setLowBits(LogOfAlign); 4170 Known.One.clearLowBits(LogOfAlign); 4171 break; 4172 } 4173 case ISD::FGETSIGN: 4174 // All bits are zero except the low bit. 4175 Known.Zero.setBitsFrom(1); 4176 break; 4177 case ISD::ADD: 4178 case ISD::SUB: { 4179 SDNodeFlags Flags = Op.getNode()->getFlags(); 4180 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4181 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4182 Known = KnownBits::computeForAddSub( 4183 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), 4184 Flags.hasNoUnsignedWrap(), Known, Known2); 4185 break; 4186 } 4187 case ISD::USUBO: 4188 case ISD::SSUBO: 4189 case ISD::USUBO_CARRY: 4190 case ISD::SSUBO_CARRY: 4191 if (Op.getResNo() == 1) { 4192 // If we know the result of a setcc has the top bits zero, use this info. 4193 if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == 4194 TargetLowering::ZeroOrOneBooleanContent && 4195 BitWidth > 1) 4196 Known.Zero.setBitsFrom(1); 4197 break; 4198 } 4199 [[fallthrough]]; 4200 case ISD::SUBC: { 4201 assert(Op.getResNo() == 0 && 4202 "We only compute knownbits for the difference here."); 4203 4204 // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in. 
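    // For the plain subtract forms (USUBO, SSUBO, SUBC) there is no borrow
    // operand, so the borrow is known to be zero.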
4205 KnownBits Borrow(1); 4206 if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) { 4207 Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); 4208 // Borrow has bit width 1 4209 Borrow = Borrow.trunc(1); 4210 } else { 4211 Borrow.setAllZero(); 4212 } 4213 4214 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4215 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4216 Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow); 4217 break; 4218 } 4219 case ISD::UADDO: 4220 case ISD::SADDO: 4221 case ISD::UADDO_CARRY: 4222 case ISD::SADDO_CARRY: 4223 if (Op.getResNo() == 1) { 4224 // If we know the result of a setcc has the top bits zero, use this info. 4225 if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == 4226 TargetLowering::ZeroOrOneBooleanContent && 4227 BitWidth > 1) 4228 Known.Zero.setBitsFrom(1); 4229 break; 4230 } 4231 [[fallthrough]]; 4232 case ISD::ADDC: 4233 case ISD::ADDE: { 4234 assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); 4235 4236 // With ADDE and UADDO_CARRY, a carry bit may be added in. 4237 KnownBits Carry(1); 4238 if (Opcode == ISD::ADDE) 4239 // Can't track carry from glue, set carry to unknown. 4240 Carry.resetAll(); 4241 else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) { 4242 Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); 4243 // Carry has bit width 1 4244 Carry = Carry.trunc(1); 4245 } else { 4246 Carry.setAllZero(); 4247 } 4248 4249 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4250 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4251 Known = KnownBits::computeForAddCarry(Known, Known2, Carry); 4252 break; 4253 } 4254 case ISD::UDIV: { 4255 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4256 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4257 Known = KnownBits::udiv(Known, Known2, Op->getFlags().hasExact()); 4258 break; 4259 } 4260 case ISD::SDIV: { 4261 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4262 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4263 Known = KnownBits::sdiv(Known, Known2, Op->getFlags().hasExact()); 4264 break; 4265 } 4266 case ISD::SREM: { 4267 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4268 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4269 Known = KnownBits::srem(Known, Known2); 4270 break; 4271 } 4272 case ISD::UREM: { 4273 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4274 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4275 Known = KnownBits::urem(Known, Known2); 4276 break; 4277 } 4278 case ISD::EXTRACT_ELEMENT: { 4279 Known = computeKnownBits(Op.getOperand(0), Depth+1); 4280 const unsigned Index = Op.getConstantOperandVal(1); 4281 const unsigned EltBitWidth = Op.getValueSizeInBits(); 4282 4283 // Remove low part of known bits mask 4284 Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); 4285 Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth); 4286 4287 // Remove high part of known bit mask 4288 Known = Known.trunc(EltBitWidth); 4289 break; 4290 } 4291 case ISD::EXTRACT_VECTOR_ELT: { 4292 SDValue InVec = Op.getOperand(0); 4293 SDValue EltNo = Op.getOperand(1); 4294 EVT VecVT = InVec.getValueType(); 4295 // computeKnownBits not yet implemented for scalable vectors. 
4296 if (VecVT.isScalableVector()) 4297 break; 4298 const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); 4299 const unsigned NumSrcElts = VecVT.getVectorNumElements(); 4300 4301 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know 4302 // anything about the extended bits. 4303 if (BitWidth > EltBitWidth) 4304 Known = Known.trunc(EltBitWidth); 4305 4306 // If we know the element index, just demand that vector element, else for 4307 // an unknown element index, ignore DemandedElts and demand them all. 4308 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); 4309 auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); 4310 if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) 4311 DemandedSrcElts = 4312 APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); 4313 4314 Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1); 4315 if (BitWidth > EltBitWidth) 4316 Known = Known.anyext(BitWidth); 4317 break; 4318 } 4319 case ISD::INSERT_VECTOR_ELT: { 4320 if (Op.getValueType().isScalableVector()) 4321 break; 4322 4323 // If we know the element index, split the demand between the 4324 // source vector and the inserted element, otherwise assume we need 4325 // the original demanded vector elements and the value. 4326 SDValue InVec = Op.getOperand(0); 4327 SDValue InVal = Op.getOperand(1); 4328 SDValue EltNo = Op.getOperand(2); 4329 bool DemandedVal = true; 4330 APInt DemandedVecElts = DemandedElts; 4331 auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); 4332 if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { 4333 unsigned EltIdx = CEltNo->getZExtValue(); 4334 DemandedVal = !!DemandedElts[EltIdx]; 4335 DemandedVecElts.clearBit(EltIdx); 4336 } 4337 Known.One.setAllBits(); 4338 Known.Zero.setAllBits(); 4339 if (DemandedVal) { 4340 Known2 = computeKnownBits(InVal, Depth + 1); 4341 Known = Known.intersectWith(Known2.zextOrTrunc(BitWidth)); 4342 } 4343 if (!!DemandedVecElts) { 4344 Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1); 4345 Known = Known.intersectWith(Known2); 4346 } 4347 break; 4348 } 4349 case ISD::BITREVERSE: { 4350 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4351 Known = Known2.reverseBits(); 4352 break; 4353 } 4354 case ISD::BSWAP: { 4355 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4356 Known = Known2.byteSwap(); 4357 break; 4358 } 4359 case ISD::ABS: { 4360 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4361 Known = Known2.abs(); 4362 Known.Zero.setHighBits( 4363 ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1) - 1); 4364 break; 4365 } 4366 case ISD::USUBSAT: { 4367 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4368 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4369 Known = KnownBits::usub_sat(Known, Known2); 4370 break; 4371 } 4372 case ISD::UMIN: { 4373 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4374 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4375 Known = KnownBits::umin(Known, Known2); 4376 break; 4377 } 4378 case ISD::UMAX: { 4379 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4380 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4381 Known = KnownBits::umax(Known, Known2); 4382 break; 4383 } 4384 case ISD::SMIN: 4385 case ISD::SMAX: { 4386 // If we have a clamp pattern, we know that the number of sign bits will be 4387 // the minimum of the clamp min/max range. 
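    // E.g. smax(smin(X, 0x00FF), 0x0010) clamps X to [0x0010, 0x00FF]; for an
    // i16 result that makes the top 8 bits known zero.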
4388 bool IsMax = (Opcode == ISD::SMAX); 4389 ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; 4390 if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) 4391 if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) 4392 CstHigh = 4393 isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); 4394 if (CstLow && CstHigh) { 4395 if (!IsMax) 4396 std::swap(CstLow, CstHigh); 4397 4398 const APInt &ValueLow = CstLow->getAPIntValue(); 4399 const APInt &ValueHigh = CstHigh->getAPIntValue(); 4400 if (ValueLow.sle(ValueHigh)) { 4401 unsigned LowSignBits = ValueLow.getNumSignBits(); 4402 unsigned HighSignBits = ValueHigh.getNumSignBits(); 4403 unsigned MinSignBits = std::min(LowSignBits, HighSignBits); 4404 if (ValueLow.isNegative() && ValueHigh.isNegative()) { 4405 Known.One.setHighBits(MinSignBits); 4406 break; 4407 } 4408 if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { 4409 Known.Zero.setHighBits(MinSignBits); 4410 break; 4411 } 4412 } 4413 } 4414 4415 Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4416 Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 4417 if (IsMax) 4418 Known = KnownBits::smax(Known, Known2); 4419 else 4420 Known = KnownBits::smin(Known, Known2); 4421 4422 // For SMAX, if CstLow is non-negative we know the result will be 4423 // non-negative and thus all sign bits are 0. 4424 // TODO: There's an equivalent of this for smin with negative constant for 4425 // known ones. 4426 if (IsMax && CstLow) { 4427 const APInt &ValueLow = CstLow->getAPIntValue(); 4428 if (ValueLow.isNonNegative()) { 4429 unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4430 Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits())); 4431 } 4432 } 4433 4434 break; 4435 } 4436 case ISD::UINT_TO_FP: { 4437 Known.makeNonNegative(); 4438 break; 4439 } 4440 case ISD::SINT_TO_FP: { 4441 Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); 4442 if (Known2.isNonNegative()) 4443 Known.makeNonNegative(); 4444 else if (Known2.isNegative()) 4445 Known.makeNegative(); 4446 break; 4447 } 4448 case ISD::FP_TO_UINT_SAT: { 4449 // FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT. 4450 EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 4451 Known.Zero |= APInt::getBitsSetFrom(BitWidth, VT.getScalarSizeInBits()); 4452 break; 4453 } 4454 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 4455 if (Op.getResNo() == 1) { 4456 // The boolean result conforms to getBooleanContents. 4457 // If we know the result of a setcc has the top bits zero, use this info. 4458 // We know that we have an integer-based boolean since these operations 4459 // are only available for integer. 4460 if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == 4461 TargetLowering::ZeroOrOneBooleanContent && 4462 BitWidth > 1) 4463 Known.Zero.setBitsFrom(1); 4464 break; 4465 } 4466 [[fallthrough]]; 4467 case ISD::ATOMIC_CMP_SWAP: 4468 case ISD::ATOMIC_SWAP: 4469 case ISD::ATOMIC_LOAD_ADD: 4470 case ISD::ATOMIC_LOAD_SUB: 4471 case ISD::ATOMIC_LOAD_AND: 4472 case ISD::ATOMIC_LOAD_CLR: 4473 case ISD::ATOMIC_LOAD_OR: 4474 case ISD::ATOMIC_LOAD_XOR: 4475 case ISD::ATOMIC_LOAD_NAND: 4476 case ISD::ATOMIC_LOAD_MIN: 4477 case ISD::ATOMIC_LOAD_MAX: 4478 case ISD::ATOMIC_LOAD_UMIN: 4479 case ISD::ATOMIC_LOAD_UMAX: 4480 case ISD::ATOMIC_LOAD: { 4481 unsigned MemBits = 4482 cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits(); 4483 // If we are looking at the loaded value. 
4484 if (Op.getResNo() == 0) { 4485 if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) 4486 Known.Zero.setBitsFrom(MemBits); 4487 else if (Op->getOpcode() == ISD::ATOMIC_LOAD && 4488 cast<AtomicSDNode>(Op)->getExtensionType() == ISD::ZEXTLOAD) 4489 Known.Zero.setBitsFrom(MemBits); 4490 } 4491 break; 4492 } 4493 case ISD::FrameIndex: 4494 case ISD::TargetFrameIndex: 4495 TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(), 4496 Known, getMachineFunction()); 4497 break; 4498 4499 default: 4500 if (Opcode < ISD::BUILTIN_OP_END) 4501 break; 4502 [[fallthrough]]; 4503 case ISD::INTRINSIC_WO_CHAIN: 4504 case ISD::INTRINSIC_W_CHAIN: 4505 case ISD::INTRINSIC_VOID: 4506 // TODO: Probably okay to remove after audit; here to reduce change size 4507 // in initial enablement patch for scalable vectors 4508 if (Op.getValueType().isScalableVector()) 4509 break; 4510 4511 // Allow the target to implement this method for its nodes. 4512 TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); 4513 break; 4514 } 4515 4516 return Known; 4517 } 4518 4519 /// Convert ConstantRange OverflowResult into SelectionDAG::OverflowKind. 4520 static SelectionDAG::OverflowKind mapOverflowResult(ConstantRange::OverflowResult OR) { 4521 switch (OR) { 4522 case ConstantRange::OverflowResult::MayOverflow: 4523 return SelectionDAG::OFK_Sometime; 4524 case ConstantRange::OverflowResult::AlwaysOverflowsLow: 4525 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: 4526 return SelectionDAG::OFK_Always; 4527 case ConstantRange::OverflowResult::NeverOverflows: 4528 return SelectionDAG::OFK_Never; 4529 } 4530 llvm_unreachable("Unknown OverflowResult"); 4531 } 4532 4533 SelectionDAG::OverflowKind 4534 SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const { 4535 // X + 0 never overflow 4536 if (isNullConstant(N1)) 4537 return OFK_Never; 4538 4539 // If both operands each have at least two sign bits, the addition 4540 // cannot overflow. 4541 if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1) 4542 return OFK_Never; 4543 4544 // TODO: Add ConstantRange::signedAddMayOverflow handling. 4545 return OFK_Sometime; 4546 } 4547 4548 SelectionDAG::OverflowKind 4549 SelectionDAG::computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const { 4550 // X + 0 never overflow 4551 if (isNullConstant(N1)) 4552 return OFK_Never; 4553 4554 // mulhi + 1 never overflow 4555 KnownBits N1Known = computeKnownBits(N1); 4556 if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && 4557 N1Known.getMaxValue().ult(2)) 4558 return OFK_Never; 4559 4560 KnownBits N0Known = computeKnownBits(N0); 4561 if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1 && 4562 N0Known.getMaxValue().ult(2)) 4563 return OFK_Never; 4564 4565 // Fallback to ConstantRange::unsignedAddMayOverflow handling. 4566 ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false); 4567 ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false); 4568 return mapOverflowResult(N0Range.unsignedAddMayOverflow(N1Range)); 4569 } 4570 4571 SelectionDAG::OverflowKind 4572 SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const { 4573 // X - 0 never overflow 4574 if (isNullConstant(N1)) 4575 return OFK_Never; 4576 4577 // If both operands each have at least two sign bits, the subtraction 4578 // cannot overflow. 
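  // (Two sign bits confine each value to [-2^(BitWidth-2), 2^(BitWidth-2) - 1],
  // so their difference always fits in the signed range.)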
4579   if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
4580     return OFK_Never;
4581
4582   KnownBits N0Known = computeKnownBits(N0);
4583   KnownBits N1Known = computeKnownBits(N1);
4584   ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true);
4585   ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true);
4586   return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range));
4587 }
4588
4589 SelectionDAG::OverflowKind
4590 SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
4591   // X - 0 never overflow
4592   if (isNullConstant(N1))
4593     return OFK_Never;
4594
4595   KnownBits N0Known = computeKnownBits(N0);
4596   KnownBits N1Known = computeKnownBits(N1);
4597   ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
4598   ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
4599   return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range));
4600 }
4601
4602 SelectionDAG::OverflowKind
4603 SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const {
4604   // X * 0 and X * 1 never overflow.
4605   if (isNullConstant(N1) || isOneConstant(N1))
4606     return OFK_Never;
4607
4608   KnownBits N0Known = computeKnownBits(N0);
4609   KnownBits N1Known = computeKnownBits(N1);
4610   ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
4611   ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
4612   return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range));
4613 }
4614
4615 SelectionDAG::OverflowKind
4616 SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const {
4617   // X * 0 and X * 1 never overflow.
4618   if (isNullConstant(N1) || isOneConstant(N1))
4619     return OFK_Never;
4620
4621   // Get the size of the result.
4622   unsigned BitWidth = N0.getScalarValueSizeInBits();
4623
4624   // Sum of the sign bits.
4625   unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1);
4626
4627   // If we have enough sign bits, then there's no overflow.
4628   if (SignBits > BitWidth + 1)
4629     return OFK_Never;
4630
4631   if (SignBits == BitWidth + 1) {
4632     // The overflow occurs when the true multiplication of the
4633     // operands is the minimum negative number.
4634     KnownBits N0Known = computeKnownBits(N0);
4635     KnownBits N1Known = computeKnownBits(N1);
4636     // If one of the operands is non-negative, then there's no
4637     // overflow.
4638     if (N0Known.isNonNegative() || N1Known.isNonNegative())
4639       return OFK_Never;
4640   }
4641
4642   return OFK_Sometime;
4643 }
4644
4645 bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
4646   if (Depth >= MaxRecursionDepth)
4647     return false; // Limit search depth.
4648
4649   EVT OpVT = Val.getValueType();
4650   unsigned BitWidth = OpVT.getScalarSizeInBits();
4651
4652   // Is the constant a known power of 2?
4653   if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) {
4654         return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
4655       }))
4656     return true;
4657
4658   // A left-shift of a constant one will have exactly one bit set because
4659   // shifting the bit off the end is undefined.
4660   if (Val.getOpcode() == ISD::SHL) {
4661     auto *C = isConstOrConstSplat(Val.getOperand(0));
4662     if (C && C->getAPIntValue() == 1)
4663       return true;
4664     return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) &&
4665            isKnownNeverZero(Val, Depth);
4666   }
4667
4668   // Similarly, a logical right-shift of a constant sign-bit will have exactly
4669   // one bit set.
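  // E.g. for i32, (srl 0x80000000, C) == (1 << (31 - C)) for any in-range C.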
4670 if (Val.getOpcode() == ISD::SRL) { 4671 auto *C = isConstOrConstSplat(Val.getOperand(0)); 4672 if (C && C->getAPIntValue().isSignMask()) 4673 return true; 4674 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) && 4675 isKnownNeverZero(Val, Depth); 4676 } 4677 4678 if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR) 4679 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4680 4681 // Are all operands of a build vector constant powers of two? 4682 if (Val.getOpcode() == ISD::BUILD_VECTOR) 4683 if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) { 4684 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) 4685 return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); 4686 return false; 4687 })) 4688 return true; 4689 4690 // Is the operand of a splat vector a constant power of two? 4691 if (Val.getOpcode() == ISD::SPLAT_VECTOR) 4692 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0))) 4693 if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2()) 4694 return true; 4695 4696 // vscale(power-of-two) is a power-of-two for some targets 4697 if (Val.getOpcode() == ISD::VSCALE && 4698 getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() && 4699 isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1)) 4700 return true; 4701 4702 if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX || 4703 Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX) 4704 return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) && 4705 isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4706 4707 if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT) 4708 return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) && 4709 isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1); 4710 4711 // Looking for `x & -x` pattern: 4712 // If x == 0: 4713 // x & -x -> 0 4714 // If x != 0: 4715 // x & -x -> non-zero pow2 4716 // so if we find the pattern return whether we know `x` is non-zero. 4717 SDValue X; 4718 if (sd_match(Val, m_And(m_Value(X), m_Neg(m_Deferred(X))))) 4719 return isKnownNeverZero(X, Depth); 4720 4721 if (Val.getOpcode() == ISD::ZERO_EXTEND) 4722 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4723 4724 // More could be done here, though the above checks are enough 4725 // to handle some common cases. 4726 return false; 4727 } 4728 4729 bool SelectionDAG::isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth) const { 4730 if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Val, true)) 4731 return C1->getValueAPF().getExactLog2Abs() >= 0; 4732 4733 if (Val.getOpcode() == ISD::UINT_TO_FP || Val.getOpcode() == ISD::SINT_TO_FP) 4734 return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); 4735 4736 return false; 4737 } 4738 4739 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { 4740 EVT VT = Op.getValueType(); 4741 4742 // Since the number of lanes in a scalable vector is unknown at compile time, 4743 // we track one bit which is implicitly broadcast to all lanes. This means 4744 // that all lanes in a scalable vector are considered demanded. 4745 APInt DemandedElts = VT.isFixedLengthVector() 4746 ? 
APInt::getAllOnes(VT.getVectorNumElements()) 4747 : APInt(1, 1); 4748 return ComputeNumSignBits(Op, DemandedElts, Depth); 4749 } 4750 4751 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, 4752 unsigned Depth) const { 4753 EVT VT = Op.getValueType(); 4754 assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!"); 4755 unsigned VTBits = VT.getScalarSizeInBits(); 4756 unsigned NumElts = DemandedElts.getBitWidth(); 4757 unsigned Tmp, Tmp2; 4758 unsigned FirstAnswer = 1; 4759 4760 if (auto *C = dyn_cast<ConstantSDNode>(Op)) { 4761 const APInt &Val = C->getAPIntValue(); 4762 return Val.getNumSignBits(); 4763 } 4764 4765 if (Depth >= MaxRecursionDepth) 4766 return 1; // Limit search depth. 4767 4768 if (!DemandedElts) 4769 return 1; // No demanded elts, better to assume we don't know anything. 4770 4771 unsigned Opcode = Op.getOpcode(); 4772 switch (Opcode) { 4773 default: break; 4774 case ISD::AssertSext: 4775 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); 4776 return VTBits-Tmp+1; 4777 case ISD::AssertZext: 4778 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); 4779 return VTBits-Tmp; 4780 case ISD::MERGE_VALUES: 4781 return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, 4782 Depth + 1); 4783 case ISD::SPLAT_VECTOR: { 4784 // Check if the sign bits of source go down as far as the truncated value. 4785 unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits(); 4786 unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 4787 if (NumSrcSignBits > (NumSrcBits - VTBits)) 4788 return NumSrcSignBits - (NumSrcBits - VTBits); 4789 break; 4790 } 4791 case ISD::BUILD_VECTOR: 4792 assert(!VT.isScalableVector()); 4793 Tmp = VTBits; 4794 for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { 4795 if (!DemandedElts[i]) 4796 continue; 4797 4798 SDValue SrcOp = Op.getOperand(i); 4799 // BUILD_VECTOR can implicitly truncate sources, we handle this specially 4800 // for constant nodes to ensure we only look at the sign bits. 4801 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SrcOp)) { 4802 APInt T = C->getAPIntValue().trunc(VTBits); 4803 Tmp2 = T.getNumSignBits(); 4804 } else { 4805 Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1); 4806 4807 if (SrcOp.getValueSizeInBits() != VTBits) { 4808 assert(SrcOp.getValueSizeInBits() > VTBits && 4809 "Expected BUILD_VECTOR implicit truncation"); 4810 unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits; 4811 Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1); 4812 } 4813 } 4814 Tmp = std::min(Tmp, Tmp2); 4815 } 4816 return Tmp; 4817 4818 case ISD::VECTOR_SHUFFLE: { 4819 // Collect the minimum number of sign bits that are shared by every vector 4820 // element referenced by the shuffle. 4821 APInt DemandedLHS, DemandedRHS; 4822 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 4823 assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); 4824 if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, 4825 DemandedLHS, DemandedRHS)) 4826 return 1; 4827 4828 Tmp = std::numeric_limits<unsigned>::max(); 4829 if (!!DemandedLHS) 4830 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); 4831 if (!!DemandedRHS) { 4832 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); 4833 Tmp = std::min(Tmp, Tmp2); 4834 } 4835 // If we don't know anything, early out and try computeKnownBits fall-back. 
4836 if (Tmp == 1) 4837 break; 4838 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 4839 return Tmp; 4840 } 4841 4842 case ISD::BITCAST: { 4843 if (VT.isScalableVector()) 4844 break; 4845 SDValue N0 = Op.getOperand(0); 4846 EVT SrcVT = N0.getValueType(); 4847 unsigned SrcBits = SrcVT.getScalarSizeInBits(); 4848 4849 // Ignore bitcasts from unsupported types.. 4850 if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint())) 4851 break; 4852 4853 // Fast handling of 'identity' bitcasts. 4854 if (VTBits == SrcBits) 4855 return ComputeNumSignBits(N0, DemandedElts, Depth + 1); 4856 4857 bool IsLE = getDataLayout().isLittleEndian(); 4858 4859 // Bitcast 'large element' scalar/vector to 'small element' vector. 4860 if ((SrcBits % VTBits) == 0) { 4861 assert(VT.isVector() && "Expected bitcast to vector"); 4862 4863 unsigned Scale = SrcBits / VTBits; 4864 APInt SrcDemandedElts = 4865 APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale); 4866 4867 // Fast case - sign splat can be simply split across the small elements. 4868 Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); 4869 if (Tmp == SrcBits) 4870 return VTBits; 4871 4872 // Slow case - determine how far the sign extends into each sub-element. 4873 Tmp2 = VTBits; 4874 for (unsigned i = 0; i != NumElts; ++i) 4875 if (DemandedElts[i]) { 4876 unsigned SubOffset = i % Scale; 4877 SubOffset = (IsLE ? ((Scale - 1) - SubOffset) : SubOffset); 4878 SubOffset = SubOffset * VTBits; 4879 if (Tmp <= SubOffset) 4880 return 1; 4881 Tmp2 = std::min(Tmp2, Tmp - SubOffset); 4882 } 4883 return Tmp2; 4884 } 4885 break; 4886 } 4887 4888 case ISD::FP_TO_SINT_SAT: 4889 // FP_TO_SINT_SAT produces a signed value that fits in the saturating VT. 4890 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); 4891 return VTBits - Tmp + 1; 4892 case ISD::SIGN_EXTEND: 4893 Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); 4894 return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; 4895 case ISD::SIGN_EXTEND_INREG: 4896 // Max of the input and what this extends. 4897 Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); 4898 Tmp = VTBits-Tmp+1; 4899 Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); 4900 return std::max(Tmp, Tmp2); 4901 case ISD::SIGN_EXTEND_VECTOR_INREG: { 4902 if (VT.isScalableVector()) 4903 break; 4904 SDValue Src = Op.getOperand(0); 4905 EVT SrcVT = Src.getValueType(); 4906 APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); 4907 Tmp = VTBits - SrcVT.getScalarSizeInBits(); 4908 return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; 4909 } 4910 case ISD::SRA: 4911 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4912 // SRA X, C -> adds C sign bits. 4913 if (std::optional<uint64_t> ShAmt = 4914 getValidMinimumShiftAmount(Op, DemandedElts, Depth + 1)) 4915 Tmp = std::min<uint64_t>(Tmp + *ShAmt, VTBits); 4916 return Tmp; 4917 case ISD::SHL: 4918 if (std::optional<ConstantRange> ShAmtRange = 4919 getValidShiftAmountRange(Op, DemandedElts, Depth + 1)) { 4920 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue(); 4921 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue(); 4922 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are 4923 // shifted out, then we can compute the number of sign bits for the 4924 // operand being extended. A future improvement could be to pass along the 4925 // "shifted left by" information in the recursive calls to 4926 // ComputeKnownSignBits. 
Allowing us to handle this more generically. 4927 if (ISD::isExtOpcode(Op.getOperand(0).getOpcode())) { 4928 SDValue Ext = Op.getOperand(0); 4929 EVT ExtVT = Ext.getValueType(); 4930 SDValue Extendee = Ext.getOperand(0); 4931 EVT ExtendeeVT = Extendee.getValueType(); 4932 uint64_t SizeDifference = 4933 ExtVT.getScalarSizeInBits() - ExtendeeVT.getScalarSizeInBits(); 4934 if (SizeDifference <= MinShAmt) { 4935 Tmp = SizeDifference + 4936 ComputeNumSignBits(Extendee, DemandedElts, Depth + 1); 4937 if (MaxShAmt < Tmp) 4938 return Tmp - MaxShAmt; 4939 } 4940 } 4941 // shl destroys sign bits, ensure it doesn't shift out all sign bits. 4942 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4943 if (MaxShAmt < Tmp) 4944 return Tmp - MaxShAmt; 4945 } 4946 break; 4947 case ISD::AND: 4948 case ISD::OR: 4949 case ISD::XOR: // NOT is handled here. 4950 // Logical binary ops preserve the number of sign bits at the worst. 4951 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); 4952 if (Tmp != 1) { 4953 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); 4954 FirstAnswer = std::min(Tmp, Tmp2); 4955 // We computed what we know about the sign bits as our first 4956 // answer. Now proceed to the generic code that uses 4957 // computeKnownBits, and pick whichever answer is better. 4958 } 4959 break; 4960 4961 case ISD::SELECT: 4962 case ISD::VSELECT: 4963 Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); 4964 if (Tmp == 1) return 1; // Early out. 4965 Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); 4966 return std::min(Tmp, Tmp2); 4967 case ISD::SELECT_CC: 4968 Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); 4969 if (Tmp == 1) return 1; // Early out. 4970 Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1); 4971 return std::min(Tmp, Tmp2); 4972 4973 case ISD::SMIN: 4974 case ISD::SMAX: { 4975 // If we have a clamp pattern, we know that the number of sign bits will be 4976 // the minimum of the clamp min/max range. 4977 bool IsMax = (Opcode == ISD::SMAX); 4978 ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; 4979 if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) 4980 if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) 4981 CstHigh = 4982 isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); 4983 if (CstLow && CstHigh) { 4984 if (!IsMax) 4985 std::swap(CstLow, CstHigh); 4986 if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { 4987 Tmp = CstLow->getAPIntValue().getNumSignBits(); 4988 Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); 4989 return std::min(Tmp, Tmp2); 4990 } 4991 } 4992 4993 // Fallback - just get the minimum number of sign bits of the operands. 4994 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 4995 if (Tmp == 1) 4996 return 1; // Early out. 4997 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 4998 return std::min(Tmp, Tmp2); 4999 } 5000 case ISD::UMIN: 5001 case ISD::UMAX: 5002 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5003 if (Tmp == 1) 5004 return 1; // Early out. 
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  case ISD::SSUBO_CARRY:
  case ISD::USUBO_CARRY:
    // sub_carry(x,x,c) -> 0/-1 (sext carry)
    if (Op.getResNo() == 0 && Op.getOperand(0) == Op.getOperand(1))
      return VTBits;
    [[fallthrough]];
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SADDO_CARRY:
  case ISD::UADDO_CARRY:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
    if (Op.getResNo() != 1)
      break;
    // The boolean result conforms to getBooleanContents. Fall through.
    // If setcc returns 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integer.
    if (TLI->getBooleanContents(VT.isVector(), false) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent)
      return VTBits;
    break;
  case ISD::SETCC:
  case ISD::SETCCCARRY:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS: {
    unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
    // If setcc returns 0/-1, all bits are sign bits.
    if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
        TargetLowering::ZeroOrNegativeOneBooleanContent)
      return VTBits;
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR:
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);

    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
    if (Tmp == VTBits)
      return VTBits;

    if (ConstantSDNode *C =
            isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
      unsigned RotAmt = C->getAPIntValue().urem(VTBits);

      // Handle rotate right by N like a rotate left by VTBits-N.
      if (Opcode == ISD::ROTR)
        RotAmt = (VTBits - RotAmt) % VTBits;

      // If we aren't rotating out all of the known-in sign bits, return the
      // number that are left. This handles rotl(sext(x), 1) for example.
      if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
    }
    break;
  case ISD::ADD:
  case ISD::ADDC:
    // Add can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1; // Early out.

    // Special case decrementing a value (ADD X, -1):
    if (ConstantSDNode *CRHS =
            isConstOrConstSplat(Op.getOperand(1), DemandedElts))
      if (CRHS->isAllOnes()) {
        KnownBits Known =
            computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

        // If the input is known to be 0 or 1, the output is 0/-1, which is all
        // sign bits set.
        if ((Known.Zero | 1).isAllOnes())
          return VTBits;

        // If we are subtracting one from a positive number, there is no carry
        // out of the result.
        if (Known.isNonNegative())
          return Tmp;
      }

    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (Tmp2 == 1) return 1; // Early out.
    return std::min(Tmp, Tmp2) - 1;
  case ISD::SUB:
    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (Tmp2 == 1) return 1; // Early out.

    // Handle NEG.
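    // i.e. (illustrative) (sub 0, x): when x is known non-negative, the
    // negation cannot reduce the sign-bit count below that of x itself.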
5096 if (ConstantSDNode *CLHS = 5097 isConstOrConstSplat(Op.getOperand(0), DemandedElts)) 5098 if (CLHS->isZero()) { 5099 KnownBits Known = 5100 computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); 5101 // If the input is known to be 0 or 1, the output is 0/-1, which is all 5102 // sign bits set. 5103 if ((Known.Zero | 1).isAllOnes()) 5104 return VTBits; 5105 5106 // If the input is known to be positive (the sign bit is known clear), 5107 // the output of the NEG has the same number of sign bits as the input. 5108 if (Known.isNonNegative()) 5109 return Tmp2; 5110 5111 // Otherwise, we treat this like a SUB. 5112 } 5113 5114 // Sub can have at most one carry bit. Thus we know that the output 5115 // is, at worst, one more bit than the inputs. 5116 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5117 if (Tmp == 1) return 1; // Early out. 5118 return std::min(Tmp, Tmp2) - 1; 5119 case ISD::MUL: { 5120 // The output of the Mul can be at most twice the valid bits in the inputs. 5121 unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 5122 if (SignBitsOp0 == 1) 5123 break; 5124 unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); 5125 if (SignBitsOp1 == 1) 5126 break; 5127 unsigned OutValidBits = 5128 (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); 5129 return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; 5130 } 5131 case ISD::AVGCEILS: 5132 case ISD::AVGFLOORS: 5133 Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5134 if (Tmp == 1) 5135 return 1; // Early out. 5136 Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); 5137 return std::min(Tmp, Tmp2); 5138 case ISD::SREM: 5139 // The sign bit is the LHS's sign bit, except when the result of the 5140 // remainder is zero. The magnitude of the result should be less than or 5141 // equal to the magnitude of the LHS. Therefore, the result should have 5142 // at least as many sign bits as the left hand side. 5143 return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); 5144 case ISD::TRUNCATE: { 5145 // Check if the sign bits of source go down as far as the truncated value. 5146 unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); 5147 unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); 5148 if (NumSrcSignBits > (NumSrcBits - VTBits)) 5149 return NumSrcSignBits - (NumSrcBits - VTBits); 5150 break; 5151 } 5152 case ISD::EXTRACT_ELEMENT: { 5153 if (VT.isScalableVector()) 5154 break; 5155 const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); 5156 const int BitWidth = Op.getValueSizeInBits(); 5157 const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; 5158 5159 // Get reverse index (starting from 1), Op1 value indexes elements from 5160 // little end. Sign starts at big end. 5161 const int rIndex = Items - 1 - Op.getConstantOperandVal(1); 5162 5163 // If the sign portion ends in our element the subtraction gives correct 5164 // result. Otherwise it gives either negative or > bitwidth result 5165 return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth); 5166 } 5167 case ISD::INSERT_VECTOR_ELT: { 5168 if (VT.isScalableVector()) 5169 break; 5170 // If we know the element index, split the demand between the 5171 // source vector and the inserted element, otherwise assume we need 5172 // the original demanded vector elements and the value. 
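    // For example (illustrative): inserting into lane 2 of a v4i32 with
    // DemandedElts = 0b0110 means the inserted scalar covers lane 2 and only
    // lane 1 still needs to be demanded from the source vector.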
5173 SDValue InVec = Op.getOperand(0); 5174 SDValue InVal = Op.getOperand(1); 5175 SDValue EltNo = Op.getOperand(2); 5176 bool DemandedVal = true; 5177 APInt DemandedVecElts = DemandedElts; 5178 auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); 5179 if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { 5180 unsigned EltIdx = CEltNo->getZExtValue(); 5181 DemandedVal = !!DemandedElts[EltIdx]; 5182 DemandedVecElts.clearBit(EltIdx); 5183 } 5184 Tmp = std::numeric_limits<unsigned>::max(); 5185 if (DemandedVal) { 5186 // TODO - handle implicit truncation of inserted elements. 5187 if (InVal.getScalarValueSizeInBits() != VTBits) 5188 break; 5189 Tmp2 = ComputeNumSignBits(InVal, Depth + 1); 5190 Tmp = std::min(Tmp, Tmp2); 5191 } 5192 if (!!DemandedVecElts) { 5193 Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1); 5194 Tmp = std::min(Tmp, Tmp2); 5195 } 5196 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 5197 return Tmp; 5198 } 5199 case ISD::EXTRACT_VECTOR_ELT: { 5200 assert(!VT.isScalableVector()); 5201 SDValue InVec = Op.getOperand(0); 5202 SDValue EltNo = Op.getOperand(1); 5203 EVT VecVT = InVec.getValueType(); 5204 // ComputeNumSignBits not yet implemented for scalable vectors. 5205 if (VecVT.isScalableVector()) 5206 break; 5207 const unsigned BitWidth = Op.getValueSizeInBits(); 5208 const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); 5209 const unsigned NumSrcElts = VecVT.getVectorNumElements(); 5210 5211 // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know 5212 // anything about sign bits. But if the sizes match we can derive knowledge 5213 // about sign bits from the vector operand. 5214 if (BitWidth != EltBitWidth) 5215 break; 5216 5217 // If we know the element index, just demand that vector element, else for 5218 // an unknown element index, ignore DemandedElts and demand them all. 5219 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); 5220 auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); 5221 if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) 5222 DemandedSrcElts = 5223 APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); 5224 5225 return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); 5226 } 5227 case ISD::EXTRACT_SUBVECTOR: { 5228 // Offset the demanded elts by the subvector index. 5229 SDValue Src = Op.getOperand(0); 5230 // Bail until we can represent demanded elements for scalable vectors. 5231 if (Src.getValueType().isScalableVector()) 5232 break; 5233 uint64_t Idx = Op.getConstantOperandVal(1); 5234 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 5235 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); 5236 return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); 5237 } 5238 case ISD::CONCAT_VECTORS: { 5239 if (VT.isScalableVector()) 5240 break; 5241 // Determine the minimum number of sign bits across all demanded 5242 // elts of the input vectors. Early out if the result is already 1. 
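    // Sketch (illustrative): for a v8i16 concat of two v4i16 operands,
    // extractBits(4, 0) of DemandedElts selects the lanes taken from
    // operand 0 and extractBits(4, 4) the lanes taken from operand 1.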
5243 Tmp = std::numeric_limits<unsigned>::max(); 5244 EVT SubVectorVT = Op.getOperand(0).getValueType(); 5245 unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); 5246 unsigned NumSubVectors = Op.getNumOperands(); 5247 for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { 5248 APInt DemandedSub = 5249 DemandedElts.extractBits(NumSubVectorElts, i * NumSubVectorElts); 5250 if (!DemandedSub) 5251 continue; 5252 Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); 5253 Tmp = std::min(Tmp, Tmp2); 5254 } 5255 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 5256 return Tmp; 5257 } 5258 case ISD::INSERT_SUBVECTOR: { 5259 if (VT.isScalableVector()) 5260 break; 5261 // Demand any elements from the subvector and the remainder from the src its 5262 // inserted into. 5263 SDValue Src = Op.getOperand(0); 5264 SDValue Sub = Op.getOperand(1); 5265 uint64_t Idx = Op.getConstantOperandVal(2); 5266 unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); 5267 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); 5268 APInt DemandedSrcElts = DemandedElts; 5269 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); 5270 5271 Tmp = std::numeric_limits<unsigned>::max(); 5272 if (!!DemandedSubElts) { 5273 Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); 5274 if (Tmp == 1) 5275 return 1; // early-out 5276 } 5277 if (!!DemandedSrcElts) { 5278 Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); 5279 Tmp = std::min(Tmp, Tmp2); 5280 } 5281 assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); 5282 return Tmp; 5283 } 5284 case ISD::LOAD: { 5285 LoadSDNode *LD = cast<LoadSDNode>(Op); 5286 if (const MDNode *Ranges = LD->getRanges()) { 5287 if (DemandedElts != 1) 5288 break; 5289 5290 ConstantRange CR = getConstantRangeFromMetadata(*Ranges); 5291 if (VTBits > CR.getBitWidth()) { 5292 switch (LD->getExtensionType()) { 5293 case ISD::SEXTLOAD: 5294 CR = CR.signExtend(VTBits); 5295 break; 5296 case ISD::ZEXTLOAD: 5297 CR = CR.zeroExtend(VTBits); 5298 break; 5299 default: 5300 break; 5301 } 5302 } 5303 5304 if (VTBits != CR.getBitWidth()) 5305 break; 5306 return std::min(CR.getSignedMin().getNumSignBits(), 5307 CR.getSignedMax().getNumSignBits()); 5308 } 5309 5310 break; 5311 } 5312 case ISD::ATOMIC_CMP_SWAP: 5313 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 5314 case ISD::ATOMIC_SWAP: 5315 case ISD::ATOMIC_LOAD_ADD: 5316 case ISD::ATOMIC_LOAD_SUB: 5317 case ISD::ATOMIC_LOAD_AND: 5318 case ISD::ATOMIC_LOAD_CLR: 5319 case ISD::ATOMIC_LOAD_OR: 5320 case ISD::ATOMIC_LOAD_XOR: 5321 case ISD::ATOMIC_LOAD_NAND: 5322 case ISD::ATOMIC_LOAD_MIN: 5323 case ISD::ATOMIC_LOAD_MAX: 5324 case ISD::ATOMIC_LOAD_UMIN: 5325 case ISD::ATOMIC_LOAD_UMAX: 5326 case ISD::ATOMIC_LOAD: { 5327 Tmp = cast<AtomicSDNode>(Op)->getMemoryVT().getScalarSizeInBits(); 5328 // If we are looking at the loaded value. 5329 if (Op.getResNo() == 0) { 5330 if (Tmp == VTBits) 5331 return 1; // early-out 5332 if (TLI->getExtendForAtomicOps() == ISD::SIGN_EXTEND) 5333 return VTBits - Tmp + 1; 5334 if (TLI->getExtendForAtomicOps() == ISD::ZERO_EXTEND) 5335 return VTBits - Tmp; 5336 if (Op->getOpcode() == ISD::ATOMIC_LOAD) { 5337 ISD::LoadExtType ETy = cast<AtomicSDNode>(Op)->getExtensionType(); 5338 if (ETy == ISD::SEXTLOAD) 5339 return VTBits - Tmp + 1; 5340 if (ETy == ISD::ZEXTLOAD) 5341 return VTBits - Tmp; 5342 } 5343 } 5344 break; 5345 } 5346 } 5347 5348 // If we are looking at the loaded value of the SDNode. 
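  // (Result 0 is the loaded value; other results, such as the chain, carry no
  // sign-bit information, so they are not inspected here.)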
5349 if (Op.getResNo() == 0) { 5350 // Handle LOADX separately here. EXTLOAD case will fallthrough. 5351 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 5352 unsigned ExtType = LD->getExtensionType(); 5353 switch (ExtType) { 5354 default: break; 5355 case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. 5356 Tmp = LD->getMemoryVT().getScalarSizeInBits(); 5357 return VTBits - Tmp + 1; 5358 case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. 5359 Tmp = LD->getMemoryVT().getScalarSizeInBits(); 5360 return VTBits - Tmp; 5361 case ISD::NON_EXTLOAD: 5362 if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { 5363 // We only need to handle vectors - computeKnownBits should handle 5364 // scalar cases. 5365 Type *CstTy = Cst->getType(); 5366 if (CstTy->isVectorTy() && !VT.isScalableVector() && 5367 (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() && 5368 VTBits == CstTy->getScalarSizeInBits()) { 5369 Tmp = VTBits; 5370 for (unsigned i = 0; i != NumElts; ++i) { 5371 if (!DemandedElts[i]) 5372 continue; 5373 if (Constant *Elt = Cst->getAggregateElement(i)) { 5374 if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { 5375 const APInt &Value = CInt->getValue(); 5376 Tmp = std::min(Tmp, Value.getNumSignBits()); 5377 continue; 5378 } 5379 if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { 5380 APInt Value = CFP->getValueAPF().bitcastToAPInt(); 5381 Tmp = std::min(Tmp, Value.getNumSignBits()); 5382 continue; 5383 } 5384 } 5385 // Unknown type. Conservatively assume no bits match sign bit. 5386 return 1; 5387 } 5388 return Tmp; 5389 } 5390 } 5391 break; 5392 } 5393 } 5394 } 5395 5396 // Allow the target to implement this method for its nodes. 5397 if (Opcode >= ISD::BUILTIN_OP_END || 5398 Opcode == ISD::INTRINSIC_WO_CHAIN || 5399 Opcode == ISD::INTRINSIC_W_CHAIN || 5400 Opcode == ISD::INTRINSIC_VOID) { 5401 // TODO: This can probably be removed once target code is audited. This 5402 // is here purely to reduce patch size and review complexity. 5403 if (!VT.isScalableVector()) { 5404 unsigned NumBits = 5405 TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); 5406 if (NumBits > 1) 5407 FirstAnswer = std::max(FirstAnswer, NumBits); 5408 } 5409 } 5410 5411 // Finally, if we can prove that the top bits of the result are 0's or 1's, 5412 // use this information. 5413 KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); 5414 return std::max(FirstAnswer, Known.countMinSignBits()); 5415 } 5416 5417 unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, 5418 unsigned Depth) const { 5419 unsigned SignBits = ComputeNumSignBits(Op, Depth); 5420 return Op.getScalarValueSizeInBits() - SignBits + 1; 5421 } 5422 5423 unsigned SelectionDAG::ComputeMaxSignificantBits(SDValue Op, 5424 const APInt &DemandedElts, 5425 unsigned Depth) const { 5426 unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); 5427 return Op.getScalarValueSizeInBits() - SignBits + 1; 5428 } 5429 5430 bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, 5431 unsigned Depth) const { 5432 // Early out for FREEZE. 5433 if (Op.getOpcode() == ISD::FREEZE) 5434 return true; 5435 5436 EVT VT = Op.getValueType(); 5437 APInt DemandedElts = VT.isFixedLengthVector() 5438 ? 
APInt::getAllOnes(VT.getVectorNumElements()) 5439 : APInt(1, 1); 5440 return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); 5441 } 5442 5443 bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, 5444 const APInt &DemandedElts, 5445 bool PoisonOnly, 5446 unsigned Depth) const { 5447 unsigned Opcode = Op.getOpcode(); 5448 5449 // Early out for FREEZE. 5450 if (Opcode == ISD::FREEZE) 5451 return true; 5452 5453 if (Depth >= MaxRecursionDepth) 5454 return false; // Limit search depth. 5455 5456 if (isIntOrFPConstant(Op)) 5457 return true; 5458 5459 switch (Opcode) { 5460 case ISD::CONDCODE: 5461 case ISD::VALUETYPE: 5462 case ISD::FrameIndex: 5463 case ISD::TargetFrameIndex: 5464 case ISD::CopyFromReg: 5465 return true; 5466 5467 case ISD::UNDEF: 5468 return PoisonOnly; 5469 5470 case ISD::BUILD_VECTOR: 5471 // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - 5472 // this shouldn't affect the result. 5473 for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) { 5474 if (!DemandedElts[i]) 5475 continue; 5476 if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly, 5477 Depth + 1)) 5478 return false; 5479 } 5480 return true; 5481 5482 case ISD::SPLAT_VECTOR: 5483 return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly, 5484 Depth + 1); 5485 5486 case ISD::VECTOR_SHUFFLE: { 5487 APInt DemandedLHS, DemandedRHS; 5488 auto *SVN = cast<ShuffleVectorSDNode>(Op); 5489 if (!getShuffleDemandedElts(DemandedElts.getBitWidth(), SVN->getMask(), 5490 DemandedElts, DemandedLHS, DemandedRHS, 5491 /*AllowUndefElts=*/false)) 5492 return false; 5493 if (!DemandedLHS.isZero() && 5494 !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedLHS, 5495 PoisonOnly, Depth + 1)) 5496 return false; 5497 if (!DemandedRHS.isZero() && 5498 !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedRHS, 5499 PoisonOnly, Depth + 1)) 5500 return false; 5501 return true; 5502 } 5503 5504 // TODO: Search for noundef attributes from library functions. 5505 5506 // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. 5507 5508 default: 5509 // Allow the target to implement this method for its nodes. 5510 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 5511 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) 5512 return TLI->isGuaranteedNotToBeUndefOrPoisonForTargetNode( 5513 Op, DemandedElts, *this, PoisonOnly, Depth); 5514 break; 5515 } 5516 5517 // If Op can't create undef/poison and none of its operands are undef/poison 5518 // then Op is never undef/poison. 5519 // NOTE: TargetNodes can handle this in themselves in 5520 // isGuaranteedNotToBeUndefOrPoisonForTargetNode or let 5521 // TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode handle it. 5522 return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true, 5523 Depth) && 5524 all_of(Op->ops(), [&](SDValue V) { 5525 return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1); 5526 }); 5527 } 5528 5529 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, 5530 bool ConsiderFlags, 5531 unsigned Depth) const { 5532 EVT VT = Op.getValueType(); 5533 APInt DemandedElts = VT.isFixedLengthVector() 5534 ? 
APInt::getAllOnes(VT.getVectorNumElements()) 5535 : APInt(1, 1); 5536 return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags, 5537 Depth); 5538 } 5539 5540 bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, 5541 bool PoisonOnly, bool ConsiderFlags, 5542 unsigned Depth) const { 5543 if (ConsiderFlags && Op->hasPoisonGeneratingFlags()) 5544 return true; 5545 5546 unsigned Opcode = Op.getOpcode(); 5547 switch (Opcode) { 5548 case ISD::FREEZE: 5549 case ISD::CONCAT_VECTORS: 5550 case ISD::INSERT_SUBVECTOR: 5551 case ISD::SADDSAT: 5552 case ISD::UADDSAT: 5553 case ISD::SSUBSAT: 5554 case ISD::USUBSAT: 5555 case ISD::MULHU: 5556 case ISD::MULHS: 5557 case ISD::SMIN: 5558 case ISD::SMAX: 5559 case ISD::UMIN: 5560 case ISD::UMAX: 5561 case ISD::AND: 5562 case ISD::XOR: 5563 case ISD::ROTL: 5564 case ISD::ROTR: 5565 case ISD::FSHL: 5566 case ISD::FSHR: 5567 case ISD::BSWAP: 5568 case ISD::CTPOP: 5569 case ISD::BITREVERSE: 5570 case ISD::PARITY: 5571 case ISD::SIGN_EXTEND: 5572 case ISD::TRUNCATE: 5573 case ISD::SIGN_EXTEND_INREG: 5574 case ISD::SIGN_EXTEND_VECTOR_INREG: 5575 case ISD::ZERO_EXTEND_VECTOR_INREG: 5576 case ISD::BITCAST: 5577 case ISD::BUILD_VECTOR: 5578 case ISD::BUILD_PAIR: 5579 case ISD::SPLAT_VECTOR: 5580 return false; 5581 5582 case ISD::SELECT_CC: 5583 case ISD::SETCC: { 5584 // Integer setcc cannot create undef or poison. 5585 if (Op.getOperand(0).getValueType().isInteger()) 5586 return false; 5587 5588 // FP compares are more complicated. They can create poison for nan/infinity 5589 // based on options and flags. The options and flags also cause special 5590 // nonan condition codes to be used. Those condition codes may be preserved 5591 // even if the nonan flag is dropped somewhere. 5592 unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4; 5593 ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get(); 5594 if (((unsigned)CCCode & 0x10U)) 5595 return true; 5596 5597 const TargetOptions &Options = getTarget().Options; 5598 return Options.NoNaNsFPMath || Options.NoInfsFPMath; 5599 } 5600 5601 case ISD::OR: 5602 case ISD::ZERO_EXTEND: 5603 case ISD::ADD: 5604 case ISD::SUB: 5605 case ISD::MUL: 5606 // No poison except from flags (which is handled above) 5607 return false; 5608 5609 case ISD::SHL: 5610 case ISD::SRL: 5611 case ISD::SRA: 5612 // If the max shift amount isn't in range, then the shift can 5613 // create poison. 5614 return !isGuaranteedNotToBeUndefOrPoison(Op.getOperand(1), DemandedElts, 5615 PoisonOnly, Depth + 1) || 5616 !getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1); 5617 5618 case ISD::SCALAR_TO_VECTOR: 5619 // Check if we demand any upper (undef) elements. 5620 return !PoisonOnly && DemandedElts.ugt(1); 5621 5622 case ISD::INSERT_VECTOR_ELT: 5623 case ISD::EXTRACT_VECTOR_ELT: { 5624 // Ensure that the element index is in bounds. 5625 EVT VecVT = Op.getOperand(0).getValueType(); 5626 SDValue Idx = Op.getOperand(Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1); 5627 if (isGuaranteedNotToBeUndefOrPoison(Idx, DemandedElts, PoisonOnly, 5628 Depth + 1)) { 5629 KnownBits KnownIdx = computeKnownBits(Idx, Depth + 1); 5630 return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements()); 5631 } 5632 return true; 5633 } 5634 5635 case ISD::VECTOR_SHUFFLE: { 5636 // Check for any demanded shuffle element that is undef. 
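    // e.g. (illustrative) a mask of <0, -1, 2, 3> can only introduce undef in
    // lane 1, so it matters only when DemandedElts[1] is set.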
5637 auto *SVN = cast<ShuffleVectorSDNode>(Op); 5638 for (auto [Idx, Elt] : enumerate(SVN->getMask())) 5639 if (Elt < 0 && DemandedElts[Idx]) 5640 return true; 5641 return false; 5642 } 5643 5644 default: 5645 // Allow the target to implement this method for its nodes. 5646 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || 5647 Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) 5648 return TLI->canCreateUndefOrPoisonForTargetNode( 5649 Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth); 5650 break; 5651 } 5652 5653 // Be conservative and return true. 5654 return true; 5655 } 5656 5657 bool SelectionDAG::isADDLike(SDValue Op, bool NoWrap) const { 5658 unsigned Opcode = Op.getOpcode(); 5659 if (Opcode == ISD::OR) 5660 return Op->getFlags().hasDisjoint() || 5661 haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); 5662 if (Opcode == ISD::XOR) 5663 return !NoWrap && isMinSignedConstant(Op.getOperand(1)); 5664 return false; 5665 } 5666 5667 bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { 5668 return Op.getNumOperands() == 2 && isa<ConstantSDNode>(Op.getOperand(1)) && 5669 (Op.getOpcode() == ISD::ADD || isADDLike(Op)); 5670 } 5671 5672 bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const { 5673 // If we're told that NaNs won't happen, assume they won't. 5674 if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) 5675 return true; 5676 5677 if (Depth >= MaxRecursionDepth) 5678 return false; // Limit search depth. 5679 5680 // If the value is a constant, we can obviously see if it is a NaN or not. 5681 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { 5682 return !C->getValueAPF().isNaN() || 5683 (SNaN && !C->getValueAPF().isSignaling()); 5684 } 5685 5686 unsigned Opcode = Op.getOpcode(); 5687 switch (Opcode) { 5688 case ISD::FADD: 5689 case ISD::FSUB: 5690 case ISD::FMUL: 5691 case ISD::FDIV: 5692 case ISD::FREM: 5693 case ISD::FSIN: 5694 case ISD::FCOS: 5695 case ISD::FTAN: 5696 case ISD::FASIN: 5697 case ISD::FACOS: 5698 case ISD::FATAN: 5699 case ISD::FATAN2: 5700 case ISD::FSINH: 5701 case ISD::FCOSH: 5702 case ISD::FTANH: 5703 case ISD::FMA: 5704 case ISD::FMAD: { 5705 if (SNaN) 5706 return true; 5707 // TODO: Need isKnownNeverInfinity 5708 return false; 5709 } 5710 case ISD::FCANONICALIZE: 5711 case ISD::FEXP: 5712 case ISD::FEXP2: 5713 case ISD::FEXP10: 5714 case ISD::FTRUNC: 5715 case ISD::FFLOOR: 5716 case ISD::FCEIL: 5717 case ISD::FROUND: 5718 case ISD::FROUNDEVEN: 5719 case ISD::LROUND: 5720 case ISD::LLROUND: 5721 case ISD::FRINT: 5722 case ISD::LRINT: 5723 case ISD::LLRINT: 5724 case ISD::FNEARBYINT: 5725 case ISD::FLDEXP: { 5726 if (SNaN) 5727 return true; 5728 return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 5729 } 5730 case ISD::FABS: 5731 case ISD::FNEG: 5732 case ISD::FCOPYSIGN: { 5733 return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 5734 } 5735 case ISD::SELECT: 5736 return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 5737 isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 5738 case ISD::FP_EXTEND: 5739 case ISD::FP_ROUND: { 5740 if (SNaN) 5741 return true; 5742 return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 5743 } 5744 case ISD::SINT_TO_FP: 5745 case ISD::UINT_TO_FP: 5746 return true; 5747 case ISD::FSQRT: // Need is known positive 5748 case ISD::FLOG: 5749 case ISD::FLOG2: 5750 case ISD::FLOG10: 5751 case ISD::FPOWI: 5752 case ISD::FPOW: { 5753 if (SNaN) 5754 return true; 5755 // TODO: Refine on operand 5756 
return false; 5757 } 5758 case ISD::FMINNUM: 5759 case ISD::FMAXNUM: 5760 case ISD::FMINIMUMNUM: 5761 case ISD::FMAXIMUMNUM: { 5762 // Only one needs to be known not-nan, since it will be returned if the 5763 // other ends up being one. 5764 return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) || 5765 isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 5766 } 5767 case ISD::FMINNUM_IEEE: 5768 case ISD::FMAXNUM_IEEE: { 5769 if (SNaN) 5770 return true; 5771 // This can return a NaN if either operand is an sNaN, or if both operands 5772 // are NaN. 5773 return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) && 5774 isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) || 5775 (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) && 5776 isKnownNeverSNaN(Op.getOperand(0), Depth + 1)); 5777 } 5778 case ISD::FMINIMUM: 5779 case ISD::FMAXIMUM: { 5780 // TODO: Does this quiet or return the origina NaN as-is? 5781 return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 5782 isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 5783 } 5784 case ISD::EXTRACT_VECTOR_ELT: { 5785 return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 5786 } 5787 case ISD::BUILD_VECTOR: { 5788 for (const SDValue &Opnd : Op->ops()) 5789 if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1)) 5790 return false; 5791 return true; 5792 } 5793 default: 5794 if (Opcode >= ISD::BUILTIN_OP_END || 5795 Opcode == ISD::INTRINSIC_WO_CHAIN || 5796 Opcode == ISD::INTRINSIC_W_CHAIN || 5797 Opcode == ISD::INTRINSIC_VOID) { 5798 return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth); 5799 } 5800 5801 return false; 5802 } 5803 } 5804 5805 bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { 5806 assert(Op.getValueType().isFloatingPoint() && 5807 "Floating point type expected"); 5808 5809 // If the value is a constant, we can obviously see if it is a zero or not. 5810 return ISD::matchUnaryFpPredicate( 5811 Op, [](ConstantFPSDNode *C) { return !C->isZero(); }); 5812 } 5813 5814 bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { 5815 if (Depth >= MaxRecursionDepth) 5816 return false; // Limit search depth. 5817 5818 assert(!Op.getValueType().isFloatingPoint() && 5819 "Floating point types unsupported - use isKnownNeverZeroFloat"); 5820 5821 // If the value is a constant, we can obviously see if it is a zero or not. 5822 if (ISD::matchUnaryPredicate(Op, 5823 [](ConstantSDNode *C) { return !C->isZero(); })) 5824 return true; 5825 5826 // TODO: Recognize more cases here. Most of the cases are also incomplete to 5827 // some degree. 5828 switch (Op.getOpcode()) { 5829 default: 5830 break; 5831 5832 case ISD::OR: 5833 return isKnownNeverZero(Op.getOperand(1), Depth + 1) || 5834 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5835 5836 case ISD::VSELECT: 5837 case ISD::SELECT: 5838 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5839 isKnownNeverZero(Op.getOperand(2), Depth + 1); 5840 5841 case ISD::SHL: { 5842 if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) 5843 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 5844 KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1); 5845 // 1 << X is never zero. 5846 if (ValKnown.One[0]) 5847 return true; 5848 // If max shift cnt of known ones is non-zero, result is non-zero. 
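    // Sketch of the reasoning (illustrative): if bit I of the operand is a
    // known one and I plus the maximum possible shift amount stays within the
    // bit width, that one is never shifted out, so the result is non-zero.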
5849 APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue(); 5850 if (MaxCnt.ult(ValKnown.getBitWidth()) && 5851 !ValKnown.One.shl(MaxCnt).isZero()) 5852 return true; 5853 break; 5854 } 5855 case ISD::UADDSAT: 5856 case ISD::UMAX: 5857 return isKnownNeverZero(Op.getOperand(1), Depth + 1) || 5858 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5859 5860 // For smin/smax: If either operand is known negative/positive 5861 // respectively we don't need the other to be known at all. 5862 case ISD::SMAX: { 5863 KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1); 5864 if (Op1.isStrictlyPositive()) 5865 return true; 5866 5867 KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1); 5868 if (Op0.isStrictlyPositive()) 5869 return true; 5870 5871 if (Op1.isNonZero() && Op0.isNonZero()) 5872 return true; 5873 5874 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5875 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5876 } 5877 case ISD::SMIN: { 5878 KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1); 5879 if (Op1.isNegative()) 5880 return true; 5881 5882 KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1); 5883 if (Op0.isNegative()) 5884 return true; 5885 5886 if (Op1.isNonZero() && Op0.isNonZero()) 5887 return true; 5888 5889 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5890 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5891 } 5892 case ISD::UMIN: 5893 return isKnownNeverZero(Op.getOperand(1), Depth + 1) && 5894 isKnownNeverZero(Op.getOperand(0), Depth + 1); 5895 5896 case ISD::ROTL: 5897 case ISD::ROTR: 5898 case ISD::BITREVERSE: 5899 case ISD::BSWAP: 5900 case ISD::CTPOP: 5901 case ISD::ABS: 5902 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 5903 5904 case ISD::SRA: 5905 case ISD::SRL: { 5906 if (Op->getFlags().hasExact()) 5907 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 5908 KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1); 5909 if (ValKnown.isNegative()) 5910 return true; 5911 // If max shift cnt of known ones is non-zero, result is non-zero. 5912 APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue(); 5913 if (MaxCnt.ult(ValKnown.getBitWidth()) && 5914 !ValKnown.One.lshr(MaxCnt).isZero()) 5915 return true; 5916 break; 5917 } 5918 case ISD::UDIV: 5919 case ISD::SDIV: 5920 // div exact can only produce a zero if the dividend is zero. 5921 // TODO: For udiv this is also true if Op1 u<= Op0 5922 if (Op->getFlags().hasExact()) 5923 return isKnownNeverZero(Op.getOperand(0), Depth + 1); 5924 break; 5925 5926 case ISD::ADD: 5927 if (Op->getFlags().hasNoUnsignedWrap()) 5928 if (isKnownNeverZero(Op.getOperand(1), Depth + 1) || 5929 isKnownNeverZero(Op.getOperand(0), Depth + 1)) 5930 return true; 5931 // TODO: There are a lot more cases we can prove for add. 
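    // e.g. (illustrative) with nuw, add(x, y) cannot wrap back to zero, so a
    // single known-non-zero operand already proves the sum is non-zero.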
    break;

  case ISD::SUB: {
    if (isNullConstant(Op.getOperand(0)))
      return isKnownNeverZero(Op.getOperand(1), Depth + 1);

    std::optional<bool> ne =
        KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1),
                      computeKnownBits(Op.getOperand(1), Depth + 1));
    return ne && *ne;
  }

  case ISD::MUL:
    if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
      if (isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
          isKnownNeverZero(Op.getOperand(0), Depth + 1))
        return true;
    break;

  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
    return isKnownNeverZero(Op.getOperand(0), Depth + 1);
  case ISD::VSCALE: {
    const Function &F = getMachineFunction().getFunction();
    const APInt &Multiplier = Op.getConstantOperandAPInt(0);
    ConstantRange CR =
        getVScaleRange(&F, Op.getScalarValueSizeInBits()).multiply(Multiplier);
    if (!CR.contains(APInt(CR.getBitWidth(), 0)))
      return true;
    break;
  }
  }

  return computeKnownBits(Op, Depth).isNonZero();
}

bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const {
  if (ConstantFPSDNode *C1 = isConstOrConstSplatFP(Op, true))
    return !C1->isNegative();

  return Op.getOpcode() == ISD::FABS;
}

bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
  // Check the obvious case.
  if (A == B) return true;

  // For negative and positive zero.
  if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
    if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
      if (CA->isZero() && CB->isZero()) return true;

  // Otherwise they may not be equal.
  return false;
}

// Only bits set in Mask must be negated, other bits may be arbitrary.
SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) {
  if (isBitwiseNot(V, AllowUndefs))
    return V.getOperand(0);

  // Handle any_extend (not (truncate X)) pattern, where Mask only sets
  // bits in the non-extended part.
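  // Rough shape of the match (illustrative, names as in the code below):
  //   V    = any_extend (xor (truncate X), -1)
  //   Mask = constant whose active bits fit within the truncated width
  // in which case X is returned as the bitwise-not operand.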
5995 ConstantSDNode *MaskC = isConstOrConstSplat(Mask); 5996 if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND) 5997 return SDValue(); 5998 SDValue ExtArg = V.getOperand(0); 5999 if (ExtArg.getScalarValueSizeInBits() >= 6000 MaskC->getAPIntValue().getActiveBits() && 6001 isBitwiseNot(ExtArg, AllowUndefs) && 6002 ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE && 6003 ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType()) 6004 return ExtArg.getOperand(0).getOperand(0); 6005 return SDValue(); 6006 } 6007 6008 static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) { 6009 // Match masked merge pattern (X & ~M) op (Y & M) 6010 // Including degenerate case (X & ~M) op M 6011 auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask, 6012 SDValue Other) { 6013 if (SDValue NotOperand = 6014 getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) { 6015 if (NotOperand->getOpcode() == ISD::ZERO_EXTEND || 6016 NotOperand->getOpcode() == ISD::TRUNCATE) 6017 NotOperand = NotOperand->getOperand(0); 6018 6019 if (Other == NotOperand) 6020 return true; 6021 if (Other->getOpcode() == ISD::AND) 6022 return NotOperand == Other->getOperand(0) || 6023 NotOperand == Other->getOperand(1); 6024 } 6025 return false; 6026 }; 6027 6028 if (A->getOpcode() == ISD::ZERO_EXTEND || A->getOpcode() == ISD::TRUNCATE) 6029 A = A->getOperand(0); 6030 6031 if (B->getOpcode() == ISD::ZERO_EXTEND || B->getOpcode() == ISD::TRUNCATE) 6032 B = B->getOperand(0); 6033 6034 if (A->getOpcode() == ISD::AND) 6035 return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) || 6036 MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B); 6037 return false; 6038 } 6039 6040 // FIXME: unify with llvm::haveNoCommonBitsSet. 6041 bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { 6042 assert(A.getValueType() == B.getValueType() && 6043 "Values must have the same type"); 6044 if (haveNoCommonBitsSetCommutative(A, B) || 6045 haveNoCommonBitsSetCommutative(B, A)) 6046 return true; 6047 return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), 6048 computeKnownBits(B)); 6049 } 6050 6051 static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step, 6052 SelectionDAG &DAG) { 6053 if (cast<ConstantSDNode>(Step)->isZero()) 6054 return DAG.getConstant(0, DL, VT); 6055 6056 return SDValue(); 6057 } 6058 6059 static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, 6060 ArrayRef<SDValue> Ops, 6061 SelectionDAG &DAG) { 6062 int NumOps = Ops.size(); 6063 assert(NumOps != 0 && "Can't build an empty vector!"); 6064 assert(!VT.isScalableVector() && 6065 "BUILD_VECTOR cannot be used with scalable types"); 6066 assert(VT.getVectorNumElements() == (unsigned)NumOps && 6067 "Incorrect element count in BUILD_VECTOR!"); 6068 6069 // BUILD_VECTOR of UNDEFs is UNDEF. 6070 if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) 6071 return DAG.getUNDEF(VT); 6072 6073 // BUILD_VECTOR of seq extract/insert from the same vector + type is Identity. 
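  // i.e. (illustrative) (build_vector (extract_elt V, 0), (extract_elt V, 1),
  // ..., (extract_elt V, N-1)) --> V, provided V already has type VT.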
6074 SDValue IdentitySrc; 6075 bool IsIdentity = true; 6076 for (int i = 0; i != NumOps; ++i) { 6077 if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || 6078 Ops[i].getOperand(0).getValueType() != VT || 6079 (IdentitySrc && Ops[i].getOperand(0) != IdentitySrc) || 6080 !isa<ConstantSDNode>(Ops[i].getOperand(1)) || 6081 Ops[i].getConstantOperandAPInt(1) != i) { 6082 IsIdentity = false; 6083 break; 6084 } 6085 IdentitySrc = Ops[i].getOperand(0); 6086 } 6087 if (IsIdentity) 6088 return IdentitySrc; 6089 6090 return SDValue(); 6091 } 6092 6093 /// Try to simplify vector concatenation to an input value, undef, or build 6094 /// vector. 6095 static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, 6096 ArrayRef<SDValue> Ops, 6097 SelectionDAG &DAG) { 6098 assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); 6099 assert(llvm::all_of(Ops, 6100 [Ops](SDValue Op) { 6101 return Ops[0].getValueType() == Op.getValueType(); 6102 }) && 6103 "Concatenation of vectors with inconsistent value types!"); 6104 assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) == 6105 VT.getVectorElementCount() && 6106 "Incorrect element count in vector concatenation!"); 6107 6108 if (Ops.size() == 1) 6109 return Ops[0]; 6110 6111 // Concat of UNDEFs is UNDEF. 6112 if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) 6113 return DAG.getUNDEF(VT); 6114 6115 // Scan the operands and look for extract operations from a single source 6116 // that correspond to insertion at the same location via this concatenation: 6117 // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... 6118 SDValue IdentitySrc; 6119 bool IsIdentity = true; 6120 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 6121 SDValue Op = Ops[i]; 6122 unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements(); 6123 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || 6124 Op.getOperand(0).getValueType() != VT || 6125 (IdentitySrc && Op.getOperand(0) != IdentitySrc) || 6126 Op.getConstantOperandVal(1) != IdentityIndex) { 6127 IsIdentity = false; 6128 break; 6129 } 6130 assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && 6131 "Unexpected identity source vector for concat of extracts"); 6132 IdentitySrc = Op.getOperand(0); 6133 } 6134 if (IsIdentity) { 6135 assert(IdentitySrc && "Failed to set source vector of extracts"); 6136 return IdentitySrc; 6137 } 6138 6139 // The code below this point is only designed to work for fixed width 6140 // vectors, so we bail out for now. 6141 if (VT.isScalableVector()) 6142 return SDValue(); 6143 6144 // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be 6145 // simplified to one big BUILD_VECTOR. 6146 // FIXME: Add support for SCALAR_TO_VECTOR as well. 6147 EVT SVT = VT.getScalarType(); 6148 SmallVector<SDValue, 16> Elts; 6149 for (SDValue Op : Ops) { 6150 EVT OpVT = Op.getValueType(); 6151 if (Op.isUndef()) 6152 Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); 6153 else if (Op.getOpcode() == ISD::BUILD_VECTOR) 6154 Elts.append(Op->op_begin(), Op->op_end()); 6155 else 6156 return SDValue(); 6157 } 6158 6159 // BUILD_VECTOR requires all inputs to be of the same type, find the 6160 // maximum type and extend them all. 6161 for (SDValue Op : Elts) 6162 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); 6163 6164 if (SVT.bitsGT(VT.getScalarType())) { 6165 for (SDValue &Op : Elts) { 6166 if (Op.isUndef()) 6167 Op = DAG.getUNDEF(SVT); 6168 else 6169 Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) 6170 ? 
DAG.getZExtOrTrunc(Op, DL, SVT) 6171 : DAG.getSExtOrTrunc(Op, DL, SVT); 6172 } 6173 } 6174 6175 SDValue V = DAG.getBuildVector(VT, DL, Elts); 6176 NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); 6177 return V; 6178 } 6179 6180 /// Gets or creates the specified node. 6181 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { 6182 SDVTList VTs = getVTList(VT); 6183 FoldingSetNodeID ID; 6184 AddNodeIDNode(ID, Opcode, VTs, {}); 6185 void *IP = nullptr; 6186 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) 6187 return SDValue(E, 0); 6188 6189 auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 6190 CSEMap.InsertNode(N, IP); 6191 6192 InsertNode(N); 6193 SDValue V = SDValue(N, 0); 6194 NewSDValueDbgMsg(V, "Creating new node: ", this); 6195 return V; 6196 } 6197 6198 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 6199 SDValue N1) { 6200 SDNodeFlags Flags; 6201 if (Inserter) 6202 Flags = Inserter->getFlags(); 6203 return getNode(Opcode, DL, VT, N1, Flags); 6204 } 6205 6206 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 6207 SDValue N1, const SDNodeFlags Flags) { 6208 assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); 6209 6210 // Constant fold unary operations with a vector integer or float operand. 6211 switch (Opcode) { 6212 default: 6213 // FIXME: Entirely reasonable to perform folding of other unary 6214 // operations here as the need arises. 6215 break; 6216 case ISD::FNEG: 6217 case ISD::FABS: 6218 case ISD::FCEIL: 6219 case ISD::FTRUNC: 6220 case ISD::FFLOOR: 6221 case ISD::FP_EXTEND: 6222 case ISD::FP_TO_SINT: 6223 case ISD::FP_TO_UINT: 6224 case ISD::FP_TO_FP16: 6225 case ISD::FP_TO_BF16: 6226 case ISD::TRUNCATE: 6227 case ISD::ANY_EXTEND: 6228 case ISD::ZERO_EXTEND: 6229 case ISD::SIGN_EXTEND: 6230 case ISD::UINT_TO_FP: 6231 case ISD::SINT_TO_FP: 6232 case ISD::FP16_TO_FP: 6233 case ISD::BF16_TO_FP: 6234 case ISD::BITCAST: 6235 case ISD::ABS: 6236 case ISD::BITREVERSE: 6237 case ISD::BSWAP: 6238 case ISD::CTLZ: 6239 case ISD::CTLZ_ZERO_UNDEF: 6240 case ISD::CTTZ: 6241 case ISD::CTTZ_ZERO_UNDEF: 6242 case ISD::CTPOP: 6243 case ISD::STEP_VECTOR: { 6244 SDValue Ops = {N1}; 6245 if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) 6246 return Fold; 6247 } 6248 } 6249 6250 unsigned OpOpcode = N1.getNode()->getOpcode(); 6251 switch (Opcode) { 6252 case ISD::STEP_VECTOR: 6253 assert(VT.isScalableVector() && 6254 "STEP_VECTOR can only be used with scalable types"); 6255 assert(OpOpcode == ISD::TargetConstant && 6256 VT.getVectorElementType() == N1.getValueType() && 6257 "Unexpected step operand"); 6258 break; 6259 case ISD::FREEZE: 6260 assert(VT == N1.getValueType() && "Unexpected VT!"); 6261 if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false, 6262 /*Depth*/ 1)) 6263 return N1; 6264 break; 6265 case ISD::TokenFactor: 6266 case ISD::MERGE_VALUES: 6267 case ISD::CONCAT_VECTORS: 6268 return N1; // Factor, merge or concat of one node? No need. 6269 case ISD::BUILD_VECTOR: { 6270 // Attempt to simplify BUILD_VECTOR. 6271 SDValue Ops[] = {N1}; 6272 if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) 6273 return V; 6274 break; 6275 } 6276 case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); 6277 case ISD::FP_EXTEND: 6278 assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && 6279 "Invalid FP cast!"); 6280 if (N1.getValueType() == VT) return N1; // noop conversion. 
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!");
    if (N1.isUndef())
      return getUNDEF(VT);
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    if (N1.isUndef())
      return getUNDEF(VT);
    break;
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    // [us]itofp(undef) = 0, because the result value is bounded.
    if (N1.isUndef())
      return getConstantFP(0.0, DL, VT);
    break;
  case ISD::SIGN_EXTEND:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid SIGN_EXTEND!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "SIGN_EXTEND result type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop extension
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!");
    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) {
      SDNodeFlags Flags;
      if (OpOpcode == ISD::ZERO_EXTEND)
        Flags.setNonNeg(N1->getFlags().hasNonNeg());
      return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
    }
    if (OpOpcode == ISD::UNDEF)
      // sext(undef) = 0, because the top bits will all be the same.
      return getConstant(0, DL, VT);
    break;
  case ISD::ZERO_EXTEND:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid ZERO_EXTEND!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "ZERO_EXTEND result type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop extension
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!");
    if (OpOpcode == ISD::ZERO_EXTEND) { // (zext (zext x)) -> (zext x)
      SDNodeFlags Flags;
      Flags.setNonNeg(N1->getFlags().hasNonNeg());
      return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
    }
    if (OpOpcode == ISD::UNDEF)
      // zext(undef) = 0, because the top bits will be zero.
      return getConstant(0, DL, VT);

    // Skip unnecessary zext_inreg pattern:
    // (zext (trunc x)) -> x iff the upper bits are known zero.
    // TODO: Remove (zext (trunc (and x, c))) exception which some targets
    // use to recognise zext_inreg patterns.
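    // e.g. (illustrative) for an i32 value x whose top 24 bits are known
    // zero: (zext (trunc x to i8) to i32) --> x.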
    if (OpOpcode == ISD::TRUNCATE) {
      SDValue OpOp = N1.getOperand(0);
      if (OpOp.getValueType() == VT) {
        if (OpOp.getOpcode() != ISD::AND) {
          APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(),
                                               N1.getScalarValueSizeInBits());
          if (MaskedValueIsZero(OpOp, HiBits)) {
            transferDbgValues(N1, OpOp);
            return OpOp;
          }
        }
      }
    }
    break;
  case ISD::ANY_EXTEND:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid ANY_EXTEND!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "ANY_EXTEND result type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop extension
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!");

    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
        OpOpcode == ISD::ANY_EXTEND) {
      SDNodeFlags Flags;
      if (OpOpcode == ISD::ZERO_EXTEND)
        Flags.setNonNeg(N1->getFlags().hasNonNeg());
      // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
      return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
    }
    if (OpOpcode == ISD::UNDEF)
      return getUNDEF(VT);

    // (ext (trunc x)) -> x
    if (OpOpcode == ISD::TRUNCATE) {
      SDValue OpOp = N1.getOperand(0);
      if (OpOp.getValueType() == VT) {
        transferDbgValues(N1, OpOp);
        return OpOp;
      }
    }
    break;
  case ISD::TRUNCATE:
    assert(VT.isInteger() && N1.getValueType().isInteger() &&
           "Invalid TRUNCATE!");
    assert(VT.isVector() == N1.getValueType().isVector() &&
           "TRUNCATE result type should be vector iff the operand "
           "type is vector!");
    if (N1.getValueType() == VT) return N1; // noop truncate
    assert((!VT.isVector() || VT.getVectorElementCount() ==
                                  N1.getValueType().getVectorElementCount()) &&
           "Vector element count mismatch!");
    assert(N1.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!");
    if (OpOpcode == ISD::TRUNCATE)
      return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
        OpOpcode == ISD::ANY_EXTEND) {
      // If the source is smaller than the dest, we still need an extend.
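      // e.g. (illustrative) (trunc (sext i8 x to i64) to i32) becomes
      // (sext i8 x to i32), while (trunc (zext i32 x to i64) to i32) is x.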
6406 if (N1.getOperand(0).getValueType().getScalarType().bitsLT( 6407 VT.getScalarType())) 6408 return getNode(OpOpcode, DL, VT, N1.getOperand(0)); 6409 if (N1.getOperand(0).getValueType().bitsGT(VT)) 6410 return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0)); 6411 return N1.getOperand(0); 6412 } 6413 if (OpOpcode == ISD::UNDEF) 6414 return getUNDEF(VT); 6415 if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes) 6416 return getVScale(DL, VT, 6417 N1.getConstantOperandAPInt(0).trunc(VT.getSizeInBits())); 6418 break; 6419 case ISD::ANY_EXTEND_VECTOR_INREG: 6420 case ISD::ZERO_EXTEND_VECTOR_INREG: 6421 case ISD::SIGN_EXTEND_VECTOR_INREG: 6422 assert(VT.isVector() && "This DAG node is restricted to vector types."); 6423 assert(N1.getValueType().bitsLE(VT) && 6424 "The input must be the same size or smaller than the result."); 6425 assert(VT.getVectorMinNumElements() < 6426 N1.getValueType().getVectorMinNumElements() && 6427 "The destination vector type must have fewer lanes than the input."); 6428 break; 6429 case ISD::ABS: 6430 assert(VT.isInteger() && VT == N1.getValueType() && "Invalid ABS!"); 6431 if (OpOpcode == ISD::UNDEF) 6432 return getConstant(0, DL, VT); 6433 break; 6434 case ISD::BSWAP: 6435 assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BSWAP!"); 6436 assert((VT.getScalarSizeInBits() % 16 == 0) && 6437 "BSWAP types must be a multiple of 16 bits!"); 6438 if (OpOpcode == ISD::UNDEF) 6439 return getUNDEF(VT); 6440 // bswap(bswap(X)) -> X. 6441 if (OpOpcode == ISD::BSWAP) 6442 return N1.getOperand(0); 6443 break; 6444 case ISD::BITREVERSE: 6445 assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BITREVERSE!"); 6446 if (OpOpcode == ISD::UNDEF) 6447 return getUNDEF(VT); 6448 break; 6449 case ISD::BITCAST: 6450 assert(VT.getSizeInBits() == N1.getValueSizeInBits() && 6451 "Cannot BITCAST between types of different sizes!"); 6452 if (VT == N1.getValueType()) return N1; // noop conversion. 6453 if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) 6454 return getNode(ISD::BITCAST, DL, VT, N1.getOperand(0)); 6455 if (OpOpcode == ISD::UNDEF) 6456 return getUNDEF(VT); 6457 break; 6458 case ISD::SCALAR_TO_VECTOR: 6459 assert(VT.isVector() && !N1.getValueType().isVector() && 6460 (VT.getVectorElementType() == N1.getValueType() || 6461 (VT.getVectorElementType().isInteger() && 6462 N1.getValueType().isInteger() && 6463 VT.getVectorElementType().bitsLE(N1.getValueType()))) && 6464 "Illegal SCALAR_TO_VECTOR node!"); 6465 if (OpOpcode == ISD::UNDEF) 6466 return getUNDEF(VT); 6467 // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. 6468 if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && 6469 isa<ConstantSDNode>(N1.getOperand(1)) && 6470 N1.getConstantOperandVal(1) == 0 && 6471 N1.getOperand(0).getValueType() == VT) 6472 return N1.getOperand(0); 6473 break; 6474 case ISD::FNEG: 6475 // Negation of an unknown bag of bits is still completely undefined. 
6476 if (OpOpcode == ISD::UNDEF) 6477 return getUNDEF(VT); 6478 6479 if (OpOpcode == ISD::FNEG) // --X -> X 6480 return N1.getOperand(0); 6481 break; 6482 case ISD::FABS: 6483 if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) 6484 return getNode(ISD::FABS, DL, VT, N1.getOperand(0)); 6485 break; 6486 case ISD::VSCALE: 6487 assert(VT == N1.getValueType() && "Unexpected VT!"); 6488 break; 6489 case ISD::CTPOP: 6490 if (N1.getValueType().getScalarType() == MVT::i1) 6491 return N1; 6492 break; 6493 case ISD::CTLZ: 6494 case ISD::CTTZ: 6495 if (N1.getValueType().getScalarType() == MVT::i1) 6496 return getNOT(DL, N1, N1.getValueType()); 6497 break; 6498 case ISD::VECREDUCE_ADD: 6499 if (N1.getValueType().getScalarType() == MVT::i1) 6500 return getNode(ISD::VECREDUCE_XOR, DL, VT, N1); 6501 break; 6502 case ISD::VECREDUCE_SMIN: 6503 case ISD::VECREDUCE_UMAX: 6504 if (N1.getValueType().getScalarType() == MVT::i1) 6505 return getNode(ISD::VECREDUCE_OR, DL, VT, N1); 6506 break; 6507 case ISD::VECREDUCE_SMAX: 6508 case ISD::VECREDUCE_UMIN: 6509 if (N1.getValueType().getScalarType() == MVT::i1) 6510 return getNode(ISD::VECREDUCE_AND, DL, VT, N1); 6511 break; 6512 case ISD::SPLAT_VECTOR: 6513 assert(VT.isVector() && "Wrong return type!"); 6514 // FIXME: Hexagon uses i32 scalar for a floating point zero vector so allow 6515 // that for now. 6516 assert((VT.getVectorElementType() == N1.getValueType() || 6517 (VT.isFloatingPoint() && N1.getValueType() == MVT::i32) || 6518 (VT.getVectorElementType().isInteger() && 6519 N1.getValueType().isInteger() && 6520 VT.getVectorElementType().bitsLE(N1.getValueType()))) && 6521 "Wrong operand type!"); 6522 break; 6523 } 6524 6525 SDNode *N; 6526 SDVTList VTs = getVTList(VT); 6527 SDValue Ops[] = {N1}; 6528 if (VT != MVT::Glue) { // Don't CSE glue producing nodes 6529 FoldingSetNodeID ID; 6530 AddNodeIDNode(ID, Opcode, VTs, Ops); 6531 void *IP = nullptr; 6532 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 6533 E->intersectFlagsWith(Flags); 6534 return SDValue(E, 0); 6535 } 6536 6537 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 6538 N->setFlags(Flags); 6539 createOperands(N, Ops); 6540 CSEMap.InsertNode(N, IP); 6541 } else { 6542 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 6543 createOperands(N, Ops); 6544 } 6545 6546 InsertNode(N); 6547 SDValue V = SDValue(N, 0); 6548 NewSDValueDbgMsg(V, "Creating new node: ", this); 6549 return V; 6550 } 6551 6552 static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, 6553 const APInt &C2) { 6554 switch (Opcode) { 6555 case ISD::ADD: return C1 + C2; 6556 case ISD::SUB: return C1 - C2; 6557 case ISD::MUL: return C1 * C2; 6558 case ISD::AND: return C1 & C2; 6559 case ISD::OR: return C1 | C2; 6560 case ISD::XOR: return C1 ^ C2; 6561 case ISD::SHL: return C1 << C2; 6562 case ISD::SRL: return C1.lshr(C2); 6563 case ISD::SRA: return C1.ashr(C2); 6564 case ISD::ROTL: return C1.rotl(C2); 6565 case ISD::ROTR: return C1.rotr(C2); 6566 case ISD::SMIN: return C1.sle(C2) ? C1 : C2; 6567 case ISD::SMAX: return C1.sge(C2) ? C1 : C2; 6568 case ISD::UMIN: return C1.ule(C2) ? C1 : C2; 6569 case ISD::UMAX: return C1.uge(C2) ? 
C1 : C2; 6570 case ISD::SADDSAT: return C1.sadd_sat(C2); 6571 case ISD::UADDSAT: return C1.uadd_sat(C2); 6572 case ISD::SSUBSAT: return C1.ssub_sat(C2); 6573 case ISD::USUBSAT: return C1.usub_sat(C2); 6574 case ISD::SSHLSAT: return C1.sshl_sat(C2); 6575 case ISD::USHLSAT: return C1.ushl_sat(C2); 6576 case ISD::UDIV: 6577 if (!C2.getBoolValue()) 6578 break; 6579 return C1.udiv(C2); 6580 case ISD::UREM: 6581 if (!C2.getBoolValue()) 6582 break; 6583 return C1.urem(C2); 6584 case ISD::SDIV: 6585 if (!C2.getBoolValue()) 6586 break; 6587 return C1.sdiv(C2); 6588 case ISD::SREM: 6589 if (!C2.getBoolValue()) 6590 break; 6591 return C1.srem(C2); 6592 case ISD::AVGFLOORS: 6593 return APIntOps::avgFloorS(C1, C2); 6594 case ISD::AVGFLOORU: 6595 return APIntOps::avgFloorU(C1, C2); 6596 case ISD::AVGCEILS: 6597 return APIntOps::avgCeilS(C1, C2); 6598 case ISD::AVGCEILU: 6599 return APIntOps::avgCeilU(C1, C2); 6600 case ISD::ABDS: 6601 return APIntOps::abds(C1, C2); 6602 case ISD::ABDU: 6603 return APIntOps::abdu(C1, C2); 6604 case ISD::MULHS: 6605 return APIntOps::mulhs(C1, C2); 6606 case ISD::MULHU: 6607 return APIntOps::mulhu(C1, C2); 6608 } 6609 return std::nullopt; 6610 } 6611 // Handle constant folding with UNDEF. 6612 // TODO: Handle more cases. 6613 static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1, 6614 bool IsUndef1, const APInt &C2, 6615 bool IsUndef2) { 6616 if (!(IsUndef1 || IsUndef2)) 6617 return FoldValue(Opcode, C1, C2); 6618 6619 // Fold and(x, undef) -> 0 6620 // Fold mul(x, undef) -> 0 6621 if (Opcode == ISD::AND || Opcode == ISD::MUL) 6622 return APInt::getZero(C1.getBitWidth()); 6623 6624 return std::nullopt; 6625 } 6626 6627 SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, 6628 const GlobalAddressSDNode *GA, 6629 const SDNode *N2) { 6630 if (GA->getOpcode() != ISD::GlobalAddress) 6631 return SDValue(); 6632 if (!TLI->isOffsetFoldingLegal(GA)) 6633 return SDValue(); 6634 auto *C2 = dyn_cast<ConstantSDNode>(N2); 6635 if (!C2) 6636 return SDValue(); 6637 int64_t Offset = C2->getSExtValue(); 6638 switch (Opcode) { 6639 case ISD::ADD: break; 6640 case ISD::SUB: Offset = -uint64_t(Offset); break; 6641 default: return SDValue(); 6642 } 6643 return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT, 6644 GA->getOffset() + uint64_t(Offset)); 6645 } 6646 6647 bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { 6648 switch (Opcode) { 6649 case ISD::SDIV: 6650 case ISD::UDIV: 6651 case ISD::SREM: 6652 case ISD::UREM: { 6653 // If a divisor is zero/undef or any element of a divisor vector is 6654 // zero/undef, the whole op is undef. 6655 assert(Ops.size() == 2 && "Div/rem should have 2 operands"); 6656 SDValue Divisor = Ops[1]; 6657 if (Divisor.isUndef() || isNullConstant(Divisor)) 6658 return true; 6659 6660 return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && 6661 llvm::any_of(Divisor->op_values(), 6662 [](SDValue V) { return V.isUndef() || 6663 isNullConstant(V); }); 6664 // TODO: Handle signed overflow. 6665 } 6666 // TODO: Handle oversized shifts. 6667 default: 6668 return false; 6669 } 6670 } 6671 6672 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, 6673 EVT VT, ArrayRef<SDValue> Ops, 6674 SDNodeFlags Flags) { 6675 // If the opcode is a target-specific ISD node, there's nothing we can 6676 // do here and the operand rules may not line up with the below, so 6677 // bail early. 6678 // We can't create a scalar CONCAT_VECTORS so skip it. 
It will break 6679 // for concats involving SPLAT_VECTOR. Concats of BUILD_VECTORS are handled by 6680 // foldCONCAT_VECTORS in getNode before this is called. 6681 if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS) 6682 return SDValue(); 6683 6684 unsigned NumOps = Ops.size(); 6685 if (NumOps == 0) 6686 return SDValue(); 6687 6688 if (isUndef(Opcode, Ops)) 6689 return getUNDEF(VT); 6690 6691 // Handle unary special cases. 6692 if (NumOps == 1) { 6693 SDValue N1 = Ops[0]; 6694 6695 // Constant fold unary operations with an integer constant operand. Even 6696 // opaque constant will be folded, because the folding of unary operations 6697 // doesn't create new constants with different values. Nevertheless, the 6698 // opaque flag is preserved during folding to prevent future folding with 6699 // other constants. 6700 if (auto *C = dyn_cast<ConstantSDNode>(N1)) { 6701 const APInt &Val = C->getAPIntValue(); 6702 switch (Opcode) { 6703 case ISD::SIGN_EXTEND: 6704 return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, 6705 C->isTargetOpcode(), C->isOpaque()); 6706 case ISD::TRUNCATE: 6707 if (C->isOpaque()) 6708 break; 6709 [[fallthrough]]; 6710 case ISD::ZERO_EXTEND: 6711 return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, 6712 C->isTargetOpcode(), C->isOpaque()); 6713 case ISD::ANY_EXTEND: 6714 // Some targets like RISCV prefer to sign extend some types. 6715 if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT)) 6716 return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, 6717 C->isTargetOpcode(), C->isOpaque()); 6718 return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, 6719 C->isTargetOpcode(), C->isOpaque()); 6720 case ISD::ABS: 6721 return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), 6722 C->isOpaque()); 6723 case ISD::BITREVERSE: 6724 return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), 6725 C->isOpaque()); 6726 case ISD::BSWAP: 6727 return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), 6728 C->isOpaque()); 6729 case ISD::CTPOP: 6730 return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(), 6731 C->isOpaque()); 6732 case ISD::CTLZ: 6733 case ISD::CTLZ_ZERO_UNDEF: 6734 return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(), 6735 C->isOpaque()); 6736 case ISD::CTTZ: 6737 case ISD::CTTZ_ZERO_UNDEF: 6738 return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), 6739 C->isOpaque()); 6740 case ISD::UINT_TO_FP: 6741 case ISD::SINT_TO_FP: { 6742 APFloat FPV(VT.getFltSemantics(), APInt::getZero(VT.getSizeInBits())); 6743 (void)FPV.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP, 6744 APFloat::rmNearestTiesToEven); 6745 return getConstantFP(FPV, DL, VT); 6746 } 6747 case ISD::FP16_TO_FP: 6748 case ISD::BF16_TO_FP: { 6749 bool Ignored; 6750 APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf() 6751 : APFloat::BFloat(), 6752 (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); 6753 6754 // This can return overflow, underflow, or inexact; we don't care. 6755 // FIXME need to be more flexible about rounding mode. 
6756 (void)FPV.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven, 6757 &Ignored); 6758 return getConstantFP(FPV, DL, VT); 6759 } 6760 case ISD::STEP_VECTOR: 6761 if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this)) 6762 return V; 6763 break; 6764 case ISD::BITCAST: 6765 if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) 6766 return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); 6767 if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) 6768 return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); 6769 if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) 6770 return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); 6771 if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) 6772 return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); 6773 break; 6774 } 6775 } 6776 6777 // Constant fold unary operations with a floating point constant operand. 6778 if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) { 6779 APFloat V = C->getValueAPF(); // make copy 6780 switch (Opcode) { 6781 case ISD::FNEG: 6782 V.changeSign(); 6783 return getConstantFP(V, DL, VT); 6784 case ISD::FABS: 6785 V.clearSign(); 6786 return getConstantFP(V, DL, VT); 6787 case ISD::FCEIL: { 6788 APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); 6789 if (fs == APFloat::opOK || fs == APFloat::opInexact) 6790 return getConstantFP(V, DL, VT); 6791 return SDValue(); 6792 } 6793 case ISD::FTRUNC: { 6794 APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); 6795 if (fs == APFloat::opOK || fs == APFloat::opInexact) 6796 return getConstantFP(V, DL, VT); 6797 return SDValue(); 6798 } 6799 case ISD::FFLOOR: { 6800 APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); 6801 if (fs == APFloat::opOK || fs == APFloat::opInexact) 6802 return getConstantFP(V, DL, VT); 6803 return SDValue(); 6804 } 6805 case ISD::FP_EXTEND: { 6806 bool ignored; 6807 // This can return overflow, underflow, or inexact; we don't care. 6808 // FIXME need to be more flexible about rounding mode. 6809 (void)V.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven, 6810 &ignored); 6811 return getConstantFP(V, DL, VT); 6812 } 6813 case ISD::FP_TO_SINT: 6814 case ISD::FP_TO_UINT: { 6815 bool ignored; 6816 APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT); 6817 // FIXME need to be more flexible about rounding mode. 6818 APFloat::opStatus s = 6819 V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored); 6820 if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual 6821 break; 6822 return getConstant(IntVal, DL, VT); 6823 } 6824 case ISD::FP_TO_FP16: 6825 case ISD::FP_TO_BF16: { 6826 bool Ignored; 6827 // This can return overflow, underflow, or inexact; we don't care. 6828 // FIXME need to be more flexible about rounding mode. 6829 (void)V.convert(Opcode == ISD::FP_TO_FP16 ? 
APFloat::IEEEhalf() 6830 : APFloat::BFloat(), 6831 APFloat::rmNearestTiesToEven, &Ignored); 6832 return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); 6833 } 6834 case ISD::BITCAST: 6835 if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) 6836 return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, 6837 VT); 6838 if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16) 6839 return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, 6840 VT); 6841 if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) 6842 return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, 6843 VT); 6844 if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) 6845 return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); 6846 break; 6847 } 6848 } 6849 6850 // Early-out if we failed to constant fold a bitcast. 6851 if (Opcode == ISD::BITCAST) 6852 return SDValue(); 6853 } 6854 6855 // Handle binops special cases. 6856 if (NumOps == 2) { 6857 if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops)) 6858 return CFP; 6859 6860 if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { 6861 if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) { 6862 if (C1->isOpaque() || C2->isOpaque()) 6863 return SDValue(); 6864 6865 std::optional<APInt> FoldAttempt = 6866 FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); 6867 if (!FoldAttempt) 6868 return SDValue(); 6869 6870 SDValue Folded = getConstant(*FoldAttempt, DL, VT); 6871 assert((!Folded || !VT.isVector()) && 6872 "Can't fold vectors ops with scalar operands"); 6873 return Folded; 6874 } 6875 } 6876 6877 // fold (add Sym, c) -> Sym+c 6878 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0])) 6879 return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode()); 6880 if (TLI->isCommutativeBinOp(Opcode)) 6881 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1])) 6882 return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode()); 6883 6884 // fold (sext_in_reg c1) -> c2 6885 if (Opcode == ISD::SIGN_EXTEND_INREG) { 6886 EVT EVT = cast<VTSDNode>(Ops[1])->getVT(); 6887 6888 auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { 6889 unsigned FromBits = EVT.getScalarSizeInBits(); 6890 Val <<= Val.getBitWidth() - FromBits; 6891 Val.ashrInPlace(Val.getBitWidth() - FromBits); 6892 return getConstant(Val, DL, ConstantVT); 6893 }; 6894 6895 if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { 6896 const APInt &Val = C1->getAPIntValue(); 6897 return SignExtendInReg(Val, VT); 6898 } 6899 6900 if (ISD::isBuildVectorOfConstantSDNodes(Ops[0].getNode())) { 6901 SmallVector<SDValue, 8> ScalarOps; 6902 llvm::EVT OpVT = Ops[0].getOperand(0).getValueType(); 6903 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) { 6904 SDValue Op = Ops[0].getOperand(I); 6905 if (Op.isUndef()) { 6906 ScalarOps.push_back(getUNDEF(OpVT)); 6907 continue; 6908 } 6909 const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); 6910 ScalarOps.push_back(SignExtendInReg(Val, OpVT)); 6911 } 6912 return getBuildVector(VT, DL, ScalarOps); 6913 } 6914 6915 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR && 6916 isa<ConstantSDNode>(Ops[0].getOperand(0))) 6917 return getNode(ISD::SPLAT_VECTOR, DL, VT, 6918 SignExtendInReg(Ops[0].getConstantOperandAPInt(0), 6919 Ops[0].getOperand(0).getValueType())); 6920 } 6921 } 6922 6923 // This is for vector folding only from here on. 6924 if (!VT.isVector()) 6925 return SDValue(); 6926 6927 ElementCount NumElts = VT.getVectorElementCount(); 6928 6929 // See if we can fold through any bitcasted integer ops. 
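// For example, for an integer op on v4i32 whose operands are bitcasts of
// constant v2i64 build vectors, we can extract the raw bits of each operand
// as four i32 lanes, fold the op lane by lane, then rebuild a v2i64 build
// vector from the folded bits and bitcast it back to v4i32.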
6930 if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && 6931 Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && 6932 (Ops[0].getOpcode() == ISD::BITCAST || 6933 Ops[1].getOpcode() == ISD::BITCAST)) { 6934 SDValue N1 = peekThroughBitcasts(Ops[0]); 6935 SDValue N2 = peekThroughBitcasts(Ops[1]); 6936 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); 6937 auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); 6938 if (BV1 && BV2 && N1.getValueType().isInteger() && 6939 N2.getValueType().isInteger()) { 6940 bool IsLE = getDataLayout().isLittleEndian(); 6941 unsigned EltBits = VT.getScalarSizeInBits(); 6942 SmallVector<APInt> RawBits1, RawBits2; 6943 BitVector UndefElts1, UndefElts2; 6944 if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) && 6945 BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) { 6946 SmallVector<APInt> RawBits; 6947 for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) { 6948 std::optional<APInt> Fold = FoldValueWithUndef( 6949 Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]); 6950 if (!Fold) 6951 break; 6952 RawBits.push_back(*Fold); 6953 } 6954 if (RawBits.size() == NumElts.getFixedValue()) { 6955 // We have constant folded, but we might need to cast this again back 6956 // to the original (possibly legalized) type. 6957 EVT BVVT, BVEltVT; 6958 if (N1.getValueType() == VT) { 6959 BVVT = N1.getValueType(); 6960 BVEltVT = BV1->getOperand(0).getValueType(); 6961 } else { 6962 BVVT = N2.getValueType(); 6963 BVEltVT = BV2->getOperand(0).getValueType(); 6964 } 6965 unsigned BVEltBits = BVEltVT.getSizeInBits(); 6966 SmallVector<APInt> DstBits; 6967 BitVector DstUndefs; 6968 BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(), 6969 DstBits, RawBits, DstUndefs, 6970 BitVector(RawBits.size(), false)); 6971 SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT)); 6972 for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { 6973 if (DstUndefs[I]) 6974 continue; 6975 Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT); 6976 } 6977 return getBitcast(VT, getBuildVector(BVVT, DL, Ops)); 6978 } 6979 } 6980 } 6981 } 6982 6983 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). 6984 // (shl step_vector(C0), C1) -> (step_vector(C0 << C1)) 6985 if ((Opcode == ISD::MUL || Opcode == ISD::SHL) && 6986 Ops[0].getOpcode() == ISD::STEP_VECTOR) { 6987 APInt RHSVal; 6988 if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) { 6989 APInt NewStep = Opcode == ISD::MUL 6990 ? Ops[0].getConstantOperandAPInt(0) * RHSVal 6991 : Ops[0].getConstantOperandAPInt(0) << RHSVal; 6992 return getStepVector(DL, VT, NewStep); 6993 } 6994 } 6995 6996 auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) { 6997 return !Op.getValueType().isVector() || 6998 Op.getValueType().getVectorElementCount() == NumElts; 6999 }; 7000 7001 auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { 7002 return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE || 7003 Op.getOpcode() == ISD::BUILD_VECTOR || 7004 Op.getOpcode() == ISD::SPLAT_VECTOR; 7005 }; 7006 7007 // All operands must be vector types with the same number of elements as 7008 // the result type and must be either UNDEF or a build/splat vector 7009 // or UNDEF scalars. 
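// For example, (add (build_vector 1, 2, 3, 4), (build_vector 10, 10, 10, 10))
// is folded lane by lane below into (build_vector 11, 12, 13, 14).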
7010 if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) || 7011 !llvm::all_of(Ops, IsScalarOrSameVectorSize)) 7012 return SDValue(); 7013 7014 // If we are comparing vectors, then the result needs to be a i1 boolean that 7015 // is then extended back to the legal result type depending on how booleans 7016 // are represented. 7017 EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); 7018 ISD::NodeType ExtendCode = 7019 (Opcode == ISD::SETCC && SVT != VT.getScalarType()) 7020 ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT)) 7021 : ISD::SIGN_EXTEND; 7022 7023 // Find legal integer scalar type for constant promotion and 7024 // ensure that its scalar size is at least as large as source. 7025 EVT LegalSVT = VT.getScalarType(); 7026 if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { 7027 LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); 7028 if (LegalSVT.bitsLT(VT.getScalarType())) 7029 return SDValue(); 7030 } 7031 7032 // For scalable vector types we know we're dealing with SPLAT_VECTORs. We 7033 // only have one operand to check. For fixed-length vector types we may have 7034 // a combination of BUILD_VECTOR and SPLAT_VECTOR. 7035 unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); 7036 7037 // Constant fold each scalar lane separately. 7038 SmallVector<SDValue, 4> ScalarResults; 7039 for (unsigned I = 0; I != NumVectorElts; I++) { 7040 SmallVector<SDValue, 4> ScalarOps; 7041 for (SDValue Op : Ops) { 7042 EVT InSVT = Op.getValueType().getScalarType(); 7043 if (Op.getOpcode() != ISD::BUILD_VECTOR && 7044 Op.getOpcode() != ISD::SPLAT_VECTOR) { 7045 if (Op.isUndef()) 7046 ScalarOps.push_back(getUNDEF(InSVT)); 7047 else 7048 ScalarOps.push_back(Op); 7049 continue; 7050 } 7051 7052 SDValue ScalarOp = 7053 Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I); 7054 EVT ScalarVT = ScalarOp.getValueType(); 7055 7056 // Build vector (integer) scalar operands may need implicit 7057 // truncation - do this before constant folding. 7058 if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) { 7059 // Don't create illegally-typed nodes unless they're constants or undef 7060 // - if we fail to constant fold we can't guarantee the (dead) nodes 7061 // we're creating will be cleaned up before being visited for 7062 // legalization. 7063 if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() && 7064 !isa<ConstantSDNode>(ScalarOp) && 7065 TLI->getTypeAction(*getContext(), InSVT) != 7066 TargetLowering::TypeLegal) 7067 return SDValue(); 7068 ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); 7069 } 7070 7071 ScalarOps.push_back(ScalarOp); 7072 } 7073 7074 // Constant fold the scalar operands. 7075 SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); 7076 7077 // Scalar folding only succeeded if the result is a constant or UNDEF. 7078 if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && 7079 ScalarResult.getOpcode() != ISD::ConstantFP) 7080 return SDValue(); 7081 7082 // Legalize the (integer) scalar constant if necessary. We only do 7083 // this once we know the folding succeeded, since otherwise we would 7084 // get a node with illegal type which has a user. 7085 if (LegalSVT != SVT) 7086 ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult); 7087 7088 ScalarResults.push_back(ScalarResult); 7089 } 7090 7091 SDValue V = NumElts.isScalable() ? 
getSplatVector(VT, DL, ScalarResults[0]) 7092 : getBuildVector(VT, DL, ScalarResults); 7093 NewSDValueDbgMsg(V, "New node fold constant vector: ", this); 7094 return V; 7095 } 7096 7097 SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, 7098 EVT VT, ArrayRef<SDValue> Ops) { 7099 // TODO: Add support for unary/ternary fp opcodes. 7100 if (Ops.size() != 2) 7101 return SDValue(); 7102 7103 // TODO: We don't do any constant folding for strict FP opcodes here, but we 7104 // should. That will require dealing with a potentially non-default 7105 // rounding mode, checking the "opStatus" return value from the APFloat 7106 // math calculations, and possibly other variations. 7107 SDValue N1 = Ops[0]; 7108 SDValue N2 = Ops[1]; 7109 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false); 7110 ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false); 7111 if (N1CFP && N2CFP) { 7112 APFloat C1 = N1CFP->getValueAPF(); // make copy 7113 const APFloat &C2 = N2CFP->getValueAPF(); 7114 switch (Opcode) { 7115 case ISD::FADD: 7116 C1.add(C2, APFloat::rmNearestTiesToEven); 7117 return getConstantFP(C1, DL, VT); 7118 case ISD::FSUB: 7119 C1.subtract(C2, APFloat::rmNearestTiesToEven); 7120 return getConstantFP(C1, DL, VT); 7121 case ISD::FMUL: 7122 C1.multiply(C2, APFloat::rmNearestTiesToEven); 7123 return getConstantFP(C1, DL, VT); 7124 case ISD::FDIV: 7125 C1.divide(C2, APFloat::rmNearestTiesToEven); 7126 return getConstantFP(C1, DL, VT); 7127 case ISD::FREM: 7128 C1.mod(C2); 7129 return getConstantFP(C1, DL, VT); 7130 case ISD::FCOPYSIGN: 7131 C1.copySign(C2); 7132 return getConstantFP(C1, DL, VT); 7133 case ISD::FMINNUM: 7134 return getConstantFP(minnum(C1, C2), DL, VT); 7135 case ISD::FMAXNUM: 7136 return getConstantFP(maxnum(C1, C2), DL, VT); 7137 case ISD::FMINIMUM: 7138 return getConstantFP(minimum(C1, C2), DL, VT); 7139 case ISD::FMAXIMUM: 7140 return getConstantFP(maximum(C1, C2), DL, VT); 7141 case ISD::FMINIMUMNUM: 7142 return getConstantFP(minimumnum(C1, C2), DL, VT); 7143 case ISD::FMAXIMUMNUM: 7144 return getConstantFP(maximumnum(C1, C2), DL, VT); 7145 default: break; 7146 } 7147 } 7148 if (N1CFP && Opcode == ISD::FP_ROUND) { 7149 APFloat C1 = N1CFP->getValueAPF(); // make copy 7150 bool Unused; 7151 // This can return overflow, underflow, or inexact; we don't care. 7152 // FIXME need to be more flexible about rounding mode. 7153 (void)C1.convert(VT.getFltSemantics(), APFloat::rmNearestTiesToEven, 7154 &Unused); 7155 return getConstantFP(C1, DL, VT); 7156 } 7157 7158 switch (Opcode) { 7159 case ISD::FSUB: 7160 // -0.0 - undef --> undef (consistent with "fneg undef") 7161 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true)) 7162 if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef()) 7163 return getUNDEF(VT); 7164 [[fallthrough]]; 7165 7166 case ISD::FADD: 7167 case ISD::FMUL: 7168 case ISD::FDIV: 7169 case ISD::FREM: 7170 // If both operands are undef, the result is undef. If 1 operand is undef, 7171 // the result is NaN. This should match the behavior of the IR optimizer. 7172 if (N1.isUndef() && N2.isUndef()) 7173 return getUNDEF(VT); 7174 if (N1.isUndef() || N2.isUndef()) 7175 return getConstantFP(APFloat::getNaN(VT.getFltSemantics()), DL, VT); 7176 } 7177 return SDValue(); 7178 } 7179 7180 SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) { 7181 assert(Val.getValueType().isInteger() && "Invalid AssertAlign!"); 7182 7183 // There's no need to assert on a byte-aligned pointer. 
All pointers are at 7184 // least byte aligned. 7185 if (A == Align(1)) 7186 return Val; 7187 7188 SDVTList VTs = getVTList(Val.getValueType()); 7189 FoldingSetNodeID ID; 7190 AddNodeIDNode(ID, ISD::AssertAlign, VTs, {Val}); 7191 ID.AddInteger(A.value()); 7192 7193 void *IP = nullptr; 7194 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) 7195 return SDValue(E, 0); 7196 7197 auto *N = 7198 newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, A); 7199 createOperands(N, {Val}); 7200 7201 CSEMap.InsertNode(N, IP); 7202 InsertNode(N); 7203 7204 SDValue V(N, 0); 7205 NewSDValueDbgMsg(V, "Creating new node: ", this); 7206 return V; 7207 } 7208 7209 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7210 SDValue N1, SDValue N2) { 7211 SDNodeFlags Flags; 7212 if (Inserter) 7213 Flags = Inserter->getFlags(); 7214 return getNode(Opcode, DL, VT, N1, N2, Flags); 7215 } 7216 7217 void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, 7218 SDValue &N2) const { 7219 if (!TLI->isCommutativeBinOp(Opcode)) 7220 return; 7221 7222 // Canonicalize: 7223 // binop(const, nonconst) -> binop(nonconst, const) 7224 bool N1C = isConstantIntBuildVectorOrConstantInt(N1); 7225 bool N2C = isConstantIntBuildVectorOrConstantInt(N2); 7226 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); 7227 bool N2CFP = isConstantFPBuildVectorOrConstantFP(N2); 7228 if ((N1C && !N2C) || (N1CFP && !N2CFP)) 7229 std::swap(N1, N2); 7230 7231 // Canonicalize: 7232 // binop(splat(x), step_vector) -> binop(step_vector, splat(x)) 7233 else if (N1.getOpcode() == ISD::SPLAT_VECTOR && 7234 N2.getOpcode() == ISD::STEP_VECTOR) 7235 std::swap(N1, N2); 7236 } 7237 7238 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7239 SDValue N1, SDValue N2, const SDNodeFlags Flags) { 7240 assert(N1.getOpcode() != ISD::DELETED_NODE && 7241 N2.getOpcode() != ISD::DELETED_NODE && 7242 "Operand is DELETED_NODE!"); 7243 7244 canonicalizeCommutativeBinop(Opcode, N1, N2); 7245 7246 auto *N1C = dyn_cast<ConstantSDNode>(N1); 7247 auto *N2C = dyn_cast<ConstantSDNode>(N2); 7248 7249 // Don't allow undefs in vector splats - we might be returning N2 when folding 7250 // to zero etc. 7251 ConstantSDNode *N2CV = 7252 isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true); 7253 7254 switch (Opcode) { 7255 default: break; 7256 case ISD::TokenFactor: 7257 assert(VT == MVT::Other && N1.getValueType() == MVT::Other && 7258 N2.getValueType() == MVT::Other && "Invalid token factor!"); 7259 // Fold trivial token factors. 7260 if (N1.getOpcode() == ISD::EntryToken) return N2; 7261 if (N2.getOpcode() == ISD::EntryToken) return N1; 7262 if (N1 == N2) return N1; 7263 break; 7264 case ISD::BUILD_VECTOR: { 7265 // Attempt to simplify BUILD_VECTOR. 7266 SDValue Ops[] = {N1, N2}; 7267 if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) 7268 return V; 7269 break; 7270 } 7271 case ISD::CONCAT_VECTORS: { 7272 SDValue Ops[] = {N1, N2}; 7273 if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) 7274 return V; 7275 break; 7276 } 7277 case ISD::AND: 7278 assert(VT.isInteger() && "This operator does not apply to FP types!"); 7279 assert(N1.getValueType() == N2.getValueType() && 7280 N1.getValueType() == VT && "Binary operator types must match!"); 7281 // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's 7282 // worth handling here. 
7283 if (N2CV && N2CV->isZero()) 7284 return N2; 7285 if (N2CV && N2CV->isAllOnes()) // X & -1 -> X 7286 return N1; 7287 break; 7288 case ISD::OR: 7289 case ISD::XOR: 7290 case ISD::ADD: 7291 case ISD::SUB: 7292 assert(VT.isInteger() && "This operator does not apply to FP types!"); 7293 assert(N1.getValueType() == N2.getValueType() && 7294 N1.getValueType() == VT && "Binary operator types must match!"); 7295 // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so 7296 // it's worth handling here. 7297 if (N2CV && N2CV->isZero()) 7298 return N1; 7299 if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() && 7300 VT.getVectorElementType() == MVT::i1) 7301 return getNode(ISD::XOR, DL, VT, N1, N2); 7302 break; 7303 case ISD::MUL: 7304 assert(VT.isInteger() && "This operator does not apply to FP types!"); 7305 assert(N1.getValueType() == N2.getValueType() && 7306 N1.getValueType() == VT && "Binary operator types must match!"); 7307 if (VT.isVector() && VT.getVectorElementType() == MVT::i1) 7308 return getNode(ISD::AND, DL, VT, N1, N2); 7309 if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { 7310 const APInt &MulImm = N1->getConstantOperandAPInt(0); 7311 const APInt &N2CImm = N2C->getAPIntValue(); 7312 return getVScale(DL, VT, MulImm * N2CImm); 7313 } 7314 break; 7315 case ISD::UDIV: 7316 case ISD::UREM: 7317 case ISD::MULHU: 7318 case ISD::MULHS: 7319 case ISD::SDIV: 7320 case ISD::SREM: 7321 case ISD::SADDSAT: 7322 case ISD::SSUBSAT: 7323 case ISD::UADDSAT: 7324 case ISD::USUBSAT: 7325 assert(VT.isInteger() && "This operator does not apply to FP types!"); 7326 assert(N1.getValueType() == N2.getValueType() && 7327 N1.getValueType() == VT && "Binary operator types must match!"); 7328 if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { 7329 // fold (add_sat x, y) -> (or x, y) for bool types. 7330 if (Opcode == ISD::SADDSAT || Opcode == ISD::UADDSAT) 7331 return getNode(ISD::OR, DL, VT, N1, N2); 7332 // fold (sub_sat x, y) -> (and x, ~y) for bool types. 
7333 if (Opcode == ISD::SSUBSAT || Opcode == ISD::USUBSAT)
7334 return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT));
7335 }
7336 break;
7337 case ISD::SCMP:
7338 case ISD::UCMP:
7339 assert(N1.getValueType() == N2.getValueType() &&
7340 "Types of operands of UCMP/SCMP must match");
7341 assert(N1.getValueType().isVector() == VT.isVector() &&
7342 "Operands and return type must both be scalars or vectors");
7343 if (VT.isVector())
7344 assert(VT.getVectorElementCount() ==
7345 N1.getValueType().getVectorElementCount() &&
7346 "Result and operands must have the same number of elements");
7347 break;
7348 case ISD::AVGFLOORS:
7349 case ISD::AVGFLOORU:
7350 case ISD::AVGCEILS:
7351 case ISD::AVGCEILU:
7352 assert(VT.isInteger() && "This operator does not apply to FP types!");
7353 assert(N1.getValueType() == N2.getValueType() &&
7354 N1.getValueType() == VT && "Binary operator types must match!");
7355 break;
7356 case ISD::ABDS:
7357 case ISD::ABDU:
7358 assert(VT.isInteger() && "This operator does not apply to FP types!");
7359 assert(N1.getValueType() == N2.getValueType() &&
7360 N1.getValueType() == VT && "Binary operator types must match!");
7361 if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7362 return getNode(ISD::XOR, DL, VT, N1, N2);
7363 break;
7364 case ISD::SMIN:
7365 case ISD::UMAX:
7366 assert(VT.isInteger() && "This operator does not apply to FP types!");
7367 assert(N1.getValueType() == N2.getValueType() &&
7368 N1.getValueType() == VT && "Binary operator types must match!");
7369 if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7370 return getNode(ISD::OR, DL, VT, N1, N2);
7371 break;
7372 case ISD::SMAX:
7373 case ISD::UMIN:
7374 assert(VT.isInteger() && "This operator does not apply to FP types!");
7375 assert(N1.getValueType() == N2.getValueType() &&
7376 N1.getValueType() == VT && "Binary operator types must match!");
7377 if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
7378 return getNode(ISD::AND, DL, VT, N1, N2);
7379 break;
7380 case ISD::FADD:
7381 case ISD::FSUB:
7382 case ISD::FMUL:
7383 case ISD::FDIV:
7384 case ISD::FREM:
7385 assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
7386 assert(N1.getValueType() == N2.getValueType() &&
7387 N1.getValueType() == VT && "Binary operator types must match!");
7388 if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags))
7389 return V;
7390 break;
7391 case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
7392 assert(N1.getValueType() == VT &&
7393 N1.getValueType().isFloatingPoint() &&
7394 N2.getValueType().isFloatingPoint() &&
7395 "Invalid FCOPYSIGN!");
7396 break;
7397 case ISD::SHL:
7398 if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) {
7399 const APInt &MulImm = N1->getConstantOperandAPInt(0);
7400 const APInt &ShiftImm = N2C->getAPIntValue();
7401 return getVScale(DL, VT, MulImm << ShiftImm);
7402 }
7403 [[fallthrough]];
7404 case ISD::SRA:
7405 case ISD::SRL:
7406 if (SDValue V = simplifyShift(N1, N2))
7407 return V;
7408 [[fallthrough]];
7409 case ISD::ROTL:
7410 case ISD::ROTR:
7411 assert(VT == N1.getValueType() &&
7412 "Shift operators' return type must be the same as their first arg");
7413 assert(VT.isInteger() && N2.getValueType().isInteger() &&
7414 "Shifts only work on integers");
7415 assert((!VT.isVector() || VT == N2.getValueType()) &&
7416 "Vector shift amounts must be the same type as their first arg");
7417 // Verify that the shift amount VT is big enough to hold valid shift
7418 // amounts.
This catches things like trying to shift an i1024 value by an 7419 // i8, which is easy to fall into in generic code that uses 7420 // TLI.getShiftAmount(). 7421 assert(N2.getValueType().getScalarSizeInBits() >= 7422 Log2_32_Ceil(VT.getScalarSizeInBits()) && 7423 "Invalid use of small shift amount with oversized value!"); 7424 7425 // Always fold shifts of i1 values so the code generator doesn't need to 7426 // handle them. Since we know the size of the shift has to be less than the 7427 // size of the value, the shift/rotate count is guaranteed to be zero. 7428 if (VT == MVT::i1) 7429 return N1; 7430 if (N2CV && N2CV->isZero()) 7431 return N1; 7432 break; 7433 case ISD::FP_ROUND: 7434 assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && 7435 VT.bitsLE(N1.getValueType()) && N2C && 7436 (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && 7437 N2.getOpcode() == ISD::TargetConstant && "Invalid FP_ROUND!"); 7438 if (N1.getValueType() == VT) return N1; // noop conversion. 7439 break; 7440 case ISD::AssertSext: 7441 case ISD::AssertZext: { 7442 EVT EVT = cast<VTSDNode>(N2)->getVT(); 7443 assert(VT == N1.getValueType() && "Not an inreg extend!"); 7444 assert(VT.isInteger() && EVT.isInteger() && 7445 "Cannot *_EXTEND_INREG FP types"); 7446 assert(!EVT.isVector() && 7447 "AssertSExt/AssertZExt type should be the vector element type " 7448 "rather than the vector type!"); 7449 assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); 7450 if (VT.getScalarType() == EVT) return N1; // noop assertion. 7451 break; 7452 } 7453 case ISD::SIGN_EXTEND_INREG: { 7454 EVT EVT = cast<VTSDNode>(N2)->getVT(); 7455 assert(VT == N1.getValueType() && "Not an inreg extend!"); 7456 assert(VT.isInteger() && EVT.isInteger() && 7457 "Cannot *_EXTEND_INREG FP types"); 7458 assert(EVT.isVector() == VT.isVector() && 7459 "SIGN_EXTEND_INREG type should be vector iff the operand " 7460 "type is vector!"); 7461 assert((!EVT.isVector() || 7462 EVT.getVectorElementCount() == VT.getVectorElementCount()) && 7463 "Vector element counts must match in SIGN_EXTEND_INREG"); 7464 assert(EVT.bitsLE(VT) && "Not extending!"); 7465 if (EVT == VT) return N1; // Not actually extending 7466 break; 7467 } 7468 case ISD::FP_TO_SINT_SAT: 7469 case ISD::FP_TO_UINT_SAT: { 7470 assert(VT.isInteger() && cast<VTSDNode>(N2)->getVT().isInteger() && 7471 N1.getValueType().isFloatingPoint() && "Invalid FP_TO_*INT_SAT"); 7472 assert(N1.getValueType().isVector() == VT.isVector() && 7473 "FP_TO_*INT_SAT type should be vector iff the operand type is " 7474 "vector!"); 7475 assert((!VT.isVector() || VT.getVectorElementCount() == 7476 N1.getValueType().getVectorElementCount()) && 7477 "Vector element counts must match in FP_TO_*INT_SAT"); 7478 assert(!cast<VTSDNode>(N2)->getVT().isVector() && 7479 "Type to saturate to must be a scalar."); 7480 assert(cast<VTSDNode>(N2)->getVT().bitsLE(VT.getScalarType()) && 7481 "Not extending!"); 7482 break; 7483 } 7484 case ISD::EXTRACT_VECTOR_ELT: 7485 assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && 7486 "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ 7487 element type of the vector."); 7488 7489 // Extract from an undefined value or using an undefined index is undefined. 7490 if (N1.isUndef() || N2.isUndef()) 7491 return getUNDEF(VT); 7492 7493 // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length 7494 // vectors. For scalable vectors we will provide appropriate support for 7495 // dealing with arbitrary indices. 
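// For example, (extract_vector_elt v4i32:X, 7) reads past the end of a
// fixed-length vector and folds to undef.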
7496 if (N2C && N1.getValueType().isFixedLengthVector() && 7497 N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) 7498 return getUNDEF(VT); 7499 7500 // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is 7501 // expanding copies of large vectors from registers. This only works for 7502 // fixed length vectors, since we need to know the exact number of 7503 // elements. 7504 if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && 7505 N1.getOperand(0).getValueType().isFixedLengthVector()) { 7506 unsigned Factor = 7507 N1.getOperand(0).getValueType().getVectorNumElements(); 7508 return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, 7509 N1.getOperand(N2C->getZExtValue() / Factor), 7510 getVectorIdxConstant(N2C->getZExtValue() % Factor, DL)); 7511 } 7512 7513 // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while 7514 // lowering is expanding large vector constants. 7515 if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR || 7516 N1.getOpcode() == ISD::SPLAT_VECTOR)) { 7517 assert((N1.getOpcode() != ISD::BUILD_VECTOR || 7518 N1.getValueType().isFixedLengthVector()) && 7519 "BUILD_VECTOR used for scalable vectors"); 7520 unsigned Index = 7521 N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0; 7522 SDValue Elt = N1.getOperand(Index); 7523 7524 if (VT != Elt.getValueType()) 7525 // If the vector element type is not legal, the BUILD_VECTOR operands 7526 // are promoted and implicitly truncated, and the result implicitly 7527 // extended. Make that explicit here. 7528 Elt = getAnyExtOrTrunc(Elt, DL, VT); 7529 7530 return Elt; 7531 } 7532 7533 // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector 7534 // operations are lowered to scalars. 7535 if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { 7536 // If the indices are the same, return the inserted element else 7537 // if the indices are known different, extract the element from 7538 // the original vector. 7539 SDValue N1Op2 = N1.getOperand(2); 7540 ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2); 7541 7542 if (N1Op2C && N2C) { 7543 if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { 7544 if (VT == N1.getOperand(1).getValueType()) 7545 return N1.getOperand(1); 7546 if (VT.isFloatingPoint()) { 7547 assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits()); 7548 return getFPExtendOrRound(N1.getOperand(1), DL, VT); 7549 } 7550 return getSExtOrTrunc(N1.getOperand(1), DL, VT); 7551 } 7552 return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); 7553 } 7554 } 7555 7556 // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed 7557 // when vector types are scalarized and v1iX is legal. 7558 // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx). 7559 // Here we are completely ignoring the extract element index (N2), 7560 // which is fine for fixed width vectors, since any index other than 0 7561 // is undefined anyway. However, this cannot be ignored for scalable 7562 // vectors - in theory we could support this, but we don't want to do this 7563 // without a profitability check. 
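// For example, (extract_vector_elt (extract_subvector v8i32:X, 4):v1i32, Idx)
// becomes (extract_vector_elt X, 4) for any fixed-length index Idx.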
7564 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && 7565 N1.getValueType().isFixedLengthVector() && 7566 N1.getValueType().getVectorNumElements() == 1) { 7567 return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), 7568 N1.getOperand(1)); 7569 } 7570 break; 7571 case ISD::EXTRACT_ELEMENT: 7572 assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); 7573 assert(!N1.getValueType().isVector() && !VT.isVector() && 7574 (N1.getValueType().isInteger() == VT.isInteger()) && 7575 N1.getValueType() != VT && 7576 "Wrong types for EXTRACT_ELEMENT!"); 7577 7578 // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding 7579 // 64-bit integers into 32-bit parts. Instead of building the extract of 7580 // the BUILD_PAIR, only to have legalize rip it apart, just do it now. 7581 if (N1.getOpcode() == ISD::BUILD_PAIR) 7582 return N1.getOperand(N2C->getZExtValue()); 7583 7584 // EXTRACT_ELEMENT of a constant int is also very common. 7585 if (N1C) { 7586 unsigned ElementSize = VT.getSizeInBits(); 7587 unsigned Shift = ElementSize * N2C->getZExtValue(); 7588 const APInt &Val = N1C->getAPIntValue(); 7589 return getConstant(Val.extractBits(ElementSize, Shift), DL, VT); 7590 } 7591 break; 7592 case ISD::EXTRACT_SUBVECTOR: { 7593 EVT N1VT = N1.getValueType(); 7594 assert(VT.isVector() && N1VT.isVector() && 7595 "Extract subvector VTs must be vectors!"); 7596 assert(VT.getVectorElementType() == N1VT.getVectorElementType() && 7597 "Extract subvector VTs must have the same element type!"); 7598 assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) && 7599 "Cannot extract a scalable vector from a fixed length vector!"); 7600 assert((VT.isScalableVector() != N1VT.isScalableVector() || 7601 VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) && 7602 "Extract subvector must be from larger vector to smaller vector!"); 7603 assert(N2C && "Extract subvector index must be a constant"); 7604 assert((VT.isScalableVector() != N1VT.isScalableVector() || 7605 (VT.getVectorMinNumElements() + N2C->getZExtValue()) <= 7606 N1VT.getVectorMinNumElements()) && 7607 "Extract subvector overflow!"); 7608 assert(N2C->getAPIntValue().getBitWidth() == 7609 TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && 7610 "Constant index for EXTRACT_SUBVECTOR has an invalid size"); 7611 7612 // Trivial extraction. 7613 if (VT == N1VT) 7614 return N1; 7615 7616 // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. 7617 if (N1.isUndef()) 7618 return getUNDEF(VT); 7619 7620 // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of 7621 // the concat have the same type as the extract. 7622 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 7623 VT == N1.getOperand(0).getValueType()) { 7624 unsigned Factor = VT.getVectorMinNumElements(); 7625 return N1.getOperand(N2C->getZExtValue() / Factor); 7626 } 7627 7628 // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created 7629 // during shuffle legalization. 7630 if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && 7631 VT == N1.getOperand(1).getValueType()) 7632 return N1.getOperand(1); 7633 break; 7634 } 7635 } 7636 7637 // Perform trivial constant folding. 7638 if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) 7639 return SV; 7640 7641 // Canonicalize an UNDEF to the RHS, even over a constant. 
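// For example, (add undef, C) for a commutative opcode becomes (add C, undef),
// so the RHS-undef folds below also cover an undef LHS.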
7642 if (N1.isUndef()) { 7643 if (TLI->isCommutativeBinOp(Opcode)) { 7644 std::swap(N1, N2); 7645 } else { 7646 switch (Opcode) { 7647 case ISD::SUB: 7648 return getUNDEF(VT); // fold op(undef, arg2) -> undef 7649 case ISD::SIGN_EXTEND_INREG: 7650 case ISD::UDIV: 7651 case ISD::SDIV: 7652 case ISD::UREM: 7653 case ISD::SREM: 7654 case ISD::SSUBSAT: 7655 case ISD::USUBSAT: 7656 return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 7657 } 7658 } 7659 } 7660 7661 // Fold a bunch of operators when the RHS is undef. 7662 if (N2.isUndef()) { 7663 switch (Opcode) { 7664 case ISD::XOR: 7665 if (N1.isUndef()) 7666 // Handle undef ^ undef -> 0 special case. This is a common 7667 // idiom (misuse). 7668 return getConstant(0, DL, VT); 7669 [[fallthrough]]; 7670 case ISD::ADD: 7671 case ISD::SUB: 7672 case ISD::UDIV: 7673 case ISD::SDIV: 7674 case ISD::UREM: 7675 case ISD::SREM: 7676 return getUNDEF(VT); // fold op(arg1, undef) -> undef 7677 case ISD::MUL: 7678 case ISD::AND: 7679 case ISD::SSUBSAT: 7680 case ISD::USUBSAT: 7681 return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 7682 case ISD::OR: 7683 case ISD::SADDSAT: 7684 case ISD::UADDSAT: 7685 return getAllOnesConstant(DL, VT); 7686 } 7687 } 7688 7689 // Memoize this node if possible. 7690 SDNode *N; 7691 SDVTList VTs = getVTList(VT); 7692 SDValue Ops[] = {N1, N2}; 7693 if (VT != MVT::Glue) { 7694 FoldingSetNodeID ID; 7695 AddNodeIDNode(ID, Opcode, VTs, Ops); 7696 void *IP = nullptr; 7697 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 7698 E->intersectFlagsWith(Flags); 7699 return SDValue(E, 0); 7700 } 7701 7702 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 7703 N->setFlags(Flags); 7704 createOperands(N, Ops); 7705 CSEMap.InsertNode(N, IP); 7706 } else { 7707 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 7708 createOperands(N, Ops); 7709 } 7710 7711 InsertNode(N); 7712 SDValue V = SDValue(N, 0); 7713 NewSDValueDbgMsg(V, "Creating new node: ", this); 7714 return V; 7715 } 7716 7717 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7718 SDValue N1, SDValue N2, SDValue N3) { 7719 SDNodeFlags Flags; 7720 if (Inserter) 7721 Flags = Inserter->getFlags(); 7722 return getNode(Opcode, DL, VT, N1, N2, N3, Flags); 7723 } 7724 7725 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7726 SDValue N1, SDValue N2, SDValue N3, 7727 const SDNodeFlags Flags) { 7728 assert(N1.getOpcode() != ISD::DELETED_NODE && 7729 N2.getOpcode() != ISD::DELETED_NODE && 7730 N3.getOpcode() != ISD::DELETED_NODE && 7731 "Operand is DELETED_NODE!"); 7732 // Perform various simplifications. 
7733 switch (Opcode) { 7734 case ISD::FMA: 7735 case ISD::FMAD: { 7736 assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); 7737 assert(N1.getValueType() == VT && N2.getValueType() == VT && 7738 N3.getValueType() == VT && "FMA types must match!"); 7739 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 7740 ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); 7741 ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); 7742 if (N1CFP && N2CFP && N3CFP) { 7743 APFloat V1 = N1CFP->getValueAPF(); 7744 const APFloat &V2 = N2CFP->getValueAPF(); 7745 const APFloat &V3 = N3CFP->getValueAPF(); 7746 if (Opcode == ISD::FMAD) { 7747 V1.multiply(V2, APFloat::rmNearestTiesToEven); 7748 V1.add(V3, APFloat::rmNearestTiesToEven); 7749 } else 7750 V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); 7751 return getConstantFP(V1, DL, VT); 7752 } 7753 break; 7754 } 7755 case ISD::BUILD_VECTOR: { 7756 // Attempt to simplify BUILD_VECTOR. 7757 SDValue Ops[] = {N1, N2, N3}; 7758 if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) 7759 return V; 7760 break; 7761 } 7762 case ISD::CONCAT_VECTORS: { 7763 SDValue Ops[] = {N1, N2, N3}; 7764 if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) 7765 return V; 7766 break; 7767 } 7768 case ISD::SETCC: { 7769 assert(VT.isInteger() && "SETCC result type must be an integer!"); 7770 assert(N1.getValueType() == N2.getValueType() && 7771 "SETCC operands must have the same type!"); 7772 assert(VT.isVector() == N1.getValueType().isVector() && 7773 "SETCC type should be vector iff the operand type is vector!"); 7774 assert((!VT.isVector() || VT.getVectorElementCount() == 7775 N1.getValueType().getVectorElementCount()) && 7776 "SETCC vector element counts must match!"); 7777 // Use FoldSetCC to simplify SETCC's. 7778 if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) 7779 return V; 7780 // Vector constant folding. 7781 SDValue Ops[] = {N1, N2, N3}; 7782 if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) { 7783 NewSDValueDbgMsg(V, "New node vector constant folding: ", this); 7784 return V; 7785 } 7786 break; 7787 } 7788 case ISD::SELECT: 7789 case ISD::VSELECT: 7790 if (SDValue V = simplifySelect(N1, N2, N3)) 7791 return V; 7792 break; 7793 case ISD::VECTOR_SHUFFLE: 7794 llvm_unreachable("should use getVectorShuffle constructor!"); 7795 case ISD::VECTOR_SPLICE: { 7796 if (cast<ConstantSDNode>(N3)->isZero()) 7797 return N1; 7798 break; 7799 } 7800 case ISD::INSERT_VECTOR_ELT: { 7801 ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); 7802 // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except 7803 // for scalable vectors where we will generate appropriate code to 7804 // deal with out-of-bounds cases correctly. 7805 if (N3C && N1.getValueType().isFixedLengthVector() && 7806 N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) 7807 return getUNDEF(VT); 7808 7809 // Undefined index can be assumed out-of-bounds, so that's UNDEF too. 7810 if (N3.isUndef()) 7811 return getUNDEF(VT); 7812 7813 // If the inserted element is an UNDEF, just use the input vector. 7814 if (N2.isUndef()) 7815 return N1; 7816 7817 break; 7818 } 7819 case ISD::INSERT_SUBVECTOR: { 7820 // Inserting undef into undef is still undef. 
7821 if (N1.isUndef() && N2.isUndef()) 7822 return getUNDEF(VT); 7823 7824 EVT N2VT = N2.getValueType(); 7825 assert(VT == N1.getValueType() && 7826 "Dest and insert subvector source types must match!"); 7827 assert(VT.isVector() && N2VT.isVector() && 7828 "Insert subvector VTs must be vectors!"); 7829 assert(VT.getVectorElementType() == N2VT.getVectorElementType() && 7830 "Insert subvector VTs must have the same element type!"); 7831 assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && 7832 "Cannot insert a scalable vector into a fixed length vector!"); 7833 assert((VT.isScalableVector() != N2VT.isScalableVector() || 7834 VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) && 7835 "Insert subvector must be from smaller vector to larger vector!"); 7836 assert(isa<ConstantSDNode>(N3) && 7837 "Insert subvector index must be constant"); 7838 assert((VT.isScalableVector() != N2VT.isScalableVector() || 7839 (N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <= 7840 VT.getVectorMinNumElements()) && 7841 "Insert subvector overflow!"); 7842 assert(N3->getAsAPIntVal().getBitWidth() == 7843 TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() && 7844 "Constant index for INSERT_SUBVECTOR has an invalid size"); 7845 7846 // Trivial insertion. 7847 if (VT == N2VT) 7848 return N2; 7849 7850 // If this is an insert of an extracted vector into an undef vector, we 7851 // can just use the input to the extract. 7852 if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && 7853 N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) 7854 return N2.getOperand(0); 7855 break; 7856 } 7857 case ISD::BITCAST: 7858 // Fold bit_convert nodes from a type to themselves. 7859 if (N1.getValueType() == VT) 7860 return N1; 7861 break; 7862 case ISD::VP_TRUNCATE: 7863 case ISD::VP_SIGN_EXTEND: 7864 case ISD::VP_ZERO_EXTEND: 7865 // Don't create noop casts. 7866 if (N1.getValueType() == VT) 7867 return N1; 7868 break; 7869 case ISD::VECTOR_COMPRESS: { 7870 [[maybe_unused]] EVT VecVT = N1.getValueType(); 7871 [[maybe_unused]] EVT MaskVT = N2.getValueType(); 7872 [[maybe_unused]] EVT PassthruVT = N3.getValueType(); 7873 assert(VT == VecVT && "Vector and result type don't match."); 7874 assert(VecVT.isVector() && MaskVT.isVector() && PassthruVT.isVector() && 7875 "All inputs must be vectors."); 7876 assert(VecVT == PassthruVT && "Vector and passthru types don't match."); 7877 assert(VecVT.getVectorElementCount() == MaskVT.getVectorElementCount() && 7878 "Vector and mask must have same number of elements."); 7879 7880 if (N1.isUndef() || N2.isUndef()) 7881 return N3; 7882 7883 break; 7884 } 7885 } 7886 7887 // Memoize node if it doesn't produce a glue result. 
7888 SDNode *N; 7889 SDVTList VTs = getVTList(VT); 7890 SDValue Ops[] = {N1, N2, N3}; 7891 if (VT != MVT::Glue) { 7892 FoldingSetNodeID ID; 7893 AddNodeIDNode(ID, Opcode, VTs, Ops); 7894 void *IP = nullptr; 7895 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 7896 E->intersectFlagsWith(Flags); 7897 return SDValue(E, 0); 7898 } 7899 7900 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 7901 N->setFlags(Flags); 7902 createOperands(N, Ops); 7903 CSEMap.InsertNode(N, IP); 7904 } else { 7905 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 7906 createOperands(N, Ops); 7907 } 7908 7909 InsertNode(N); 7910 SDValue V = SDValue(N, 0); 7911 NewSDValueDbgMsg(V, "Creating new node: ", this); 7912 return V; 7913 } 7914 7915 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7916 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 7917 const SDNodeFlags Flags) { 7918 SDValue Ops[] = { N1, N2, N3, N4 }; 7919 return getNode(Opcode, DL, VT, Ops, Flags); 7920 } 7921 7922 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7923 SDValue N1, SDValue N2, SDValue N3, SDValue N4) { 7924 SDNodeFlags Flags; 7925 if (Inserter) 7926 Flags = Inserter->getFlags(); 7927 return getNode(Opcode, DL, VT, N1, N2, N3, N4, Flags); 7928 } 7929 7930 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7931 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 7932 SDValue N5, const SDNodeFlags Flags) { 7933 SDValue Ops[] = { N1, N2, N3, N4, N5 }; 7934 return getNode(Opcode, DL, VT, Ops, Flags); 7935 } 7936 7937 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 7938 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 7939 SDValue N5) { 7940 SDNodeFlags Flags; 7941 if (Inserter) 7942 Flags = Inserter->getFlags(); 7943 return getNode(Opcode, DL, VT, N1, N2, N3, N4, N5, Flags); 7944 } 7945 7946 /// getStackArgumentTokenFactor - Compute a TokenFactor to force all 7947 /// the incoming stack arguments to be loaded from the stack. 7948 SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { 7949 SmallVector<SDValue, 8> ArgChains; 7950 7951 // Include the original chain at the beginning of the list. When this is 7952 // used by target LowerCall hooks, this helps legalize find the 7953 // CALLSEQ_BEGIN node. 7954 ArgChains.push_back(Chain); 7955 7956 // Add a chain value for each stack argument. 7957 for (SDNode *U : getEntryNode().getNode()->users()) 7958 if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) 7959 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) 7960 if (FI->getIndex() < 0) 7961 ArgChains.push_back(SDValue(L, 1)); 7962 7963 // Build a tokenfactor for all the chains. 7964 return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); 7965 } 7966 7967 /// getMemsetValue - Vectorized representation of the memset value 7968 /// operand. 
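/// For example, an i8 fill value V widened to an i32 memset word becomes the
/// splat V * 0x01010101, i.e. the fill byte repeated across the word.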
7969 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, 7970 const SDLoc &dl) { 7971 assert(!Value.isUndef()); 7972 7973 unsigned NumBits = VT.getScalarSizeInBits(); 7974 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { 7975 assert(C->getAPIntValue().getBitWidth() == 8); 7976 APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); 7977 if (VT.isInteger()) { 7978 bool IsOpaque = VT.getSizeInBits() > 64 || 7979 !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue()); 7980 return DAG.getConstant(Val, dl, VT, false, IsOpaque); 7981 } 7982 return DAG.getConstantFP(APFloat(VT.getFltSemantics(), Val), dl, VT); 7983 } 7984 7985 assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); 7986 EVT IntVT = VT.getScalarType(); 7987 if (!IntVT.isInteger()) 7988 IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); 7989 7990 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); 7991 if (NumBits > 8) { 7992 // Use a multiplication with 0x010101... to extend the input to the 7993 // required length. 7994 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); 7995 Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, 7996 DAG.getConstant(Magic, dl, IntVT)); 7997 } 7998 7999 if (VT != Value.getValueType() && !VT.isInteger()) 8000 Value = DAG.getBitcast(VT.getScalarType(), Value); 8001 if (VT != Value.getValueType()) 8002 Value = DAG.getSplatBuildVector(VT, dl, Value); 8003 8004 return Value; 8005 } 8006 8007 /// getMemsetStringVal - Similar to getMemsetValue. Except this is only 8008 /// used when a memcpy is turned into a memset when the source is a constant 8009 /// string ptr. 8010 static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, 8011 const TargetLowering &TLI, 8012 const ConstantDataArraySlice &Slice) { 8013 // Handle vector with all elements zero. 8014 if (Slice.Array == nullptr) { 8015 if (VT.isInteger()) 8016 return DAG.getConstant(0, dl, VT); 8017 if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) 8018 return DAG.getConstantFP(0.0, dl, VT); 8019 if (VT.isVector()) { 8020 unsigned NumElts = VT.getVectorNumElements(); 8021 MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; 8022 return DAG.getNode(ISD::BITCAST, dl, VT, 8023 DAG.getConstant(0, dl, 8024 EVT::getVectorVT(*DAG.getContext(), 8025 EltVT, NumElts))); 8026 } 8027 llvm_unreachable("Expected type!"); 8028 } 8029 8030 assert(!VT.isVector() && "Can't handle vector type here!"); 8031 unsigned NumVTBits = VT.getSizeInBits(); 8032 unsigned NumVTBytes = NumVTBits / 8; 8033 unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length)); 8034 8035 APInt Val(NumVTBits, 0); 8036 if (DAG.getDataLayout().isLittleEndian()) { 8037 for (unsigned i = 0; i != NumBytes; ++i) 8038 Val |= (uint64_t)(unsigned char)Slice[i] << i*8; 8039 } else { 8040 for (unsigned i = 0; i != NumBytes; ++i) 8041 Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; 8042 } 8043 8044 // If the "cost" of materializing the integer immediate is less than the cost 8045 // of a load, then it is cost effective to turn the load into the immediate. 
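// For example, on a little-endian target a 4-byte slice "abcd" packs to
// Val == 0x64636261; whether that immediate is cheaper than loading from the
// original constant is left to the target via shouldConvertConstantLoadToIntImm.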
8046 Type *Ty = VT.getTypeForEVT(*DAG.getContext()); 8047 if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) 8048 return DAG.getConstant(Val, dl, VT); 8049 return SDValue(); 8050 } 8051 8052 SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, 8053 const SDLoc &DL, 8054 const SDNodeFlags Flags) { 8055 EVT VT = Base.getValueType(); 8056 SDValue Index; 8057 8058 if (Offset.isScalable()) 8059 Index = getVScale(DL, Base.getValueType(), 8060 APInt(Base.getValueSizeInBits().getFixedValue(), 8061 Offset.getKnownMinValue())); 8062 else 8063 Index = getConstant(Offset.getFixedValue(), DL, VT); 8064 8065 return getMemBasePlusOffset(Base, Index, DL, Flags); 8066 } 8067 8068 SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, 8069 const SDLoc &DL, 8070 const SDNodeFlags Flags) { 8071 assert(Offset.getValueType().isInteger()); 8072 EVT BasePtrVT = Ptr.getValueType(); 8073 return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); 8074 } 8075 8076 /// Returns true if memcpy source is constant data. 8077 static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { 8078 uint64_t SrcDelta = 0; 8079 GlobalAddressSDNode *G = nullptr; 8080 if (Src.getOpcode() == ISD::GlobalAddress) 8081 G = cast<GlobalAddressSDNode>(Src); 8082 else if (Src.getOpcode() == ISD::ADD && 8083 Src.getOperand(0).getOpcode() == ISD::GlobalAddress && 8084 Src.getOperand(1).getOpcode() == ISD::Constant) { 8085 G = cast<GlobalAddressSDNode>(Src.getOperand(0)); 8086 SrcDelta = Src.getConstantOperandVal(1); 8087 } 8088 if (!G) 8089 return false; 8090 8091 return getConstantDataArrayInfo(G->getGlobal(), Slice, 8, 8092 SrcDelta + G->getOffset()); 8093 } 8094 8095 static bool shouldLowerMemFuncForSize(const MachineFunction &MF, 8096 SelectionDAG &DAG) { 8097 // On Darwin, -Os means optimize for size without hurting performance, so 8098 // only really optimize for size when -Oz (MinSize) is used. 8099 if (MF.getTarget().getTargetTriple().isOSDarwin()) 8100 return MF.getFunction().hasMinSize(); 8101 return DAG.shouldOptForSize(); 8102 } 8103 8104 static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, 8105 SmallVector<SDValue, 32> &OutChains, unsigned From, 8106 unsigned To, SmallVector<SDValue, 16> &OutLoadChains, 8107 SmallVector<SDValue, 16> &OutStoreChains) { 8108 assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); 8109 assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); 8110 SmallVector<SDValue, 16> GluedLoadChains; 8111 for (unsigned i = From; i < To; ++i) { 8112 OutChains.push_back(OutLoadChains[i]); 8113 GluedLoadChains.push_back(OutLoadChains[i]); 8114 } 8115 8116 // Chain for all loads. 8117 SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 8118 GluedLoadChains); 8119 8120 for (unsigned i = From; i < To; ++i) { 8121 StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); 8122 SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), 8123 ST->getBasePtr(), ST->getMemoryVT(), 8124 ST->getMemOperand()); 8125 OutChains.push_back(NewStore); 8126 } 8127 } 8128 8129 static SDValue getMemcpyLoadsAndStores( 8130 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, 8131 uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, 8132 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, 8133 const AAMDNodes &AAInfo, BatchAAResults *BatchAA) { 8134 // Turn a memcpy of undef to nop. 8135 // FIXME: We need to honor volatile even is Src is undef. 
8136 if (Src.isUndef()) 8137 return Chain; 8138 8139 // Expand memcpy to a series of load and store ops if the size operand falls 8140 // below a certain threshold. 8141 // TODO: In the AlwaysInline case, if the size is big then generate a loop 8142 // rather than maybe a humongous number of loads and stores. 8143 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8144 const DataLayout &DL = DAG.getDataLayout(); 8145 LLVMContext &C = *DAG.getContext(); 8146 std::vector<EVT> MemOps; 8147 bool DstAlignCanChange = false; 8148 MachineFunction &MF = DAG.getMachineFunction(); 8149 MachineFrameInfo &MFI = MF.getFrameInfo(); 8150 bool OptSize = shouldLowerMemFuncForSize(MF, DAG); 8151 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); 8152 if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) 8153 DstAlignCanChange = true; 8154 MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); 8155 if (!SrcAlign || Alignment > *SrcAlign) 8156 SrcAlign = Alignment; 8157 assert(SrcAlign && "SrcAlign must be set"); 8158 ConstantDataArraySlice Slice; 8159 // If marked as volatile, perform a copy even when marked as constant. 8160 bool CopyFromConstant = !isVol && isMemSrcFromConstant(Src, Slice); 8161 bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; 8162 unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); 8163 const MemOp Op = isZeroConstant 8164 ? MemOp::Set(Size, DstAlignCanChange, Alignment, 8165 /*IsZeroMemset*/ true, isVol) 8166 : MemOp::Copy(Size, DstAlignCanChange, Alignment, 8167 *SrcAlign, isVol, CopyFromConstant); 8168 if (!TLI.findOptimalMemOpLowering( 8169 MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), 8170 SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) 8171 return SDValue(); 8172 8173 if (DstAlignCanChange) { 8174 Type *Ty = MemOps[0].getTypeForEVT(C); 8175 Align NewAlign = DL.getABITypeAlign(Ty); 8176 8177 // Don't promote to an alignment that would require dynamic stack 8178 // realignment which may conflict with optimizations such as tail call 8179 // optimization. 8180 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 8181 if (!TRI->hasStackRealignment(MF)) 8182 if (MaybeAlign StackAlign = DL.getStackAlignment()) 8183 NewAlign = std::min(NewAlign, *StackAlign); 8184 8185 if (NewAlign > Alignment) { 8186 // Give the stack frame object a larger alignment if needed. 8187 if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) 8188 MFI.setObjectAlignment(FI->getIndex(), NewAlign); 8189 Alignment = NewAlign; 8190 } 8191 } 8192 8193 // Prepare AAInfo for loads/stores after lowering this memcpy. 8194 AAMDNodes NewAAInfo = AAInfo; 8195 NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; 8196 8197 const Value *SrcVal = dyn_cast_if_present<const Value *>(SrcPtrInfo.V); 8198 bool isConstant = 8199 BatchAA && SrcVal && 8200 BatchAA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo)); 8201 8202 MachineMemOperand::Flags MMOFlags = 8203 isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; 8204 SmallVector<SDValue, 16> OutLoadChains; 8205 SmallVector<SDValue, 16> OutStoreChains; 8206 SmallVector<SDValue, 32> OutChains; 8207 unsigned NumMemOps = MemOps.size(); 8208 uint64_t SrcOff = 0, DstOff = 0; 8209 for (unsigned i = 0; i != NumMemOps; ++i) { 8210 EVT VT = MemOps[i]; 8211 unsigned VTSize = VT.getSizeInBits() / 8; 8212 SDValue Value, Store; 8213 8214 if (VTSize > Size) { 8215 // Issuing an unaligned load / store pair that overlaps with the previous 8216 // pair. Adjust the offset accordingly. 
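// For example, a 7-byte copy lowered as two i32 operations ends up with
// offsets 0 and 3: the final access is pulled back by (VTSize - Size) bytes so
// it overlaps the previous one instead of running past the end of the region.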
8217 assert(i == NumMemOps-1 && i != 0); 8218 SrcOff -= VTSize - Size; 8219 DstOff -= VTSize - Size; 8220 } 8221 8222 if (CopyFromConstant && 8223 (isZeroConstant || (VT.isInteger() && !VT.isVector()))) { 8224 // It's unlikely a store of a vector immediate can be done in a single 8225 // instruction. It would require a load from a constantpool first. 8226 // We only handle zero vectors here. 8227 // FIXME: Handle other cases where store of vector immediate is done in 8228 // a single instruction. 8229 ConstantDataArraySlice SubSlice; 8230 if (SrcOff < Slice.Length) { 8231 SubSlice = Slice; 8232 SubSlice.move(SrcOff); 8233 } else { 8234 // This is an out-of-bounds access and hence UB. Pretend we read zero. 8235 SubSlice.Array = nullptr; 8236 SubSlice.Offset = 0; 8237 SubSlice.Length = VTSize; 8238 } 8239 Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); 8240 if (Value.getNode()) { 8241 Store = DAG.getStore( 8242 Chain, dl, Value, 8243 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8244 DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); 8245 OutChains.push_back(Store); 8246 } 8247 } 8248 8249 if (!Store.getNode()) { 8250 // The type might not be legal for the target. This should only happen 8251 // if the type is smaller than a legal type, as on PPC, so the right 8252 // thing to do is generate a LoadExt/StoreTrunc pair. These simplify 8253 // to Load/Store if NVT==VT. 8254 // FIXME does the case above also need this? 8255 EVT NVT = TLI.getTypeToTransformTo(C, VT); 8256 assert(NVT.bitsGE(VT)); 8257 8258 bool isDereferenceable = 8259 SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); 8260 MachineMemOperand::Flags SrcMMOFlags = MMOFlags; 8261 if (isDereferenceable) 8262 SrcMMOFlags |= MachineMemOperand::MODereferenceable; 8263 if (isConstant) 8264 SrcMMOFlags |= MachineMemOperand::MOInvariant; 8265 8266 Value = DAG.getExtLoad( 8267 ISD::EXTLOAD, dl, NVT, Chain, 8268 DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), 8269 SrcPtrInfo.getWithOffset(SrcOff), VT, 8270 commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); 8271 OutLoadChains.push_back(Value.getValue(1)); 8272 8273 Store = DAG.getTruncStore( 8274 Chain, dl, Value, 8275 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8276 DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); 8277 OutStoreChains.push_back(Store); 8278 } 8279 SrcOff += VTSize; 8280 DstOff += VTSize; 8281 Size -= VTSize; 8282 } 8283 8284 unsigned GluedLdStLimit = MaxLdStGlue == 0 ? 8285 TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; 8286 unsigned NumLdStInMemcpy = OutStoreChains.size(); 8287 8288 if (NumLdStInMemcpy) { 8289 // It may be that memcpy might be converted to memset if it's memcpy 8290 // of constants. In such a case, we won't have loads and stores, but 8291 // just stores. In the absence of loads, there is nothing to gang up. 8292 if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { 8293 // If target does not care, just leave as it. 8294 for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { 8295 OutChains.push_back(OutLoadChains[i]); 8296 OutChains.push_back(OutStoreChains[i]); 8297 } 8298 } else { 8299 // Ld/St less than/equal limit set by target. 
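// If the count fits within the limit everything is glued as one chunk;
// otherwise the loop below glues chunks of GluedLdStLimit working backwards
// from the end of the sequence, and the remainder
// (NumLdStInMemcpy % GluedLdStLimit) is emitted as a final smaller chunk.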
8300 if (NumLdStInMemcpy <= GluedLdStLimit) { 8301 chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, 8302 NumLdStInMemcpy, OutLoadChains, 8303 OutStoreChains); 8304 } else { 8305 unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; 8306 unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; 8307 unsigned GlueIter = 0; 8308 8309 for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { 8310 unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; 8311 unsigned IndexTo = NumLdStInMemcpy - GlueIter; 8312 8313 chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, 8314 OutLoadChains, OutStoreChains); 8315 GlueIter += GluedLdStLimit; 8316 } 8317 8318 // Residual ld/st. 8319 if (RemainingLdStInMemcpy) { 8320 chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, 8321 RemainingLdStInMemcpy, OutLoadChains, 8322 OutStoreChains); 8323 } 8324 } 8325 } 8326 } 8327 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); 8328 } 8329 8330 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, 8331 SDValue Chain, SDValue Dst, SDValue Src, 8332 uint64_t Size, Align Alignment, 8333 bool isVol, bool AlwaysInline, 8334 MachinePointerInfo DstPtrInfo, 8335 MachinePointerInfo SrcPtrInfo, 8336 const AAMDNodes &AAInfo) { 8337 // Turn a memmove of undef to nop. 8338 // FIXME: We need to honor volatile even is Src is undef. 8339 if (Src.isUndef()) 8340 return Chain; 8341 8342 // Expand memmove to a series of load and store ops if the size operand falls 8343 // below a certain threshold. 8344 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8345 const DataLayout &DL = DAG.getDataLayout(); 8346 LLVMContext &C = *DAG.getContext(); 8347 std::vector<EVT> MemOps; 8348 bool DstAlignCanChange = false; 8349 MachineFunction &MF = DAG.getMachineFunction(); 8350 MachineFrameInfo &MFI = MF.getFrameInfo(); 8351 bool OptSize = shouldLowerMemFuncForSize(MF, DAG); 8352 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); 8353 if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) 8354 DstAlignCanChange = true; 8355 MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); 8356 if (!SrcAlign || Alignment > *SrcAlign) 8357 SrcAlign = Alignment; 8358 assert(SrcAlign && "SrcAlign must be set"); 8359 unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); 8360 if (!TLI.findOptimalMemOpLowering( 8361 MemOps, Limit, 8362 MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign, 8363 /*IsVolatile*/ true), 8364 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), 8365 MF.getFunction().getAttributes())) 8366 return SDValue(); 8367 8368 if (DstAlignCanChange) { 8369 Type *Ty = MemOps[0].getTypeForEVT(C); 8370 Align NewAlign = DL.getABITypeAlign(Ty); 8371 8372 // Don't promote to an alignment that would require dynamic stack 8373 // realignment which may conflict with optimizations such as tail call 8374 // optimization. 8375 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 8376 if (!TRI->hasStackRealignment(MF)) 8377 if (MaybeAlign StackAlign = DL.getStackAlignment()) 8378 NewAlign = std::min(NewAlign, *StackAlign); 8379 8380 if (NewAlign > Alignment) { 8381 // Give the stack frame object a larger alignment if needed. 8382 if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) 8383 MFI.setObjectAlignment(FI->getIndex(), NewAlign); 8384 Alignment = NewAlign; 8385 } 8386 } 8387 8388 // Prepare AAInfo for loads/stores after lowering this memmove. 
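// The TBAA tags described the original aggregate-typed copy; the widened,
// type-punned accesses built below may not match them, so they are dropped
// conservatively while the scope/noalias metadata is kept.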
8389 AAMDNodes NewAAInfo = AAInfo; 8390 NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; 8391 8392 MachineMemOperand::Flags MMOFlags = 8393 isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; 8394 uint64_t SrcOff = 0, DstOff = 0; 8395 SmallVector<SDValue, 8> LoadValues; 8396 SmallVector<SDValue, 8> LoadChains; 8397 SmallVector<SDValue, 8> OutChains; 8398 unsigned NumMemOps = MemOps.size(); 8399 for (unsigned i = 0; i < NumMemOps; i++) { 8400 EVT VT = MemOps[i]; 8401 unsigned VTSize = VT.getSizeInBits() / 8; 8402 SDValue Value; 8403 8404 bool isDereferenceable = 8405 SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); 8406 MachineMemOperand::Flags SrcMMOFlags = MMOFlags; 8407 if (isDereferenceable) 8408 SrcMMOFlags |= MachineMemOperand::MODereferenceable; 8409 8410 Value = DAG.getLoad( 8411 VT, dl, Chain, 8412 DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), 8413 SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); 8414 LoadValues.push_back(Value); 8415 LoadChains.push_back(Value.getValue(1)); 8416 SrcOff += VTSize; 8417 } 8418 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); 8419 OutChains.clear(); 8420 for (unsigned i = 0; i < NumMemOps; i++) { 8421 EVT VT = MemOps[i]; 8422 unsigned VTSize = VT.getSizeInBits() / 8; 8423 SDValue Store; 8424 8425 Store = DAG.getStore( 8426 Chain, dl, LoadValues[i], 8427 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8428 DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); 8429 OutChains.push_back(Store); 8430 DstOff += VTSize; 8431 } 8432 8433 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); 8434 } 8435 8436 /// Lower the call to 'memset' intrinsic function into a series of store 8437 /// operations. 8438 /// 8439 /// \param DAG Selection DAG where lowered code is placed. 8440 /// \param dl Link to corresponding IR location. 8441 /// \param Chain Control flow dependency. 8442 /// \param Dst Pointer to destination memory location. 8443 /// \param Src Value of byte to write into the memory. 8444 /// \param Size Number of bytes to write. 8445 /// \param Alignment Alignment of the destination in bytes. 8446 /// \param isVol True if destination is volatile. 8447 /// \param AlwaysInline Makes sure no function call is generated. 8448 /// \param DstPtrInfo IR information on the memory pointer. 8449 /// \returns New head in the control flow, if lowering was successful, empty 8450 /// SDValue otherwise. 8451 /// 8452 /// The function tries to replace 'llvm.memset' intrinsic with several store 8453 /// operations and value calculation code. This is usually profitable for small 8454 /// memory size or when the semantic requires inlining. 8455 static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, 8456 SDValue Chain, SDValue Dst, SDValue Src, 8457 uint64_t Size, Align Alignment, bool isVol, 8458 bool AlwaysInline, MachinePointerInfo DstPtrInfo, 8459 const AAMDNodes &AAInfo) { 8460 // Turn a memset of undef to nop. 8461 // FIXME: We need to honor volatile even is Src is undef. 8462 if (Src.isUndef()) 8463 return Chain; 8464 8465 // Expand memset to a series of load/store ops if the size operand 8466 // falls below a certain threshold. 
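// The threshold comes from the target's getMaxStoresPerMemset() (treated as
// unbounded when AlwaysInline is set) and is enforced by
// findOptimalMemOpLowering() below.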
8467 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8468 std::vector<EVT> MemOps; 8469 bool DstAlignCanChange = false; 8470 MachineFunction &MF = DAG.getMachineFunction(); 8471 MachineFrameInfo &MFI = MF.getFrameInfo(); 8472 bool OptSize = shouldLowerMemFuncForSize(MF, DAG); 8473 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); 8474 if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) 8475 DstAlignCanChange = true; 8476 bool IsZeroVal = isNullConstant(Src); 8477 unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize); 8478 8479 if (!TLI.findOptimalMemOpLowering( 8480 MemOps, Limit, 8481 MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), 8482 DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) 8483 return SDValue(); 8484 8485 if (DstAlignCanChange) { 8486 Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); 8487 const DataLayout &DL = DAG.getDataLayout(); 8488 Align NewAlign = DL.getABITypeAlign(Ty); 8489 8490 // Don't promote to an alignment that would require dynamic stack 8491 // realignment which may conflict with optimizations such as tail call 8492 // optimization. 8493 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 8494 if (!TRI->hasStackRealignment(MF)) 8495 if (MaybeAlign StackAlign = DL.getStackAlignment()) 8496 NewAlign = std::min(NewAlign, *StackAlign); 8497 8498 if (NewAlign > Alignment) { 8499 // Give the stack frame object a larger alignment if needed. 8500 if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) 8501 MFI.setObjectAlignment(FI->getIndex(), NewAlign); 8502 Alignment = NewAlign; 8503 } 8504 } 8505 8506 SmallVector<SDValue, 8> OutChains; 8507 uint64_t DstOff = 0; 8508 unsigned NumMemOps = MemOps.size(); 8509 8510 // Find the largest store and generate the bit pattern for it. 8511 EVT LargestVT = MemOps[0]; 8512 for (unsigned i = 1; i < NumMemOps; i++) 8513 if (MemOps[i].bitsGT(LargestVT)) 8514 LargestVT = MemOps[i]; 8515 SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); 8516 8517 // Prepare AAInfo for loads/stores after lowering this memset. 8518 AAMDNodes NewAAInfo = AAInfo; 8519 NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; 8520 8521 for (unsigned i = 0; i < NumMemOps; i++) { 8522 EVT VT = MemOps[i]; 8523 unsigned VTSize = VT.getSizeInBits() / 8; 8524 if (VTSize > Size) { 8525 // Issuing an unaligned load / store pair that overlaps with the previous 8526 // pair. Adjust the offset accordingly. 8527 assert(i == NumMemOps-1 && i != 0); 8528 DstOff -= VTSize - Size; 8529 } 8530 8531 // If this store is smaller than the largest store see whether we can get 8532 // the smaller value for free with a truncate or extract vector element and 8533 // then store. 8534 SDValue Value = MemSetValue; 8535 if (VT.bitsLT(LargestVT)) { 8536 unsigned Index; 8537 unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits(); 8538 EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts); 8539 if (!LargestVT.isVector() && !VT.isVector() && 8540 TLI.isTruncateFree(LargestVT, VT)) 8541 Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); 8542 else if (LargestVT.isVector() && !VT.isVector() && 8543 TLI.shallExtractConstSplatVectorElementToStore( 8544 LargestVT.getTypeForEVT(*DAG.getContext()), 8545 VT.getSizeInBits(), Index) && 8546 TLI.isTypeLegal(SVT) && 8547 LargestVT.getSizeInBits() == SVT.getSizeInBits()) { 8548 // Target which can combine store(extractelement VectorTy, Idx) can get 8549 // the smaller value for free. 
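// For example, with LargestVT == v16i8 and a trailing i32 store, SVT is v4i32:
// the splatted memset value is bitcast to v4i32 and element Index is
// extracted, avoiding a separate re-materialization of the narrow value.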
8550 SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue); 8551 Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue, 8552 DAG.getVectorIdxConstant(Index, dl)); 8553 } else 8554 Value = getMemsetValue(Src, VT, DAG, dl); 8555 } 8556 assert(Value.getValueType() == VT && "Value with wrong type."); 8557 SDValue Store = DAG.getStore( 8558 Chain, dl, Value, 8559 DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), 8560 DstPtrInfo.getWithOffset(DstOff), Alignment, 8561 isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, 8562 NewAAInfo); 8563 OutChains.push_back(Store); 8564 DstOff += VT.getSizeInBits() / 8; 8565 Size -= VTSize; 8566 } 8567 8568 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); 8569 } 8570 8571 static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, 8572 unsigned AS) { 8573 // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all 8574 // pointer operands can be losslessly bitcasted to pointers of address space 0 8575 if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) { 8576 report_fatal_error("cannot lower memory intrinsic in address space " + 8577 Twine(AS)); 8578 } 8579 } 8580 8581 SDValue SelectionDAG::getMemcpy( 8582 SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, 8583 Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, 8584 std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo, 8585 MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo, 8586 BatchAAResults *BatchAA) { 8587 // Check to see if we should lower the memcpy to loads and stores first. 8588 // For cases within the target-specified limits, this is the best choice. 8589 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 8590 if (ConstantSize) { 8591 // Memcpy with size zero? Just return the original chain. 8592 if (ConstantSize->isZero()) 8593 return Chain; 8594 8595 SDValue Result = getMemcpyLoadsAndStores( 8596 *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, 8597 isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, BatchAA); 8598 if (Result.getNode()) 8599 return Result; 8600 } 8601 8602 // Then check to see if we should lower the memcpy with target-specific 8603 // code. If the target chooses to do this, this is the next best. 8604 if (TSI) { 8605 SDValue Result = TSI->EmitTargetCodeForMemcpy( 8606 *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, 8607 DstPtrInfo, SrcPtrInfo); 8608 if (Result.getNode()) 8609 return Result; 8610 } 8611 8612 // If we really need inline code and the target declined to provide it, 8613 // use a (potentially long) sequence of loads and stores. 8614 if (AlwaysInline) { 8615 assert(ConstantSize && "AlwaysInline requires a constant size!"); 8616 return getMemcpyLoadsAndStores( 8617 *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, 8618 isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, BatchAA); 8619 } 8620 8621 checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); 8622 checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); 8623 8624 // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc 8625 // memcpy is not guaranteed to be safe. libc memcpys aren't required to 8626 // respect volatile, so they may do things like read or write memory 8627 // beyond the given memory regions. But fixing this isn't easy, and most 8628 // people don't care. 8629 8630 // Emit a library call. 
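// Conceptually this builds a call to the C routine
//   memcpy(dst, src, size)
// with pointer-typed dst/src and an intptr-sized size; the result is
// discarded, and a tail call is emitted only when explicitly requested via
// OverrideTailCall or when the original call site permits it.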
8631 TargetLowering::ArgListTy Args; 8632 TargetLowering::ArgListEntry Entry; 8633 Entry.Ty = PointerType::getUnqual(*getContext()); 8634 Entry.Node = Dst; Args.push_back(Entry); 8635 Entry.Node = Src; Args.push_back(Entry); 8636 8637 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8638 Entry.Node = Size; Args.push_back(Entry); 8639 // FIXME: pass in SDLoc 8640 TargetLowering::CallLoweringInfo CLI(*this); 8641 bool IsTailCall = false; 8642 if (OverrideTailCall.has_value()) { 8643 IsTailCall = *OverrideTailCall; 8644 } else { 8645 bool LowersToMemcpy = 8646 TLI->getLibcallName(RTLIB::MEMCPY) == StringRef("memcpy"); 8647 bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); 8648 IsTailCall = CI && CI->isTailCall() && 8649 isInTailCallPosition(*CI, getTarget(), 8650 ReturnsFirstArg && LowersToMemcpy); 8651 } 8652 8653 CLI.setDebugLoc(dl) 8654 .setChain(Chain) 8655 .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), 8656 Dst.getValueType().getTypeForEVT(*getContext()), 8657 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), 8658 TLI->getPointerTy(getDataLayout())), 8659 std::move(Args)) 8660 .setDiscardResult() 8661 .setTailCall(IsTailCall); 8662 8663 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 8664 return CallResult.second; 8665 } 8666 8667 SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, 8668 SDValue Dst, SDValue Src, SDValue Size, 8669 Type *SizeTy, unsigned ElemSz, 8670 bool isTailCall, 8671 MachinePointerInfo DstPtrInfo, 8672 MachinePointerInfo SrcPtrInfo) { 8673 // Emit a library call. 8674 TargetLowering::ArgListTy Args; 8675 TargetLowering::ArgListEntry Entry; 8676 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8677 Entry.Node = Dst; 8678 Args.push_back(Entry); 8679 8680 Entry.Node = Src; 8681 Args.push_back(Entry); 8682 8683 Entry.Ty = SizeTy; 8684 Entry.Node = Size; 8685 Args.push_back(Entry); 8686 8687 RTLIB::Libcall LibraryCall = 8688 RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); 8689 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) 8690 report_fatal_error("Unsupported element size"); 8691 8692 TargetLowering::CallLoweringInfo CLI(*this); 8693 CLI.setDebugLoc(dl) 8694 .setChain(Chain) 8695 .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), 8696 Type::getVoidTy(*getContext()), 8697 getExternalSymbol(TLI->getLibcallName(LibraryCall), 8698 TLI->getPointerTy(getDataLayout())), 8699 std::move(Args)) 8700 .setDiscardResult() 8701 .setTailCall(isTailCall); 8702 8703 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 8704 return CallResult.second; 8705 } 8706 8707 SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, 8708 SDValue Src, SDValue Size, Align Alignment, 8709 bool isVol, const CallInst *CI, 8710 std::optional<bool> OverrideTailCall, 8711 MachinePointerInfo DstPtrInfo, 8712 MachinePointerInfo SrcPtrInfo, 8713 const AAMDNodes &AAInfo, 8714 BatchAAResults *BatchAA) { 8715 // Check to see if we should lower the memmove to loads and stores first. 8716 // For cases within the target-specified limits, this is the best choice. 8717 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 8718 if (ConstantSize) { 8719 // Memmove with size zero? Just return the original chain. 
8720 if (ConstantSize->isZero()) 8721 return Chain; 8722 8723 SDValue Result = getMemmoveLoadsAndStores( 8724 *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, 8725 isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); 8726 if (Result.getNode()) 8727 return Result; 8728 } 8729 8730 // Then check to see if we should lower the memmove with target-specific 8731 // code. If the target chooses to do this, this is the next best. 8732 if (TSI) { 8733 SDValue Result = 8734 TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, 8735 Alignment, isVol, DstPtrInfo, SrcPtrInfo); 8736 if (Result.getNode()) 8737 return Result; 8738 } 8739 8740 checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); 8741 checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); 8742 8743 // FIXME: If the memmove is volatile, lowering it to plain libc memmove may 8744 // not be safe. See memcpy above for more details. 8745 8746 // Emit a library call. 8747 TargetLowering::ArgListTy Args; 8748 TargetLowering::ArgListEntry Entry; 8749 Entry.Ty = PointerType::getUnqual(*getContext()); 8750 Entry.Node = Dst; Args.push_back(Entry); 8751 Entry.Node = Src; Args.push_back(Entry); 8752 8753 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8754 Entry.Node = Size; Args.push_back(Entry); 8755 // FIXME: pass in SDLoc 8756 TargetLowering::CallLoweringInfo CLI(*this); 8757 8758 bool IsTailCall = false; 8759 if (OverrideTailCall.has_value()) { 8760 IsTailCall = *OverrideTailCall; 8761 } else { 8762 bool LowersToMemmove = 8763 TLI->getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove"); 8764 bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); 8765 IsTailCall = CI && CI->isTailCall() && 8766 isInTailCallPosition(*CI, getTarget(), 8767 ReturnsFirstArg && LowersToMemmove); 8768 } 8769 8770 CLI.setDebugLoc(dl) 8771 .setChain(Chain) 8772 .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), 8773 Dst.getValueType().getTypeForEVT(*getContext()), 8774 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), 8775 TLI->getPointerTy(getDataLayout())), 8776 std::move(Args)) 8777 .setDiscardResult() 8778 .setTailCall(IsTailCall); 8779 8780 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 8781 return CallResult.second; 8782 } 8783 8784 SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, 8785 SDValue Dst, SDValue Src, SDValue Size, 8786 Type *SizeTy, unsigned ElemSz, 8787 bool isTailCall, 8788 MachinePointerInfo DstPtrInfo, 8789 MachinePointerInfo SrcPtrInfo) { 8790 // Emit a library call. 
8791 TargetLowering::ArgListTy Args; 8792 TargetLowering::ArgListEntry Entry; 8793 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8794 Entry.Node = Dst; 8795 Args.push_back(Entry); 8796 8797 Entry.Node = Src; 8798 Args.push_back(Entry); 8799 8800 Entry.Ty = SizeTy; 8801 Entry.Node = Size; 8802 Args.push_back(Entry); 8803 8804 RTLIB::Libcall LibraryCall = 8805 RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); 8806 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) 8807 report_fatal_error("Unsupported element size"); 8808 8809 TargetLowering::CallLoweringInfo CLI(*this); 8810 CLI.setDebugLoc(dl) 8811 .setChain(Chain) 8812 .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), 8813 Type::getVoidTy(*getContext()), 8814 getExternalSymbol(TLI->getLibcallName(LibraryCall), 8815 TLI->getPointerTy(getDataLayout())), 8816 std::move(Args)) 8817 .setDiscardResult() 8818 .setTailCall(isTailCall); 8819 8820 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 8821 return CallResult.second; 8822 } 8823 8824 SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, 8825 SDValue Src, SDValue Size, Align Alignment, 8826 bool isVol, bool AlwaysInline, 8827 const CallInst *CI, 8828 MachinePointerInfo DstPtrInfo, 8829 const AAMDNodes &AAInfo) { 8830 // Check to see if we should lower the memset to stores first. 8831 // For cases within the target-specified limits, this is the best choice. 8832 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 8833 if (ConstantSize) { 8834 // Memset with size zero? Just return the original chain. 8835 if (ConstantSize->isZero()) 8836 return Chain; 8837 8838 SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, 8839 ConstantSize->getZExtValue(), Alignment, 8840 isVol, false, DstPtrInfo, AAInfo); 8841 8842 if (Result.getNode()) 8843 return Result; 8844 } 8845 8846 // Then check to see if we should lower the memset with target-specific 8847 // code. If the target chooses to do this, this is the next best. 8848 if (TSI) { 8849 SDValue Result = TSI->EmitTargetCodeForMemset( 8850 *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo); 8851 if (Result.getNode()) 8852 return Result; 8853 } 8854 8855 // If we really need inline code and the target declined to provide it, 8856 // use a (potentially long) sequence of loads and stores. 8857 if (AlwaysInline) { 8858 assert(ConstantSize && "AlwaysInline requires a constant size!"); 8859 SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, 8860 ConstantSize->getZExtValue(), Alignment, 8861 isVol, true, DstPtrInfo, AAInfo); 8862 assert(Result && 8863 "getMemsetStores must return a valid sequence when AlwaysInline"); 8864 return Result; 8865 } 8866 8867 checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); 8868 8869 // Emit a library call. 8870 auto &Ctx = *getContext(); 8871 const auto& DL = getDataLayout(); 8872 8873 TargetLowering::CallLoweringInfo CLI(*this); 8874 // FIXME: pass in SDLoc 8875 CLI.setDebugLoc(dl).setChain(Chain); 8876 8877 const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); 8878 8879 // Helper function to create an Entry from Node and Type. 8880 const auto CreateEntry = [](SDValue Node, Type *Ty) { 8881 TargetLowering::ArgListEntry Entry; 8882 Entry.Node = Node; 8883 Entry.Ty = Ty; 8884 return Entry; 8885 }; 8886 8887 bool UseBZero = isNullConstant(Src) && BzeroName; 8888 // If zeroing out and bzero is present, use it. 
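// Unlike memset, bzero(dst, n) takes no fill value and returns void, which is
// why the bzero path drops the Src argument and why ReturnsFirstArg is forced
// to false below when UseBZero is set.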
8889 if (UseBZero) { 8890 TargetLowering::ArgListTy Args; 8891 Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); 8892 Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); 8893 CLI.setLibCallee( 8894 TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), 8895 getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); 8896 } else { 8897 TargetLowering::ArgListTy Args; 8898 Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); 8899 Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); 8900 Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); 8901 CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), 8902 Dst.getValueType().getTypeForEVT(Ctx), 8903 getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), 8904 TLI->getPointerTy(DL)), 8905 std::move(Args)); 8906 } 8907 bool LowersToMemset = 8908 TLI->getLibcallName(RTLIB::MEMSET) == StringRef("memset"); 8909 // If we're going to use bzero, make sure not to tail call unless the 8910 // subsequent return doesn't need a value, as bzero doesn't return the first 8911 // arg unlike memset. 8912 bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI) && !UseBZero; 8913 bool IsTailCall = 8914 CI && CI->isTailCall() && 8915 isInTailCallPosition(*CI, getTarget(), ReturnsFirstArg && LowersToMemset); 8916 CLI.setDiscardResult().setTailCall(IsTailCall); 8917 8918 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 8919 return CallResult.second; 8920 } 8921 8922 SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, 8923 SDValue Dst, SDValue Value, SDValue Size, 8924 Type *SizeTy, unsigned ElemSz, 8925 bool isTailCall, 8926 MachinePointerInfo DstPtrInfo) { 8927 // Emit a library call. 8928 TargetLowering::ArgListTy Args; 8929 TargetLowering::ArgListEntry Entry; 8930 Entry.Ty = getDataLayout().getIntPtrType(*getContext()); 8931 Entry.Node = Dst; 8932 Args.push_back(Entry); 8933 8934 Entry.Ty = Type::getInt8Ty(*getContext()); 8935 Entry.Node = Value; 8936 Args.push_back(Entry); 8937 8938 Entry.Ty = SizeTy; 8939 Entry.Node = Size; 8940 Args.push_back(Entry); 8941 8942 RTLIB::Libcall LibraryCall = 8943 RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); 8944 if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) 8945 report_fatal_error("Unsupported element size"); 8946 8947 TargetLowering::CallLoweringInfo CLI(*this); 8948 CLI.setDebugLoc(dl) 8949 .setChain(Chain) 8950 .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), 8951 Type::getVoidTy(*getContext()), 8952 getExternalSymbol(TLI->getLibcallName(LibraryCall), 8953 TLI->getPointerTy(getDataLayout())), 8954 std::move(Args)) 8955 .setDiscardResult() 8956 .setTailCall(isTailCall); 8957 8958 std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); 8959 return CallResult.second; 8960 } 8961 8962 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, 8963 SDVTList VTList, ArrayRef<SDValue> Ops, 8964 MachineMemOperand *MMO) { 8965 FoldingSetNodeID ID; 8966 ID.AddInteger(MemVT.getRawBits()); 8967 AddNodeIDNode(ID, Opcode, VTList, Ops); 8968 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 8969 ID.AddInteger(MMO->getFlags()); 8970 void* IP = nullptr; 8971 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 8972 cast<AtomicSDNode>(E)->refineAlignment(MMO); 8973 return SDValue(E, 0); 8974 } 8975 8976 auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), 8977 VTList, MemVT, MMO); 8978 createOperands(N, Ops); 8979 8980 CSEMap.InsertNode(N, IP); 8981 InsertNode(N); 8982 return 
SDValue(N, 0); 8983 } 8984 8985 SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, 8986 EVT MemVT, SDVTList VTs, SDValue Chain, 8987 SDValue Ptr, SDValue Cmp, SDValue Swp, 8988 MachineMemOperand *MMO) { 8989 assert(Opcode == ISD::ATOMIC_CMP_SWAP || 8990 Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); 8991 assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); 8992 8993 SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; 8994 return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); 8995 } 8996 8997 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, 8998 SDValue Chain, SDValue Ptr, SDValue Val, 8999 MachineMemOperand *MMO) { 9000 assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || 9001 Opcode == ISD::ATOMIC_LOAD_AND || Opcode == ISD::ATOMIC_LOAD_CLR || 9002 Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || 9003 Opcode == ISD::ATOMIC_LOAD_NAND || Opcode == ISD::ATOMIC_LOAD_MIN || 9004 Opcode == ISD::ATOMIC_LOAD_MAX || Opcode == ISD::ATOMIC_LOAD_UMIN || 9005 Opcode == ISD::ATOMIC_LOAD_UMAX || Opcode == ISD::ATOMIC_LOAD_FADD || 9006 Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || 9007 Opcode == ISD::ATOMIC_LOAD_FMIN || 9008 Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || 9009 Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || 9010 Opcode == ISD::ATOMIC_LOAD_USUB_COND || 9011 Opcode == ISD::ATOMIC_LOAD_USUB_SAT || Opcode == ISD::ATOMIC_SWAP || 9012 Opcode == ISD::ATOMIC_STORE) && 9013 "Invalid Atomic Op"); 9014 9015 EVT VT = Val.getValueType(); 9016 9017 SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : 9018 getVTList(VT, MVT::Other); 9019 SDValue Ops[] = {Chain, Ptr, Val}; 9020 return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); 9021 } 9022 9023 SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, 9024 EVT VT, SDValue Chain, SDValue Ptr, 9025 MachineMemOperand *MMO) { 9026 assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); 9027 9028 SDVTList VTs = getVTList(VT, MVT::Other); 9029 SDValue Ops[] = {Chain, Ptr}; 9030 return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); 9031 } 9032 9033 /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
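/// A single operand is returned as-is rather than being wrapped in a
/// MERGE_VALUES node; otherwise the node carries one value type per operand.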
9034 SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { 9035 if (Ops.size() == 1) 9036 return Ops[0]; 9037 9038 SmallVector<EVT, 4> VTs; 9039 VTs.reserve(Ops.size()); 9040 for (const SDValue &Op : Ops) 9041 VTs.push_back(Op.getValueType()); 9042 return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); 9043 } 9044 9045 SDValue SelectionDAG::getMemIntrinsicNode( 9046 unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, 9047 EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, 9048 MachineMemOperand::Flags Flags, LocationSize Size, 9049 const AAMDNodes &AAInfo) { 9050 if (Size.hasValue() && !Size.getValue()) 9051 Size = LocationSize::precise(MemVT.getStoreSize()); 9052 9053 MachineFunction &MF = getMachineFunction(); 9054 MachineMemOperand *MMO = 9055 MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo); 9056 9057 return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); 9058 } 9059 9060 SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, 9061 SDVTList VTList, 9062 ArrayRef<SDValue> Ops, EVT MemVT, 9063 MachineMemOperand *MMO) { 9064 assert( 9065 (Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || 9066 Opcode == ISD::PREFETCH || 9067 (Opcode <= (unsigned)std::numeric_limits<int>::max() && 9068 Opcode >= ISD::BUILTIN_OP_END && TSI->isTargetMemoryOpcode(Opcode))) && 9069 "Opcode is not a memory-accessing opcode!"); 9070 9071 // Memoize the node unless it returns a glue result. 9072 MemIntrinsicSDNode *N; 9073 if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { 9074 FoldingSetNodeID ID; 9075 AddNodeIDNode(ID, Opcode, VTList, Ops); 9076 ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( 9077 Opcode, dl.getIROrder(), VTList, MemVT, MMO)); 9078 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9079 ID.AddInteger(MMO->getFlags()); 9080 ID.AddInteger(MemVT.getRawBits()); 9081 void *IP = nullptr; 9082 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9083 cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); 9084 return SDValue(E, 0); 9085 } 9086 9087 N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), 9088 VTList, MemVT, MMO); 9089 createOperands(N, Ops); 9090 9091 CSEMap.InsertNode(N, IP); 9092 } else { 9093 N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), 9094 VTList, MemVT, MMO); 9095 createOperands(N, Ops); 9096 } 9097 InsertNode(N); 9098 SDValue V(N, 0); 9099 NewSDValueDbgMsg(V, "Creating new node: ", this); 9100 return V; 9101 } 9102 9103 SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, 9104 SDValue Chain, int FrameIndex, 9105 int64_t Size, int64_t Offset) { 9106 const unsigned Opcode = IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END; 9107 const auto VTs = getVTList(MVT::Other); 9108 SDValue Ops[2] = { 9109 Chain, 9110 getFrameIndex(FrameIndex, 9111 getTargetLoweringInfo().getFrameIndexTy(getDataLayout()), 9112 true)}; 9113 9114 FoldingSetNodeID ID; 9115 AddNodeIDNode(ID, Opcode, VTs, Ops); 9116 ID.AddInteger(FrameIndex); 9117 ID.AddInteger(Size); 9118 ID.AddInteger(Offset); 9119 void *IP = nullptr; 9120 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 9121 return SDValue(E, 0); 9122 9123 LifetimeSDNode *N = newSDNode<LifetimeSDNode>( 9124 Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset); 9125 createOperands(N, Ops); 9126 CSEMap.InsertNode(N, IP); 9127 InsertNode(N); 9128 SDValue V(N, 0); 9129 NewSDValueDbgMsg(V, "Creating new node: ", this); 9130 return V; 9131 } 9132 9133 SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, 9134 uint64_t Guid, uint64_t Index, 9135 uint32_t Attr) { 9136 const unsigned Opcode = ISD::PSEUDO_PROBE; 9137 const auto VTs = getVTList(MVT::Other); 9138 SDValue Ops[] = {Chain}; 9139 FoldingSetNodeID ID; 9140 AddNodeIDNode(ID, Opcode, VTs, Ops); 9141 ID.AddInteger(Guid); 9142 ID.AddInteger(Index); 9143 void *IP = nullptr; 9144 if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP)) 9145 return SDValue(E, 0); 9146 9147 auto *N = newSDNode<PseudoProbeSDNode>( 9148 Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr); 9149 createOperands(N, Ops); 9150 CSEMap.InsertNode(N, IP); 9151 InsertNode(N); 9152 SDValue V(N, 0); 9153 NewSDValueDbgMsg(V, "Creating new node: ", this); 9154 return V; 9155 } 9156 9157 /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a 9158 /// MachinePointerInfo record from it. This is particularly useful because the 9159 /// code generator has many cases where it doesn't bother passing in a 9160 /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 9161 static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, 9162 SelectionDAG &DAG, SDValue Ptr, 9163 int64_t Offset = 0) { 9164 // If this is FI+Offset, we can model it. 9165 if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) 9166 return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), 9167 FI->getIndex(), Offset); 9168 9169 // If this is (FI+Offset1)+Offset2, we can model it. 9170 if (Ptr.getOpcode() != ISD::ADD || 9171 !isa<ConstantSDNode>(Ptr.getOperand(1)) || 9172 !isa<FrameIndexSDNode>(Ptr.getOperand(0))) 9173 return Info; 9174 9175 int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); 9176 return MachinePointerInfo::getFixedStack( 9177 DAG.getMachineFunction(), FI, 9178 Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); 9179 } 9180 9181 /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a 9182 /// MachinePointerInfo record from it. This is particularly useful because the 9183 /// code generator has many cases where it doesn't bother passing in a 9184 /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 9185 static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, 9186 SelectionDAG &DAG, SDValue Ptr, 9187 SDValue OffsetOp) { 9188 // If the 'Offset' value isn't a constant, we can't handle this. 
9189 if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) 9190 return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); 9191 if (OffsetOp.isUndef()) 9192 return InferPointerInfo(Info, DAG, Ptr); 9193 return Info; 9194 } 9195 9196 SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, 9197 EVT VT, const SDLoc &dl, SDValue Chain, 9198 SDValue Ptr, SDValue Offset, 9199 MachinePointerInfo PtrInfo, EVT MemVT, 9200 Align Alignment, 9201 MachineMemOperand::Flags MMOFlags, 9202 const AAMDNodes &AAInfo, const MDNode *Ranges) { 9203 assert(Chain.getValueType() == MVT::Other && 9204 "Invalid chain type"); 9205 9206 MMOFlags |= MachineMemOperand::MOLoad; 9207 assert((MMOFlags & MachineMemOperand::MOStore) == 0); 9208 // If we don't have a PtrInfo, infer the trivial frame index case to simplify 9209 // clients. 9210 if (PtrInfo.V.isNull()) 9211 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); 9212 9213 LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); 9214 MachineFunction &MF = getMachineFunction(); 9215 MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, 9216 Alignment, AAInfo, Ranges); 9217 return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); 9218 } 9219 9220 SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, 9221 EVT VT, const SDLoc &dl, SDValue Chain, 9222 SDValue Ptr, SDValue Offset, EVT MemVT, 9223 MachineMemOperand *MMO) { 9224 if (VT == MemVT) { 9225 ExtType = ISD::NON_EXTLOAD; 9226 } else if (ExtType == ISD::NON_EXTLOAD) { 9227 assert(VT == MemVT && "Non-extending load from different memory type!"); 9228 } else { 9229 // Extending load. 9230 assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && 9231 "Should only be an extending load, not truncating!"); 9232 assert(VT.isInteger() == MemVT.isInteger() && 9233 "Cannot convert from FP to Int or Int -> FP!"); 9234 assert(VT.isVector() == MemVT.isVector() && 9235 "Cannot use an ext load to convert to or from a vector!"); 9236 assert((!VT.isVector() || 9237 VT.getVectorElementCount() == MemVT.getVectorElementCount()) && 9238 "Cannot use an ext load to change the number of vector elements!"); 9239 } 9240 9241 bool Indexed = AM != ISD::UNINDEXED; 9242 assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); 9243 9244 SDVTList VTs = Indexed ? 
9245 getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); 9246 SDValue Ops[] = { Chain, Ptr, Offset }; 9247 FoldingSetNodeID ID; 9248 AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); 9249 ID.AddInteger(MemVT.getRawBits()); 9250 ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( 9251 dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); 9252 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9253 ID.AddInteger(MMO->getFlags()); 9254 void *IP = nullptr; 9255 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9256 cast<LoadSDNode>(E)->refineAlignment(MMO); 9257 return SDValue(E, 0); 9258 } 9259 auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9260 ExtType, MemVT, MMO); 9261 createOperands(N, Ops); 9262 9263 CSEMap.InsertNode(N, IP); 9264 InsertNode(N); 9265 SDValue V(N, 0); 9266 NewSDValueDbgMsg(V, "Creating new node: ", this); 9267 return V; 9268 } 9269 9270 SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, 9271 SDValue Ptr, MachinePointerInfo PtrInfo, 9272 MaybeAlign Alignment, 9273 MachineMemOperand::Flags MMOFlags, 9274 const AAMDNodes &AAInfo, const MDNode *Ranges) { 9275 SDValue Undef = getUNDEF(Ptr.getValueType()); 9276 return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9277 PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges); 9278 } 9279 9280 SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, 9281 SDValue Ptr, MachineMemOperand *MMO) { 9282 SDValue Undef = getUNDEF(Ptr.getValueType()); 9283 return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9284 VT, MMO); 9285 } 9286 9287 SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, 9288 EVT VT, SDValue Chain, SDValue Ptr, 9289 MachinePointerInfo PtrInfo, EVT MemVT, 9290 MaybeAlign Alignment, 9291 MachineMemOperand::Flags MMOFlags, 9292 const AAMDNodes &AAInfo) { 9293 SDValue Undef = getUNDEF(Ptr.getValueType()); 9294 return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo, 9295 MemVT, Alignment, MMOFlags, AAInfo); 9296 } 9297 9298 SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, 9299 EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, 9300 MachineMemOperand *MMO) { 9301 SDValue Undef = getUNDEF(Ptr.getValueType()); 9302 return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, 9303 MemVT, MMO); 9304 } 9305 9306 SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, 9307 SDValue Base, SDValue Offset, 9308 ISD::MemIndexedMode AM) { 9309 LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); 9310 assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); 9311 // Don't propagate the invariant or dereferenceable flags. 
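// A plausible reason, not spelled out here: the indexed form computes a new
// address from Base and Offset, and the original operand's dereferenceable/
// invariant guarantees were established for the old address, so they are not
// carried over.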
9312 auto MMOFlags = 9313 LD->getMemOperand()->getFlags() & 9314 ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); 9315 return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, 9316 LD->getChain(), Base, Offset, LD->getPointerInfo(), 9317 LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo()); 9318 } 9319 9320 SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9321 SDValue Ptr, MachinePointerInfo PtrInfo, 9322 Align Alignment, 9323 MachineMemOperand::Flags MMOFlags, 9324 const AAMDNodes &AAInfo) { 9325 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9326 9327 MMOFlags |= MachineMemOperand::MOStore; 9328 assert((MMOFlags & MachineMemOperand::MOLoad) == 0); 9329 9330 if (PtrInfo.V.isNull()) 9331 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); 9332 9333 MachineFunction &MF = getMachineFunction(); 9334 LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize()); 9335 MachineMemOperand *MMO = 9336 MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); 9337 return getStore(Chain, dl, Val, Ptr, MMO); 9338 } 9339 9340 SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9341 SDValue Ptr, MachineMemOperand *MMO) { 9342 assert(Chain.getValueType() == MVT::Other && 9343 "Invalid chain type"); 9344 EVT VT = Val.getValueType(); 9345 SDVTList VTs = getVTList(MVT::Other); 9346 SDValue Undef = getUNDEF(Ptr.getValueType()); 9347 SDValue Ops[] = { Chain, Val, Ptr, Undef }; 9348 FoldingSetNodeID ID; 9349 AddNodeIDNode(ID, ISD::STORE, VTs, Ops); 9350 ID.AddInteger(VT.getRawBits()); 9351 ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( 9352 dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO)); 9353 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9354 ID.AddInteger(MMO->getFlags()); 9355 void *IP = nullptr; 9356 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9357 cast<StoreSDNode>(E)->refineAlignment(MMO); 9358 return SDValue(E, 0); 9359 } 9360 auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9361 ISD::UNINDEXED, false, VT, MMO); 9362 createOperands(N, Ops); 9363 9364 CSEMap.InsertNode(N, IP); 9365 InsertNode(N); 9366 SDValue V(N, 0); 9367 NewSDValueDbgMsg(V, "Creating new node: ", this); 9368 return V; 9369 } 9370 9371 SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9372 SDValue Ptr, MachinePointerInfo PtrInfo, 9373 EVT SVT, Align Alignment, 9374 MachineMemOperand::Flags MMOFlags, 9375 const AAMDNodes &AAInfo) { 9376 assert(Chain.getValueType() == MVT::Other && 9377 "Invalid chain type"); 9378 9379 MMOFlags |= MachineMemOperand::MOStore; 9380 assert((MMOFlags & MachineMemOperand::MOLoad) == 0); 9381 9382 if (PtrInfo.V.isNull()) 9383 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); 9384 9385 MachineFunction &MF = getMachineFunction(); 9386 MachineMemOperand *MMO = MF.getMachineMemOperand( 9387 PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, 9388 AAInfo); 9389 return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); 9390 } 9391 9392 SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, 9393 SDValue Ptr, EVT SVT, 9394 MachineMemOperand *MMO) { 9395 EVT VT = Val.getValueType(); 9396 9397 assert(Chain.getValueType() == MVT::Other && 9398 "Invalid chain type"); 9399 if (VT == SVT) 9400 return getStore(Chain, dl, Val, Ptr, MMO); 9401 9402 assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && 9403 "Should only be a truncating store, not extending!"); 
9404 assert(VT.isInteger() == SVT.isInteger() && 9405 "Can't do FP-INT conversion!"); 9406 assert(VT.isVector() == SVT.isVector() && 9407 "Cannot use trunc store to convert to or from a vector!"); 9408 assert((!VT.isVector() || 9409 VT.getVectorElementCount() == SVT.getVectorElementCount()) && 9410 "Cannot use trunc store to change the number of vector elements!"); 9411 9412 SDVTList VTs = getVTList(MVT::Other); 9413 SDValue Undef = getUNDEF(Ptr.getValueType()); 9414 SDValue Ops[] = { Chain, Val, Ptr, Undef }; 9415 FoldingSetNodeID ID; 9416 AddNodeIDNode(ID, ISD::STORE, VTs, Ops); 9417 ID.AddInteger(SVT.getRawBits()); 9418 ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( 9419 dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO)); 9420 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9421 ID.AddInteger(MMO->getFlags()); 9422 void *IP = nullptr; 9423 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9424 cast<StoreSDNode>(E)->refineAlignment(MMO); 9425 return SDValue(E, 0); 9426 } 9427 auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9428 ISD::UNINDEXED, true, SVT, MMO); 9429 createOperands(N, Ops); 9430 9431 CSEMap.InsertNode(N, IP); 9432 InsertNode(N); 9433 SDValue V(N, 0); 9434 NewSDValueDbgMsg(V, "Creating new node: ", this); 9435 return V; 9436 } 9437 9438 SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, 9439 SDValue Base, SDValue Offset, 9440 ISD::MemIndexedMode AM) { 9441 StoreSDNode *ST = cast<StoreSDNode>(OrigStore); 9442 assert(ST->getOffset().isUndef() && "Store is already a indexed store!"); 9443 SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); 9444 SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; 9445 FoldingSetNodeID ID; 9446 AddNodeIDNode(ID, ISD::STORE, VTs, Ops); 9447 ID.AddInteger(ST->getMemoryVT().getRawBits()); 9448 ID.AddInteger(ST->getRawSubclassData()); 9449 ID.AddInteger(ST->getPointerInfo().getAddrSpace()); 9450 ID.AddInteger(ST->getMemOperand()->getFlags()); 9451 void *IP = nullptr; 9452 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 9453 return SDValue(E, 0); 9454 9455 auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9456 ST->isTruncatingStore(), ST->getMemoryVT(), 9457 ST->getMemOperand()); 9458 createOperands(N, Ops); 9459 9460 CSEMap.InsertNode(N, IP); 9461 InsertNode(N); 9462 SDValue V(N, 0); 9463 NewSDValueDbgMsg(V, "Creating new node: ", this); 9464 return V; 9465 } 9466 9467 SDValue SelectionDAG::getLoadVP( 9468 ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, 9469 SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, 9470 MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, 9471 MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, 9472 const MDNode *Ranges, bool IsExpanding) { 9473 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9474 9475 MMOFlags |= MachineMemOperand::MOLoad; 9476 assert((MMOFlags & MachineMemOperand::MOStore) == 0); 9477 // If we don't have a PtrInfo, infer the trivial frame index case to simplify 9478 // clients. 
9479 if (PtrInfo.V.isNull()) 9480 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); 9481 9482 LocationSize Size = LocationSize::precise(MemVT.getStoreSize()); 9483 MachineFunction &MF = getMachineFunction(); 9484 MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, 9485 Alignment, AAInfo, Ranges); 9486 return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT, 9487 MMO, IsExpanding); 9488 } 9489 9490 SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, 9491 ISD::LoadExtType ExtType, EVT VT, 9492 const SDLoc &dl, SDValue Chain, SDValue Ptr, 9493 SDValue Offset, SDValue Mask, SDValue EVL, 9494 EVT MemVT, MachineMemOperand *MMO, 9495 bool IsExpanding) { 9496 bool Indexed = AM != ISD::UNINDEXED; 9497 assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); 9498 9499 SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other) 9500 : getVTList(VT, MVT::Other); 9501 SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL}; 9502 FoldingSetNodeID ID; 9503 AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops); 9504 ID.AddInteger(MemVT.getRawBits()); 9505 ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>( 9506 dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); 9507 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9508 ID.AddInteger(MMO->getFlags()); 9509 void *IP = nullptr; 9510 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9511 cast<VPLoadSDNode>(E)->refineAlignment(MMO); 9512 return SDValue(E, 0); 9513 } 9514 auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9515 ExtType, IsExpanding, MemVT, MMO); 9516 createOperands(N, Ops); 9517 9518 CSEMap.InsertNode(N, IP); 9519 InsertNode(N); 9520 SDValue V(N, 0); 9521 NewSDValueDbgMsg(V, "Creating new node: ", this); 9522 return V; 9523 } 9524 9525 SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, 9526 SDValue Ptr, SDValue Mask, SDValue EVL, 9527 MachinePointerInfo PtrInfo, 9528 MaybeAlign Alignment, 9529 MachineMemOperand::Flags MMOFlags, 9530 const AAMDNodes &AAInfo, const MDNode *Ranges, 9531 bool IsExpanding) { 9532 SDValue Undef = getUNDEF(Ptr.getValueType()); 9533 return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9534 Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges, 9535 IsExpanding); 9536 } 9537 9538 SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, 9539 SDValue Ptr, SDValue Mask, SDValue EVL, 9540 MachineMemOperand *MMO, bool IsExpanding) { 9541 SDValue Undef = getUNDEF(Ptr.getValueType()); 9542 return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, 9543 Mask, EVL, VT, MMO, IsExpanding); 9544 } 9545 9546 SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, 9547 EVT VT, SDValue Chain, SDValue Ptr, 9548 SDValue Mask, SDValue EVL, 9549 MachinePointerInfo PtrInfo, EVT MemVT, 9550 MaybeAlign Alignment, 9551 MachineMemOperand::Flags MMOFlags, 9552 const AAMDNodes &AAInfo, bool IsExpanding) { 9553 SDValue Undef = getUNDEF(Ptr.getValueType()); 9554 return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, 9555 EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr, 9556 IsExpanding); 9557 } 9558 9559 SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, 9560 EVT VT, SDValue Chain, SDValue Ptr, 9561 SDValue Mask, SDValue EVL, EVT MemVT, 9562 MachineMemOperand *MMO, bool IsExpanding) { 9563 SDValue Undef = getUNDEF(Ptr.getValueType()); 9564 return getLoadVP(ISD::UNINDEXED, 
ExtType, VT, dl, Chain, Ptr, Undef, Mask, 9565 EVL, MemVT, MMO, IsExpanding); 9566 } 9567 9568 SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, 9569 SDValue Base, SDValue Offset, 9570 ISD::MemIndexedMode AM) { 9571 auto *LD = cast<VPLoadSDNode>(OrigLoad); 9572 assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); 9573 // Don't propagate the invariant or dereferenceable flags. 9574 auto MMOFlags = 9575 LD->getMemOperand()->getFlags() & 9576 ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); 9577 return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, 9578 LD->getChain(), Base, Offset, LD->getMask(), 9579 LD->getVectorLength(), LD->getPointerInfo(), 9580 LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(), 9581 nullptr, LD->isExpandingLoad()); 9582 } 9583 9584 SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, 9585 SDValue Ptr, SDValue Offset, SDValue Mask, 9586 SDValue EVL, EVT MemVT, MachineMemOperand *MMO, 9587 ISD::MemIndexedMode AM, bool IsTruncating, 9588 bool IsCompressing) { 9589 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9590 bool Indexed = AM != ISD::UNINDEXED; 9591 assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); 9592 SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) 9593 : getVTList(MVT::Other); 9594 SDValue Ops[] = {Chain, Val, Ptr, Offset, Mask, EVL}; 9595 FoldingSetNodeID ID; 9596 AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); 9597 ID.AddInteger(MemVT.getRawBits()); 9598 ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( 9599 dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); 9600 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9601 ID.AddInteger(MMO->getFlags()); 9602 void *IP = nullptr; 9603 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9604 cast<VPStoreSDNode>(E)->refineAlignment(MMO); 9605 return SDValue(E, 0); 9606 } 9607 auto *N = newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 9608 IsTruncating, IsCompressing, MemVT, MMO); 9609 createOperands(N, Ops); 9610 9611 CSEMap.InsertNode(N, IP); 9612 InsertNode(N); 9613 SDValue V(N, 0); 9614 NewSDValueDbgMsg(V, "Creating new node: ", this); 9615 return V; 9616 } 9617 9618 SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, 9619 SDValue Val, SDValue Ptr, SDValue Mask, 9620 SDValue EVL, MachinePointerInfo PtrInfo, 9621 EVT SVT, Align Alignment, 9622 MachineMemOperand::Flags MMOFlags, 9623 const AAMDNodes &AAInfo, 9624 bool IsCompressing) { 9625 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9626 9627 MMOFlags |= MachineMemOperand::MOStore; 9628 assert((MMOFlags & MachineMemOperand::MOLoad) == 0); 9629 9630 if (PtrInfo.V.isNull()) 9631 PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); 9632 9633 MachineFunction &MF = getMachineFunction(); 9634 MachineMemOperand *MMO = MF.getMachineMemOperand( 9635 PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment, 9636 AAInfo); 9637 return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, 9638 IsCompressing); 9639 } 9640 9641 SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, 9642 SDValue Val, SDValue Ptr, SDValue Mask, 9643 SDValue EVL, EVT SVT, 9644 MachineMemOperand *MMO, 9645 bool IsCompressing) { 9646 EVT VT = Val.getValueType(); 9647 9648 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9649 if (VT == SVT) 9650 return getStoreVP(Chain, dl, Val, 
Ptr, getUNDEF(Ptr.getValueType()), Mask, 9651 EVL, VT, MMO, ISD::UNINDEXED, 9652 /*IsTruncating*/ false, IsCompressing); 9653 9654 assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && 9655 "Should only be a truncating store, not extending!"); 9656 assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); 9657 assert(VT.isVector() == SVT.isVector() && 9658 "Cannot use trunc store to convert to or from a vector!"); 9659 assert((!VT.isVector() || 9660 VT.getVectorElementCount() == SVT.getVectorElementCount()) && 9661 "Cannot use trunc store to change the number of vector elements!"); 9662 9663 SDVTList VTs = getVTList(MVT::Other); 9664 SDValue Undef = getUNDEF(Ptr.getValueType()); 9665 SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; 9666 FoldingSetNodeID ID; 9667 AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); 9668 ID.AddInteger(SVT.getRawBits()); 9669 ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( 9670 dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); 9671 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9672 ID.AddInteger(MMO->getFlags()); 9673 void *IP = nullptr; 9674 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9675 cast<VPStoreSDNode>(E)->refineAlignment(MMO); 9676 return SDValue(E, 0); 9677 } 9678 auto *N = 9679 newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9680 ISD::UNINDEXED, true, IsCompressing, SVT, MMO); 9681 createOperands(N, Ops); 9682 9683 CSEMap.InsertNode(N, IP); 9684 InsertNode(N); 9685 SDValue V(N, 0); 9686 NewSDValueDbgMsg(V, "Creating new node: ", this); 9687 return V; 9688 } 9689 9690 SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, 9691 SDValue Base, SDValue Offset, 9692 ISD::MemIndexedMode AM) { 9693 auto *ST = cast<VPStoreSDNode>(OrigStore); 9694 assert(ST->getOffset().isUndef() && "Store is already an indexed store!"); 9695 SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); 9696 SDValue Ops[] = {ST->getChain(), ST->getValue(), Base, 9697 Offset, ST->getMask(), ST->getVectorLength()}; 9698 FoldingSetNodeID ID; 9699 AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); 9700 ID.AddInteger(ST->getMemoryVT().getRawBits()); 9701 ID.AddInteger(ST->getRawSubclassData()); 9702 ID.AddInteger(ST->getPointerInfo().getAddrSpace()); 9703 ID.AddInteger(ST->getMemOperand()->getFlags()); 9704 void *IP = nullptr; 9705 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 9706 return SDValue(E, 0); 9707 9708 auto *N = newSDNode<VPStoreSDNode>( 9709 dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), 9710 ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand()); 9711 createOperands(N, Ops); 9712 9713 CSEMap.InsertNode(N, IP); 9714 InsertNode(N); 9715 SDValue V(N, 0); 9716 NewSDValueDbgMsg(V, "Creating new node: ", this); 9717 return V; 9718 } 9719 9720 SDValue SelectionDAG::getStridedLoadVP( 9721 ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, 9722 SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, 9723 SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { 9724 bool Indexed = AM != ISD::UNINDEXED; 9725 assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); 9726 9727 SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL}; 9728 SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) 9729 : getVTList(VT, MVT::Other); 9730 FoldingSetNodeID ID; 9731 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops); 9732 ID.AddInteger(VT.getRawBits()); 9733 ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>( 9734 DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); 9735 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9736 9737 void *IP = nullptr; 9738 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 9739 cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO); 9740 return SDValue(E, 0); 9741 } 9742 9743 auto *N = 9744 newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM, 9745 ExtType, IsExpanding, MemVT, MMO); 9746 createOperands(N, Ops); 9747 CSEMap.InsertNode(N, IP); 9748 InsertNode(N); 9749 SDValue V(N, 0); 9750 NewSDValueDbgMsg(V, "Creating new node: ", this); 9751 return V; 9752 } 9753 9754 SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, 9755 SDValue Ptr, SDValue Stride, 9756 SDValue Mask, SDValue EVL, 9757 MachineMemOperand *MMO, 9758 bool IsExpanding) { 9759 SDValue Undef = getUNDEF(Ptr.getValueType()); 9760 return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, 9761 Undef, Stride, Mask, EVL, VT, MMO, IsExpanding); 9762 } 9763 9764 SDValue SelectionDAG::getExtStridedLoadVP( 9765 ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, 9766 SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, 9767 MachineMemOperand *MMO, bool IsExpanding) { 9768 SDValue Undef = getUNDEF(Ptr.getValueType()); 9769 return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, 9770 Stride, Mask, EVL, MemVT, MMO, IsExpanding); 9771 } 9772 9773 SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL, 9774 SDValue Val, SDValue Ptr, 9775 SDValue Offset, SDValue Stride, 9776 SDValue Mask, SDValue EVL, EVT MemVT, 9777 MachineMemOperand *MMO, 9778 ISD::MemIndexedMode AM, 9779 bool IsTruncating, bool IsCompressing) { 9780 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9781 bool Indexed = AM != ISD::UNINDEXED; 9782 assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); 9783 SDVTList VTs = Indexed ? 
getVTList(Ptr.getValueType(), MVT::Other) 9784 : getVTList(MVT::Other); 9785 SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL}; 9786 FoldingSetNodeID ID; 9787 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); 9788 ID.AddInteger(MemVT.getRawBits()); 9789 ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>( 9790 DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); 9791 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9792 void *IP = nullptr; 9793 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 9794 cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO); 9795 return SDValue(E, 0); 9796 } 9797 auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(), 9798 VTs, AM, IsTruncating, 9799 IsCompressing, MemVT, MMO); 9800 createOperands(N, Ops); 9801 9802 CSEMap.InsertNode(N, IP); 9803 InsertNode(N); 9804 SDValue V(N, 0); 9805 NewSDValueDbgMsg(V, "Creating new node: ", this); 9806 return V; 9807 } 9808 9809 SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, 9810 SDValue Val, SDValue Ptr, 9811 SDValue Stride, SDValue Mask, 9812 SDValue EVL, EVT SVT, 9813 MachineMemOperand *MMO, 9814 bool IsCompressing) { 9815 EVT VT = Val.getValueType(); 9816 9817 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 9818 if (VT == SVT) 9819 return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()), 9820 Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED, 9821 /*IsTruncating*/ false, IsCompressing); 9822 9823 assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && 9824 "Should only be a truncating store, not extending!"); 9825 assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); 9826 assert(VT.isVector() == SVT.isVector() && 9827 "Cannot use trunc store to convert to or from a vector!"); 9828 assert((!VT.isVector() || 9829 VT.getVectorElementCount() == SVT.getVectorElementCount()) && 9830 "Cannot use trunc store to change the number of vector elements!"); 9831 9832 SDVTList VTs = getVTList(MVT::Other); 9833 SDValue Undef = getUNDEF(Ptr.getValueType()); 9834 SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL}; 9835 FoldingSetNodeID ID; 9836 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); 9837 ID.AddInteger(SVT.getRawBits()); 9838 ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>( 9839 DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); 9840 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9841 void *IP = nullptr; 9842 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 9843 cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO); 9844 return SDValue(E, 0); 9845 } 9846 auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(), 9847 VTs, ISD::UNINDEXED, true, 9848 IsCompressing, SVT, MMO); 9849 createOperands(N, Ops); 9850 9851 CSEMap.InsertNode(N, IP); 9852 InsertNode(N); 9853 SDValue V(N, 0); 9854 NewSDValueDbgMsg(V, "Creating new node: ", this); 9855 return V; 9856 } 9857 9858 SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, 9859 ArrayRef<SDValue> Ops, MachineMemOperand *MMO, 9860 ISD::MemIndexType IndexType) { 9861 assert(Ops.size() == 6 && "Incompatible number of operands"); 9862 9863 FoldingSetNodeID ID; 9864 AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops); 9865 ID.AddInteger(VT.getRawBits()); 9866 ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>( 9867 dl.getIROrder(), VTs, VT, MMO, IndexType)); 9868 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9869 
ID.AddInteger(MMO->getFlags()); 9870 void *IP = nullptr; 9871 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9872 cast<VPGatherSDNode>(E)->refineAlignment(MMO); 9873 return SDValue(E, 0); 9874 } 9875 9876 auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9877 VT, MMO, IndexType); 9878 createOperands(N, Ops); 9879 9880 assert(N->getMask().getValueType().getVectorElementCount() == 9881 N->getValueType(0).getVectorElementCount() && 9882 "Vector width mismatch between mask and data"); 9883 assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == 9884 N->getValueType(0).getVectorElementCount().isScalable() && 9885 "Scalable flags of index and data do not match"); 9886 assert(ElementCount::isKnownGE( 9887 N->getIndex().getValueType().getVectorElementCount(), 9888 N->getValueType(0).getVectorElementCount()) && 9889 "Vector width mismatch between index and data"); 9890 assert(isa<ConstantSDNode>(N->getScale()) && 9891 N->getScale()->getAsAPIntVal().isPowerOf2() && 9892 "Scale should be a constant power of 2"); 9893 9894 CSEMap.InsertNode(N, IP); 9895 InsertNode(N); 9896 SDValue V(N, 0); 9897 NewSDValueDbgMsg(V, "Creating new node: ", this); 9898 return V; 9899 } 9900 9901 SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, 9902 ArrayRef<SDValue> Ops, 9903 MachineMemOperand *MMO, 9904 ISD::MemIndexType IndexType) { 9905 assert(Ops.size() == 7 && "Incompatible number of operands"); 9906 9907 FoldingSetNodeID ID; 9908 AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops); 9909 ID.AddInteger(VT.getRawBits()); 9910 ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>( 9911 dl.getIROrder(), VTs, VT, MMO, IndexType)); 9912 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9913 ID.AddInteger(MMO->getFlags()); 9914 void *IP = nullptr; 9915 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9916 cast<VPScatterSDNode>(E)->refineAlignment(MMO); 9917 return SDValue(E, 0); 9918 } 9919 auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9920 VT, MMO, IndexType); 9921 createOperands(N, Ops); 9922 9923 assert(N->getMask().getValueType().getVectorElementCount() == 9924 N->getValue().getValueType().getVectorElementCount() && 9925 "Vector width mismatch between mask and data"); 9926 assert( 9927 N->getIndex().getValueType().getVectorElementCount().isScalable() == 9928 N->getValue().getValueType().getVectorElementCount().isScalable() && 9929 "Scalable flags of index and data do not match"); 9930 assert(ElementCount::isKnownGE( 9931 N->getIndex().getValueType().getVectorElementCount(), 9932 N->getValue().getValueType().getVectorElementCount()) && 9933 "Vector width mismatch between index and data"); 9934 assert(isa<ConstantSDNode>(N->getScale()) && 9935 N->getScale()->getAsAPIntVal().isPowerOf2() && 9936 "Scale should be a constant power of 2"); 9937 9938 CSEMap.InsertNode(N, IP); 9939 InsertNode(N); 9940 SDValue V(N, 0); 9941 NewSDValueDbgMsg(V, "Creating new node: ", this); 9942 return V; 9943 } 9944 9945 SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, 9946 SDValue Base, SDValue Offset, SDValue Mask, 9947 SDValue PassThru, EVT MemVT, 9948 MachineMemOperand *MMO, 9949 ISD::MemIndexedMode AM, 9950 ISD::LoadExtType ExtTy, bool isExpanding) { 9951 bool Indexed = AM != ISD::UNINDEXED; 9952 assert((Indexed || Offset.isUndef()) && 9953 "Unindexed masked load with an offset!"); 9954 SDVTList VTs = Indexed ? 
getVTList(VT, Base.getValueType(), MVT::Other) 9955 : getVTList(VT, MVT::Other); 9956 SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; 9957 FoldingSetNodeID ID; 9958 AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); 9959 ID.AddInteger(MemVT.getRawBits()); 9960 ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( 9961 dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); 9962 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 9963 ID.AddInteger(MMO->getFlags()); 9964 void *IP = nullptr; 9965 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 9966 cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); 9967 return SDValue(E, 0); 9968 } 9969 auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, 9970 AM, ExtTy, isExpanding, MemVT, MMO); 9971 createOperands(N, Ops); 9972 9973 CSEMap.InsertNode(N, IP); 9974 InsertNode(N); 9975 SDValue V(N, 0); 9976 NewSDValueDbgMsg(V, "Creating new node: ", this); 9977 return V; 9978 } 9979 9980 SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, 9981 SDValue Base, SDValue Offset, 9982 ISD::MemIndexedMode AM) { 9983 MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); 9984 assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); 9985 return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, 9986 Offset, LD->getMask(), LD->getPassThru(), 9987 LD->getMemoryVT(), LD->getMemOperand(), AM, 9988 LD->getExtensionType(), LD->isExpandingLoad()); 9989 } 9990 9991 SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, 9992 SDValue Val, SDValue Base, SDValue Offset, 9993 SDValue Mask, EVT MemVT, 9994 MachineMemOperand *MMO, 9995 ISD::MemIndexedMode AM, bool IsTruncating, 9996 bool IsCompressing) { 9997 assert(Chain.getValueType() == MVT::Other && 9998 "Invalid chain type"); 9999 bool Indexed = AM != ISD::UNINDEXED; 10000 assert((Indexed || Offset.isUndef()) && 10001 "Unindexed masked store with an offset!"); 10002 SDVTList VTs = Indexed ? 
getVTList(Base.getValueType(), MVT::Other) 10003 : getVTList(MVT::Other); 10004 SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; 10005 FoldingSetNodeID ID; 10006 AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); 10007 ID.AddInteger(MemVT.getRawBits()); 10008 ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( 10009 dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); 10010 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10011 ID.AddInteger(MMO->getFlags()); 10012 void *IP = nullptr; 10013 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10014 cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); 10015 return SDValue(E, 0); 10016 } 10017 auto *N = 10018 newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, 10019 IsTruncating, IsCompressing, MemVT, MMO); 10020 createOperands(N, Ops); 10021 10022 CSEMap.InsertNode(N, IP); 10023 InsertNode(N); 10024 SDValue V(N, 0); 10025 NewSDValueDbgMsg(V, "Creating new node: ", this); 10026 return V; 10027 } 10028 10029 SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, 10030 SDValue Base, SDValue Offset, 10031 ISD::MemIndexedMode AM) { 10032 MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); 10033 assert(ST->getOffset().isUndef() && 10034 "Masked store is already a indexed store!"); 10035 return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, 10036 ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), 10037 AM, ST->isTruncatingStore(), ST->isCompressingStore()); 10038 } 10039 10040 SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, 10041 ArrayRef<SDValue> Ops, 10042 MachineMemOperand *MMO, 10043 ISD::MemIndexType IndexType, 10044 ISD::LoadExtType ExtTy) { 10045 assert(Ops.size() == 6 && "Incompatible number of operands"); 10046 10047 FoldingSetNodeID ID; 10048 AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); 10049 ID.AddInteger(MemVT.getRawBits()); 10050 ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( 10051 dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy)); 10052 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10053 ID.AddInteger(MMO->getFlags()); 10054 void *IP = nullptr; 10055 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10056 cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); 10057 return SDValue(E, 0); 10058 } 10059 10060 auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), 10061 VTs, MemVT, MMO, IndexType, ExtTy); 10062 createOperands(N, Ops); 10063 10064 assert(N->getPassThru().getValueType() == N->getValueType(0) && 10065 "Incompatible type of the PassThru value in MaskedGatherSDNode"); 10066 assert(N->getMask().getValueType().getVectorElementCount() == 10067 N->getValueType(0).getVectorElementCount() && 10068 "Vector width mismatch between mask and data"); 10069 assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == 10070 N->getValueType(0).getVectorElementCount().isScalable() && 10071 "Scalable flags of index and data do not match"); 10072 assert(ElementCount::isKnownGE( 10073 N->getIndex().getValueType().getVectorElementCount(), 10074 N->getValueType(0).getVectorElementCount()) && 10075 "Vector width mismatch between index and data"); 10076 assert(isa<ConstantSDNode>(N->getScale()) && 10077 N->getScale()->getAsAPIntVal().isPowerOf2() && 10078 "Scale should be a constant power of 2"); 10079 10080 CSEMap.InsertNode(N, IP); 10081 InsertNode(N); 10082 SDValue V(N, 0); 10083 NewSDValueDbgMsg(V, "Creating new node: ", this); 10084 return V; 10085 } 10086 10087 
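// Illustrative sketch only: how a caller is typically expected to pack the
// flat operand arrays consumed by getMaskedGather above and getMaskedScatter
// below. The operand order is an assumption inferred from the accessors the
// asserts use (getPassThru()/getValue(), getMask(), getIndex(), getScale());
// the variable names are placeholders, not part of this file.
//
//   SDValue GatherOps[]  = {Chain, PassThru,  Mask, BasePtr, Index, Scale};
//   SDValue Gather  = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), MemVT,
//                                         DL, GatherOps, MMO,
//                                         ISD::SIGNED_SCALED, ISD::NON_EXTLOAD);
//
//   SDValue ScatterOps[] = {Chain, StoredVal, Mask, BasePtr, Index, Scale};
//   SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MemVT,
//                                          DL, ScatterOps, MMO,
//                                          ISD::SIGNED_SCALED,
//                                          /*IsTrunc=*/false);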
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, 10088 ArrayRef<SDValue> Ops, 10089 MachineMemOperand *MMO, 10090 ISD::MemIndexType IndexType, 10091 bool IsTrunc) { 10092 assert(Ops.size() == 6 && "Incompatible number of operands"); 10093 10094 FoldingSetNodeID ID; 10095 AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); 10096 ID.AddInteger(MemVT.getRawBits()); 10097 ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( 10098 dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc)); 10099 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10100 ID.AddInteger(MMO->getFlags()); 10101 void *IP = nullptr; 10102 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10103 cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); 10104 return SDValue(E, 0); 10105 } 10106 10107 auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), 10108 VTs, MemVT, MMO, IndexType, IsTrunc); 10109 createOperands(N, Ops); 10110 10111 assert(N->getMask().getValueType().getVectorElementCount() == 10112 N->getValue().getValueType().getVectorElementCount() && 10113 "Vector width mismatch between mask and data"); 10114 assert( 10115 N->getIndex().getValueType().getVectorElementCount().isScalable() == 10116 N->getValue().getValueType().getVectorElementCount().isScalable() && 10117 "Scalable flags of index and data do not match"); 10118 assert(ElementCount::isKnownGE( 10119 N->getIndex().getValueType().getVectorElementCount(), 10120 N->getValue().getValueType().getVectorElementCount()) && 10121 "Vector width mismatch between index and data"); 10122 assert(isa<ConstantSDNode>(N->getScale()) && 10123 N->getScale()->getAsAPIntVal().isPowerOf2() && 10124 "Scale should be a constant power of 2"); 10125 10126 CSEMap.InsertNode(N, IP); 10127 InsertNode(N); 10128 SDValue V(N, 0); 10129 NewSDValueDbgMsg(V, "Creating new node: ", this); 10130 return V; 10131 } 10132 10133 SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT, 10134 const SDLoc &dl, ArrayRef<SDValue> Ops, 10135 MachineMemOperand *MMO, 10136 ISD::MemIndexType IndexType) { 10137 assert(Ops.size() == 7 && "Incompatible number of operands"); 10138 10139 FoldingSetNodeID ID; 10140 AddNodeIDNode(ID, ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, VTs, Ops); 10141 ID.AddInteger(MemVT.getRawBits()); 10142 ID.AddInteger(getSyntheticNodeSubclassData<MaskedHistogramSDNode>( 10143 dl.getIROrder(), VTs, MemVT, MMO, IndexType)); 10144 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10145 ID.AddInteger(MMO->getFlags()); 10146 void *IP = nullptr; 10147 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 10148 cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); 10149 return SDValue(E, 0); 10150 } 10151 10152 auto *N = newSDNode<MaskedHistogramSDNode>(dl.getIROrder(), dl.getDebugLoc(), 10153 VTs, MemVT, MMO, IndexType); 10154 createOperands(N, Ops); 10155 10156 assert(N->getMask().getValueType().getVectorElementCount() == 10157 N->getIndex().getValueType().getVectorElementCount() && 10158 "Vector width mismatch between mask and data"); 10159 assert(isa<ConstantSDNode>(N->getScale()) && 10160 N->getScale()->getAsAPIntVal().isPowerOf2() && 10161 "Scale should be a constant power of 2"); 10162 assert(N->getInc().getValueType().isInteger() && "Non integer update value"); 10163 10164 CSEMap.InsertNode(N, IP); 10165 InsertNode(N); 10166 SDValue V(N, 0); 10167 NewSDValueDbgMsg(V, "Creating new node: ", this); 10168 return V; 10169 } 10170 10171 SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, 10172 EVT MemVT, 
MachineMemOperand *MMO) { 10173 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 10174 SDVTList VTs = getVTList(MVT::Other); 10175 SDValue Ops[] = {Chain, Ptr}; 10176 FoldingSetNodeID ID; 10177 AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops); 10178 ID.AddInteger(MemVT.getRawBits()); 10179 ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>( 10180 ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO)); 10181 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10182 ID.AddInteger(MMO->getFlags()); 10183 void *IP = nullptr; 10184 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 10185 return SDValue(E, 0); 10186 10187 auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(), 10188 dl.getDebugLoc(), VTs, MemVT, MMO); 10189 createOperands(N, Ops); 10190 10191 CSEMap.InsertNode(N, IP); 10192 InsertNode(N); 10193 SDValue V(N, 0); 10194 NewSDValueDbgMsg(V, "Creating new node: ", this); 10195 return V; 10196 } 10197 10198 SDValue SelectionDAG::getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, 10199 EVT MemVT, MachineMemOperand *MMO) { 10200 assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); 10201 SDVTList VTs = getVTList(MVT::Other); 10202 SDValue Ops[] = {Chain, Ptr}; 10203 FoldingSetNodeID ID; 10204 AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops); 10205 ID.AddInteger(MemVT.getRawBits()); 10206 ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>( 10207 ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO)); 10208 ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); 10209 ID.AddInteger(MMO->getFlags()); 10210 void *IP = nullptr; 10211 if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) 10212 return SDValue(E, 0); 10213 10214 auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(), 10215 dl.getDebugLoc(), VTs, MemVT, MMO); 10216 createOperands(N, Ops); 10217 10218 CSEMap.InsertNode(N, IP); 10219 InsertNode(N); 10220 SDValue V(N, 0); 10221 NewSDValueDbgMsg(V, "Creating new node: ", this); 10222 return V; 10223 } 10224 10225 SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { 10226 // select undef, T, F --> T (if T is a constant), otherwise F 10227 // select, ?, undef, F --> F 10228 // select, ?, T, undef --> T 10229 if (Cond.isUndef()) 10230 return isConstantValueOfAnyType(T) ? T : F; 10231 if (T.isUndef()) 10232 return F; 10233 if (F.isUndef()) 10234 return T; 10235 10236 // select true, T, F --> T 10237 // select false, T, F --> F 10238 if (auto C = isBoolConstant(Cond, /*AllowTruncation=*/true)) 10239 return *C ? T : F; 10240 10241 // select ?, T, T --> T 10242 if (T == F) 10243 return T; 10244 10245 return SDValue(); 10246 } 10247 10248 SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { 10249 // shift undef, Y --> 0 (can always assume that the undef value is 0) 10250 if (X.isUndef()) 10251 return getConstant(0, SDLoc(X.getNode()), X.getValueType()); 10252 // shift X, undef --> undef (because it may shift by the bitwidth) 10253 if (Y.isUndef()) 10254 return getUNDEF(X.getValueType()); 10255 10256 // shift 0, Y --> 0 10257 // shift X, 0 --> X 10258 if (isNullOrNullSplat(X) || isNullOrNullSplat(Y)) 10259 return X; 10260 10261 // shift X, C >= bitwidth(X) --> undef 10262 // All vector elements must be too big (or undef) to avoid partial undefs. 
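  // For example (illustrative amounts for a v4i32 shift):
  //   (srl X, <32, 33, undef, 40>) --> undef   (every defined lane is >= 32)
  //   (srl X, <32, 5,  undef, 40>) --> no fold (lane 1 is still in range)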
10263 auto isShiftTooBig = [X](ConstantSDNode *Val) { 10264 return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits()); 10265 }; 10266 if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true)) 10267 return getUNDEF(X.getValueType()); 10268 10269 // shift i1/vXi1 X, Y --> X (any non-zero shift amount is undefined). 10270 if (X.getValueType().getScalarType() == MVT::i1) 10271 return X; 10272 10273 return SDValue(); 10274 } 10275 10276 SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, 10277 SDNodeFlags Flags) { 10278 // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand 10279 // (an undef operand can be chosen to be Nan/Inf), then the result of this 10280 // operation is poison. That result can be relaxed to undef. 10281 ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true); 10282 ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); 10283 bool HasNan = (XC && XC->getValueAPF().isNaN()) || 10284 (YC && YC->getValueAPF().isNaN()); 10285 bool HasInf = (XC && XC->getValueAPF().isInfinity()) || 10286 (YC && YC->getValueAPF().isInfinity()); 10287 10288 if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef())) 10289 return getUNDEF(X.getValueType()); 10290 10291 if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef())) 10292 return getUNDEF(X.getValueType()); 10293 10294 if (!YC) 10295 return SDValue(); 10296 10297 // X + -0.0 --> X 10298 if (Opcode == ISD::FADD) 10299 if (YC->getValueAPF().isNegZero()) 10300 return X; 10301 10302 // X - +0.0 --> X 10303 if (Opcode == ISD::FSUB) 10304 if (YC->getValueAPF().isPosZero()) 10305 return X; 10306 10307 // X * 1.0 --> X 10308 // X / 1.0 --> X 10309 if (Opcode == ISD::FMUL || Opcode == ISD::FDIV) 10310 if (YC->getValueAPF().isExactlyValue(1.0)) 10311 return X; 10312 10313 // X * 0.0 --> 0.0 10314 if (Opcode == ISD::FMUL && Flags.hasNoNaNs() && Flags.hasNoSignedZeros()) 10315 if (YC->getValueAPF().isZero()) 10316 return getConstantFP(0.0, SDLoc(Y), Y.getValueType()); 10317 10318 return SDValue(); 10319 } 10320 10321 SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, 10322 SDValue Ptr, SDValue SV, unsigned Align) { 10323 SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; 10324 return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); 10325 } 10326 10327 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 10328 ArrayRef<SDUse> Ops) { 10329 switch (Ops.size()) { 10330 case 0: return getNode(Opcode, DL, VT); 10331 case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0])); 10332 case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); 10333 case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); 10334 default: break; 10335 } 10336 10337 // Copy from an SDUse array into an SDValue array for use with 10338 // the regular getNode logic. 
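  // (Each SDUse converts to the SDValue it wraps, so this copy only changes
  // the element type of the operand list; no new nodes are created here.)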
10339 SmallVector<SDValue, 8> NewOps(Ops); 10340 return getNode(Opcode, DL, VT, NewOps); 10341 } 10342 10343 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 10344 ArrayRef<SDValue> Ops) { 10345 SDNodeFlags Flags; 10346 if (Inserter) 10347 Flags = Inserter->getFlags(); 10348 return getNode(Opcode, DL, VT, Ops, Flags); 10349 } 10350 10351 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, 10352 ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { 10353 unsigned NumOps = Ops.size(); 10354 switch (NumOps) { 10355 case 0: return getNode(Opcode, DL, VT); 10356 case 1: return getNode(Opcode, DL, VT, Ops[0], Flags); 10357 case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); 10358 case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags); 10359 default: break; 10360 } 10361 10362 #ifndef NDEBUG 10363 for (const auto &Op : Ops) 10364 assert(Op.getOpcode() != ISD::DELETED_NODE && 10365 "Operand is DELETED_NODE!"); 10366 #endif 10367 10368 switch (Opcode) { 10369 default: break; 10370 case ISD::BUILD_VECTOR: 10371 // Attempt to simplify BUILD_VECTOR. 10372 if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) 10373 return V; 10374 break; 10375 case ISD::CONCAT_VECTORS: 10376 if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) 10377 return V; 10378 break; 10379 case ISD::SELECT_CC: 10380 assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); 10381 assert(Ops[0].getValueType() == Ops[1].getValueType() && 10382 "LHS and RHS of condition must have same type!"); 10383 assert(Ops[2].getValueType() == Ops[3].getValueType() && 10384 "True and False arms of SelectCC must have same type!"); 10385 assert(Ops[2].getValueType() == VT && 10386 "select_cc node must be of same type as true and false value!"); 10387 assert((!Ops[0].getValueType().isVector() || 10388 Ops[0].getValueType().getVectorElementCount() == 10389 VT.getVectorElementCount()) && 10390 "Expected select_cc with vector result to have the same sized " 10391 "comparison type!"); 10392 break; 10393 case ISD::BR_CC: 10394 assert(NumOps == 5 && "BR_CC takes 5 operands!"); 10395 assert(Ops[2].getValueType() == Ops[3].getValueType() && 10396 "LHS/RHS of comparison should match types!"); 10397 break; 10398 case ISD::VP_ADD: 10399 case ISD::VP_SUB: 10400 // If it is VP_ADD/VP_SUB mask operation then turn it to VP_XOR 10401 if (VT.isVector() && VT.getVectorElementType() == MVT::i1) 10402 Opcode = ISD::VP_XOR; 10403 break; 10404 case ISD::VP_MUL: 10405 // If it is VP_MUL mask operation then turn it to VP_AND 10406 if (VT.isVector() && VT.getVectorElementType() == MVT::i1) 10407 Opcode = ISD::VP_AND; 10408 break; 10409 case ISD::VP_REDUCE_MUL: 10410 // If it is VP_REDUCE_MUL mask operation then turn it to VP_REDUCE_AND 10411 if (VT == MVT::i1) 10412 Opcode = ISD::VP_REDUCE_AND; 10413 break; 10414 case ISD::VP_REDUCE_ADD: 10415 // If it is VP_REDUCE_ADD mask operation then turn it to VP_REDUCE_XOR 10416 if (VT == MVT::i1) 10417 Opcode = ISD::VP_REDUCE_XOR; 10418 break; 10419 case ISD::VP_REDUCE_SMAX: 10420 case ISD::VP_REDUCE_UMIN: 10421 // If it is VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation then turn it to 10422 // VP_REDUCE_AND. 10423 if (VT == MVT::i1) 10424 Opcode = ISD::VP_REDUCE_AND; 10425 break; 10426 case ISD::VP_REDUCE_SMIN: 10427 case ISD::VP_REDUCE_UMAX: 10428 // If it is VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation then turn it to 10429 // VP_REDUCE_OR. 10430 if (VT == MVT::i1) 10431 Opcode = ISD::VP_REDUCE_OR; 10432 break; 10433 } 10434 10435 // Memoize nodes. 
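  // The CSE key below is built from the opcode, the result-type list and the
  // operands; nodes that produce MVT::Glue are deliberately never CSE'd,
  // because a glue result models a scheduling dependency tied to one
  // particular producer and must not be shared between nodes.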
10436 SDNode *N; 10437 SDVTList VTs = getVTList(VT); 10438 10439 if (VT != MVT::Glue) { 10440 FoldingSetNodeID ID; 10441 AddNodeIDNode(ID, Opcode, VTs, Ops); 10442 void *IP = nullptr; 10443 10444 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 10445 E->intersectFlagsWith(Flags); 10446 return SDValue(E, 0); 10447 } 10448 10449 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 10450 createOperands(N, Ops); 10451 10452 CSEMap.InsertNode(N, IP); 10453 } else { 10454 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 10455 createOperands(N, Ops); 10456 } 10457 10458 N->setFlags(Flags); 10459 InsertNode(N); 10460 SDValue V(N, 0); 10461 NewSDValueDbgMsg(V, "Creating new node: ", this); 10462 return V; 10463 } 10464 10465 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, 10466 ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { 10467 return getNode(Opcode, DL, getVTList(ResultTys), Ops); 10468 } 10469 10470 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10471 ArrayRef<SDValue> Ops) { 10472 SDNodeFlags Flags; 10473 if (Inserter) 10474 Flags = Inserter->getFlags(); 10475 return getNode(Opcode, DL, VTList, Ops, Flags); 10476 } 10477 10478 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10479 ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { 10480 if (VTList.NumVTs == 1) 10481 return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); 10482 10483 #ifndef NDEBUG 10484 for (const auto &Op : Ops) 10485 assert(Op.getOpcode() != ISD::DELETED_NODE && 10486 "Operand is DELETED_NODE!"); 10487 #endif 10488 10489 switch (Opcode) { 10490 case ISD::SADDO: 10491 case ISD::UADDO: 10492 case ISD::SSUBO: 10493 case ISD::USUBO: { 10494 assert(VTList.NumVTs == 2 && Ops.size() == 2 && 10495 "Invalid add/sub overflow op!"); 10496 assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && 10497 Ops[0].getValueType() == Ops[1].getValueType() && 10498 Ops[0].getValueType() == VTList.VTs[0] && 10499 "Binary operator types must match!"); 10500 SDValue N1 = Ops[0], N2 = Ops[1]; 10501 canonicalizeCommutativeBinop(Opcode, N1, N2); 10502 10503 // (X +- 0) -> X with zero-overflow. 
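    // e.g. (uaddo X, 0) or (usubo X, 0) becomes MERGE_VALUES(X, 0): the value
    // is X and the overflow bit is constant false.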
10504 ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false, 10505 /*AllowTruncation*/ true); 10506 if (N2CV && N2CV->isZero()) { 10507 SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]); 10508 return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags); 10509 } 10510 10511 if (VTList.VTs[0].isVector() && 10512 VTList.VTs[0].getVectorElementType() == MVT::i1 && 10513 VTList.VTs[1].getVectorElementType() == MVT::i1) { 10514 SDValue F1 = getFreeze(N1); 10515 SDValue F2 = getFreeze(N2); 10516 // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1y) -> {xor(x,y),and(x,y)} 10517 if (Opcode == ISD::UADDO || Opcode == ISD::SADDO) 10518 return getNode(ISD::MERGE_VALUES, DL, VTList, 10519 {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2), 10520 getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)}, 10521 Flags); 10522 // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1y) -> {xor(x,y),and(~x,y)} 10523 if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) { 10524 SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]); 10525 return getNode(ISD::MERGE_VALUES, DL, VTList, 10526 {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2), 10527 getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)}, 10528 Flags); 10529 } 10530 } 10531 break; 10532 } 10533 case ISD::SADDO_CARRY: 10534 case ISD::UADDO_CARRY: 10535 case ISD::SSUBO_CARRY: 10536 case ISD::USUBO_CARRY: 10537 assert(VTList.NumVTs == 2 && Ops.size() == 3 && 10538 "Invalid add/sub overflow op!"); 10539 assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && 10540 Ops[0].getValueType() == Ops[1].getValueType() && 10541 Ops[0].getValueType() == VTList.VTs[0] && 10542 Ops[2].getValueType() == VTList.VTs[1] && 10543 "Binary operator types must match!"); 10544 break; 10545 case ISD::SMUL_LOHI: 10546 case ISD::UMUL_LOHI: { 10547 assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid mul lo/hi op!"); 10548 assert(VTList.VTs[0].isInteger() && VTList.VTs[0] == VTList.VTs[1] && 10549 VTList.VTs[0] == Ops[0].getValueType() && 10550 VTList.VTs[0] == Ops[1].getValueType() && 10551 "Binary operator types must match!"); 10552 // Constant fold. 10553 ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]); 10554 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]); 10555 if (LHS && RHS) { 10556 unsigned Width = VTList.VTs[0].getScalarSizeInBits(); 10557 unsigned OutWidth = Width * 2; 10558 APInt Val = LHS->getAPIntValue(); 10559 APInt Mul = RHS->getAPIntValue(); 10560 if (Opcode == ISD::SMUL_LOHI) { 10561 Val = Val.sext(OutWidth); 10562 Mul = Mul.sext(OutWidth); 10563 } else { 10564 Val = Val.zext(OutWidth); 10565 Mul = Mul.zext(OutWidth); 10566 } 10567 Val *= Mul; 10568 10569 SDValue Hi = 10570 getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]); 10571 SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]); 10572 return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags); 10573 } 10574 break; 10575 } 10576 case ISD::FFREXP: { 10577 assert(VTList.NumVTs == 2 && Ops.size() == 1 && "Invalid ffrexp op!"); 10578 assert(VTList.VTs[0].isFloatingPoint() && VTList.VTs[1].isInteger() && 10579 VTList.VTs[0] == Ops[0].getValueType() && "frexp type mismatch"); 10580 10581 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Ops[0])) { 10582 int FrexpExp; 10583 APFloat FrexpMant = 10584 frexp(C->getValueAPF(), FrexpExp, APFloat::rmNearestTiesToEven); 10585 SDValue Result0 = getConstantFP(FrexpMant, DL, VTList.VTs[0]); 10586 SDValue Result1 = 10587 getConstant(FrexpMant.isFinite() ? 
FrexpExp : 0, DL, VTList.VTs[1]); 10588 return getNode(ISD::MERGE_VALUES, DL, VTList, {Result0, Result1}, Flags); 10589 } 10590 10591 break; 10592 } 10593 case ISD::STRICT_FP_EXTEND: 10594 assert(VTList.NumVTs == 2 && Ops.size() == 2 && 10595 "Invalid STRICT_FP_EXTEND!"); 10596 assert(VTList.VTs[0].isFloatingPoint() && 10597 Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!"); 10598 assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && 10599 "STRICT_FP_EXTEND result type should be vector iff the operand " 10600 "type is vector!"); 10601 assert((!VTList.VTs[0].isVector() || 10602 VTList.VTs[0].getVectorElementCount() == 10603 Ops[1].getValueType().getVectorElementCount()) && 10604 "Vector element count mismatch!"); 10605 assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) && 10606 "Invalid fpext node, dst <= src!"); 10607 break; 10608 case ISD::STRICT_FP_ROUND: 10609 assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!"); 10610 assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && 10611 "STRICT_FP_ROUND result type should be vector iff the operand " 10612 "type is vector!"); 10613 assert((!VTList.VTs[0].isVector() || 10614 VTList.VTs[0].getVectorElementCount() == 10615 Ops[1].getValueType().getVectorElementCount()) && 10616 "Vector element count mismatch!"); 10617 assert(VTList.VTs[0].isFloatingPoint() && 10618 Ops[1].getValueType().isFloatingPoint() && 10619 VTList.VTs[0].bitsLT(Ops[1].getValueType()) && 10620 Ops[2].getOpcode() == ISD::TargetConstant && 10621 (Ops[2]->getAsZExtVal() == 0 || Ops[2]->getAsZExtVal() == 1) && 10622 "Invalid STRICT_FP_ROUND!"); 10623 break; 10624 #if 0 10625 // FIXME: figure out how to safely handle things like 10626 // int foo(int x) { return 1 << (x & 255); } 10627 // int bar() { return foo(256); } 10628 case ISD::SRA_PARTS: 10629 case ISD::SRL_PARTS: 10630 case ISD::SHL_PARTS: 10631 if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && 10632 cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) 10633 return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); 10634 else if (N3.getOpcode() == ISD::AND) 10635 if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { 10636 // If the and is only masking out bits that cannot effect the shift, 10637 // eliminate the and. 10638 unsigned NumBits = VT.getScalarSizeInBits()*2; 10639 if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) 10640 return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); 10641 } 10642 break; 10643 #endif 10644 } 10645 10646 // Memoize the node unless it returns a glue result. 
10647 SDNode *N; 10648 if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { 10649 FoldingSetNodeID ID; 10650 AddNodeIDNode(ID, Opcode, VTList, Ops); 10651 void *IP = nullptr; 10652 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 10653 E->intersectFlagsWith(Flags); 10654 return SDValue(E, 0); 10655 } 10656 10657 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); 10658 createOperands(N, Ops); 10659 CSEMap.InsertNode(N, IP); 10660 } else { 10661 N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); 10662 createOperands(N, Ops); 10663 } 10664 10665 N->setFlags(Flags); 10666 InsertNode(N); 10667 SDValue V(N, 0); 10668 NewSDValueDbgMsg(V, "Creating new node: ", this); 10669 return V; 10670 } 10671 10672 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, 10673 SDVTList VTList) { 10674 return getNode(Opcode, DL, VTList, ArrayRef<SDValue>()); 10675 } 10676 10677 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10678 SDValue N1) { 10679 SDValue Ops[] = { N1 }; 10680 return getNode(Opcode, DL, VTList, Ops); 10681 } 10682 10683 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10684 SDValue N1, SDValue N2) { 10685 SDValue Ops[] = { N1, N2 }; 10686 return getNode(Opcode, DL, VTList, Ops); 10687 } 10688 10689 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10690 SDValue N1, SDValue N2, SDValue N3) { 10691 SDValue Ops[] = { N1, N2, N3 }; 10692 return getNode(Opcode, DL, VTList, Ops); 10693 } 10694 10695 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10696 SDValue N1, SDValue N2, SDValue N3, SDValue N4) { 10697 SDValue Ops[] = { N1, N2, N3, N4 }; 10698 return getNode(Opcode, DL, VTList, Ops); 10699 } 10700 10701 SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, 10702 SDValue N1, SDValue N2, SDValue N3, SDValue N4, 10703 SDValue N5) { 10704 SDValue Ops[] = { N1, N2, N3, N4, N5 }; 10705 return getNode(Opcode, DL, VTList, Ops); 10706 } 10707 10708 SDVTList SelectionDAG::getVTList(EVT VT) { 10709 if (!VT.isExtended()) 10710 return makeVTList(SDNode::getValueTypeList(VT.getSimpleVT()), 1); 10711 10712 return makeVTList(&(*EVTs.insert(VT).first), 1); 10713 } 10714 10715 SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { 10716 FoldingSetNodeID ID; 10717 ID.AddInteger(2U); 10718 ID.AddInteger(VT1.getRawBits()); 10719 ID.AddInteger(VT2.getRawBits()); 10720 10721 void *IP = nullptr; 10722 SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); 10723 if (!Result) { 10724 EVT *Array = Allocator.Allocate<EVT>(2); 10725 Array[0] = VT1; 10726 Array[1] = VT2; 10727 Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); 10728 VTListMap.InsertNode(Result, IP); 10729 } 10730 return Result->getSDVTList(); 10731 } 10732 10733 SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { 10734 FoldingSetNodeID ID; 10735 ID.AddInteger(3U); 10736 ID.AddInteger(VT1.getRawBits()); 10737 ID.AddInteger(VT2.getRawBits()); 10738 ID.AddInteger(VT3.getRawBits()); 10739 10740 void *IP = nullptr; 10741 SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); 10742 if (!Result) { 10743 EVT *Array = Allocator.Allocate<EVT>(3); 10744 Array[0] = VT1; 10745 Array[1] = VT2; 10746 Array[2] = VT3; 10747 Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); 10748 VTListMap.InsertNode(Result, IP); 10749 } 10750 return Result->getSDVTList(); 10751 } 10752 10753 SDVTList 
SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { 10754 FoldingSetNodeID ID; 10755 ID.AddInteger(4U); 10756 ID.AddInteger(VT1.getRawBits()); 10757 ID.AddInteger(VT2.getRawBits()); 10758 ID.AddInteger(VT3.getRawBits()); 10759 ID.AddInteger(VT4.getRawBits()); 10760 10761 void *IP = nullptr; 10762 SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); 10763 if (!Result) { 10764 EVT *Array = Allocator.Allocate<EVT>(4); 10765 Array[0] = VT1; 10766 Array[1] = VT2; 10767 Array[2] = VT3; 10768 Array[3] = VT4; 10769 Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); 10770 VTListMap.InsertNode(Result, IP); 10771 } 10772 return Result->getSDVTList(); 10773 } 10774 10775 SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { 10776 unsigned NumVTs = VTs.size(); 10777 FoldingSetNodeID ID; 10778 ID.AddInteger(NumVTs); 10779 for (unsigned index = 0; index < NumVTs; index++) { 10780 ID.AddInteger(VTs[index].getRawBits()); 10781 } 10782 10783 void *IP = nullptr; 10784 SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); 10785 if (!Result) { 10786 EVT *Array = Allocator.Allocate<EVT>(NumVTs); 10787 llvm::copy(VTs, Array); 10788 Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); 10789 VTListMap.InsertNode(Result, IP); 10790 } 10791 return Result->getSDVTList(); 10792 } 10793 10794 10795 /// UpdateNodeOperands - *Mutate* the specified node in-place to have the 10796 /// specified operands. If the resultant node already exists in the DAG, 10797 /// this does not modify the specified node, instead it returns the node that 10798 /// already exists. If the resultant node does not exist in the DAG, the 10799 /// input node is returned. As a degenerate case, if you specify the same 10800 /// input operands as the node already has, the input node is returned. 10801 SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { 10802 assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); 10803 10804 // Check to see if there is no change. 10805 if (Op == N->getOperand(0)) return N; 10806 10807 // See if the modified node already exists. 10808 void *InsertPos = nullptr; 10809 if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) 10810 return Existing; 10811 10812 // Nope it doesn't. Remove the node from its current place in the maps. 10813 if (InsertPos) 10814 if (!RemoveNodeFromCSEMaps(N)) 10815 InsertPos = nullptr; 10816 10817 // Now we update the operands. 10818 N->OperandList[0].set(Op); 10819 10820 updateDivergence(N); 10821 // If this gets put into a CSE map, add it. 10822 if (InsertPos) CSEMap.InsertNode(N, InsertPos); 10823 return N; 10824 } 10825 10826 SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { 10827 assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); 10828 10829 // Check to see if there is no change. 10830 if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) 10831 return N; // No operands changed, just return the input node. 10832 10833 // See if the modified node already exists. 10834 void *InsertPos = nullptr; 10835 if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) 10836 return Existing; 10837 10838 // Nope it doesn't. Remove the node from its current place in the maps. 10839 if (InsertPos) 10840 if (!RemoveNodeFromCSEMaps(N)) 10841 InsertPos = nullptr; 10842 10843 // Now we update the operands. 
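  // Only operands that actually changed are written back; SDUse::set also
  // updates the use lists of the old and new operand nodes, so skipping
  // unchanged operands avoids needless use-list churn.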
10844 if (N->OperandList[0] != Op1) 10845 N->OperandList[0].set(Op1); 10846 if (N->OperandList[1] != Op2) 10847 N->OperandList[1].set(Op2); 10848 10849 updateDivergence(N); 10850 // If this gets put into a CSE map, add it. 10851 if (InsertPos) CSEMap.InsertNode(N, InsertPos); 10852 return N; 10853 } 10854 10855 SDNode *SelectionDAG:: 10856 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { 10857 SDValue Ops[] = { Op1, Op2, Op3 }; 10858 return UpdateNodeOperands(N, Ops); 10859 } 10860 10861 SDNode *SelectionDAG:: 10862 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, 10863 SDValue Op3, SDValue Op4) { 10864 SDValue Ops[] = { Op1, Op2, Op3, Op4 }; 10865 return UpdateNodeOperands(N, Ops); 10866 } 10867 10868 SDNode *SelectionDAG:: 10869 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, 10870 SDValue Op3, SDValue Op4, SDValue Op5) { 10871 SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; 10872 return UpdateNodeOperands(N, Ops); 10873 } 10874 10875 SDNode *SelectionDAG:: 10876 UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { 10877 unsigned NumOps = Ops.size(); 10878 assert(N->getNumOperands() == NumOps && 10879 "Update with wrong number of operands"); 10880 10881 // If no operands changed just return the input node. 10882 if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) 10883 return N; 10884 10885 // See if the modified node already exists. 10886 void *InsertPos = nullptr; 10887 if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) 10888 return Existing; 10889 10890 // Nope it doesn't. Remove the node from its current place in the maps. 10891 if (InsertPos) 10892 if (!RemoveNodeFromCSEMaps(N)) 10893 InsertPos = nullptr; 10894 10895 // Now we update the operands. 10896 for (unsigned i = 0; i != NumOps; ++i) 10897 if (N->OperandList[i] != Ops[i]) 10898 N->OperandList[i].set(Ops[i]); 10899 10900 updateDivergence(N); 10901 // If this gets put into a CSE map, add it. 10902 if (InsertPos) CSEMap.InsertNode(N, InsertPos); 10903 return N; 10904 } 10905 10906 /// DropOperands - Release the operands and set this node to have 10907 /// zero operands. 10908 void SDNode::DropOperands() { 10909 // Unlike the code in MorphNodeTo that does this, we don't need to 10910 // watch for dead nodes here. 10911 for (op_iterator I = op_begin(), E = op_end(); I != E; ) { 10912 SDUse &Use = *I++; 10913 Use.set(SDValue()); 10914 } 10915 } 10916 10917 void SelectionDAG::setNodeMemRefs(MachineSDNode *N, 10918 ArrayRef<MachineMemOperand *> NewMemRefs) { 10919 if (NewMemRefs.empty()) { 10920 N->clearMemRefs(); 10921 return; 10922 } 10923 10924 // Check if we can avoid allocating by storing a single reference directly. 10925 if (NewMemRefs.size() == 1) { 10926 N->MemRefs = NewMemRefs[0]; 10927 N->NumMemRefs = 1; 10928 return; 10929 } 10930 10931 MachineMemOperand **MemRefsBuffer = 10932 Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size()); 10933 llvm::copy(NewMemRefs, MemRefsBuffer); 10934 N->MemRefs = MemRefsBuffer; 10935 N->NumMemRefs = static_cast<int>(NewMemRefs.size()); 10936 } 10937 10938 /// SelectNodeTo - These are wrappers around MorphNodeTo that accept a 10939 /// machine opcode. 
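/// A usage sketch (illustrative only; the opcode below is hypothetical and
/// target details are omitted): from a target's Select() hook one might write
///   CurDAG->SelectNodeTo(N, MyTarget::ADDrr /*hypothetical*/, MVT::i32,
///                        Op0, Op1);
/// to morph the ISD node N into the chosen machine instruction in place.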
///
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT) {
  SDVTList VTs = getVTList(VT);
  return SelectNodeTo(N, MachineOpc, VTs, {});
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT, SDValue Op1) {
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = { Op1 };
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT, SDValue Op1,
                                   SDValue Op2) {
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = { Op1, Op2 };
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT, SDValue Op1,
                                   SDValue Op2, SDValue Op3) {
  SDVTList VTs = getVTList(VT);
  SDValue Ops[] = { Op1, Op2, Op3 };
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT, ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(VT);
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(VT1, VT2);
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT1, EVT VT2) {
  SDVTList VTs = getVTList(VT1, VT2);
  return SelectNodeTo(N, MachineOpc, VTs, {});
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT1, EVT VT2, EVT VT3,
                                   ArrayRef<SDValue> Ops) {
  SDVTList VTs = getVTList(VT1, VT2, VT3);
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   EVT VT1, EVT VT2,
                                   SDValue Op1, SDValue Op2) {
  SDVTList VTs = getVTList(VT1, VT2);
  SDValue Ops[] = { Op1, Op2 };
  return SelectNodeTo(N, MachineOpc, VTs, Ops);
}

SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
                                   SDVTList VTs, ArrayRef<SDValue> Ops) {
  SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
  // Reset the NodeID to -1.
  New->setNodeId(-1);
  if (New != N) {
    ReplaceAllUsesWith(N, New);
    RemoveDeadNode(N);
  }
  return New;
}

/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
/// the line number information on the merged node since it is not possible to
/// preserve the information that the operation is associated with multiple
/// lines. This will make the debugger work better at -O0, where there is a
/// higher probability of having other instructions associated with that line.
///
/// For IROrder, we keep the smaller of the two.
SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
  DebugLoc NLoc = N->getDebugLoc();
  if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) {
    N->setDebugLoc(DebugLoc());
  }
  unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
  N->setIROrder(Order);
  return N;
}

/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
/// Note that MorphNodeTo returns the resultant node.
If there is already a 11036 /// node of the specified opcode and operands, it returns that node instead of 11037 /// the current one. Note that the SDLoc need not be the same. 11038 /// 11039 /// Using MorphNodeTo is faster than creating a new node and swapping it in 11040 /// with ReplaceAllUsesWith both because it often avoids allocating a new 11041 /// node, and because it doesn't require CSE recalculation for any of 11042 /// the node's users. 11043 /// 11044 /// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. 11045 /// As a consequence it isn't appropriate to use from within the DAG combiner or 11046 /// the legalizer which maintain worklists that would need to be updated when 11047 /// deleting things. 11048 SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, 11049 SDVTList VTs, ArrayRef<SDValue> Ops) { 11050 // If an identical node already exists, use it. 11051 void *IP = nullptr; 11052 if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { 11053 FoldingSetNodeID ID; 11054 AddNodeIDNode(ID, Opc, VTs, Ops); 11055 if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) 11056 return UpdateSDLocOnMergeSDNode(ON, SDLoc(N)); 11057 } 11058 11059 if (!RemoveNodeFromCSEMaps(N)) 11060 IP = nullptr; 11061 11062 // Start the morphing. 11063 N->NodeType = Opc; 11064 N->ValueList = VTs.VTs; 11065 N->NumValues = VTs.NumVTs; 11066 11067 // Clear the operands list, updating used nodes to remove this from their 11068 // use list. Keep track of any operands that become dead as a result. 11069 SmallPtrSet<SDNode*, 16> DeadNodeSet; 11070 for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { 11071 SDUse &Use = *I++; 11072 SDNode *Used = Use.getNode(); 11073 Use.set(SDValue()); 11074 if (Used->use_empty()) 11075 DeadNodeSet.insert(Used); 11076 } 11077 11078 // For MachineNode, initialize the memory references information. 11079 if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) 11080 MN->clearMemRefs(); 11081 11082 // Swap for an appropriately sized array from the recycler. 11083 removeOperands(N); 11084 createOperands(N, Ops); 11085 11086 // Delete any nodes that are still dead after adding the uses for the 11087 // new operands. 11088 if (!DeadNodeSet.empty()) { 11089 SmallVector<SDNode *, 16> DeadNodes; 11090 for (SDNode *N : DeadNodeSet) 11091 if (N->use_empty()) 11092 DeadNodes.push_back(N); 11093 RemoveDeadNodes(DeadNodes); 11094 } 11095 11096 if (IP) 11097 CSEMap.InsertNode(N, IP); // Memoize the new node. 11098 return N; 11099 } 11100 11101 SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { 11102 unsigned OrigOpc = Node->getOpcode(); 11103 unsigned NewOpc; 11104 switch (OrigOpc) { 11105 default: 11106 llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); 11107 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 11108 case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; 11109 #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 11110 case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; 11111 #include "llvm/IR/ConstrainedOps.def" 11112 } 11113 11114 assert(Node->getNumValues() == 2 && "Unexpected number of results!"); 11115 11116 // We're taking this node out of the chain, so we need to re-link things. 
11117 SDValue InputChain = Node->getOperand(0); 11118 SDValue OutputChain = SDValue(Node, 1); 11119 ReplaceAllUsesOfValueWith(OutputChain, InputChain); 11120 11121 SmallVector<SDValue, 3> Ops; 11122 for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) 11123 Ops.push_back(Node->getOperand(i)); 11124 11125 SDVTList VTs = getVTList(Node->getValueType(0)); 11126 SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops); 11127 11128 // MorphNodeTo can operate in two ways: if an existing node with the 11129 // specified operands exists, it can just return it. Otherwise, it 11130 // updates the node in place to have the requested operands. 11131 if (Res == Node) { 11132 // If we updated the node in place, reset the node ID. To the isel, 11133 // this should be just like a newly allocated machine node. 11134 Res->setNodeId(-1); 11135 } else { 11136 ReplaceAllUsesWith(Node, Res); 11137 RemoveDeadNode(Node); 11138 } 11139 11140 return Res; 11141 } 11142 11143 /// getMachineNode - These are used for target selectors to create a new node 11144 /// with specified return type(s), MachineInstr opcode, and operands. 11145 /// 11146 /// Note that getMachineNode returns the resultant node. If there is already a 11147 /// node of the specified opcode and operands, it returns that node instead of 11148 /// the current one. 11149 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11150 EVT VT) { 11151 SDVTList VTs = getVTList(VT); 11152 return getMachineNode(Opcode, dl, VTs, {}); 11153 } 11154 11155 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11156 EVT VT, SDValue Op1) { 11157 SDVTList VTs = getVTList(VT); 11158 SDValue Ops[] = { Op1 }; 11159 return getMachineNode(Opcode, dl, VTs, Ops); 11160 } 11161 11162 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11163 EVT VT, SDValue Op1, SDValue Op2) { 11164 SDVTList VTs = getVTList(VT); 11165 SDValue Ops[] = { Op1, Op2 }; 11166 return getMachineNode(Opcode, dl, VTs, Ops); 11167 } 11168 11169 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11170 EVT VT, SDValue Op1, SDValue Op2, 11171 SDValue Op3) { 11172 SDVTList VTs = getVTList(VT); 11173 SDValue Ops[] = { Op1, Op2, Op3 }; 11174 return getMachineNode(Opcode, dl, VTs, Ops); 11175 } 11176 11177 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11178 EVT VT, ArrayRef<SDValue> Ops) { 11179 SDVTList VTs = getVTList(VT); 11180 return getMachineNode(Opcode, dl, VTs, Ops); 11181 } 11182 11183 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11184 EVT VT1, EVT VT2, SDValue Op1, 11185 SDValue Op2) { 11186 SDVTList VTs = getVTList(VT1, VT2); 11187 SDValue Ops[] = { Op1, Op2 }; 11188 return getMachineNode(Opcode, dl, VTs, Ops); 11189 } 11190 11191 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11192 EVT VT1, EVT VT2, SDValue Op1, 11193 SDValue Op2, SDValue Op3) { 11194 SDVTList VTs = getVTList(VT1, VT2); 11195 SDValue Ops[] = { Op1, Op2, Op3 }; 11196 return getMachineNode(Opcode, dl, VTs, Ops); 11197 } 11198 11199 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11200 EVT VT1, EVT VT2, 11201 ArrayRef<SDValue> Ops) { 11202 SDVTList VTs = getVTList(VT1, VT2); 11203 return getMachineNode(Opcode, dl, VTs, Ops); 11204 } 11205 11206 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11207 EVT VT1, EVT VT2, EVT VT3, 11208 SDValue Op1, SDValue Op2) { 11209 SDVTList VTs = 
getVTList(VT1, VT2, VT3); 11210 SDValue Ops[] = { Op1, Op2 }; 11211 return getMachineNode(Opcode, dl, VTs, Ops); 11212 } 11213 11214 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11215 EVT VT1, EVT VT2, EVT VT3, 11216 SDValue Op1, SDValue Op2, 11217 SDValue Op3) { 11218 SDVTList VTs = getVTList(VT1, VT2, VT3); 11219 SDValue Ops[] = { Op1, Op2, Op3 }; 11220 return getMachineNode(Opcode, dl, VTs, Ops); 11221 } 11222 11223 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11224 EVT VT1, EVT VT2, EVT VT3, 11225 ArrayRef<SDValue> Ops) { 11226 SDVTList VTs = getVTList(VT1, VT2, VT3); 11227 return getMachineNode(Opcode, dl, VTs, Ops); 11228 } 11229 11230 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, 11231 ArrayRef<EVT> ResultTys, 11232 ArrayRef<SDValue> Ops) { 11233 SDVTList VTs = getVTList(ResultTys); 11234 return getMachineNode(Opcode, dl, VTs, Ops); 11235 } 11236 11237 MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, 11238 SDVTList VTs, 11239 ArrayRef<SDValue> Ops) { 11240 bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; 11241 MachineSDNode *N; 11242 void *IP = nullptr; 11243 11244 if (DoCSE) { 11245 FoldingSetNodeID ID; 11246 AddNodeIDNode(ID, ~Opcode, VTs, Ops); 11247 IP = nullptr; 11248 if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { 11249 return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL)); 11250 } 11251 } 11252 11253 // Allocate a new MachineSDNode. 11254 N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); 11255 createOperands(N, Ops); 11256 11257 if (DoCSE) 11258 CSEMap.InsertNode(N, IP); 11259 11260 InsertNode(N); 11261 NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); 11262 return N; 11263 } 11264 11265 /// getTargetExtractSubreg - A convenience function for creating 11266 /// TargetOpcode::EXTRACT_SUBREG nodes. 11267 SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, 11268 SDValue Operand) { 11269 SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); 11270 SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, 11271 VT, Operand, SRIdxVal); 11272 return SDValue(Subreg, 0); 11273 } 11274 11275 /// getTargetInsertSubreg - A convenience function for creating 11276 /// TargetOpcode::INSERT_SUBREG nodes. 11277 SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, 11278 SDValue Operand, SDValue Subreg) { 11279 SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); 11280 SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, 11281 VT, Operand, Subreg, SRIdxVal); 11282 return SDValue(Result, 0); 11283 } 11284 11285 /// getNodeIfExists - Get the specified node if it's already available, or 11286 /// else return NULL. 
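///
/// Illustrative sketch (not part of this file): callers can query the CSE
/// maps without creating a new node, e.g.
/// \code
///   // DAG, LHS and RHS are assumed to exist in the caller.
///   SDValue Ops[] = {LHS, RHS};
///   if (SDNode *Existing =
///           DAG.getNodeIfExists(ISD::ADD, DAG.getVTList(MVT::i32), Ops)) {
///     // Reuse Existing instead of building a new ADD node.
///   }
/// \endcode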
11287 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, 11288 ArrayRef<SDValue> Ops) { 11289 SDNodeFlags Flags; 11290 if (Inserter) 11291 Flags = Inserter->getFlags(); 11292 return getNodeIfExists(Opcode, VTList, Ops, Flags); 11293 } 11294 11295 SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, 11296 ArrayRef<SDValue> Ops, 11297 const SDNodeFlags Flags) { 11298 if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { 11299 FoldingSetNodeID ID; 11300 AddNodeIDNode(ID, Opcode, VTList, Ops); 11301 void *IP = nullptr; 11302 if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { 11303 E->intersectFlagsWith(Flags); 11304 return E; 11305 } 11306 } 11307 return nullptr; 11308 } 11309 11310 /// doesNodeExist - Check if a node exists without modifying its flags. 11311 bool SelectionDAG::doesNodeExist(unsigned Opcode, SDVTList VTList, 11312 ArrayRef<SDValue> Ops) { 11313 if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { 11314 FoldingSetNodeID ID; 11315 AddNodeIDNode(ID, Opcode, VTList, Ops); 11316 void *IP = nullptr; 11317 if (FindNodeOrInsertPos(ID, SDLoc(), IP)) 11318 return true; 11319 } 11320 return false; 11321 } 11322 11323 /// getDbgValue - Creates a SDDbgValue node. 11324 /// 11325 /// SDNode 11326 SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, 11327 SDNode *N, unsigned R, bool IsIndirect, 11328 const DebugLoc &DL, unsigned O) { 11329 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11330 "Expected inlined-at fields to agree"); 11331 return new (DbgInfo->getAlloc()) 11332 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromNode(N, R), 11333 {}, IsIndirect, DL, O, 11334 /*IsVariadic=*/false); 11335 } 11336 11337 /// Constant 11338 SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, 11339 DIExpression *Expr, 11340 const Value *C, 11341 const DebugLoc &DL, unsigned O) { 11342 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11343 "Expected inlined-at fields to agree"); 11344 return new (DbgInfo->getAlloc()) 11345 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromConst(C), {}, 11346 /*IsIndirect=*/false, DL, O, 11347 /*IsVariadic=*/false); 11348 } 11349 11350 /// FrameIndex 11351 SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, 11352 DIExpression *Expr, unsigned FI, 11353 bool IsIndirect, 11354 const DebugLoc &DL, 11355 unsigned O) { 11356 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11357 "Expected inlined-at fields to agree"); 11358 return getFrameIndexDbgValue(Var, Expr, FI, {}, IsIndirect, DL, O); 11359 } 11360 11361 /// FrameIndex with dependencies 11362 SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, 11363 DIExpression *Expr, unsigned FI, 11364 ArrayRef<SDNode *> Dependencies, 11365 bool IsIndirect, 11366 const DebugLoc &DL, 11367 unsigned O) { 11368 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11369 "Expected inlined-at fields to agree"); 11370 return new (DbgInfo->getAlloc()) 11371 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromFrameIdx(FI), 11372 Dependencies, IsIndirect, DL, O, 11373 /*IsVariadic=*/false); 11374 } 11375 11376 /// VReg 11377 SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, DIExpression *Expr, 11378 unsigned VReg, bool IsIndirect, 11379 const DebugLoc &DL, unsigned O) { 11380 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11381 "Expected inlined-at fields to agree"); 11382 return new (DbgInfo->getAlloc()) 
11383 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, SDDbgOperand::fromVReg(VReg), 11384 {}, IsIndirect, DL, O, 11385 /*IsVariadic=*/false); 11386 } 11387 11388 SDDbgValue *SelectionDAG::getDbgValueList(DIVariable *Var, DIExpression *Expr, 11389 ArrayRef<SDDbgOperand> Locs, 11390 ArrayRef<SDNode *> Dependencies, 11391 bool IsIndirect, const DebugLoc &DL, 11392 unsigned O, bool IsVariadic) { 11393 assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && 11394 "Expected inlined-at fields to agree"); 11395 return new (DbgInfo->getAlloc()) 11396 SDDbgValue(DbgInfo->getAlloc(), Var, Expr, Locs, Dependencies, IsIndirect, 11397 DL, O, IsVariadic); 11398 } 11399 11400 void SelectionDAG::transferDbgValues(SDValue From, SDValue To, 11401 unsigned OffsetInBits, unsigned SizeInBits, 11402 bool InvalidateDbg) { 11403 SDNode *FromNode = From.getNode(); 11404 SDNode *ToNode = To.getNode(); 11405 assert(FromNode && ToNode && "Can't modify dbg values"); 11406 11407 // PR35338 11408 // TODO: assert(From != To && "Redundant dbg value transfer"); 11409 // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer"); 11410 if (From == To || FromNode == ToNode) 11411 return; 11412 11413 if (!FromNode->getHasDebugValue()) 11414 return; 11415 11416 SDDbgOperand FromLocOp = 11417 SDDbgOperand::fromNode(From.getNode(), From.getResNo()); 11418 SDDbgOperand ToLocOp = SDDbgOperand::fromNode(To.getNode(), To.getResNo()); 11419 11420 SmallVector<SDDbgValue *, 2> ClonedDVs; 11421 for (SDDbgValue *Dbg : GetDbgValues(FromNode)) { 11422 if (Dbg->isInvalidated()) 11423 continue; 11424 11425 // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); 11426 11427 // Create a new location ops vector that is equal to the old vector, but 11428 // with each instance of FromLocOp replaced with ToLocOp. 11429 bool Changed = false; 11430 auto NewLocOps = Dbg->copyLocationOps(); 11431 std::replace_if( 11432 NewLocOps.begin(), NewLocOps.end(), 11433 [&Changed, FromLocOp](const SDDbgOperand &Op) { 11434 bool Match = Op == FromLocOp; 11435 Changed |= Match; 11436 return Match; 11437 }, 11438 ToLocOp); 11439 // Ignore this SDDbgValue if we didn't find a matching location. 11440 if (!Changed) 11441 continue; 11442 11443 DIVariable *Var = Dbg->getVariable(); 11444 auto *Expr = Dbg->getExpression(); 11445 // If a fragment is requested, update the expression. 11446 if (SizeInBits) { 11447 // When splitting a larger (e.g., sign-extended) value whose 11448 // lower bits are described with an SDDbgValue, do not attempt 11449 // to transfer the SDDbgValue to the upper bits. 11450 if (auto FI = Expr->getFragmentInfo()) 11451 if (OffsetInBits + SizeInBits > FI->SizeInBits) 11452 continue; 11453 auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits, 11454 SizeInBits); 11455 if (!Fragment) 11456 continue; 11457 Expr = *Fragment; 11458 } 11459 11460 auto AdditionalDependencies = Dbg->getAdditionalDependencies(); 11461 // Clone the SDDbgValue and move it to To. 11462 SDDbgValue *Clone = getDbgValueList( 11463 Var, Expr, NewLocOps, AdditionalDependencies, Dbg->isIndirect(), 11464 Dbg->getDebugLoc(), std::max(ToNode->getIROrder(), Dbg->getOrder()), 11465 Dbg->isVariadic()); 11466 ClonedDVs.push_back(Clone); 11467 11468 if (InvalidateDbg) { 11469 // Invalidate value and indicate the SDDbgValue should not be emitted. 
11470 Dbg->setIsInvalidated(); 11471 Dbg->setIsEmitted(); 11472 } 11473 } 11474 11475 for (SDDbgValue *Dbg : ClonedDVs) { 11476 assert(is_contained(Dbg->getSDNodes(), ToNode) && 11477 "Transferred DbgValues should depend on the new SDNode"); 11478 AddDbgValue(Dbg, false); 11479 } 11480 } 11481 11482 void SelectionDAG::salvageDebugInfo(SDNode &N) { 11483 if (!N.getHasDebugValue()) 11484 return; 11485 11486 auto GetLocationOperand = [](SDNode *Node, unsigned ResNo) { 11487 if (auto *FISDN = dyn_cast<FrameIndexSDNode>(Node)) 11488 return SDDbgOperand::fromFrameIdx(FISDN->getIndex()); 11489 return SDDbgOperand::fromNode(Node, ResNo); 11490 }; 11491 11492 SmallVector<SDDbgValue *, 2> ClonedDVs; 11493 for (auto *DV : GetDbgValues(&N)) { 11494 if (DV->isInvalidated()) 11495 continue; 11496 switch (N.getOpcode()) { 11497 default: 11498 break; 11499 case ISD::ADD: { 11500 SDValue N0 = N.getOperand(0); 11501 SDValue N1 = N.getOperand(1); 11502 if (!isa<ConstantSDNode>(N0)) { 11503 bool RHSConstant = isa<ConstantSDNode>(N1); 11504 uint64_t Offset; 11505 if (RHSConstant) 11506 Offset = N.getConstantOperandVal(1); 11507 // We are not allowed to turn indirect debug values variadic, so 11508 // don't salvage those. 11509 if (!RHSConstant && DV->isIndirect()) 11510 continue; 11511 11512 // Rewrite an ADD constant node into a DIExpression. Since we are 11513 // performing arithmetic to compute the variable's *value* in the 11514 // DIExpression, we need to mark the expression with a 11515 // DW_OP_stack_value. 11516 auto *DIExpr = DV->getExpression(); 11517 auto NewLocOps = DV->copyLocationOps(); 11518 bool Changed = false; 11519 size_t OrigLocOpsSize = NewLocOps.size(); 11520 for (size_t i = 0; i < OrigLocOpsSize; ++i) { 11521 // We're not given a ResNo to compare against because the whole 11522 // node is going away. We know that any ISD::ADD only has one 11523 // result, so we can assume any node match is using the result. 11524 if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE || 11525 NewLocOps[i].getSDNode() != &N) 11526 continue; 11527 NewLocOps[i] = GetLocationOperand(N0.getNode(), N0.getResNo()); 11528 if (RHSConstant) { 11529 SmallVector<uint64_t, 3> ExprOps; 11530 DIExpression::appendOffset(ExprOps, Offset); 11531 DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true); 11532 } else { 11533 // Convert to a variadic expression (if not already). 11534 // convertToVariadicExpression() returns a const pointer, so we use 11535 // a temporary const variable here. 
11536 const auto *TmpDIExpr = 11537 DIExpression::convertToVariadicExpression(DIExpr); 11538 SmallVector<uint64_t, 3> ExprOps; 11539 ExprOps.push_back(dwarf::DW_OP_LLVM_arg); 11540 ExprOps.push_back(NewLocOps.size()); 11541 ExprOps.push_back(dwarf::DW_OP_plus); 11542 SDDbgOperand RHS = 11543 SDDbgOperand::fromNode(N1.getNode(), N1.getResNo()); 11544 NewLocOps.push_back(RHS); 11545 DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true); 11546 } 11547 Changed = true; 11548 } 11549 (void)Changed; 11550 assert(Changed && "Salvage target doesn't use N"); 11551 11552 bool IsVariadic = 11553 DV->isVariadic() || OrigLocOpsSize != NewLocOps.size(); 11554 11555 auto AdditionalDependencies = DV->getAdditionalDependencies(); 11556 SDDbgValue *Clone = getDbgValueList( 11557 DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies, 11558 DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic); 11559 ClonedDVs.push_back(Clone); 11560 DV->setIsInvalidated(); 11561 DV->setIsEmitted(); 11562 LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; 11563 N0.getNode()->dumprFull(this); 11564 dbgs() << " into " << *DIExpr << '\n'); 11565 } 11566 break; 11567 } 11568 case ISD::TRUNCATE: { 11569 SDValue N0 = N.getOperand(0); 11570 TypeSize FromSize = N0.getValueSizeInBits(); 11571 TypeSize ToSize = N.getValueSizeInBits(0); 11572 11573 DIExpression *DbgExpression = DV->getExpression(); 11574 auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false); 11575 auto NewLocOps = DV->copyLocationOps(); 11576 bool Changed = false; 11577 for (size_t i = 0; i < NewLocOps.size(); ++i) { 11578 if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE || 11579 NewLocOps[i].getSDNode() != &N) 11580 continue; 11581 11582 NewLocOps[i] = GetLocationOperand(N0.getNode(), N0.getResNo()); 11583 DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i); 11584 Changed = true; 11585 } 11586 assert(Changed && "Salvage target doesn't use N"); 11587 (void)Changed; 11588 11589 SDDbgValue *Clone = 11590 getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps, 11591 DV->getAdditionalDependencies(), DV->isIndirect(), 11592 DV->getDebugLoc(), DV->getOrder(), DV->isVariadic()); 11593 11594 ClonedDVs.push_back(Clone); 11595 DV->setIsInvalidated(); 11596 DV->setIsEmitted(); 11597 LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); 11598 dbgs() << " into " << *DbgExpression << '\n'); 11599 break; 11600 } 11601 } 11602 } 11603 11604 for (SDDbgValue *Dbg : ClonedDVs) { 11605 assert((!Dbg->getSDNodes().empty() || 11606 llvm::any_of(Dbg->getLocationOps(), 11607 [&](const SDDbgOperand &Op) { 11608 return Op.getKind() == SDDbgOperand::FRAMEIX; 11609 })) && 11610 "Salvaged DbgValue should depend on a new SDNode"); 11611 AddDbgValue(Dbg, false); 11612 } 11613 } 11614 11615 /// Creates a SDDbgLabel node. 11616 SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, 11617 const DebugLoc &DL, unsigned O) { 11618 assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && 11619 "Expected inlined-at fields to agree"); 11620 return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); 11621 } 11622 11623 namespace { 11624 11625 /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node 11626 /// pointed to by a use iterator is deleted, increment the use iterator 11627 /// so that it doesn't dangle. 
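///
/// Illustrative sketch (not part of this file) of the pattern this listener
/// supports:
/// \code
///   // DAG and From are assumed to exist in the caller.
///   SDNode::use_iterator UI = From->use_begin(), UE = From->use_end();
///   RAUWUpdateListener Listener(DAG, UI, UE); // Keeps UI valid on deletion.
///   while (UI != UE) {
///     // Rewrite uses; CSE may delete nodes, advancing UI via the listener.
///   }
/// \endcode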
11628 /// 11629 class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { 11630 SDNode::use_iterator &UI; 11631 SDNode::use_iterator &UE; 11632 11633 void NodeDeleted(SDNode *N, SDNode *E) override { 11634 // Increment the iterator as needed. 11635 while (UI != UE && N == UI->getUser()) 11636 ++UI; 11637 } 11638 11639 public: 11640 RAUWUpdateListener(SelectionDAG &d, 11641 SDNode::use_iterator &ui, 11642 SDNode::use_iterator &ue) 11643 : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} 11644 }; 11645 11646 } // end anonymous namespace 11647 11648 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. 11649 /// This can cause recursive merging of nodes in the DAG. 11650 /// 11651 /// This version assumes From has a single result value. 11652 /// 11653 void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { 11654 SDNode *From = FromN.getNode(); 11655 assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && 11656 "Cannot replace with this method!"); 11657 assert(From != To.getNode() && "Cannot replace uses of with self"); 11658 11659 // Preserve Debug Values 11660 transferDbgValues(FromN, To); 11661 // Preserve extra info. 11662 copyExtraInfo(From, To.getNode()); 11663 11664 // Iterate over all the existing uses of From. New uses will be added 11665 // to the beginning of the use list, which we avoid visiting. 11666 // This specifically avoids visiting uses of From that arise while the 11667 // replacement is happening, because any such uses would be the result 11668 // of CSE: If an existing node looks like From after one of its operands 11669 // is replaced by To, we don't want to replace of all its users with To 11670 // too. See PR3018 for more info. 11671 SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); 11672 RAUWUpdateListener Listener(*this, UI, UE); 11673 while (UI != UE) { 11674 SDNode *User = UI->getUser(); 11675 11676 // This node is about to morph, remove its old self from the CSE maps. 11677 RemoveNodeFromCSEMaps(User); 11678 11679 // A user can appear in a use list multiple times, and when this 11680 // happens the uses are usually next to each other in the list. 11681 // To help reduce the number of CSE recomputations, process all 11682 // the uses of this user that we can find this way. 11683 do { 11684 SDUse &Use = *UI; 11685 ++UI; 11686 Use.set(To); 11687 if (To->isDivergent() != From->isDivergent()) 11688 updateDivergence(User); 11689 } while (UI != UE && UI->getUser() == User); 11690 // Now that we have modified User, add it back to the CSE maps. If it 11691 // already exists there, recursively merge the results together. 11692 AddModifiedNodeToCSEMaps(User); 11693 } 11694 11695 // If we just RAUW'd the root, take note. 11696 if (FromN == getRoot()) 11697 setRoot(To); 11698 } 11699 11700 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. 11701 /// This can cause recursive merging of nodes in the DAG. 11702 /// 11703 /// This version assumes that for each value of From, there is a 11704 /// corresponding value in To in the same position with the same type. 11705 /// 11706 void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { 11707 #ifndef NDEBUG 11708 for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) 11709 assert((!From->hasAnyUseOfValue(i) || 11710 From->getValueType(i) == To->getValueType(i)) && 11711 "Cannot use this version of ReplaceAllUsesWith!"); 11712 #endif 11713 11714 // Handle the trivial case. 
11715 if (From == To) 11716 return; 11717 11718 // Preserve Debug Info. Only do this if there's a use. 11719 for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) 11720 if (From->hasAnyUseOfValue(i)) { 11721 assert((i < To->getNumValues()) && "Invalid To location"); 11722 transferDbgValues(SDValue(From, i), SDValue(To, i)); 11723 } 11724 // Preserve extra info. 11725 copyExtraInfo(From, To); 11726 11727 // Iterate over just the existing users of From. See the comments in 11728 // the ReplaceAllUsesWith above. 11729 SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); 11730 RAUWUpdateListener Listener(*this, UI, UE); 11731 while (UI != UE) { 11732 SDNode *User = UI->getUser(); 11733 11734 // This node is about to morph, remove its old self from the CSE maps. 11735 RemoveNodeFromCSEMaps(User); 11736 11737 // A user can appear in a use list multiple times, and when this 11738 // happens the uses are usually next to each other in the list. 11739 // To help reduce the number of CSE recomputations, process all 11740 // the uses of this user that we can find this way. 11741 do { 11742 SDUse &Use = *UI; 11743 ++UI; 11744 Use.setNode(To); 11745 if (To->isDivergent() != From->isDivergent()) 11746 updateDivergence(User); 11747 } while (UI != UE && UI->getUser() == User); 11748 11749 // Now that we have modified User, add it back to the CSE maps. If it 11750 // already exists there, recursively merge the results together. 11751 AddModifiedNodeToCSEMaps(User); 11752 } 11753 11754 // If we just RAUW'd the root, take note. 11755 if (From == getRoot().getNode()) 11756 setRoot(SDValue(To, getRoot().getResNo())); 11757 } 11758 11759 /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. 11760 /// This can cause recursive merging of nodes in the DAG. 11761 /// 11762 /// This version can replace From with any result values. To must match the 11763 /// number and types of values returned by From. 11764 void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { 11765 if (From->getNumValues() == 1) // Handle the simple case efficiently. 11766 return ReplaceAllUsesWith(SDValue(From, 0), To[0]); 11767 11768 for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) { 11769 // Preserve Debug Info. 11770 transferDbgValues(SDValue(From, i), To[i]); 11771 // Preserve extra info. 11772 copyExtraInfo(From, To[i].getNode()); 11773 } 11774 11775 // Iterate over just the existing users of From. See the comments in 11776 // the ReplaceAllUsesWith above. 11777 SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); 11778 RAUWUpdateListener Listener(*this, UI, UE); 11779 while (UI != UE) { 11780 SDNode *User = UI->getUser(); 11781 11782 // This node is about to morph, remove its old self from the CSE maps. 11783 RemoveNodeFromCSEMaps(User); 11784 11785 // A user can appear in a use list multiple times, and when this happens the 11786 // uses are usually next to each other in the list. To help reduce the 11787 // number of CSE and divergence recomputations, process all the uses of this 11788 // user that we can find this way. 11789 bool To_IsDivergent = false; 11790 do { 11791 SDUse &Use = *UI; 11792 const SDValue &ToOp = To[Use.getResNo()]; 11793 ++UI; 11794 Use.set(ToOp); 11795 To_IsDivergent |= ToOp->isDivergent(); 11796 } while (UI != UE && UI->getUser() == User); 11797 11798 if (To_IsDivergent != From->isDivergent()) 11799 updateDivergence(User); 11800 11801 // Now that we have modified User, add it back to the CSE maps. 
If it 11802 // already exists there, recursively merge the results together. 11803 AddModifiedNodeToCSEMaps(User); 11804 } 11805 11806 // If we just RAUW'd the root, take note. 11807 if (From == getRoot().getNode()) 11808 setRoot(SDValue(To[getRoot().getResNo()])); 11809 } 11810 11811 /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving 11812 /// uses of other values produced by From.getNode() alone. The Deleted 11813 /// vector is handled the same way as for ReplaceAllUsesWith. 11814 void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ 11815 // Handle the really simple, really trivial case efficiently. 11816 if (From == To) return; 11817 11818 // Handle the simple, trivial, case efficiently. 11819 if (From.getNode()->getNumValues() == 1) { 11820 ReplaceAllUsesWith(From, To); 11821 return; 11822 } 11823 11824 // Preserve Debug Info. 11825 transferDbgValues(From, To); 11826 copyExtraInfo(From.getNode(), To.getNode()); 11827 11828 // Iterate over just the existing users of From. See the comments in 11829 // the ReplaceAllUsesWith above. 11830 SDNode::use_iterator UI = From.getNode()->use_begin(), 11831 UE = From.getNode()->use_end(); 11832 RAUWUpdateListener Listener(*this, UI, UE); 11833 while (UI != UE) { 11834 SDNode *User = UI->getUser(); 11835 bool UserRemovedFromCSEMaps = false; 11836 11837 // A user can appear in a use list multiple times, and when this 11838 // happens the uses are usually next to each other in the list. 11839 // To help reduce the number of CSE recomputations, process all 11840 // the uses of this user that we can find this way. 11841 do { 11842 SDUse &Use = *UI; 11843 11844 // Skip uses of different values from the same node. 11845 if (Use.getResNo() != From.getResNo()) { 11846 ++UI; 11847 continue; 11848 } 11849 11850 // If this node hasn't been modified yet, it's still in the CSE maps, 11851 // so remove its old self from the CSE maps. 11852 if (!UserRemovedFromCSEMaps) { 11853 RemoveNodeFromCSEMaps(User); 11854 UserRemovedFromCSEMaps = true; 11855 } 11856 11857 ++UI; 11858 Use.set(To); 11859 if (To->isDivergent() != From->isDivergent()) 11860 updateDivergence(User); 11861 } while (UI != UE && UI->getUser() == User); 11862 // We are iterating over all uses of the From node, so if a use 11863 // doesn't use the specific value, no changes are made. 11864 if (!UserRemovedFromCSEMaps) 11865 continue; 11866 11867 // Now that we have modified User, add it back to the CSE maps. If it 11868 // already exists there, recursively merge the results together. 11869 AddModifiedNodeToCSEMaps(User); 11870 } 11871 11872 // If we just RAUW'd the root, take note. 11873 if (From == getRoot()) 11874 setRoot(To); 11875 } 11876 11877 namespace { 11878 11879 /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith 11880 /// to record information about a use. 11881 struct UseMemo { 11882 SDNode *User; 11883 unsigned Index; 11884 SDUse *Use; 11885 }; 11886 11887 /// operator< - Sort Memos by User. 11888 bool operator<(const UseMemo &L, const UseMemo &R) { 11889 return (intptr_t)L.User < (intptr_t)R.User; 11890 } 11891 11892 /// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node 11893 /// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that 11894 /// the node already has been taken care of recursively. 
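///
/// Illustrative sketch (not part of this file) of the call this listener
/// supports:
/// \code
///   // DAG, OldNode and NewNode are assumed to exist and to produce two
///   // results of matching types.
///   SDValue From[] = {SDValue(OldNode, 0), SDValue(OldNode, 1)};
///   SDValue To[] = {SDValue(NewNode, 0), SDValue(NewNode, 1)};
///   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
/// \endcode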
11895 class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { 11896 SmallVectorImpl<UseMemo> &Uses; 11897 11898 void NodeDeleted(SDNode *N, SDNode *E) override { 11899 for (UseMemo &Memo : Uses) 11900 if (Memo.User == N) 11901 Memo.User = nullptr; 11902 } 11903 11904 public: 11905 RAUOVWUpdateListener(SelectionDAG &d, SmallVectorImpl<UseMemo> &uses) 11906 : SelectionDAG::DAGUpdateListener(d), Uses(uses) {} 11907 }; 11908 11909 } // end anonymous namespace 11910 11911 /// Return true if a glue output should propagate divergence information. 11912 static bool gluePropagatesDivergence(const SDNode *Node) { 11913 switch (Node->getOpcode()) { 11914 case ISD::CopyFromReg: 11915 case ISD::CopyToReg: 11916 return false; 11917 default: 11918 return true; 11919 } 11920 11921 llvm_unreachable("covered opcode switch"); 11922 } 11923 11924 bool SelectionDAG::calculateDivergence(SDNode *N) { 11925 if (TLI->isSDNodeAlwaysUniform(N)) { 11926 assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) && 11927 "Conflicting divergence information!"); 11928 return false; 11929 } 11930 if (TLI->isSDNodeSourceOfDivergence(N, FLI, UA)) 11931 return true; 11932 for (const auto &Op : N->ops()) { 11933 EVT VT = Op.getValueType(); 11934 11935 // Skip Chain. It does not carry divergence. 11936 if (VT != MVT::Other && Op.getNode()->isDivergent() && 11937 (VT != MVT::Glue || gluePropagatesDivergence(Op.getNode()))) 11938 return true; 11939 } 11940 return false; 11941 } 11942 11943 void SelectionDAG::updateDivergence(SDNode *N) { 11944 SmallVector<SDNode *, 16> Worklist(1, N); 11945 do { 11946 N = Worklist.pop_back_val(); 11947 bool IsDivergent = calculateDivergence(N); 11948 if (N->SDNodeBits.IsDivergent != IsDivergent) { 11949 N->SDNodeBits.IsDivergent = IsDivergent; 11950 llvm::append_range(Worklist, N->users()); 11951 } 11952 } while (!Worklist.empty()); 11953 } 11954 11955 void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { 11956 DenseMap<SDNode *, unsigned> Degree; 11957 Order.reserve(AllNodes.size()); 11958 for (auto &N : allnodes()) { 11959 unsigned NOps = N.getNumOperands(); 11960 Degree[&N] = NOps; 11961 if (0 == NOps) 11962 Order.push_back(&N); 11963 } 11964 for (size_t I = 0; I != Order.size(); ++I) { 11965 SDNode *N = Order[I]; 11966 for (auto *U : N->users()) { 11967 unsigned &UnsortedOps = Degree[U]; 11968 if (0 == --UnsortedOps) 11969 Order.push_back(U); 11970 } 11971 } 11972 } 11973 11974 #if !defined(NDEBUG) && LLVM_ENABLE_ABI_BREAKING_CHECKS 11975 void SelectionDAG::VerifyDAGDivergence() { 11976 std::vector<SDNode *> TopoOrder; 11977 CreateTopologicalOrder(TopoOrder); 11978 for (auto *N : TopoOrder) { 11979 assert(calculateDivergence(N) == N->isDivergent() && 11980 "Divergence bit inconsistency detected"); 11981 } 11982 } 11983 #endif 11984 11985 /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving 11986 /// uses of other values produced by From.getNode() alone. The same value 11987 /// may appear in both the From and To list. The Deleted vector is 11988 /// handled the same way as for ReplaceAllUsesWith. 11989 void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, 11990 const SDValue *To, 11991 unsigned Num){ 11992 // Handle the simple, trivial case efficiently. 11993 if (Num == 1) 11994 return ReplaceAllUsesOfValueWith(*From, *To); 11995 11996 transferDbgValues(*From, *To); 11997 copyExtraInfo(From->getNode(), To->getNode()); 11998 11999 // Read up all the uses and make records of them. 
This helps when
  // processing new uses that are introduced during the
  // replacement process.
  SmallVector<UseMemo, 4> Uses;
  for (unsigned i = 0; i != Num; ++i) {
    unsigned FromResNo = From[i].getResNo();
    SDNode *FromNode = From[i].getNode();
    for (SDUse &Use : FromNode->uses()) {
      if (Use.getResNo() == FromResNo) {
        UseMemo Memo = {Use.getUser(), i, &Use};
        Uses.push_back(Memo);
      }
    }
  }

  // Sort the uses, so that all the uses from a given User are together.
  llvm::sort(Uses);
  RAUOVWUpdateListener Listener(*this, Uses);

  for (unsigned UseIndex = 0, UseIndexEnd = Uses.size();
       UseIndex != UseIndexEnd; ) {
    // We know that this user uses some value of From. If it is the right
    // value, update it.
    SDNode *User = Uses[UseIndex].User;
    // If the node has been deleted by recursive CSE updates when updating
    // another node, then just skip this entry.
    if (User == nullptr) {
      ++UseIndex;
      continue;
    }

    // This node is about to morph, remove its old self from the CSE maps.
    RemoveNodeFromCSEMaps(User);

    // The Uses array is sorted, so all the uses for a given User
    // are next to each other in the list.
    // To help reduce the number of CSE recomputations, process all
    // the uses of this user that we can find this way.
    do {
      unsigned i = Uses[UseIndex].Index;
      SDUse &Use = *Uses[UseIndex].Use;
      ++UseIndex;

      Use.set(To[i]);
    } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User);

    // Now that we have modified User, add it back to the CSE maps. If it
    // already exists there, recursively merge the results together.
    AddModifiedNodeToCSEMaps(User);
  }
}

/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG
/// based on their topological order. It returns the maximum id and a vector
/// of the SDNodes* in assigned order by reference.
unsigned SelectionDAG::AssignTopologicalOrder() {
  unsigned DAGSize = 0;

  // SortedPos tracks the progress of the algorithm. Nodes before it are
  // sorted, nodes after it are unsorted. When the algorithm completes
  // it is at the end of the list.
  allnodes_iterator SortedPos = allnodes_begin();

  // Visit all the nodes. Move nodes with no operands to the front of
  // the list immediately. Annotate nodes that do have operands with their
  // operand count. Before we do this, the Node Id fields of the nodes
  // may contain arbitrary values. After, the Node Id fields for nodes
  // before SortedPos will contain the topological sort index, and the
  // Node Id fields for nodes at SortedPos and after will contain the
  // count of outstanding operands.
  for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
    checkForCycles(&N, this);
    unsigned Degree = N.getNumOperands();
    if (Degree == 0) {
      // A node with no operands, add it to the result array immediately.
      N.setNodeId(DAGSize++);
      allnodes_iterator Q(&N);
      if (Q != SortedPos)
        SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
      assert(SortedPos != AllNodes.end() && "Overran node list");
      ++SortedPos;
    } else {
      // Temporarily use the Node Id as scratch space for the degree count.
      N.setNodeId(Degree);
    }
  }

  // Visit all the nodes. As we iterate, move nodes into sorted order,
  // such that by the time the end is reached all nodes will be sorted.
  for (SDNode &Node : allnodes()) {
    SDNode *N = &Node;
    checkForCycles(N, this);
    // N is in sorted position, so each of its users has one less operand
    // that needs to be sorted.
    for (SDNode *P : N->users()) {
      unsigned Degree = P->getNodeId();
      assert(Degree != 0 && "Invalid node degree");
      --Degree;
      if (Degree == 0) {
        // All of P's operands are sorted, so P may be sorted now.
        P->setNodeId(DAGSize++);
        if (P->getIterator() != SortedPos)
          SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P));
        assert(SortedPos != AllNodes.end() && "Overran node list");
        ++SortedPos;
      } else {
        // Update P's outstanding operand count.
        P->setNodeId(Degree);
      }
    }
    if (Node.getIterator() == SortedPos) {
#ifndef NDEBUG
      allnodes_iterator I(N);
      SDNode *S = &*++I;
      dbgs() << "Overran sorted position:\n";
      S->dumprFull(this); dbgs() << "\n";
      dbgs() << "Checking if this is due to cycles\n";
      checkForCycles(this, true);
#endif
      llvm_unreachable(nullptr);
    }
  }

  assert(SortedPos == AllNodes.end() &&
         "Topological sort incomplete!");
  assert(AllNodes.front().getOpcode() == ISD::EntryToken &&
         "First node in topological sort is not the entry token!");
  assert(AllNodes.front().getNodeId() == 0 &&
         "First node in topological sort has non-zero id!");
  assert(AllNodes.front().getNumOperands() == 0 &&
         "First node in topological sort has operands!");
  assert(AllNodes.back().getNodeId() == (int)DAGSize-1 &&
         "Last node in topological sort has unexpected id!");
  assert(AllNodes.back().use_empty() &&
         "Last node in topological sort has users!");
  assert(DAGSize == allnodes_size() && "Node count mismatch!");
  return DAGSize;
}

/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the
/// value is produced by SD.
void SelectionDAG::AddDbgValue(SDDbgValue *DB, bool isParameter) {
  for (SDNode *SD : DB->getSDNodes()) {
    if (!SD)
      continue;
    assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue());
    SD->setHasDebugValue(true);
  }
  DbgInfo->add(DB, isParameter);
}

void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { DbgInfo->add(DB); }

SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
                                                   SDValue NewMemOpChain) {
  assert(isa<MemSDNode>(NewMemOpChain) && "Expected a memop node");
  assert(NewMemOpChain.getValueType() == MVT::Other && "Expected a token VT");
  // The new memory operation must have the same position as the old load in
  // terms of memory dependency. Create a TokenFactor for the old load and new
  // memory operation and update uses of the old load's output chain to use
  // that TokenFactor.
12160 if (OldChain == NewMemOpChain || OldChain.use_empty()) 12161 return NewMemOpChain; 12162 12163 SDValue TokenFactor = getNode(ISD::TokenFactor, SDLoc(OldChain), MVT::Other, 12164 OldChain, NewMemOpChain); 12165 ReplaceAllUsesOfValueWith(OldChain, TokenFactor); 12166 UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewMemOpChain); 12167 return TokenFactor; 12168 } 12169 12170 SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, 12171 SDValue NewMemOp) { 12172 assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); 12173 SDValue OldChain = SDValue(OldLoad, 1); 12174 SDValue NewMemOpChain = NewMemOp.getValue(1); 12175 return makeEquivalentMemoryOrdering(OldChain, NewMemOpChain); 12176 } 12177 12178 SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, 12179 Function **OutFunction) { 12180 assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); 12181 12182 auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 12183 auto *Module = MF->getFunction().getParent(); 12184 auto *Function = Module->getFunction(Symbol); 12185 12186 if (OutFunction != nullptr) 12187 *OutFunction = Function; 12188 12189 if (Function != nullptr) { 12190 auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace()); 12191 return getGlobalAddress(Function, SDLoc(Op), PtrTy); 12192 } 12193 12194 std::string ErrorStr; 12195 raw_string_ostream ErrorFormatter(ErrorStr); 12196 ErrorFormatter << "Undefined external symbol "; 12197 ErrorFormatter << '"' << Symbol << '"'; 12198 report_fatal_error(Twine(ErrorStr)); 12199 } 12200 12201 //===----------------------------------------------------------------------===// 12202 // SDNode Class 12203 //===----------------------------------------------------------------------===// 12204 12205 bool llvm::isNullConstant(SDValue V) { 12206 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12207 return Const != nullptr && Const->isZero(); 12208 } 12209 12210 bool llvm::isNullConstantOrUndef(SDValue V) { 12211 return V.isUndef() || isNullConstant(V); 12212 } 12213 12214 bool llvm::isNullFPConstant(SDValue V) { 12215 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); 12216 return Const != nullptr && Const->isZero() && !Const->isNegative(); 12217 } 12218 12219 bool llvm::isAllOnesConstant(SDValue V) { 12220 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12221 return Const != nullptr && Const->isAllOnes(); 12222 } 12223 12224 bool llvm::isOneConstant(SDValue V) { 12225 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12226 return Const != nullptr && Const->isOne(); 12227 } 12228 12229 bool llvm::isMinSignedConstant(SDValue V) { 12230 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 12231 return Const != nullptr && Const->isMinSignedValue(); 12232 } 12233 12234 bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, 12235 unsigned OperandNo) { 12236 // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). 12237 // TODO: Target-specific opcodes could be added. 
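  // Illustrative examples (not exhaustive): x + 0, x | 0, x ^ 0, x * 1,
  // x & -1, umax(x, 0) and umin(x, -1) report the constant operand as
  // neutral; sub/shl/srl/sra treat 0 (and udiv/sdiv treat 1) as neutral
  // only when it is the second operand (OperandNo == 1).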
12238 if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false, 12239 /*AllowTruncation*/ true)) { 12240 APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits()); 12241 switch (Opcode) { 12242 case ISD::ADD: 12243 case ISD::OR: 12244 case ISD::XOR: 12245 case ISD::UMAX: 12246 return Const.isZero(); 12247 case ISD::MUL: 12248 return Const.isOne(); 12249 case ISD::AND: 12250 case ISD::UMIN: 12251 return Const.isAllOnes(); 12252 case ISD::SMAX: 12253 return Const.isMinSignedValue(); 12254 case ISD::SMIN: 12255 return Const.isMaxSignedValue(); 12256 case ISD::SUB: 12257 case ISD::SHL: 12258 case ISD::SRA: 12259 case ISD::SRL: 12260 return OperandNo == 1 && Const.isZero(); 12261 case ISD::UDIV: 12262 case ISD::SDIV: 12263 return OperandNo == 1 && Const.isOne(); 12264 } 12265 } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { 12266 switch (Opcode) { 12267 case ISD::FADD: 12268 return ConstFP->isZero() && 12269 (Flags.hasNoSignedZeros() || ConstFP->isNegative()); 12270 case ISD::FSUB: 12271 return OperandNo == 1 && ConstFP->isZero() && 12272 (Flags.hasNoSignedZeros() || !ConstFP->isNegative()); 12273 case ISD::FMUL: 12274 return ConstFP->isExactlyValue(1.0); 12275 case ISD::FDIV: 12276 return OperandNo == 1 && ConstFP->isExactlyValue(1.0); 12277 case ISD::FMINNUM: 12278 case ISD::FMAXNUM: { 12279 // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. 12280 EVT VT = V.getValueType(); 12281 const fltSemantics &Semantics = VT.getFltSemantics(); 12282 APFloat NeutralAF = !Flags.hasNoNaNs() 12283 ? APFloat::getQNaN(Semantics) 12284 : !Flags.hasNoInfs() 12285 ? APFloat::getInf(Semantics) 12286 : APFloat::getLargest(Semantics); 12287 if (Opcode == ISD::FMAXNUM) 12288 NeutralAF.changeSign(); 12289 12290 return ConstFP->isExactlyValue(NeutralAF); 12291 } 12292 } 12293 } 12294 return false; 12295 } 12296 12297 SDValue llvm::peekThroughBitcasts(SDValue V) { 12298 while (V.getOpcode() == ISD::BITCAST) 12299 V = V.getOperand(0); 12300 return V; 12301 } 12302 12303 SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { 12304 while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse()) 12305 V = V.getOperand(0); 12306 return V; 12307 } 12308 12309 SDValue llvm::peekThroughExtractSubvectors(SDValue V) { 12310 while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) 12311 V = V.getOperand(0); 12312 return V; 12313 } 12314 12315 SDValue llvm::peekThroughTruncates(SDValue V) { 12316 while (V.getOpcode() == ISD::TRUNCATE) 12317 V = V.getOperand(0); 12318 return V; 12319 } 12320 12321 bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { 12322 if (V.getOpcode() != ISD::XOR) 12323 return false; 12324 V = peekThroughBitcasts(V.getOperand(1)); 12325 unsigned NumBits = V.getScalarValueSizeInBits(); 12326 ConstantSDNode *C = 12327 isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true); 12328 return C && (C->getAPIntValue().countr_one() >= NumBits); 12329 } 12330 12331 ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, 12332 bool AllowTruncation) { 12333 EVT VT = N.getValueType(); 12334 APInt DemandedElts = VT.isFixedLengthVector() 12335 ? 
APInt::getAllOnes(VT.getVectorMinNumElements()) 12336 : APInt(1, 1); 12337 return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation); 12338 } 12339 12340 ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, 12341 bool AllowUndefs, 12342 bool AllowTruncation) { 12343 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 12344 return CN; 12345 12346 // SplatVectors can truncate their operands. Ignore that case here unless 12347 // AllowTruncation is set. 12348 if (N->getOpcode() == ISD::SPLAT_VECTOR) { 12349 EVT VecEltVT = N->getValueType(0).getVectorElementType(); 12350 if (auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 12351 EVT CVT = CN->getValueType(0); 12352 assert(CVT.bitsGE(VecEltVT) && "Illegal splat_vector element extension"); 12353 if (AllowTruncation || CVT == VecEltVT) 12354 return CN; 12355 } 12356 } 12357 12358 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 12359 BitVector UndefElements; 12360 ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); 12361 12362 // BuildVectors can truncate their operands. Ignore that case here unless 12363 // AllowTruncation is set. 12364 // TODO: Look into whether we should allow UndefElements in non-DemandedElts 12365 if (CN && (UndefElements.none() || AllowUndefs)) { 12366 EVT CVT = CN->getValueType(0); 12367 EVT NSVT = N.getValueType().getScalarType(); 12368 assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); 12369 if (AllowTruncation || (CVT == NSVT)) 12370 return CN; 12371 } 12372 } 12373 12374 return nullptr; 12375 } 12376 12377 ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { 12378 EVT VT = N.getValueType(); 12379 APInt DemandedElts = VT.isFixedLengthVector() 12380 ? APInt::getAllOnes(VT.getVectorMinNumElements()) 12381 : APInt(1, 1); 12382 return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs); 12383 } 12384 12385 ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, 12386 const APInt &DemandedElts, 12387 bool AllowUndefs) { 12388 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 12389 return CN; 12390 12391 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 12392 BitVector UndefElements; 12393 ConstantFPSDNode *CN = 12394 BV->getConstantFPSplatNode(DemandedElts, &UndefElements); 12395 // TODO: Look into whether we should allow UndefElements in non-DemandedElts 12396 if (CN && (UndefElements.none() || AllowUndefs)) 12397 return CN; 12398 } 12399 12400 if (N.getOpcode() == ISD::SPLAT_VECTOR) 12401 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) 12402 return CN; 12403 12404 return nullptr; 12405 } 12406 12407 bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { 12408 // TODO: may want to use peekThroughBitcast() here. 
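  // This matches a scalar zero as well as a BUILD_VECTOR or SPLAT_VECTOR
  // whose splat value is zero; truncated splat operands are accepted.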
12409 ConstantSDNode *C = 12410 isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); 12411 return C && C->isZero(); 12412 } 12413 12414 bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { 12415 ConstantSDNode *C = 12416 isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true); 12417 return C && C->isOne(); 12418 } 12419 12420 bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { 12421 N = peekThroughBitcasts(N); 12422 unsigned BitWidth = N.getScalarValueSizeInBits(); 12423 ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); 12424 return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth; 12425 } 12426 12427 HandleSDNode::~HandleSDNode() { 12428 DropOperands(); 12429 } 12430 12431 MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, 12432 SDVTList VTs, EVT memvt, MachineMemOperand *mmo) 12433 : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { 12434 MemSDNodeBits.IsVolatile = MMO->isVolatile(); 12435 MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); 12436 MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); 12437 MemSDNodeBits.IsInvariant = MMO->isInvariant(); 12438 12439 // We check here that the size of the memory operand fits within the size of 12440 // the MMO. This is because the MMO might indicate only a possible address 12441 // range instead of specifying the affected memory addresses precisely. 12442 assert( 12443 (!MMO->getType().isValid() || 12444 TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) && 12445 "Size mismatch!"); 12446 } 12447 12448 /// Profile - Gather unique data for the node. 12449 /// 12450 void SDNode::Profile(FoldingSetNodeID &ID) const { 12451 AddNodeIDNode(ID, this); 12452 } 12453 12454 namespace { 12455 12456 struct EVTArray { 12457 std::vector<EVT> VTs; 12458 12459 EVTArray() { 12460 VTs.reserve(MVT::VALUETYPE_SIZE); 12461 for (unsigned i = 0; i < MVT::VALUETYPE_SIZE; ++i) 12462 VTs.push_back(MVT((MVT::SimpleValueType)i)); 12463 } 12464 }; 12465 12466 } // end anonymous namespace 12467 12468 /// getValueTypeList - Return a pointer to the specified value type. 12469 /// 12470 const EVT *SDNode::getValueTypeList(MVT VT) { 12471 static EVTArray SimpleVTArray; 12472 12473 assert(VT < MVT::VALUETYPE_SIZE && "Value type out of range!"); 12474 return &SimpleVTArray.VTs[VT.SimpleTy]; 12475 } 12476 12477 /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the 12478 /// indicated value. This method ignores uses of other values defined by this 12479 /// operation. 12480 bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { 12481 assert(Value < getNumValues() && "Bad value!"); 12482 12483 // TODO: Only iterate over uses of a given value of the node 12484 for (SDUse &U : uses()) { 12485 if (U.getResNo() == Value) { 12486 if (NUses == 0) 12487 return false; 12488 --NUses; 12489 } 12490 } 12491 12492 // Found exactly the right number of uses? 12493 return NUses == 0; 12494 } 12495 12496 /// hasAnyUseOfValue - Return true if there are any use of the indicated 12497 /// value. This method ignores uses of other values defined by this operation. 12498 bool SDNode::hasAnyUseOfValue(unsigned Value) const { 12499 assert(Value < getNumValues() && "Bad value!"); 12500 12501 for (SDUse &U : uses()) 12502 if (U.getResNo() == Value) 12503 return true; 12504 12505 return false; 12506 } 12507 12508 /// isOnlyUserOf - Return true if this node is the only use of N. 
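///
/// Illustrative sketch (not part of this file):
/// \code
///   // Shl and Add are assumed to exist in the caller; OnlyUser is true
///   // only if every use of any of Add's results is Shl itself.
///   bool OnlyUser = Shl->isOnlyUserOf(Add);
/// \endcode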
12509 bool SDNode::isOnlyUserOf(const SDNode *N) const { 12510 bool Seen = false; 12511 for (const SDNode *User : N->users()) { 12512 if (User == this) 12513 Seen = true; 12514 else 12515 return false; 12516 } 12517 12518 return Seen; 12519 } 12520 12521 /// Return true if the only users of N are contained in Nodes. 12522 bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { 12523 bool Seen = false; 12524 for (const SDNode *User : N->users()) { 12525 if (llvm::is_contained(Nodes, User)) 12526 Seen = true; 12527 else 12528 return false; 12529 } 12530 12531 return Seen; 12532 } 12533 12534 /// isOperand - Return true if this node is an operand of N. 12535 bool SDValue::isOperandOf(const SDNode *N) const { 12536 return is_contained(N->op_values(), *this); 12537 } 12538 12539 bool SDNode::isOperandOf(const SDNode *N) const { 12540 return any_of(N->op_values(), 12541 [this](SDValue Op) { return this == Op.getNode(); }); 12542 } 12543 12544 /// reachesChainWithoutSideEffects - Return true if this operand (which must 12545 /// be a chain) reaches the specified operand without crossing any 12546 /// side-effecting instructions on any chain path. In practice, this looks 12547 /// through token factors and non-volatile loads. In order to remain efficient, 12548 /// this only looks a couple of nodes in, it does not do an exhaustive search. 12549 /// 12550 /// Note that we only need to examine chains when we're searching for 12551 /// side-effects; SelectionDAG requires that all side-effects are represented 12552 /// by chains, even if another operand would force a specific ordering. This 12553 /// constraint is necessary to allow transformations like splitting loads. 12554 bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, 12555 unsigned Depth) const { 12556 if (*this == Dest) return true; 12557 12558 // Don't search too deeply, we just want to be able to see through 12559 // TokenFactor's etc. 12560 if (Depth == 0) return false; 12561 12562 // If this is a token factor, all inputs to the TF happen in parallel. 12563 if (getOpcode() == ISD::TokenFactor) { 12564 // First, try a shallow search. 12565 if (is_contained((*this)->ops(), Dest)) { 12566 // We found the chain we want as an operand of this TokenFactor. 12567 // Essentially, we reach the chain without side-effects if we could 12568 // serialize the TokenFactor into a simple chain of operations with 12569 // Dest as the last operation. This is automatically true if the 12570 // chain has one use: there are no other ordering constraints. 12571 // If the chain has more than one use, we give up: some other 12572 // use of Dest might force a side-effect between Dest and the current 12573 // node. 12574 if (Dest.hasOneUse()) 12575 return true; 12576 } 12577 // Next, try a deep search: check whether every operand of the TokenFactor 12578 // reaches Dest. 12579 return llvm::all_of((*this)->ops(), [=](SDValue Op) { 12580 return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); 12581 }); 12582 } 12583 12584 // Loads don't have side effects, look through them. 
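  // For example (illustrative): if *this is the output chain of an unordered
  // load whose input chain is Dest, the search succeeds after one step;
  // anything else on the path (a store, a volatile or atomic load, a call)
  // ends the search with a conservative 'false'.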
12585 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { 12586 if (Ld->isUnordered()) 12587 return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); 12588 } 12589 return false; 12590 } 12591 12592 bool SDNode::hasPredecessor(const SDNode *N) const { 12593 SmallPtrSet<const SDNode *, 32> Visited; 12594 SmallVector<const SDNode *, 16> Worklist; 12595 Worklist.push_back(this); 12596 return hasPredecessorHelper(N, Visited, Worklist); 12597 } 12598 12599 void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { 12600 this->Flags &= Flags; 12601 } 12602 12603 SDValue 12604 SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, 12605 ArrayRef<ISD::NodeType> CandidateBinOps, 12606 bool AllowPartials) { 12607 // The pattern must end in an extract from index 0. 12608 if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || 12609 !isNullConstant(Extract->getOperand(1))) 12610 return SDValue(); 12611 12612 // Match against one of the candidate binary ops. 12613 SDValue Op = Extract->getOperand(0); 12614 if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { 12615 return Op.getOpcode() == unsigned(BinOp); 12616 })) 12617 return SDValue(); 12618 12619 // Floating-point reductions may require relaxed constraints on the final step 12620 // of the reduction because they may reorder intermediate operations. 12621 unsigned CandidateBinOp = Op.getOpcode(); 12622 if (Op.getValueType().isFloatingPoint()) { 12623 SDNodeFlags Flags = Op->getFlags(); 12624 switch (CandidateBinOp) { 12625 case ISD::FADD: 12626 if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) 12627 return SDValue(); 12628 break; 12629 default: 12630 llvm_unreachable("Unhandled FP opcode for binop reduction"); 12631 } 12632 } 12633 12634 // Matching failed - attempt to see if we did enough stages that a partial 12635 // reduction from a subvector is possible. 12636 auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { 12637 if (!AllowPartials || !Op) 12638 return SDValue(); 12639 EVT OpVT = Op.getValueType(); 12640 EVT OpSVT = OpVT.getScalarType(); 12641 EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); 12642 if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) 12643 return SDValue(); 12644 BinOp = (ISD::NodeType)CandidateBinOp; 12645 return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, 12646 getVectorIdxConstant(0, SDLoc(Op))); 12647 }; 12648 12649 // At each stage, we're looking for something that looks like: 12650 // %s = shufflevector <8 x i32> %op, <8 x i32> undef, 12651 // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, 12652 // i32 undef, i32 undef, i32 undef, i32 undef> 12653 // %a = binop <8 x i32> %op, %s 12654 // Where the mask changes according to the stage. E.g. 
for a 3-stage pyramid, 12655 // we expect something like: 12656 // <4,5,6,7,u,u,u,u> 12657 // <2,3,u,u,u,u,u,u> 12658 // <1,u,u,u,u,u,u,u> 12659 // While a partial reduction match would be: 12660 // <2,3,u,u,u,u,u,u> 12661 // <1,u,u,u,u,u,u,u> 12662 unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); 12663 SDValue PrevOp; 12664 for (unsigned i = 0; i < Stages; ++i) { 12665 unsigned MaskEnd = (1 << i); 12666 12667 if (Op.getOpcode() != CandidateBinOp) 12668 return PartialReduction(PrevOp, MaskEnd); 12669 12670 SDValue Op0 = Op.getOperand(0); 12671 SDValue Op1 = Op.getOperand(1); 12672 12673 ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0); 12674 if (Shuffle) { 12675 Op = Op1; 12676 } else { 12677 Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1); 12678 Op = Op0; 12679 } 12680 12681 // The first operand of the shuffle should be the same as the other operand 12682 // of the binop. 12683 if (!Shuffle || Shuffle->getOperand(0) != Op) 12684 return PartialReduction(PrevOp, MaskEnd); 12685 12686 // Verify the shuffle has the expected (at this stage of the pyramid) mask. 12687 for (int Index = 0; Index < (int)MaskEnd; ++Index) 12688 if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) 12689 return PartialReduction(PrevOp, MaskEnd); 12690 12691 PrevOp = Op; 12692 } 12693 12694 // Handle subvector reductions, which tend to appear after the shuffle 12695 // reduction stages. 12696 while (Op.getOpcode() == CandidateBinOp) { 12697 unsigned NumElts = Op.getValueType().getVectorNumElements(); 12698 SDValue Op0 = Op.getOperand(0); 12699 SDValue Op1 = Op.getOperand(1); 12700 if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR || 12701 Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR || 12702 Op0.getOperand(0) != Op1.getOperand(0)) 12703 break; 12704 SDValue Src = Op0.getOperand(0); 12705 unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); 12706 if (NumSrcElts != (2 * NumElts)) 12707 break; 12708 if (!(Op0.getConstantOperandAPInt(1) == 0 && 12709 Op1.getConstantOperandAPInt(1) == NumElts) && 12710 !(Op1.getConstantOperandAPInt(1) == 0 && 12711 Op0.getConstantOperandAPInt(1) == NumElts)) 12712 break; 12713 Op = Src; 12714 } 12715 12716 BinOp = (ISD::NodeType)CandidateBinOp; 12717 return Op; 12718 } 12719 12720 SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { 12721 EVT VT = N->getValueType(0); 12722 EVT EltVT = VT.getVectorElementType(); 12723 unsigned NE = VT.getVectorNumElements(); 12724 12725 SDLoc dl(N); 12726 12727 // If ResNE is 0, fully unroll the vector op. 12728 if (ResNE == 0) 12729 ResNE = NE; 12730 else if (NE > ResNE) 12731 NE = ResNE; 12732 12733 if (N->getNumValues() == 2) { 12734 SmallVector<SDValue, 8> Scalars0, Scalars1; 12735 SmallVector<SDValue, 4> Operands(N->getNumOperands()); 12736 EVT VT1 = N->getValueType(1); 12737 EVT EltVT1 = VT1.getVectorElementType(); 12738 12739 unsigned i; 12740 for (i = 0; i != NE; ++i) { 12741 for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { 12742 SDValue Operand = N->getOperand(j); 12743 EVT OperandVT = Operand.getValueType(); 12744 12745 // A vector operand; extract a single element. 
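        // For example (illustrative): for an operand of type v4i32 and lane
        // i == 2, this emits (extract_vector_elt %op, Constant<2>) of type i32.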
12746 EVT OperandEltVT = OperandVT.getVectorElementType(); 12747 Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, 12748 Operand, getVectorIdxConstant(i, dl)); 12749 } 12750 12751 SDValue EltOp = getNode(N->getOpcode(), dl, {EltVT, EltVT1}, Operands); 12752 Scalars0.push_back(EltOp); 12753 Scalars1.push_back(EltOp.getValue(1)); 12754 } 12755 12756 for (; i < ResNE; ++i) { 12757 Scalars0.push_back(getUNDEF(EltVT)); 12758 Scalars1.push_back(getUNDEF(EltVT1)); 12759 } 12760 12761 EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); 12762 EVT VecVT1 = EVT::getVectorVT(*getContext(), EltVT1, ResNE); 12763 SDValue Vec0 = getBuildVector(VecVT, dl, Scalars0); 12764 SDValue Vec1 = getBuildVector(VecVT1, dl, Scalars1); 12765 return getMergeValues({Vec0, Vec1}, dl); 12766 } 12767 12768 assert(N->getNumValues() == 1 && 12769 "Can't unroll a vector with multiple results!"); 12770 12771 SmallVector<SDValue, 8> Scalars; 12772 SmallVector<SDValue, 4> Operands(N->getNumOperands()); 12773 12774 unsigned i; 12775 for (i= 0; i != NE; ++i) { 12776 for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { 12777 SDValue Operand = N->getOperand(j); 12778 EVT OperandVT = Operand.getValueType(); 12779 if (OperandVT.isVector()) { 12780 // A vector operand; extract a single element. 12781 EVT OperandEltVT = OperandVT.getVectorElementType(); 12782 Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, 12783 Operand, getVectorIdxConstant(i, dl)); 12784 } else { 12785 // A scalar operand; just use it as is. 12786 Operands[j] = Operand; 12787 } 12788 } 12789 12790 switch (N->getOpcode()) { 12791 default: { 12792 Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, 12793 N->getFlags())); 12794 break; 12795 } 12796 case ISD::VSELECT: 12797 Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); 12798 break; 12799 case ISD::SHL: 12800 case ISD::SRA: 12801 case ISD::SRL: 12802 case ISD::ROTL: 12803 case ISD::ROTR: 12804 Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], 12805 getShiftAmountOperand(Operands[0].getValueType(), 12806 Operands[1]))); 12807 break; 12808 case ISD::SIGN_EXTEND_INREG: { 12809 EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); 12810 Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, 12811 Operands[0], 12812 getValueType(ExtVT))); 12813 break; 12814 } 12815 case ISD::ADDRSPACECAST: { 12816 const auto *ASC = cast<AddrSpaceCastSDNode>(N); 12817 Scalars.push_back(getAddrSpaceCast(dl, EltVT, Operands[0], 12818 ASC->getSrcAddressSpace(), 12819 ASC->getDestAddressSpace())); 12820 break; 12821 } 12822 } 12823 } 12824 12825 for (; i < ResNE; ++i) 12826 Scalars.push_back(getUNDEF(EltVT)); 12827 12828 EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); 12829 return getBuildVector(VecVT, dl, Scalars); 12830 } 12831 12832 std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( 12833 SDNode *N, unsigned ResNE) { 12834 unsigned Opcode = N->getOpcode(); 12835 assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || 12836 Opcode == ISD::USUBO || Opcode == ISD::SSUBO || 12837 Opcode == ISD::UMULO || Opcode == ISD::SMULO) && 12838 "Expected an overflow opcode"); 12839 12840 EVT ResVT = N->getValueType(0); 12841 EVT OvVT = N->getValueType(1); 12842 EVT ResEltVT = ResVT.getVectorElementType(); 12843 EVT OvEltVT = OvVT.getVectorElementType(); 12844 SDLoc dl(N); 12845 12846 // If ResNE is 0, fully unroll the vector op. 
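  // For example (illustrative): with ResNE == 0, unrolling
  // (uaddo v4i32 %a, %b) emits one scalar uaddo per lane: lane i computes
  // (uaddo (extract_vector_elt %a, i), (extract_vector_elt %b, i)); the value
  // results are rebuilt into a v4i32 BUILD_VECTOR, and each lane's overflow
  // bit is converted with a select into the overflow vector's element type
  // before being rebuilt the same way.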
12847 unsigned NE = ResVT.getVectorNumElements();
12848 if (ResNE == 0)
12849 ResNE = NE;
12850 else if (NE > ResNE)
12851 NE = ResNE;
12852
12853 SmallVector<SDValue, 8> LHSScalars;
12854 SmallVector<SDValue, 8> RHSScalars;
12855 ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
12856 ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
12857
12858 EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
12859 SDVTList VTs = getVTList(ResEltVT, SVT);
12860 SmallVector<SDValue, 8> ResScalars;
12861 SmallVector<SDValue, 8> OvScalars;
12862 for (unsigned i = 0; i < NE; ++i) {
12863 SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
12864 SDValue Ov =
12865 getSelect(dl, OvEltVT, Res.getValue(1),
12866 getBoolConstant(true, dl, OvEltVT, ResVT),
12867 getConstant(0, dl, OvEltVT));
12868
12869 ResScalars.push_back(Res);
12870 OvScalars.push_back(Ov);
12871 }
12872
12873 ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
12874 OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
12875
12876 EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
12877 EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
12878 return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
12879 getBuildVector(NewOvVT, dl, OvScalars));
12880 }
12881
12882 bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
12883 LoadSDNode *Base,
12884 unsigned Bytes,
12885 int Dist) const {
12886 if (LD->isVolatile() || Base->isVolatile())
12887 return false;
12888 // TODO: probably too restrictive for atomics, revisit
12889 if (!LD->isSimple())
12890 return false;
12891 if (LD->isIndexed() || Base->isIndexed())
12892 return false;
12893 if (LD->getChain() != Base->getChain())
12894 return false;
12895 EVT VT = LD->getMemoryVT();
12896 if (VT.getSizeInBits() / 8 != Bytes)
12897 return false;
12898
12899 auto BaseLocDecomp = BaseIndexOffset::match(Base, *this);
12900 auto LocDecomp = BaseIndexOffset::match(LD, *this);
12901
12902 int64_t Offset = 0;
12903 if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
12904 return (Dist * (int64_t)Bytes == Offset);
12905 return false;
12906 }
12907
12908 /// InferPtrAlign - Infer alignment of a load / store address. Return
12909 /// std::nullopt if it cannot be inferred.
12910 MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
12911 // If this is a GlobalAddress + cst, return the alignment.
12912 const GlobalValue *GV = nullptr;
12913 int64_t GVOffset = 0;
12914 if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
12915 unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
12916 KnownBits Known(PtrWidth);
12917 llvm::computeKnownBits(GV, Known, getDataLayout());
12918 unsigned AlignBits = Known.countMinTrailingZeros();
12919 if (AlignBits)
12920 return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset);
12921 }
12922
12923 // If this is a direct reference to a stack slot, use information about the
12924 // stack slot's alignment.
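  // For example (illustrative): a frame object aligned to 16 bytes addressed
  // as (add FrameIndex, Constant<4>) yields commonAlignment(Align(16), 4),
  // i.e. Align(4).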
12925 int FrameIdx = INT_MIN; 12926 int64_t FrameOffset = 0; 12927 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { 12928 FrameIdx = FI->getIndex(); 12929 } else if (isBaseWithConstantOffset(Ptr) && 12930 isa<FrameIndexSDNode>(Ptr.getOperand(0))) { 12931 // Handle FI+Cst 12932 FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); 12933 FrameOffset = Ptr.getConstantOperandVal(1); 12934 } 12935 12936 if (FrameIdx != INT_MIN) { 12937 const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); 12938 return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); 12939 } 12940 12941 return std::nullopt; 12942 } 12943 12944 /// Split the scalar node with EXTRACT_ELEMENT using the provided 12945 /// VTs and return the low/high part. 12946 std::pair<SDValue, SDValue> SelectionDAG::SplitScalar(const SDValue &N, 12947 const SDLoc &DL, 12948 const EVT &LoVT, 12949 const EVT &HiVT) { 12950 assert(!LoVT.isVector() && !HiVT.isVector() && !N.getValueType().isVector() && 12951 "Split node must be a scalar type"); 12952 SDValue Lo = 12953 getNode(ISD::EXTRACT_ELEMENT, DL, LoVT, N, getIntPtrConstant(0, DL)); 12954 SDValue Hi = 12955 getNode(ISD::EXTRACT_ELEMENT, DL, HiVT, N, getIntPtrConstant(1, DL)); 12956 return std::make_pair(Lo, Hi); 12957 } 12958 12959 /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type 12960 /// which is split (or expanded) into two not necessarily identical pieces. 12961 std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { 12962 // Currently all types are split in half. 12963 EVT LoVT, HiVT; 12964 if (!VT.isVector()) 12965 LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); 12966 else 12967 LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext()); 12968 12969 return std::make_pair(LoVT, HiVT); 12970 } 12971 12972 /// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a 12973 /// type, dependent on an enveloping VT that has been split into two identical 12974 /// pieces. Sets the HiIsEmpty flag when hi type has zero storage size. 12975 std::pair<EVT, EVT> 12976 SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, 12977 bool *HiIsEmpty) const { 12978 EVT EltTp = VT.getVectorElementType(); 12979 // Examples: 12980 // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty) 12981 // custom VL=9 with enveloping VL=8/8 yields 8/1 12982 // custom VL=10 with enveloping VL=8/8 yields 8/2 12983 // etc. 12984 ElementCount VTNumElts = VT.getVectorElementCount(); 12985 ElementCount EnvNumElts = EnvVT.getVectorElementCount(); 12986 assert(VTNumElts.isScalable() == EnvNumElts.isScalable() && 12987 "Mixing fixed width and scalable vectors when enveloping a type"); 12988 EVT LoVT, HiVT; 12989 if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) { 12990 LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); 12991 HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts); 12992 *HiIsEmpty = false; 12993 } else { 12994 // Flag that hi type has zero storage size, but return split envelop type 12995 // (this would be easier if vector types with zero elements were allowed). 12996 LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts); 12997 HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); 12998 *HiIsEmpty = true; 12999 } 13000 return std::make_pair(LoVT, HiVT); 13001 } 13002 13003 /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the 13004 /// low/high part. 
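/// For example (illustrative): splitting an nxv4i32 value with
/// LoVT = HiVT = nxv2i32 yields
///   Lo = (extract_subvector %N, 0)
///   Hi = (extract_subvector %N, 2)
/// where the index is expressed in minimum (unscaled) element units.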
13005 std::pair<SDValue, SDValue> 13006 SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, 13007 const EVT &HiVT) { 13008 assert(LoVT.isScalableVector() == HiVT.isScalableVector() && 13009 LoVT.isScalableVector() == N.getValueType().isScalableVector() && 13010 "Splitting vector with an invalid mixture of fixed and scalable " 13011 "vector types"); 13012 assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <= 13013 N.getValueType().getVectorMinNumElements() && 13014 "More vector elements requested than available!"); 13015 SDValue Lo, Hi; 13016 Lo = 13017 getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL)); 13018 // For scalable vectors it is safe to use LoVT.getVectorMinNumElements() 13019 // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales 13020 // IDX with the runtime scaling factor of the result vector type. For 13021 // fixed-width result vectors, that runtime scaling factor is 1. 13022 Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, 13023 getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); 13024 return std::make_pair(Lo, Hi); 13025 } 13026 13027 std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT, 13028 const SDLoc &DL) { 13029 // Split the vector length parameter. 13030 // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts). 13031 EVT VT = N.getValueType(); 13032 assert(VecVT.getVectorElementCount().isKnownEven() && 13033 "Expecting the mask to be an evenly-sized vector"); 13034 unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; 13035 SDValue HalfNumElts = 13036 VecVT.isFixedLengthVector() 13037 ? getConstant(HalfMinNumElts, DL, VT) 13038 : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts)); 13039 SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts); 13040 SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts); 13041 return std::make_pair(Lo, Hi); 13042 } 13043 13044 /// Widen the vector up to the next power of two using INSERT_SUBVECTOR. 13045 SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { 13046 EVT VT = N.getValueType(); 13047 EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), 13048 NextPowerOf2(VT.getVectorNumElements())); 13049 return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, 13050 getVectorIdxConstant(0, DL)); 13051 } 13052 13053 void SelectionDAG::ExtractVectorElements(SDValue Op, 13054 SmallVectorImpl<SDValue> &Args, 13055 unsigned Start, unsigned Count, 13056 EVT EltVT) { 13057 EVT VT = Op.getValueType(); 13058 if (Count == 0) 13059 Count = VT.getVectorNumElements(); 13060 if (EltVT == EVT()) 13061 EltVT = VT.getVectorElementType(); 13062 SDLoc SL(Op); 13063 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 13064 Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op, 13065 getVectorIdxConstant(i, SL))); 13066 } 13067 } 13068 13069 // getAddressSpace - Return the address space this GlobalAddress belongs to. 
13070 unsigned GlobalAddressSDNode::getAddressSpace() const {
13071 return getGlobal()->getType()->getAddressSpace();
13072 }
13073
13074 Type *ConstantPoolSDNode::getType() const {
13075 if (isMachineConstantPoolEntry())
13076 return Val.MachineCPVal->getType();
13077 return Val.ConstVal->getType();
13078 }
13079
13080 bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
13081 unsigned &SplatBitSize,
13082 bool &HasAnyUndefs,
13083 unsigned MinSplatBits,
13084 bool IsBigEndian) const {
13085 EVT VT = getValueType(0);
13086 assert(VT.isVector() && "Expected a vector type");
13087 unsigned VecWidth = VT.getSizeInBits();
13088 if (MinSplatBits > VecWidth)
13089 return false;
13090
13091 // FIXME: The widths are based on this node's type, but build vectors can
13092 // truncate their operands.
13093 SplatValue = APInt(VecWidth, 0);
13094 SplatUndef = APInt(VecWidth, 0);
13095
13096 // Get the bits. Bits with undefined values (when the corresponding element
13097 // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
13098 // in SplatValue. If any of the values are not constant, give up and return
13099 // false.
13100 unsigned int NumOps = getNumOperands();
13101 assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
13102 unsigned EltWidth = VT.getScalarSizeInBits();
13103
13104 for (unsigned j = 0; j < NumOps; ++j) {
13105 unsigned i = IsBigEndian ? NumOps - 1 - j : j;
13106 SDValue OpVal = getOperand(i);
13107 unsigned BitPos = j * EltWidth;
13108
13109 if (OpVal.isUndef())
13110 SplatUndef.setBits(BitPos, BitPos + EltWidth);
13111 else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
13112 SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
13113 else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
13114 SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
13115 else
13116 return false;
13117 }
13118
13119 // The build_vector is all constants or undefs. Find the smallest element
13120 // size that splats the vector.
13121 HasAnyUndefs = (SplatUndef != 0);
13122
13123 // FIXME: This does not work for vectors with elements less than 8 bits.
13124 while (VecWidth > 8) {
13125 // If we can't split in half, stop here.
13126 if (VecWidth & 1)
13127 break;
13128
13129 unsigned HalfSize = VecWidth / 2;
13130 APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize);
13131 APInt LowValue = SplatValue.extractBits(HalfSize, 0);
13132 APInt HighUndef = SplatUndef.extractBits(HalfSize, HalfSize);
13133 APInt LowUndef = SplatUndef.extractBits(HalfSize, 0);
13134
13135 // If the two halves do not match (ignoring undef bits), stop here.
13136 if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
13137 MinSplatBits > HalfSize)
13138 break;
13139
13140 SplatValue = HighValue | LowValue;
13141 SplatUndef = HighUndef & LowUndef;
13142
13143 VecWidth = HalfSize;
13144 }
13145
13146 // FIXME: The loop above only tries to split in halves. For an input vector
13147 // such as <3 x i16> it therefore cannot detect a SplatBitSize of 16. It is
13148 // unclear whether that is a design flaw that currently limits optimizations;
13149 // presumably, when this helper was written, vectors were normally
13150 // power-of-2 sized.
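  // Worked example (illustrative): for <4 x i8> {1, 1, 1, 1} the 32 splat bits
  // are 0x01010101; the halves match twice (0x0101, then 0x01), so with
  // MinSplatBits <= 8 the reported SplatBitSize is 8 and SplatValue is 0x01.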
13151 13152 SplatBitSize = VecWidth; 13153 return true; 13154 } 13155 13156 SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, 13157 BitVector *UndefElements) const { 13158 unsigned NumOps = getNumOperands(); 13159 if (UndefElements) { 13160 UndefElements->clear(); 13161 UndefElements->resize(NumOps); 13162 } 13163 assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size"); 13164 if (!DemandedElts) 13165 return SDValue(); 13166 SDValue Splatted; 13167 for (unsigned i = 0; i != NumOps; ++i) { 13168 if (!DemandedElts[i]) 13169 continue; 13170 SDValue Op = getOperand(i); 13171 if (Op.isUndef()) { 13172 if (UndefElements) 13173 (*UndefElements)[i] = true; 13174 } else if (!Splatted) { 13175 Splatted = Op; 13176 } else if (Splatted != Op) { 13177 return SDValue(); 13178 } 13179 } 13180 13181 if (!Splatted) { 13182 unsigned FirstDemandedIdx = DemandedElts.countr_zero(); 13183 assert(getOperand(FirstDemandedIdx).isUndef() && 13184 "Can only have a splat without a constant for all undefs."); 13185 return getOperand(FirstDemandedIdx); 13186 } 13187 13188 return Splatted; 13189 } 13190 13191 SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { 13192 APInt DemandedElts = APInt::getAllOnes(getNumOperands()); 13193 return getSplatValue(DemandedElts, UndefElements); 13194 } 13195 13196 bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts, 13197 SmallVectorImpl<SDValue> &Sequence, 13198 BitVector *UndefElements) const { 13199 unsigned NumOps = getNumOperands(); 13200 Sequence.clear(); 13201 if (UndefElements) { 13202 UndefElements->clear(); 13203 UndefElements->resize(NumOps); 13204 } 13205 assert(NumOps == DemandedElts.getBitWidth() && "Unexpected vector size"); 13206 if (!DemandedElts || NumOps < 2 || !isPowerOf2_32(NumOps)) 13207 return false; 13208 13209 // Set the undefs even if we don't find a sequence (like getSplatValue). 13210 if (UndefElements) 13211 for (unsigned I = 0; I != NumOps; ++I) 13212 if (DemandedElts[I] && getOperand(I).isUndef()) 13213 (*UndefElements)[I] = true; 13214 13215 // Iteratively widen the sequence length looking for repetitions. 
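  // For example (illustrative): the 8-element vector {1, 2, 1, 2, 1, 2, 1, 2}
  // fails with SeqLen == 1 but succeeds with SeqLen == 2, returning true with
  // Sequence == {1, 2}; a non-repeating vector such as {1, 2, 3, 4, 5, 6, 7, 8}
  // fails at every length and leaves Sequence empty.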
13216 for (unsigned SeqLen = 1; SeqLen < NumOps; SeqLen *= 2) { 13217 Sequence.append(SeqLen, SDValue()); 13218 for (unsigned I = 0; I != NumOps; ++I) { 13219 if (!DemandedElts[I]) 13220 continue; 13221 SDValue &SeqOp = Sequence[I % SeqLen]; 13222 SDValue Op = getOperand(I); 13223 if (Op.isUndef()) { 13224 if (!SeqOp) 13225 SeqOp = Op; 13226 continue; 13227 } 13228 if (SeqOp && !SeqOp.isUndef() && SeqOp != Op) { 13229 Sequence.clear(); 13230 break; 13231 } 13232 SeqOp = Op; 13233 } 13234 if (!Sequence.empty()) 13235 return true; 13236 } 13237 13238 assert(Sequence.empty() && "Failed to empty non-repeating sequence pattern"); 13239 return false; 13240 } 13241 13242 bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, 13243 BitVector *UndefElements) const { 13244 APInt DemandedElts = APInt::getAllOnes(getNumOperands()); 13245 return getRepeatedSequence(DemandedElts, Sequence, UndefElements); 13246 } 13247 13248 ConstantSDNode * 13249 BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, 13250 BitVector *UndefElements) const { 13251 return dyn_cast_or_null<ConstantSDNode>( 13252 getSplatValue(DemandedElts, UndefElements)); 13253 } 13254 13255 ConstantSDNode * 13256 BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { 13257 return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); 13258 } 13259 13260 ConstantFPSDNode * 13261 BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts, 13262 BitVector *UndefElements) const { 13263 return dyn_cast_or_null<ConstantFPSDNode>( 13264 getSplatValue(DemandedElts, UndefElements)); 13265 } 13266 13267 ConstantFPSDNode * 13268 BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { 13269 return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); 13270 } 13271 13272 int32_t 13273 BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, 13274 uint32_t BitWidth) const { 13275 if (ConstantFPSDNode *CN = 13276 dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { 13277 bool IsExact; 13278 APSInt IntVal(BitWidth); 13279 const APFloat &APF = CN->getValueAPF(); 13280 if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != 13281 APFloat::opOK || 13282 !IsExact) 13283 return -1; 13284 13285 return IntVal.exactLogBase2(); 13286 } 13287 return -1; 13288 } 13289 13290 bool BuildVectorSDNode::getConstantRawBits( 13291 bool IsLittleEndian, unsigned DstEltSizeInBits, 13292 SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const { 13293 // Early-out if this contains anything but Undef/Constant/ConstantFP. 13294 if (!isConstant()) 13295 return false; 13296 13297 unsigned NumSrcOps = getNumOperands(); 13298 unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits(); 13299 assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && 13300 "Invalid bitcast scale"); 13301 13302 // Extract raw src bits. 13303 SmallVector<APInt> SrcBitElements(NumSrcOps, 13304 APInt::getZero(SrcEltSizeInBits)); 13305 BitVector SrcUndeElements(NumSrcOps, false); 13306 13307 for (unsigned I = 0; I != NumSrcOps; ++I) { 13308 SDValue Op = getOperand(I); 13309 if (Op.isUndef()) { 13310 SrcUndeElements.set(I); 13311 continue; 13312 } 13313 auto *CInt = dyn_cast<ConstantSDNode>(Op); 13314 auto *CFP = dyn_cast<ConstantFPSDNode>(Op); 13315 assert((CInt || CFP) && "Unknown constant"); 13316 SrcBitElements[I] = CInt ? 
CInt->getAPIntValue().trunc(SrcEltSizeInBits) 13317 : CFP->getValueAPF().bitcastToAPInt(); 13318 } 13319 13320 // Recast to dst width. 13321 recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements, 13322 SrcBitElements, UndefElements, SrcUndeElements); 13323 return true; 13324 } 13325 13326 void BuildVectorSDNode::recastRawBits(bool IsLittleEndian, 13327 unsigned DstEltSizeInBits, 13328 SmallVectorImpl<APInt> &DstBitElements, 13329 ArrayRef<APInt> SrcBitElements, 13330 BitVector &DstUndefElements, 13331 const BitVector &SrcUndefElements) { 13332 unsigned NumSrcOps = SrcBitElements.size(); 13333 unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth(); 13334 assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && 13335 "Invalid bitcast scale"); 13336 assert(NumSrcOps == SrcUndefElements.size() && 13337 "Vector size mismatch"); 13338 13339 unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits; 13340 DstUndefElements.clear(); 13341 DstUndefElements.resize(NumDstOps, false); 13342 DstBitElements.assign(NumDstOps, APInt::getZero(DstEltSizeInBits)); 13343 13344 // Concatenate src elements constant bits together into dst element. 13345 if (SrcEltSizeInBits <= DstEltSizeInBits) { 13346 unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits; 13347 for (unsigned I = 0; I != NumDstOps; ++I) { 13348 DstUndefElements.set(I); 13349 APInt &DstBits = DstBitElements[I]; 13350 for (unsigned J = 0; J != Scale; ++J) { 13351 unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); 13352 if (SrcUndefElements[Idx]) 13353 continue; 13354 DstUndefElements.reset(I); 13355 const APInt &SrcBits = SrcBitElements[Idx]; 13356 assert(SrcBits.getBitWidth() == SrcEltSizeInBits && 13357 "Illegal constant bitwidths"); 13358 DstBits.insertBits(SrcBits, J * SrcEltSizeInBits); 13359 } 13360 } 13361 return; 13362 } 13363 13364 // Split src element constant bits into dst elements. 13365 unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits; 13366 for (unsigned I = 0; I != NumSrcOps; ++I) { 13367 if (SrcUndefElements[I]) { 13368 DstUndefElements.set(I * Scale, (I + 1) * Scale); 13369 continue; 13370 } 13371 const APInt &SrcBits = SrcBitElements[I]; 13372 for (unsigned J = 0; J != Scale; ++J) { 13373 unsigned Idx = (I * Scale) + (IsLittleEndian ? 
J : (Scale - J - 1)); 13374 APInt &DstBits = DstBitElements[Idx]; 13375 DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits); 13376 } 13377 } 13378 } 13379 13380 bool BuildVectorSDNode::isConstant() const { 13381 for (const SDValue &Op : op_values()) { 13382 unsigned Opc = Op.getOpcode(); 13383 if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) 13384 return false; 13385 } 13386 return true; 13387 } 13388 13389 std::optional<std::pair<APInt, APInt>> 13390 BuildVectorSDNode::isConstantSequence() const { 13391 unsigned NumOps = getNumOperands(); 13392 if (NumOps < 2) 13393 return std::nullopt; 13394 13395 if (!isa<ConstantSDNode>(getOperand(0)) || 13396 !isa<ConstantSDNode>(getOperand(1))) 13397 return std::nullopt; 13398 13399 unsigned EltSize = getValueType(0).getScalarSizeInBits(); 13400 APInt Start = getConstantOperandAPInt(0).trunc(EltSize); 13401 APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start; 13402 13403 if (Stride.isZero()) 13404 return std::nullopt; 13405 13406 for (unsigned i = 2; i < NumOps; ++i) { 13407 if (!isa<ConstantSDNode>(getOperand(i))) 13408 return std::nullopt; 13409 13410 APInt Val = getConstantOperandAPInt(i).trunc(EltSize); 13411 if (Val != (Start + (Stride * i))) 13412 return std::nullopt; 13413 } 13414 13415 return std::make_pair(Start, Stride); 13416 } 13417 13418 bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { 13419 // Find the first non-undef value in the shuffle mask. 13420 unsigned i, e; 13421 for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) 13422 /* search */; 13423 13424 // If all elements are undefined, this shuffle can be considered a splat 13425 // (although it should eventually get simplified away completely). 13426 if (i == e) 13427 return true; 13428 13429 // Make sure all remaining elements are either undef or the same as the first 13430 // non-undef value. 13431 for (int Idx = Mask[i]; i != e; ++i) 13432 if (Mask[i] >= 0 && Mask[i] != Idx) 13433 return false; 13434 return true; 13435 } 13436 13437 // Returns true if it is a constant integer BuildVector or constant integer, 13438 // possibly hidden by a bitcast. 13439 bool SelectionDAG::isConstantIntBuildVectorOrConstantInt( 13440 SDValue N, bool AllowOpaques) const { 13441 N = peekThroughBitcasts(N); 13442 13443 if (auto *C = dyn_cast<ConstantSDNode>(N)) 13444 return AllowOpaques || !C->isOpaque(); 13445 13446 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 13447 return true; 13448 13449 // Treat a GlobalAddress supporting constant offset folding as a 13450 // constant integer. 13451 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) 13452 if (GA->getOpcode() == ISD::GlobalAddress && 13453 TLI->isOffsetFoldingLegal(GA)) 13454 return true; 13455 13456 if ((N.getOpcode() == ISD::SPLAT_VECTOR) && 13457 isa<ConstantSDNode>(N.getOperand(0))) 13458 return true; 13459 return false; 13460 } 13461 13462 // Returns true if it is a constant float BuildVector or constant float. 
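// For example (illustrative):
//   (BUILD_VECTOR ConstantFP<1.0>, ConstantFP<1.0>, ...)  -> true
//   (SPLAT_VECTOR ConstantFP<0.5>)                        -> true
//   (BUILD_VECTOR ConstantFP<1.0>, %x, ...)               -> false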
13463 bool SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { 13464 if (isa<ConstantFPSDNode>(N)) 13465 return true; 13466 13467 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 13468 return true; 13469 13470 if ((N.getOpcode() == ISD::SPLAT_VECTOR) && 13471 isa<ConstantFPSDNode>(N.getOperand(0))) 13472 return true; 13473 13474 return false; 13475 } 13476 13477 std::optional<bool> SelectionDAG::isBoolConstant(SDValue N, 13478 bool AllowTruncation) const { 13479 ConstantSDNode *Const = isConstOrConstSplat(N, false, AllowTruncation); 13480 if (!Const) 13481 return std::nullopt; 13482 13483 const APInt &CVal = Const->getAPIntValue(); 13484 switch (TLI->getBooleanContents(N.getValueType())) { 13485 case TargetLowering::ZeroOrOneBooleanContent: 13486 if (CVal.isOne()) 13487 return true; 13488 if (CVal.isZero()) 13489 return false; 13490 return std::nullopt; 13491 case TargetLowering::ZeroOrNegativeOneBooleanContent: 13492 if (CVal.isAllOnes()) 13493 return true; 13494 if (CVal.isZero()) 13495 return false; 13496 return std::nullopt; 13497 case TargetLowering::UndefinedBooleanContent: 13498 return CVal[0]; 13499 } 13500 llvm_unreachable("Unknown BooleanContent enum"); 13501 } 13502 13503 void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { 13504 assert(!Node->OperandList && "Node already has operands"); 13505 assert(SDNode::getMaxNumOperands() >= Vals.size() && 13506 "too many operands to fit into SDNode"); 13507 SDUse *Ops = OperandRecycler.allocate( 13508 ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); 13509 13510 bool IsDivergent = false; 13511 for (unsigned I = 0; I != Vals.size(); ++I) { 13512 Ops[I].setUser(Node); 13513 Ops[I].setInitial(Vals[I]); 13514 EVT VT = Ops[I].getValueType(); 13515 13516 // Skip Chain. It does not carry divergence. 
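  // For example (illustrative, GPU-style target): an operand computed from a
  // per-lane thread id is divergent, so the node being built inherits
  // divergence from it below, unless the target reports the node as always
  // uniform.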
13517 if (VT != MVT::Other && 13518 (VT != MVT::Glue || gluePropagatesDivergence(Ops[I].getNode())) && 13519 Ops[I].getNode()->isDivergent()) { 13520 IsDivergent = true; 13521 } 13522 } 13523 Node->NumOperands = Vals.size(); 13524 Node->OperandList = Ops; 13525 if (!TLI->isSDNodeAlwaysUniform(Node)) { 13526 IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA); 13527 Node->SDNodeBits.IsDivergent = IsDivergent; 13528 } 13529 checkForCycles(Node); 13530 } 13531 13532 SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, 13533 SmallVectorImpl<SDValue> &Vals) { 13534 size_t Limit = SDNode::getMaxNumOperands(); 13535 while (Vals.size() > Limit) { 13536 unsigned SliceIdx = Vals.size() - Limit; 13537 auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit); 13538 SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs); 13539 Vals.erase(Vals.begin() + SliceIdx, Vals.end()); 13540 Vals.emplace_back(NewTF); 13541 } 13542 return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); 13543 } 13544 13545 SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL, 13546 EVT VT, SDNodeFlags Flags) { 13547 switch (Opcode) { 13548 default: 13549 return SDValue(); 13550 case ISD::ADD: 13551 case ISD::OR: 13552 case ISD::XOR: 13553 case ISD::UMAX: 13554 return getConstant(0, DL, VT); 13555 case ISD::MUL: 13556 return getConstant(1, DL, VT); 13557 case ISD::AND: 13558 case ISD::UMIN: 13559 return getAllOnesConstant(DL, VT); 13560 case ISD::SMAX: 13561 return getConstant(APInt::getSignedMinValue(VT.getSizeInBits()), DL, VT); 13562 case ISD::SMIN: 13563 return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT); 13564 case ISD::FADD: 13565 // If flags allow, prefer positive zero since it's generally cheaper 13566 // to materialize on most targets. 13567 return getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT); 13568 case ISD::FMUL: 13569 return getConstantFP(1.0, DL, VT); 13570 case ISD::FMINNUM: 13571 case ISD::FMAXNUM: { 13572 // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. 13573 const fltSemantics &Semantics = VT.getFltSemantics(); 13574 APFloat NeutralAF = !Flags.hasNoNaNs() ? APFloat::getQNaN(Semantics) : 13575 !Flags.hasNoInfs() ? APFloat::getInf(Semantics) : 13576 APFloat::getLargest(Semantics); 13577 if (Opcode == ISD::FMAXNUM) 13578 NeutralAF.changeSign(); 13579 13580 return getConstantFP(NeutralAF, DL, VT); 13581 } 13582 case ISD::FMINIMUM: 13583 case ISD::FMAXIMUM: { 13584 // Neutral element for fminimum is Inf or FLT_MAX, depending on FMF. 13585 const fltSemantics &Semantics = VT.getFltSemantics(); 13586 APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics) 13587 : APFloat::getLargest(Semantics); 13588 if (Opcode == ISD::FMAXIMUM) 13589 NeutralAF.changeSign(); 13590 13591 return getConstantFP(NeutralAF, DL, VT); 13592 } 13593 13594 } 13595 } 13596 13597 /// Helper used to make a call to a library function that has one argument of 13598 /// pointer type. 13599 /// 13600 /// Such functions include 'fegetmode', 'fesetenv' and some others, which are 13601 /// used to get or set floating-point state. They have one argument of pointer 13602 /// type, which points to the memory region containing bits of the 13603 /// floating-point state. The value returned by such function is ignored in the 13604 /// created call. 13605 /// 13606 /// \param LibFunc Reference to library function (value of RTLIB::Libcall). 13607 /// \param Ptr Pointer used to save/load state. 
13608 /// \param InChain Ingoing token chain. 13609 /// \returns Outgoing chain token. 13610 SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, 13611 SDValue InChain, 13612 const SDLoc &DLoc) { 13613 assert(InChain.getValueType() == MVT::Other && "Expected token chain"); 13614 TargetLowering::ArgListTy Args; 13615 TargetLowering::ArgListEntry Entry; 13616 Entry.Node = Ptr; 13617 Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext()); 13618 Args.push_back(Entry); 13619 RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc); 13620 SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC), 13621 TLI->getPointerTy(getDataLayout())); 13622 TargetLowering::CallLoweringInfo CLI(*this); 13623 CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee( 13624 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee, 13625 std::move(Args)); 13626 return TLI->LowerCallTo(CLI).second; 13627 } 13628 13629 void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { 13630 assert(From && To && "Invalid SDNode; empty source SDValue?"); 13631 auto I = SDEI.find(From); 13632 if (I == SDEI.end()) 13633 return; 13634 13635 // Use of operator[] on the DenseMap may cause an insertion, which invalidates 13636 // the iterator, hence the need to make a copy to prevent a use-after-free. 13637 NodeExtraInfo NEI = I->second; 13638 if (LLVM_LIKELY(!NEI.PCSections)) { 13639 // No deep copy required for the types of extra info set. 13640 // 13641 // FIXME: Investigate if other types of extra info also need deep copy. This 13642 // depends on the types of nodes they can be attached to: if some extra info 13643 // is only ever attached to nodes where a replacement To node is always the 13644 // node where later use and propagation of the extra info has the intended 13645 // semantics, no deep copy is required. 13646 SDEI[To] = std::move(NEI); 13647 return; 13648 } 13649 13650 // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced 13651 // through the replacement of From with To. Otherwise, replacements of a node 13652 // (From) with more complex nodes (To and its operands) may result in lost 13653 // extra info where the root node (To) is insignificant in further propagating 13654 // and using extra info when further lowering to MIR. 13655 // 13656 // In the first step pre-populate the visited set with the nodes reachable 13657 // from the old From node. This avoids copying NodeExtraInfo to parts of the 13658 // DAG that is not new and should be left untouched. 13659 SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom. 13660 DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From. 13661 auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) { 13662 if (MaxDepth == 0) { 13663 // Remember this node in case we need to increase MaxDepth and continue 13664 // populating FromReach from this node. 13665 Leafs.emplace_back(N); 13666 return; 13667 } 13668 if (!FromReach.insert(N).second) 13669 return; 13670 for (const SDValue &Op : N->op_values()) 13671 Self(Self, Op.getNode(), MaxDepth - 1); 13672 }; 13673 13674 // Copy extra info to To and all its transitive operands (that are new). 
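  // Illustrative replacement (hypothetical nodes): if From = (load %p) carried
  // PCSections info and is replaced by To = (or (shl %hi, 16), %lo) built from
  // two new narrower loads, the info must be attached to the new 'or', 'shl'
  // and load nodes, but not to nodes From already depended on (such as the
  // pointer computation), which the pre-population step above excludes.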
13675 SmallPtrSet<const SDNode *, 8> Visited; 13676 auto DeepCopyTo = [&](auto &&Self, const SDNode *N) { 13677 if (FromReach.contains(N)) 13678 return true; 13679 if (!Visited.insert(N).second) 13680 return true; 13681 if (getEntryNode().getNode() == N) 13682 return false; 13683 for (const SDValue &Op : N->op_values()) { 13684 if (!Self(Self, Op.getNode())) 13685 return false; 13686 } 13687 // Copy only if entry node was not reached. 13688 SDEI[N] = NEI; 13689 return true; 13690 }; 13691 13692 // We first try with a lower MaxDepth, assuming that the path to common 13693 // operands between From and To is relatively short. This significantly 13694 // improves performance in the common case. The initial MaxDepth is big 13695 // enough to avoid retry in the common case; the last MaxDepth is large 13696 // enough to avoid having to use the fallback below (and protects from 13697 // potential stack exhaustion from recursion). 13698 for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024; 13699 PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) { 13700 // StartFrom is the previous (or initial) set of leafs reachable at the 13701 // previous maximum depth. 13702 SmallVector<const SDNode *> StartFrom; 13703 std::swap(StartFrom, Leafs); 13704 for (const SDNode *N : StartFrom) 13705 VisitFrom(VisitFrom, N, MaxDepth - PrevDepth); 13706 if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To))) 13707 return; 13708 // This should happen very rarely (reached the entry node). 13709 LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n"); 13710 assert(!Leafs.empty()); 13711 } 13712 13713 // This should not happen - but if it did, that means the subgraph reachable 13714 // from From has depth greater or equal to maximum MaxDepth, and VisitFrom() 13715 // could not visit all reachable common operands. Consequently, we were able 13716 // to reach the entry node. 13717 errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n"; 13718 assert(false && "From subgraph too complex - increase max. MaxDepth?"); 13719 // Best-effort fallback if assertions disabled. 13720 SDEI[To] = std::move(NEI); 13721 } 13722 13723 #ifndef NDEBUG 13724 static void checkForCyclesHelper(const SDNode *N, 13725 SmallPtrSetImpl<const SDNode*> &Visited, 13726 SmallPtrSetImpl<const SDNode*> &Checked, 13727 const llvm::SelectionDAG *DAG) { 13728 // If this node has already been checked, don't check it again. 13729 if (Checked.count(N)) 13730 return; 13731 13732 // If a node has already been visited on this depth-first walk, reject it as 13733 // a cycle. 
13734 if (!Visited.insert(N).second) { 13735 errs() << "Detected cycle in SelectionDAG\n"; 13736 dbgs() << "Offending node:\n"; 13737 N->dumprFull(DAG); dbgs() << "\n"; 13738 abort(); 13739 } 13740 13741 for (const SDValue &Op : N->op_values()) 13742 checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG); 13743 13744 Checked.insert(N); 13745 Visited.erase(N); 13746 } 13747 #endif 13748 13749 void llvm::checkForCycles(const llvm::SDNode *N, 13750 const llvm::SelectionDAG *DAG, 13751 bool force) { 13752 #ifndef NDEBUG 13753 bool check = force; 13754 #ifdef EXPENSIVE_CHECKS 13755 check = true; 13756 #endif // EXPENSIVE_CHECKS 13757 if (check) { 13758 assert(N && "Checking nonexistent SDNode"); 13759 SmallPtrSet<const SDNode*, 32> visited; 13760 SmallPtrSet<const SDNode*, 32> checked; 13761 checkForCyclesHelper(N, visited, checked, DAG); 13762 } 13763 #endif // !NDEBUG 13764 } 13765 13766 void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { 13767 checkForCycles(DAG->getRoot().getNode(), DAG, force); 13768 } 13769