1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISC-V target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVBaseInfo.h" 15 #include "MCTargetDesc/RISCVMCTargetDesc.h" 16 #include "MCTargetDesc/RISCVMatInt.h" 17 #include "RISCVISelLowering.h" 18 #include "RISCVInstrInfo.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/IR/IntrinsicsRISCV.h" 21 #include "llvm/Support/Alignment.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" 30 31 static cl::opt<bool> UsePseudoMovImm( 32 "riscv-use-rematerializable-movimm", cl::Hidden, 33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " 34 "constant materialization"), 35 cl::init(false)); 36 37 namespace llvm::RISCV { 38 #define GET_RISCVVSSEGTable_IMPL 39 #define GET_RISCVVLSEGTable_IMPL 40 #define GET_RISCVVLXSEGTable_IMPL 41 #define GET_RISCVVSXSEGTable_IMPL 42 #define GET_RISCVVLETable_IMPL 43 #define GET_RISCVVSETable_IMPL 44 #define GET_RISCVVLXTable_IMPL 45 #define GET_RISCVVSXTable_IMPL 46 #include "RISCVGenSearchableTables.inc" 47 } // namespace llvm::RISCV 48 49 void RISCVDAGToDAGISel::PreprocessISelDAG() { 50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 51 52 bool MadeChange = false; 53 while (Position != CurDAG->allnodes_begin()) { 54 SDNode *N = &*--Position; 55 if (N->use_empty()) 56 continue; 57 58 SDValue Result; 59 switch (N->getOpcode()) { 60 case ISD::SPLAT_VECTOR: { 61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 63 MVT VT = N->getSimpleValueType(0); 64 unsigned Opc = 65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 66 SDLoc DL(N); 67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 68 SDValue Src = N->getOperand(0); 69 if (VT.isInteger()) 70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(), 71 N->getOperand(0)); 72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL); 73 break; 74 } 75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { 76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 77 // load. Done after lowering and combining so that we have a chance to 78 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 79 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 80 MVT VT = N->getSimpleValueType(0); 81 SDValue Passthru = N->getOperand(0); 82 SDValue Lo = N->getOperand(1); 83 SDValue Hi = N->getOperand(2); 84 SDValue VL = N->getOperand(3); 85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 87 "Unexpected VTs!"); 88 MachineFunction &MF = CurDAG->getMachineFunction(); 89 SDLoc DL(N); 90 91 // Create temporary stack for each expanding node. 92 SDValue StackSlot = 93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8)); 94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex(); 95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 96 97 SDValue Chain = CurDAG->getEntryNode(); 98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 99 100 SDValue OffsetSlot = 101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL); 102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 103 Align(8)); 104 105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 106 107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 108 SDValue IntID = 109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 110 SDValue Ops[] = {Chain, 111 IntID, 112 Passthru, 113 StackSlot, 114 CurDAG->getRegister(RISCV::X0, MVT::i64), 115 VL}; 116 117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 118 MVT::i64, MPI, Align(8), 119 MachineMemOperand::MOLoad); 120 break; 121 } 122 } 123 124 if (Result) { 125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: "); 126 LLVM_DEBUG(N->dump(CurDAG)); 127 LLVM_DEBUG(dbgs() << "\nNew: "); 128 LLVM_DEBUG(Result->dump(CurDAG)); 129 LLVM_DEBUG(dbgs() << "\n"); 130 131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 132 MadeChange = true; 133 } 134 } 135 136 if (MadeChange) 137 CurDAG->RemoveDeadNodes(); 138 } 139 140 void RISCVDAGToDAGISel::PostprocessISelDAG() { 141 HandleSDNode Dummy(CurDAG->getRoot()); 142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 143 144 bool MadeChange = false; 145 while (Position != CurDAG->allnodes_begin()) { 146 SDNode *N = &*--Position; 147 // Skip dead nodes and any non-machine opcodes. 148 if (N->use_empty() || !N->isMachineOpcode()) 149 continue; 150 151 MadeChange |= doPeepholeSExtW(N); 152 153 // FIXME: This is here only because the VMerge transform doesn't 154 // know how to handle masked true inputs. Once that has been moved 155 // to post-ISEL, this can be deleted as well. 156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N)); 157 } 158 159 CurDAG->setRoot(Dummy.getValue()); 160 161 MadeChange |= doPeepholeMergeVVMFold(); 162 163 // After we're done with everything else, convert IMPLICIT_DEF 164 // passthru operands to NoRegister. This is required to workaround 165 // an optimization deficiency in MachineCSE. This really should 166 // be merged back into each of the patterns (i.e. there's no good 167 // reason not to go directly to NoReg), but is being done this way 168 // to allow easy backporting. 169 MadeChange |= doPeepholeNoRegPassThru(); 170 171 if (MadeChange) 172 CurDAG->RemoveDeadNodes(); 173 } 174 175 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 176 RISCVMatInt::InstSeq &Seq) { 177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 178 for (const RISCVMatInt::Inst &Inst : Seq) { 179 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT); 180 SDNode *Result = nullptr; 181 switch (Inst.getOpndKind()) { 182 case RISCVMatInt::Imm: 183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm); 184 break; 185 case RISCVMatInt::RegX0: 186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, 187 CurDAG->getRegister(RISCV::X0, VT)); 188 break; 189 case RISCVMatInt::RegReg: 190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg); 191 break; 192 case RISCVMatInt::RegImm: 193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm); 194 break; 195 } 196 197 // Only the first instruction has X0 as its source. 198 SrcReg = SDValue(Result, 0); 199 } 200 201 return SrcReg; 202 } 203 204 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 205 int64_t Imm, const RISCVSubtarget &Subtarget) { 206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); 207 208 // Use a rematerializable pseudo instruction for short sequences if enabled. 209 if (Seq.size() == 2 && UsePseudoMovImm) 210 return SDValue( 211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT, 212 CurDAG->getSignedTargetConstant(Imm, DL, VT)), 213 0); 214 215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at 216 // worst an LUI+ADDIW. This will require an extra register, but avoids a 217 // constant pool. 218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 219 // low and high 32 bits are the same and bit 31 and 63 are set. 220 if (Seq.size() > 3) { 221 unsigned ShiftAmt, AddOpc; 222 RISCVMatInt::InstSeq SeqLo = 223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); 224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { 225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); 226 227 SDValue SLLI = SDValue( 228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, 229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)), 230 0); 231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0); 232 } 233 } 234 235 // Otherwise, use the original sequence. 236 return selectImmSeq(CurDAG, DL, VT, Seq); 237 } 238 239 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 240 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 241 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 242 bool IsLoad, MVT *IndexVT) { 243 SDValue Chain = Node->getOperand(0); 244 SDValue Glue; 245 246 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. 247 248 if (IsStridedOrIndexed) { 249 Operands.push_back(Node->getOperand(CurOp++)); // Index. 250 if (IndexVT) 251 *IndexVT = Operands.back()->getSimpleValueType(0); 252 } 253 254 if (IsMasked) { 255 // Mask needs to be copied to V0. 256 SDValue Mask = Node->getOperand(CurOp++); 257 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 258 Glue = Chain.getValue(1); 259 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 260 } 261 SDValue VL; 262 selectVLOp(Node->getOperand(CurOp++), VL); 263 Operands.push_back(VL); 264 265 MVT XLenVT = Subtarget->getXLenVT(); 266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 267 Operands.push_back(SEWOp); 268 269 // At the IR layer, all the masked load intrinsics have policy operands, 270 // none of the others do. All have passthru operands. For our pseudos, 271 // all loads have policy operands. 272 if (IsLoad) { 273 uint64_t Policy = RISCVII::MASK_AGNOSTIC; 274 if (IsMasked) 275 Policy = Node->getConstantOperandVal(CurOp++); 276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 277 Operands.push_back(PolicyOp); 278 } 279 280 Operands.push_back(Chain); // Chain. 281 if (Glue) 282 Operands.push_back(Glue); 283 } 284 285 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, 286 bool IsStrided) { 287 SDLoc DL(Node); 288 MVT VT = Node->getSimpleValueType(0); 289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 290 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 291 292 unsigned CurOp = 2; 293 SmallVector<SDValue, 8> Operands; 294 295 Operands.push_back(Node->getOperand(CurOp++)); 296 297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 298 Operands, /*IsLoad=*/true); 299 300 const RISCV::VLSEGPseudo *P = 301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 302 static_cast<unsigned>(LMUL)); 303 MachineSDNode *Load = 304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 305 306 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 307 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 308 309 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 310 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); 311 CurDAG->RemoveDeadNode(Node); 312 } 313 314 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF, 315 bool IsMasked) { 316 SDLoc DL(Node); 317 MVT VT = Node->getSimpleValueType(0); 318 MVT XLenVT = Subtarget->getXLenVT(); 319 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 320 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 321 322 unsigned CurOp = 2; 323 SmallVector<SDValue, 7> Operands; 324 325 Operands.push_back(Node->getOperand(CurOp++)); 326 327 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 328 /*IsStridedOrIndexed*/ false, Operands, 329 /*IsLoad=*/true); 330 331 const RISCV::VLSEGPseudo *P = 332 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 333 Log2SEW, static_cast<unsigned>(LMUL)); 334 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 335 XLenVT, MVT::Other, Operands); 336 337 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 338 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 339 340 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result 341 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL 342 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain 343 CurDAG->RemoveDeadNode(Node); 344 } 345 346 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, 347 bool IsOrdered) { 348 SDLoc DL(Node); 349 MVT VT = Node->getSimpleValueType(0); 350 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 351 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 352 353 unsigned CurOp = 2; 354 SmallVector<SDValue, 8> Operands; 355 356 Operands.push_back(Node->getOperand(CurOp++)); 357 358 MVT IndexVT; 359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 360 /*IsStridedOrIndexed*/ true, Operands, 361 /*IsLoad=*/true, &IndexVT); 362 363 #ifndef NDEBUG 364 // Number of element = RVVBitsPerBlock * LMUL / SEW 365 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW; 366 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL); 367 if (DecodedLMUL.second) 368 ContainedTyNumElts /= DecodedLMUL.first; 369 else 370 ContainedTyNumElts *= DecodedLMUL.first; 371 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() && 372 "Element count mismatch"); 373 #endif 374 375 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 376 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 377 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 378 report_fatal_error("The V extension does not support EEW=64 for index " 379 "values when XLEN=32"); 380 } 381 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 382 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 383 static_cast<unsigned>(IndexLMUL)); 384 MachineSDNode *Load = 385 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 386 387 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 388 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 389 390 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 391 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); 392 CurDAG->RemoveDeadNode(Node); 393 } 394 395 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, 396 bool IsStrided) { 397 SDLoc DL(Node); 398 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 399 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 400 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 401 402 unsigned CurOp = 2; 403 SmallVector<SDValue, 8> Operands; 404 405 Operands.push_back(Node->getOperand(CurOp++)); 406 407 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 408 Operands); 409 410 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 411 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 412 MachineSDNode *Store = 413 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 414 415 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 416 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 417 418 ReplaceNode(Node, Store); 419 } 420 421 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, 422 bool IsOrdered) { 423 SDLoc DL(Node); 424 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 425 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 426 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 427 428 unsigned CurOp = 2; 429 SmallVector<SDValue, 8> Operands; 430 431 Operands.push_back(Node->getOperand(CurOp++)); 432 433 MVT IndexVT; 434 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 435 /*IsStridedOrIndexed*/ true, Operands, 436 /*IsLoad=*/false, &IndexVT); 437 438 #ifndef NDEBUG 439 // Number of element = RVVBitsPerBlock * LMUL / SEW 440 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW; 441 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL); 442 if (DecodedLMUL.second) 443 ContainedTyNumElts /= DecodedLMUL.first; 444 else 445 ContainedTyNumElts *= DecodedLMUL.first; 446 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() && 447 "Element count mismatch"); 448 #endif 449 450 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 451 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 452 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 453 report_fatal_error("The V extension does not support EEW=64 for index " 454 "values when XLEN=32"); 455 } 456 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 457 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 458 static_cast<unsigned>(IndexLMUL)); 459 MachineSDNode *Store = 460 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 461 462 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 463 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 464 465 ReplaceNode(Node, Store); 466 } 467 468 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 469 if (!Subtarget->hasVInstructions()) 470 return; 471 472 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode"); 473 474 SDLoc DL(Node); 475 MVT XLenVT = Subtarget->getXLenVT(); 476 477 unsigned IntNo = Node->getConstantOperandVal(0); 478 479 assert((IntNo == Intrinsic::riscv_vsetvli || 480 IntNo == Intrinsic::riscv_vsetvlimax) && 481 "Unexpected vsetvli intrinsic"); 482 483 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax; 484 unsigned Offset = (VLMax ? 1 : 2); 485 486 assert(Node->getNumOperands() == Offset + 2 && 487 "Unexpected number of operands"); 488 489 unsigned SEW = 490 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 491 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 492 Node->getConstantOperandVal(Offset + 1) & 0x7); 493 494 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 495 /*MaskAgnostic*/ true); 496 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 497 498 SDValue VLOperand; 499 unsigned Opcode = RISCV::PseudoVSETVLI; 500 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { 501 if (auto VLEN = Subtarget->getRealVLen()) 502 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) 503 VLMax = true; 504 } 505 if (VLMax || isAllOnesConstant(Node->getOperand(1))) { 506 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 507 Opcode = RISCV::PseudoVSETVLIX0; 508 } else { 509 VLOperand = Node->getOperand(1); 510 511 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 512 uint64_t AVL = C->getZExtValue(); 513 if (isUInt<5>(AVL)) { 514 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 515 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, 516 XLenVT, VLImm, VTypeIOp)); 517 return; 518 } 519 } 520 } 521 522 ReplaceNode(Node, 523 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp)); 524 } 525 526 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { 527 MVT VT = Node->getSimpleValueType(0); 528 unsigned Opcode = Node->getOpcode(); 529 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) && 530 "Unexpected opcode"); 531 SDLoc DL(Node); 532 533 // For operations of the form (x << C1) op C2, check if we can use 534 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1. 535 SDValue N0 = Node->getOperand(0); 536 SDValue N1 = Node->getOperand(1); 537 538 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); 539 if (!Cst) 540 return false; 541 542 int64_t Val = Cst->getSExtValue(); 543 544 // Check if immediate can already use ANDI/ORI/XORI. 545 if (isInt<12>(Val)) 546 return false; 547 548 SDValue Shift = N0; 549 550 // If Val is simm32 and we have a sext_inreg from i32, then the binop 551 // produces at least 33 sign bits. We can peek through the sext_inreg and use 552 // a SLLIW at the end. 553 bool SignExt = false; 554 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 555 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) { 556 SignExt = true; 557 Shift = N0.getOperand(0); 558 } 559 560 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) 561 return false; 562 563 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 564 if (!ShlCst) 565 return false; 566 567 uint64_t ShAmt = ShlCst->getZExtValue(); 568 569 // Make sure that we don't change the operation by removing bits. 570 // This only matters for OR and XOR, AND is unaffected. 571 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt); 572 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) 573 return false; 574 575 int64_t ShiftedVal = Val >> ShAmt; 576 if (!isInt<12>(ShiftedVal)) 577 return false; 578 579 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW. 580 if (SignExt && ShAmt >= 32) 581 return false; 582 583 // Ok, we can reorder to get a smaller immediate. 584 unsigned BinOpc; 585 switch (Opcode) { 586 default: llvm_unreachable("Unexpected opcode"); 587 case ISD::AND: BinOpc = RISCV::ANDI; break; 588 case ISD::OR: BinOpc = RISCV::ORI; break; 589 case ISD::XOR: BinOpc = RISCV::XORI; break; 590 } 591 592 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI; 593 594 SDNode *BinOp = CurDAG->getMachineNode( 595 BinOpc, DL, VT, Shift.getOperand(0), 596 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT)); 597 SDNode *SLLI = 598 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0), 599 CurDAG->getTargetConstant(ShAmt, DL, VT)); 600 ReplaceNode(Node, SLLI); 601 return true; 602 } 603 604 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { 605 // Only supported with XTHeadBb at the moment. 606 if (!Subtarget->hasVendorXTHeadBb()) 607 return false; 608 609 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 610 if (!N1C) 611 return false; 612 613 SDValue N0 = Node->getOperand(0); 614 if (!N0.hasOneUse()) 615 return false; 616 617 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL, 618 MVT VT) { 619 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0), 620 CurDAG->getTargetConstant(Msb, DL, VT), 621 CurDAG->getTargetConstant(Lsb, DL, VT)); 622 }; 623 624 SDLoc DL(Node); 625 MVT VT = Node->getSimpleValueType(0); 626 const unsigned RightShAmt = N1C->getZExtValue(); 627 628 // Transform (sra (shl X, C1) C2) with C1 < C2 629 // -> (TH.EXT X, msb, lsb) 630 if (N0.getOpcode() == ISD::SHL) { 631 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 632 if (!N01C) 633 return false; 634 635 const unsigned LeftShAmt = N01C->getZExtValue(); 636 // Make sure that this is a bitfield extraction (i.e., the shift-right 637 // amount can not be less than the left-shift). 638 if (LeftShAmt > RightShAmt) 639 return false; 640 641 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; 642 const unsigned Msb = MsbPlusOne - 1; 643 const unsigned Lsb = RightShAmt - LeftShAmt; 644 645 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); 646 ReplaceNode(Node, TH_EXT); 647 return true; 648 } 649 650 // Transform (sra (sext_inreg X, _), C) -> 651 // (TH.EXT X, msb, lsb) 652 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { 653 unsigned ExtSize = 654 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 655 656 // ExtSize of 32 should use sraiw via tablegen pattern. 657 if (ExtSize == 32) 658 return false; 659 660 const unsigned Msb = ExtSize - 1; 661 const unsigned Lsb = RightShAmt; 662 663 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); 664 ReplaceNode(Node, TH_EXT); 665 return true; 666 } 667 668 return false; 669 } 670 671 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { 672 // Target does not support indexed loads. 673 if (!Subtarget->hasVendorXTHeadMemIdx()) 674 return false; 675 676 LoadSDNode *Ld = cast<LoadSDNode>(Node); 677 ISD::MemIndexedMode AM = Ld->getAddressingMode(); 678 if (AM == ISD::UNINDEXED) 679 return false; 680 681 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset()); 682 if (!C) 683 return false; 684 685 EVT LoadVT = Ld->getMemoryVT(); 686 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) && 687 "Unexpected addressing mode"); 688 bool IsPre = AM == ISD::PRE_INC; 689 bool IsPost = AM == ISD::POST_INC; 690 int64_t Offset = C->getSExtValue(); 691 692 // The constants that can be encoded in the THeadMemIdx instructions 693 // are of the form (sign_extend(imm5) << imm2). 694 unsigned Shift; 695 for (Shift = 0; Shift < 4; Shift++) 696 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) 697 break; 698 699 // Constant cannot be encoded. 700 if (Shift == 4) 701 return false; 702 703 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD); 704 unsigned Opcode; 705 if (LoadVT == MVT::i8 && IsPre) 706 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB; 707 else if (LoadVT == MVT::i8 && IsPost) 708 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA; 709 else if (LoadVT == MVT::i16 && IsPre) 710 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB; 711 else if (LoadVT == MVT::i16 && IsPost) 712 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA; 713 else if (LoadVT == MVT::i32 && IsPre) 714 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB; 715 else if (LoadVT == MVT::i32 && IsPost) 716 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA; 717 else if (LoadVT == MVT::i64 && IsPre) 718 Opcode = RISCV::TH_LDIB; 719 else if (LoadVT == MVT::i64 && IsPost) 720 Opcode = RISCV::TH_LDIA; 721 else 722 return false; 723 724 EVT Ty = Ld->getOffset().getValueType(); 725 SDValue Ops[] = { 726 Ld->getBasePtr(), 727 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty), 728 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()}; 729 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0), 730 Ld->getValueType(1), MVT::Other, Ops); 731 732 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand(); 733 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp}); 734 735 ReplaceNode(Node, New); 736 737 return true; 738 } 739 740 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { 741 if (!Subtarget->hasVInstructions()) 742 return; 743 744 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode"); 745 746 SDLoc DL(Node); 747 unsigned IntNo = Node->getConstantOperandVal(1); 748 749 assert((IntNo == Intrinsic::riscv_sf_vc_x_se || 750 IntNo == Intrinsic::riscv_sf_vc_i_se) && 751 "Unexpected vsetvli intrinsic"); 752 753 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl 754 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6)); 755 SDValue SEWOp = 756 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT()); 757 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3), 758 Node->getOperand(4), Node->getOperand(5), 759 Node->getOperand(8), SEWOp, 760 Node->getOperand(0)}; 761 762 unsigned Opcode; 763 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7)); 764 switch (LMulSDNode->getSExtValue()) { 765 case 5: 766 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8 767 : RISCV::PseudoVC_I_SE_MF8; 768 break; 769 case 6: 770 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4 771 : RISCV::PseudoVC_I_SE_MF4; 772 break; 773 case 7: 774 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2 775 : RISCV::PseudoVC_I_SE_MF2; 776 break; 777 case 0: 778 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1 779 : RISCV::PseudoVC_I_SE_M1; 780 break; 781 case 1: 782 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2 783 : RISCV::PseudoVC_I_SE_M2; 784 break; 785 case 2: 786 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4 787 : RISCV::PseudoVC_I_SE_M4; 788 break; 789 case 3: 790 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8 791 : RISCV::PseudoVC_I_SE_M8; 792 break; 793 } 794 795 ReplaceNode(Node, CurDAG->getMachineNode( 796 Opcode, DL, Node->getSimpleValueType(0), Operands)); 797 } 798 799 static unsigned getSegInstNF(unsigned Intrinsic) { 800 #define INST_NF_CASE(NAME, NF) \ 801 case Intrinsic::riscv_##NAME##NF: \ 802 return NF; 803 #define INST_NF_CASE_MASK(NAME, NF) \ 804 case Intrinsic::riscv_##NAME##NF##_mask: \ 805 return NF; 806 #define INST_NF_CASE_FF(NAME, NF) \ 807 case Intrinsic::riscv_##NAME##NF##ff: \ 808 return NF; 809 #define INST_NF_CASE_FF_MASK(NAME, NF) \ 810 case Intrinsic::riscv_##NAME##NF##ff_mask: \ 811 return NF; 812 #define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \ 813 MACRO_NAME(NAME, 2) \ 814 MACRO_NAME(NAME, 3) \ 815 MACRO_NAME(NAME, 4) \ 816 MACRO_NAME(NAME, 5) \ 817 MACRO_NAME(NAME, 6) \ 818 MACRO_NAME(NAME, 7) \ 819 MACRO_NAME(NAME, 8) 820 #define INST_ALL_NF_CASE(NAME) \ 821 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \ 822 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME) 823 #define INST_ALL_NF_CASE_WITH_FF(NAME) \ 824 INST_ALL_NF_CASE(NAME) \ 825 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \ 826 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME) 827 switch (Intrinsic) { 828 default: 829 llvm_unreachable("Unexpected segment load/store intrinsic"); 830 INST_ALL_NF_CASE_WITH_FF(vlseg) 831 INST_ALL_NF_CASE(vlsseg) 832 INST_ALL_NF_CASE(vloxseg) 833 INST_ALL_NF_CASE(vluxseg) 834 INST_ALL_NF_CASE(vsseg) 835 INST_ALL_NF_CASE(vssseg) 836 INST_ALL_NF_CASE(vsoxseg) 837 INST_ALL_NF_CASE(vsuxseg) 838 } 839 } 840 841 void RISCVDAGToDAGISel::Select(SDNode *Node) { 842 // If we have a custom node, we have already selected. 843 if (Node->isMachineOpcode()) { 844 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 845 Node->setNodeId(-1); 846 return; 847 } 848 849 // Instruction Selection not handled by the auto-generated tablegen selection 850 // should be handled here. 851 unsigned Opcode = Node->getOpcode(); 852 MVT XLenVT = Subtarget->getXLenVT(); 853 SDLoc DL(Node); 854 MVT VT = Node->getSimpleValueType(0); 855 856 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs(); 857 858 switch (Opcode) { 859 case ISD::Constant: { 860 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT"); 861 auto *ConstNode = cast<ConstantSDNode>(Node); 862 if (ConstNode->isZero()) { 863 SDValue New = 864 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 865 ReplaceNode(Node, New.getNode()); 866 return; 867 } 868 int64_t Imm = ConstNode->getSExtValue(); 869 // If only the lower 8 bits are used, try to convert this to a simm6 by 870 // sign-extending bit 7. This is neutral without the C extension, and 871 // allows C.LI to be used if C is present. 872 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node)) 873 Imm = SignExtend64<8>(Imm); 874 // If the upper XLen-16 bits are not used, try to convert this to a simm12 875 // by sign extending bit 15. 876 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 877 hasAllHUsers(Node)) 878 Imm = SignExtend64<16>(Imm); 879 // If the upper 32-bits are not used try to convert this into a simm32 by 880 // sign extending bit 32. 881 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 882 Imm = SignExtend64<32>(Imm); 883 884 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode()); 885 return; 886 } 887 case ISD::ConstantFP: { 888 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF(); 889 890 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64; 891 SDValue Imm; 892 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will 893 // create an integer immediate. 894 if (APF.isPosZero() || NegZeroF64) 895 Imm = CurDAG->getRegister(RISCV::X0, XLenVT); 896 else 897 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(), 898 *Subtarget); 899 900 bool HasZdinx = Subtarget->hasStdExtZdinx(); 901 bool Is64Bit = Subtarget->is64Bit(); 902 unsigned Opc; 903 switch (VT.SimpleTy) { 904 default: 905 llvm_unreachable("Unexpected size"); 906 case MVT::bf16: 907 assert(Subtarget->hasStdExtZfbfmin()); 908 Opc = RISCV::FMV_H_X; 909 break; 910 case MVT::f16: 911 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; 912 break; 913 case MVT::f32: 914 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X; 915 break; 916 case MVT::f64: 917 // For RV32, we can't move from a GPR, we need to convert instead. This 918 // should only happen for +0.0 and -0.0. 919 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant"); 920 if (Is64Bit) 921 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X; 922 else 923 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W; 924 break; 925 } 926 927 SDNode *Res; 928 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) { 929 Res = 930 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode(); 931 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) { 932 Res = 933 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode(); 934 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) 935 Res = CurDAG->getMachineNode( 936 Opc, DL, VT, Imm, 937 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT)); 938 else 939 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm); 940 941 // For f64 -0.0, we need to insert a fneg.d idiom. 942 if (NegZeroF64) { 943 Opc = RISCV::FSGNJN_D; 944 if (HasZdinx) 945 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X; 946 Res = 947 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0)); 948 } 949 950 ReplaceNode(Node, Res); 951 return; 952 } 953 case RISCVISD::BuildGPRPair: 954 case RISCVISD::BuildPairF64: { 955 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx()) 956 break; 957 958 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) && 959 "BuildPairF64 only handled here on rv32i_zdinx"); 960 961 SDValue Ops[] = { 962 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), 963 Node->getOperand(0), 964 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), 965 Node->getOperand(1), 966 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)}; 967 968 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); 969 ReplaceNode(Node, N); 970 return; 971 } 972 case RISCVISD::SplitGPRPair: 973 case RISCVISD::SplitF64: { 974 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) { 975 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) && 976 "SplitF64 only handled here on rv32i_zdinx"); 977 978 if (!SDValue(Node, 0).use_empty()) { 979 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, 980 Node->getValueType(0), 981 Node->getOperand(0)); 982 ReplaceUses(SDValue(Node, 0), Lo); 983 } 984 985 if (!SDValue(Node, 1).use_empty()) { 986 SDValue Hi = CurDAG->getTargetExtractSubreg( 987 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0)); 988 ReplaceUses(SDValue(Node, 1), Hi); 989 } 990 991 CurDAG->RemoveDeadNode(Node); 992 return; 993 } 994 995 assert(Opcode != RISCVISD::SplitGPRPair && 996 "SplitGPRPair should already be handled"); 997 998 if (!Subtarget->hasStdExtZfa()) 999 break; 1000 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() && 1001 "Unexpected subtarget"); 1002 1003 // With Zfa, lower to fmv.x.w and fmvh.x.d. 1004 if (!SDValue(Node, 0).use_empty()) { 1005 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT, 1006 Node->getOperand(0)); 1007 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0)); 1008 } 1009 if (!SDValue(Node, 1).use_empty()) { 1010 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT, 1011 Node->getOperand(0)); 1012 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0)); 1013 } 1014 1015 CurDAG->RemoveDeadNode(Node); 1016 return; 1017 } 1018 case ISD::SHL: { 1019 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1020 if (!N1C) 1021 break; 1022 SDValue N0 = Node->getOperand(0); 1023 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 1024 !isa<ConstantSDNode>(N0.getOperand(1))) 1025 break; 1026 unsigned ShAmt = N1C->getZExtValue(); 1027 uint64_t Mask = N0.getConstantOperandVal(1); 1028 1029 if (ShAmt <= 32 && isShiftedMask_64(Mask)) { 1030 unsigned XLen = Subtarget->getXLen(); 1031 unsigned LeadingZeros = XLen - llvm::bit_width(Mask); 1032 unsigned TrailingZeros = llvm::countr_zero(Mask); 1033 if (TrailingZeros > 0 && LeadingZeros == 32) { 1034 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) 1035 // where C2 has 32 leading zeros and C3 trailing zeros. 1036 SDNode *SRLIW = CurDAG->getMachineNode( 1037 RISCV::SRLIW, DL, VT, N0->getOperand(0), 1038 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 1039 SDNode *SLLI = CurDAG->getMachineNode( 1040 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1041 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); 1042 ReplaceNode(Node, SLLI); 1043 return; 1044 } 1045 if (TrailingZeros == 0 && LeadingZeros > ShAmt && 1046 XLen - LeadingZeros > 11 && LeadingZeros != 32) { 1047 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C) 1048 // where C2 has C4 leading zeros and no trailing zeros. 1049 // This is profitable if the "and" was to be lowered to 1050 // (srli (slli X, C4), C4) and not (andi X, C2). 1051 // For "LeadingZeros == 32": 1052 // - with Zba it's just (slli.uw X, C) 1053 // - without Zba a tablegen pattern applies the very same 1054 // transform as we would have done here 1055 SDNode *SLLI = CurDAG->getMachineNode( 1056 RISCV::SLLI, DL, VT, N0->getOperand(0), 1057 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1058 SDNode *SRLI = CurDAG->getMachineNode( 1059 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1060 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT)); 1061 ReplaceNode(Node, SRLI); 1062 return; 1063 } 1064 } 1065 break; 1066 } 1067 case ISD::SRL: { 1068 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1069 if (!N1C) 1070 break; 1071 SDValue N0 = Node->getOperand(0); 1072 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1073 break; 1074 unsigned ShAmt = N1C->getZExtValue(); 1075 uint64_t Mask = N0.getConstantOperandVal(1); 1076 1077 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 1078 // 32 leading zeros and C3 trailing zeros. 1079 if (isShiftedMask_64(Mask) && N0.hasOneUse()) { 1080 unsigned XLen = Subtarget->getXLen(); 1081 unsigned LeadingZeros = XLen - llvm::bit_width(Mask); 1082 unsigned TrailingZeros = llvm::countr_zero(Mask); 1083 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 1084 SDNode *SRLIW = CurDAG->getMachineNode( 1085 RISCV::SRLIW, DL, VT, N0->getOperand(0), 1086 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 1087 SDNode *SLLI = CurDAG->getMachineNode( 1088 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1089 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 1090 ReplaceNode(Node, SLLI); 1091 return; 1092 } 1093 } 1094 1095 // Optimize (srl (and X, C2), C) -> 1096 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 1097 // Where C2 is a mask with C3 trailing ones. 1098 // Taking into account that the C2 may have had lower bits unset by 1099 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 1100 // This pattern occurs when type legalizing right shifts for types with 1101 // less than XLen bits. 1102 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 1103 if (!isMask_64(Mask)) 1104 break; 1105 unsigned TrailingOnes = llvm::countr_one(Mask); 1106 if (ShAmt >= TrailingOnes) 1107 break; 1108 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64. 1109 if (TrailingOnes == 32) { 1110 SDNode *SRLI = CurDAG->getMachineNode( 1111 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT, 1112 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); 1113 ReplaceNode(Node, SRLI); 1114 return; 1115 } 1116 1117 // Only do the remaining transforms if the AND has one use. 1118 if (!N0.hasOneUse()) 1119 break; 1120 1121 // If C2 is (1 << ShAmt) use bexti or th.tst if possible. 1122 if (HasBitTest && ShAmt + 1 == TrailingOnes) { 1123 SDNode *BEXTI = CurDAG->getMachineNode( 1124 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT, 1125 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); 1126 ReplaceNode(Node, BEXTI); 1127 return; 1128 } 1129 1130 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 1131 if (Subtarget->hasVendorXTHeadBb()) { 1132 SDNode *THEXTU = CurDAG->getMachineNode( 1133 RISCV::TH_EXTU, DL, VT, N0->getOperand(0), 1134 CurDAG->getTargetConstant(TrailingOnes - 1, DL, VT), 1135 CurDAG->getTargetConstant(ShAmt, DL, VT)); 1136 ReplaceNode(Node, THEXTU); 1137 return; 1138 } 1139 1140 SDNode *SLLI = 1141 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 1142 CurDAG->getTargetConstant(LShAmt, DL, VT)); 1143 SDNode *SRLI = CurDAG->getMachineNode( 1144 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1145 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 1146 ReplaceNode(Node, SRLI); 1147 return; 1148 } 1149 case ISD::SRA: { 1150 if (trySignedBitfieldExtract(Node)) 1151 return; 1152 1153 // Optimize (sra (sext_inreg X, i16), C) -> 1154 // (srai (slli X, (XLen-16), (XLen-16) + C) 1155 // And (sra (sext_inreg X, i8), C) -> 1156 // (srai (slli X, (XLen-8), (XLen-8) + C) 1157 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 1158 // This transform matches the code we get without Zbb. The shifts are more 1159 // compressible, and this can help expose CSE opportunities in the sdiv by 1160 // constant optimization. 1161 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1162 if (!N1C) 1163 break; 1164 SDValue N0 = Node->getOperand(0); 1165 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 1166 break; 1167 unsigned ShAmt = N1C->getZExtValue(); 1168 unsigned ExtSize = 1169 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 1170 // ExtSize of 32 should use sraiw via tablegen pattern. 1171 if (ExtSize >= 32 || ShAmt >= ExtSize) 1172 break; 1173 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 1174 SDNode *SLLI = 1175 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 1176 CurDAG->getTargetConstant(LShAmt, DL, VT)); 1177 SDNode *SRAI = CurDAG->getMachineNode( 1178 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 1179 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 1180 ReplaceNode(Node, SRAI); 1181 return; 1182 } 1183 case ISD::OR: 1184 case ISD::XOR: 1185 if (tryShrinkShlLogicImm(Node)) 1186 return; 1187 1188 break; 1189 case ISD::AND: { 1190 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1191 if (!N1C) 1192 break; 1193 1194 SDValue N0 = Node->getOperand(0); 1195 1196 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT, 1197 SDValue X, unsigned Msb, 1198 unsigned Lsb) { 1199 if (!Subtarget->hasVendorXTHeadBb()) 1200 return false; 1201 1202 SDNode *TH_EXTU = CurDAG->getMachineNode( 1203 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT), 1204 CurDAG->getTargetConstant(Lsb, DL, VT)); 1205 ReplaceNode(Node, TH_EXTU); 1206 return true; 1207 }; 1208 1209 bool LeftShift = N0.getOpcode() == ISD::SHL; 1210 if (LeftShift || N0.getOpcode() == ISD::SRL) { 1211 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 1212 if (!C) 1213 break; 1214 unsigned C2 = C->getZExtValue(); 1215 unsigned XLen = Subtarget->getXLen(); 1216 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 1217 1218 // Keep track of whether this is a c.andi. If we can't use c.andi, the 1219 // shift pair might offer more compression opportunities. 1220 // TODO: We could check for C extension here, but we don't have many lit 1221 // tests with the C extension enabled so not checking gets better 1222 // coverage. 1223 // TODO: What if ANDI faster than shift? 1224 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 1225 1226 uint64_t C1 = N1C->getZExtValue(); 1227 1228 // Clear irrelevant bits in the mask. 1229 if (LeftShift) 1230 C1 &= maskTrailingZeros<uint64_t>(C2); 1231 else 1232 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 1233 1234 // Some transforms should only be done if the shift has a single use or 1235 // the AND would become (srli (slli X, 32), 32) 1236 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 1237 1238 SDValue X = N0.getOperand(0); 1239 1240 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 1241 // with c3 leading zeros. 1242 if (!LeftShift && isMask_64(C1)) { 1243 unsigned Leading = XLen - llvm::bit_width(C1); 1244 if (C2 < Leading) { 1245 // If the number of leading zeros is C2+32 this can be SRLIW. 1246 if (C2 + 32 == Leading) { 1247 SDNode *SRLIW = CurDAG->getMachineNode( 1248 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 1249 ReplaceNode(Node, SRLIW); 1250 return; 1251 } 1252 1253 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) 1254 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 1255 // 1256 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 1257 // legalized and goes through DAG combine. 1258 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && 1259 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 1260 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 1261 SDNode *SRAIW = 1262 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 1263 CurDAG->getTargetConstant(31, DL, VT)); 1264 SDNode *SRLIW = CurDAG->getMachineNode( 1265 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 1266 CurDAG->getTargetConstant(Leading - 32, DL, VT)); 1267 ReplaceNode(Node, SRLIW); 1268 return; 1269 } 1270 1271 // Try to use an unsigned bitfield extract (e.g., th.extu) if 1272 // available. 1273 // Transform (and (srl x, C2), C1) 1274 // -> (<bfextract> x, msb, lsb) 1275 // 1276 // Make sure to keep this below the SRLIW cases, as we always want to 1277 // prefer the more common instruction. 1278 const unsigned Msb = llvm::bit_width(C1) + C2 - 1; 1279 const unsigned Lsb = C2; 1280 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb)) 1281 return; 1282 1283 // (srli (slli x, c3-c2), c3). 1284 // Skip if we could use (zext.w (sraiw X, C2)). 1285 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && 1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 1288 // Also Skip if we can use bexti or th.tst. 1289 Skip |= HasBitTest && Leading == XLen - 1; 1290 if (OneUseOrZExtW && !Skip) { 1291 SDNode *SLLI = CurDAG->getMachineNode( 1292 RISCV::SLLI, DL, VT, X, 1293 CurDAG->getTargetConstant(Leading - C2, DL, VT)); 1294 SDNode *SRLI = CurDAG->getMachineNode( 1295 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1296 CurDAG->getTargetConstant(Leading, DL, VT)); 1297 ReplaceNode(Node, SRLI); 1298 return; 1299 } 1300 } 1301 } 1302 1303 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 1304 // shifted by c2 bits with c3 leading zeros. 1305 if (LeftShift && isShiftedMask_64(C1)) { 1306 unsigned Leading = XLen - llvm::bit_width(C1); 1307 1308 if (C2 + Leading < XLen && 1309 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { 1310 // Use slli.uw when possible. 1311 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { 1312 SDNode *SLLI_UW = 1313 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X, 1314 CurDAG->getTargetConstant(C2, DL, VT)); 1315 ReplaceNode(Node, SLLI_UW); 1316 return; 1317 } 1318 1319 // (srli (slli c2+c3), c3) 1320 if (OneUseOrZExtW && !IsCANDI) { 1321 SDNode *SLLI = CurDAG->getMachineNode( 1322 RISCV::SLLI, DL, VT, X, 1323 CurDAG->getTargetConstant(C2 + Leading, DL, VT)); 1324 SDNode *SRLI = CurDAG->getMachineNode( 1325 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1326 CurDAG->getTargetConstant(Leading, DL, VT)); 1327 ReplaceNode(Node, SRLI); 1328 return; 1329 } 1330 } 1331 } 1332 1333 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 1334 // shifted mask with c2 leading zeros and c3 trailing zeros. 1335 if (!LeftShift && isShiftedMask_64(C1)) { 1336 unsigned Leading = XLen - llvm::bit_width(C1); 1337 unsigned Trailing = llvm::countr_zero(C1); 1338 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && 1339 !IsCANDI) { 1340 unsigned SrliOpc = RISCV::SRLI; 1341 // If the input is zexti32 we should use SRLIW. 1342 if (X.getOpcode() == ISD::AND && 1343 isa<ConstantSDNode>(X.getOperand(1)) && 1344 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 1345 SrliOpc = RISCV::SRLIW; 1346 X = X.getOperand(0); 1347 } 1348 SDNode *SRLI = CurDAG->getMachineNode( 1349 SrliOpc, DL, VT, X, 1350 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1351 SDNode *SLLI = CurDAG->getMachineNode( 1352 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1353 CurDAG->getTargetConstant(Trailing, DL, VT)); 1354 ReplaceNode(Node, SLLI); 1355 return; 1356 } 1357 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 1358 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && 1359 OneUseOrZExtW && !IsCANDI) { 1360 SDNode *SRLIW = CurDAG->getMachineNode( 1361 RISCV::SRLIW, DL, VT, X, 1362 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1363 SDNode *SLLI = CurDAG->getMachineNode( 1364 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1365 CurDAG->getTargetConstant(Trailing, DL, VT)); 1366 ReplaceNode(Node, SLLI); 1367 return; 1368 } 1369 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI. 1370 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen && 1371 OneUseOrZExtW && Subtarget->hasStdExtZba()) { 1372 SDNode *SRLI = CurDAG->getMachineNode( 1373 RISCV::SRLI, DL, VT, X, 1374 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1375 SDNode *SLLI_UW = CurDAG->getMachineNode( 1376 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0), 1377 CurDAG->getTargetConstant(Trailing, DL, VT)); 1378 ReplaceNode(Node, SLLI_UW); 1379 return; 1380 } 1381 } 1382 1383 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 1384 // shifted mask with no leading zeros and c3 trailing zeros. 1385 if (LeftShift && isShiftedMask_64(C1)) { 1386 unsigned Leading = XLen - llvm::bit_width(C1); 1387 unsigned Trailing = llvm::countr_zero(C1); 1388 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { 1389 SDNode *SRLI = CurDAG->getMachineNode( 1390 RISCV::SRLI, DL, VT, X, 1391 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1392 SDNode *SLLI = CurDAG->getMachineNode( 1393 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1394 CurDAG->getTargetConstant(Trailing, DL, VT)); 1395 ReplaceNode(Node, SLLI); 1396 return; 1397 } 1398 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 1399 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 1400 SDNode *SRLIW = CurDAG->getMachineNode( 1401 RISCV::SRLIW, DL, VT, X, 1402 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1403 SDNode *SLLI = CurDAG->getMachineNode( 1404 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1405 CurDAG->getTargetConstant(Trailing, DL, VT)); 1406 ReplaceNode(Node, SLLI); 1407 return; 1408 } 1409 1410 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI. 1411 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW && 1412 Subtarget->hasStdExtZba()) { 1413 SDNode *SRLI = CurDAG->getMachineNode( 1414 RISCV::SRLI, DL, VT, X, 1415 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1416 SDNode *SLLI_UW = CurDAG->getMachineNode( 1417 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0), 1418 CurDAG->getTargetConstant(Trailing, DL, VT)); 1419 ReplaceNode(Node, SLLI_UW); 1420 return; 1421 } 1422 } 1423 } 1424 1425 const uint64_t C1 = N1C->getZExtValue(); 1426 1427 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) && 1428 N0.hasOneUse()) { 1429 unsigned C2 = N0.getConstantOperandVal(1); 1430 unsigned XLen = Subtarget->getXLen(); 1431 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 1432 1433 SDValue X = N0.getOperand(0); 1434 1435 // Prefer SRAIW + ANDI when possible. 1436 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) && 1437 X.getOpcode() == ISD::SHL && 1438 isa<ConstantSDNode>(X.getOperand(1)) && 1439 X.getConstantOperandVal(1) == 32; 1440 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a 1441 // mask with c3 leading zeros and c2 is larger than c3. 1442 if (isMask_64(C1) && !Skip) { 1443 unsigned Leading = XLen - llvm::bit_width(C1); 1444 if (C2 > Leading) { 1445 SDNode *SRAI = CurDAG->getMachineNode( 1446 RISCV::SRAI, DL, VT, X, 1447 CurDAG->getTargetConstant(C2 - Leading, DL, VT)); 1448 SDNode *SRLI = CurDAG->getMachineNode( 1449 RISCV::SRLI, DL, VT, SDValue(SRAI, 0), 1450 CurDAG->getTargetConstant(Leading, DL, VT)); 1451 ReplaceNode(Node, SRLI); 1452 return; 1453 } 1454 } 1455 1456 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 1457 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can 1458 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4). 1459 if (isShiftedMask_64(C1) && !Skip) { 1460 unsigned Leading = XLen - llvm::bit_width(C1); 1461 unsigned Trailing = llvm::countr_zero(C1); 1462 if (C2 > Leading && Leading > 0 && Trailing > 0) { 1463 SDNode *SRAI = CurDAG->getMachineNode( 1464 RISCV::SRAI, DL, VT, N0.getOperand(0), 1465 CurDAG->getTargetConstant(C2 - Leading, DL, VT)); 1466 SDNode *SRLI = CurDAG->getMachineNode( 1467 RISCV::SRLI, DL, VT, SDValue(SRAI, 0), 1468 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)); 1469 SDNode *SLLI = CurDAG->getMachineNode( 1470 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1471 CurDAG->getTargetConstant(Trailing, DL, VT)); 1472 ReplaceNode(Node, SLLI); 1473 return; 1474 } 1475 } 1476 } 1477 1478 // If C1 masks off the upper bits only (but can't be formed as an 1479 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if 1480 // available. 1481 // Transform (and x, C1) 1482 // -> (<bfextract> x, msb, lsb) 1483 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) { 1484 const unsigned Msb = llvm::bit_width(C1) - 1; 1485 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) 1486 return; 1487 } 1488 1489 if (tryShrinkShlLogicImm(Node)) 1490 return; 1491 1492 break; 1493 } 1494 case ISD::MUL: { 1495 // Special case for calculating (mul (and X, C2), C1) where the full product 1496 // fits in XLen bits. We can shift X left by the number of leading zeros in 1497 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 1498 // product has XLen trailing zeros, putting it in the output of MULHU. This 1499 // can avoid materializing a constant in a register for C2. 1500 1501 // RHS should be a constant. 1502 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1503 if (!N1C || !N1C->hasOneUse()) 1504 break; 1505 1506 // LHS should be an AND with constant. 1507 SDValue N0 = Node->getOperand(0); 1508 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1509 break; 1510 1511 uint64_t C2 = N0.getConstantOperandVal(1); 1512 1513 // Constant should be a mask. 1514 if (!isMask_64(C2)) 1515 break; 1516 1517 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has 1518 // multiple users or the constant is a simm12. This prevents inserting a 1519 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely 1520 // make it more costly to materialize. Otherwise, using a SLLI might allow 1521 // it to be compressed. 1522 bool IsANDIOrZExt = 1523 isInt<12>(C2) || 1524 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()); 1525 // With XTHeadBb, we can use TH.EXTU. 1526 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb(); 1527 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) 1528 break; 1529 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or 1530 // the constant is a simm32. 1531 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba(); 1532 // With XTHeadBb, we can use TH.EXTU. 1533 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb(); 1534 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse())) 1535 break; 1536 1537 // We need to shift left the AND input and C1 by a total of XLen bits. 1538 1539 // How far left do we need to shift the AND input? 1540 unsigned XLen = Subtarget->getXLen(); 1541 unsigned LeadingZeros = XLen - llvm::bit_width(C2); 1542 1543 // The constant gets shifted by the remaining amount unless that would 1544 // shift bits out. 1545 uint64_t C1 = N1C->getZExtValue(); 1546 unsigned ConstantShift = XLen - LeadingZeros; 1547 if (ConstantShift > (XLen - llvm::bit_width(C1))) 1548 break; 1549 1550 uint64_t ShiftedC1 = C1 << ConstantShift; 1551 // If this RV32, we need to sign extend the constant. 1552 if (XLen == 32) 1553 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1554 1555 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 1556 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode(); 1557 SDNode *SLLI = 1558 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1559 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1560 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1561 SDValue(SLLI, 0), SDValue(Imm, 0)); 1562 ReplaceNode(Node, MULHU); 1563 return; 1564 } 1565 case ISD::LOAD: { 1566 if (tryIndexedLoad(Node)) 1567 return; 1568 1569 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) { 1570 // We match post-incrementing load here 1571 LoadSDNode *Load = cast<LoadSDNode>(Node); 1572 if (Load->getAddressingMode() != ISD::POST_INC) 1573 break; 1574 1575 SDValue Chain = Node->getOperand(0); 1576 SDValue Base = Node->getOperand(1); 1577 SDValue Offset = Node->getOperand(2); 1578 1579 bool Simm12 = false; 1580 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD; 1581 1582 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) { 1583 int ConstantVal = ConstantOffset->getSExtValue(); 1584 Simm12 = isInt<12>(ConstantVal); 1585 if (Simm12) 1586 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset), 1587 Offset.getValueType()); 1588 } 1589 1590 unsigned Opcode = 0; 1591 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) { 1592 case MVT::i8: 1593 if (Simm12 && SignExtend) 1594 Opcode = RISCV::CV_LB_ri_inc; 1595 else if (Simm12 && !SignExtend) 1596 Opcode = RISCV::CV_LBU_ri_inc; 1597 else if (!Simm12 && SignExtend) 1598 Opcode = RISCV::CV_LB_rr_inc; 1599 else 1600 Opcode = RISCV::CV_LBU_rr_inc; 1601 break; 1602 case MVT::i16: 1603 if (Simm12 && SignExtend) 1604 Opcode = RISCV::CV_LH_ri_inc; 1605 else if (Simm12 && !SignExtend) 1606 Opcode = RISCV::CV_LHU_ri_inc; 1607 else if (!Simm12 && SignExtend) 1608 Opcode = RISCV::CV_LH_rr_inc; 1609 else 1610 Opcode = RISCV::CV_LHU_rr_inc; 1611 break; 1612 case MVT::i32: 1613 if (Simm12) 1614 Opcode = RISCV::CV_LW_ri_inc; 1615 else 1616 Opcode = RISCV::CV_LW_rr_inc; 1617 break; 1618 default: 1619 break; 1620 } 1621 if (!Opcode) 1622 break; 1623 1624 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT, 1625 Chain.getSimpleValueType(), Base, 1626 Offset, Chain)); 1627 return; 1628 } 1629 break; 1630 } 1631 case ISD::INTRINSIC_WO_CHAIN: { 1632 unsigned IntNo = Node->getConstantOperandVal(0); 1633 switch (IntNo) { 1634 // By default we do not custom select any intrinsic. 1635 default: 1636 break; 1637 case Intrinsic::riscv_vmsgeu: 1638 case Intrinsic::riscv_vmsge: { 1639 SDValue Src1 = Node->getOperand(1); 1640 SDValue Src2 = Node->getOperand(2); 1641 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1642 bool IsCmpConstant = false; 1643 bool IsCmpMinimum = false; 1644 // Only custom select scalar second operand. 1645 if (Src2.getValueType() != XLenVT) 1646 break; 1647 // Small constants are handled with patterns. 1648 int64_t CVal = 0; 1649 MVT Src1VT = Src1.getSimpleValueType(); 1650 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1651 IsCmpConstant = true; 1652 CVal = C->getSExtValue(); 1653 if (CVal >= -15 && CVal <= 16) { 1654 if (!IsUnsigned || CVal != 0) 1655 break; 1656 IsCmpMinimum = true; 1657 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue( 1658 Src1VT.getScalarSizeInBits()) 1659 .getSExtValue()) { 1660 IsCmpMinimum = true; 1661 } 1662 } 1663 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode; 1664 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1665 default: 1666 llvm_unreachable("Unexpected LMUL!"); 1667 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \ 1668 case RISCVII::VLMUL::lmulenum: \ 1669 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1670 : RISCV::PseudoVMSLT_VX_##suffix; \ 1671 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \ 1672 : RISCV::PseudoVMSGT_VX_##suffix; \ 1673 break; 1674 CASE_VMSLT_OPCODES(LMUL_F8, MF8) 1675 CASE_VMSLT_OPCODES(LMUL_F4, MF4) 1676 CASE_VMSLT_OPCODES(LMUL_F2, MF2) 1677 CASE_VMSLT_OPCODES(LMUL_1, M1) 1678 CASE_VMSLT_OPCODES(LMUL_2, M2) 1679 CASE_VMSLT_OPCODES(LMUL_4, M4) 1680 CASE_VMSLT_OPCODES(LMUL_8, M8) 1681 #undef CASE_VMSLT_OPCODES 1682 } 1683 // Mask operations use the LMUL from the mask type. 1684 switch (RISCVTargetLowering::getLMUL(VT)) { 1685 default: 1686 llvm_unreachable("Unexpected LMUL!"); 1687 #define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \ 1688 case RISCVII::VLMUL::lmulenum: \ 1689 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1690 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \ 1691 break; 1692 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64) 1693 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32) 1694 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16) 1695 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8) 1696 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4) 1697 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2) 1698 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1) 1699 #undef CASE_VMNAND_VMSET_OPCODES 1700 } 1701 SDValue SEW = CurDAG->getTargetConstant( 1702 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1703 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1704 SDValue VL; 1705 selectVLOp(Node->getOperand(3), VL); 1706 1707 // If vmsge(u) with minimum value, expand it to vmset. 1708 if (IsCmpMinimum) { 1709 ReplaceNode(Node, 1710 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW)); 1711 return; 1712 } 1713 1714 if (IsCmpConstant) { 1715 SDValue Imm = 1716 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget); 1717 1718 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT, 1719 {Src1, Imm, VL, SEW})); 1720 return; 1721 } 1722 1723 // Expand to 1724 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1725 SDValue Cmp = SDValue( 1726 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1727 0); 1728 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1729 {Cmp, Cmp, VL, MaskSEW})); 1730 return; 1731 } 1732 case Intrinsic::riscv_vmsgeu_mask: 1733 case Intrinsic::riscv_vmsge_mask: { 1734 SDValue Src1 = Node->getOperand(2); 1735 SDValue Src2 = Node->getOperand(3); 1736 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1737 bool IsCmpConstant = false; 1738 bool IsCmpMinimum = false; 1739 // Only custom select scalar second operand. 1740 if (Src2.getValueType() != XLenVT) 1741 break; 1742 // Small constants are handled with patterns. 1743 MVT Src1VT = Src1.getSimpleValueType(); 1744 int64_t CVal = 0; 1745 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1746 IsCmpConstant = true; 1747 CVal = C->getSExtValue(); 1748 if (CVal >= -15 && CVal <= 16) { 1749 if (!IsUnsigned || CVal != 0) 1750 break; 1751 IsCmpMinimum = true; 1752 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue( 1753 Src1VT.getScalarSizeInBits()) 1754 .getSExtValue()) { 1755 IsCmpMinimum = true; 1756 } 1757 } 1758 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1759 VMOROpcode, VMSGTMaskOpcode; 1760 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1761 default: 1762 llvm_unreachable("Unexpected LMUL!"); 1763 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \ 1764 case RISCVII::VLMUL::lmulenum: \ 1765 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1766 : RISCV::PseudoVMSLT_VX_##suffix; \ 1767 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1768 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1769 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \ 1770 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \ 1771 break; 1772 CASE_VMSLT_OPCODES(LMUL_F8, MF8) 1773 CASE_VMSLT_OPCODES(LMUL_F4, MF4) 1774 CASE_VMSLT_OPCODES(LMUL_F2, MF2) 1775 CASE_VMSLT_OPCODES(LMUL_1, M1) 1776 CASE_VMSLT_OPCODES(LMUL_2, M2) 1777 CASE_VMSLT_OPCODES(LMUL_4, M4) 1778 CASE_VMSLT_OPCODES(LMUL_8, M8) 1779 #undef CASE_VMSLT_OPCODES 1780 } 1781 // Mask operations use the LMUL from the mask type. 1782 switch (RISCVTargetLowering::getLMUL(VT)) { 1783 default: 1784 llvm_unreachable("Unexpected LMUL!"); 1785 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1786 case RISCVII::VLMUL::lmulenum: \ 1787 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1788 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1789 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1790 break; 1791 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64) 1792 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32) 1793 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16) 1794 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8) 1795 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4) 1796 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2) 1797 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1) 1798 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1799 } 1800 SDValue SEW = CurDAG->getTargetConstant( 1801 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1802 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1803 SDValue VL; 1804 selectVLOp(Node->getOperand(5), VL); 1805 SDValue MaskedOff = Node->getOperand(1); 1806 SDValue Mask = Node->getOperand(4); 1807 1808 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff. 1809 if (IsCmpMinimum) { 1810 // We don't need vmor if the MaskedOff and the Mask are the same 1811 // value. 1812 if (Mask == MaskedOff) { 1813 ReplaceUses(Node, Mask.getNode()); 1814 return; 1815 } 1816 ReplaceNode(Node, 1817 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1818 {Mask, MaskedOff, VL, MaskSEW})); 1819 return; 1820 } 1821 1822 // If the MaskedOff value and the Mask are the same value use 1823 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1824 // This avoids needing to copy v0 to vd before starting the next sequence. 1825 if (Mask == MaskedOff) { 1826 SDValue Cmp = SDValue( 1827 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1828 0); 1829 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1830 {Mask, Cmp, VL, MaskSEW})); 1831 return; 1832 } 1833 1834 // Mask needs to be copied to V0. 1835 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1836 RISCV::V0, Mask, SDValue()); 1837 SDValue Glue = Chain.getValue(1); 1838 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1839 1840 if (IsCmpConstant) { 1841 SDValue Imm = 1842 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget); 1843 1844 ReplaceNode(Node, CurDAG->getMachineNode( 1845 VMSGTMaskOpcode, DL, VT, 1846 {MaskedOff, Src1, Imm, V0, VL, SEW, Glue})); 1847 return; 1848 } 1849 1850 // Otherwise use 1851 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1852 // The result is mask undisturbed. 1853 // We use the same instructions to emulate mask agnostic behavior, because 1854 // the agnostic result can be either undisturbed or all 1. 1855 SDValue Cmp = SDValue( 1856 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1857 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1858 0); 1859 // vmxor.mm vd, vd, v0 is used to update active value. 1860 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1861 {Cmp, Mask, VL, MaskSEW})); 1862 return; 1863 } 1864 case Intrinsic::riscv_vsetvli: 1865 case Intrinsic::riscv_vsetvlimax: 1866 return selectVSETVLI(Node); 1867 } 1868 break; 1869 } 1870 case ISD::INTRINSIC_W_CHAIN: { 1871 unsigned IntNo = Node->getConstantOperandVal(1); 1872 switch (IntNo) { 1873 // By default we do not custom select any intrinsic. 1874 default: 1875 break; 1876 case Intrinsic::riscv_vlseg2: 1877 case Intrinsic::riscv_vlseg3: 1878 case Intrinsic::riscv_vlseg4: 1879 case Intrinsic::riscv_vlseg5: 1880 case Intrinsic::riscv_vlseg6: 1881 case Intrinsic::riscv_vlseg7: 1882 case Intrinsic::riscv_vlseg8: { 1883 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 1884 /*IsStrided*/ false); 1885 return; 1886 } 1887 case Intrinsic::riscv_vlseg2_mask: 1888 case Intrinsic::riscv_vlseg3_mask: 1889 case Intrinsic::riscv_vlseg4_mask: 1890 case Intrinsic::riscv_vlseg5_mask: 1891 case Intrinsic::riscv_vlseg6_mask: 1892 case Intrinsic::riscv_vlseg7_mask: 1893 case Intrinsic::riscv_vlseg8_mask: { 1894 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 1895 /*IsStrided*/ false); 1896 return; 1897 } 1898 case Intrinsic::riscv_vlsseg2: 1899 case Intrinsic::riscv_vlsseg3: 1900 case Intrinsic::riscv_vlsseg4: 1901 case Intrinsic::riscv_vlsseg5: 1902 case Intrinsic::riscv_vlsseg6: 1903 case Intrinsic::riscv_vlsseg7: 1904 case Intrinsic::riscv_vlsseg8: { 1905 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 1906 /*IsStrided*/ true); 1907 return; 1908 } 1909 case Intrinsic::riscv_vlsseg2_mask: 1910 case Intrinsic::riscv_vlsseg3_mask: 1911 case Intrinsic::riscv_vlsseg4_mask: 1912 case Intrinsic::riscv_vlsseg5_mask: 1913 case Intrinsic::riscv_vlsseg6_mask: 1914 case Intrinsic::riscv_vlsseg7_mask: 1915 case Intrinsic::riscv_vlsseg8_mask: { 1916 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 1917 /*IsStrided*/ true); 1918 return; 1919 } 1920 case Intrinsic::riscv_vloxseg2: 1921 case Intrinsic::riscv_vloxseg3: 1922 case Intrinsic::riscv_vloxseg4: 1923 case Intrinsic::riscv_vloxseg5: 1924 case Intrinsic::riscv_vloxseg6: 1925 case Intrinsic::riscv_vloxseg7: 1926 case Intrinsic::riscv_vloxseg8: 1927 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 1928 /*IsOrdered*/ true); 1929 return; 1930 case Intrinsic::riscv_vluxseg2: 1931 case Intrinsic::riscv_vluxseg3: 1932 case Intrinsic::riscv_vluxseg4: 1933 case Intrinsic::riscv_vluxseg5: 1934 case Intrinsic::riscv_vluxseg6: 1935 case Intrinsic::riscv_vluxseg7: 1936 case Intrinsic::riscv_vluxseg8: 1937 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 1938 /*IsOrdered*/ false); 1939 return; 1940 case Intrinsic::riscv_vloxseg2_mask: 1941 case Intrinsic::riscv_vloxseg3_mask: 1942 case Intrinsic::riscv_vloxseg4_mask: 1943 case Intrinsic::riscv_vloxseg5_mask: 1944 case Intrinsic::riscv_vloxseg6_mask: 1945 case Intrinsic::riscv_vloxseg7_mask: 1946 case Intrinsic::riscv_vloxseg8_mask: 1947 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 1948 /*IsOrdered*/ true); 1949 return; 1950 case Intrinsic::riscv_vluxseg2_mask: 1951 case Intrinsic::riscv_vluxseg3_mask: 1952 case Intrinsic::riscv_vluxseg4_mask: 1953 case Intrinsic::riscv_vluxseg5_mask: 1954 case Intrinsic::riscv_vluxseg6_mask: 1955 case Intrinsic::riscv_vluxseg7_mask: 1956 case Intrinsic::riscv_vluxseg8_mask: 1957 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 1958 /*IsOrdered*/ false); 1959 return; 1960 case Intrinsic::riscv_vlseg8ff: 1961 case Intrinsic::riscv_vlseg7ff: 1962 case Intrinsic::riscv_vlseg6ff: 1963 case Intrinsic::riscv_vlseg5ff: 1964 case Intrinsic::riscv_vlseg4ff: 1965 case Intrinsic::riscv_vlseg3ff: 1966 case Intrinsic::riscv_vlseg2ff: { 1967 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false); 1968 return; 1969 } 1970 case Intrinsic::riscv_vlseg8ff_mask: 1971 case Intrinsic::riscv_vlseg7ff_mask: 1972 case Intrinsic::riscv_vlseg6ff_mask: 1973 case Intrinsic::riscv_vlseg5ff_mask: 1974 case Intrinsic::riscv_vlseg4ff_mask: 1975 case Intrinsic::riscv_vlseg3ff_mask: 1976 case Intrinsic::riscv_vlseg2ff_mask: { 1977 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true); 1978 return; 1979 } 1980 case Intrinsic::riscv_vloxei: 1981 case Intrinsic::riscv_vloxei_mask: 1982 case Intrinsic::riscv_vluxei: 1983 case Intrinsic::riscv_vluxei_mask: { 1984 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1985 IntNo == Intrinsic::riscv_vluxei_mask; 1986 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1987 IntNo == Intrinsic::riscv_vloxei_mask; 1988 1989 MVT VT = Node->getSimpleValueType(0); 1990 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1991 1992 unsigned CurOp = 2; 1993 SmallVector<SDValue, 8> Operands; 1994 Operands.push_back(Node->getOperand(CurOp++)); 1995 1996 MVT IndexVT; 1997 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1998 /*IsStridedOrIndexed*/ true, Operands, 1999 /*IsLoad=*/true, &IndexVT); 2000 2001 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 2002 "Element count mismatch"); 2003 2004 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2005 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 2006 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 2007 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 2008 report_fatal_error("The V extension does not support EEW=64 for index " 2009 "values when XLEN=32"); 2010 } 2011 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 2012 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 2013 static_cast<unsigned>(IndexLMUL)); 2014 MachineSDNode *Load = 2015 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2016 2017 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 2018 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 2019 2020 ReplaceNode(Node, Load); 2021 return; 2022 } 2023 case Intrinsic::riscv_vlm: 2024 case Intrinsic::riscv_vle: 2025 case Intrinsic::riscv_vle_mask: 2026 case Intrinsic::riscv_vlse: 2027 case Intrinsic::riscv_vlse_mask: { 2028 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 2029 IntNo == Intrinsic::riscv_vlse_mask; 2030 bool IsStrided = 2031 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 2032 2033 MVT VT = Node->getSimpleValueType(0); 2034 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2035 2036 // The riscv_vlm intrinsic are always tail agnostic and no passthru 2037 // operand at the IR level. In pseudos, they have both policy and 2038 // passthru operand. The passthru operand is needed to track the 2039 // "tail undefined" state, and the policy is there just for 2040 // for consistency - it will always be "don't care" for the 2041 // unmasked form. 2042 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 2043 unsigned CurOp = 2; 2044 SmallVector<SDValue, 8> Operands; 2045 if (HasPassthruOperand) 2046 Operands.push_back(Node->getOperand(CurOp++)); 2047 else { 2048 // We eagerly lower to implicit_def (instead of undef), as we 2049 // otherwise fail to select nodes such as: nxv1i1 = undef 2050 SDNode *Passthru = 2051 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 2052 Operands.push_back(SDValue(Passthru, 0)); 2053 } 2054 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 2055 Operands, /*IsLoad=*/true); 2056 2057 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2058 const RISCV::VLEPseudo *P = 2059 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, 2060 static_cast<unsigned>(LMUL)); 2061 MachineSDNode *Load = 2062 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2063 2064 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 2065 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 2066 2067 ReplaceNode(Node, Load); 2068 return; 2069 } 2070 case Intrinsic::riscv_vleff: 2071 case Intrinsic::riscv_vleff_mask: { 2072 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 2073 2074 MVT VT = Node->getSimpleValueType(0); 2075 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2076 2077 unsigned CurOp = 2; 2078 SmallVector<SDValue, 7> Operands; 2079 Operands.push_back(Node->getOperand(CurOp++)); 2080 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 2081 /*IsStridedOrIndexed*/ false, Operands, 2082 /*IsLoad=*/true); 2083 2084 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2085 const RISCV::VLEPseudo *P = 2086 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, 2087 Log2SEW, static_cast<unsigned>(LMUL)); 2088 MachineSDNode *Load = CurDAG->getMachineNode( 2089 P->Pseudo, DL, Node->getVTList(), Operands); 2090 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 2091 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 2092 2093 ReplaceNode(Node, Load); 2094 return; 2095 } 2096 } 2097 break; 2098 } 2099 case ISD::INTRINSIC_VOID: { 2100 unsigned IntNo = Node->getConstantOperandVal(1); 2101 switch (IntNo) { 2102 case Intrinsic::riscv_vsseg2: 2103 case Intrinsic::riscv_vsseg3: 2104 case Intrinsic::riscv_vsseg4: 2105 case Intrinsic::riscv_vsseg5: 2106 case Intrinsic::riscv_vsseg6: 2107 case Intrinsic::riscv_vsseg7: 2108 case Intrinsic::riscv_vsseg8: { 2109 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2110 /*IsStrided*/ false); 2111 return; 2112 } 2113 case Intrinsic::riscv_vsseg2_mask: 2114 case Intrinsic::riscv_vsseg3_mask: 2115 case Intrinsic::riscv_vsseg4_mask: 2116 case Intrinsic::riscv_vsseg5_mask: 2117 case Intrinsic::riscv_vsseg6_mask: 2118 case Intrinsic::riscv_vsseg7_mask: 2119 case Intrinsic::riscv_vsseg8_mask: { 2120 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2121 /*IsStrided*/ false); 2122 return; 2123 } 2124 case Intrinsic::riscv_vssseg2: 2125 case Intrinsic::riscv_vssseg3: 2126 case Intrinsic::riscv_vssseg4: 2127 case Intrinsic::riscv_vssseg5: 2128 case Intrinsic::riscv_vssseg6: 2129 case Intrinsic::riscv_vssseg7: 2130 case Intrinsic::riscv_vssseg8: { 2131 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2132 /*IsStrided*/ true); 2133 return; 2134 } 2135 case Intrinsic::riscv_vssseg2_mask: 2136 case Intrinsic::riscv_vssseg3_mask: 2137 case Intrinsic::riscv_vssseg4_mask: 2138 case Intrinsic::riscv_vssseg5_mask: 2139 case Intrinsic::riscv_vssseg6_mask: 2140 case Intrinsic::riscv_vssseg7_mask: 2141 case Intrinsic::riscv_vssseg8_mask: { 2142 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2143 /*IsStrided*/ true); 2144 return; 2145 } 2146 case Intrinsic::riscv_vsoxseg2: 2147 case Intrinsic::riscv_vsoxseg3: 2148 case Intrinsic::riscv_vsoxseg4: 2149 case Intrinsic::riscv_vsoxseg5: 2150 case Intrinsic::riscv_vsoxseg6: 2151 case Intrinsic::riscv_vsoxseg7: 2152 case Intrinsic::riscv_vsoxseg8: 2153 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2154 /*IsOrdered*/ true); 2155 return; 2156 case Intrinsic::riscv_vsuxseg2: 2157 case Intrinsic::riscv_vsuxseg3: 2158 case Intrinsic::riscv_vsuxseg4: 2159 case Intrinsic::riscv_vsuxseg5: 2160 case Intrinsic::riscv_vsuxseg6: 2161 case Intrinsic::riscv_vsuxseg7: 2162 case Intrinsic::riscv_vsuxseg8: 2163 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2164 /*IsOrdered*/ false); 2165 return; 2166 case Intrinsic::riscv_vsoxseg2_mask: 2167 case Intrinsic::riscv_vsoxseg3_mask: 2168 case Intrinsic::riscv_vsoxseg4_mask: 2169 case Intrinsic::riscv_vsoxseg5_mask: 2170 case Intrinsic::riscv_vsoxseg6_mask: 2171 case Intrinsic::riscv_vsoxseg7_mask: 2172 case Intrinsic::riscv_vsoxseg8_mask: 2173 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2174 /*IsOrdered*/ true); 2175 return; 2176 case Intrinsic::riscv_vsuxseg2_mask: 2177 case Intrinsic::riscv_vsuxseg3_mask: 2178 case Intrinsic::riscv_vsuxseg4_mask: 2179 case Intrinsic::riscv_vsuxseg5_mask: 2180 case Intrinsic::riscv_vsuxseg6_mask: 2181 case Intrinsic::riscv_vsuxseg7_mask: 2182 case Intrinsic::riscv_vsuxseg8_mask: 2183 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2184 /*IsOrdered*/ false); 2185 return; 2186 case Intrinsic::riscv_vsoxei: 2187 case Intrinsic::riscv_vsoxei_mask: 2188 case Intrinsic::riscv_vsuxei: 2189 case Intrinsic::riscv_vsuxei_mask: { 2190 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 2191 IntNo == Intrinsic::riscv_vsuxei_mask; 2192 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 2193 IntNo == Intrinsic::riscv_vsoxei_mask; 2194 2195 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 2196 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2197 2198 unsigned CurOp = 2; 2199 SmallVector<SDValue, 8> Operands; 2200 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 2201 2202 MVT IndexVT; 2203 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 2204 /*IsStridedOrIndexed*/ true, Operands, 2205 /*IsLoad=*/false, &IndexVT); 2206 2207 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 2208 "Element count mismatch"); 2209 2210 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2211 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 2212 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 2213 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 2214 report_fatal_error("The V extension does not support EEW=64 for index " 2215 "values when XLEN=32"); 2216 } 2217 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 2218 IsMasked, IsOrdered, IndexLog2EEW, 2219 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 2220 MachineSDNode *Store = 2221 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2222 2223 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 2224 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 2225 2226 ReplaceNode(Node, Store); 2227 return; 2228 } 2229 case Intrinsic::riscv_vsm: 2230 case Intrinsic::riscv_vse: 2231 case Intrinsic::riscv_vse_mask: 2232 case Intrinsic::riscv_vsse: 2233 case Intrinsic::riscv_vsse_mask: { 2234 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 2235 IntNo == Intrinsic::riscv_vsse_mask; 2236 bool IsStrided = 2237 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 2238 2239 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 2240 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2241 2242 unsigned CurOp = 2; 2243 SmallVector<SDValue, 8> Operands; 2244 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 2245 2246 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 2247 Operands); 2248 2249 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2250 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 2251 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 2252 MachineSDNode *Store = 2253 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2254 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 2255 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 2256 2257 ReplaceNode(Node, Store); 2258 return; 2259 } 2260 case Intrinsic::riscv_sf_vc_x_se: 2261 case Intrinsic::riscv_sf_vc_i_se: 2262 selectSF_VC_X_SE(Node); 2263 return; 2264 } 2265 break; 2266 } 2267 case ISD::BITCAST: { 2268 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 2269 // Just drop bitcasts between vectors if both are fixed or both are 2270 // scalable. 2271 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 2272 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 2273 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 2274 CurDAG->RemoveDeadNode(Node); 2275 return; 2276 } 2277 break; 2278 } 2279 case ISD::INSERT_SUBVECTOR: 2280 case RISCVISD::TUPLE_INSERT: { 2281 SDValue V = Node->getOperand(0); 2282 SDValue SubV = Node->getOperand(1); 2283 SDLoc DL(SubV); 2284 auto Idx = Node->getConstantOperandVal(2); 2285 MVT SubVecVT = SubV.getSimpleValueType(); 2286 2287 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2288 MVT SubVecContainerVT = SubVecVT; 2289 // Establish the correct scalable-vector types for any fixed-length type. 2290 if (SubVecVT.isFixedLengthVector()) { 2291 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 2292 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock); 2293 [[maybe_unused]] bool ExactlyVecRegSized = 2294 Subtarget->expandVScale(SubVecVT.getSizeInBits()) 2295 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize)); 2296 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits()) 2297 .getKnownMinValue())); 2298 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef())); 2299 } 2300 MVT ContainerVT = VT; 2301 if (VT.isFixedLengthVector()) 2302 ContainerVT = TLI.getContainerForFixedLengthVector(VT); 2303 2304 const auto *TRI = Subtarget->getRegisterInfo(); 2305 unsigned SubRegIdx; 2306 std::tie(SubRegIdx, Idx) = 2307 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2308 ContainerVT, SubVecContainerVT, Idx, TRI); 2309 2310 // If the Idx hasn't been completely eliminated then this is a subvector 2311 // insert which doesn't naturally align to a vector register. These must 2312 // be handled using instructions to manipulate the vector registers. 2313 if (Idx != 0) 2314 break; 2315 2316 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 2317 [[maybe_unused]] bool IsSubVecPartReg = 2318 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 2319 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 2320 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 2321 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg || 2322 V.isUndef()) && 2323 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 2324 "the subvector is smaller than a full-sized register"); 2325 2326 // If we haven't set a SubRegIdx, then we must be going between 2327 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 2328 if (SubRegIdx == RISCV::NoSubRegister) { 2329 unsigned InRegClassID = 2330 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT); 2331 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2332 InRegClassID && 2333 "Unexpected subvector extraction"); 2334 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2335 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 2336 DL, VT, SubV, RC); 2337 ReplaceNode(Node, NewNode); 2338 return; 2339 } 2340 2341 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 2342 ReplaceNode(Node, Insert.getNode()); 2343 return; 2344 } 2345 case ISD::EXTRACT_SUBVECTOR: 2346 case RISCVISD::TUPLE_EXTRACT: { 2347 SDValue V = Node->getOperand(0); 2348 auto Idx = Node->getConstantOperandVal(1); 2349 MVT InVT = V.getSimpleValueType(); 2350 SDLoc DL(V); 2351 2352 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2353 MVT SubVecContainerVT = VT; 2354 // Establish the correct scalable-vector types for any fixed-length type. 2355 if (VT.isFixedLengthVector()) { 2356 assert(Idx == 0); 2357 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 2358 } 2359 if (InVT.isFixedLengthVector()) 2360 InVT = TLI.getContainerForFixedLengthVector(InVT); 2361 2362 const auto *TRI = Subtarget->getRegisterInfo(); 2363 unsigned SubRegIdx; 2364 std::tie(SubRegIdx, Idx) = 2365 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2366 InVT, SubVecContainerVT, Idx, TRI); 2367 2368 // If the Idx hasn't been completely eliminated then this is a subvector 2369 // extract which doesn't naturally align to a vector register. These must 2370 // be handled using instructions to manipulate the vector registers. 2371 if (Idx != 0) 2372 break; 2373 2374 // If we haven't set a SubRegIdx, then we must be going between 2375 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 2376 if (SubRegIdx == RISCV::NoSubRegister) { 2377 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 2378 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2379 InRegClassID && 2380 "Unexpected subvector extraction"); 2381 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2382 SDNode *NewNode = 2383 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 2384 ReplaceNode(Node, NewNode); 2385 return; 2386 } 2387 2388 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 2389 ReplaceNode(Node, Extract.getNode()); 2390 return; 2391 } 2392 case RISCVISD::VMV_S_X_VL: 2393 case RISCVISD::VFMV_S_F_VL: 2394 case RISCVISD::VMV_V_X_VL: 2395 case RISCVISD::VFMV_V_F_VL: { 2396 // Try to match splat of a scalar load to a strided load with stride of x0. 2397 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 2398 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 2399 if (!Node->getOperand(0).isUndef()) 2400 break; 2401 SDValue Src = Node->getOperand(1); 2402 auto *Ld = dyn_cast<LoadSDNode>(Src); 2403 // Can't fold load update node because the second 2404 // output is used so that load update node can't be removed. 2405 if (!Ld || Ld->isIndexed()) 2406 break; 2407 EVT MemVT = Ld->getMemoryVT(); 2408 // The memory VT should be the same size as the element type. 2409 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 2410 break; 2411 if (!IsProfitableToFold(Src, Node, Node) || 2412 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 2413 break; 2414 2415 SDValue VL; 2416 if (IsScalarMove) { 2417 // We could deal with more VL if we update the VSETVLI insert pass to 2418 // avoid introducing more VSETVLI. 2419 if (!isOneConstant(Node->getOperand(2))) 2420 break; 2421 selectVLOp(Node->getOperand(2), VL); 2422 } else 2423 selectVLOp(Node->getOperand(2), VL); 2424 2425 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2426 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 2427 2428 // If VL=1, then we don't need to do a strided load and can just do a 2429 // regular load. 2430 bool IsStrided = !isOneConstant(VL); 2431 2432 // Only do a strided load if we have optimized zero-stride vector load. 2433 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) 2434 break; 2435 2436 SmallVector<SDValue> Operands = { 2437 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0), 2438 Ld->getBasePtr()}; 2439 if (IsStrided) 2440 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); 2441 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; 2442 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 2443 Operands.append({VL, SEW, PolicyOp, Ld->getChain()}); 2444 2445 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2446 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 2447 /*IsMasked*/ false, IsStrided, /*FF*/ false, 2448 Log2SEW, static_cast<unsigned>(LMUL)); 2449 MachineSDNode *Load = 2450 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); 2451 // Update the chain. 2452 ReplaceUses(Src.getValue(1), SDValue(Load, 1)); 2453 // Record the mem-refs 2454 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 2455 // Replace the splat with the vlse. 2456 ReplaceNode(Node, Load); 2457 return; 2458 } 2459 case ISD::PREFETCH: 2460 unsigned Locality = Node->getConstantOperandVal(3); 2461 if (Locality > 2) 2462 break; 2463 2464 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) { 2465 MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); 2466 MMO->setFlags(MachineMemOperand::MONonTemporal); 2467 2468 int NontemporalLevel = 0; 2469 switch (Locality) { 2470 case 0: 2471 NontemporalLevel = 3; // NTL.ALL 2472 break; 2473 case 1: 2474 NontemporalLevel = 1; // NTL.PALL 2475 break; 2476 case 2: 2477 NontemporalLevel = 0; // NTL.P1 2478 break; 2479 default: 2480 llvm_unreachable("unexpected locality value."); 2481 } 2482 2483 if (NontemporalLevel & 0b1) 2484 MMO->setFlags(MONontemporalBit0); 2485 if (NontemporalLevel & 0b10) 2486 MMO->setFlags(MONontemporalBit1); 2487 } 2488 break; 2489 } 2490 2491 // Select the default instruction. 2492 SelectCode(Node); 2493 } 2494 2495 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 2496 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, 2497 std::vector<SDValue> &OutOps) { 2498 // Always produce a register and immediate operand, as expected by 2499 // RISCVAsmPrinter::PrintAsmMemoryOperand. 2500 switch (ConstraintID) { 2501 case InlineAsm::ConstraintCode::o: 2502 case InlineAsm::ConstraintCode::m: { 2503 SDValue Op0, Op1; 2504 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1); 2505 assert(Found && "SelectAddrRegImm should always succeed"); 2506 OutOps.push_back(Op0); 2507 OutOps.push_back(Op1); 2508 return false; 2509 } 2510 case InlineAsm::ConstraintCode::A: 2511 OutOps.push_back(Op); 2512 OutOps.push_back( 2513 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT())); 2514 return false; 2515 default: 2516 report_fatal_error("Unexpected asm memory constraint " + 2517 InlineAsm::getMemConstraintName(ConstraintID)); 2518 } 2519 2520 return true; 2521 } 2522 2523 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 2524 SDValue &Offset) { 2525 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 2526 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 2527 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 2528 return true; 2529 } 2530 2531 return false; 2532 } 2533 2534 // Fold constant addresses. 2535 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 2536 const MVT VT, const RISCVSubtarget *Subtarget, 2537 SDValue Addr, SDValue &Base, SDValue &Offset, 2538 bool IsPrefetch = false, 2539 bool IsRV32Zdinx = false) { 2540 if (!isa<ConstantSDNode>(Addr)) 2541 return false; 2542 2543 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 2544 2545 // If the constant is a simm12, we can fold the whole constant and use X0 as 2546 // the base. If the constant can be materialized with LUI+simm12, use LUI as 2547 // the base. We can't use generateInstSeq because it favors LUI+ADDIW. 2548 int64_t Lo12 = SignExtend64<12>(CVal); 2549 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 2550 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 2551 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2552 return false; 2553 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4)) 2554 return false; 2555 2556 if (Hi) { 2557 int64_t Hi20 = (Hi >> 12) & 0xfffff; 2558 Base = SDValue( 2559 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 2560 CurDAG->getTargetConstant(Hi20, DL, VT)), 2561 0); 2562 } else { 2563 Base = CurDAG->getRegister(RISCV::X0, VT); 2564 } 2565 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT); 2566 return true; 2567 } 2568 2569 // Ask how constant materialization would handle this constant. 2570 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget); 2571 2572 // If the last instruction would be an ADDI, we can fold its immediate and 2573 // emit the rest of the sequence as the base. 2574 if (Seq.back().getOpcode() != RISCV::ADDI) 2575 return false; 2576 Lo12 = Seq.back().getImm(); 2577 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2578 return false; 2579 if (IsRV32Zdinx && !isInt<12>(Lo12 + 4)) 2580 return false; 2581 2582 // Drop the last instruction. 2583 Seq.pop_back(); 2584 assert(!Seq.empty() && "Expected more instructions in sequence"); 2585 2586 Base = selectImmSeq(CurDAG, DL, VT, Seq); 2587 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT); 2588 return true; 2589 } 2590 2591 // Is this ADD instruction only used as the base pointer of scalar loads and 2592 // stores? 2593 static bool isWorthFoldingAdd(SDValue Add) { 2594 for (auto *User : Add->users()) { 2595 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE && 2596 User->getOpcode() != ISD::ATOMIC_LOAD && 2597 User->getOpcode() != ISD::ATOMIC_STORE) 2598 return false; 2599 EVT VT = cast<MemSDNode>(User)->getMemoryVT(); 2600 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && 2601 VT != MVT::f64) 2602 return false; 2603 // Don't allow stores of the value. It must be used as the address. 2604 if (User->getOpcode() == ISD::STORE && 2605 cast<StoreSDNode>(User)->getValue() == Add) 2606 return false; 2607 if (User->getOpcode() == ISD::ATOMIC_STORE && 2608 cast<AtomicSDNode>(User)->getVal() == Add) 2609 return false; 2610 } 2611 2612 return true; 2613 } 2614 2615 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, 2616 unsigned MaxShiftAmount, 2617 SDValue &Base, SDValue &Index, 2618 SDValue &Scale) { 2619 EVT VT = Addr.getSimpleValueType(); 2620 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, 2621 SDValue &Shift) { 2622 uint64_t ShiftAmt = 0; 2623 Index = N; 2624 2625 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) { 2626 // Only match shifts by a value in range [0, MaxShiftAmount]. 2627 if (N.getConstantOperandVal(1) <= MaxShiftAmount) { 2628 Index = N.getOperand(0); 2629 ShiftAmt = N.getConstantOperandVal(1); 2630 } 2631 } 2632 2633 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT); 2634 return ShiftAmt != 0; 2635 }; 2636 2637 if (Addr.getOpcode() == ISD::ADD) { 2638 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 2639 SDValue AddrB = Addr.getOperand(0); 2640 if (AddrB.getOpcode() == ISD::ADD && 2641 UnwrapShl(AddrB.getOperand(0), Index, Scale) && 2642 !isa<ConstantSDNode>(AddrB.getOperand(1)) && 2643 isInt<12>(C1->getSExtValue())) { 2644 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) 2645 SDValue C1Val = 2646 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT); 2647 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, 2648 AddrB.getOperand(1), C1Val), 2649 0); 2650 return true; 2651 } 2652 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) { 2653 Base = Addr.getOperand(1); 2654 return true; 2655 } else { 2656 UnwrapShl(Addr.getOperand(1), Index, Scale); 2657 Base = Addr.getOperand(0); 2658 return true; 2659 } 2660 } else if (UnwrapShl(Addr, Index, Scale)) { 2661 EVT VT = Addr.getValueType(); 2662 Base = CurDAG->getRegister(RISCV::X0, VT); 2663 return true; 2664 } 2665 2666 return false; 2667 } 2668 2669 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 2670 SDValue &Offset, bool IsRV32Zdinx) { 2671 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2672 return true; 2673 2674 SDLoc DL(Addr); 2675 MVT VT = Addr.getSimpleValueType(); 2676 2677 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 2678 // If this is non RV32Zdinx we can always fold. 2679 if (!IsRV32Zdinx) { 2680 Base = Addr.getOperand(0); 2681 Offset = Addr.getOperand(1); 2682 return true; 2683 } 2684 2685 // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4 2686 // to the offset when we expand in RISCVExpandPseudoInsts. 2687 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) { 2688 const DataLayout &DL = CurDAG->getDataLayout(); 2689 Align Alignment = commonAlignment( 2690 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2691 if (Alignment > 4) { 2692 Base = Addr.getOperand(0); 2693 Offset = Addr.getOperand(1); 2694 return true; 2695 } 2696 } 2697 if (auto *CP = dyn_cast<ConstantPoolSDNode>(Addr.getOperand(1))) { 2698 Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); 2699 if (Alignment > 4) { 2700 Base = Addr.getOperand(0); 2701 Offset = Addr.getOperand(1); 2702 return true; 2703 } 2704 } 2705 } 2706 2707 int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0; 2708 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2709 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2710 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) { 2711 Base = Addr.getOperand(0); 2712 if (Base.getOpcode() == RISCVISD::ADD_LO) { 2713 SDValue LoOperand = Base.getOperand(1); 2714 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 2715 // If the Lo in (ADD_LO hi, lo) is a global variable's address 2716 // (its low part, really), then we can rely on the alignment of that 2717 // variable to provide a margin of safety before low part can overflow 2718 // the 12 bits of the load/store offset. Check if CVal falls within 2719 // that margin; if so (low part + CVal) can't overflow. 2720 const DataLayout &DL = CurDAG->getDataLayout(); 2721 Align Alignment = commonAlignment( 2722 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2723 if ((CVal == 0 || Alignment > CVal) && 2724 (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) { 2725 int64_t CombinedOffset = CVal + GA->getOffset(); 2726 Base = Base.getOperand(0); 2727 Offset = CurDAG->getTargetGlobalAddress( 2728 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 2729 CombinedOffset, GA->getTargetFlags()); 2730 return true; 2731 } 2732 } 2733 } 2734 2735 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2736 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2737 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT); 2738 return true; 2739 } 2740 } 2741 2742 // Handle ADD with large immediates. 2743 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 2744 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2745 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) && 2746 "simm12 not already handled?"); 2747 2748 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use 2749 // an ADDI for part of the offset and fold the rest into the load/store. 2750 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 2751 if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) { 2752 int64_t Adj = CVal < 0 ? -2048 : 2047; 2753 Base = SDValue( 2754 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 2755 CurDAG->getSignedTargetConstant(Adj, DL, VT)), 2756 0); 2757 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT); 2758 return true; 2759 } 2760 2761 // For larger immediates, we might be able to save one instruction from 2762 // constant materialization by folding the Lo12 bits of the immediate into 2763 // the address. We should only do this if the ADD is only used by loads and 2764 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled 2765 // separately with the full materialized immediate creating extra 2766 // instructions. 2767 if (isWorthFoldingAdd(Addr) && 2768 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2769 Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) { 2770 // Insert an ADD instruction with the materialized Hi52 bits. 2771 Base = SDValue( 2772 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2773 0); 2774 return true; 2775 } 2776 } 2777 2778 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, 2779 /*IsPrefetch=*/false, RV32ZdinxRange)) 2780 return true; 2781 2782 Base = Addr; 2783 Offset = CurDAG->getTargetConstant(0, DL, VT); 2784 return true; 2785 } 2786 2787 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of 2788 /// Offset should be all zeros. 2789 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, 2790 SDValue &Offset) { 2791 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2792 return true; 2793 2794 SDLoc DL(Addr); 2795 MVT VT = Addr.getSimpleValueType(); 2796 2797 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2798 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2799 if (isInt<12>(CVal)) { 2800 Base = Addr.getOperand(0); 2801 2802 // Early-out if not a valid offset. 2803 if ((CVal & 0b11111) != 0) { 2804 Base = Addr; 2805 Offset = CurDAG->getTargetConstant(0, DL, VT); 2806 return true; 2807 } 2808 2809 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2810 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2811 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT); 2812 return true; 2813 } 2814 } 2815 2816 // Handle ADD with large immediates. 2817 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 2818 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2819 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) && 2820 "simm12 not already handled?"); 2821 2822 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save 2823 // one instruction by folding adjustment (-2048 or 2016) into the address. 2824 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) { 2825 int64_t Adj = CVal < 0 ? -2048 : 2016; 2826 int64_t AdjustedOffset = CVal - Adj; 2827 Base = 2828 SDValue(CurDAG->getMachineNode( 2829 RISCV::ADDI, DL, VT, Addr.getOperand(0), 2830 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)), 2831 0); 2832 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT); 2833 return true; 2834 } 2835 2836 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2837 Offset, /*IsPrefetch=*/true)) { 2838 // Insert an ADD instruction with the materialized Hi52 bits. 2839 Base = SDValue( 2840 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2841 0); 2842 return true; 2843 } 2844 } 2845 2846 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, 2847 /*IsPrefetch=*/true)) 2848 return true; 2849 2850 Base = Addr; 2851 Offset = CurDAG->getTargetConstant(0, DL, VT); 2852 return true; 2853 } 2854 2855 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base, 2856 SDValue &Offset) { 2857 if (Addr.getOpcode() != ISD::ADD) 2858 return false; 2859 2860 if (isa<ConstantSDNode>(Addr.getOperand(1))) 2861 return false; 2862 2863 Base = Addr.getOperand(1); 2864 Offset = Addr.getOperand(0); 2865 return true; 2866 } 2867 2868 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 2869 SDValue &ShAmt) { 2870 ShAmt = N; 2871 2872 // Peek through zext. 2873 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) 2874 ShAmt = ShAmt.getOperand(0); 2875 2876 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift 2877 // amount. If there is an AND on the shift amount, we can bypass it if it 2878 // doesn't affect any of those bits. 2879 if (ShAmt.getOpcode() == ISD::AND && 2880 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2881 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); 2882 2883 // Since the max shift amount is a power of 2 we can subtract 1 to make a 2884 // mask that covers the bits needed to represent all shift amounts. 2885 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 2886 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 2887 2888 if (ShMask.isSubsetOf(AndMask)) { 2889 ShAmt = ShAmt.getOperand(0); 2890 } else { 2891 // SimplifyDemandedBits may have optimized the mask so try restoring any 2892 // bits that are known zero. 2893 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0)); 2894 if (!ShMask.isSubsetOf(AndMask | Known.Zero)) 2895 return true; 2896 ShAmt = ShAmt.getOperand(0); 2897 } 2898 } 2899 2900 if (ShAmt.getOpcode() == ISD::ADD && 2901 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2902 uint64_t Imm = ShAmt.getConstantOperandVal(1); 2903 // If we are shifting by X+N where N == 0 mod Size, then just shift by X 2904 // to avoid the ADD. 2905 if (Imm != 0 && Imm % ShiftWidth == 0) { 2906 ShAmt = ShAmt.getOperand(0); 2907 return true; 2908 } 2909 } else if (ShAmt.getOpcode() == ISD::SUB && 2910 isa<ConstantSDNode>(ShAmt.getOperand(0))) { 2911 uint64_t Imm = ShAmt.getConstantOperandVal(0); 2912 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2913 // generate a NEG instead of a SUB of a constant. 2914 if (Imm != 0 && Imm % ShiftWidth == 0) { 2915 SDLoc DL(ShAmt); 2916 EVT VT = ShAmt.getValueType(); 2917 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 2918 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 2919 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 2920 ShAmt.getOperand(1)); 2921 ShAmt = SDValue(Neg, 0); 2922 return true; 2923 } 2924 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 2925 // to generate a NOT instead of a SUB of a constant. 2926 if (Imm % ShiftWidth == ShiftWidth - 1) { 2927 SDLoc DL(ShAmt); 2928 EVT VT = ShAmt.getValueType(); 2929 MachineSDNode *Not = CurDAG->getMachineNode( 2930 RISCV::XORI, DL, VT, ShAmt.getOperand(1), 2931 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true)); 2932 ShAmt = SDValue(Not, 0); 2933 return true; 2934 } 2935 } 2936 2937 return true; 2938 } 2939 2940 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can 2941 /// check for equality with 0. This function emits instructions that convert the 2942 /// seteq/setne into something that can be compared with 0. 2943 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. 2944 /// ISD::SETNE). 2945 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 2946 SDValue &Val) { 2947 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && 2948 "Unexpected condition code!"); 2949 2950 // We're looking for a setcc. 2951 if (N->getOpcode() != ISD::SETCC) 2952 return false; 2953 2954 // Must be an equality comparison. 2955 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2956 if (CCVal != ExpectedCCVal) 2957 return false; 2958 2959 SDValue LHS = N->getOperand(0); 2960 SDValue RHS = N->getOperand(1); 2961 2962 if (!LHS.getValueType().isScalarInteger()) 2963 return false; 2964 2965 // If the RHS side is 0, we don't need any extra instructions, return the LHS. 2966 if (isNullConstant(RHS)) { 2967 Val = LHS; 2968 return true; 2969 } 2970 2971 SDLoc DL(N); 2972 2973 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { 2974 int64_t CVal = C->getSExtValue(); 2975 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and 2976 // non-zero otherwise. 2977 if (CVal == -2048) { 2978 Val = SDValue( 2979 CurDAG->getMachineNode( 2980 RISCV::XORI, DL, N->getValueType(0), LHS, 2981 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))), 2982 0); 2983 return true; 2984 } 2985 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the 2986 // LHS is equal to the RHS and non-zero otherwise. 2987 if (isInt<12>(CVal) || CVal == 2048) { 2988 Val = SDValue( 2989 CurDAG->getMachineNode( 2990 RISCV::ADDI, DL, N->getValueType(0), LHS, 2991 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))), 2992 0); 2993 return true; 2994 } 2995 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) { 2996 Val = SDValue( 2997 CurDAG->getMachineNode( 2998 RISCV::BINVI, DL, N->getValueType(0), LHS, 2999 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))), 3000 0); 3001 return true; 3002 } 3003 } 3004 3005 // If nothing else we can XOR the LHS and RHS to produce zero if they are 3006 // equal and a non-zero value if they aren't. 3007 Val = SDValue( 3008 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0); 3009 return true; 3010 } 3011 3012 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) { 3013 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 3014 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) { 3015 Val = N.getOperand(0); 3016 return true; 3017 } 3018 3019 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) { 3020 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1))) 3021 return N; 3022 3023 SDValue N0 = N.getOperand(0); 3024 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 3025 N.getConstantOperandVal(1) == ShiftAmt && 3026 N0.getConstantOperandVal(1) == ShiftAmt) 3027 return N0.getOperand(0); 3028 3029 return N; 3030 }; 3031 3032 MVT VT = N.getSimpleValueType(); 3033 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) { 3034 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits); 3035 return true; 3036 } 3037 3038 return false; 3039 } 3040 3041 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) { 3042 if (N.getOpcode() == ISD::AND) { 3043 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3044 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) { 3045 Val = N.getOperand(0); 3046 return true; 3047 } 3048 } 3049 MVT VT = N.getSimpleValueType(); 3050 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits); 3051 if (CurDAG->MaskedValueIsZero(N, Mask)) { 3052 Val = N; 3053 return true; 3054 } 3055 3056 return false; 3057 } 3058 3059 /// Look for various patterns that can be done with a SHL that can be folded 3060 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which 3061 /// SHXADD we are trying to match. 3062 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, 3063 SDValue &Val) { 3064 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 3065 SDValue N0 = N.getOperand(0); 3066 3067 if (bool LeftShift = N0.getOpcode() == ISD::SHL; 3068 (LeftShift || N0.getOpcode() == ISD::SRL) && 3069 isa<ConstantSDNode>(N0.getOperand(1))) { 3070 uint64_t Mask = N.getConstantOperandVal(1); 3071 unsigned C2 = N0.getConstantOperandVal(1); 3072 3073 unsigned XLen = Subtarget->getXLen(); 3074 if (LeftShift) 3075 Mask &= maskTrailingZeros<uint64_t>(C2); 3076 else 3077 Mask &= maskTrailingOnes<uint64_t>(XLen - C2); 3078 3079 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no 3080 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3 3081 // followed by a SHXADD with c3 for the X amount. 3082 if (isShiftedMask_64(Mask)) { 3083 unsigned Leading = XLen - llvm::bit_width(Mask); 3084 unsigned Trailing = llvm::countr_zero(Mask); 3085 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) { 3086 SDLoc DL(N); 3087 EVT VT = N.getValueType(); 3088 Val = SDValue(CurDAG->getMachineNode( 3089 RISCV::SRLI, DL, VT, N0.getOperand(0), 3090 CurDAG->getTargetConstant(Trailing - C2, DL, VT)), 3091 0); 3092 return true; 3093 } 3094 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2 3095 // leading zeros and c3 trailing zeros. We can use an SRLI by C3 3096 // followed by a SHXADD using c3 for the X amount. 3097 if (!LeftShift && Leading == C2 && Trailing == ShAmt) { 3098 SDLoc DL(N); 3099 EVT VT = N.getValueType(); 3100 Val = SDValue( 3101 CurDAG->getMachineNode( 3102 RISCV::SRLI, DL, VT, N0.getOperand(0), 3103 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), 3104 0); 3105 return true; 3106 } 3107 } 3108 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() && 3109 isa<ConstantSDNode>(N0.getOperand(1))) { 3110 uint64_t Mask = N.getConstantOperandVal(1); 3111 unsigned C2 = N0.getConstantOperandVal(1); 3112 3113 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 3114 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can 3115 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as 3116 // the X amount. 3117 if (isShiftedMask_64(Mask)) { 3118 unsigned XLen = Subtarget->getXLen(); 3119 unsigned Leading = XLen - llvm::bit_width(Mask); 3120 unsigned Trailing = llvm::countr_zero(Mask); 3121 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) { 3122 SDLoc DL(N); 3123 EVT VT = N.getValueType(); 3124 Val = SDValue(CurDAG->getMachineNode( 3125 RISCV::SRAI, DL, VT, N0.getOperand(0), 3126 CurDAG->getTargetConstant(C2 - Leading, DL, VT)), 3127 0); 3128 Val = SDValue(CurDAG->getMachineNode( 3129 RISCV::SRLI, DL, VT, Val, 3130 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)), 3131 0); 3132 return true; 3133 } 3134 } 3135 } 3136 } else if (bool LeftShift = N.getOpcode() == ISD::SHL; 3137 (LeftShift || N.getOpcode() == ISD::SRL) && 3138 isa<ConstantSDNode>(N.getOperand(1))) { 3139 SDValue N0 = N.getOperand(0); 3140 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 3141 isa<ConstantSDNode>(N0.getOperand(1))) { 3142 uint64_t Mask = N0.getConstantOperandVal(1); 3143 if (isShiftedMask_64(Mask)) { 3144 unsigned C1 = N.getConstantOperandVal(1); 3145 unsigned XLen = Subtarget->getXLen(); 3146 unsigned Leading = XLen - llvm::bit_width(Mask); 3147 unsigned Trailing = llvm::countr_zero(Mask); 3148 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 3149 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 3150 if (LeftShift && Leading == 32 && Trailing > 0 && 3151 (Trailing + C1) == ShAmt) { 3152 SDLoc DL(N); 3153 EVT VT = N.getValueType(); 3154 Val = SDValue(CurDAG->getMachineNode( 3155 RISCV::SRLIW, DL, VT, N0.getOperand(0), 3156 CurDAG->getTargetConstant(Trailing, DL, VT)), 3157 0); 3158 return true; 3159 } 3160 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 3161 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 3162 if (!LeftShift && Leading == 32 && Trailing > C1 && 3163 (Trailing - C1) == ShAmt) { 3164 SDLoc DL(N); 3165 EVT VT = N.getValueType(); 3166 Val = SDValue(CurDAG->getMachineNode( 3167 RISCV::SRLIW, DL, VT, N0.getOperand(0), 3168 CurDAG->getTargetConstant(Trailing, DL, VT)), 3169 0); 3170 return true; 3171 } 3172 } 3173 } 3174 } 3175 3176 return false; 3177 } 3178 3179 /// Look for various patterns that can be done with a SHL that can be folded 3180 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which 3181 /// SHXADD_UW we are trying to match. 3182 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, 3183 SDValue &Val) { 3184 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) && 3185 N.hasOneUse()) { 3186 SDValue N0 = N.getOperand(0); 3187 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 3188 N0.hasOneUse()) { 3189 uint64_t Mask = N.getConstantOperandVal(1); 3190 unsigned C2 = N0.getConstantOperandVal(1); 3191 3192 Mask &= maskTrailingZeros<uint64_t>(C2); 3193 3194 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 3195 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by 3196 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. 3197 if (isShiftedMask_64(Mask)) { 3198 unsigned Leading = llvm::countl_zero(Mask); 3199 unsigned Trailing = llvm::countr_zero(Mask); 3200 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { 3201 SDLoc DL(N); 3202 EVT VT = N.getValueType(); 3203 Val = SDValue(CurDAG->getMachineNode( 3204 RISCV::SLLI, DL, VT, N0.getOperand(0), 3205 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), 3206 0); 3207 return true; 3208 } 3209 } 3210 } 3211 } 3212 3213 return false; 3214 } 3215 3216 bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) { 3217 if (!isa<ConstantSDNode>(N)) 3218 return false; 3219 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue(); 3220 3221 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI. 3222 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1)) 3223 return false; 3224 3225 // Abandon this transform if the constant is needed elsewhere. 3226 for (const SDNode *U : N->users()) { 3227 switch (U->getOpcode()) { 3228 case ISD::AND: 3229 case ISD::OR: 3230 case ISD::XOR: 3231 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb())) 3232 return false; 3233 break; 3234 case RISCVISD::VMV_V_X_VL: 3235 if (!Subtarget->hasStdExtZvkb()) 3236 return false; 3237 if (!all_of(U->users(), [](const SDNode *V) { 3238 return V->getOpcode() == ISD::AND || 3239 V->getOpcode() == RISCVISD::AND_VL; 3240 })) 3241 return false; 3242 break; 3243 default: 3244 return false; 3245 } 3246 } 3247 3248 // For 64-bit constants, the instruction sequences get complex, 3249 // so we select inverted only if it's cheaper. 3250 if (!isInt<32>(Imm)) { 3251 int OrigImmCost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget, 3252 /*CompressionCost=*/true); 3253 int NegImmCost = RISCVMatInt::getIntMatCost(APInt(64, ~Imm), 64, *Subtarget, 3254 /*CompressionCost=*/true); 3255 if (OrigImmCost <= NegImmCost) 3256 return false; 3257 } 3258 3259 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget); 3260 return true; 3261 } 3262 3263 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, 3264 unsigned Bits, 3265 const TargetInstrInfo *TII) { 3266 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode()); 3267 3268 if (!MCOpcode) 3269 return false; 3270 3271 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode()); 3272 const uint64_t TSFlags = MCID.TSFlags; 3273 if (!RISCVII::hasSEWOp(TSFlags)) 3274 return false; 3275 assert(RISCVII::hasVLOp(TSFlags)); 3276 3277 bool HasGlueOp = User->getGluedNode() != nullptr; 3278 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; 3279 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; 3280 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); 3281 unsigned VLIdx = 3282 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; 3283 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1); 3284 3285 if (UserOpNo == VLIdx) 3286 return false; 3287 3288 auto NumDemandedBits = 3289 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW); 3290 return NumDemandedBits && Bits >= *NumDemandedBits; 3291 } 3292 3293 // Return true if all users of this SDNode* only consume the lower \p Bits. 3294 // This can be used to form W instructions for add/sub/mul/shl even when the 3295 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 3296 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 3297 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 3298 // the add/sub/mul/shl to become non-W instructions. By checking the users we 3299 // may be able to use a W instruction and CSE with the other instruction if 3300 // this has happened. We could try to detect that the CSE opportunity exists 3301 // before doing this, but that would be more complicated. 3302 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, 3303 const unsigned Depth) const { 3304 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 3305 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 3306 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || 3307 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || 3308 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 3309 isa<ConstantSDNode>(Node) || Depth != 0) && 3310 "Unexpected opcode"); 3311 3312 if (Depth >= SelectionDAG::MaxRecursionDepth) 3313 return false; 3314 3315 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked 3316 // the VT. Ensure the type is scalar to avoid wasting time on vectors. 3317 if (Depth == 0 && !Node->getValueType(0).isScalarInteger()) 3318 return false; 3319 3320 for (SDUse &Use : Node->uses()) { 3321 SDNode *User = Use.getUser(); 3322 // Users of this node should have already been instruction selected 3323 if (!User->isMachineOpcode()) 3324 return false; 3325 3326 // TODO: Add more opcodes? 3327 switch (User->getMachineOpcode()) { 3328 default: 3329 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII)) 3330 break; 3331 return false; 3332 case RISCV::ADDW: 3333 case RISCV::ADDIW: 3334 case RISCV::SUBW: 3335 case RISCV::MULW: 3336 case RISCV::SLLW: 3337 case RISCV::SLLIW: 3338 case RISCV::SRAW: 3339 case RISCV::SRAIW: 3340 case RISCV::SRLW: 3341 case RISCV::SRLIW: 3342 case RISCV::DIVW: 3343 case RISCV::DIVUW: 3344 case RISCV::REMW: 3345 case RISCV::REMUW: 3346 case RISCV::ROLW: 3347 case RISCV::RORW: 3348 case RISCV::RORIW: 3349 case RISCV::CLZW: 3350 case RISCV::CTZW: 3351 case RISCV::CPOPW: 3352 case RISCV::SLLI_UW: 3353 case RISCV::FMV_W_X: 3354 case RISCV::FCVT_H_W: 3355 case RISCV::FCVT_H_W_INX: 3356 case RISCV::FCVT_H_WU: 3357 case RISCV::FCVT_H_WU_INX: 3358 case RISCV::FCVT_S_W: 3359 case RISCV::FCVT_S_W_INX: 3360 case RISCV::FCVT_S_WU: 3361 case RISCV::FCVT_S_WU_INX: 3362 case RISCV::FCVT_D_W: 3363 case RISCV::FCVT_D_W_INX: 3364 case RISCV::FCVT_D_WU: 3365 case RISCV::FCVT_D_WU_INX: 3366 case RISCV::TH_REVW: 3367 case RISCV::TH_SRRIW: 3368 if (Bits >= 32) 3369 break; 3370 return false; 3371 case RISCV::SLL: 3372 case RISCV::SRA: 3373 case RISCV::SRL: 3374 case RISCV::ROL: 3375 case RISCV::ROR: 3376 case RISCV::BSET: 3377 case RISCV::BCLR: 3378 case RISCV::BINV: 3379 // Shift amount operands only use log2(Xlen) bits. 3380 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen())) 3381 break; 3382 return false; 3383 case RISCV::SLLI: 3384 // SLLI only uses the lower (XLen - ShAmt) bits. 3385 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1)) 3386 break; 3387 return false; 3388 case RISCV::ANDI: 3389 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1))) 3390 break; 3391 goto RecCheck; 3392 case RISCV::ORI: { 3393 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue(); 3394 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) 3395 break; 3396 [[fallthrough]]; 3397 } 3398 case RISCV::AND: 3399 case RISCV::OR: 3400 case RISCV::XOR: 3401 case RISCV::XORI: 3402 case RISCV::ANDN: 3403 case RISCV::ORN: 3404 case RISCV::XNOR: 3405 case RISCV::SH1ADD: 3406 case RISCV::SH2ADD: 3407 case RISCV::SH3ADD: 3408 RecCheck: 3409 if (hasAllNBitUsers(User, Bits, Depth + 1)) 3410 break; 3411 return false; 3412 case RISCV::SRLI: { 3413 unsigned ShAmt = User->getConstantOperandVal(1); 3414 // If we are shifting right by less than Bits, and users don't demand any 3415 // bits that were shifted into [Bits-1:0], then we can consider this as an 3416 // N-Bit user. 3417 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1)) 3418 break; 3419 return false; 3420 } 3421 case RISCV::SEXT_B: 3422 case RISCV::PACKH: 3423 if (Bits >= 8) 3424 break; 3425 return false; 3426 case RISCV::SEXT_H: 3427 case RISCV::FMV_H_X: 3428 case RISCV::ZEXT_H_RV32: 3429 case RISCV::ZEXT_H_RV64: 3430 case RISCV::PACKW: 3431 if (Bits >= 16) 3432 break; 3433 return false; 3434 case RISCV::PACK: 3435 if (Bits >= (Subtarget->getXLen() / 2)) 3436 break; 3437 return false; 3438 case RISCV::ADD_UW: 3439 case RISCV::SH1ADD_UW: 3440 case RISCV::SH2ADD_UW: 3441 case RISCV::SH3ADD_UW: 3442 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 3443 // 32 bits. 3444 if (Use.getOperandNo() == 0 && Bits >= 32) 3445 break; 3446 return false; 3447 case RISCV::SB: 3448 if (Use.getOperandNo() == 0 && Bits >= 8) 3449 break; 3450 return false; 3451 case RISCV::SH: 3452 if (Use.getOperandNo() == 0 && Bits >= 16) 3453 break; 3454 return false; 3455 case RISCV::SW: 3456 if (Use.getOperandNo() == 0 && Bits >= 32) 3457 break; 3458 return false; 3459 } 3460 } 3461 3462 return true; 3463 } 3464 3465 // Select a constant that can be represented as (sign_extend(imm5) << imm2). 3466 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5, 3467 SDValue &Shl2) { 3468 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 3469 int64_t Offset = C->getSExtValue(); 3470 unsigned Shift; 3471 for (Shift = 0; Shift < 4; Shift++) 3472 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) 3473 break; 3474 3475 // Constant cannot be encoded. 3476 if (Shift == 4) 3477 return false; 3478 3479 EVT Ty = N->getValueType(0); 3480 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), Ty); 3481 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty); 3482 return true; 3483 } 3484 3485 return false; 3486 } 3487 3488 // Select VL as a 5 bit immediate or a value that will become a register. This 3489 // allows us to choose betwen VSETIVLI or VSETVLI later. 3490 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 3491 auto *C = dyn_cast<ConstantSDNode>(N); 3492 if (C && isUInt<5>(C->getZExtValue())) { 3493 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 3494 N->getValueType(0)); 3495 } else if (C && C->isAllOnes()) { 3496 // Treat all ones as VLMax. 3497 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 3498 N->getValueType(0)); 3499 } else if (isa<RegisterSDNode>(N) && 3500 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { 3501 // All our VL operands use an operand that allows GPRNoX0 or an immediate 3502 // as the register class. Convert X0 to a special immediate to pass the 3503 // MachineVerifier. This is recognized specially by the vsetvli insertion 3504 // pass. 3505 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 3506 N->getValueType(0)); 3507 } else { 3508 VL = N; 3509 } 3510 3511 return true; 3512 } 3513 3514 static SDValue findVSplat(SDValue N) { 3515 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) { 3516 if (!N.getOperand(0).isUndef()) 3517 return SDValue(); 3518 N = N.getOperand(1); 3519 } 3520 SDValue Splat = N; 3521 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL && 3522 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) || 3523 !Splat.getOperand(0).isUndef()) 3524 return SDValue(); 3525 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands"); 3526 return Splat; 3527 } 3528 3529 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 3530 SDValue Splat = findVSplat(N); 3531 if (!Splat) 3532 return false; 3533 3534 SplatVal = Splat.getOperand(1); 3535 return true; 3536 } 3537 3538 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, 3539 SelectionDAG &DAG, 3540 const RISCVSubtarget &Subtarget, 3541 std::function<bool(int64_t)> ValidateImm) { 3542 SDValue Splat = findVSplat(N); 3543 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1))) 3544 return false; 3545 3546 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits(); 3547 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() && 3548 "Unexpected splat operand type"); 3549 3550 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 3551 // type is wider than the resulting vector element type: an implicit 3552 // truncation first takes place. Therefore, perform a manual 3553 // truncation/sign-extension in order to ignore any truncated bits and catch 3554 // any zero-extended immediate. 3555 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 3556 // sign-extending to (XLenVT -1). 3557 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize); 3558 3559 int64_t SplatImm = SplatConst.getSExtValue(); 3560 3561 if (!ValidateImm(SplatImm)) 3562 return false; 3563 3564 SplatVal = 3565 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT()); 3566 return true; 3567 } 3568 3569 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 3570 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget, 3571 [](int64_t Imm) { return isInt<5>(Imm); }); 3572 } 3573 3574 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 3575 return selectVSplatImmHelper( 3576 N, SplatVal, *CurDAG, *Subtarget, 3577 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 3578 } 3579 3580 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 3581 SDValue &SplatVal) { 3582 return selectVSplatImmHelper( 3583 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 3584 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 3585 }); 3586 } 3587 3588 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, 3589 SDValue &SplatVal) { 3590 return selectVSplatImmHelper( 3591 N, SplatVal, *CurDAG, *Subtarget, 3592 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); }); 3593 } 3594 3595 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { 3596 auto IsExtOrTrunc = [](SDValue N) { 3597 switch (N->getOpcode()) { 3598 case ISD::SIGN_EXTEND: 3599 case ISD::ZERO_EXTEND: 3600 // There's no passthru on these _VL nodes so any VL/mask is ok, since any 3601 // inactive elements will be undef. 3602 case RISCVISD::TRUNCATE_VECTOR_VL: 3603 case RISCVISD::VSEXT_VL: 3604 case RISCVISD::VZEXT_VL: 3605 return true; 3606 default: 3607 return false; 3608 } 3609 }; 3610 3611 // We can have multiple nested nodes, so unravel them all if needed. 3612 while (IsExtOrTrunc(N)) { 3613 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8) 3614 return false; 3615 N = N->getOperand(0); 3616 } 3617 3618 return selectVSplat(N, SplatVal); 3619 } 3620 3621 bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) { 3622 // Allow bitcasts from XLenVT -> FP. 3623 if (N.getOpcode() == ISD::BITCAST && 3624 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) { 3625 Imm = N.getOperand(0); 3626 return true; 3627 } 3628 // Allow moves from XLenVT to FP. 3629 if (N.getOpcode() == RISCVISD::FMV_H_X || 3630 N.getOpcode() == RISCVISD::FMV_W_X_RV64) { 3631 Imm = N.getOperand(0); 3632 return true; 3633 } 3634 3635 // Otherwise, look for FP constants that can materialized with scalar int. 3636 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode()); 3637 if (!CFP) 3638 return false; 3639 const APFloat &APF = CFP->getValueAPF(); 3640 // td can handle +0.0 already. 3641 if (APF.isPosZero()) 3642 return false; 3643 3644 MVT VT = CFP->getSimpleValueType(0); 3645 3646 MVT XLenVT = Subtarget->getXLenVT(); 3647 if (VT == MVT::f64 && !Subtarget->is64Bit()) { 3648 assert(APF.isNegZero() && "Unexpected constant."); 3649 return false; 3650 } 3651 SDLoc DL(N); 3652 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(), 3653 *Subtarget); 3654 return true; 3655 } 3656 3657 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 3658 SDValue &Imm) { 3659 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 3660 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 3661 3662 if (!isInt<5>(ImmVal)) 3663 return false; 3664 3665 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N), 3666 Subtarget->getXLenVT()); 3667 return true; 3668 } 3669 3670 return false; 3671 } 3672 3673 // Try to remove sext.w if the input is a W instruction or can be made into 3674 // a W instruction cheaply. 3675 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 3676 // Look for the sext.w pattern, addiw rd, rs1, 0. 3677 if (N->getMachineOpcode() != RISCV::ADDIW || 3678 !isNullConstant(N->getOperand(1))) 3679 return false; 3680 3681 SDValue N0 = N->getOperand(0); 3682 if (!N0.isMachineOpcode()) 3683 return false; 3684 3685 switch (N0.getMachineOpcode()) { 3686 default: 3687 break; 3688 case RISCV::ADD: 3689 case RISCV::ADDI: 3690 case RISCV::SUB: 3691 case RISCV::MUL: 3692 case RISCV::SLLI: { 3693 // Convert sext.w+add/sub/mul to their W instructions. This will create 3694 // a new independent instruction. This improves latency. 3695 unsigned Opc; 3696 switch (N0.getMachineOpcode()) { 3697 default: 3698 llvm_unreachable("Unexpected opcode!"); 3699 case RISCV::ADD: Opc = RISCV::ADDW; break; 3700 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 3701 case RISCV::SUB: Opc = RISCV::SUBW; break; 3702 case RISCV::MUL: Opc = RISCV::MULW; break; 3703 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 3704 } 3705 3706 SDValue N00 = N0.getOperand(0); 3707 SDValue N01 = N0.getOperand(1); 3708 3709 // Shift amount needs to be uimm5. 3710 if (N0.getMachineOpcode() == RISCV::SLLI && 3711 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 3712 break; 3713 3714 SDNode *Result = 3715 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 3716 N00, N01); 3717 ReplaceUses(N, Result); 3718 return true; 3719 } 3720 case RISCV::ADDW: 3721 case RISCV::ADDIW: 3722 case RISCV::SUBW: 3723 case RISCV::MULW: 3724 case RISCV::SLLIW: 3725 case RISCV::PACKW: 3726 case RISCV::TH_MULAW: 3727 case RISCV::TH_MULAH: 3728 case RISCV::TH_MULSW: 3729 case RISCV::TH_MULSH: 3730 if (N0.getValueType() == MVT::i32) 3731 break; 3732 3733 // Result is already sign extended just remove the sext.w. 3734 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 3735 ReplaceUses(N, N0.getNode()); 3736 return true; 3737 } 3738 3739 return false; 3740 } 3741 3742 // After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg 3743 // that's glued to the pseudo. This tries to look up the value that was copied 3744 // to V0. 3745 static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) { 3746 // Check that we're using V0 as a mask register. 3747 if (!isa<RegisterSDNode>(MaskOp) || 3748 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0) 3749 return SDValue(); 3750 3751 // The glued user defines V0. 3752 const auto *Glued = GlueOp.getNode(); 3753 3754 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 3755 return SDValue(); 3756 3757 // Check that we're defining V0 as a mask register. 3758 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 3759 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 3760 return SDValue(); 3761 3762 SDValue MaskSetter = Glued->getOperand(2); 3763 3764 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came 3765 // from an extract_subvector or insert_subvector. 3766 if (MaskSetter->isMachineOpcode() && 3767 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS) 3768 MaskSetter = MaskSetter->getOperand(0); 3769 3770 return MaskSetter; 3771 } 3772 3773 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { 3774 // Check the instruction defining V0; it needs to be a VMSET pseudo. 3775 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp); 3776 if (!MaskSetter) 3777 return false; 3778 3779 const auto IsVMSet = [](unsigned Opc) { 3780 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 3781 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 3782 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 3783 Opc == RISCV::PseudoVMSET_M_B8; 3784 }; 3785 3786 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 3787 // undefined behaviour if it's the wrong bitwidth, so we could choose to 3788 // assume that it's all-ones? Same applies to its VL. 3789 return MaskSetter->isMachineOpcode() && 3790 IsVMSet(MaskSetter.getMachineOpcode()); 3791 } 3792 3793 // Return true if we can make sure mask of N is all-ones mask. 3794 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { 3795 return usesAllOnesMask(N->getOperand(MaskOpIdx), 3796 N->getOperand(N->getNumOperands() - 1)); 3797 } 3798 3799 static bool isImplicitDef(SDValue V) { 3800 if (!V.isMachineOpcode()) 3801 return false; 3802 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) { 3803 for (unsigned I = 1; I < V.getNumOperands(); I += 2) 3804 if (!isImplicitDef(V.getOperand(I))) 3805 return false; 3806 return true; 3807 } 3808 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; 3809 } 3810 3811 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 3812 // corresponding "unmasked" pseudo versions. The mask we're interested in will 3813 // take the form of a V0 physical register operand, with a glued 3814 // register-setting instruction. 3815 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { 3816 const RISCV::RISCVMaskedPseudoInfo *I = 3817 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 3818 if (!I) 3819 return false; 3820 3821 unsigned MaskOpIdx = I->MaskOpIdx; 3822 if (!usesAllOnesMask(N, MaskOpIdx)) 3823 return false; 3824 3825 // There are two classes of pseudos in the table - compares and 3826 // everything else. See the comment on RISCVMaskedPseudo for details. 3827 const unsigned Opc = I->UnmaskedPseudo; 3828 const MCInstrDesc &MCID = TII->get(Opc); 3829 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID); 3830 3831 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode()); 3832 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID); 3833 3834 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) == 3835 RISCVII::hasVecPolicyOp(MCID.TSFlags) && 3836 "Masked and unmasked pseudos are inconsistent"); 3837 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru && 3838 "Unexpected pseudo structure"); 3839 assert(!(HasPassthru && !MaskedHasPassthru) && 3840 "Unmasked pseudo has passthru but masked pseudo doesn't?"); 3841 3842 SmallVector<SDValue, 8> Ops; 3843 // Skip the passthru operand at index 0 if the unmasked don't have one. 3844 bool ShouldSkip = !HasPassthru && MaskedHasPassthru; 3845 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) { 3846 // Skip the mask, and the Glue. 3847 SDValue Op = N->getOperand(I); 3848 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue) 3849 continue; 3850 Ops.push_back(Op); 3851 } 3852 3853 // Transitively apply any node glued to our new node. 3854 const auto *Glued = N->getGluedNode(); 3855 if (auto *TGlued = Glued->getGluedNode()) 3856 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); 3857 3858 MachineSDNode *Result = 3859 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 3860 3861 if (!N->memoperands_empty()) 3862 CurDAG->setNodeMemRefs(Result, N->memoperands()); 3863 3864 Result->setFlags(N->getFlags()); 3865 ReplaceUses(N, Result); 3866 3867 return true; 3868 } 3869 3870 static bool IsVMerge(SDNode *N) { 3871 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM; 3872 } 3873 3874 // Try to fold away VMERGE_VVM instructions into their true operands: 3875 // 3876 // %true = PseudoVADD_VV ... 3877 // %x = PseudoVMERGE_VVM %false, %false, %true, %mask 3878 // -> 3879 // %x = PseudoVADD_VV_MASK %false, ..., %mask 3880 // 3881 // We can only fold if vmerge's passthru operand, vmerge's false operand and 3882 // %true's passthru operand (if it has one) are the same. This is because we 3883 // have to consolidate them into one passthru operand in the result. 3884 // 3885 // If %true is masked, then we can use its mask instead of vmerge's if vmerge's 3886 // mask is all ones. 3887 // 3888 // The resulting VL is the minimum of the two VLs. 3889 // 3890 // The resulting policy is the effective policy the vmerge would have had, 3891 // i.e. whether or not it's passthru operand was implicit-def. 3892 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { 3893 SDValue Passthru, False, True, VL, Mask, Glue; 3894 assert(IsVMerge(N)); 3895 Passthru = N->getOperand(0); 3896 False = N->getOperand(1); 3897 True = N->getOperand(2); 3898 Mask = N->getOperand(3); 3899 VL = N->getOperand(4); 3900 // We always have a glue node for the mask at v0. 3901 Glue = N->getOperand(N->getNumOperands() - 1); 3902 assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0); 3903 assert(Glue.getValueType() == MVT::Glue); 3904 3905 // If the EEW of True is different from vmerge's SEW, then we can't fold. 3906 if (True.getSimpleValueType() != N->getSimpleValueType(0)) 3907 return false; 3908 3909 // We require that either passthru and false are the same, or that passthru 3910 // is undefined. 3911 if (Passthru != False && !isImplicitDef(Passthru)) 3912 return false; 3913 3914 assert(True.getResNo() == 0 && 3915 "Expect True is the first output of an instruction."); 3916 3917 // Need N is the exactly one using True. 3918 if (!True.hasOneUse()) 3919 return false; 3920 3921 if (!True.isMachineOpcode()) 3922 return false; 3923 3924 unsigned TrueOpc = True.getMachineOpcode(); 3925 const MCInstrDesc &TrueMCID = TII->get(TrueOpc); 3926 uint64_t TrueTSFlags = TrueMCID.TSFlags; 3927 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID); 3928 3929 const RISCV::RISCVMaskedPseudoInfo *Info = 3930 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc); 3931 if (!Info) 3932 return false; 3933 3934 // If True has a passthru operand then it needs to be the same as vmerge's 3935 // False, since False will be used for the result's passthru operand. 3936 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) { 3937 SDValue PassthruOpTrue = True->getOperand(0); 3938 if (False != PassthruOpTrue) 3939 return false; 3940 } 3941 3942 // Skip if True has side effect. 3943 if (TII->get(TrueOpc).hasUnmodeledSideEffects()) 3944 return false; 3945 3946 // The last operand of a masked instruction may be glued. 3947 bool HasGlueOp = True->getGluedNode() != nullptr; 3948 3949 // The chain operand may exist either before the glued operands or in the last 3950 // position. 3951 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1; 3952 bool HasChainOp = 3953 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other; 3954 3955 if (HasChainOp) { 3956 // Avoid creating cycles in the DAG. We must ensure that none of the other 3957 // operands depend on True through it's Chain. 3958 SmallVector<const SDNode *, 4> LoopWorklist; 3959 SmallPtrSet<const SDNode *, 16> Visited; 3960 LoopWorklist.push_back(False.getNode()); 3961 LoopWorklist.push_back(Mask.getNode()); 3962 LoopWorklist.push_back(VL.getNode()); 3963 LoopWorklist.push_back(Glue.getNode()); 3964 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist)) 3965 return false; 3966 } 3967 3968 // The vector policy operand may be present for masked intrinsics 3969 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags); 3970 unsigned TrueVLIndex = 3971 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; 3972 SDValue TrueVL = True.getOperand(TrueVLIndex); 3973 SDValue SEW = True.getOperand(TrueVLIndex + 1); 3974 3975 auto GetMinVL = [](SDValue LHS, SDValue RHS) { 3976 if (LHS == RHS) 3977 return LHS; 3978 if (isAllOnesConstant(LHS)) 3979 return RHS; 3980 if (isAllOnesConstant(RHS)) 3981 return LHS; 3982 auto *CLHS = dyn_cast<ConstantSDNode>(LHS); 3983 auto *CRHS = dyn_cast<ConstantSDNode>(RHS); 3984 if (!CLHS || !CRHS) 3985 return SDValue(); 3986 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS; 3987 }; 3988 3989 // Because N and True must have the same passthru operand (or True's operand 3990 // is implicit_def), the "effective" body is the minimum of their VLs. 3991 SDValue OrigVL = VL; 3992 VL = GetMinVL(TrueVL, VL); 3993 if (!VL) 3994 return false; 3995 3996 // Some operations produce different elementwise results depending on the 3997 // active elements, like viota.m or vredsum. This transformation is illegal 3998 // for these if we change the active elements (i.e. mask or VL). 3999 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc)); 4000 if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL)) 4001 return false; 4002 if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) && 4003 (Mask && !usesAllOnesMask(Mask, Glue))) 4004 return false; 4005 4006 // Make sure it doesn't raise any observable fp exceptions, since changing the 4007 // active elements will affect how fflags is set. 4008 if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept()) 4009 return false; 4010 4011 SDLoc DL(N); 4012 4013 unsigned MaskedOpc = Info->MaskedPseudo; 4014 #ifndef NDEBUG 4015 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc); 4016 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) && 4017 "Expected instructions with mask have policy operand."); 4018 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(), 4019 MCOI::TIED_TO) == 0 && 4020 "Expected instructions with mask have a tied dest."); 4021 #endif 4022 4023 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru 4024 // operand is undefined. 4025 // 4026 // However, if the VL became smaller than what the vmerge had originally, then 4027 // elements past VL that were previously in the vmerge's body will have moved 4028 // to the tail. In that case we always need to use tail undisturbed to 4029 // preserve them. 4030 bool MergeVLShrunk = VL != OrigVL; 4031 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk) 4032 ? RISCVII::TAIL_AGNOSTIC 4033 : /*TUMU*/ 0; 4034 SDValue PolicyOp = 4035 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()); 4036 4037 4038 SmallVector<SDValue, 8> Ops; 4039 Ops.push_back(False); 4040 4041 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags); 4042 const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode; 4043 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd); 4044 4045 Ops.push_back(Mask); 4046 4047 // For unmasked "VOp" with rounding mode operand, that is interfaces like 4048 // (..., rm, vl) or (..., rm, vl, policy). 4049 // Its masked version is (..., vm, rm, vl, policy). 4050 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td 4051 if (HasRoundingMode) 4052 Ops.push_back(True->getOperand(TrueVLIndex - 1)); 4053 4054 Ops.append({VL, SEW, PolicyOp}); 4055 4056 // Result node should have chain operand of True. 4057 if (HasChainOp) 4058 Ops.push_back(True.getOperand(TrueChainOpIdx)); 4059 4060 // Add the glue for the CopyToReg of mask->v0. 4061 Ops.push_back(Glue); 4062 4063 MachineSDNode *Result = 4064 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops); 4065 Result->setFlags(True->getFlags()); 4066 4067 if (!cast<MachineSDNode>(True)->memoperands_empty()) 4068 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands()); 4069 4070 // Replace vmerge.vvm node by Result. 4071 ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); 4072 4073 // Replace another value of True. E.g. chain and VL. 4074 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx) 4075 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx)); 4076 4077 return true; 4078 } 4079 4080 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { 4081 bool MadeChange = false; 4082 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 4083 4084 while (Position != CurDAG->allnodes_begin()) { 4085 SDNode *N = &*--Position; 4086 if (N->use_empty() || !N->isMachineOpcode()) 4087 continue; 4088 4089 if (IsVMerge(N)) 4090 MadeChange |= performCombineVMergeAndVOps(N); 4091 } 4092 return MadeChange; 4093 } 4094 4095 /// If our passthru is an implicit_def, use noreg instead. This side 4096 /// steps issues with MachineCSE not being able to CSE expressions with 4097 /// IMPLICIT_DEF operands while preserving the semantic intent. See 4098 /// pr64282 for context. Note that this transform is the last one 4099 /// performed at ISEL DAG to DAG. 4100 bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { 4101 bool MadeChange = false; 4102 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 4103 4104 while (Position != CurDAG->allnodes_begin()) { 4105 SDNode *N = &*--Position; 4106 if (N->use_empty() || !N->isMachineOpcode()) 4107 continue; 4108 4109 const unsigned Opc = N->getMachineOpcode(); 4110 if (!RISCVVPseudosTable::getPseudoInfo(Opc) || 4111 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) || 4112 !isImplicitDef(N->getOperand(0))) 4113 continue; 4114 4115 SmallVector<SDValue> Ops; 4116 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0))); 4117 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { 4118 SDValue Op = N->getOperand(I); 4119 Ops.push_back(Op); 4120 } 4121 4122 MachineSDNode *Result = 4123 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 4124 Result->setFlags(N->getFlags()); 4125 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands()); 4126 ReplaceUses(N, Result); 4127 MadeChange = true; 4128 } 4129 return MadeChange; 4130 } 4131 4132 4133 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 4134 // for instruction scheduling. 4135 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, 4136 CodeGenOptLevel OptLevel) { 4137 return new RISCVDAGToDAGISelLegacy(TM, OptLevel); 4138 } 4139 4140 char RISCVDAGToDAGISelLegacy::ID = 0; 4141 4142 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM, 4143 CodeGenOptLevel OptLevel) 4144 : SelectionDAGISelLegacy( 4145 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {} 4146 4147 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) 4148