//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "RISCV.h"
#include "RISCVCallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;

namespace RISCVISD {
// clang-format off
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  RET_GLUE,
  SRET_GLUE,
  MRET_GLUE,
  CALL,
  TAIL,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
  SELECT_CC,
  BR_CC,

  /// Turn a pair of `i<xlen>`s into an even-odd register pair (`untyped`).
  /// - Output: `untyped` even-odd register pair
  /// - Input 0: `i<xlen>` low-order bits, for even register.
  /// - Input 1: `i<xlen>` high-order bits, for odd register.
  BuildGPRPair,

  /// Turn an even-odd register pair (`untyped`) into a pair of `i<xlen>`s.
  /// - Output 0: `i<xlen>` low-order bits, from even register.
  /// - Output 1: `i<xlen>` high-order bits, from odd register.
  /// - Input: `untyped` even-odd register pair
  SplitGPRPair,

  /// Turns a pair of `i32`s into an `f64`. Needed for rv32d/ilp32.
  /// - Output: `f64`.
  /// - Input 0: low-order bits (31-0) (as `i32`), for even register.
  /// - Input 1: high-order bits (63-32) (as `i32`), for odd register.
  BuildPairF64,

  /// Turns a `f64` into a pair of `i32`s. Needed for rv32d/ilp32.
  /// - Output 0: low-order bits (31-0) (as `i32`), from even register.
  /// - Output 1: high-order bits (63-32) (as `i32`), from odd register.
  /// - Input 0: `f64`.
  SplitF64,

  // Add the Lo 12 bits from an address. Selected to ADDI.
  ADD_LO,
  // Get the Hi 20 bits from an address. Selected to LUI.
  HI,

  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  LLA,

  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
  ADD_TPREL,

  // Multiply high for signed x unsigned.
  MULHSU,

  // Represents (ADD (SHL a, b), c) with the arguments appearing in the order
  // a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba
  // or addsl with XTheadBa.
  SHL_ADD,

  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.
  SLLW,
  SRAW,
  SRLW,
  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.
  DIVW,
  DIVUW,
  REMUW,
  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.
  ROLW,
  RORW,
  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.
  CLZW,
  CTZW,

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  ABSW,

  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of FMV.H.X.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of FMV.W.X.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  FMV_H_X,
  FMV_X_ANYEXTH,
  FMV_X_SIGNEXTH,
  FMV_W_X_RV64,
  FMV_X_ANYEXTW_RV64,
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
  // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_X,
  FCVT_XU,
  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_W_RV64,
  FCVT_WU_RV64,

  // Rounds an FP value to its corresponding integer in the same FP format.
  // First operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  FROUND,

  FCLASS,
  FSGNJX,

  // Floating point fmax and fmin matching the RISC-V instruction semantics.
  FMAX, FMIN,

  // Zfa fli instruction for constant materialization.
  FLI,

  // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
  // It takes a chain operand and another two target constant operands (the
  // CSR numbers of the low and high parts of the counter).
  READ_COUNTER_WIDE,

  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  BREV8,
  ORC_B,
  ZIP,
  UNZIP,

  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  SM4KS, SM4ED,
  SM3P0, SM3P1,

  // May-Be-Operations
  MOPR, MOPRR,

  // Vector Extension
  FIRST_VL_VECTOR_OP,
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_V_VL = FIRST_VL_VECTOR_OP,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_X_VL,
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VFMV_V_F_VL,
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  VMV_X_S,
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  VMV_S_X_VL,
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  VFMV_S_F_VL,
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is the passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
  // Truncates a RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  TRUNCATE_VECTOR_VL,
  // Truncates a RVV integer vector by one power-of-two. If the value doesn't
  // fit in the destination type, the result is saturated. These correspond to
  // vnclip and vnclipu with a shift of 0. Carries both an extra mask and VL
  // operand.
  TRUNCATE_VECTOR_VL_SSAT,
  TRUNCATE_VECTOR_VL_USAT,
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the XLenVT
  // index (either constant or non-constant), the fourth is the mask, the fifth
  // is the VL and the sixth is the policy.
  VSLIDEUP_VL,
  VSLIDEDOWN_VL,
  // Matches the semantics of vslide1up/vslide1down. The first operand is the
  // passthru operand, the second is the source vector, the third is the XLenVT
  // scalar value. The fourth and fifth operands are the mask and VL operands.
  VSLIDE1UP_VL,
  VSLIDE1DOWN_VL,
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
  // passthru operand, the second is the source vector, the third is a scalar
  // value whose type matches the element type of the vectors. The fourth and
  // fifth operands are the mask and VL operands.
  VFSLIDE1UP_VL,
  VFSLIDE1DOWN_VL,
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  VID_VL,
  // Matches the semantics of the vfncvt.rod instruction (convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  VFNCVT_ROD_VL,
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions, but it
  // similarly reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_ADD_VL,
  VECREDUCE_UMAX_VL,
  VECREDUCE_SMAX_VL,
  VECREDUCE_UMIN_VL,
  VECREDUCE_SMIN_VL,
  VECREDUCE_AND_VL,
  VECREDUCE_OR_VL,
  VECREDUCE_XOR_VL,
  VECREDUCE_FADD_VL,
  VECREDUCE_SEQ_FADD_VL,
  VECREDUCE_FMIN_VL,
  VECREDUCE_FMAX_VL,

  // Vector binary ops with a passthru as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.
  ADD_VL,
  AND_VL,
  MUL_VL,
  OR_VL,
  SDIV_VL,
  SHL_VL,
  SREM_VL,
  SRA_VL,
  SRL_VL,
  ROTL_VL,
  ROTR_VL,
  SUB_VL,
  UDIV_VL,
  UREM_VL,
  XOR_VL,
  SMIN_VL,
  SMAX_VL,
  UMIN_VL,
  UMAX_VL,

  BITREVERSE_VL,
  BSWAP_VL,
  CTLZ_VL,
  CTTZ_VL,
  CTPOP_VL,

  SADDSAT_VL,
  UADDSAT_VL,
  SSUBSAT_VL,
  USUBSAT_VL,

  // Averaging adds of signed integers.
  AVGFLOORS_VL,
  // Averaging adds of unsigned integers.
  AVGFLOORU_VL,
  // Rounding averaging adds of signed integers.
  AVGCEILS_VL,
  // Rounding averaging adds of unsigned integers.
  AVGCEILU_VL,

  MULHS_VL,
  MULHU_VL,
  FADD_VL,
  FSUB_VL,
  FMUL_VL,
  FDIV_VL,
  VFMIN_VL,
  VFMAX_VL,

  // Vector unary ops with a mask as a second operand and VL as a third operand.
  FNEG_VL,
  FABS_VL,
  FSQRT_VL,
  FCLASS_VL,
  FCOPYSIGN_VL, // Has a passthru operand
  VFCVT_RTZ_X_F_VL,
  VFCVT_RTZ_XU_F_VL,
  VFROUND_NOEXCEPT_VL,
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  SINT_TO_FP_VL,
  UINT_TO_FP_VL,
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  FP_ROUND_VL,
  FP_EXTEND_VL,

  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
  VFMADD_VL,
  VFNMADD_VL,
  VFMSUB_VL,
  VFNMSUB_VL,

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.
  VFWMADD_VL,
  VFWNMADD_VL,
  VFWMSUB_VL,
  VFWNMSUB_VL,

  // Widening instructions with a passthru value as a third operand, a mask as
  // a fourth operand, and VL as a fifth operand.
  VWMUL_VL,
  VWMULU_VL,
  VWMULSU_VL,
  VWADD_VL,
  VWADDU_VL,
  VWSUB_VL,
  VWSUBU_VL,
  VWADD_W_VL,
  VWADDU_W_VL,
  VWSUB_W_VL,
  VWSUBU_W_VL,
  VWSLL_VL,

  VFWMUL_VL,
  VFWADD_VL,
  VFWSUB_VL,
  VFWADD_W_VL,
  VFWSUB_W_VL,

  // Widening ternary operations with a mask as the fourth operand and VL as the
  // fifth operand.
  VWMACC_VL,
  VWMACCU_VL,
  VWMACCSU_VL,

  // Vector compare producing a mask. Fourth operand is the input mask. Fifth
  // operand is VL.
  SETCC_VL,

  // General vmerge node with mask, true, false, passthru, and vl operands.
  // Tail agnostic vselect can be implemented by setting passthru to undef.
  VMERGE_VL,

  // Mask binary operators.
  VMAND_VL,
  VMOR_VL,
  VMXOR_VL,

  // Set mask vector to all zeros or ones.
  VMCLR_VL,
  VMSET_VL,

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).
  VRGATHER_VX_VL,
  VRGATHER_VV_VL,
  VRGATHEREI16_VV_VL,

  // Vector sign/zero extend with additional mask & VL operands.
  VSEXT_VL,
  VZEXT_VL,

  // vcpop.m with additional mask and VL operands.
  VCPOP_VL,

  // vfirst.m with additional mask and VL operands.
  VFIRST_VL,

  LAST_VL_VECTOR_OP = VFIRST_VL,

  // Read VLENB CSR
  READ_VLENB,
  // Reads value of CSR.
  // The first operand is a chain pointer. The second specifies the address of
  // the required CSR. Two results are produced, the read value and the new
  // chain pointer.
  READ_CSR,
  // Write value to CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. The result is the
  // new chain pointer.
  WRITE_CSR,
  // Read and write value of CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. Two results are
  // produced, the value read before the modification and the new chain
  // pointer.
  SWAP_CSR,

  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.

  // Software guarded BRIND node. Operand 0 is the chain operand and
  // operand 1 is the target address.
  SW_GUARDED_BRIND,
  // Software guarded calls for large code model.
  SW_GUARDED_CALL,
  SW_GUARDED_TAIL,

  SF_VC_XV_SE,
  SF_VC_IV_SE,
  SF_VC_VV_SE,
  SF_VC_FV_SE,
  SF_VC_XVV_SE,
  SF_VC_IVV_SE,
  SF_VC_VVV_SE,
  SF_VC_FVV_SE,
  SF_VC_XVW_SE,
  SF_VC_IVW_SE,
  SF_VC_VVW_SE,
  SF_VC_FVW_SE,
  SF_VC_V_X_SE,
  SF_VC_V_I_SE,
  SF_VC_V_XV_SE,
  SF_VC_V_IV_SE,
  SF_VC_V_VV_SE,
  SF_VC_V_FV_SE,
  SF_VC_V_XVV_SE,
  SF_VC_V_IVV_SE,
  SF_VC_V_VVV_SE,
  SF_VC_V_FVV_SE,
  SF_VC_V_XVW_SE,
  SF_VC_V_IVW_SE,
  SF_VC_V_VVW_SE,
  SF_VC_V_FVW_SE,

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // RISC-V vector tuple type version of INSERT_SUBVECTOR/EXTRACT_SUBVECTOR.
  TUPLE_INSERT,
  TUPLE_EXTRACT,

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  FIRST_STRICTFP_OPCODE,
  STRICT_FCVT_W_RV64 = FIRST_STRICTFP_OPCODE,
  STRICT_FCVT_WU_RV64,
  STRICT_FADD_VL,
  STRICT_FSUB_VL,
  STRICT_FMUL_VL,
  STRICT_FDIV_VL,
  STRICT_FSQRT_VL,
  STRICT_VFMADD_VL,
  STRICT_VFNMADD_VL,
  STRICT_VFMSUB_VL,
  STRICT_VFNMSUB_VL,
  STRICT_FP_ROUND_VL,
  STRICT_FP_EXTEND_VL,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_FSETCC_VL,
  STRICT_FSETCCS_VL,
  STRICT_VFROUND_NOEXCEPT_VL,
  LAST_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,

  FIRST_MEMORY_OPCODE,
  TH_LWD = FIRST_MEMORY_OPCODE,
  TH_LWUD,
  TH_LDD,
  TH_SWD,
  TH_SDD,
  LAST_MEMORY_OPCODE = TH_SDD,
};
// clang-format on
} // namespace RISCVISD

class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

public:
  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  const RISCVSubtarget &getSubtarget() const { return Subtarget; }

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isTruncateFree(SDValue Val, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
  bool signExtendConstant(const ConstantInt *CI) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue Y) const override;
  bool hasBitTest(SDValue X, SDValue Y) const override;
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  bool shouldScalarizeBinop(SDValue VecOp) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool softPromoteHalfType() const override { return true; }

  /// Return the register type for a given MVT, ensuring vectors are treated
  /// as a series of gpr sized integers.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  /// Return the number of registers for a given MVT, for inline assembly.
  unsigned
  getNumRegisters(LLVMContext &Context, EVT VT,
                  std::optional<MVT> RegisterVT = std::nullopt) const override;

  /// Return the number of registers for a given MVT, ensuring vectors are
  /// treated as a series of gpr sized integers.
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // add one store instruction. There is potentially a more significant
    // benefit because it avoids the float->int domain switch for the input
    // value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and add one store instruction (costing one more store
    // buffer). Since the benefit is more blurred we leave such a pair out
    // until we get a testcase to prove it is a win.
    return false;
  }
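
  // Illustrative example (hypothetical IR, not taken from a testcase) of the
  // mixed pair isMultiStoresCheaperThanBitsMerge above keeps as two stores:
  //   store float %f, ptr %p
  //   store i32 %i, ptr %q      ; adjacent 32-bit slot
  // Keeping the two scalar stores avoids moving %f into a GPR and combining
  // the two values with shift/or just to emit one wide store.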

  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// Return the cost of LMUL for linear operations.
  InstructionCost getLMULCost(MVT VT) const;

  InstructionCost getVRGatherVVCost(MVT VT) const;
  InstructionCost getVRGatherVICost(MVT VT) const;
  InstructionCost getVSlideVXCost(MVT VT) const;
  InstructionCost getVSlideVICost(MVT VT) const;

  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  // This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const override;

  bool
  areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                      const MemSDNode &NodeY) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    if (VT == MVT::i8 || VT == MVT::i16)
      return false;

    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
  }

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace 4 or more scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem >= 4;
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

  bool isCtpopFast(EVT VT) const override;

  unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;

  bool preferZeroCompareBranch() const override { return true; }

  // Note that one specific case requires fence insertion for an
  // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
  // than this hook due to limitations in the interface here.
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }

  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override;

  TargetLowering::ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override {
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return ShiftLegalizationStrategy::LowerToLibcall;
    return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
                                                              ExpansionFactor);
  }

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool shouldExtendTypeInLibCall(EVT Type) const override;
  bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override;

  /// Returns the register with the specified architectural or ABI name. This
  /// method is necessary to lower the llvm.read_register.* and
  /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
  /// with the clang -ffixed-xX flag for access to be allowed.
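  ///
  /// Illustrative IR only (the register name here is just an example); the
  /// intrinsic names the register through a metadata string:
  ///   %tp = call i64 @llvm.read_register.i64(metadata !0)
  ///   !0 = !{!"tp"}
  /// This hook is what resolves the "tp" string to the corresponding physical
  /// register; allocatable registers additionally need the -ffixed-xX flag
  /// noted above.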
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  // Lower incoming arguments, copy physregs into vregs.
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context, const Type *RetTy) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
                                      Value *AlignedAddr, Value *Incr,
                                      Value *Mask, Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
      const override;

  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;

  // Return the value of VLMax for the given vector type (i.e. SEW and LMUL).
  SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;

  static RISCVII::VLMUL getLMUL(MVT VT);
  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
                                      unsigned MinSize) {
    // Original equation:
    //   VLMAX = (VectorBits / EltSize) * LMUL
    //   where LMUL = MinSize / RISCV::RVVBitsPerBlock
    // The following equations have been reordered to prevent loss of precision
    // when calculating fractional LMUL.
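    //
    // Worked example (illustrative numbers, assuming RVVBitsPerBlock == 64):
    // for nxv4i32, MinSize = 4 * 32 = 128. With VectorBits = 256 and
    // EltSize = 32:
    //   original order:  LMUL = 128 / 64 = 2, VLMAX = (256 / 32) * 2 = 16
    //   reordered form:  ((256 / 32) * 128) / 64 = 16
    // For a fractional LMUL type such as nxv1i32 (MinSize = 32), the original
    // order would truncate LMUL to 0 in integer math, while the reordered
    // form still yields ((256 / 32) * 32) / 64 = 4.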
    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
  }

  // Return inclusive (low, high) bounds on the value of VLMAX for the
  // given scalable container type given known bounds on VLEN.
  static std::pair<unsigned, unsigned>
  computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);

  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  static unsigned getRegClassIDForVecVT(MVT VT);
  static std::pair<unsigned, unsigned>
  decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                           unsigned InsertExtractIdx,
                                           const RISCVRegisterInfo *TRI);
  MVT getContainerForFixedLengthVector(MVT VT) const;

  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;

  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;

  bool isVScaleKnownToBeAPowerOfTwo() const override;

  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing is not supported on indexed loads/stores.
    return Scale == 1;
  }

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic for
  /// this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a strided load or store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(
      LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;

  bool lowerInterleaveIntrinsicToStore(
      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;

  bool supportKCFIBundles() const override { return true; }

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                 int JTI, SelectionDAG &DAG) const override;

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

  unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const;

  MachineBasicBlock *emitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

private:
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;

  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

  SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPMergeMask(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;

  /// Generate error diagnostics if any register used by CC has been marked
  /// reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;

  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;

  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  /// Disable normalizing
  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y), Y)
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }

  /// For available scheduling models FDIV + two independent FMULs are much
  /// faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  bool shouldFoldSelectWithSingleBitTest(EVT VT,
                                         const APInt &AndMask) const override;

  unsigned getMinimumJumpTableEntries() const override;

  SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start,
                          SDValue End, SDValue Flags, SDLoc DL) const;

  std::pair<const TargetRegisterClass *, uint8_t>
  findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override;
};

namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;
  uint8_t VLOperand;
  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable

} // end namespace llvm

#endif